001package stdlib;
/**
 * Simple (one-variable) least-squares linear regression.
 *
 * Fits the line {@code y = beta1 * x + beta0} to a set of paired observations
 * and exposes the fitted coefficients, the coefficient of determination and
 * the standard errors of both coefficients. Instances are immutable: all
 * statistics are computed once in the constructor.
 *
 * Degenerate inputs are not rejected. With fewer than three points, or when
 * every {@code x} value is identical, some of the reported statistics are
 * {@code NaN} or infinite because the underlying formulas divide by zero.
 */
public class XLinearRegression {
        private final int n;                 // number of observations
        private final double beta0, beta1;   // intercept and slope of the fitted line
        private final double r2;             // coefficient of determination
        private final double svar0, svar1;   // variance estimates of beta0 and beta1

        /**
         * Performs the regression of {@code y} on {@code x}.
         *
         * @param x values of the predictor variable
         * @param y values of the response variable; {@code y[i]} is paired with {@code x[i]}
         * @throws IllegalArgumentException if the two arrays differ in length
         * @throws NullPointerException if either array is {@code null}
         */
        public XLinearRegression(double[] x, double[] y) {
                // Previously a shorter y failed with an index error mid-computation
                // and a longer y was silently truncated; reject both explicitly.
                if (x.length != y.length) {
                        throw new IllegalArgumentException(
                                "array lengths are not equal: x has " + x.length
                                + " elements, y has " + y.length);
                }
                n = x.length;

                // first pass: sample means
                double sumx = 0.0, sumy = 0.0;
                for (int i = 0; i < n; i++) {
                        sumx += x[i];
                        sumy += y[i];
                }
                double xbar = sumx / n;
                double ybar = sumy / n;

                // second pass: centered sums of squares and cross products
                double xxbar = 0.0, yybar = 0.0, xybar = 0.0;
                for (int i = 0; i < n; i++) {
                        double dx = x[i] - xbar;
                        double dy = y[i] - ybar;
                        xxbar += dx * dx;
                        yybar += dy * dy;
                        xybar += dx * dy;
                }
                beta1 = xybar / xxbar;
                beta0 = ybar - beta1 * xbar;

                // third pass: goodness of fit
                double rss = 0.0;      // residual sum of squares
                double ssr = 0.0;      // regression sum of squares
                for (int i = 0; i < n; i++) {
                        double fit = beta1 * x[i] + beta0;
                        rss += (fit - y[i]) * (fit - y[i]);
                        ssr += (fit - ybar) * (fit - ybar);
                }

                // two parameters were estimated, so two degrees of freedom are lost
                int df = n - 2;
                r2 = ssr / yybar;
                double svar = rss / df;      // residual variance estimate
                svar1 = svar / xxbar;
                svar0 = svar / n + xbar * xbar * svar1;
        }

        /**
         * Returns the intercept of the fitted line {@code y = beta1 * x + beta0}.
         *
         * @return the intercept
         */
        public double beta0() { return beta0; }

        /**
         * Returns the slope of the fitted line {@code y = beta1 * x + beta0}.
         *
         * @return the slope
         */
        public double beta1() { return beta1; }

        /**
         * Returns the coefficient of determination, computed as the regression
         * sum of squares divided by the total sum of squares of {@code y}.
         *
         * @return R^2
         */
        public double R2() { return r2; }

        /**
         * Returns the standard error of the intercept estimate.
         *
         * @return the square root of the intercept's variance estimate
         */
        public double beta0StdErr() { return Math.sqrt(svar0); }

        /**
         * Returns the standard error of the slope estimate.
         *
         * @return the square root of the slope's variance estimate
         */
        public double beta1StdErr() { return Math.sqrt(svar1); }

        /**
         * Predicts the response for a given predictor value using the fitted line.
         *
         * @param x the predictor value
         * @return {@code beta1 * x + beta0}
         */
        public double predict(double x) {
                return beta1 * x + beta0;
        }

        /**
         * Returns the fitted model and its R^2 as text, with the slope and
         * intercept shown to two decimals and R^2 to three.
         *
         * @return a string of the form {@code "<slope> N + <intercept>  (R^2 = <r2>)"}
         */
        @Override
        public String toString() {
                return String.format("%.2f N + %.2f  (R^2 = %.3f)", beta1(), beta0(), R2());
        }
}
071