This example uses a data set from Draper and Smith (1981, pp. 629-630). Class SelectionRegression
is invoked to find the best regression for each subset size using the R-squared criterion.
using System;
using Imsl.Math;
using Imsl.Stat;

/// <summary>
/// Example program: runs <c>SelectionRegression</c> on a data set of 13
/// observations with 4 variables and prints, for every subset size from 1 to 4,
/// the criterion values together with the variables of each regression,
/// followed by one coefficient-statistics table per regression.
/// </summary>
public class SelectionRegressionEx1
{
    /// <summary>
    /// Entry point. Builds the data, computes the regressions and writes the
    /// results to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(String[] args)
    {
        // The data matrix has four columns; the same count drives the
        // constructor argument and both reporting loops below.
        const int variableCount = 4;

        double[,] x =
        {
            {7.0, 26.0, 6.0, 60.0},
            {1.0, 29.0, 15.0, 52.0},
            {11.0, 56.0, 8.0, 20.0},
            {11.0, 31.0, 8.0, 47.0},
            {7.0, 52.0, 6.0, 33.0},
            {11.0, 55.0, 9.0, 22.0},
            {3.0, 71.0, 17.0, 6.0},
            {1.0, 31.0, 22.0, 44.0},
            {2.0, 54.0, 18.0, 22.0},
            {21.0, 47.0, 4.0, 26.0},
            {1.0, 40.0, 23.0, 34.0},
            {11.0, 66.0, 9.0, 12.0},
            {10.0, 68.0, 8.0, 12.0}
        };

        double[] y =
        {
            78.5, 74.3, 104.3, 87.6, 95.9, 109.2, 102.7,
            72.5, 93.1, 115.9, 83.8, 113.3, 109.4
        };

        SelectionRegression regression = new SelectionRegression(variableCount);
        regression.Compute(x, y);
        SelectionRegression.SummaryStatistics summary = regression.Statistics;

        // Part 1: for each subset size, one output line per regression holding
        // its criterion value followed by the entries of the matching row of
        // the independent-variable table.
        for (int size = 1; size <= variableCount; size++)
        {
            double[] criterionValues = summary.GetCriterionValues(size);
            int[,] variables = summary.GetIndependentVariables(size);
            int variablesPerRow = variables.GetLength(1);

            Console.WriteLine("Regressions with " + size + " variable(s) (R-squared)");

            for (int row = 0; row < criterionValues.Length; row++)
            {
                Console.Write(" " + criterionValues[row] + " ");
                for (int col = 0; col < variablesPerRow; col++)
                {
                    Console.Write(variables[row, col] + " ");
                }
                Console.WriteLine();
            }

            Console.WriteLine();
        }

        // Set up a PrintMatrix object for use in the loop below.
        PrintMatrix printer = new PrintMatrix();
        printer.SetColumnSpacing(8);

        // No row or column labels; every number is shown with three decimals.
        PrintMatrixFormat format = new PrintMatrixFormat();
        format.SetNoColumnLabels();
        format.SetNoRowLabels();
        format.NumberFormat = "0.000";

        // Part 2: one coefficient table per regression. The statistics are
        // requested with a zero-based index, while the heading is one-based.
        for (int size = 1; size <= variableCount; size++)
        {
            double[,] coefficientStats = summary.GetCoefficientStatistics(size - 1);

            Console.WriteLine("\n\nRegressions with " + size + " variable(s) (R-squared)");
            Console.WriteLine("Variable Coefficient Standard Error t-statistic p-value");
            printer.Print(format, coefficientStats);
        }
    }
}
Regressions with 1 variable(s) (R-squared)
 67.4541964131609 4
 66.6268257633294 2
 53.3948023835034 1
 28.5872731229812 3

Regressions with 2 variable(s) (R-squared)
 97.8678374535632 1 2
 97.2471047716931 1 4
 93.5289640615808 3 4
 68.006040795005 2 4
 54.8166748844824 1 3

Regressions with 3 variable(s) (R-squared)
 98.2335451200427 1 2 4
 98.2284679219087 1 2 3
 98.1281092587344 1 3 4
 97.2819959386273 2 3 4

Regressions with 4 variable(s) (R-squared)
 98.237562040768 1 2 3 4



Regressions with 1 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
4.000 -0.738 0.155 -4.775 0.001


Regressions with 2 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1.000 1.468 0.121 12.105 0.000
2.000 0.662 0.046 14.442 0.000


Regressions with 3 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1.000 1.452 0.117 12.410 0.000
2.000 0.416 0.186 2.242 0.052
4.000 -0.237 0.173 -1.365 0.205


Regressions with 4 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1.000 1.551 0.745 2.083 0.071
2.000 0.510 0.724 0.705 0.501
3.000 0.102 0.755 0.135 0.896
4.000 -0.144 0.709 -0.203 0.844

Link to C# source.