This example uses a data set from Draper and Smith (1981, pp. 629-630). Class SelectionRegression
is invoked to find the best regression for each subset size using the
R-squared criterion.
using System;
using Imsl.Math;
using Imsl.Stat;
/// <summary>
/// Example program: runs SelectionRegression on a 13-observation,
/// 4-variable data set and prints, for each subset size, the criterion
/// values with their variable selections, followed by one coefficient
/// table per regression index.
/// </summary>
public class SelectionRegressionEx1
{
    // Number of candidate independent variables (columns of the x matrix).
    private const int CandidateCount = 4;

    /// <summary>
    /// Builds the data set, computes the selection regression and writes
    /// both result sections to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    public static void Main(String[] args)
    {
        // Independent variables: one row per observation.
        double[,] x =
        {
            { 7.0, 26.0,  6.0, 60.0},
            { 1.0, 29.0, 15.0, 52.0},
            {11.0, 56.0,  8.0, 20.0},
            {11.0, 31.0,  8.0, 47.0},
            { 7.0, 52.0,  6.0, 33.0},
            {11.0, 55.0,  9.0, 22.0},
            { 3.0, 71.0, 17.0,  6.0},
            { 1.0, 31.0, 22.0, 44.0},
            { 2.0, 54.0, 18.0, 22.0},
            {21.0, 47.0,  4.0, 26.0},
            { 1.0, 40.0, 23.0, 34.0},
            {11.0, 66.0,  9.0, 12.0},
            {10.0, 68.0,  8.0, 12.0}
        };

        // Dependent variable: one response per observation.
        double[] y =
        {
             78.5,  74.3, 104.3,
             87.6,  95.9, 109.2,
            102.7,  72.5,  93.1,
            115.9,  83.8, 113.3,
            109.4
        };

        SelectionRegression regression = new SelectionRegression(CandidateCount);
        regression.Compute(x, y);
        SelectionRegression.SummaryStatistics summary = regression.Statistics;

        // Section 1: criterion values, queried by subset size 1..CandidateCount.
        for (int subsetSize = 1; subsetSize <= CandidateCount; subsetSize++)
        {
            WriteCriterionTable(summary, subsetSize);
        }

        // Shared printer and format for the coefficient tables below:
        // no row/column labels, three decimal places.
        PrintMatrix printer = new PrintMatrix();
        printer.SetColumnSpacing(8);
        PrintMatrixFormat format = new PrintMatrixFormat();
        format.SetNoColumnLabels();
        format.SetNoRowLabels();
        format.NumberFormat = "0.000";

        // Section 2: coefficient statistics, queried by zero-based index.
        for (int index = 0; index < CandidateCount; index++)
        {
            WriteCoefficientTable(summary, index, printer, format);
        }
    }

    // Prints one line per criterion value for the given subset size; each
    // line holds the value followed by every entry in the matching row of
    // the independent-variable table.
    private static void WriteCriterionTable(
        SelectionRegression.SummaryStatistics summary, int subsetSize)
    {
        double[] criteria = summary.GetCriterionValues(subsetSize);
        int[,] variables = summary.GetIndependentVariables(subsetSize);
        int columns = variables.GetLength(1);

        Console.WriteLine("Regressions with " + subsetSize + " variable(s) (R-squared)");
        for (int row = 0; row < criteria.Length; row++)
        {
            Console.Write(" " + criteria[row] + " ");
            for (int col = 0; col < columns; col++)
            {
                Console.Write(variables[row, col] + " ");
            }
            Console.WriteLine();
        }
        Console.WriteLine();
    }

    // Prints the heading lines and the coefficient-statistics matrix for the
    // given zero-based index; the heading shows index + 1.
    private static void WriteCoefficientTable(
        SelectionRegression.SummaryStatistics summary, int index,
        PrintMatrix printer, PrintMatrixFormat format)
    {
        double[,] coefficients = summary.GetCoefficientStatistics(index);

        Console.WriteLine("\n\nRegressions with " + (index + 1) + " variable(s) " +
                          "(R-squared)");
        Console.WriteLine("Variable Coefficient Standard Error " +
                          "t-statistic p-value");
        printer.Print(format, coefficients);
    }
}
Regressions with 1 variable(s) (R-squared)
67.4541964131609 4
66.6268257633294 2
53.3948023835033 1
28.5872731229812 3
Regressions with 2 variable(s) (R-squared)
97.8678374535631 1 2
97.2471047716931 1 4
93.5289640615807 3 4
68.006040795005 2 4
54.8166748844857 1 3
Regressions with 3 variable(s) (R-squared)
98.2335451200426 1 2 4
98.2284679219086 1 2 3
98.1281092587343 1 3 4
97.2819959386273 2 3 4
Regressions with 4 variable(s) (R-squared)
98.237562040768 1 2 3 4
Regressions with 1 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
4.000 -0.738 0.155 -4.775 0.001
Regressions with 2 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1.000 1.468 0.121 12.105 0.000
2.000 0.662 0.046 14.442 0.000
Regressions with 3 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1.000 1.452 0.117 12.410 0.000
2.000 0.416 0.186 2.242 0.052
4.000 -0.237 0.173 -1.365 0.205
Regressions with 4 variable(s) (R-squared)
Variable Coefficient Standard Error t-statistic p-value
1.000 1.551 0.745 2.083 0.071
2.000 0.510 0.724 0.705 0.501
3.000 0.102 0.755 0.135 0.896
4.000 -0.144 0.709 -0.203 0.844
Link to C# source.