Example 1: SelectionRegression

This example uses a data set from Draper and Smith (1981, pp. 629-630). Class SelectionRegression is invoked to find the best regression for each subset size using the R^2 criterion.

using System;
using Imsl.Math;
using Imsl.Stat;

/// <summary>
/// Example program: runs <c>SelectionRegression</c> on a 13-observation,
/// 4-variable data set and prints, for every subset size, the criterion
/// values with their variable subsets, followed by one coefficient table
/// per regression index.
/// </summary>
public class SelectionRegressionEx1
{
    /// <summary>
    /// Program entry point; builds the data, runs the selection and writes
    /// the results to the console.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    public static void Main(String[] args)
    {
        // x has four columns; the same count drives the constructor
        // argument and both reporting loops below.
        const int variableCount = 4;

        double[,] x =
        {
            { 7.0, 26.0,  6.0, 60.0},
            { 1.0, 29.0, 15.0, 52.0},
            {11.0, 56.0,  8.0, 20.0},
            {11.0, 31.0,  8.0, 47.0},
            { 7.0, 52.0,  6.0, 33.0},
            {11.0, 55.0,  9.0, 22.0},
            { 3.0, 71.0, 17.0,  6.0},
            { 1.0, 31.0, 22.0, 44.0},
            { 2.0, 54.0, 18.0, 22.0},
            {21.0, 47.0,  4.0, 26.0},
            { 1.0, 40.0, 23.0, 34.0},
            {11.0, 66.0,  9.0, 12.0},
            {10.0, 68.0,  8.0, 12.0}
        };

        double[] y =
        {
             78.5,  74.3, 104.3,  87.6,  95.9,
            109.2, 102.7,  72.5,  93.1, 115.9,
             83.8, 113.3, 109.4
        };

        SelectionRegression selector = new SelectionRegression(variableCount);
        selector.Compute(x, y);
        SelectionRegression.SummaryStatistics summary = selector.Statistics;

        // Part 1: criterion values and the variables of each subset,
        // queried by subset size (1-based).
        for (int size = 1; size <= variableCount; size++)
        {
            double[] criteria = summary.GetCriterionValues(size);
            int[,] subsets = summary.GetIndependentVariables(size);

            Console.WriteLine("Regressions with " + size + " variable(s)  (R-squared)");
            for (int row = 0; row < criteria.Length; row++)
            {
                Console.Write("     " + criteria[row] + "        ");
                for (int col = 0; col < subsets.GetLength(1); col++)
                {
                    Console.Write(subsets[row, col] + "   ");
                }
                Console.WriteLine();
            }
            Console.WriteLine();
        }

        // Part 2: coefficient tables. One printer and one format object
        // are configured once and reused for every table.
        PrintMatrix printer = new PrintMatrix();
        printer.SetColumnSpacing(8);

        PrintMatrixFormat format = new PrintMatrixFormat();
        format.SetNoColumnLabels();
        format.SetNoRowLabels();
        format.NumberFormat = "0.000";

        // GetCoefficientStatistics is queried with a 0-based index, so the
        // heading shows index + 1.
        for (int index = 0; index < variableCount; index++)
        {
            double[,] coefficients = summary.GetCoefficientStatistics(index);

            Console.WriteLine("\n\nRegressions with " + (index + 1) + " variable(s)  (R-squared)");
            Console.WriteLine("Variable   Coefficient   Standard Error  t-statistic   p-value");
            printer.Print(format, coefficients);
        }
    }
}

Output

Regressions with 1 variable(s)  (R-squared)
     67.4541964131609        4   
     66.6268257633294        2   
     53.3948023835033        1   
     28.5872731229812        3   

Regressions with 2 variable(s)  (R-squared)
     97.8678374535631        1   2   
     97.2471047716931        1   4   
     93.5289640615807        3   4   
     68.006040795005        2   4   
     54.8166748844857        1   3   

Regressions with 3 variable(s)  (R-squared)
     98.2335451200426        1   2   4   
     98.2284679219086        1   2   3   
     98.1281092587343        1   3   4   
     97.2819959386273        2   3   4   

Regressions with 4 variable(s)  (R-squared)
     98.237562040768        1   2   3   4   



Regressions with 1 variable(s)  (R-squared)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                           
4.000        -0.738        0.155        -4.775        0.001        



Regressions with 2 variable(s)  (R-squared)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                          
1.000        1.468        0.121        12.105        0.000        
2.000        0.662        0.046        14.442        0.000        



Regressions with 3 variable(s)  (R-squared)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                            
1.000         1.452        0.117         12.410        0.000        
2.000         0.416        0.186         2.242         0.052        
4.000        -0.237        0.173        -1.365         0.205        



Regressions with 4 variable(s)  (R-squared)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                           
1.000         1.551        0.745         2.083        0.071        
2.000         0.510        0.724         0.705        0.501        
3.000         0.102        0.755         0.135        0.896        
4.000        -0.144        0.709        -0.203        0.844        


Link to C# source.