Example 2: SelectionRegression

This example uses the same data set as the first example, but Mallows' C_p statistic is used as the criterion rather than R^2. Note that when Mallows' C_p statistic (or adjusted R^2) is specified, MaximumBestFound indicates the total number of "best" regressions (rather than the number of best regressions per subset size, as in the case of the R^2 criterion). In this example, the three best regressions are found to be the variable subsets (1, 2), (1, 2, 4), and (1, 2, 3). Accordingly, the three coefficient tables at the end of the output are ordered by rank among these best regressions, even though their headings read "Regressions with n variable(s)".

using System;
using Imsl.Math;
using Imsl.Stat;

/// <summary>
/// Example 2 for SelectionRegression: selects the best subset regressions
/// for a 13-observation, 4-candidate-variable data set using Mallows' C_p
/// as the selection criterion, then prints the criterion values per subset
/// size and the coefficient statistics of the best regressions found.
/// </summary>
public class SelectionRegressionEx2
{
    /// <summary>
    /// Runs the example and writes the results to standard output.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(String[] args)
    {
        // Candidate independent variables: one row per observation,
        // one column per candidate variable.
        double[,] x = {
            {7.0, 26.0, 6.0, 60.0},
            {1.0, 29.0, 15.0, 52.0},
            {11.0, 56.0, 8.0, 20.0},
            {11.0, 31.0, 8.0, 47.0},
            {7.0, 52.0, 6.0, 33.0},
            {11.0, 55.0, 9.0, 22.0},
            {3.0, 71.0, 17.0, 6.0},
            {1.0, 31.0, 22.0, 44.0},
            {2.0, 54.0, 18.0, 22.0},
            {21.0, 47.0, 4.0, 26.0},
            {1.0, 40.0, 23.0, 34.0},
            {11.0, 66.0, 9.0, 12.0},
            {10.0, 68.0, 8.0, 12.0}
        };

        // Dependent variable: one response per observation (row of x).
        double[] y = new double[] {
            78.5, 74.3, 104.3, 87.6,
            95.9, 109.2, 102.7, 72.5,
            93.1, 115.9, 83.8, 113.3,
            109.4
        };

        // Derive the candidate count from the data and name the number of
        // best regressions once, so the loop bounds below cannot drift out
        // of sync with the values handed to SelectionRegression.
        int candidateCount = x.GetLength(1);
        int bestCount = 3;

        SelectionRegression sr = new SelectionRegression(candidateCount);
        sr.CriterionOption = Imsl.Stat.SelectionRegression.Criterion.MallowsCP;
        sr.MaximumBestFound = bestCount;
        sr.Compute(x, y);
        SelectionRegression.SummaryStatistics stats = sr.Statistics;

        // For each subset size, list every reported criterion value followed
        // by the indices of the variables in the corresponding regression.
        for (int size = 1; size <= candidateCount; size++)
        {
            double[] criterionValues = stats.GetCriterionValues(size);
            int[,] variableIndices = stats.GetIndependentVariables(size);
            int variablesPerRow = variableIndices.GetLength(1);

            Console.Out.WriteLine("Regressions with "+size+" variable(s)  (MallowsCP)");
            for (int row = 0; row < criterionValues.Length; row++)
            {
                Console.Out.Write("     " + criterionValues[row] + "        ");
                for (int col = 0; col < variablesPerRow; col++)
                {
                    Console.Out.Write(variableIndices[row, col] + "   ");
                }
                Console.Out.WriteLine("");
            }
            Console.Out.WriteLine("");
        }

        // Setup a PrintMatrix object for use in the loop below.
        PrintMatrix pm = new PrintMatrix();
        pm.SetColumnSpacing(9);
        PrintMatrixFormat format = new PrintMatrixFormat();
        format.SetNoColumnLabels();
        format.SetNoRowLabels();
        format.NumberFormat = "0.000";

        // NOTE(review): per the example description, with Mallows' C_p the
        // MaximumBestFound setting caps the TOTAL number of best regressions,
        // so the index below is a rank among the best regressions and not a
        // subset size (the first table printed lists two variables). The
        // heading text is left unchanged so that the program's output stays
        // identical to the published output; consider rewording it.
        for (int rank = 0; rank < bestCount; rank++)
        {
            double[,] coefficientStats = stats.GetCoefficientStatistics(rank);
            Console.Out.WriteLine("\n\nRegressions with "+(rank+1)+" variable(s)  (MallowsCP)");
            Console.Out.WriteLine("Variable   Coefficient   Standard Error  t-statistic   p-value");
            pm.Print(format, coefficientStats);
        }
    }
}

Output

Regressions with 1 variable(s)  (MallowsCP)
     138.730833491674        4   
     142.486406936958        2   
     202.548769123445        1   
     315.154284140073        3   

Regressions with 2 variable(s)  (MallowsCP)
     2.6782415983184        1   2   
     5.4958508247584        1   4   
     22.3731119646967        3   4   
     138.225919754638        2   4   
     198.094652569569        1   3   

Regressions with 3 variable(s)  (MallowsCP)
     3.01823347348731        1   2   4   
     3.04127972306423        1   2   3   
     3.49682444234832        1   3   4   
     7.33747399565576        2   3   4   

Regressions with 4 variable(s)  (MallowsCP)
     5        1   2   3   4   



Regressions with 1 variable(s)  (MallowsCP)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                                
1.000         1.468         0.121         12.105         0.000         
2.000         0.662         0.046         14.442         0.000         



Regressions with 2 variable(s)  (MallowsCP)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                                  
1.000          1.452         0.117          12.410         0.000         
2.000          0.416         0.186          2.242          0.052         
4.000         -0.237         0.173         -1.365          0.205         



Regressions with 3 variable(s)  (MallowsCP)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                                
1.000         1.696         0.205         8.290          0.000         
2.000         0.657         0.044         14.851         0.000         
3.000         0.250         0.185         1.354          0.209         


Link to C# source.