Example 2: SelectionRegression

This example uses the same data set as the first example, but Mallows' C_p statistic is used as the criterion rather than R^2. Note that when Mallows' C_p statistic (or adjusted R^2) is specified, the method setMaximumBestFound indicates the total number of "best" regressions, rather than the number of best regressions per subset size as it does for the R^2 criterion. In this example, the three best regressions are found to be (1, 2), (1, 2, 4), and (1, 2, 3).

import java.text.*;
import com.imsl.stat.*;
import com.imsl.math.PrintMatrix;
import com.imsl.math.PrintMatrixFormat;

/**
 * SelectionRegression example that uses Mallows' Cp as the selection
 * criterion.
 *
 * <p>The program fits the data, prints the criterion value and the variable
 * indices of every reported regression grouped by subset size, and then
 * prints the coefficient statistics of the requested best regressions.
 */
public class SelectionRegressionEx2 {

    /** Total number of "best" regressions requested and reported. */
    private static final int BEST_REGRESSION_COUNT = 3;

    /** Criterion name shown in the section headings. */
    private static final String CRITERION_NAME = "Mallows Cp";

    /** Column heading of the criterion tables. */
    private static final String CRITERION_HEADING =
        "   Criterion               Variables";

    /** Column heading of the coefficient tables. */
    private static final String COEFFICIENT_HEADING =
        "Variable   Coefficient   Standard Error  t-statistic   p-value";

    /**
     * Runs the example and writes the results to standard output.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the regression computation or result retrieval
     *         fails
     */
    public static void main(String[] args) throws Exception {
        // One row per observation, one column per candidate variable.
        double[][] x = {
            {7., 26., 6., 60.},
            {1., 29., 15., 52.},
            {11., 56., 8., 20.},
            {11., 31., 8., 47.},
            {7., 52., 6., 33.},
            {11., 55., 9., 22.},
            {3., 71., 17., 6.},
            {1., 31., 22., 44.},
            {2., 54., 18., 22.},
            {21., 47., 4., 26.},
            {1., 40., 23., 34.},
            {11., 66., 9., 12.},
            {10., 68., 8., 12.}
        };

        // Response values, one per row of x.
        double[] y = {
            78.5, 74.3, 104.3, 87.6,
            95.9, 109.2, 102.7, 72.5,
            93.1, 115.9, 83.8, 113.3,
            109.4
        };

        // Derive the candidate count from the data so the constructor
        // argument and the subset-size loop below cannot drift apart.
        int candidateCount = x[0].length;

        MessageFormat subsetTitle =
            new MessageFormat("Regressions with {0} variable(s) ({1})");
        MessageFormat bestTitle =
            new MessageFormat("Best Regressions with {0} variable(s) ({1})");

        SelectionRegression sr = new SelectionRegression(candidateCount);
        sr.setCriterionOption(SelectionRegression.MALLOWS_CP_CRITERION);
        sr.setMaximumBestFound(BEST_REGRESSION_COUNT);
        sr.compute(x, y);
        SelectionRegression.Statistics stats = sr.getStatistics();

        // Criterion values and variable sets, grouped by subset size.
        for (int size = 1; size <= candidateCount; size++) {
            double[] criteria = stats.getCriterionValues(size);
            int[][] variables = stats.getIndependentVariables(size);

            Object[] titleArgs = {Integer.valueOf(size), CRITERION_NAME};
            System.out.println(subsetTitle.format(titleArgs));
            System.out.println(CRITERION_HEADING);
            printCriterionRows(criteria, variables);
            System.out.println("");
        }

        // The number layout is the same for every coefficient table, so the
        // formatters are configured once outside the loop.
        NumberFormat nf = NumberFormat.getInstance();
        nf.setMinimumFractionDigits(4);
        PrintMatrixFormat tableFormat = new PrintMatrixFormat();
        tableFormat.setNoColumnLabels();
        tableFormat.setNoRowLabels();
        tableFormat.setNumberFormat(nf);

        // Coefficient statistics of each requested best regression.
        for (int best = 0; best < BEST_REGRESSION_COUNT; best++) {
            System.out.println("");

            double[][] coefficients = stats.getCoefficientStatistics(best);

            // One row per variable in the regression, so the row count is
            // the subset size shown in the title.
            Object[] titleArgs =
                {Integer.valueOf(coefficients.length), CRITERION_NAME};
            System.out.println(bestTitle.format(titleArgs));
            System.out.println(COEFFICIENT_HEADING);

            PrintMatrix pm = new PrintMatrix();
            pm.setColumnSpacing(10);
            pm.print(tableFormat, coefficients);
            System.out.println("");
            System.out.println("");
        }
    }

    /**
     * Prints one line per regression: the criterion value followed by the
     * indices of the variables in that regression.
     *
     * @param criteria criterion value of each regression
     * @param variables variable indices of each regression, parallel to
     *        {@code criteria}
     */
    private static void printCriterionRows(double[] criteria,
                                           int[][] variables) {
        for (int row = 0; row < criteria.length; row++) {
            System.out.print("     " + criteria[row] + "        ");
            for (int col = 0; col < variables[row].length; col++) {
                System.out.print(variables[row][col] + "   ");
            }
            System.out.println("");
        }
    }
}

Output

Regressions with 1 variable(s) (Mallows Cp)
   Criterion               Variables
     138.73083349167362        4   
     142.4864069369577        2   
     202.54876912344534        1   
     315.154284140073        3   

Regressions with 2 variable(s) (Mallows Cp)
   Criterion               Variables
     2.6782415983184045        1   2   
     5.495850824758396        1   4   
     22.37311196469674        3   4   
     138.22591975463834        2   4   
     198.09465256956904        1   3   

Regressions with 3 variable(s) (Mallows Cp)
   Criterion               Variables
     3.0182334734873084        1   2   4   
     3.04127972306423        1   2   3   
     3.4968244423483164        1   3   4   
     7.337473995655756        2   3   4   

Regressions with 4 variable(s) (Mallows Cp)
   Criterion               Variables
     5.0        1   2   3   4   


Best Regressions with 2 variable(s) (Mallows Cp)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                                           
1.0000          1.4683          0.1213          12.1047          0.0000          
2.0000          0.6623          0.0459          14.4424          0.0000          




Best Regressions with 3 variable(s) (Mallows Cp)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                                            
1.0000           1.4519          0.1170          12.4100          0.0000          
2.0000           0.4161          0.1856           2.2418          0.0517          
4.0000          -0.2365          0.1733          -1.3650          0.2054          




Best Regressions with 3 variable(s) (Mallows Cp)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                                           
1.0000          1.6959          0.2046           8.2895          0.0000          
2.0000          0.6569          0.0442          14.8508          0.0000          
3.0000          0.2500          0.1847           1.3536          0.2089          



Link to Java source.