Example 2: SelectionRegression

This example uses the same data set as the first example, but Mallows' C_p statistic is used as the criterion rather than R^2. Note that when Mallows' C_p statistic (or adjusted R^2) is specified, the method setMaximumBestFound is used to indicate the total number of "best" regressions (rather than indicating the number of best regressions per subset size, as in the case of the R^2 criterion). In this example, the three best regressions are found to be (1, 2), (1, 2, 4), and (1, 2, 3).


import java.text.*;
import com.imsl.stat.*;
import com.imsl.math.*;

/**
 * Example: best-subset regression selected with the Mallows Cp criterion.
 *
 * <p>Fits the 13-observation, 4-candidate-variable data set below, prints
 * the criterion value for every reported subset grouped by subset size,
 * and then prints the coefficient statistics of the best regressions.
 */
public class SelectionRegressionEx2 {

    /** Label printed in the report headers for the selected criterion. */
    private static final String CRITERION_NAME = "Mallows Cp";

    /**
     * Value passed to {@code setMaximumBestFound}; also the number of
     * coefficient tables printed, so the two can never drift apart.
     */
    private static final int MAX_BEST_FOUND = 3;

    /**
     * Runs the example and writes the report to standard output.
     *
     * @param args ignored
     * @throws Exception if the regression computation fails
     */
    public static void main(String[] args) throws Exception {
        // One row per observation, one column per candidate variable.
        double x[][] = {
            {7., 26., 6., 60.},
            {1., 29., 15., 52.},
            {11., 56., 8., 20.},
            {11., 31., 8., 47.},
            {7., 52., 6., 33.},
            {11., 55., 9., 22.},
            {3., 71., 17., 6.},
            {1., 31., 22., 44.},
            {2., 54., 18., 22.},
            {21., 47., 4., 26.},
            {1., 40., 23., 34.},
            {11., 66., 9., 12.},
            {10.0, 68., 8., 12.}
        };

        // Response value for each observation (same row order as x).
        double y[] = {
            78.5, 74.3, 104.3, 87.6,
            95.9, 109.2, 102.7, 72.5,
            93.1, 115.9, 83.8, 113.3,
            109.4
        };

        // Derive the candidate count from the data instead of repeating "4".
        int nCandidates = x[0].length;

        SelectionRegression sr = new SelectionRegression(nCandidates);
        sr.setCriterionOption(SelectionRegression.MALLOWS_CP_CRITERION);
        sr.setMaximumBestFound(MAX_BEST_FOUND);
        sr.compute(x, y);
        SelectionRegression.Statistics stats = sr.getStatistics();

        printCriterionTables(stats, nCandidates);
        printCoefficientTables(stats, MAX_BEST_FOUND);
    }

    /**
     * Prints, for each subset size from 1 to {@code nCandidates}, the
     * criterion value and variable indices of every reported regression.
     */
    private static void printCriterionTables(
            SelectionRegression.Statistics stats, int nCandidates) {
        MessageFormat critMsg
                = new MessageFormat("Regressions with {0} variable(s) ({1})");

        for (int i = 1; i <= nCandidates; i++) {
            double[] tmpCrit = stats.getCriterionValues(i);
            int[][] indvar = stats.getIndependentVariables(i);

            // Integer.valueOf replaces the deprecated Integer(int) constructor.
            Object p[] = {Integer.valueOf(i), CRITERION_NAME};
            System.out.println(critMsg.format(p));
            System.out.println("   Criterion               Variables");

            for (int j = 0; j < tmpCrit.length; j++) {
                System.out.print("     " + tmpCrit[j] + "        ");
                for (int k = 0; k < indvar[j].length; k++) {
                    System.out.print(indvar[j][k] + "   ");
                }
                System.out.println("");
            }
            System.out.println("");
        }
    }

    /**
     * Prints the coefficient statistics table for each of the first
     * {@code nBest} best regressions.
     */
    private static void printCoefficientTables(
            SelectionRegression.Statistics stats, int nBest) {
        MessageFormat coefMsg = new MessageFormat("Best Regressions with"
                + " {0} variable(s) ({1})");

        for (int i = 0; i < nBest; i++) {
            System.out.println("");

            // One row per variable in the regression, so the row count is
            // the number of variables reported in the header.
            double[][] tmpCoef = stats.getCoefficientStatistics(i);

            Object p[] = {Integer.valueOf(tmpCoef.length), CRITERION_NAME};
            System.out.println(coefMsg.format(p));
            System.out.println("Variable   Coefficient"
                    + "   Standard Error  t-statistic   p-value");

            PrintMatrix pm = new PrintMatrix();
            pm.setColumnSpacing(10);
            NumberFormat nf = NumberFormat.getInstance();
            nf.setMinimumFractionDigits(4);
            PrintMatrixFormat tst = new PrintMatrixFormat();
            tst.setNoColumnLabels();
            tst.setNoRowLabels();
            tst.setNumberFormat(nf);
            pm.print(tst, tmpCoef);
            System.out.println();
            System.out.println();
        }
    }
}

Output

Regressions with 1 variable(s) (Mallows Cp)
   Criterion               Variables
     138.73083349167362        4   
     142.4864069369577        2   
     202.54876912344534        1   
     315.154284140073        3   

Regressions with 2 variable(s) (Mallows Cp)
   Criterion               Variables
     2.6782415983184045        1   2   
     5.495850824758396        1   4   
     22.37311196469674        3   4   
     138.22591975463834        2   4   
     198.09465256956904        1   3   

Regressions with 3 variable(s) (Mallows Cp)
   Criterion               Variables
     3.0182334734873084        1   2   4   
     3.04127972306423        1   2   3   
     3.4968244423483164        1   3   4   
     7.337473995655756        2   3   4   

Regressions with 4 variable(s) (Mallows Cp)
   Criterion               Variables
     5.0        1   2   3   4   


Best Regressions with 2 variable(s) (Mallows Cp)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                                           
1.0000          1.4683          0.1213          12.1047          0.0000          
2.0000          0.6623          0.0459          14.4424          0.0000          




Best Regressions with 3 variable(s) (Mallows Cp)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                                            
1.0000           1.4519          0.1170          12.4100          0.0000          
2.0000           0.4161          0.1856           2.2418          0.0517          
4.0000          -0.2365          0.1733          -1.3650          0.2054          




Best Regressions with 3 variable(s) (Mallows Cp)
Variable   Coefficient   Standard Error  t-statistic   p-value
                                                                                           
1.0000          1.6959          0.2046           8.2895          0.0000          
2.0000          0.6569          0.0442          14.8508          0.0000          
3.0000          0.2500          0.1847           1.3536          0.2089          



Link to Java source.