Example: Bootstrap Aggregation

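This example illustrates bootstrap aggregation for a QUEST decision tree using a simulated data set. The tree is fit to the training data, aggregated, and then used to predict the response for a small test set; the actual and predicted values are printed together with the mean squared prediction error.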


import com.imsl.datamining.*;
import com.imsl.datamining.decisionTree.QUEST;
import com.imsl.stat.Random;

/**
 * Example program: applies {@code BootstrapAggregation} to a {@code QUEST}
 * decision tree and prints the predictions for a small test set next to
 * the values stored in the test set's response column.
 */
public class BootstrapAggregationEx1 {

    /**
     * Fits the tree on the training rows, runs the aggregation against the
     * test rows, and writes the results to standard output.
     *
     * @param args command-line arguments; not used
     * @throws Exception propagated unchanged from the library calls
     */
    public static void main(String[] args) throws Exception {
        // Column passed to the QUEST constructor and read back from the test
        // rows when printing the "actual" values.
        final int responseColumn = 3;

        // One entry per data column: columns 0, 2 and 3 are categorical,
        // column 1 is quantitative continuous.
        PredictiveModel.VariableType[] columnTypes = {
            PredictiveModel.VariableType.CATEGORICAL,
            PredictiveModel.VariableType.QUANTITATIVE_CONTINUOUS,
            PredictiveModel.VariableType.CATEGORICAL,
            PredictiveModel.VariableType.CATEGORICAL
        };

        // Training rows handed to the decision tree.
        double[][] trainingRows = {
            {2, 25.92869, 0, 0},
            {1, 51.63245, 1, 1},
            {1, 25.78432, 0, 2},
            {0, 39.37948, 0, 3},
            {2, 24.65058, 0, 2},
            {2, 45.20084, 0, 2},
            {2, 52.67960, 1, 3},
            {1, 44.28342, 1, 3},
            {2, 40.63523, 1, 3},
            {2, 51.76094, 0, 3},
            {2, 26.30368, 0, 1},
            {2, 20.70230, 1, 0},
            {2, 38.74273, 1, 3},
            {2, 19.47333, 0, 0},
            {1, 26.42211, 0, 0},
            {2, 37.05986, 1, 0},
            {1, 51.67043, 1, 3},
            {0, 42.40156, 0, 3},
            {2, 33.90027, 1, 2},
            {1, 35.43282, 0, 0},
            {1, 44.30369, 0, 1},
            {0, 46.72387, 0, 2},
            {1, 46.99262, 0, 2},
            {0, 36.05923, 0, 3},
            {2, 36.83197, 1, 1},
            {1, 61.66257, 1, 2},
            {0, 25.67714, 0, 3},
            {1, 39.08567, 1, 0},
            {0, 48.84341, 1, 1},
            {1, 39.34391, 0, 3},
            {2, 24.73522, 0, 2},
            {1, 50.55251, 1, 3},
            {0, 31.34263, 1, 3},
            {1, 27.15795, 1, 0},
            {0, 31.72685, 0, 2},
            {0, 25.00408, 0, 3},
            {1, 26.35457, 1, 3},
            {2, 38.12343, 0, 1},
            {0, 49.94030, 0, 2},
            {1, 42.45779, 1, 3},
            {0, 38.80948, 1, 1},
            {0, 43.22799, 1, 1},
            {0, 41.87624, 0, 3},
            {2, 48.07820, 0, 2},
            {0, 43.23673, 1, 0},
            {2, 39.41294, 0, 3},
            {1, 23.93346, 0, 2},
            {2, 42.84130, 1, 3},
            {2, 30.40669, 0, 1},
            {0, 37.77389, 0, 2}
        };

        // Separate rows supplied to the aggregation as test data.
        double[][] testRows = {
            {0, 44.28342, 0, 2},
            {0, 38.63523, 1, 3},
            {2, 42.76094, 1, 3},
            {2, 20.30368, 0, 1},
            {2, 25.70230, 1, 0},
            {2, 38.74273, 1, 3},
            {2, 19.47333, 0, 1}
        };

        // Fixed seed and multiplier -- presumably so that the printed output
        // is reproducible from run to run.
        Random generator = new Random(123457);
        generator.setMultiplier(16807);

        QUEST tree = new QUEST(trainingRows, responseColumn, columnTypes);
        tree.fitModel();

        BootstrapAggregation bagger = new BootstrapAggregation(tree);
        bagger.setTestData(testRows);
        bagger.setRandomObject(generator);
        bagger.aggregate();

        double[] predicted = bagger.getPredictions();
        double meanSquaredError = bagger.getMeanSquaredPredictionError();

        printReport(testRows, responseColumn, predicted, meanSquaredError);
    }

    /**
     * Prints a two-column table of actual versus predicted values, followed
     * by the mean squared prediction error.
     *
     * @param testRows test data; the actual value is read from each row
     * @param responseColumn index of the column holding the actual value
     * @param predicted one prediction per test row, in row order
     * @param meanSquaredError error figure printed after the table
     */
    private static void printReport(double[][] testRows, int responseColumn,
            double[] predicted, double meanSquaredError) {
        System.out.println("Actual value  Predicted value ");
        for (int row = 0; row < predicted.length; row++) {
            double actual = testRows[row][responseColumn];
            System.out.format(" %3.2f \t\t %3.2f \n", actual, predicted[row]);
        }
        System.out.format("\n Mean squared prediction error: %3.2f \n",
                meanSquaredError);
    }
}

Output

Actual value  Predicted value 
 2.00 		 3.00 
 3.00 		 3.00 
 3.00 		 3.00 
 1.00 		 2.00 
 0.00 		 3.00 
 3.00 		 3.00 
 1.00 		 2.00 

 Mean squared prediction error: 1.71 
Link to Java source.