package com.imsl.test.example.datamining.decisionTree; import com.imsl.datamining.decisionTree.*; /** * *
Fits a decision tree to the Kyphosis data using QUEST.
* <p>
* This example uses the Kyphosis dataset. The 81 cases represent 81 children
* who have undergone surgery to correct a type of spinal deformity known as
* Kyphosis. The response variable is the presence or absence of Kyphosis after
* the surgery. The three predictors are Age, the age of the patient in months;
* Start, the number of the vertebra where the surgery started; and Number, the
* number of vertebrae involved in the surgery. This example uses the method
* {@code QUEST} to produce a maximal tree. It also requests predictions for a
* test-data set consisting of 10 "new" cases.
*
*
* @see Code
* @see Output
*/
public class DecisionTreeEx4 {

    /**
     * Builds the data tables, fits a QUEST tree to the training table,
     * predicts the test table, and prints the tree, the per-case predictions
     * and the prediction error to standard output.
     *
     * @param args command-line arguments; not read
     * @throws Exception propagated unchanged from the decision-tree calls
     */
    public static void main(String[] args) throws Exception {
        final double[][] training = trainingData();
        final double[][] holdout = testData();

        // One type per data column: column 0 is categorical, the remaining
        // three columns are continuous.
        final DecisionTree.VariableType[] columnTypes = new DecisionTree.VariableType[4];
        columnTypes[0] = DecisionTree.VariableType.CATEGORICAL;
        for (int col = 1; col < columnTypes.length; col++) {
            columnTypes[col] = DecisionTree.VariableType.QUANTITATIVE_CONTINUOUS;
        }

        // Labels used only for printing; the predictor labels follow the
        // order of data columns 1..3.
        final String[] predictorLabels = {"Age", "Number", "Start"};
        final String[] classLabels = {"Absent", "Present"};
        final String responseLabel = "Kyphosis";

        // The second constructor argument is the response column index
        // (column 0 here) - NOTE(review): confirm against the QUEST API docs.
        final QUEST tree = new QUEST(training, 0, columnTypes);
        tree.setMinObsPerChildNode(5);
        tree.setMinObsPerNode(10);
        tree.setMaxNodes(50);
        tree.setPrintLevel(2);
        tree.fitModel();

        // Predict first, then read the error; the call order matches the
        // original example and is kept on purpose.
        final double[] predicted = tree.predict(holdout);
        final double meanSquaredError = tree.getMeanSquaredPredictionError();

        tree.printDecisionTree(responseLabel, predictorLabels, classLabels, null, true);

        System.out.println("\nPredictions for test data:");
        System.out.printf("%5s%8s%7s%10s\n",
                predictorLabels[0], predictorLabels[1], predictorLabels[2], responseLabel);

        int row = 0;
        for (double[] testCase : holdout) {
            // Predictor columns are printed before the label is looked up.
            System.out.printf("%5.0f%8.0f%7.0f", testCase[1], testCase[2], testCase[3]);
            // The prediction is used as an index into the class labels.
            final String label = classLabels[(int) predicted[row++]];
            System.out.printf("%10s\n", label);
        }

        System.out.printf("\nMean squared prediction error: %f\n", meanSquaredError);
    }

    /**
     * Returns the 81-row training table. Each row has four columns; the
     * labels used in {@code main} name them Kyphosis, Age, Number and Start.
     */
    private static double[][] trainingData() {
        return new double[][] {
            {0, 71, 3, 5}, {0, 158, 3, 14}, {1, 128, 4, 5},
            {0, 2, 5, 1}, {0, 1, 4, 15}, {0, 1, 2, 16},
            {0, 61, 2, 17}, {0, 37, 3, 16}, {0, 113, 2, 16},
            {1, 59, 6, 12}, {1, 82, 5, 14}, {0, 148, 3, 16},
            {0, 18, 5, 2}, {0, 1, 4, 12}, {0, 168, 3, 18},
            {0, 1, 3, 16}, {0, 78, 6, 15}, {0, 175, 5, 13},
            {0, 80, 5, 16}, {0, 27, 4, 9}, {0, 22, 2, 16},
            {1, 105, 6, 5}, {1, 96, 3, 12}, {0, 131, 2, 3},
            {1, 15, 7, 2}, {0, 9, 5, 13}, {0, 8, 3, 6},
            {0, 100, 3, 14}, {0, 4, 3, 16}, {0, 151, 2, 16},
            {0, 31, 3, 16}, {0, 125, 2, 11}, {0, 130, 5, 13},
            {0, 112, 3, 16}, {0, 140, 5, 11}, {0, 93, 3, 16},
            {0, 1, 3, 9}, {1, 52, 5, 6}, {0, 20, 6, 9},
            {1, 91, 5, 12}, {1, 73, 5, 1}, {0, 35, 3, 13},
            {0, 143, 9, 3}, {0, 61, 4, 1}, {0, 97, 3, 16},
            {1, 139, 3, 10}, {0, 136, 4, 15}, {0, 131, 5, 13},
            {1, 121, 3, 3}, {0, 177, 2, 14}, {0, 68, 5, 10},
            {0, 9, 2, 17}, {1, 139, 10, 6}, {0, 2, 2, 17},
            {0, 140, 4, 15}, {0, 72, 5, 15}, {0, 2, 3, 13},
            {1, 120, 5, 8}, {0, 51, 7, 9}, {0, 102, 3, 13},
            {1, 130, 4, 1}, {1, 114, 7, 8}, {0, 81, 4, 1},
            {0, 118, 3, 16}, {0, 118, 4, 16}, {0, 17, 4, 10},
            {0, 195, 2, 17}, {0, 159, 4, 13}, {0, 18, 4, 11},
            {0, 15, 5, 16}, {0, 158, 5, 14}, {0, 127, 4, 12},
            {0, 87, 4, 16}, {0, 206, 4, 10}, {0, 11, 3, 15},
            {0, 178, 4, 15}, {1, 157, 3, 13}, {0, 26, 7, 13},
            {0, 120, 2, 13}, {1, 42, 7, 6}, {0, 36, 4, 13}
        };
    }

    /**
     * Returns the 10-row test table, laid out with the same four columns as
     * the training table.
     */
    private static double[][] testData() {
        return new double[][] {
            {0, 71, 3, 5}, {1, 128, 4, 5}, {0, 1, 4, 15},
            {0, 61, 6, 10}, {0, 113, 2, 16}, {1, 82, 5, 14},
            {0, 148, 3, 16}, {0, 1, 4, 12}, {0, 1, 3, 16},
            {0, 175, 5, 13}
        };
    }
}