package com.imsl.test.example.datamining.neural;

import com.imsl.datamining.neural.*;
import com.imsl.math.*;
import java.io.*;
import java.util.Random;
import java.util.logging.*;

/**
 * Trains a 3-layer network with a binary output variable and four categorical
 * input attributes.
 *
 * This example trains a 3-layer network using 48 training patterns from four
 * nominal input attributes. The first two nominal attributes have two
 * classifications each. The third and fourth nominal attributes have three and
 * four classifications, respectively. All four attributes are encoded using
 * binary encoding, which results in eleven binary input columns. The output
 * class is defined to be 1 if the first two nominal attributes sum to 2 (that
 * is, both fall in their first category), and 0 otherwise. Since the
 * relationship between input and output is deterministic, the network can fit
 * the data with zero error.
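 *
 * As a sketch of that encoding (assuming the filter maps category k of an
 * attribute with c categories to a 1 in the k-th of c columns, which is
 * consistent with 2 + 2 + 3 + 4 = 11 columns), the third attribute is
 * expanded as:
 * <pre>
 * category 1  ->  1 0 0
 * category 2  ->  0 1 0
 * category 3  ->  0 0 1
 * </pre>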
 *
 * The structure of the network consists of eleven input nodes and three
 * layers, with three perceptrons in the first hidden layer, two perceptrons
 * in the second hidden layer, and one perceptron in the output layer.
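 *
 * Schematically, using the layer sizes above:
 * <pre>
 * 11 inputs  ->  3 perceptrons  ->  2 perceptrons  ->  1 output perceptron
 * </pre>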
 *
 * With 11 inputs, 3 and 2 perceptrons in the hidden layers, and 1 output
 * perceptron, there are a total of 47 weights in this network, including the
 * six bias weights. The linear activation function is used in both hidden
 * layers. Since the target output is binary, the logistic activation function
 * is used in the output layer. Training is conducted using the quasi-Newton
 * trainer with the cross-entropy error function provided by the
 * BinaryClassification class.
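 *
 * For reference, the 47 weights break down by layer as:
 * <pre>
 * 11 x 3  =  33 weights + 3 bias weights   (first hidden layer)
 *  3 x 2  =   6 weights + 2 bias weights   (second hidden layer)
 *  2 x 1  =   2 weights + 1 bias weight    (output layer)
 *            41 weights + 6 bias weights = 47 total
 * </pre>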
 *
 * @see Code
 * @see Output
 */
public class BinaryClassificationEx1 implements Serializable {

    private static int nObs = 48;
    private static int nInputs = 11; // Four nominal variables with 2,2,3,4 categories
    private static int nOutputs = 1;
    private static int nPerceptrons1 = 3;
    private static int nPerceptrons2 = 2;
    private static boolean trace = false; // Turns on/off training log
    private static Activation hiddenLayerActivation = Activation.LINEAR;
    private static Activation outputLayerActivation = Activation.LOGISTIC;

    private static int[] x1 = {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
    };
    private static int[] x2 = {
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
    };
    private static int[] x3 = {
        1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
        1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3
    };
    private static int[] x4 = {
        1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,
        1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4
    };

    public static void main(String[] args) throws Exception {
        double xData[][];
        int yData[];
        int i, j;
        String trainLogName = "BinaryClassificationExample.log";

        // Encode the categorical variables into binary columns
        UnsupervisedNominalFilter filter = new UnsupervisedNominalFilter(2);
        int[][] z1 = filter.encode(x1);
        int[][] z2 = filter.encode(x2);
        filter = new UnsupervisedNominalFilter(3);
        int[][] z3 = filter.encode(x3);
        filter = new UnsupervisedNominalFilter(4);
        int[][] z4 = filter.encode(x4);

        // Copy the z binary columns into xData
        xData = new double[nObs][nInputs];
        yData = new int[nObs];
        for (i = 0; i < nObs; i++) {
            for (j = 0; j < nInputs; j++) {
                xData[i][j] = 0;
                if (j < 2) {
                    xData[i][j] = (double) z1[i][j];
                }
                if (j > 1 && j < 4) {
                    xData[i][j] = (double) z2[i][j - 2];
                }
                if (j > 3 && j < 7) {
                    xData[i][j] = (double) z3[i][j - 4];
                }
                if (j > 6) {
                    xData[i][j] = (double) z4[i][j - 7];
                }
            }
            // Define the output variable: 1 when both x1 and x2 are in
            // their first category, 0 otherwise
            yData[i] = ((x1[i] + x2[i] == 2) ? 1 : 0);
        }

        // Create the feedforward network
        FeedForwardNetwork network = new FeedForwardNetwork();
        network.getInputLayer().createInputs(nInputs);
        network.createHiddenLayer().createPerceptrons(nPerceptrons1);
        network.createHiddenLayer().createPerceptrons(nPerceptrons2);
        network.getOutputLayer().createPerceptrons(nOutputs);

        // Set up as a binary classification problem on the network
        BinaryClassification classification
                = new BinaryClassification(network);

        network.linkAll();
        Random r = new Random(123457L);
        network.setRandomWeights(xData, r);

        Perceptron perceptrons[] = network.getPerceptrons();
        for (i = 0; i < perceptrons.length - 1; i++) {
            perceptrons[i].setActivation(hiddenLayerActivation);
        }
        perceptrons[perceptrons.length - 1].
                setActivation(outputLayerActivation);
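
        // Note: this relies on network.getPerceptrons() listing the single
        // output perceptron last, so the loop above assigns the linear
        // activation to every hidden perceptron and only the output
        // perceptron receives the logistic activation.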

        // Set up a quasi-Newton trainer for the problem
        QuasiNewtonTrainer trainer = new QuasiNewtonTrainer();
        trainer.setError(classification.getError());
        trainer.setMaximumTrainingIterations(1000);
        trainer.setMaximumStepsize(3.0);
        trainer.setGradientTolerance(1.0e-20);
        trainer.setFalseConvergenceTolerance(1.0e-20);
        trainer.setStepTolerance(1.0e-20);
        trainer.setRelativeTolerance(1.0e-20);

        if (trace) {
            try {
                Handler handler = new FileHandler(trainLogName);
                Logger logger = Logger.getLogger("com.imsl.datamining.neural");
                logger.setLevel(Level.FINEST);
                logger.addHandler(handler);
                handler.setFormatter(QuasiNewtonTrainer.getFormatter());
                System.out.println("--> Training Log Created in "
                        + trainLogName);
            } catch (Exception e) {
                Logger.getLogger(BinaryClassificationEx1.class.getName()).
                        log(Level.SEVERE, "Cannot Create Training Log.", e);
            }
        }

        // Set the start time and train the network
        long t0 = System.currentTimeMillis();
        classification.train(trainer, xData, yData);
        long t1 = System.currentTimeMillis();
        double time = (t1 - t0) / 1000.0;
        System.out.println("****************Time: " + time);

        // Display training stats
        double stats[] = classification.computeStatistics(xData, yData);
        System.out.println("***********************************************");
        System.out.println("--> Cross-entropy error: " + (float) stats[0]);
        System.out.println("--> Classification error rate: "
                + (float) stats[1]);
        System.out.println("***********************************************");
        System.out.println("");

        // Display network weights and gradients
        double weight[] = network.getWeights();
        double gradient[] = trainer.getErrorGradient();
        double wg[][] = new double[weight.length][2];
        for (i = 0; i < weight.length; i++) {
            wg[i][0] = weight[i];
            wg[i][1] = gradient[i];
        }
        PrintMatrixFormat pmf = new PrintMatrixFormat();
        pmf.setNumberFormat(new java.text.DecimalFormat("0.000000"));
        pmf.setColumnLabels(new String[]{"Weights", "Gradients"});
        new PrintMatrix().print(pmf, wg);

        // Get the fitted values by predicting on the training data
        double report[][] = new double[nObs][6];
        for (i = 0; i < nObs; i++) {
            report[i][0] = x1[i];
            report[i][1] = x2[i];
            report[i][2] = x3[i];
            report[i][3] = x4[i];
            report[i][4] = yData[i];
            report[i][5] = classification.predictedClass(xData[i]);
        }
        pmf = new PrintMatrixFormat();
        pmf.setColumnLabels(new String[]{
            "X1", "X2", "X3", "X4", "Actual", "Predicted"});
        new PrintMatrix("Fitted values").print(pmf, report);
    }
}