Example: K-means Cluster Analysis

This example performs K-means cluster analysis on Fisher's iris data. Three clusters are formed, one per iris type, and the initial seed for each cluster is an observation known to belong to the corresponding iris type.

/*
* -------------------------------------------------------------------------
*    Copyright (c) 1999 Visual Numerics Inc. All Rights Reserved.
*
*    This software is confidential information which is proprietary to
*    and a trade secret of Visual Numerics, Inc.  Use, duplication or
*    disclosure is subject to the terms of an appropriate license
*    agreement.
*
*    VISUAL NUMERICS MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE
*    SUITABILITY OF THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING
*    BUT NOT LIMITED TO THE IMPLIED WARRANTIES OF MERCHANTABILITY,
*    FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. VISUAL
*    NUMERICS SHALL NOT BE LIABLE FOR ANY DAMAGES SUFFERED BY LICENSEE
*    AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THIS SOFTWARE OR
*    ITS DERIVATIVES.
*--------------------------------------------------------------------------
*/
using System;
using Imsl.Stat;
using Imsl.Math;

/// <summary>
/// Example driver: runs K-means clustering on a 150 x 4 table of iris
/// measurements using three caller-supplied starting seeds, then prints the
/// resulting cluster means, per-observation membership, within-cluster sums
/// of squares and cluster sizes.
/// </summary>
public class ClusterKMeansEx1
{
    /// <summary>
    /// Program entry point. Command-line arguments are ignored.
    /// </summary>
    /// <param name="argv">Unused.</param>
    public static void Main(String[] argv)
    {
        double[,] observations = BuildObservations();
        double[,] seeds = BuildSeeds();

        ClusterKMeans clusterer = new ClusterKMeans(observations, seeds);

        // Compute() has to run before the accessor calls below; its return
        // value is what gets printed under the "Cluster Means" title.
        double[,] means = clusterer.Compute();
        double[] withinSumOfSquares = clusterer.GetClusterSSQ();
        int[] membership = clusterer.GetClusterMembership();
        int[] counts = clusterer.GetClusterCounts();

        // Only the means table is printed with an explicit number format.
        PrintMatrix meansPrinter = new PrintMatrix("Cluster Means");
        PrintMatrixFormat fourDecimals = new PrintMatrixFormat();
        fourDecimals.NumberFormat = "0.0000";
        meansPrinter.Print(fourDecimals, means);

        PrintMatrix membershipPrinter = new PrintMatrix("Cluster Membership");
        membershipPrinter.Print(membership);

        PrintMatrix ssqPrinter = new PrintMatrix("Sum of Squares");
        ssqPrinter.Print(withinSumOfSquares);

        PrintMatrix countPrinter = new PrintMatrix("Number of observations");
        countPrinter.Print(counts);
    }

    /// <summary>
    /// Builds the data matrix: 150 observations (rows), each with four
    /// measurements (columns).
    /// </summary>
    /// <returns>A newly allocated 150 x 4 array.</returns>
    private static double[,] BuildObservations()
    {
        return new double[,]
        {
            // Observations 0-49 (row 0 is used as the first seed).
            {5.100, 3.500, 1.400, 0.200},
            {4.900, 3.000, 1.400, 0.200},
            {4.700, 3.200, 1.300, 0.200},
            {4.600, 3.100, 1.500, 0.200},
            {5.000, 3.600, 1.400, 0.200},
            {5.400, 3.900, 1.700, 0.400},
            {4.600, 3.400, 1.400, 0.300},
            {5.000, 3.400, 1.500, 0.200},
            {4.400, 2.900, 1.400, 0.200},
            {4.900, 3.100, 1.500, 0.100},
            {5.400, 3.700, 1.500, 0.200},
            {4.800, 3.400, 1.600, 0.200},
            {4.800, 3.000, 1.400, 0.100},
            {4.300, 3.000, 1.100, 0.100},
            {5.800, 4.000, 1.200, 0.200},
            {5.700, 4.400, 1.500, 0.400},
            {5.400, 3.900, 1.300, 0.400},
            {5.100, 3.500, 1.400, 0.300},
            {5.700, 3.800, 1.700, 0.300},
            {5.100, 3.800, 1.500, 0.300},
            {5.400, 3.400, 1.700, 0.200},
            {5.100, 3.700, 1.500, 0.400},
            {4.600, 3.600, 1.000, 0.200},
            {5.100, 3.300, 1.700, 0.500},
            {4.800, 3.400, 1.900, 0.200},
            {5.000, 3.000, 1.600, 0.200},
            {5.000, 3.400, 1.600, 0.400},
            {5.200, 3.500, 1.500, 0.200},
            {5.200, 3.400, 1.400, 0.200},
            {4.700, 3.200, 1.600, 0.200},
            {4.800, 3.100, 1.600, 0.200},
            {5.400, 3.400, 1.500, 0.400},
            {5.200, 4.100, 1.500, 0.100},
            {5.500, 4.200, 1.400, 0.200},
            {4.900, 3.100, 1.500, 0.200},
            {5.000, 3.200, 1.200, 0.200},
            {5.500, 3.500, 1.300, 0.200},
            {4.900, 3.600, 1.400, 0.100},
            {4.400, 3.000, 1.300, 0.200},
            {5.100, 3.400, 1.500, 0.200},
            {5.000, 3.500, 1.300, 0.300},
            {4.500, 2.300, 1.300, 0.300},
            {4.400, 3.200, 1.300, 0.200},
            {5.000, 3.500, 1.600, 0.600},
            {5.100, 3.800, 1.900, 0.400},
            {4.800, 3.000, 1.400, 0.300},
            {5.100, 3.800, 1.600, 0.200},
            {4.600, 3.200, 1.400, 0.200},
            {5.300, 3.700, 1.500, 0.200},
            {5.000, 3.300, 1.400, 0.200},

            // Observations 50-99 (row 50 is used as the second seed).
            {7.000, 3.200, 4.700, 1.400},
            {6.400, 3.200, 4.500, 1.500},
            {6.900, 3.100, 4.900, 1.500},
            {5.500, 2.300, 4.000, 1.300},
            {6.500, 2.800, 4.600, 1.500},
            {5.700, 2.800, 4.500, 1.300},
            {6.300, 3.300, 4.700, 1.600},
            {4.900, 2.400, 3.300, 1.000},
            {6.600, 2.900, 4.600, 1.300},
            {5.200, 2.700, 3.900, 1.400},
            {5.000, 2.000, 3.500, 1.000},
            {5.900, 3.000, 4.200, 1.500},
            {6.000, 2.200, 4.000, 1.000},
            {6.100, 2.900, 4.700, 1.400},
            {5.600, 2.900, 3.600, 1.300},
            {6.700, 3.100, 4.400, 1.400},
            {5.600, 3.000, 4.500, 1.500},
            {5.800, 2.700, 4.100, 1.000},
            {6.200, 2.200, 4.500, 1.500},
            {5.600, 2.500, 3.900, 1.100},
            {5.900, 3.200, 4.800, 1.800},
            {6.100, 2.800, 4.000, 1.300},
            {6.300, 2.500, 4.900, 1.500},
            {6.100, 2.800, 4.700, 1.200},
            {6.400, 2.900, 4.300, 1.300},
            {6.600, 3.000, 4.400, 1.400},
            {6.800, 2.800, 4.800, 1.400},
            {6.700, 3.000, 5.000, 1.700},
            {6.000, 2.900, 4.500, 1.500},
            {5.700, 2.600, 3.500, 1.000},
            {5.500, 2.400, 3.800, 1.100},
            {5.500, 2.400, 3.700, 1.000},
            {5.800, 2.700, 3.900, 1.200},
            {6.000, 2.700, 5.100, 1.600},
            {5.400, 3.000, 4.500, 1.500},
            {6.000, 3.400, 4.500, 1.600},
            {6.700, 3.100, 4.700, 1.500},
            {6.300, 2.300, 4.400, 1.300},
            {5.600, 3.000, 4.100, 1.300},
            {5.500, 2.500, 4.000, 1.300},
            {5.500, 2.600, 4.400, 1.200},
            {6.100, 3.000, 4.600, 1.400},
            {5.800, 2.600, 4.000, 1.200},
            {5.000, 2.300, 3.300, 1.000},
            {5.600, 2.700, 4.200, 1.300},
            {5.700, 3.000, 4.200, 1.200},
            {5.700, 2.900, 4.200, 1.300},
            {6.200, 2.900, 4.300, 1.300},
            {5.100, 2.500, 3.000, 1.100},
            {5.700, 2.800, 4.100, 1.300},

            // Observations 100-149 (row 100 is used as the third seed).
            {6.300, 3.300, 6.000, 2.500},
            {5.800, 2.700, 5.100, 1.900},
            {7.100, 3.000, 5.900, 2.100},
            {6.300, 2.900, 5.600, 1.800},
            {6.500, 3.000, 5.800, 2.200},
            {7.600, 3.000, 6.600, 2.100},
            {4.900, 2.500, 4.500, 1.700},
            {7.300, 2.900, 6.300, 1.800},
            {6.700, 2.500, 5.800, 1.800},
            {7.200, 3.600, 6.100, 2.500},
            {6.500, 3.200, 5.100, 2.000},
            {6.400, 2.700, 5.300, 1.900},
            {6.800, 3.000, 5.500, 2.100},
            {5.700, 2.500, 5.000, 2.000},
            {5.800, 2.800, 5.100, 2.400},
            {6.400, 3.200, 5.300, 2.300},
            {6.500, 3.000, 5.500, 1.800},
            {7.700, 3.800, 6.700, 2.200},
            {7.700, 2.600, 6.900, 2.300},
            {6.000, 2.200, 5.000, 1.500},
            {6.900, 3.200, 5.700, 2.300},
            {5.600, 2.800, 4.900, 2.000},
            {7.700, 2.800, 6.700, 2.000},
            {6.300, 2.700, 4.900, 1.800},
            {6.700, 3.300, 5.700, 2.100},
            {7.200, 3.200, 6.000, 1.800},
            {6.200, 2.800, 4.800, 1.800},
            {6.100, 3.000, 4.900, 1.800},
            {6.400, 2.800, 5.600, 2.100},
            {7.200, 3.000, 5.800, 1.600},
            {7.400, 2.800, 6.100, 1.900},
            {7.900, 3.800, 6.400, 2.000},
            {6.400, 2.800, 5.600, 2.200},
            {6.300, 2.800, 5.100, 1.500},
            {6.100, 2.600, 5.600, 1.400},
            {7.700, 3.000, 6.100, 2.300},
            {6.300, 3.400, 5.600, 2.400},
            {6.400, 3.100, 5.500, 1.800},
            {6.000, 3.000, 4.800, 1.800},
            {6.900, 3.100, 5.400, 2.100},
            {6.700, 3.100, 5.600, 2.400},
            {6.900, 3.100, 5.100, 2.300},
            {5.800, 2.700, 5.100, 1.900},
            {6.800, 3.200, 5.900, 2.300},
            {6.700, 3.300, 5.700, 2.500},
            {6.700, 3.000, 5.200, 2.300},
            {6.300, 2.500, 5.000, 1.900},
            {6.500, 3.000, 5.200, 2.000},
            {6.200, 3.400, 5.400, 2.300},
            {5.900, 3.000, 5.100, 1.800}
        };
    }

    /// <summary>
    /// Builds the initial cluster seeds: three rows of four values, equal to
    /// rows 0, 50 and 100 of the observation table.
    /// </summary>
    /// <returns>A newly allocated 3 x 4 array.</returns>
    private static double[,] BuildSeeds()
    {
        return new double[,]
        {
            {5.100, 3.500, 1.400, 0.200},
            {7.000, 3.200, 4.700, 1.400},
            {6.300, 3.300, 6.000, 2.500}
        };
    }
}

Output

           Cluster Means
     0       1       2       3     
0  5.0060  3.4280  1.4620  0.2460  
1  5.9016  2.7484  4.3935  1.4339  
2  6.8500  3.0737  5.7421  2.0711  

Cluster Membership
     0  
  0  1  
  1  1  
  2  1  
  3  1  
  4  1  
  5  1  
  6  1  
  7  1  
  8  1  
  9  1  
 10  1  
 11  1  
 12  1  
 13  1  
 14  1  
 15  1  
 16  1  
 17  1  
 18  1  
 19  1  
 20  1  
 21  1  
 22  1  
 23  1  
 24  1  
 25  1  
 26  1  
 27  1  
 28  1  
 29  1  
 30  1  
 31  1  
 32  1  
 33  1  
 34  1  
 35  1  
 36  1  
 37  1  
 38  1  
 39  1  
 40  1  
 41  1  
 42  1  
 43  1  
 44  1  
 45  1  
 46  1  
 47  1  
 48  1  
 49  1  
 50  2  
 51  2  
 52  3  
 53  2  
 54  2  
 55  2  
 56  2  
 57  2  
 58  2  
 59  2  
 60  2  
 61  2  
 62  2  
 63  2  
 64  2  
 65  2  
 66  2  
 67  2  
 68  2  
 69  2  
 70  2  
 71  2  
 72  2  
 73  2  
 74  2  
 75  2  
 76  2  
 77  3  
 78  2  
 79  2  
 80  2  
 81  2  
 82  2  
 83  2  
 84  2  
 85  2  
 86  2  
 87  2  
 88  2  
 89  2  
 90  2  
 91  2  
 92  2  
 93  2  
 94  2  
 95  2  
 96  2  
 97  2  
 98  2  
 99  2  
100  3  
101  2  
102  3  
103  3  
104  3  
105  3  
106  2  
107  3  
108  3  
109  3  
110  3  
111  3  
112  3  
113  2  
114  2  
115  3  
116  3  
117  3  
118  3  
119  2  
120  3  
121  2  
122  3  
123  2  
124  3  
125  3  
126  2  
127  2  
128  3  
129  3  
130  3  
131  3  
132  3  
133  2  
134  3  
135  3  
136  3  
137  3  
138  2  
139  3  
140  3  
141  3  
142  2  
143  3  
144  3  
145  3  
146  2  
147  3  
148  3  
149  2  

   Sum of Squares
          0          
0  15.151            
1  39.8209677419355  
2  23.8794736842105  

Number of observations
   0   
0  50  
1  62  
2  38  


Link to C# source.