The data in this example were artificially generated from an autoregressive time series with a lag of 1, i.e., AR(1). The constant term in the model was set to zero and the autoregressive coefficient to -0.7. The innovations were drawn from a Gaussian distribution with a mean of zero and a variance of 0.51. The resulting series is stationary with Var(Y) = 0.51 / (1 - 0.7^2) = 1.0.
Two hundred values were generated. For this example, the six values at times t=130, t=140, t=141, t=160, t=175, and t=176 were removed and designated as missing. ARMAEstimateMissing is used to estimate these missing values with each of its estimation methods, and the missing value estimates are compared to the actual values generated in the full series.
As expected, the AR(1) method produced the best missing value estimates in this example, closely followed by the AR(p) method.
import java.text.*; import com.imsl.stat.*; import com.imsl.math.*; import com.imsl.WarningObject; import com.imsl.Warning; public class ARMAEstimateMissingEx1 { public static void main(String args[]) throws Exception { int i, j, k, ntemp, ntimes; int maxlag = 20; int n_obs=194, n_miss=6; int[] missing_index = null; int[] tpointsMiss = null; int[] tpointsComplete = null; double missVar = 0; double dif = 0; double sum = 0; double variance = 0; double[] y = null; double[] yMiss = null; double[] yComplete={ 1.30540,-1.37166,1.47905,-0.91059,1.36191,-2.16966,3.11254, -1.99536,2.29740,-1.82474,-0.25445,0.33519,-0.25480,-0.50574, -0.21429,-0.45932,-0.63813,0.25646,-0.46243,-0.44104,0.42733, 0.61102,-0.82417,1.48537,-1.57733,-0.09846,0.46311,0.49156, -1.66090,2.02808,-1.45768,1.36115,-0.65973,1.13332,-0.86285, 1.23848,-0.57301,-0.28210,0.20195,0.06981,0.28454,0.19745, -0.16490,-1.05019,0.78652,-0.40447,0.71514,-0.90003,1.83604, -2.51205,1.00526,-1.01683,1.70691,-1.86564,1.84912,-1.33120, 2.35105,-0.45579,-0.57773,-0.55226,0.88371,0.23138,0.59984, 0.31971,0.59849,0.41873,-0.46955,0.53003,-1.17203,1.52937, -0.48017,-0.93830,1.00651,-1.41493,-0.42188,-0.67010,0.58079, -0.96193,0.22763,-0.92214,1.35697,-1.47008,2.47841,-1.50522, 0.41650,-0.21669,-0.90297,0.00274,-1.04863,0.66192,-0.39143, 0.40779,-0.68174,-0.04700,-0.84469,0.30735,-0.68412,0.25888, -1.08642,0.52928,0.72168,-0.18199,-0.09499,0.67610,0.14636, 0.46846,-0.13989,0.50856,-0.22268,0.92756,0.73069,0.78998, -1.01650,1.25637,-2.36179,1.99616,-1.54326,1.38220,0.19674, -0.85241,0.40463,0.39523,-0.60721,0.25041,-1.24967,0.26727, 1.40042,-0.66963,1.26049,-0.92074,0.05909,-0.61926,1.41550, 0.25537,-0.13240,-0.07543,0.10413,1.42445,-1.37379,0.44382, -1.57210,2.04702,-2.22450,1.27698,0.01073,-0.88459,0.88194, -0.25019,0.70224,-0.41855,0.93850,0.36007,-0.46043,0.18645, 0.06337,0.29414,-0.20054,0.83078,-1.62530,2.64925,-1.25355, 1.59094,-1.00684,1.03196,-1.58045,2.04295,-2.38264,1.65095, 
-0.33273,-1.29092,0.14020,-0.11434,0.04392,0.05293,-0.42277, 0.59143,-0.03347,-0.58457,0.87030,0.19985,-0.73500,0.73640, 0.29531,0.22325,-0.60035,1.42253,-1.11278,1.30468,-0.41923, -0.38019,0.50937,0.23051,0.46496,0.02459,-0.68478,0.25821, 1.17655,-2.26629,1.41173,-0.68331 }; ARMAEstimateMissing estMiss = null; ARAutoUnivariate arAuto = null; String title=" "; String[] colLabels = {"TIME", "ACTUAL", "PREDICTED", "DIFFERENCE"}; PrintMatrixFormat pmf = new PrintMatrixFormat(); PrintMatrix pm = new PrintMatrix(); NumberFormat nf = NumberFormat.getNumberInstance(); nf.setMinimumFractionDigits(3); nf.setMaximumFractionDigits(3); pmf.setNumberFormat(nf); pmf.setColumnLabels(colLabels); pmf.setFirstRowNumber(1); pm.setColumnSpacing(3); /* setup missing data arrays */ tpointsComplete = new int[200]; tpointsMiss = new int[194]; yMiss = new double[194]; for(i=1; i<=200; i++) tpointsComplete[i-1] = i; tpointsMiss[0] = tpointsComplete[0]; yMiss[0] = yComplete[0]; k = 0; for (i=1; i<200;i++) { /* Generate series with missing values */ if ( i!=129 && i!= 139 && i!=140 && i!=159 && i!=174 && i!=175 ) { k += 1; tpointsMiss[k] = tpointsComplete[i]; yMiss[k] = yComplete[i]; } } n_obs = k + 1; for (j=0;j<=3;j++) { estMiss = new ARMAEstimateMissing(tpointsMiss, yMiss); switch (j){ case ARMAEstimateMissing.MEDIAN: estMiss.setMissingValueMethod(ARMAEstimateMissing.MEDIAN); title = "MEDIAN ESTIMATES"; break; case ARMAEstimateMissing.CUBIC_SPLINE: estMiss.setMissingValueMethod( ARMAEstimateMissing.CUBIC_SPLINE); title = "CUBIC SPLINE ESTIMATES"; break; case ARMAEstimateMissing.AR_1: estMiss.setMissingValueMethod(ARMAEstimateMissing.AR_1); title = "AR(1) ESTIMATES"; break; case ARMAEstimateMissing.AR_P: estMiss.setMaxlag(maxlag); estMiss.setMissingValueMethod(ARMAEstimateMissing.AR_P); estMiss.setEstimationMethod( ARMAEstimateMissing.METHOD_OF_MOMENTS); title = "AR(P) ESTIMATES"; break; } WarningObject currentWarningLevel = Warning.getWarning(); /* For some data it is useful to turn off 
warnings produced by * the ARMA estimation process. This is only necessary for * the AR_1 and AR_P estimation methods */ Warning.setWarning(null); // turn off warnings y = estMiss.getCompleteTimeSeries(); Warning.setWarning(currentWarningLevel); // turn on warnings missing_index = estMiss.getMissingTimes(); n_miss = y.length - yMiss.length; double[][] printOutput = new double[n_miss][4]; sum = 0; for (i = 0; i < n_miss; i++) { k = missing_index[i]; printOutput[i][0] = tpointsComplete[k]; printOutput[i][1] = yComplete[k]; printOutput[i][2] = y[k]; printOutput[i][3] = Math.abs(yComplete[k] - y[k]); sum += Math.pow(printOutput[i][3],2); } arAuto = new ARAutoUnivariate(maxlag, y); arAuto.compute(); variance = arAuto.getInnovationVariance(); pm.setTitle(title); pm.print(pmf, printOutput); missVar = sum/n_miss; System.out.println( "Innovation Variance Analysis - Estimate (percent of actual)"); System.out.println(" Missing Values(only): "+missVar + " ("+Math.round(100.0*missVar /0.51)+"%)"); System.out.println(" Entire Series: "+variance+ " ("+Math.round(100.0*variance/0.51)+"%)"); System.out.println( "***********************************************************"); System.out.println(""); } } }
MEDIAN ESTIMATES TIME ACTUAL PREDICTED DIFFERENCE 1 130.000 -0.921 0.261 1.182 2 140.000 0.444 0.057 0.386 3 141.000 -1.572 0.057 1.630 4 160.000 2.649 0.047 2.602 5 175.000 -0.423 0.048 0.471 6 176.000 0.591 0.048 0.543 Innovation Variance Analysis - Estimate (percent of actual) Missing Values(only): 1.9152593761916663 (376%) Entire Series: 0.5350398415037567 (105%) *********************************************************** CUBIC SPLINE ESTIMATES TIME ACTUAL PREDICTED DIFFERENCE 1 130.000 -0.921 1.541 2.462 2 140.000 0.444 -0.407 0.851 3 141.000 -1.572 2.497 4.069 4 160.000 2.649 -2.947 5.596 5 175.000 -0.423 0.251 0.673 6 176.000 0.591 0.380 0.211 Innovation Variance Analysis - Estimate (percent of actual) Missing Values(only): 9.193464593399945 (1803%) Entire Series: 0.7591379904132599 (149%) *********************************************************** AR(1) ESTIMATES TIME ACTUAL PREDICTED DIFFERENCE 1 130.000 -0.921 -0.930 0.009 2 140.000 0.444 1.028 0.584 3 141.000 -1.572 -0.745 0.827 4 160.000 2.649 1.229 1.420 5 175.000 -0.423 0.010 0.433 6 176.000 0.591 0.037 0.555 Innovation Variance Analysis - Estimate (percent of actual) Missing Values(only): 0.5897529211558102 (116%) Entire Series: 0.5013106660252865 (98%) *********************************************************** AR(P) ESTIMATES TIME ACTUAL PREDICTED DIFFERENCE 1 130.000 -0.921 -0.889 0.032 2 140.000 0.444 1.009 0.565 3 141.000 -1.572 -0.688 0.884 4 160.000 2.649 1.210 1.439 5 175.000 -0.423 -0.002 0.421 6 176.000 0.591 0.038 0.553 Innovation Variance Analysis - Estimate (percent of actual) Missing Values(only): 0.6091513658345841 (119%) Entire Series: 0.5017903704689042 (98%) ***********************************************************Link to Java source.