├── 9781484233146.png
├── AN_COHERENCE.TXT
├── AN_CVARS.TXT
├── AN_EIGEN.TXT
├── AN_FACTOR.TXT
├── AN_ROTATE.TXT
├── BILINEAR.CPP
├── BRENTMIN.CPP
├── Contributing.md
├── DATAMINE_Manual.pdf
├── DENSITY_PLOTS.TXT
├── DataMine.exe
├── EVEC_RS.CPP
├── FREL.TXT
├── GLOB_MIN.CPP
├── HORNS_METHOD.TXT
├── INTEGRAT.CPP
├── INVERT.CPP
├── LICENSE.txt
├── MI_BIN.CPP
├── MUTINF_B.CPP
├── MUTINF_C.CPP
├── MUTINF_D.CPP
├── PART.CPP
├── PARZDENS.CPP
├── POWELL.CPP
├── QSORTD.CPP
├── RANDOM.CPP
├── README.md
├── SCREEN_BIVAR.CPP
├── SCREEN_RR.CPP
├── SCREEN_UNIVAR.CPP
├── SPEARMAN.CPP
├── SPLINE.CPP
├── STATS.CPP
├── SVDCMP.CPP
├── TEST_CON.CPP
├── TEST_DIS.CPP
├── TRANS_ENT.CPP
└── errata.md


/9781484233146.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apress/data-mining-algorithms-cpp/bf18dda7f2361534423c56076fd60999b1becd86/9781484233146.png


--------------------------------------------------------------------------------
/AN_COHERENCE.TXT:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  AN_COHERENCE - Coherence analysis and plot of values                      */
  4 | /*                                                                            */
  5 | /******************************************************************************/
  6 | 
  7 | class AnalyzeCoherenceChild {
  8 | 
  9 | public:
 10 |    AnalyzeCoherenceChild ( int npreds , int *preds , int n_dim , int nonpar ) ;
 11 |    ~AnalyzeCoherenceChild () ;
 12 | 
 13 |    int lookback ;
 14 |    int npred ;
 15 |    int preds[MAX_VARS] ;
 16 |    int nonpar ;
 17 |    int n ;        // Length of displayed series of coherences
 18 |    double *val ;  // Coherences for display
 19 |    // Work areas
 20 |    double *mean ;
 21 |    double *covar ;
 22 | } ;
 23 | 
 24 | 
 25 | /*
 26 | --------------------------------------------------------------------------------
 27 | 
 28 |    Constructor and Destructor
 29 | 
 30 | --------------------------------------------------------------------------------
 31 | */
 32 | 
 33 | AnalyzeCoherenceChild::AnalyzeCoherenceChild ( int np , int *p , int lb , int nonp )
 34 | {
 35 |    int icase, i, j, k ;
 36 |    double *dptr, *means, *evals, *evects, *workv, minval, maxval, meanval ;
 37 |    double sum, total, diff, diff2, *nonpar_work, factor ;
 38 |    char msg[512], line[1024] ;
 39 |    FILE *fp ;
 40 | 
 41 |    MEMTEXT ( "AN_COHERENCE AnalyzeCoherenceChild constructor" ) ;
 42 |    npred = np ;
 43 |    lookback = lb ;
 44 |    nonpar = nonp ;
 45 |    for (i=0 ; i<np ; i++)
 46 |       preds[i] = p[i] ;
 47 | 
 48 |    audit ( "" ) ;
 49 |    audit ( "" ) ;
 50 |    audit ( "******************************************" ) ;
 51 |    audit ( "*  Beginning coherence analysis...       *" ) ;
 52 |    if (nonpar)
 53 |       audit ( "*     Nonparametric correlation          *" ) ;
 54 |    else
 55 |       audit ( "*     Ordinary correlation               *" ) ;
 56 |    audit ( "******************************************" ) ;
 57 | 
 58 |    audit ( "" ) ;
 59 |    sprintf_s ( msg, "The following variables are tested with a lookback of %d:", lookback ) ;
 60 |    audit ( msg ) ;
 61 |    for ( i=0 ; i<npred ; i++) {
 62 |       sprintf_s ( msg , "   %s", var_names[preds[i]] ) ;
 63 |       audit ( msg ) ;
 64 |       }
 65 | 
 66 | 
 67 | /*
 68 |    Allocate memory
 69 | */
 70 | 
 71 |    MEMTEXT ( "AN_COHERENCE: val, means, covar, evals, evects, workv" ) ;
 72 |    val = (double *) malloc ( (n_cases-lookback+1) * sizeof(double) ) ;
 73 |    means = (double *) malloc ( npred * sizeof(double) ) ;
 74 |    covar = (double *) malloc ( npred * npred * sizeof(double) ) ;
 75 |    evals = (double *) malloc ( npred * sizeof(double) ) ;
 76 |    evects = (double *) malloc ( npred * npred * sizeof(double) ) ;
 77 |    workv = (double *) malloc ( npred * sizeof(double) ) ;
 78 |    if (nonpar) {
 79 |       MEMTEXT ( "AN_COHERENCE: nonpar_work" ) ;
 80 |       nonpar_work = (double *) malloc ( 2 * lookback * sizeof(double) ) ;  // Used for nonpar corr
 81 |       }
 82 |    else
 83 |       nonpar_work = NULL ;
 84 | 
 85 | 
 86 | /*
 87 |    Main outer loop does all cases
 88 | */
 89 | 
 90 |    minval = 1.e30 ;
 91 |    maxval = -1.e30 ;
 92 |    meanval = 0.0 ;
 93 | 
 94 |    for (icase=lookback-1 ; icase<n_cases ; icase++) {
 95 | 
 96 |       // If nonparametric, compute correlation matrix
 97 | 
 98 |       if (nonpar) {
 99 |          covar[0] = 1.0 ;
100 |          for (i=1 ; i<npred ; i++) {
101 |             for (j=0 ; j<i ; j++) {
102 |                for (k=0 ; k<lookback ; k++) {
103 |                   dptr = database + n_vars * (icase - k) ;  // Point to this case in database
104 |                   nonpar_work[k] = dptr[preds[i]] ;
105 |                   nonpar_work[lookback+k] = dptr[preds[j]] ;
106 |                   }
107 |                covar[i*npred+j] = spearman ( lookback , nonpar_work , nonpar_work+lookback , nonpar_work , nonpar_work+lookback ) ;
108 |                }
109 |             covar[i*npred+i] = 1.0 ;
110 |             }
111 |          }
112 | 
113 |       // Else not nonparametric, so compute means and covariances, then correlation
114 | 
115 |       else {
116 |          for (i=0 ; i<npred ; i++)
117 |             means[i] = 0.0 ;
118 | 
119 |          for (i=0 ; i<lookback ; i++) {
120 |             dptr = database + n_vars * (icase - i) ;  // Point to this case in database
121 |             for (j=0 ; j<npred ; j++)
122 |                means[j] += dptr[preds[j]] ;
123 |             }
124 | 
125 |          for (j=0 ; j<npred ; j++)
126 |             means[j] /= lookback ;
127 | 
128 |          for (i=0 ; i<npred ; i++) {
129 |             for (j=0 ; j<=i ; j++)
130 |                covar[i*npred+j] = 0.0 ;
131 |             }
132 | 
133 |          for (i=0 ; i<lookback ; i++) {
134 |             dptr = database + n_vars * (icase - i) ;  // Point to this case in database
135 |             for (j=0 ; j<npred ; j++) {
136 |                diff = dptr[preds[j]] - means[j] ;
137 |                for (k=0 ; k<=j ; k++) {
138 |                   diff2 = dptr[preds[k]] - means[k] ;
139 |                   covar[j*npred+k] += diff * diff2 ;
140 |                   }
141 |                }
142 |             }
143 | 
144 |          for (j=0 ; j<npred ; j++) {
145 |             for (k=0 ; k<=j ; k++)
146 |                covar[j*npred+k] /= lookback ;
147 |             }
148 | 
149 |          // Convert covariances to correlation
150 |          for (j=1 ; j<npred ; j++) {
151 |             for (k=0 ; k<j ; k++)
152 |                covar[j*npred+k] /= sqrt ( covar[j*npred+j] * covar[k*npred+k] ) ;
153 |             }
154 | 
155 |          for (j=0 ; j<npred ; j++)
156 |             covar[j*npred+j] = 1.0 ;
157 |             
158 |          } // Else not nonpar, so compute means and covar, correlation
159 | 
160 | /*
161 |    Compute eigenvalues and fill in 'val' which we will display
162 | */
163 | 
164 |       evec_rs ( covar , npred , 0 , evects , evals , workv ) ;
165 | 
166 |       factor = 0.5 * (npred - 1) ;
167 |       sum = total = 0.0 ;
168 |       for (i=0 ; i<npred ; i++) {
169 |          total += evals[i] ;
170 |          sum += (factor - i) * evals[i] / factor ;
171 |          }
172 | 
173 |       // Compute the criterion
174 |       sum /= total ;
175 |       val[icase-lookback+1] = sum ;
176 | 
177 |       if (val[icase-lookback+1] > maxval)
178 |          maxval = val[icase-lookback+1] ;
179 |       if (val[icase-lookback+1] < minval)
180 |          minval = val[icase-lookback+1] ;
181 |       meanval += val[icase-lookback+1] ;
182 | 
183 |       } // For all cases
184 | 
185 |    meanval /= n_cases - lookback + 1 ;
186 | 
187 | 
188 | /*
189 |    Print summary
190 | */
191 | 
192 |    audit ( "" ) ;
193 |    sprintf_s ( msg, "Mean coherence = %.5lf", meanval ) ;
194 |    audit ( msg ) ;
195 |    sprintf_s ( msg, "Min = %.5lf", minval ) ;
196 |    audit ( msg ) ;
197 |    sprintf_s ( msg, "Max = %.5lf", maxval ) ;
198 |    audit ( msg ) ;
199 |    audit ( "" ) ;
200 |    sprintf_s ( msg, "Coherence values have been written to %s", coherence_log ) ;
201 |    audit ( msg ) ;
202 | 
203 |    MEMTEXT ( "AN_COHERENCE: free means, covar, evals, evects, workv (,nonpar_work)" ) ;
204 |    free ( means ) ;
205 |    free ( covar ) ;
206 |    free ( evals ) ;
207 |    free ( evects ) ;
208 |    free ( workv ) ;
209 |    if (nonpar_work != NULL)
210 |       free ( nonpar_work ) ;
211 | }
212 | 
213 | AnalyzeCoherenceChild::~AnalyzeCoherenceChild ()
214 | {
215 |    MEMTEXT ( "AN_COHERENCE.CPP AnalyzeCoherenceChild destructor" ) ;
216 |    if (val != NULL)
217 |       free ( val ) ;
218 | }
219 | 


--------------------------------------------------------------------------------
/AN_CVARS.TXT:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  AN_CVARS - AnalyzeClusterVars operations                                  */
  4 | /*                                                                            */
  5 | /******************************************************************************/
  6 | 
  7 | int an_cvars (
  8 |    int n_dim ,          // Number of initial dimensions to consider
  9 |    int ngrp_to_print ,  // Start printing when n of groups drops this low
 10 |    int type             // Centroid versus leader method
 11 |    )
 12 | {
 13 |    int i, j, nvars, icand1, icand2, ibest1, ibest2, n_groups ;
 14 |    int *group_id, *n_in_group ;
 15 |    double x, dotprod, length, best_dotprod, *centroids ;
 16 |    char msg[256], msg2[256] ;
 17 | 
 18 |    n_groups = npred ;   // Number of groups; initially, every variable is its own group
 19 |    nvars = npred ;      // This name just makes things more clear; no other reason
 20 | 
 21 | /*
 22 |    Allocate memory
 23 | */
 24 | 
 25 |    group_id = (int *) malloc ( nvars * sizeof(int) ) ;
 26 |    n_in_group = (int *) malloc ( nvars * sizeof(int) ) ;
 27 |    centroids = (double *) malloc ( nvars * n_dim * sizeof(double) ) ;
 28 | 
 29 | /*
 30 |    Initialize; For each variable, make the length of the vector one
 31 | */
 32 | 
 33 |    for (i=0 ; i<nvars ; i++) {
 34 |       group_id[i] = i ;          // For each variable, this is the group to which it belongs
 35 |       n_in_group[i] = 1 ;        // Size of each group
 36 |       length = 0.0 ;             // Will cumulate length of this variable's vector
 37 |       for (j=0 ; j<n_dim ; j++)
 38 |          length += structure[i*nvars+j] * structure[i*nvars+j] ;
 39 |       length = 1.0 / sqrt ( length ) ;
 40 |       for (j=0 ; j<n_dim ; j++)  // Normalize the length of this variable's vector
 41 |          centroids[i*n_dim+j] = length * structure[i*nvars+j] ;
 42 |       }
 43 | 
 44 | /*
 45 |    Print normalized factors
 46 | */
 47 | 
 48 |    audit ( "" ) ;
 49 |    audit ( "" ) ;
 50 |    audit ( "Relevant factors, after normalization" ) ;
 51 |    audit ( "" ) ;
 52 | 
 53 |    for (i=0 ; i<nvars ; i++) {
 54 |       sprintf_s ( msg, "%15s %8.4lf", var_names[preds[i]], centroids[i*n_dim+0] ) ;
 55 |       for (j=1 ; j<n_dim ; j++) {
 56 |          sprintf_s ( msg2 , "%8.4lf", centroids[i*n_dim+j] ) ;
 57 |          strcat_s ( msg , msg2 ) ;
 58 |          }
 59 |       audit ( msg ) ;
 60 |       }
 61 |    audit ( "" ) ;
 62 | 
 63 | /*
 64 |    Hierarchical grouping
 65 | */
 66 | 
 67 |    while (n_groups > 1) {
 68 |       best_dotprod = -1.0 ;
 69 | 
 70 |       // Try every pair of groups (icand1 and icand2)
 71 |       for (icand1=0 ; icand1<n_groups-1 ; icand1++) {
 72 |          for (icand2=icand1+1 ; icand2<n_groups ; icand2++) {
 73 | 
 74 |             // The distance measure is based on the angle between two variables
 75 |             // Because the two vectors are unit length, their dot product is the cosine of the angle between them.
 76 |             // The negative of a variable is equivalent to the variable, so check both via symmetry
 77 |             dotprod = 0.0 ;
 78 |             for (i=0 ; i<n_dim ; i++)
 79 |                dotprod += centroids[icand1*n_dim+i] * centroids[icand2*n_dim+i] ;
 80 |             dotprod = fabs ( dotprod ) ;   // Handle symmetry
 81 | 
 82 |             if (dotprod > best_dotprod) {  // Keep track of the pair with best criterion
 83 |                best_dotprod = dotprod ;
 84 |                ibest1 = icand1 ; 
 85 |                ibest2 = icand2 ;
 86 |                }
 87 | 
 88 |             } // For icand2
 89 |          } // For icand1
 90 | 
 91 |       // We just found the closest pair.  Merge larger index into smaller.
 92 | 
 93 |       if (best_dotprod > 1.0)   // Should never happen, but handle tiny fpt errors
 94 |          best_dotprod = 1.0 ;
 95 | 
 96 |       sprintf_s ( msg , "Merged groups %d and %d separated by %.2lf degrees; now have %d groups",
 97 |                   ibest1+1, ibest2+1, acos(best_dotprod)*180.0/PI, n_groups-1 ) ;
 98 |       audit ( msg ) ;
 99 | 
100 |       if (type) {  // Did the user request centroid method?
101 |          // Recompute the (approximate) centroid of the absorbing (smaller id) group
102 |          length = 0.0 ;
103 |          for (j=0 ; j<n_dim ; j++) {
104 |             x = (n_in_group[ibest1] * centroids[ibest1*n_dim+j] +
105 |                  n_in_group[ibest2] * centroids[ibest2*n_dim+j]) /
106 |                 (n_in_group[ibest1] + n_in_group[ibest2]) ;
107 |             centroids[ibest1*n_dim+j] = x ;
108 |             length += x * x ;
109 |             }
110 |          length = 1.0 / sqrt ( length ) ;
111 |          for (j=0 ; j<n_dim ; j++)
112 |             centroids[ibest1*n_dim+j] *= length ;  // The length must always be one
113 |          } // If type is centroid (not leader)
114 | 
115 |       n_in_group[ibest1] += n_in_group[ibest2] ;  // Group 1 just absorbed group 2
116 | 
117 |       // Remap the largest and then pull down all groups above largest.
118 |       for (i=0 ; i<nvars ; i++) {
119 |          if (group_id[i] == ibest2)  // If this variable was in Group 2
120 |             group_id[i] = ibest1 ;   // Reclassify it as being in Group 1, the absorbing group
121 |          if (group_id[i] > ibest2)   // Groups above absorbed group
122 |             --group_id[i] ;          // Now have to fill in the hole below them
123 |          }
124 | 
125 |       for (i=ibest2+1 ; i<n_groups ; i++) {  // Crunch down group stuff above the absorbed group
126 |          n_in_group[i-1] = n_in_group[i] ;
127 |          for (j=0 ; j<n_dim ; j++)
128 |             centroids[(i-1)*n_dim+j] = centroids[i*n_dim+j] ;
129 |          }
130 | 
131 |       --n_groups ;    // We just lost a group (ibest2 was absorbed into ibest1)
132 | 
133 | /*
134 |    Print group membership info
135 | */
136 | 
137 |       if (n_groups <= ngrp_to_print  &&  n_groups > 1) {
138 |          audit ( "Group membership..." ) ;
139 |          for (i=0 ; i<n_groups ; i++) {
140 |             sprintf_s ( msg , "   Group %d", i+1 ) ;
141 |             audit ( msg ) ;
142 |             for (j=0 ; j<nvars ; j++) {
143 |                if (group_id[j] == i) {
144 |                   sprintf_s ( msg , "      %s", var_names[preds[j]] ) ;
145 |                   audit ( msg ) ;
146 |                   }
147 |                }
148 |             }
149 |          }
150 | 
151 |       } // while (n_groups > 1)
152 | 
153 | FINISH:
154 |    free ( group_id ) ;
155 |    free ( n_in_group ) ;
156 |    free ( centroids ) ;
157 | 
158 |    return 0 ;
159 | }


--------------------------------------------------------------------------------
/AN_EIGEN.TXT:
--------------------------------------------------------------------------------
  1 | class AnalyzeEigenChild {
  2 | 
  3 | public:
  4 |    AnalyzeEigenChild ( int npreds , int *preds , int nonpar ) ;
  5 |    ~AnalyzeEigenChild () ;
  6 | 
  7 |    int npred ;
  8 |    int preds[MAX_VARS] ;
  9 |    int nonpar ;
 10 |    int n ;        // Size of matrix (number of eigenvalues)
 11 |    double *val ;  // Eigenvalues for display
 12 | } ;
 13 | 
 14 | 
 15 | /*
 16 |    Allocate memory
 17 | */
 18 | 
 19 |    cumulative = (double *) malloc ( npred * sizeof(double) ) ;
 20 |    covar = (double *) malloc ( npred * npred * sizeof(double) ) ;
 21 |    evals = (double *) malloc ( npred * sizeof(double) ) ;
 22 |    structure = (double *) malloc ( npred * npred * sizeof(double) ) ;
 23 |    means = (double *) malloc ( npred * sizeof(double) ) ;
 24 |    stddev = (double *) malloc ( npred * sizeof(double) ) ;
 25 | 
 26 | /*
 27 |    Compute means (means) and standard deviations (stddev)
 28 | */
 29 | 
 30 |    for (i=0 ; i<npred ; i++)
 31 |       means[i] = stddev[i] = 1.e-60 ;
 32 | 
 33 |    for (i=0 ; i<n_cases ; i++) {
 34 |       for (j=0 ; j<npred ; j++)
 35 |          means[j] += database[i*n_vars+preds[j]] ;
 36 |       }
 37 | 
 38 |    for (j=0 ; j<npred ; j++)
 39 |       means[j] /= n_cases ;
 40 | 
 41 |    for (i=0 ; i<n_cases ; i++) {
 42 |       for (j=0 ; j<npred ; j++) {
 43 |          diff = database[i*n_vars+preds[j]] - means[j] ;
 44 |          stddev[j] += diff * diff ;
 45 |          }
 46 |       }
 47 | 
 48 |    for (j=0 ; j<npred ; j++)
 49 |       stddev[j] = sqrt ( stddev[j] / n_cases ) ;
 50 | 
 51 | /*
 52 |    Compute correlation matrix 'covar'
 53 | */
 54 | 
 55 |    for (i=1 ; i<npred ; i++) {
 56 |       for (j=0 ; j<i ; j++)
 57 |          covar[i*npred+j] = 0.0 ;
 58 |       }
 59 | 
 60 |    for (i=0 ; i<n_cases ; i++) {
 61 |       for (j=1 ; j<npred ; j++) {
 62 |          diff = (database[i*n_vars+preds[j]] - means[j]) / stddev[j] ;
 63 |          for (k=0 ; k<j ; k++) {
 64 |             diff2 = (database[i*n_vars+preds[k]] - means[k]) / stddev[k] ;
 65 |             covar[j*npred+k] += diff * diff2 ;
 66 |             }
 67 |          }
 68 |       }
 69 | 
 70 |    for (j=1 ; j<npred ; j++) {
 71 |       for (k=0 ; k<j ; k++)
 72 |          covar[j*npred+k] /= n_cases ;
 73 |       }
 74 | 
 75 |    for (j=0 ; j<npred ; j++) {
 76 |       covar[j*npred+j] = 1.0 ;
 77 |       for (k=j+1 ; k<npred ; k++)
 78 |          covar[j*npred+k] = covar[k*npred+j] ;  // May be needed for some evect routines
 79 |       }                                         // But not evec_rs(), so may be omitted!
 80 | 
 81 | /*
 82 |    Compute eigenvalues/vectors
 83 | */
 84 | 
 85 |    evec_rs ( covar , npred , 1 , structure , evals , means ) ;
 86 | 
 87 |    sum = 0.0 ;
 88 |    for (i=0 ; i<npred ; i++) {   // We display cumulative eigenvalues
 89 |       if (evals[i] < 0.0)  // Happens only from tiny fpt errors
 90 |          evals[i] = 0.0 ;
 91 |       sum += evals[i] ;
 92 |       cumulative[i] = sum ;
 93 |       }
 94 | 
 95 |    for (i=0 ; i<npred ; i++)     // Make it percent
 96 |       cumulative[i] = 100.0 * cumulative[i] / sum ;
 97 | 
 98 | /*
 99 |    Compute the factor structures by multiplying each eigenvector
100 |    by the square root of its eigenvalue
101 | */
102 | 
103 |    for (i=0 ; i<eigen_npred ; i++) {
104 |       for (j=0 ; j<eigen_npred ; j++) {
105 |          structure[i*npred+j] *= sqrt(evals[j]) ;
106 |          if (structure[i*npred+j] < -1.0)  // In a perfect fpt world this would never happen
107 |             structure[i*npred+j] = -1.0 ;
108 |          if (structure[i*npred+j] > 1.0)
109 |             structure[i*npred+j] = 1.0 ;
110 |          }
111 |       }
112 | 
113 |    free ( covar ) ;
114 |    free ( means ) ;
115 |    free ( stddev ) ;
116 | }
117 | 


--------------------------------------------------------------------------------
/AN_ROTATE.TXT:
--------------------------------------------------------------------------------
 1 | /*
 2 |    Compute (square root) communalities
 3 |    This assumes that structure contains all npred columns (factors)
 4 |    and we are rotating the first n_kept of them.
 5 | */
 6 | 
 7 |    for (i=0 ; i<npred ; i++) {
 8 |       sum = 0.0 ;
 9 |       for (j=0 ; j<n_kept ; j++)
10 |          sum += structure[i*npred+j] * structure[i*npred+j] ;
11 |       comm[i] = sqrt ( sum ) ;
12 |       }
13 | 
14 | /*
15 |    Prescale (divide) structures by (square root) communalities.
16 |    After rotation has converged, we'll undo this by multiplying by sqrt communalities.
17 |    The original version did not do this, which introduced slight bias.
18 | */
19 | 
20 |    for (i=0 ; i<npred ; i++) {
21 |       sum = comm[i] ;
22 |       for (j=0 ; j<n_kept ; j++)
23 |          structure[i*npred+j] /= sum ;
24 |       }
25 | 
26 | /*
27 | --------------------------------------------
28 |    Main outer loop (and its initializations)
29 | --------------------------------------------
30 | */
31 | 
32 |    for (iter=0 ; iter<100 ; iter++) {   // limit is for safety and should never come even close
33 |       converged = 1 ;                   // We'll reset this if an adjustment is made
34 |       for (first_column=0 ; first_column<n_kept-1 ; first_column++) {  // We do all pairs of columns
35 |          for (second_column=first_column+1 ; second_column<n_kept ; second_column++) {
36 |             A = B = C = D = 0.0 ;       // We will sum these down the row (all vars)
37 | 
38 |             // Sum down rows the quantities we will need to compute rotation angle
39 | 
40 |             for (ivar=0 ; ivar<npred ; ivar++) {
41 |                row_ptr = structure + ivar * npred ;  // This var's row in structure matrix
42 |                load1 = row_ptr[first_column] ;
43 |                load2 = row_ptr[second_column] ;
44 |                Uterm = load1 * load1 - load2 * load2 ;
45 |                Vterm = 2.0 * load1 * load2 ;
46 |                A += Uterm ;
47 |                B += Vterm ;
48 |                C += Uterm * Uterm - Vterm * Vterm ;
49 |                D += 2.0 * Uterm * Vterm ;
50 |                } // For ivar
51 |             numer = D - 2.0 * A * B / npred ;
52 |             denom = C - (A * A - B * B) / npred ;
53 |             phi = 0.25 * atan2 ( numer , denom ) ;
54 | 
55 |             // We have the angle.  Rotate if it is not tiny.
56 | 
57 |             if (fabs(phi) < 1.e-10)   // No point rotating this pair of columns if angle is tiny
58 |                continue ;
59 | 
60 |             sin_phi = sin ( phi ) ;
61 |             cos_phi = cos ( phi ) ;
62 | 
63 |             for (ivar=0 ; ivar<npred ; ivar++) {
64 |                row_ptr = structure + ivar * npred ;  // This var's row in structure matrix
65 |                load1 = row_ptr[first_column] ;
66 |                load2 = row_ptr[second_column] ;
67 |                row_ptr[first_column] =   cos_phi * load1  +  sin_phi * load2 ;
68 |                row_ptr[second_column] = -sin_phi * load1  +  cos_phi * load2 ;
69 |                }
70 | 
71 |             converged = 0 ;  // We just made an adjustment, so we are not converged
72 | 
73 |             } // For second column
74 |          } // For first column
75 | 
76 |       if (converged)
77 |          break ;
78 |       } // For iter (main outer loop)
79 | 
80 | /*
81 |    We prescaled the structures.  Now undo that.
82 | */
83 | 
84 |    for (i=0 ; i<npred ; i++) {
85 |       sum = comm[i] ;
86 |       for (j=0 ; j<n_kept ; j++)
87 |          structure[i*npred+j] *= sum ;
88 |       }
89 | 


--------------------------------------------------------------------------------
/BILINEAR.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  BILINEAR - Bilinear class for two-dimensional interpolation               */
  4 | /*                                                                            */
  5 | /******************************************************************************/
  6 | 
  7 | #include <assert.h>
  8 | #include <stdio.h>
  9 | #include <string.h>
 10 | #include <math.h>
 11 | #include <stdlib.h>
 12 | 
 13 | class Bilinear {
 14 | 
 15 | public:
 16 |    Bilinear ( int nxin , double *xin , int nyin , double *yin , double *zin ,
 17 |               int extra ) ;
 18 |    ~Bilinear () ;
 19 |    double evaluate ( double x , double y ) ;
 20 | 
 21 | private:
 22 |    int quadratic ;
 23 |    int nx ;
 24 |    int ny ;
 25 |    double *x ;
 26 |    double *y ;
 27 |    double *z ;
 28 | } ;
 29 | 
 30 | 
 31 | Bilinear::Bilinear ( // Uses input points (x,y,z) where z=f(x,y)
 32 |    int nxin ,        // Number of x points
 33 |    double *xin ,     // They are here, sorted ascending
 34 |    int nyin ,        // Number of y points
 35 |    double *yin ,     // They are here, sorted ascending
 36 |    double *zin ,     // Corresponding function values, y changing fastest
 37 |    int extra         // If nonzero, use 3x3 block with quadratic interpolation
 38 |    )
 39 | {
 40 | 
 41 |    quadratic = extra ;
 42 |    nx = nxin ;
 43 |    ny = nyin ;
 44 |    x = (double *) malloc ( nx * sizeof(double) ) ;
 45 |    y = (double *) malloc ( ny * sizeof(double) ) ;
 46 |    z = (double *) malloc ( nx * ny * sizeof(double) ) ;
 47 |    assert ( x != NULL ) ;
 48 |    assert ( y != NULL ) ;
 49 |    assert ( z != NULL ) ;
 50 | 
 51 |    memcpy ( x , xin , nx * sizeof(double) ) ;
 52 |    memcpy ( y , yin , ny * sizeof(double) ) ;
 53 |    memcpy ( z , zin , nx * ny * sizeof(double) ) ;
 54 | }
 55 | 
 56 | Bilinear::~Bilinear ()
 57 | {
 58 |    free ( x ) ;
 59 |    free ( y ) ;
 60 |    free ( z ) ;
 61 | }
 62 | 
 63 | double Bilinear::evaluate ( double xpt , double ypt )
 64 | {
 65 |    int k, kxlo, kxmid, kxhi, kylo, kymid, kyhi ;
 66 |    double t, u, val, clo, cmid, chi, zlo, zmid, zhi ;
 67 |    double dlo, dmid, dhi, lo_mid, lo_hi, mid_hi ;
 68 | 
 69 | /*
 70 |    Bound outlying inputs
 71 | */
 72 | 
 73 |    if (xpt < x[0])
 74 |       xpt = x[0] ;
 75 |    if (xpt > x[nx-1])
 76 |       xpt = x[nx-1] ;
 77 |    if (ypt < y[0])
 78 |       ypt = y[0] ;
 79 |    if (ypt > y[ny-1])
 80 |       ypt = y[ny-1] ;
 81 | 
 82 | /*
 83 |    Find the pair of x coordinates that bound the input
 84 | */
 85 | 
 86 |    kxlo = 0 ;
 87 |    kxhi = nx - 1 ;
 88 |    while (kxhi > kxlo+1) {
 89 |       k = (kxhi + kxlo) / 2 ;
 90 |       if (xpt < x[k])
 91 |          kxhi = k ;
 92 |       else
 93 |          kxlo = k ;
 94 |       }
 95 | 
 96 | /*
 97 |    Find the pair of y coordinates that bound the input
 98 | */
 99 | 
100 |    kylo = 0 ;
101 |    kyhi = ny - 1 ;
102 |    while (kyhi > kylo+1) {
103 |       k = (kyhi + kylo) / 2 ;
104 |       if (ypt < y[k])
105 |          kyhi = k ;
106 |       else
107 |          kylo = k ;
108 |       }
109 | 
110 | /*
111 |    3x3 with quadratic interpolation?
112 | */
113 | 
114 |    if (quadratic) {
115 |       // Choose which way to go for the third x point
116 |       if (kxlo == 0) {
117 |          kxmid = kxhi ;
118 |          ++kxhi ;
119 |          }
120 |       else if (kxhi == nx-1) {
121 |          kxmid = kxlo ;
122 |          --kxlo ;
123 |          }
124 |       else if (xpt-x[kxlo] < x[kxhi]-xpt) {
125 |          kxmid = kxlo ;
126 |          --kxlo ;
127 |          }
128 |       else {
129 |          kxmid = kxhi ;
130 |          ++kxhi ;
131 |          }
132 | 
133 |       // Choose which way to go for the third y point
134 |       if (kylo == 0) {
135 |          kymid = kyhi ;
136 |          ++kyhi ;
137 |          }
138 |       else if (kyhi == ny-1) {
139 |          kymid = kylo ;
140 |          --kylo ;
141 |          }
142 |       else if (ypt-y[kylo] < y[kyhi]-ypt) {
143 |          kymid = kylo ;
144 |          --kylo ;
145 |          }
146 |       else {
147 |          kymid = kyhi ;
148 |          ++kyhi ;
149 |          }
150 | 
151 |       dlo = xpt - x[kxlo] ;
152 |       dmid = xpt - x[kxmid] ;
153 |       dhi = xpt - x[kxhi] ;
154 |       lo_mid = x[kxlo] - x[kxmid] ;
155 |       lo_hi = x[kxlo] - x[kxhi] ;
156 |       mid_hi = x[kxmid] - x[kxhi] ;
157 |       clo = dmid * dhi / (lo_mid * lo_hi) ;
158 |       cmid = dlo * dhi / (-lo_mid * mid_hi) ;
159 |       chi = dlo * dmid / (lo_hi * mid_hi) ;
160 | 
161 |       zlo  = clo * z[kxlo*ny+kylo]  + cmid * z[kxmid*ny+kylo]  + chi * z[kxhi*ny+kylo] ;
162 |       zmid = clo * z[kxlo*ny+kymid] + cmid * z[kxmid*ny+kymid] + chi * z[kxhi*ny+kymid] ;
163 |       zhi  = clo * z[kxlo*ny+kyhi]  + cmid * z[kxmid*ny+kyhi]  + chi * z[kxhi*ny+kyhi] ;
164 | 
165 |       dlo = ypt - y[kylo] ;
166 |       dmid = ypt - y[kymid] ;
167 |       dhi = ypt - y[kyhi] ;
168 |       lo_mid = y[kylo] - y[kymid] ;
169 |       lo_hi = y[kylo] - y[kyhi] ;
170 |       mid_hi = y[kymid] - y[kyhi] ;
171 |       clo = dmid * dhi / (lo_mid * lo_hi) ;
172 |       cmid = dlo * dhi / (-lo_mid * mid_hi) ;
173 |       chi = dlo * dmid / (lo_hi * mid_hi) ;
174 | 
175 |       return clo * zlo + cmid * zmid + chi * zhi ;
176 |       } // If quadratic
177 | 
178 | /*
179 |    Ordinary 2x2 bilinear
180 | */
181 | 
182 |    else {
183 |       t = (xpt - x[kxlo]) / (x[kxhi] - x[kxlo]) ;
184 |       u = (ypt - y[kylo]) / (y[kyhi] - y[kylo]) ;
185 | 
186 |       val = (1.0 - t) * (1.0 - u) * z[kxlo*ny+kylo] ;
187 |       val += t * (1.0 - u) * z[kxhi*ny+kylo] ;
188 |       val += t * u * z[kxhi*ny+kyhi] ;
189 |       val += (1.0 - t) * u * z[kxlo*ny+kyhi] ;
190 |       return val ;
191 |       }
192 | }
193 | 


--------------------------------------------------------------------------------
/BRENTMIN.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  BRENTMIN - Use Brent's method to find a local minimum of a                */
  4 | /*             univariate function.                                           */
  5 | /*                                                                            */
  6 | /*  This is given three points such that the center has lesser function       */
  7 | /*  value than its neighbors.  It iteratively refines the interval.           */
  8 | /*  If the criterion function drops to critlim or smaller, execution will     */
  9 | /*  terminate.                                                                */
 10 | /*                                                                            */
 11 | /******************************************************************************/
 12 | 
 13 | #include <math.h>
 14 | 
 15 | int user_pressed_escape() ;
 16 | 
 17 | int brentmin (
 18 |    int itmax ,            // Iteration limit
 19 |    double critlim ,       // Quit if crit drops this low
 20 |    double eps ,           // Function convergence tolerance
 21 |    double tol ,           // X convergence tolerance
 22 |    int (*criter) (double , double *) , // Criterion function
 23 |    double *xa ,           // Lower X value, input and output
 24 |    double *xb ,           // Middle (best), input and output
 25 |    double *xc ,           // And upper, input and output
 26 |    double *y              // Function value at xb, input and output
 27 |    )
 28 | {
 29 |    int iter, user_quit ;
 30 |    double x0, x1, x2, y0, y1, y2, xleft, xmid, xright, movement, trial ;
 31 |    double small_step, small_dist, numer, denom, temp1, temp2 ;
 32 |    double testdist, this_x, this_y ;
 33 | 
 34 | /*
 35 |    Initialize
 36 | */
 37 | 
 38 |    x0 = x1 = x2 = *xb ;
 39 |    xleft = *xa ;
 40 |    xright = *xc ;
 41 | 
 42 |    y0 = y1 = y2 = *y ;
 43 | 
 44 | /*
 45 |   We want a golden-section search the first iteration.  Force this by setting
 46 |   movement equal to zero.
 47 | */
 48 | 
 49 |    movement = trial = 0.0 ;
 50 |    user_quit = 0 ;
 51 | 
 52 | /*
 53 |    Main loop.
 54 | */
 55 | 
 56 |    for (iter=0 ; iter<itmax ; iter++) {
 57 | 
 58 |       if (y0 < critlim)    // Done?
 59 |          break ;
 60 | 
 61 |       if ((user_quit = user_pressed_escape ()) != 0)
 62 |          break ;
 63 | 
 64 | /*
 65 |    This test is more sophisticated than it looks.  It tests the closeness
 66 |    of xright and xleft (relative to small_dist), AND makes sure that x0 is
 67 |    near the midpont of that interval.
 68 | */
 69 | 
 70 |       small_step = fabs ( x0 ) ;
 71 |       if (small_step < 1.0)
 72 |          small_step = 1.0 ;
 73 |       small_step *= tol ;
 74 |       small_dist = 2.0 * small_step ;
 75 | 
 76 |       xmid = 0.5 * (xleft + xright) ;
 77 | 
 78 |       if (fabs ( x0 - xmid )  <=  (small_dist - 0.5 * (xright - xleft)))
 79 |          break ;
 80 | 
 81 | /*
 82 |    Avoid refining function to limits of precision
 83 | */
 84 |       if ((iter >= 4)  &&  ((fabs(y2 - y0) / (fabs(y0) + 1.0)) < eps))
 85 |          break ;
 86 | 
 87 |       if (fabs ( movement )  >  small_step) {  // Try parabolic only if moving
 88 |          temp1 = (x0 - x2) * (y0 - y1) ;
 89 |          temp2 = (x0 - x1) * (y0 - y2) ;
 90 |          numer = (x0 - x1) * temp2 - (x0 - x2) * temp1 ;
 91 |          denom = 2. * (temp1 - temp2) ;
 92 |          testdist = movement ;     // Intervals must get smaller
 93 |          movement = trial ;
 94 |          if (fabs(denom) > 1.e-40)
 95 |             trial = numer / denom ; // Parabolic estimate of minimum
 96 |          else 
 97 |             trial = 1.e40 ;
 98 | 
 99 |          temp1 = trial + x0 ;
100 |          if ((2.0 * fabs ( trial ) < fabs ( testdist ))// If shrinking
101 |           && (temp1 > xleft) && (temp1 < xright)) {    // And safely in bounds
102 |             this_x = temp1 ;                           // Use parabolic estimate
103 |             if ((this_x - xleft  <  small_dist)  ||    // Cannot get too close
104 |                 (xright - this_x <  small_dist))       // to the endpoints
105 |                trial = (x0 < xmid)  ?  small_step  :  -small_step ;
106 |             }
107 |          else {  // Punt via golden section because cannot use parabolic
108 |             movement = (xmid > x0)  ?  xright - x0  :  xleft - x0 ;
109 |             trial = .3819660 * movement ;
110 |             }
111 |          }
112 |       else { // Must use golden section due to insufficient movement
113 |          movement = (xmid > x0)  ?  xright - x0  :  xleft - x0 ;
114 |          trial = .3819660 * movement ;
115 |          }
116 | 
117 |       if (fabs (trial)  >=  small_step)     // Make sure we move a good distance
118 |          this_x = x0 + trial ;
119 |       else
120 |          this_x = (trial > 0.0)  ?  x0 + small_step  :  x0 - small_step ;
121 | 
122 | /*
123 |    Evaluate the function here.
124 | */
125 | 
126 |       user_quit = criter ( this_x , &this_y ) ;
127 |       if (user_quit)
128 |          break ;
129 | 
130 | /*
131 |    Insert this new point in the correct position in the 'best' hierarchy
132 | */
133 | 
134 |       if (this_y <= y0) {    // Improvement
135 |          if (this_x < x0)
136 |             xright = x0 ;
137 |          else
138 |             xleft = x0 ;
139 |          x2 = x1 ;
140 |          x1 = x0 ;
141 |          x0 = this_x ;
142 |          y2 = y1 ;
143 |          y1 = y0 ;
144 |          y0 = this_y ;
145 |          }
146 | 
147 |       else {                  // No improvement
148 |          if (this_x >= x0)
149 |             xright = this_x ;
150 |          else
151 |             xleft = this_x ;
152 | 
153 |          if ((this_y <= y1)  ||  (x1 == x0)) {
154 |             x2 = x1 ;
155 |             x1 = this_x ;
156 |             y2 = y1 ;
157 |             y1 = this_y ;
158 |             }
159 |          else if ((this_y <= y2)  ||  (x2 == x0)  ||  (x2 == x1)) {
160 |             x2 = this_x ;
161 |             y2 = this_y ;
162 |             }
163 |          }
164 |       }
165 |  
166 |    *xa = xleft ;
167 |    *xb = x0 ;
168 |    *xc = xright ;
169 |    *y = y0 ;
170 | 
171 |    return user_quit ;
172 | }
173 | 


--------------------------------------------------------------------------------
/Contributing.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Apress Source Code
 2 | 
 3 | Copyright for Apress source code belongs to the author(s). However, under fair use you are encouraged to fork and contribute minor corrections and updates for the benefit of the author(s) and other readers.
 4 | 
 5 | ## How to Contribute
 6 | 
 7 | 1. Make sure you have a GitHub account.
 8 | 2. Fork the repository for the relevant book.
 9 | 3. Create a new branch on which to make your change, e.g. 
10 | `git checkout -b my_code_contribution`
11 | 4. Commit your change. Include a commit message describing the correction. Please note that if your commit message is not clear, the correction will not be accepted.
12 | 5. Submit a pull request.
13 | 
14 | Thank you for your contribution!


--------------------------------------------------------------------------------
/DATAMINE_Manual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apress/data-mining-algorithms-cpp/bf18dda7f2361534423c56076fd60999b1becd86/DATAMINE_Manual.pdf


--------------------------------------------------------------------------------
/DENSITY_PLOTS.TXT:
--------------------------------------------------------------------------------
  1 | This is a set of code fragments that illustrate computation of a density plot.
  2 | 
  3 | The following variables are especially important here
  4 |    database     n_cases (rows) by n_vars (columns) dataset containing all data
  5 |    grid         res by res displayable image which we compute
  6 |    val1         Horizontal variable, which we extract from the database
  7 |    val2         And vertical variable
  8 |    keys         Work area, needed only for histogram equalization
  9 | 
 10 | The user-specified parameters are as follows:
 11 |    varnum1      Column in the database of horizontal variable
 12 |    varnum2      Column in the database of vertical variable
 13 |    use_lowlim1  Flag: limit the lower range of the horizontal variable?
 14 |    lowlim_val1  Lower limit if specified by user
 15 |    Similar variables exist for the upper limits and for the vertical variable
 16 |    res          Vertical and horizontal resolution of the square image generated
 17 |    width        Fraction of standard deviation used for Parzen window width
 18 |    shift        Amount to shift displayed tone for better display
 19 |    spread       Amount to expand displayed tone range for better display
 20 |    type         Type of display
 21 |       TYPE_DENSITY              Actual density
 22 |       TYPE_MARGINAL             Marginal density product, shows 'no relationship' pattern
 23 |       TYPE_INCONSISTENCY        Marginal inconsistency
 24 |       TYPE_MI                   Mutual information contribution
 25 |    hist         Apply histogram normalization?
 26 |    sharpen      Sharpen display range to clarify boundary?
 27 | 
 28 | 
 29 | /*
 30 | These are memory allocations, with 'keys', 'val1', and 'val2' being work areas
 31 | */
 32 | 
 33 |    grid = (double *) malloc ( 2 * res * res * sizeof(double) ) ;
 34 |    keys = (int *) malloc ( res * res * sizeof(int) ) ;
 35 |    val1 = (double *) malloc ( n_cases * sizeof(double) ) ;
 36 |    val2 = (double *) malloc ( n_cases * sizeof(double) ) ;
 37 | 
 38 | 
 39 | /*
 40 |    Get the data from the database, with 'n_vars' being the number of columns in the database,
 41 |    and 'n_cases' being the number of rows.
 42 | */
 43 | 
 44 |    for (i=0 ; i<n_cases ; i++) {
 45 |       val1[i] = database[i*n_vars+varnum1] ;  // Horizontal variable
 46 |       val2[i] = database[i*n_vars+varnum2] ;  // Vertical variable
 47 |       }
 48 | 
 49 | 
 50 | /*
 51 |    Find the range of each variable
 52 |    Apply user's limits if specified, and disallow (pathological!) zero range.
 53 | */
 54 | 
 55 |    smallest = largest = val1[0] ;
 56 |    for (i=1 ; i<n_cases ; i++) {
 57 |       if (val1[i] < smallest)
 58 |          smallest = val1[i] ;
 59 |       if (val1[i] > largest)
 60 |          largest = val1[i] ;
 61 |       }
 62 | 
 63 |    if (use_lowlim1)
 64 |       smallest = lowlim_val1 ;
 65 | 
 66 |    if (use_highlim1)
 67 |       largest = highlim_val1 ;
 68 | 
 69 |    if (largest <= smallest) {      // Should never happen, but user may be careless
 70 |       largest = smallest + 0.1 ;
 71 |       smallest = largest - 0.2 ;
 72 |       }
 73 | 
 74 |    // Use these ranges to set up plot things, such as labels
 75 |    // This code is omitted, as it is specific to the desired interface system
 76 |    // We let xmin and xmax be the actual display range, which may equal or be outside (smallest, largest).
 77 |    // Now we do the same thing for the vertical variable
 78 | 
 79 |    smallest = largest = val2[0] ;
 80 |    for (i=1 ; i<n_cases ; i++) {
 81 |       if (val2[i] < smallest)
 82 |          smallest = val2[i] ;
 83 |       if (val2[i] > largest)
 84 |          largest = val2[i] ;
 85 |       }
 86 | 
 87 |    if (use_lowlim2)
 88 |       smallest = lowlim_val2 ;
 89 | 
 90 |    if (use_highlim2)
 91 |       largest = highlim_val2 ;
 92 | 
 93 |    if (largest <= smallest) {
 94 |       largest = smallest + 0.1 ;
 95 |       smallest = largest - 0.2 ;
 96 |       }
 97 | 
 98 | 
 99 | /*
100 |    Compute the scale factors for the Parzen windows
101 | */
102 | 
103 |    scale1 = scale2 = mean1 = mean2 = 0.0 ;
104 | 
105 |    for (i=0 ; i<n_cases ; i++) {
106 |       x = val1[i] ;
107 |       if (use_lowlim1  &&  x < lowlim_val1)
108 |          x = lowlim_val1 ;
109 |       if (use_highlim1  &&  x > highlim_val1)
110 |          x = highlim_val1 ;
111 |       mean1 += x ;
112 |       x = val2[i] ;
113 |       if (use_lowlim2  &&  x < lowlim_val2)
114 |          x = lowlim_val2 ;
115 |       if (use_highlim2  &&  x > highlim_val2)
116 |          x = highlim_val2 ;
117 |       mean2 += x ;
118 |       }
119 | 
120 |    mean1 /= n_cases ;
121 |    mean2 /= n_cases ;
122 | 
123 |    for (i=0 ; i<n_cases ; i++) {
124 |       x = val1[i] ;
125 |       if (use_lowlim1  &&  x < lowlim_val1)
126 |          x = lowlim_val1 ;
127 |       if (use_highlim1  &&  x > highlim_val1)
128 |          x = highlim_val1 ;
129 |       diff = x - mean1 ;
130 |       scale1 += diff * diff ;
131 |       x = val2[i] ;
132 |       if (use_lowlim2  &&  x < lowlim_val2)
133 |          x = lowlim_val2 ;
134 |       if (use_highlim2  &&  x > highlim_val2)
135 |          x = highlim_val2 ;
136 |       diff = x - mean2 ;
137 |       scale2 += diff * diff ;
138 |       }
139 | 
140 |    scale1 = width * sqrt ( scale1 / n_cases ) ;
141 |    scale2 = width * sqrt ( scale2 / n_cases ) ;
142 | 
143 |    if (scale1 < 1.e-30)   // Should never happen, but user may be careless
144 |       scale1 = 1.e-30 ;
145 | 
146 |    if (scale2 < 1.e-30)
147 |       scale2 = 1.e-30 ;
148 | 
149 | 
150 | /*
151 |    Compute the raw display grid, before any transformations
152 | */
153 | 
154 |    for (i=0 ; i<res*res ; i++)
155 |       grid[i] = 0.0 ;  // Avoid nan in case user aborts
156 | 
157 |    total_joint = 0.0 ;
158 | 
159 |    for (horz=0 ; horz<res ; horz++) {
160 |       x = xmin + horz * (xmax - xmin) / (res - 1) ;
161 |       for (vert=0 ; vert<res ; vert++) {
162 |          y = ymin + vert * (ymax - ymin) / (res - 1) ;
163 |          xmarg = ymarg = joint = 0.0 ;
164 |          for (i=0 ; i<n_cases ; i++) {
165 |             xdiff = (val1[i] - x) / scale1 ;
166 |             ydiff = (val2[i] - y) / scale2 ;
167 |             xmarg += exp ( -0.5 * xdiff * xdiff ) ;
168 |             ymarg += exp ( -0.5 * ydiff * ydiff ) ;
169 |             joint += exp ( -0.5 * (xdiff * xdiff + ydiff * ydiff) ) ;
170 |             }
171 |          xmarg /= n_cases * scale1 * root_two_pi ;
172 |          ymarg /= n_cases * scale2 * root_two_pi ;
173 |          joint /= n_cases * scale1 * scale2 * two_pi ;
174 |          if (xmarg < 1.e-50)
175 |             xmarg = 1.e-50 ;
176 |          if (ymarg < 1.e-50)
177 |             ymarg = 1.e-50 ;
178 |          if (joint < 1.e-100)
179 |             joint = 1.e-100 ;
180 | 
181 |          if (type == TYPE_DENSITY)
182 |             grid[vert*res+horz] = log ( joint ) ;
183 |          else if (type == TYPE_MARGINAL)
184 |             grid[vert*res+horz] = log ( xmarg ) + log ( ymarg ) ;
185 |          else {  // INCONSISTENCY or MI
186 |             numer = joint ;
187 |             if (numer < 1.e-100)
188 |                numer = 1.e-100 ;
189 |             denom = xmarg * ymarg ;
190 |             if (denom < 1.e-100)
191 |                denom = 1.e-100 ;
192 |             grid[vert*res+horz] = log ( numer ) - log ( denom ) ; // Inconsistency
193 |             if (type == TYPE_MI) {   // If user wants mutual information
194 |                total_joint += numer ;
195 |                grid[vert*res+horz] *= numer ;
196 |                }
197 |             }
198 |          }
199 |       } // For horz
200 | 
201 | 
202 |    if (type == TYPE_MI) {   // If user wants mutual information
203 |       totalMI = 0.0 ;
204 |       maxMI = -1.e100 ;
205 |       for (horz=0 ; horz<res ; horz++) {
206 |          x = xmin + horz * (xmax - xmin) / (res - 1) ;
207 |          for (vert=0 ; vert<res ; vert++) {
208 |             y = ymin + vert * (ymax - ymin) / (res - 1) ;
209 |             grid[vert*res+horz] /= total_joint ;  // Normalize (Does not impact display; just makes printed MI sensible)
210 |             totalMI += grid[vert*res+horz] ;      // Guaranteed non-negative
211 |             if (grid[vert*res+horz] > maxMI) {
212 |                maxMI = grid[vert*res+horz] ;
213 |                maxMIx = x ;
214 |                maxMIy = y ;
215 |                }
216 |             }
217 |          }
218 |       if (totalMI > 0.0)
219 |          maxMI *= res * res / totalMI ;
220 |       else
221 |          maxMI = 0.0 ;
222 |       }
223 | 
224 |    if (type == TYPE_INCONSISTENCY) {   // If user wants inconsistency
225 |       max_pos = max_neg = 1.e-20 ;
226 |       for (i=0 ; i<res*res ; i++) {
227 |         if (grid[i] > 0.0  &&  grid[i] > max_pos)
228 |            max_pos = grid[i] ;
229 |         if (grid[i] < 0.0  &&  (-grid[i]) > max_neg)
230 |            max_neg = -grid[i] ;
231 |          }
232 |       for (i=0 ; i<res*res ; i++) {
233 |          if (grid[i] > 0.0)
234 |             grid[i] /= max_pos ;
235 |          if (grid[i] < 0.0)
236 |             grid[i] /= -max_neg ;
237 |          }
238 |       }
239 | 
240 |    if (hist) {
241 | /*
242 |    Sort the grid entries so we can compute fractiles.
243 |    Recall that we allocated twice the needed space to allow for sorted array.
244 |    Then convert each grid entry into its fractile.
245 | */
246 | 
247 |       for (i=0 ; i<res*res ; i++)
248 |          keys[i] = i ;
249 | 
250 |       sorted = grid + res * res ;  // Use last half for scratch
251 |       memcpy ( sorted , grid , res * res * sizeof(double) ) ;
252 |       qsortdsi ( 0 , res * res - 1 , sorted , keys ) ;
253 |    
254 |       for (i=0 ; i<res*res ; i++)
255 |          grid[keys[i]] = (double) i / (res * res - 1.0) ;
256 |       if (sharpen) {
257 |          for (i=0 ; i<res*res ; i++)
258 |             grid[i] = grid[i] * grid[i] * grid[i] ;
259 |          }
260 |       } // Histogram equalization
261 | 
262 |    else {  // We scale by using ALMOST extremes
263 |       sorted = grid + res * res ;  // Use last half for scratch
264 |       for (i=0 ; i<res*res ; i++)
265 |          sorted[i] = grid[i] ;
266 |       qsortd ( 0 , res * res - 1 , sorted ) ;
267 |       i = (int) (0.01 * res * res) ;
268 |       smallest = sorted[i] ;
269 |       largest = sorted[res*res-i-1] ;
270 |       mult = 1.0 / (largest - smallest + 1.e-20) ;
271 |       for (i=0 ; i<res*res ; i++) {
272 |          grid[i] = mult * (grid[i] - smallest) ;
273 |          if (grid[i] > 1.0)
274 |             grid[i] = 1.0 ;
275 |          if (grid[i] < 0.0)
276 |             grid[i] = 0.0 ;
277 |          if (sharpen)
278 |             grid[i] = grid[i] * grid[i] * grid[i] ;
279 |          }
280 |       } // No histogram equalization
281 | 
282 | 
283 | 
284 | /*
285 |    Apply the user's visual transform
286 | */
287 | 
288 |    if (spread >= 0.0)
289 |       mult = spread + 1.0 ;
290 |    else
291 |       mult = 1.0 / (1.0 - spread) ;
292 | 
293 |    for (i=0 ; i<res*res ; i++) {
294 |       grid[i] += 0.01 * shift ;
295 |       if (grid[i] < 1.e-12)       // Needed for log below
296 |          grid[i] = 1.e-12 ;
297 |       if (grid[i] > 1.0 - 1.e-12) // Ditto
298 |          grid[i] = 1.0 - 1.e-12 ;
299 | 
300 |       if (grid[i] <= 0.5)
301 |          grid[i] = 0.5 * exp ( mult * log ( 2.0 * grid[i] )) ;
302 |       else 
303 |          grid[i] = 1.0 - 0.5 * exp ( mult * log ( 2.0 * (1.0 - grid[i]) )) ;
304 |       }
305 | }


--------------------------------------------------------------------------------
/DataMine.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Apress/data-mining-algorithms-cpp/bf18dda7f2361534423c56076fd60999b1becd86/DataMine.exe


--------------------------------------------------------------------------------
/EVEC_RS.CPP:
--------------------------------------------------------------------------------
  1 | /****************************************************************************/
  2 | /*                                                                          */
  3 | /*                           EVEC_RS                                        */
  4 | /*                                                                          */
  5 | /*   Compute eigenvalues and vectors of real symmetric matrix               */
  6 | /*                                                                          */
  7 | /****************************************************************************/
  8 | /*                                                                          */
  9 | 
 10 | #include <math.h>
 11 | 
 12 | /*
 13 |    The input matrix is mat_in.  It is not touched.  The upper minor triangle
 14 |    of it is ignored, and hence may be garbage.  Its column dimension is n.
 15 |    The eigenvectors are output in vect, which has column dimension n.
 16 |    The calling program may use the same matrix for mat_in and vect,
 17 |    in which case the input is simply replaced.
 18 |    The eigenvalues are output in eval.  Workv is a double work vector n long.
 19 |    This returns the number of eigenvalues which could not be computed,
 20 |    which is virtually always 0.  I've exhaustively tested this routine and
 21 |    never seen it return a nonzero value.
 22 | */
 23 | 
 24 | int evec_rs ( double *mat_in , int n , int find_vec , double *vect , double *eval , double *workv )
 25 | {
 26 |    int i, im1, j, k, irow, irowm1, ival, ivalp1, iercnt, msplit, ibig ;
 27 |    double b, f, g, h, hh, p, r, x, scale, shift, sine, cosine, big, *vptr ;
 28 | 
 29 |    // Compzero is an accuracy versus speed tradeoff.  The algorithm is most accurate when compzero=0.
 30 |    // But by letting 'zero' be a very small positive number, we can take some early loop exits
 31 |    // with very little penalty, insignificant most of the time.
 32 |    double compzero = 1.e-16 ;
 33 | 
 34 |    // Eps is used only for splitting a large matrix into two smaller matrices at a 'zero' diagonal,
 35 |    // greatly speeding operation.  But if the diagonal is not quite zero, this does introduce a tiny,
 36 |    // usually insignificant, error.
 37 |    // The algorithm is most accurate when eps=0, but very small values are fine for most work.
 38 |    double eps = 1.e-12 ;
 39 |    
 40 |    /* copy lower triangle of input to output. */
 41 |    for (i=0 ; i<n ; i++) {
 42 |       for (j=0 ; j<=i ; j++)
 43 |          vect[i*n+j] = mat_in[i*n+j] ;
 44 |       }
 45 | /*
 46 | ------------------------------------------------------------------------------
 47 | 
 48 |    This section converts the matrix (now in vect) to tri-diagonal form
 49 |    using Householder's method.  It is done backwards; The last row is done
 50 |    first. The subdiagonal is saved in workv as it is found.
 51 | 
 52 | ------------------------------------------------------------------------------
 53 | */
 54 |    for (irow=n-1 ; irow>0 ; irow--) {
 55 |       irowm1 = irow - 1 ;
 56 |       h = 0.0 ;
 57 |       /* We can improve computational accuracy by scaling the row. */
 58 |       for (scale=0.0 , i=0 ; i<=irowm1 ; i++) /* do left of diag only */
 59 |          scale += fabs ( vect[irow*n+i] ) ;
 60 |       /* Avoid a lot of work if this row already tri-diagonal */
 61 |       if (scale < compzero  ||  irow == 1)
 62 |          workv[irow] = vect[irow*n+irowm1] ;
 63 |       else {
 64 |          /*  Do actual scaling (left of diag only).  Cumulate sum squares */
 65 |          for (i=0 ; i<=irowm1 ; i++) {
 66 |             x = vect[irow*n+i] / scale ;
 67 |             vect[irow*n+i] = x ;
 68 |             h += x * x ;
 69 |             }
 70 |          /*  The 'U' vector of the literature is the row vector except that
 71 |              its first element (f) has the length of the vector (sqrt(h))
 72 |              either added or subtracted (g), whichever gives the largest
 73 |              absolute value. */
 74 |          f = vect[irow*n+irowm1] ;
 75 |          g = ( f > 0 )  ?  -sqrt (h)  :  sqrt (h)  ;
 76 |          workv[irow] = g * scale ;  /* subdiagonal compensated for scaling */
 77 | 
 78 |          h -= f * g ;
 79 |          vect[irow*n+irowm1] = f - g ;
 80 | 
 81 |          /* Prepare to reduce vect.  Use upper triangle for storage. */
 82 |          
 83 |          for (f=0.0 , j=0 ; j<=irowm1 ; j++) {
 84 |             if (find_vec)
 85 |                vect[j*n+irow] = vect[irow*n+j] / h ;
 86 |             /* Form element of A * U */
 87 |             for (g=0.0 , k=0 ; k<=j ; k++)
 88 |                g += vect[j*n+k] * vect[irow*n+k] ;
 89 |             if (j < irowm1)
 90 |                for (k=j+1 ; k<=irowm1 ; k++)
 91 |                   g += vect[k*n+j] * vect[irow*n+k] ;
 92 |             /* Compute an element of P.  Use the positions in workv below
 93 |                those already determined subdiagonals as work areas. */
 94 |             workv[j] = g / h ;
 95 |             f += workv[j] * vect[irow*n+j] ;
 96 |             }  /* for f=0.0  j=0  */
 97 | 
 98 |          /* Reduce A such that all elements of row irow are zero except the
 99 |             diagonal and the element to its left (ignoring symmetric
100 |             elements).  Naturally we need not compute those zeroes.  Just
101 |             modify the rows above irow.  */
102 |          hh = f / (h + h) ;
103 |          for (j=0 ; j<=irowm1 ; j++) {
104 |             f = vect[irow*n+j] ;
105 |             g = workv[j] - hh * f ;
106 |             workv[j] = g ;
107 |             for (k=0 ; k<=j ; k++)
108 |                vect[j*n+k] -= f * workv[k] + g * vect[irow*n+k] ;
109 |             }
110 |          }  /*  else scale<compzero  */
111 | 
112 |       /* We are done with this row!  Save h in eval.  */
113 |       eval[irow] = h ;
114 | 
115 |       }  /* for irow=n-1 */
116 | /*
117 | ------------------------------------------------------------------------------
118 | 
119 |    We are nearly done with the tri-diagonalization.  The transformation
120 |    itself has been done to the matrix and the subdiagonals are stored in  
121 |    workv.  H for each row is in eval.  Complete the job by recovering
122 |    the transformation matrix and diagonal.
123 | 
124 | ------------------------------------------------------------------------------
125 | */
126 |    workv[0] = 0.0 ;
127 |    if (find_vec) {
128 |       eval[0] = 0.0 ;
129 |       for (irow=0 ; irow<n ; irow++) {
130 |          irowm1 = irow-1  ;  /* following if insures no negative subscript! */
131 |          if (fabs (eval[irow])  >  compzero) {
132 |             for (j=0 ; j<=irowm1 ; j++) {
133 |                for (g=0.0 , k=0 ; k<=irowm1 ; k++)
134 |                   g += vect[irow*n+k] * vect[k*n+j] ;
135 |                for (k=0 ; k<=irowm1 ; k++)
136 |                   vect[k*n+j] -= g * vect[k*n+irow] ;
137 |                }
138 |             }
139 |          /*  Recover diagonal and zero matrix elements which are truly zero
140 |              but were not computed.  */
141 |          eval[irow] = vect[irow*n+irow] ;
142 |          vect[irow*n+irow] = 1. ;
143 |          for (j=0 ; j<=irowm1 ; j++) {
144 |             vect[irow*n+j] = 0.0 ;
145 |             vect[j*n+irow] = 0.0 ;
146 |             }
147 |          }  /*  for  irow=0  */
148 |       } // If find_vec
149 | 
150 |    else {
151 |       for (irow=0 ; irow<n ; irow++)
152 |          eval[irow] = vect[irow*n+irow] ;
153 |       }   
154 | 
155 | /*
156 | ------------------------------------------------------------------------------
157 | 
158 |    The matrix is now completely tridiagonal.  The diagonal is in eval and
159 |    the subdiagonal still in workv.  The transformation matrix is in vect. 
160 |    Now we use the QL method to find the eigenvalues and vectors.
161 | 
162 | ------------------------------------------------------------------------------
163 | */
164 |    if (n == 1)
165 |       return ( 0 ) ;
166 | 
167 |    /*  The first element of the subdiagonal does not exist.  Shift workv.  */
168 |    for (i=1 ; i<n ; i++)
169 |       workv[i-1] = workv[i] ;
170 |    workv[n-1] = 0.0 ;
171 | 
172 |    shift = 0.0 ;
173 |    b = 0.0 ;
174 |    /*
175 |       This is the main loop.  The rotation isolates one eigenvalue at a time.
176 |    */
177 |    for (ival=0 ; ival<n ; ival++) {
178 |       iercnt = 0 ;  /* count tries for this eigenvalue  */
179 |       /*  It is always nice to be able to split a matrix into two parts
180 |           in order to reduce it from one big problem to two smaller ones.
181 |           We use 'b' as a computational zero.  If a subdiagonal element
182 |           is smaller than b we have a split.  */
183 |       h = eps * ( fabs (eval[ival]) + fabs (workv[ival] ) ) ;
184 |       h = (h > compzero) ? h : compzero ;  /* needed in some cases */
185 |       b = (b > h) ? b : h  ;
186 |       /* Recall we set workv[n-1]=0.0  This loop at least finds that.  */
187 |       for (msplit=ival ; msplit<n ; msplit++)
188 |          if (fabs ( workv[msplit] ) <= b)
189 |             break ;
190 | 
191 |       /*  We might luck out.  If the first subdiagonal is 'zero' then
192 |           the corresponding diagonal is an eigenvalue.  Thus we only need to
193 |           do the computation if that is not the case.  */
194 |       if ( msplit > ival) {
195 |          do {
196 |             if (iercnt++ > 100)  /* avoid useless repetition */
197 |                return (n - ival) ;
198 |             /*  Before transforming we shift all eigenvalues by a constant to
199 |                 accelerate convergence.  Now shift by an additional h for
200 |                 this one.  */
201 |             ivalp1 = ival + 1 ;
202 |             g = eval[ival] ;
203 |             p = ( eval[ivalp1] - g )  /  (2. * workv[ival]);/* tricky denom */
204 |             r = sqrt ( p * p + 1.0 ) ;
205 |             eval[ival] = workv[ival] / ( p + ( (p>0) ? r : -r ) )  ;
206 | 
207 |             h = g - eval[ival] ;
208 |             /* We just shifted ival'th.  Do same for others.  */
209 |             for (i=ivalp1 ; i<n ; i++)  /* above 'if' insures ivalp1<n */
210 |                eval[i] -= h ;
211 |             shift += h ;
212 |             /* This is the actual QL transform */
213 |             p = eval[msplit] ;
214 |             cosine = 1.0 ;
215 |             sine = 0.0 ;
216 |             /* Only rotate between last eigenvalue computed and split point */
217 |             for (i=msplit-1 ; i >= ival ; i--) {
218 |                g = cosine * workv[i] ;
219 |                h = cosine * p ;
220 |                if (fabs (p) >= fabs (workv[i])) {
221 |                   cosine = workv[i] / p ;
222 |                   r = sqrt ( cosine * cosine + 1.0 ) ;
223 |                   workv[i+1] = sine * p * r ;
224 |                   sine = cosine / r ;
225 |                   cosine = 1.0 / r ;
226 |                   }
227 |                else {
228 |                   cosine = p / workv[i] ;
229 |                   r = sqrt ( cosine * cosine + 1.0 ) ;
230 |                   workv[i+1] = sine * workv[i] * r ;
231 |                   sine = 1.0 / r ;
232 |                   cosine = cosine * sine ;
233 |                   }
234 |                p = cosine * eval[i] - sine * g ;
235 |                eval[i+1] = h + sine * (cosine * g + sine * eval[i]) ;
236 |                /* now we must transform vect the same way, so that we get
237 |                   the eigenvector of the original matrix.  Note that
238 |                   previous vectors are untouched.  */
239 |                if (find_vec) {
240 |                   for (k=0 ; k<n ; k++) {
241 |                      vptr = vect + k * n + i ;
242 |                      h = vptr[1] ;
243 |                      vptr[1] = sine * *vptr  +  cosine * h ;
244 |                      *vptr = cosine * *vptr  -  sine * h ;
245 |                      }
246 |                   }
247 |                }  /*  for i=msplit-1  */
248 |             /*  A tentative eigenvalue has been found.  Save it.  */
249 |             eval[ival] = cosine * p ;
250 |             workv[ival] = sine * p ;
251 | 
252 |             /*  Repeat until satisfactory accuracy is achieved.  */
253 |             } while ( fabs (workv[ival])  > b ) ;
254 |          }  /*  if  msplit > ival  */
255 |       /*  We have an eigenvalue.  Compensate for shifting.  */
256 |       eval[ival] += shift ;
257 | 
258 |       }  /*  for ival=0  */
259 |  /*
260 | ------------------------------------------------------------------------------
261 | 
262 |    This is it.  We are all done.  However, many programs prefer for the  
263 |    eigenvalues (and corresponding vectors!) to be sorted in decreasing    
264 |    order.  Do this now.  Then flip signs in any column which has more
265 |    negatives than positives.  This is appreciated during interpretation.
266 | 
267 | ------------------------------------------------------------------------------
268 | */
269 | 
270 |    for (i=1 ; i<n ; i++) {
271 |       im1 = i - 1 ;
272 |       ibig = im1 ;
273 |       big = eval[im1] ;
274 |       /*  Find largest eval beyond im1  */
275 |       for (j=i ; j<n ; j++) {
276 |          x = eval[j] ;
277 |          if (x > big) {
278 |             big = x ;
279 |             ibig = j ;
280 |             }
281 |          }
282 |       if (ibig != im1) {
283 |          /* swap */
284 |          eval[ibig] = eval[im1] ;
285 |          eval[im1] = big ;
286 |          if (find_vec) {
287 |             for (j=0 ; j<n ; j++) {
288 |                x = vect[j*n+im1] ;
289 |                p = vect[j*n+ibig] ;  /* using p due to compiler error */
290 |                vect[j*n+im1] = p ;
291 |                vect[j*n+ibig] = x ;
292 |                }
293 |             }
294 |          }
295 |       }
296 | 
297 |    if (find_vec) {
298 |       for (i=0 ; i<n ; i++) {
299 |          for (k=0 , j=0 ; j<n ; j++)
300 |             if (vect[j*n+i] < 0.)
301 |                k++ ;
302 |          if (2*k > n)
303 |             for (j=0 ; j<n ; j++)
304 |                vect[j*n+i] *= -1. ;
305 |          }
306 |       }
307 |    return ( 0 ) ;
308 | }


--------------------------------------------------------------------------------
/GLOB_MIN.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  GLOB_MIN - Check equispaced intervals to find rough global minimum        */
  4 | /*             of a univariate function                                       */
  5 | /*                                                                            */
  6 | /*  This is called with a lower and upper bound for the domain to be          */
  7 | /*  searched.  If the function is still decreasing at one of these endpoints  */
  8 | /*  the search will continue beyond the specified endpoint.                   */
  9 | /*  The total interval is equally divided into npts-1 subintervals.           */
 10 | /*  These subintervals will be spaced arithmetically or logarithmically       */
 11 | /*  according to log_space.                                                   */
 12 | /*  If the criterion function drops to critlim or smaller, execution will     */
 13 | /*  terminate as soon as a local minimum is found.  (Global search stops.)    */
 14 | /*  Three points will be returned.  The center point, (x2,y2), will have      */
 15 | /*  smaller function value (y2) than its neighbors.  (In pathological         */
 16 | /*  cases they may be equal.)                                                 */
 17 | /*                                                                            */
 18 | /*  If npts is input negative, that means the user is inputting f(low) in *y2.*/
 19 | /*  This sometimes saves a function evaluation.                               */
 20 | /*                                                                            */
 21 | /*  Normally it returns zero.  It returns one if user pressed ESCape before   */
 22 | /*  the minimum was found.                                                    */
 23 | /*                                                                            */
 24 | /******************************************************************************/
 25 | 
 26 | #include <math.h>
 27 | 
 28 | int user_pressed_escape() ;
 29 | 
 30 | int glob_min (             // Grid search returning three points (x1,x2,x3) that bracket a minimum of criter
 31 |    double low ,           // Lower limit for search
 32 |    double high ,          // Upper limit
 33 |    int npts ,             // Number of points to try (negative: caller supplies f(low) in *y2)
 34 |    int log_space ,        // Space by log?
 35 |    double critlim ,       // Quit global if crit drops this low
 36 |    int (*criter) (double , double *) , // Criterion function: writes f(x) via 2nd arg, nonzero return means quit
 37 |    double *x1 ,
 38 |    double *y1 ,           // Lower X value and function there
 39 |    double *x2 ,
 40 |    double *y2 ,           // Middle (best)
 41 |    double *x3 ,
 42 |    double *y3             // And upper
 43 |    )
 44 | {
 45 |    int i, ibest, turned_up, know_first_point, user_quit ;
 46 |    double x, y, rate, previous ;
 47 | 
 48 |    user_quit = 0 ;
 49 | 
 50 |    if (npts < 0) {        // Negative count flags that *y2 already holds f(low)
 51 |       npts = -npts ;
 52 |       know_first_point = 1 ;
 53 |       }
 54 |    else 
 55 |       know_first_point = 0 ;
 56 | 
 57 |    if (log_space)         // NOTE(review): (npts - 1) is zero when npts is 1, in either branch
 58 |       rate = exp ( log (high / low) / (npts - 1) ) ;  // Constant ratio between successive x
 59 |    else 
 60 |       rate = (high - low) / (npts - 1) ;              // Constant difference between successive x
 61 | 
 62 |    x = low ;
 63 | 
 64 |    previous = 0.0 ; // Avoids "use before set" compiler warnings
 65 |    ibest = -1 ;     // For proper critlim escape
 66 |    turned_up = 0 ;  // Must know if function increased after min
 67 | 
 68 |    for (i=0 ; i<npts ; i++) {
 69 | 
 70 |       if (i  ||  ! know_first_point)
 71 |          user_quit = criter ( x , &y ) ;  // NOTE(review): y is used below even if criter asked to quit
 72 |       else
 73 |          y = *y2 ;                        // First point only: reuse the value the caller supplied
 74 | 
 75 |       if ((i == 0)  ||  (y < *y2)) {  // Keep track of best here
 76 |          ibest = i ;
 77 |          *x2 = x ;
 78 |          *y2 = y ;
 79 |          *y1 = previous ;  // Function value to its left
 80 |          turned_up = 0 ;   // Flag that min is not yet bounded
 81 |          }
 82 | 
 83 |       else if (i == (ibest+1)) { // Didn't improve so this point may
 84 |          *y3 = y ;               // be the right neighbor of the best
 85 |          turned_up = 1 ;         // Flag that min is bounded
 86 |          }
 87 | 
 88 |       previous = y ;             // Keep track for left neighbor of best
 89 | 
 90 |       if (! user_quit)
 91 |          user_quit = user_pressed_escape () ;
 92 | 
 93 |       if ((user_quit  ||  (*y2 <= critlim))  &&  (ibest > 0)  &&  turned_up)
 94 |          break ; // Done if (abort or good enough) and both neighbors found
 95 | 
 96 |       if (user_quit)          // Alas, both neighbors not found
 97 |          return user_quit ;   // Flag that the other 2 pts not there
 98 | 
 99 |       if (log_space)          // Advance to the next grid point
100 |          x *= rate ;
101 |       else 
102 |          x += rate ;
103 |       }
104 | 
105 | /*
106 |    At this point we have a minimum (within low,high) at (x2,y2).
107 |    Compute x1 and x3, its neighbors.
108 |    We already know y1 and y3 (unless the minimum is at an endpoint!).
109 | */
110 | 
111 |    if (log_space) {           // Neighbors lie one grid step on either side of x2
112 |       *x1 = *x2 / rate ;
113 |       *x3 = *x2 * rate ;
114 |       }
115 |    else {
116 |       *x1 = *x2 - rate ;
117 |       *x3 = *x2 + rate ;
118 |       }
119 | 
120 | /*
121 |    Normally we would now be done.  However, the careless user may have
122 |    given us a bad x range (low,high) for the global search.
123 |    If the function was still decreasing at an endpoint, bail out the
124 |    user by continuing the search.
125 | */
126 | 
127 |    if (! turned_up) { // Must extend to the right (larger x)
128 |       for (;;) {      // Endless loop goes as long as necessary
129 | 
130 |          user_quit = user_pressed_escape () ;
131 | 
132 |          if (! user_quit)
133 |             user_quit = criter ( *x3 , y3 ) ;  // Evaluate at the current right frontier
134 | 
135 |          if (user_quit)          // Alas, both neighbors not found
136 |             return user_quit ;   // Flag that the other 2 pts not there
137 | 
138 |          if (*y3 > *y2)  // If function increased we are done
139 |             break ;
140 |          if ((*y1 == *y2)  &&  (*y2 == *y3)) // Give up if flat
141 |             break ;
142 | 
143 |          *x1 = *x2 ;      // Shift all points
144 |          *y1 = *y2 ;
145 |          *x2 = *x3 ;
146 |          *y2 = *y3 ;
147 | 
148 |          rate *= 3.0 ;    // Step further each time
149 |          if (log_space)   // And advance to new frontier
150 |             *x3 *= rate ;
151 |          else 
152 |             *x3 += rate ;
153 |          }
154 |       }
155 | 
156 |    else if (ibest == 0) {  // Best was the first grid point: must extend to the left (smaller x)
157 |       for (;;) {           // Endless loop goes as long as necessary
158 | 
159 |          user_quit = user_pressed_escape () ;
160 | 
161 |          if (! user_quit)
162 |             user_quit = criter ( *x1 , y1 ) ;  // Evaluate at the current left frontier
163 | 
164 |          if (user_quit)         // Alas, both neighbors not found
165 |             return user_quit ;  // Flag that the other 2 pts not there
166 | 
167 |          if (*y1 > *y2)   // If function increased we are done
168 |             break ;
169 |          if ((*y1 == *y2)  &&  (*y2 == *y3)) // Give up if flat
170 |             break ;
171 | 
172 |          *x3 = *x2 ;      // Shift all points
173 |          *y3 = *y2 ;
174 |          *x2 = *x1 ;
175 |          *y2 = *y1 ;
176 | 
177 |          rate *= 3.0 ;    // Step further each time
178 |          if (log_space)   // And advance to new frontier
179 |             *x1 /= rate ;
180 |          else 
181 |             *x1 -= rate ;
182 |          }
183 |       }
184 | 
185 |    return 0 ;             // Zero means the three returned points are all valid
186 | }
187 | 


--------------------------------------------------------------------------------
/HORNS_METHOD.TXT:
--------------------------------------------------------------------------------
  1 | typedef struct {        // Per-thread argument block handed to evals_threaded()
  2 |    int nc ;             // Number of cases
  3 |    int nv ;             // Number of variables
  4 |    double *covar ;      // Scratch for covariance matrix
  5 |    double *evals ;      // Computed eigenvalues (also borrowed as the random-vector buffer)
  6 |    double *workv ;      // Scratch vector for evec_rs() (also borrowed for the column sums)
  7 |    int ieval ;          // Needed for placing result in all_evals
  8 | } MC_EVALS_PARAMS ;
  9 | 
 10 | static unsigned int __stdcall evals_threaded ( LPVOID dp )  // Thread body: eigenvalues of one random correlation matrix; dp is a MC_EVALS_PARAMS *
 11 | {
 12 |    int i, j, icase, n_cases, n_vars ;
 13 |    double *xvec, *sums, *covar, xtemp, *evals, *workv ;
 14 | 
 15 |    n_cases = ((MC_EVALS_PARAMS *) dp)->nc ;
 16 |    n_vars = ((MC_EVALS_PARAMS *) dp)->nv ;
 17 |    covar = ((MC_EVALS_PARAMS *) dp)->covar ;
 18 |    xvec = evals = ((MC_EVALS_PARAMS *) dp)->evals ;   // We borrow this for computing covar
 19 |    sums = workv = ((MC_EVALS_PARAMS *) dp)->workv ;  // Ditto
 20 | 
 21 | /*
 22 |    Compute the lower-left triangle of the covariance matrix of a
 23 |    standardized, uncorrelated normal random variable.
 24 |    The upper-right triangle is ignored by the evec_rs() routine.
 25 | */
 26 | 
 27 |    for (i=0 ; i<n_vars ; i++) {   // Zero the sums and the lower triangle (diagonal included)
 28 |       sums[i] = 0.0 ;
 29 |       for (j=0 ; j<=i ; j++)
 30 |          covar[i*n_vars+j] = 0.0 ;
 31 |       }
 32 | 
 33 | 
 34 |    for (icase=0 ; icase<n_cases ; icase++) {
 35 | 
 36 |       // Generate the random vector
 37 |       for (i=0 ; i<n_vars ; i++) {
 38 |          if (i % 2 == 0)
 39 |             normal_pair ( &xvec[i] , &xtemp ) ;  // Presumably yields two independent normal deviates -- confirm in normal_pair()
 40 |          else
 41 |             xvec[i] = xtemp ;                    // Odd index uses the second value saved by the previous call
 42 |          }
 43 | 
 44 |       // Cumulate for this random vector
 45 |       for (i=0 ; i<n_vars ; i++) {
 46 |          sums[i] += xvec[i] ;
 47 |          for (j=0 ; j<=i ; j++)
 48 |             covar[i*n_vars+j] += xvec[i] * xvec[j] ;
 49 |          }
 50 |       } // For all cases
 51 | 
 52 |    // Compute n_cases times covariances
 53 |    for (i=0 ; i<n_vars ; i++) {
 54 |       for (j=0 ; j<=i ; j++)
 55 |          covar[i*n_vars+j] -= sums[i] * sums[j] / n_cases ;  // Subtract the mean term from the raw cross products
 56 |       }
 57 | 
 58 |    // Compute correlation matrix
 59 | 
 60 |    for (i=0 ; i<n_vars ; i++) {
 61 |       covar[i*n_vars+i] = sqrt ( covar[i*n_vars+i] ) ;  // Diagonal temporarily holds the scale for row/column i
 62 |       for (j=0 ; j<i ; j++)
 63 |          covar[i*n_vars+j] /= covar[i*n_vars+i] * covar[j*n_vars+j] ;  // Diagonal j<i was already square-rooted
 64 |       }
 65 | 
 66 |    for (i=0 ; i<n_vars ; i++)   // Definition of correlation matrix
 67 |       covar[i*n_vars+i] = 1.0 ;
 68 | 
 69 |    evec_rs ( covar , n_vars , 0 , covar , evals , workv ) ;  // Third argument 0 presumably requests eigenvalues only -- confirm in evec_rs()
 70 | 
 71 |    return 0 ;
 72 | }
 73 | 
 74 | int mc_evals (            // Monte-Carlo eigenvalue thresholds: runs mc_reps threaded replications, returns a fractile per eigenvalue
 75 |    int nc ,               // Number of cases
 76 |    int nv ,               // Number of variables
 77 |    int mc_reps ,          // Number of MC replications
 78 |    int max_threads ,      // Max number of threads to use
 79 |    double fractile ,      // Desired fractile, 0-1
 80 |    double *threshold      // Computed values of each eval for specified fractile
 81 |    )
 82 | {
 83 |    int i, j, k, ieval, ithread, n_threads, empty_slot, ret_val ;
 84 |    double *covar ;         // Scratch for covariance matrix, nv*nv*max_threads
 85 |    double *evals ;         // Scratch for eigenvalues, nv*max_threads
 86 |    double *workv ;         // Scratch for evec_rs()
 87 |    double *all_evals ;     // Scratch for all eigenvalues, nv*mc_reps
 88 |    char msg[256] ;         // NOTE(review): not referenced anywhere in this function
 89 |    MC_EVALS_PARAMS mc_evals_params[MAX_THREADS] ;  // NOTE(review): max_threads is not checked against MAX_THREADS here
 90 |    HANDLE threads[MAX_THREADS] ;
 91 | 
 92 |    if (mc_reps < 1)
 93 |       mc_reps = 1 ;
 94 | 
 95 |    if (max_threads > mc_reps)   // Never need more threads than replications
 96 |       max_threads = mc_reps ;
 97 | 
 98 | /*
 99 |    Allocate memory
100 |    NOTE(review): none of these four allocations is tested for NULL before use.
101 | */
102 |    covar = (double *) malloc ( nv * nv * max_threads * sizeof(double) ) ;
103 |    evals = (double *) malloc ( nv * max_threads * sizeof(double) ) ;
104 |    workv = (double *) malloc ( nv * max_threads * sizeof(double) ) ;
105 |    all_evals = (double *) malloc ( nv * mc_reps * sizeof(double) ) ;
106 | 
107 | /*
108 | --------------------------------------------------------------------------------
109 | 
110 |    Outer-most loop does threaded MC replications
111 |    Initialize those thread parameters which are constant for all threads.
112 | 
113 | --------------------------------------------------------------------------------
114 | */
115 | 
116 |    for (ithread=0 ; ithread<max_threads ; ithread++) {
117 |       mc_evals_params[ithread].nc = nc ;
118 |       mc_evals_params[ithread].nv = nv ;
119 |       mc_evals_params[ithread].covar = covar + ithread * nv * nv ;  // Each thread slot gets its own slice of the scratch
120 |       mc_evals_params[ithread].evals = evals + ithread * nv ;
121 |       mc_evals_params[ithread].workv = workv + ithread * nv ;
122 |       } // For all threads, initializing constant stuff
123 | 
124 | 
125 | /*
126 |    Do it
127 | */
128 | 
129 |    n_threads = 0 ;                    // Counts threads that are active
130 |    for (i=0 ; i<max_threads ; i++)
131 |       threads[i] = NULL ;
132 | 
133 |    ieval = 0 ;        // Index of this replication in all_evals
134 |    empty_slot = -1 ;  // After full, will identify the thread that just completed
135 |    for (;;) {         // Main thread loop processes all replications
136 | 
137 | /*
138 |    Handle user ESCape
139 | */
140 | 
141 |       if (escape_key_pressed  ||  user_pressed_escape ()) {
142 |          for (i=0, k=0 ; i<max_threads ; i++) {   // Compact the live handles to the front of the array
143 |             if (threads[i] != NULL)
144 |                threads[k++] = threads[i] ;
145 |             }
146 |          ret_val = WaitForMultipleObjects ( k , threads , TRUE , 50000 ) ;  // Result is overwritten just below, so a timeout goes unnoticed
147 |          for (i=0 ; i<k ; i++)
148 |             CloseHandle ( threads[i] ) ;
149 |          ret_val = ERROR_ESCAPE ;
150 |          goto FINISH ;
151 |          }
152 | 
153 | /*
154 |    Start a new thread if we still have work to do
155 | */
156 | 
157 |       if (ieval < mc_reps) {    // If there are still some to do
158 |          if (empty_slot < 0)    // Negative while we are initially filling the queue
159 |             k = n_threads ;
160 |          else
161 |             k = empty_slot ;
162 |          mc_evals_params[k].ieval = ieval  ;         // Needed for placing final result in all_evals
163 |          threads[k] = (HANDLE) _beginthreadex ( NULL , 0 , evals_threaded , &mc_evals_params[k] , 0 , NULL ) ;
164 |          if (threads[k] == NULL) {  // NOTE(review): handles are closed without waiting; running threads may outlive the buffers freed at FINISH -- confirm
165 |             for (i=0 ; i<n_threads ; i++) {
166 |                if (threads[i] != NULL)
167 |                   CloseHandle ( threads[i] ) ;
168 |                }
169 |             ret_val = ERROR_INSUFFICIENT_MEMORY ;
170 |             goto FINISH ;
171 |             }
172 |          ++n_threads ;
173 |          ++ieval ;
174 |          } // if (ieval < mc_reps)
175 | 
176 |       if (n_threads == 0)  // Are we done?
177 |          break ;
178 | 
179 | /*
180 |    Handle full suite of threads running and more threads to add as soon as some are done.
181 |    Wait for just one thread to finish.
182 | */
183 | 
184 |       if (n_threads == max_threads  &&  ieval < mc_reps) {
185 |          ret_val = WaitForMultipleObjects ( n_threads , threads , FALSE , 500000 ) ;
186 |          if (ret_val == WAIT_TIMEOUT  ||  ret_val == WAIT_FAILED  ||  ret_val < 0  ||  ret_val >= n_threads) {  // NOTE(review): exits with thread handles still open
187 |             ret_val = ERROR_INSUFFICIENT_MEMORY ;
188 |             goto FINISH ;
189 |             }
190 | 
191 |          k = mc_evals_params[ret_val].ieval ;   // ret_val is used directly as the index of the finished slot
192 |          for (i=0 ; i<nv ; i++)
193 |             all_evals[i*mc_reps+k] = mc_evals_params[ret_val].evals[i] ;  // Layout: eigenvalue index major, replication minor
194 | 
195 |          empty_slot = ret_val ;
196 |          CloseHandle ( threads[empty_slot] ) ;
197 |          threads[empty_slot] = NULL ;
198 |          --n_threads ;
199 |          }
200 | 
201 | /*
202 |    Handle all work has been started and now we are just waiting for threads to finish
203 | */
204 | 
205 |       else if (ieval == mc_reps) {
206 |          ret_val = WaitForMultipleObjects ( n_threads , threads , TRUE , 500000 ) ;
207 |          if (ret_val == WAIT_TIMEOUT  ||  ret_val == WAIT_FAILED  ||  ret_val < 0  ||  ret_val >= n_threads) {  // NOTE(review): exits with thread handles still open
208 |             ret_val = ERROR_INSUFFICIENT_MEMORY ;
209 |             goto FINISH ;
210 |             }
211 |           for (i=0 ; i<n_threads ; i++) {       // Harvest the results of every remaining thread
212 |             k = mc_evals_params[i].ieval ;
213 |             for (j=0 ; j<nv ; j++) {
214 |                all_evals[j*mc_reps+k] = mc_evals_params[i].evals[j] ;
215 |                }
216 | 
217 |             CloseHandle ( threads[i] ) ;
218 |             }
219 | 
220 |          break ;
221 |          }
222 |       } // Endless loop which threads computation of evals for all reps
223 | 
224 | 
225 | 
226 | /*
227 |    All eigenvalues are computed and saved.  Compute specified fractile for each.
228 | */
229 | 
230 |    k = (int) (fractile * (mc_reps+1)) - 1 ;  // Zero-based position of the fractile among mc_reps values
231 |    if (k < 0)                                // Clamp to a valid index
232 |       k = 0 ;
233 |    if (k >= mc_reps)
234 |       k = mc_reps - 1 ;
235 | 
236 |    for (i=0 ; i<nv ; i++) {
237 |       qsortd ( 0 , mc_reps-1 , all_evals + i * mc_reps ) ;  // Presumably sorts this eigenvalue's replications ascending in place -- confirm in qsortd()
238 |       threshold[i] = all_evals[i*mc_reps+k] ;
239 |       }
240 | 
241 |    ret_val = 0 ;
242 | 
243 | 
244 | /*
245 |    Finished.  Clean up and exit.
246 | */
247 | 
248 | FINISH:
249 |    if (covar != NULL)
250 |       free ( covar ) ;
251 |    if (evals != NULL)
252 |       free ( evals ) ;
253 |    if (workv != NULL)
254 |       free ( workv ) ;
255 |    if (all_evals != NULL)
256 |       free ( all_evals ) ;
257 | 
258 |    return ret_val ;       // 0 on success, otherwise one of the ERROR_* codes set above
259 | }


--------------------------------------------------------------------------------
/INTEGRAT.CPP:
--------------------------------------------------------------------------------
 1 | /******************************************************************************/
 2 | /*                                                                            */
 3 | /*  INTEGRAT - Integrate() to perform adaptive quadrature                     */
 4 | /*                                                                            */
 5 | /******************************************************************************/
 6 | 
 7 | #include <math.h>
 8 | 
 9 | #define INTBUF 100 /* Incredibly conservative! (divisions 2^(-100) are tiny!) */
10 | 
11 | double integrate (             // Definite integral of criter over (low,high) by repeated interval bisection
12 |    double low ,                // Lower limit for definite integral
13 |    double high ,               // Upper limit
14 |    double min_width ,          // Demand subdivision this small or smaller
15 |    double acc ,                // Relative interval width limit
16 |    double tol ,                // Relative error tolerance
17 |    double (*criter) (double)   // Criterion function
18 |    )
19 | {
20 |    int istack ;
21 |    double sum, a, b, mid, fa, fb, fmid, lowres, hires, fac ;
22 | 
23 | struct IntStack {              // One pending interval: its endpoints and the function values there
24 |    double x0 ;
25 |    double x1 ;
26 |    double f0 ;
27 |    double f1 ;
28 | } stack[INTBUF] ;
29 | 
30 |    fac = 3.0 * tol ; // Error is about (lowres-hires) / 3
31 | 
32 | /*
33 |    Start by initializing the stack to be the entire interval
34 |    and the integral so far to be zero
35 | */
36 | 
37 |    stack[0].x0 = low ;
38 |    stack[0].f0 = criter ( low ) ;
39 |    stack[0].x1 = high ;
40 |    stack[0].f1 = criter ( high ) ;
41 |    istack = 1 ;
42 |    sum = 0.0 ;
43 | 
44 | /*
45 |    Main algorithm starts here.  Pop interval off stack and test its quality.
46 | */
47 | 
48 |    while (istack > 0) {  // While there is still at least one interval on stack
49 |       --istack ;         // Pop this interval
50 |       a = stack[istack].x0 ;
51 |       b = stack[istack].x1 ;
52 |       fa = stack[istack].f0 ;
53 |       fb = stack[istack].f1 ;
54 |       mid = 0.5 * (a + b) ;
55 |       fmid = criter ( mid ) ;
56 |       lowres = 0.5 * (b - a) * (fa + fb) ; // Trapezoidal rule
57 |       hires = 0.25 * (b - a) * (fa + 2.0 * fmid + fb) ; // And refined value
58 |       // If the interval is ridiculously narrow, no point in continuing
59 |       // If it gets this far, chances are the integrand is discontinuous
60 |       if (b - a <= acc * (1.0 + fabs(a) + fabs(b)))
61 |          sum += hires ;               // Quit trying to refine
62 |       else if ((b - a) <= min_width  &&  fabs(lowres-hires) < fac * (b - a))
63 |          sum += hires ;  // Normal convergence flag
64 |       else {             // Split: left half reuses the popped slot, right half goes on top
65 |          stack[istack].x0 = a ;
66 |          stack[istack].f0 = fa ;
67 |          stack[istack].x1 = mid ;
68 |          stack[istack].f1 = fmid ;
69 |          ++istack ;
70 |          if (istack < INTBUF) {       // Insurance against catastrophe only
71 |             stack[istack].x0 = mid ;  // Should ALWAYS be true (easily!)
72 |             stack[istack].f0 = fmid ; // If this if() fails, the answer will
73 |             stack[istack].x1 = b ;    // of course be wrong, but only due to
74 |             stack[istack].f1 = fb ;   // a horrendous underlying problem
75 |             ++istack ;                // like a singularity in the function
76 |             }
77 |          else {
78 |             --istack ;      // Error condition, so undo push
79 |             sum += hires ;  // And go with this best estimate
80 |             }
81 |          }
82 |       }
83 |    return sum ;          // Accumulated contributions of all accepted subintervals
84 | }
85 | 


--------------------------------------------------------------------------------
/INVERT.CPP:
--------------------------------------------------------------------------------
  1 | //----------------------------------------------------------------------------
  2 | /*                           LUDECOMP                                       */
  3 | /*                                                                          */
  4 | /*   Compute the LU decomposition via Crout algorithm                       */
  5 | 
  6 | #include <math.h>
  7 | #include <string.h>
  8 | 
  9 | int LUdecomp (
 10 |    int n ,           // Order of the input matrix
 11 |    double *mat_in ,  // Input matrix in standard (row major) order
 12 |    double *mat_out , // Output of LU decomposition
 13 |    int dim ,         // Their column dimension in the calling routine
 14 |    int digits ,      // If > 0 mat_in is assumed to be accurate to digits figs
 15 |    double *det ,     // Determinant
 16 |    int *pivot ,      // Output of permutation used for pivot optimization
 17 |    double *equil )   // Output of 1 / (largest abs element) for each row; entries are overwritten during pivoting
 18 | 
 19 | /* It returns 0 normally, 1 if accuracy has not been maintained, 2 if singular (det is then set to 0). */
 20 | 
 21 | {
 22 |    int row, col, inner, i, rmax ;
 23 |    double sum, fptemp, rn, wrel, big, biggest, *ptr1, *ptr2, *lurc ;
 24 |    double ai, wi, wa, p, q, test ;
 25 | 
 26 |    rmax = 0 ;  // Not needed.  Shuts up LINT.
 27 | 
 28 | /*
 29 |    Initialize.  Copy input matrix to output.
 30 | */
 31 | 
 32 |    rn = (double) n ;
 33 |    wrel = 0.0 ;      // Largest accuracy-test ratio seen so far
 34 |    biggest = 0.0 ;   // Largest absolute element of the whole matrix
 35 |    *det = 1.0 ;
 36 | 
 37 |    for (row=0 ; row<n ; row++) {
 38 |       big = 0.0 ;    // Largest absolute element of this row
 39 |       ptr1 = mat_in + row * dim ;
 40 |       ptr2 = mat_out + row * dim ;
 41 |       i = n ;
 42 |       while (i--) {
 43 |          fptemp = *ptr2++ = *ptr1++ ;
 44 |          if ((fptemp = fabs ( fptemp )) > big)
 45 |             big = fptemp ;
 46 |          }
 47 |       if (big < 1.0e-90)   // A row of (essentially) zeros means the matrix is singular
 48 |          goto SINGULAR ;
 49 |       if (big > biggest)
 50 |          biggest = big ;
 51 |       equil[row] = 1.0 / big ;
 52 |       }
 53 | 
 54 | /*
 55 | -------------------------------------------------------------------------------
 56 | 
 57 |    This is the main loop which does all columns
 58 | 
 59 | -------------------------------------------------------------------------------
 60 | */
 61 | 
 62 |    for (col=0 ; col<n ; col++) {
 63 | 
 64 | /*
 65 |   First compute all rows of U for this column which are strictly above diagonal.
 66 |   Pointer lurc is the current mat_out location.
 67 | */
 68 | 
 69 |       for (row=0, lurc=mat_out+col ; row<col ; row++, lurc = lurc + dim) {
 70 |          sum = *lurc ;
 71 | 
 72 |          if (digits) {  /* Accuracy test */
 73 |             ai = fabs ( sum ) ;   // Magnitude of the element before any subtraction
 74 |             wi = 0.0 ;            // Sum of magnitudes of the terms involved
 75 |             if (row) {
 76 |                ptr1 = mat_out + row * dim ;
 77 |                ptr2 = mat_out + col ;
 78 |                inner = row ;
 79 |                while (inner--) {
 80 |                   fptemp = *ptr1++  *  *ptr2 ;
 81 |                   ptr2 = ptr2 + dim ;
 82 |                   sum -= fptemp ;
 83 |                   wi += fabs ( fptemp ) ;
 84 |                   }
 85 |                *lurc = sum ;
 86 |                }
 87 |             wi += fabs ( sum ) ;
 88 |             if (ai < 1.e-90)      // Avoid dividing by a (near) zero original element
 89 |                ai = biggest ;
 90 |             test = wi / ai ;
 91 |             if (test > wrel)
 92 |                wrel = test ;
 93 |             }  /* if digits (accuracy test) */
 94 | 
 95 |          else {   /* No accuracy test */
 96 |             if (row) {
 97 |                ptr1 = mat_out + row * dim ;
 98 |                ptr2 = mat_out + col ;
 99 |                inner = row ;
100 |                while (inner--) {
101 |                   sum -= *ptr1++  *  *ptr2 ;
102 |                   ptr2 += dim ;
103 |                   }
104 |                *lurc = sum ;
105 |                }
106 |             } /* No accuracy test */
107 |          }  /* for row */
108 | 
109 | /*
110 |    Now compute the diagonal of U and the elements of L below the diagonal
111 | */
112 | 
113 |       p = 0.0 ;   // Largest scaled pivot candidate found in this column
114 | 
115 |       for (row=col ; row<n ; row++, lurc = lurc + dim) {  /* lurc still OK */
116 |          sum = *lurc ;
117 |          if (digits) { /* Accuracy test */
118 |             ai = fabs ( sum ) ;
119 |             wi = 0. ;
120 |             if (col) {
121 |                ptr1 = mat_out + row * dim ;
122 |                ptr2 = mat_out + col ;
123 |                inner = col ;
124 |                while (inner--) {
125 |                   fptemp = *ptr1++  *  *ptr2 ;
126 |                   ptr2 = ptr2 + dim ;
127 |                   sum -= fptemp ;
128 |                   wi += fabs ( fptemp ) ;
129 |                   }
130 |                *lurc = sum ;
131 |                }
132 |             wi += fabs ( sum ) ;
133 |             if (ai < 1.e-90)
134 |                ai = biggest ;
135 |             test = wi / ai ;
136 |             if (test > wrel)
137 |                wrel = test ;
138 |             } /*  if digits (accuracy test) */
139 | 
140 |           else {   /* No accuracy test */
141 |             if (col) {
142 |                ptr1 = mat_out + row * dim ;
143 |                ptr2 = mat_out + col ;
144 |                inner = col ;
145 |                while (inner--) {
146 |                   sum -= *ptr1++  *  *ptr2 ;
147 |                   ptr2 = ptr2 + dim ;
148 |                   }
149 |                *lurc = sum ;
150 |                }
151 |             }  /* no accuracy test */
152 | 
153 |          q = equil[row] * fabs ( sum ) ;   // Candidate magnitude scaled by its row's equilibration factor
154 |          if (q > p) {
155 |             p = q ;
156 |             rmax = row ;
157 |             }
158 |          }  /* for row */
159 | 
160 |       if ((rn + p) == rn) /* No longer can tell them apart? */
161 |          goto SINGULAR ;
162 | 
163 | /*
164 |    If this row is not the best pivot, interchange for stability
165 | */
166 | 
167 |       if (rmax != col) {
168 |          *det = - *det ;                 // Each row interchange flips the determinant's sign
169 |          ptr1 = mat_out + rmax * dim ;
170 |          ptr2 = mat_out + col * dim ;
171 |          inner = n ;
172 |          while (inner--) {
173 |             fptemp = *ptr1 ;
174 |             *ptr1++ = *ptr2 ;
175 |             *ptr2++ = fptemp ;
176 |             }
177 |          equil[rmax] = equil[col] ;      // Keep the scale factor with the row moved down; equil[col] is not used again
178 |          }
179 | 
180 | /*
181 |    Final loop housekeeping.  Divide by pivot.
182 | */
183 | 
184 |       pivot[col] = rmax ;                // Record which row was swapped into position col
185 |       *det *= (fptemp = mat_out[col * dim + col]) ;
186 |       
187 |       ptr1 = mat_out + (col+1) * dim + col ;
188 |       inner = n - col ;
189 |       while (--inner) {                  // Runs n-col-1 times: the elements below the diagonal
190 |          *ptr1 /= fptemp ;
191 |          ptr1 += dim ;
192 |          }
193 | 
194 |       }  /* for col */
195 | 
196 | /*
197 |    All done.  Do final accuracy test.
198 | */
199 | 
200 |    if (digits) {
201 |       p = (double) (3 * n + 3) ;
202 |       wa = p * wrel ;
203 |       if (wa + pow ( 10.0 , (double) -digits ) == wa)  // 10^-digits too small to change wa in floating point
204 |          return 1 ;
205 |       }
206 |    return 0 ;
207 | 
208 | SINGULAR:
209 |    *det = 0.0 ;
210 |    return 2 ;
211 | }
212 | 
213 | //----------------------------------------------------------------------------
214 | /*                           ELIM                                           */
215 | /*                                                                          */
216 | /*   Elimination part of equation solution (follows LUdecomp)               */
217 | 
218 | void elim (       // Solves the system for one right hand side using the factors from LUdecomp
219 |    int n ,        // Order of system
220 |    double *lu ,   // LU output of LUdecomp
221 |    int dim ,      // Column dimension of LU in calling program
222 |    double *rhs ,  // Right hand side of system
223 |    int *pivot,    // Pivot output of LUdecomp
224 |    double *x )    // Solution
225 | {
226 |    int ip, row, col, iw ;
227 |    double sum, *ptr1, *ptr2 ;
228 | 
229 | 
230 |    memcpy ( x , rhs , n * sizeof(double) ) ; // Copy right hand side to x
231 | 
232 | /*
233 |    Solve LY = RHS  for Y
234 | */
235 | 
236 |    iw = -1 ;                      // Index of first nonzero element met so far; -1 while none has been seen
237 |    for (row=0 ; row<n ; row++) {
238 |       ip = pivot[row] ;           // Apply the row interchange recorded for this step
239 |       sum = x[ip] ;
240 |       x[ip] = x[row] ;
241 |       if (iw >= 0) {
242 |          ptr1 = lu + row * dim + iw ;
243 |          ptr2 = x + iw ;
244 |          col = row - iw ;
245 |          while (col--)            // Terms before iw are zero, so they are skipped
246 |             sum -= *ptr1++  *  *ptr2++ ;
247 |          }
248 |       else if (fabs ( sum ) > 1.e-90)
249 |          iw = row ;
250 |       x[row] = sum ;              // No division in this pass
251 |       }
252 | 
253 | /*
254 |    Now solve UX = Y for X
255 | */
256 | 
257 |    for (row=n-1 ; row>=0 ; row--) {
258 |       sum = x[row] ;
259 |       ptr1 = lu + row * dim + row + 1 ;
260 |       ptr2 = x + row + 1 ;
261 |       col = n - row ;
262 |       while (--col)               // Runs n-row-1 times: the elements right of the diagonal
263 |          sum -= *ptr1++  *  *ptr2++ ;
264 |       x[row] = sum / lu[row * dim + row] ;
265 |       }
266 | }
267 | 
268 | //----------------------------------------------------------------------------
269 | 
270 | int invert (         // Inverts a matrix via LUdecomp and one elim solve per column; returns 0 on success, 1 on failure
271 |    int n ,           // Size of matrix
272 |    double *x ,       // Matrix to be inverted, not changed
273 |    double *xinv ,    // Output of its inverse
274 |    double *det ,     // Determinant
275 |    double *rwork ,   // Work vector n*n + 2*n long
276 |    int *iwork )      // Work vector n long
277 | {
278 |    int i, j, ret_val ;
279 |    double *lu, *equil, *soln ;
280 | 
281 |    lu = rwork ;            // First n*n of rwork holds the LU factors
282 |    equil = lu + n * n ;    // Next n: LUdecomp scale factors, later reused as the right hand side
283 |    soln = equil + n ;      // Last n: solution vector for one column
284 | 
285 |    ret_val = LUdecomp ( n , x , lu , n , 0 , det , iwork , equil ) ;  // digits = 0: no accuracy test
286 | 
287 |    if (ret_val)            // Any nonzero LUdecomp result is reported as 1
288 |       return 1 ;
289 | 
290 |    for (i=0 ; i<n ; i++) {  // For each column of inverse
291 |       for (j=0 ; j<n ; j++) // Build the i'th unit vector as right hand side
292 |          equil[j] = 0.0 ;
293 |       equil[i] = 1.0 ;
294 |       elim ( n , lu , n , equil , iwork , soln ) ;
295 |       for (j=0 ; j<n ; j++)
296 |          xinv[j*n+i] = soln[j] ;   // Store the solution as column i of the inverse
297 |       }
298 | 
299 |    return 0 ;
300 | }
301 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | ﻿Freeware License, some rights reserved
 2 | 
 3 | Copyright (c) 2018 Timothy Masters
 4 | 
 5 | Permission is hereby granted, free of charge, to anyone obtaining a copy 
 6 | of this software and associated documentation files (the "Software"), 
 7 | to work with the Software within the limits of freeware distribution and fair use. 
 8 | This includes the rights to use, copy, and modify the Software for personal use. 
 9 | Users are also allowed and encouraged to submit corrections and modifications 
10 | to the Software for the benefit of other users.
11 | 
12 | It is not allowed to reuse,  modify, or redistribute the Software for 
13 | commercial use in any way, or for a user’s educational materials such as books 
14 | or blog articles without prior permission from the copyright holder. 
15 | 
16 | The above copyright notice and this permission notice need to be included 
17 | in all copies or substantial portions of the software.
18 | 
19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 | AUTHORS OR COPYRIGHT HOLDERS OR APRESS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25 | SOFTWARE.
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/MI_BIN.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  MI_BIN - Mutual information for binary predicted and predictors           */
  4 | /*                                                                            */
  5 | /******************************************************************************/
  6 | 
  7 | #include <assert.h>
  8 | #include <stdio.h>
  9 | #include <string.h>
 10 | #include <math.h>
 11 | #include <conio.h>
 12 | #include <ctype.h>
 13 | #include <stdlib.h>
 14 | 
 15 | extern void free_data ( int nvars , char **names , double *data ) ;
 16 | extern double mutinf_b ( int n , short int *y , short int *x , short int *z ) ;
 17 | extern int readfile ( char *name , int *nvars , char ***names ,
 18 |                       int *ncases , double **data ) ;
 19 | extern void partition ( int n , double *data , int *npart ,
 20 |                         double *bnds , short int *bins ) ;
 21 | extern void qsortdsi ( int first , int last , double *data , int *slave ) ;
 22 | 
 23 | int main (
 24 |    int argc ,    // Number of command line arguments (includes prog name)
 25 |    char *argv[]  // Arguments (prog name is argv[0])
 26 |    )
 27 | 
 28 | {
 29 |    int i, j, k, depzero, indepzero, nvars, ncases, maxkept, ivar, *kept ;
 30 |    int n_indep_vars, idep, icand, iz, ibest, *sortwork, nkept, *last_indices ;
 31 |    double *data, *work, temp, p, error_entropy ;
 32 |    double *save_info, bestcrit ;
 33 |    double criterion, entropy, bound, *crits, *scores ;
 34 |    short int *bins_dep, *bins_indep, *xbins ;
 35 |    char filename[256], **names, depname[256] ;
 36 |    char trial_name[256] ;
 37 |    FILE *fp ;
 38 | 
 39 | /*
 40 |    Process command line parameters
 41 | */
 42 | 
 43 | #if 1
 44 |    if (argc != 7) {
 45 |       printf ( "\nUsage: MI_BIN  datafile  n_indep  depname  depzero  indepzero  maxkept" ) ;
 46 |       printf ( "\n  datafile - name of the text file containing the data" ) ;
 47 |       printf ( "\n             The first line is variable names" ) ;
 48 |       printf ( "\n             Subsequent lines are the data." ) ;
 49 |       printf ( "\n             Delimiters can be space, comma, or tab" ) ;
 50 |       printf ( "\n  n_indep - Number of independent vars, starting with the first" ) ;
 51 |       printf ( "\n  depname - Name of the 'dependent' variable" ) ;
 52 |       printf ( "\n            It must be AFTER the first n_indep variables" ) ;
 53 |       printf ( "\n  depzero - If nonzero, dependent variable is split >0 vs <=0" ) ;
 54 |       printf ( "\n            Else split is by optimal partition" ) ;
 55 |       printf ( "\n  indepzero - Ditto, for independent variables" ) ;
 56 |       printf ( "\n  maxkept - Stepwise will allow at most this many predictors" ) ;
 57 |       return EXIT_FAILURE ;
 58 |       }
 59 | 
 60 |    strcpy ( filename , argv[1] ) ;
 61 |    n_indep_vars = atoi ( argv[2] ) ;
 62 |    strcpy ( depname , argv[3] ) ;
 63 |    depzero = atoi ( argv[4] ) ;
 64 |    indepzero = atoi ( argv[5] ) ;
 65 |    maxkept = atoi ( argv[6] ) ;
 66 | #else
 67 |    strcpy ( filename , "..\\VARS.TXT" ) ;
 68 |    strcpy ( depname , "DAY_RETURN" ) ;
 69 |    n_indep_vars = 8 ;
 70 |    depzero = 1 ;
 71 |    indepzero = 1 ;
 72 |    maxkept = 99 ;
 73 | #endif
 74 | 
 75 |    _strupr ( depname ) ;
 76 | 
 77 | 
 78 | /*
 79 |    Open the text file to which results will be written
 80 | */
 81 | 
 82 |    fp = fopen ( "MI_BIN.LOG" , "wt" ) ;
 83 |    if (fp == NULL) { // Should never happen
 84 |       printf ( "\nCannot open MI_BIN.LOG file for writing!" ) ;
 85 |       return EXIT_FAILURE ;
 86 |       }
 87 | 
 88 | /*
 89 |    Read the file and locate the index of the 'dependent' variable
 90 | */
 91 | 
 92 |    if (readfile ( filename , &nvars , &names , &ncases , &data ))
 93 |       return EXIT_FAILURE ;
 94 | 
 95 |    for (idep=0 ; idep<nvars ; idep++) {
 96 |       if (! strcmp ( depname , names[idep] ))
 97 |          break ;
 98 |       }
 99 | 
100 |    if (idep == nvars) {
101 |       printf ( "\nERROR... Dependent variable %s is not in file", depname ) ;
102 |       return EXIT_FAILURE ;
103 |       }
104 | 
105 |    if (idep < n_indep_vars) {
106 |       printf ( "\nERROR... Dependent variable %s must be beyond independent vars",
107 |                depname ) ;
108 |       return EXIT_FAILURE ;
109 |       }
110 | 
111 | /*
112 |    Allocate scratch memory
113 | 
114 |    bins_dep - Bin ids for the 'dependent' variable
115 |    bins_indep - Bin ids for the 'independent' variables
116 |    kept - Array of indices of variables kept so far
117 |    crits - Ditto, criterion
118 |    scores - Current (regularly updated) min I(Y;X|Z) for choosing best candidate
119 |    last_indices - For each candidate, last index among Zs used to compute scores
120 |    sortwork - Temporary use for printing variable's information sorted
121 |    save_info - Ditto, this is univariate information, to be sorted
122 | */
123 | 
124 |    work = (double *) malloc ( ncases * sizeof(double) ) ;
125 |    assert ( work != NULL ) ;
126 |    bins_dep = (short int *) malloc ( ncases * sizeof(short int) ) ;
127 |    assert ( bins_dep != NULL ) ;
128 |    bins_indep = (short int *) malloc ( ncases * n_indep_vars * sizeof(short int) ) ;
129 |    assert ( bins_indep != NULL ) ;
130 |    kept = (int *) malloc ( n_indep_vars * sizeof(int) ) ;
131 |    assert ( kept != NULL ) ;
132 |    crits = (double *) malloc ( n_indep_vars * sizeof(double) ) ;
133 |    assert ( crits != NULL ) ;
134 |    scores = (double *) malloc ( n_indep_vars * sizeof(double) ) ;
135 |    assert ( scores != NULL ) ;
136 |    last_indices = (int *) malloc ( n_indep_vars * sizeof(int) ) ;
137 |    assert ( last_indices != NULL ) ;
138 |    sortwork = (int *) malloc ( n_indep_vars * sizeof(int) ) ;
139 |    assert ( sortwork != NULL ) ;
140 |    save_info = (double *) malloc ( n_indep_vars * sizeof(double) ) ;
141 |    assert ( save_info != NULL ) ;
142 | 
143 | /*
144 |    Compute the bin membership of all variables.
145 |    If the user requested, we treat the variable as binary (two bins)
146 |    using <=0 and >0 as the definition of bin membership.
147 |    Otherwise we use partition() to do the split.
148 | */
149 | 
150 |    if (depzero) {   // The dependent variable is split at zero
151 |       for (i=0 ; i<ncases ; i++) {
152 |          if (data[i*nvars+idep] > 0.0)
153 |             bins_dep[i] = (short int) 1 ;
154 |          else
155 |             bins_dep[i] = (short int) 0 ;
156 |          }
157 |       fprintf ( fp , "\n%s has been split at zero", names[idep] ) ;
158 |       }
159 |    else {                  // The dependent variable is to be partitioned
160 |       for (i=0 ; i<ncases ; i++)
161 |          work[i] = data[i*nvars+idep] ;
162 |       k = 2 ;
163 |       partition ( ncases , work , &k , NULL , bins_dep ) ;
164 |       fprintf ( fp , "\n%s has been optimally partitioned", names[idep] ) ;
165 |       }
166 | 
167 |    if (indepzero) {   // The independent variable is split at zero
168 |       fprintf ( fp , "\nIndependent variables have been split at zero");
169 |       for (ivar=0 ; ivar<n_indep_vars ; ivar++) {
170 |          for (i=0 ; i<ncases ; i++) {
171 |             if (data[i*nvars+ivar] > 0.0)
172 |                bins_indep[ivar*ncases+i] = (short int) 1 ;
173 |             else
174 |                bins_indep[ivar*ncases+i] = (short int) 0 ;
175 |             }
176 |          }
177 |       }
178 |    else {
179 |       fprintf ( fp , "\nIndependent variables have been given an optimal split");
180 |       for (ivar=0 ; ivar<n_indep_vars ; ivar++) {
181 |          for (i=0 ; i<ncases ; i++)
182 |             work[i] = data[i*nvars+ivar] ;
183 |          k = 2 ;
184 |          partition ( ncases , work , &k , NULL , bins_indep+ivar*ncases ) ;
185 |          }
186 |       }
187 | 
188 | /*
189 |    Compute and save the mutual information for the dependent variable with
190 |    each individual independent variable candidate.  Print the results,
191 |    sort them, and print them again, this time sorted.
192 |    Also compute the error entropy so we can use it for the Fano bound.
193 |    We need to save the criterion of each in save_info because this is the array
194 |    that will be sorted, and we also save it in scores because this is the array
195 |    that will be used for future 'best variable' selection.
196 |    While we're at it, initialize last_indices to -1 for each variable.
197 |    This is explained in the big comment block later.
198 | */
199 | 
200 |    entropy = mutinf_b ( ncases , bins_dep , NULL , NULL ) ;
201 |    fprintf ( fp , "\n\n\nMutual information of %s  (Entropy = %.4lf)",
202 |              depname, entropy ) ;
203 | 
204 |    fprintf ( fp , "\n\nInitial candidates, in order of appearance in data file" ) ;
205 |    fprintf ( fp , "\n" ) ;
206 |    fprintf ( fp , "\n                       Variable   Information   Fano's bound" ) ;
207 | 
208 |    for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates
209 |       xbins = bins_indep + icand * ncases ; // This X candidate is here
210 | 
211 |       // Compute the error entropy
212 |       k = 0 ;
213 |       for (i=0 ; i<ncases ; i++) {
214 |          if (bins_dep[i] == xbins[i])
215 |             ++k ;
216 |          }
217 |       if (k > 0  &&  k < ncases) {
218 |          p = (double) k / (double) ncases ;
219 |          error_entropy = -p * log(p) - (1.0 - p) * log(1.0-p) ;
220 |          }
221 |       else
222 |          error_entropy = 0.0 ;
223 | 
224 |       criterion = mutinf_b ( ncases , bins_dep , xbins , NULL ) ;
225 |       bound = (entropy - criterion - error_entropy) / log ( 2.0 ) ;
226 |       if (bound < 0.0)
227 |          bound = 0.0 ;
228 |       printf ( "\n%s = %.5lf  (%.5lf)", names[icand], criterion, bound ) ;
229 |       fprintf ( fp , "\n%31s %11.5lf  %13.5lf", names[icand], criterion, bound ) ;
230 |       sortwork[icand] = icand ;
231 |       scores[icand] = save_info[icand] = criterion ;
232 |       last_indices[icand] = -1 ;
233 |       } // Initial list of all candidates
234 | 
235 | 
236 |    fprintf ( fp , "\n" ) ;
237 |    fprintf ( fp , "\nInitial candidates, in order of decreasing mutual information" ) ;
238 |    fprintf ( fp , "\n" ) ;
239 |    fprintf ( fp , "\n                       Variable   Information" ) ;
240 | 
241 |    qsortdsi ( 0 , n_indep_vars-1 , save_info , sortwork ) ;
242 |    for (icand=0 ; icand<n_indep_vars ; icand++) { // Do all candidates
243 |       k = sortwork[n_indep_vars-1-icand] ;        // Index of sorted candidate
244 |       fprintf ( fp , "\n%31s   %.5lf", names[k], save_info[n_indep_vars-1-icand] ) ;
245 |       }
246 | 
247 | /*
248 |    Initialize the 'kept' set to be the best variable, and then begin the
249 |    main outer loop that adds variables one at a time.
250 | 
251 |    The criterion for picking the best next candidate (we want the max criterion)
252 |    is the minimum value of I(Y;X|Z) across the set of variables kept so far.
253 |    In this expression, Y is the dependent variable, X is the candidate, and
254 |    Z is a member of the kept set.  I(Y;X|Z) is large when X adds information
255 |    about Y above and beyond what Z already adds.  It is small if X adds nothing
256 |    useful.  So by letting Z be each member of the kept set, one at a time,
257 |    and using the minimum I(Y;X|Z) found, we avoid adding a new variable whose
258 |    information is already supplied.
259 | 
260 |    There is a cute trick for avoiding having to check every candidate against
261 |    every Z.  When a new Z is tested in computing the minimum across all Z,
262 |    the minimum obviously cannot increase.  So if the minimum across Z so far
263 |    is already worse than the best candidate criterion so far, there is no
264 |    point in continuing to test more Zs for a candidate.  This candidate has
265 |    already lost the competition for this round.  Of course, we need to keep
266 |    track of, for each candidate, the place where we have stopped testing it
267 |    against Zs.  This is because on a later round of adding a variable, the
268 |    best so far may be small, and a candidate whose testing was stopped early
269 |    on a prior round may need to be tested against more Zs to see if it might
270 |    be the best now.
271 | */
272 | 
273 |    kept[0] = sortwork[n_indep_vars-1] ;   // Index of best single candidate
274 |    crits[0] = save_info[n_indep_vars-1] ; // Its criterion value
275 |    nkept = 1 ;
276 | 
277 |    if (maxkept > n_indep_vars)  // Guard against silly user
278 |       maxkept = n_indep_vars ;
279 | 
280 |    while (nkept < maxkept) {
281 | 
282 |       printf ( "\n\nLatest candidate: %s", names[kept[nkept-1]] ) ;
283 | 
284 |       fprintf ( fp , "\n" ) ;
285 |       fprintf ( fp , "\nVariables so far                 Criterion" ) ;
286 |       for (i=0 ; i<nkept ; i++)
287 |          fprintf ( fp , "\n%31s %10.5lf", names[kept[i]], crits[i] ) ;
288 |       fprintf ( fp , "\n" ) ;
289 |       fprintf ( fp , "\nSearching for an additional candidate..." ) ;
290 |       fprintf ( fp , "\n" ) ;
291 |       fprintf ( fp , "\n                       Variable  Criterion" ) ;
292 | 
293 |       bestcrit = -1.e60 ;
294 |       for (icand=0 ; icand<n_indep_vars ; icand++) { // Try all candidates
295 |          for (i=0 ; i<nkept ; i++) {  // Is this candidate already kept?
296 |             if (kept[i] == icand)
297 |                break ;
298 |             }
299 |          if (i < nkept)  // If this candidate 'icand' is already kept
300 |             continue ;   // Skip it
301 | 
302 |          strcpy ( trial_name , names[icand] ) ;   // Its name for printing
303 |          printf ( "\n  Testing candidate %s  Starting score=%.5lf  Tested thru %d",
304 |                   trial_name, scores[icand], last_indices[icand] ) ;
305 | 
306 |          // Compute I(Y;X|Z) for each Z in the kept set, and keep track of min
307 |          // We've already done them through last_indices[icand], so start
308 |          // with the next one up.  Allow for early exit if icand already loses.
309 |          for (iz=last_indices[icand]+1 ; iz<nkept ; iz++) {
310 |             if (scores[icand] <= bestcrit) // Has this candidate already lost?
311 |                break ;                     // If so, no need to keep doing Zs
312 |             j = kept[iz] ;                 // Index of variable in the kept set
313 |             temp = mutinf_b ( ncases , bins_dep , bins_indep + icand * ncases ,
314 |                               bins_indep + j * ncases ) ; // I(Y;X|Z)
315 |             if (temp < scores[icand])
316 |                scores[icand] = temp ;
317 |             last_indices[icand] = iz ;
318 |             printf ( "\n    With kept %s I(Y;X|Z)=%.5lf  score=%.5lf",
319 |                      names[j], temp, scores[icand] ) ;
320 |             } // For all kept variables, computing min conditional mutual information
321 | 
322 |          criterion = scores[icand] ;
323 |          printf ( "\n  %s = %.5lf", trial_name, criterion ) ;
324 |          fprintf ( fp , "\n%31s %10.5lf", trial_name, criterion ) ;
325 | 
326 |          if (criterion > bestcrit) { // Did we just set a new record?
327 |             bestcrit = criterion ;   // If so, update the record
328 |             ibest = icand ;          // Keep track of the winning candidate
329 |             }
330 | 
331 |          } // For all candidates
332 | 
333 |       // We now have the best candidate
334 |       if (bestcrit <= 0.0)
335 |          break ;
336 |       kept[nkept] = ibest ;
337 |       crits[nkept] = bestcrit ;
338 |       printf ( "\nAdded %s = %.5lf", names[ibest], bestcrit ) ;
339 |       ++nkept ;
340 |       } // While adding new variables
341 | 
342 |    fprintf ( fp , "\n" ) ;
343 |    fprintf ( fp , "\nFinal set                        Criterion" ) ;
344 |    for (i=0 ; i<nkept ; i++)
345 |       fprintf ( fp , "\n%31s %10.5lf", names[kept[i]], crits[i] ) ;
346 | 
347 | 
348 |    fclose ( fp ) ;
349 |    free ( work ) ;
350 |    free ( bins_dep ) ;
351 |    free ( bins_indep ) ;
352 |    free ( kept ) ;
353 |    free ( crits ) ;
354 |    free ( scores ) ;
355 |    free ( last_indices ) ;
356 |    free ( sortwork ) ;
357 |    free ( save_info ) ;
358 |    free_data ( nvars , names , data ) ;
359 |    printf ( "\n\nPress any key..." ) ;
360 |    _getch () ;
361 |    return EXIT_SUCCESS ;
362 | }
363 | 


--------------------------------------------------------------------------------
/MUTINF_B.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  MutInf_B - Mutual information for binary data                             */
  4 | /*                                                                            */
  5 | /******************************************************************************/
  6 | 
  7 | #include <assert.h>
  8 | #include <stdio.h>
  9 | #include <string.h>
 10 | #include <math.h>
 11 | #include <stdlib.h>
 12 | 
 13 | double mutinf_b (
 14 |    int n ,         // Number of cases
 15 |    short int *y ,  // The 'dependent' variable
 16 |    short int *x ,  // The 'independent' variable; NULL to compute H(Y)
 17 |    short int *z )  // NULL to compute I(X;Y), z to compute I(X;Y|Z)
 18 | {
 19 |    int i, nx0, nx1, ny0, ny1, nz0, nz1, n00, n01, n10, n11 ;
 20 |    int  n000, n010, n100, n110, n001, n011, n101, n111 ;
 21 |    double p, HX, HY, HZ, HXY, HYZ, HXZ, HXYZ ;
 22 | 
 23 | /*
 24 | --------------------------------------------------------------------------------
 25 | 
 26 |    Compute the entropy of Y
 27 | 
 28 | --------------------------------------------------------------------------------
 29 | */
 30 | 
 31 |    if (x == NULL) {
 32 |       ny1 = 0 ;
 33 |       for (i=0 ; i<n ; i++) {
 34 |          if (y[i])
 35 |             ++ny1 ;
 36 |          }
 37 |       ny0 = n - ny1 ;
 38 |       // Compute the entropy of Y
 39 |       if (ny0) {
 40 |          p = (double) ny0 / (double) n ;
 41 |          HY = p * log ( p ) ;
 42 |          }
 43 |       else
 44 |          HY = 0.0 ;
 45 |       if (ny1) {
 46 |          p = (double) ny1 / (double) n ;
 47 |          HY += p * log ( p ) ;
 48 |          }
 49 |       return -HY ;
 50 |       }
 51 | /*
 52 | --------------------------------------------------------------------------------
 53 | 
 54 |    Compute the joint entropy I(X;Y)
 55 | 
 56 | --------------------------------------------------------------------------------
 57 | */
 58 | 
 59 |    if (z == NULL) {
 60 |       n01 = n10 = n11 = 0 ;
 61 |       for (i=0 ; i<n ; i++) {
 62 |          if (x[i]) {
 63 |             if (y[i])
 64 |                ++n11 ;
 65 |             else
 66 |                ++n10 ;
 67 |             }
 68 |          else {
 69 |             if (y[i])
 70 |                ++n01 ;
 71 |             }
 72 |          }
 73 |       n00 = n - n01 - n10 - n11 ;
 74 |       // Compute the marginals
 75 |       nx0 = n00 + n01 ;
 76 |       nx1 = n10 + n11 ;
 77 |       ny0 = n00 + n10 ;
 78 |       ny1 = n01 + n11 ;
 79 |       // Compute the entropy of X
 80 |       if (nx0) {
 81 |          p = (double) nx0 / (double) n ;
 82 |          HX = p * log ( p ) ;
 83 |          }
 84 |       else
 85 |          HX = 0.0 ;
 86 |       if (nx1) {
 87 |          p = (double) nx1 / (double) n ;
 88 |          HX += p * log ( p ) ;
 89 |          }
 90 | 
 91 |       // Compute the entropy of Y
 92 |       if (ny0) {
 93 |          p = (double) ny0 / (double) n ;
 94 |          HY = p * log ( p ) ;
 95 |          }
 96 |       else
 97 |          HY = 0.0 ;
 98 |       if (ny1) {
 99 |          p = (double) ny1 / (double) n ;
100 |          HY += p * log ( p ) ;
101 |          }
102 | 
103 |       // Compute the joint entropy H(X,Y)
104 |       if (n00) {
105 |          p = (double) n00 / (double) n ;
106 |          HXY = p * log ( p ) ;
107 |          }
108 |       else
109 |          HXY = 0.0 ;
110 |       if (n01) {
111 |          p = (double) n01 / (double) n ;
112 |          HXY += p * log ( p ) ;
113 |          }
114 |       if (n10) {
115 |          p = (double) n10 / (double) n ;
116 |          HXY += p * log ( p ) ;
117 |          }
118 |       if (n11) {
119 |          p = (double) n11 / (double) n ;
120 |          HXY += p * log ( p ) ;
121 |          }
122 | 
123 |       return HXY - HX - HY ;
124 |       } // If z==NULL... want I(X;Y)
125 | 
126 | /*
127 | --------------------------------------------------------------------------------
128 | 
129 |    Compute the joint conditional entropy I(X;Y|Z)
130 | 
131 | --------------------------------------------------------------------------------
132 | */
133 | 
134 |    else {
135 |       n000 = n001 = n010 = n011 = n100 = n101 = n110 = n111 = 0 ;
136 |       for (i=0 ; i<n ; i++) {
137 |          if (x[i]) {
138 |             if (y[i]) {
139 |                if (z[i])
140 |                   ++n111 ;
141 |                else
142 |                   ++n110 ;
143 |                }
144 |             else {
145 |                if (z[i])
146 |                   ++n101 ;
147 |                else
148 |                   ++n100 ;
149 |                }
150 |             }
151 |          else {
152 |             if (y[i]) {
153 |                if (z[i])
154 |                   ++n011 ;
155 |                else
156 |                   ++n010 ;
157 |                }
158 |             else {
159 |                if (z[i])
160 |                   ++n001 ;
161 |                else
162 |                   ++n000 ;
163 |                }
164 |             }
165 |          }
166 |       // Compute the entropy of Z
167 |       nz0 = n000 + n010 + n100 + n110 ;
168 |       nz1 = n - nz0 ;
169 |       if (nz0) {
170 |          p = (double) nz0 / (double) n ;
171 |          HZ = p * log ( p ) ;
172 |          }
173 |       else
174 |          HZ = 0.0 ;
175 |       if (nz1) {
176 |          p = (double) nz1 / (double) n ;
177 |          HZ += p * log ( p ) ;
178 |          }
179 | 
180 |       // Compute the joint entropy H(X,Z)
181 |       n00 = n000 + n010 ;
182 |       n01 = n001 + n011 ;
183 |       n10 = n100 + n110 ;
184 |       n11 = n101 + n111 ;
185 |       if (n00) {
186 |          p = (double) n00 / (double) n ;
187 |          HXZ = p * log ( p ) ;
188 |          }
189 |       else
190 |          HXZ = 0.0 ;
191 |       if (n01) {
192 |          p = (double) n01 / (double) n ;
193 |          HXZ += p * log ( p ) ;
194 |          }
195 |       if (n10) {
196 |          p = (double) n10 / (double) n ;
197 |          HXZ += p * log ( p ) ;
198 |          }
199 |       if (n11) {
200 |          p = (double) n11 / (double) n ;
201 |          HXZ += p * log ( p ) ;
202 |          }
203 | 
204 |       // Compute the joint entropy H(Y,Z)
205 |       n00 = n000 + n100 ;
206 |       n01 = n001 + n101 ;
207 |       n10 = n010 + n110 ;
208 |       n11 = n011 + n111 ;
209 |       if (n00) {
210 |          p = (double) n00 / (double) n ;
211 |          HYZ = p * log ( p ) ;
212 |          }
213 |       else
214 |          HYZ = 0.0 ;
215 |       if (n01) {
216 |          p = (double) n01 / (double) n ;
217 |          HYZ += p * log ( p ) ;
218 |          }
219 |       if (n10) {
220 |          p = (double) n10 / (double) n ;
221 |          HYZ += p * log ( p ) ;
222 |          }
223 |       if (n11) {
224 |          p = (double) n11 / (double) n ;
225 |          HYZ += p * log ( p ) ;
226 |          }
227 | 
228 |    // Compute the joint entropy H(X,Y,Z)
229 |       if (n000) {
230 |          p = (double) n000 / (double) n ;
231 |          HXYZ = p * log ( p ) ;
232 |          }
233 |       else
234 |          HXYZ = 0.0 ;
235 |       if (n001) {
236 |          p = (double) n001 / (double) n ;
237 |          HXYZ += p * log ( p ) ;
238 |          }
239 |       if (n010) {
240 |          p = (double) n010 / (double) n ;
241 |          HXYZ += p * log ( p ) ;
242 |          }
243 |       if (n011) {
244 |          p = (double) n011 / (double) n ;
245 |          HXYZ += p * log ( p ) ;
246 |          }
247 |       if (n100) {
248 |          p = (double) n100 / (double) n ;
249 |          HXYZ += p * log ( p ) ;
250 |          }
251 |       if (n101) {
252 |          p = (double) n101 / (double) n ;
253 |          HXYZ += p * log ( p ) ;
254 |          }
255 |       if (n110) {
256 |          p = (double) n110 / (double) n ;
257 |          HXYZ += p * log ( p ) ;
258 |          }
259 |       if (n111) {
260 |          p = (double) n111 / (double) n ;
261 |          HXYZ += p * log ( p ) ;
262 |          }
263 |       }
264 | 
265 |    return HZ + HXYZ - HXZ - HYZ ;
266 | }
267 | 


--------------------------------------------------------------------------------
/MUTINF_D.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  MutInf_D - Mutual information for discrete data                           */
  4 | /*                                                                            */
  5 | /******************************************************************************/
  6 | 
  7 | #include <assert.h>
  8 | #include <stdio.h>
  9 | #include <string.h>
 10 | #include <math.h>
 11 | #include <stdlib.h>
 12 | 
 13 | 
 14 | class MutualInformationDiscrete {
 15 | 
 16 | public:
 17 |    MutualInformationDiscrete ( int nc , short int *bins ) ;
 18 |    ~MutualInformationDiscrete () ;
 19 |    double entropy () ;
 20 |    double mut_inf ( short int *bins ) ;
 21 |    double conditional ( short int *bins ) ;
 22 |    double conditional_error ( short int *bins ) ;
 23 |    double HYe ( short int *bins ) ;
 24 |    double hPe ( short int *bins ) ;
 25 | 
 26 | private:
 27 |    int ncases ;         // Number of cases
 28 |    short int *bins_y ;  // They are here
 29 |    int nbins_y ;        // Number of bins
 30 |    int *marginal_y ;    // Marginal distribution
 31 | } ;
 32 | 
 33 | 
 34 | /*
 35 | --------------------------------------------------------------------------------
 36 | 
 37 |    MutualInformationDiscrete - Constructor and destructor
 38 | 
 39 | --------------------------------------------------------------------------------
 40 | */
 41 | 
 42 | 
 43 | MutualInformationDiscrete::MutualInformationDiscrete (
 44 |    int nc ,      // Number of cases
 45 |    short int *bins )   // They are here (y, the 'dependent' variable)
 46 | {
 47 |    int i ;
 48 | 
 49 | /*
 50 |    Keep a local copy of the bins
 51 | */
 52 | 
 53 |    ncases = nc ;
 54 | 
 55 |    bins_y = (short int *) malloc ( ncases * sizeof(short int) ) ;
 56 | 
 57 |    memcpy ( bins_y , bins , ncases * sizeof(short int) ) ;
 58 | 
 59 | /*
 60 |    Compute the number of bins, and then compute and save the marginal distribution
 61 | */
 62 | 
 63 |    nbins_y = 0 ;
 64 |    for (i=0 ; i<ncases ; i++) {
 65 |       if (bins_y[i] > nbins_y)
 66 |          nbins_y = bins_y[i] ;
 67 |       }
 68 |    ++nbins_y ;  // Number of bins is one greater than max bin because org=0
 69 | 
 70 |    marginal_y = (int *) malloc ( nbins_y * sizeof(int) ) ;
 71 |    assert (marginal_y != NULL) ;
 72 | 
 73 |    for (i=0 ; i<nbins_y ; i++)
 74 |       marginal_y[i] = 0 ;
 75 | 
 76 |    for (i=0 ; i<ncases ; i++)
 77 |       ++marginal_y[bins_y[i]] ;
 78 | }
 79 | 
 80 | MutualInformationDiscrete::~MutualInformationDiscrete ()
 81 | {
 82 |    free ( bins_y ) ;
 83 |    free ( marginal_y ) ;
 84 | }
 85 | 
 86 | /*
 87 | --------------------------------------------------------------------------------
 88 | 
 89 |    entropy() - Compute the entropy of Y, the 'dependent' variable
 90 | 
 91 | --------------------------------------------------------------------------------
 92 | */
 93 | 
 94 | double MutualInformationDiscrete::entropy ()
 95 | {
 96 |    int i ;
 97 |    double p, ent ;
 98 | 
 99 |    ent = 0.0 ;
100 |    for (i=0 ; i<nbins_y ; i++) {
101 |       if (marginal_y[i] > 0) {
102 |          p = (double) marginal_y[i] / ncases ;
103 |          ent += p * log ( p ) ;
104 |          }
105 |       }
106 |    return -ent ;
107 | }
108 | 
109 | /*
110 | --------------------------------------------------------------------------------
111 | 
112 |    conditional ( bins_x ) - Compute the conditional entropy of Y given X
113 | 
114 | --------------------------------------------------------------------------------
115 | */
116 | 
117 | double MutualInformationDiscrete::conditional ( short int *bins_x )
118 | {
119 |    int i, ix, iy, nbins_x, *grid, *marginal_x ;
120 |    double CI, pyx, cix ;
121 | 
122 | /*
123 |    Compute the number of bins
124 | */
125 | 
126 |    nbins_x = 0 ;
127 |    for (i=0 ; i<ncases ; i++) {
128 |       if (bins_x[i] > nbins_x)
129 |          nbins_x = bins_x[i] ;
130 |       }
131 |    ++nbins_x ;  // Number of bins is one greater than max bin because org=0
132 | 
133 | /*
134 |    Compute the marginal of x and the counts in the nbins_x by nbins_y grid
135 | */
136 | 
137 |    marginal_x = (int *) malloc ( nbins_x * sizeof(int) ) ;
138 | 
139 |    grid = (int *) malloc ( nbins_x * nbins_y * sizeof(int) ) ;
140 | 
141 |    for (ix=0 ; ix<nbins_x ; ix++) {
142 |       marginal_x[ix] = 0 ;
143 |       for (iy=0 ; iy<nbins_y ; iy++)
144 |          grid[ix*nbins_y+iy] = 0 ;
145 |       }
146 | 
147 |    for (i=0 ; i<ncases ; i++) {
148 |       ix = bins_x[i] ;
149 |       ++marginal_x[ix] ;
150 |       ++grid[ix*nbins_y+bins_y[i]] ;
151 |       }
152 | 
153 | /*
154 |    Compute the conditional entropy
155 | */
156 | 
157 |    CI = 0.0 ;
158 |    for (ix=0 ; ix<nbins_x ; ix++) {
159 |       if (marginal_x[ix] > 0) {
160 |          cix = 0.0 ;
161 |          for (iy=0 ; iy<nbins_y ; iy++) {
162 |             pyx = (double) grid[ix*nbins_y+iy] / (double) marginal_x[ix] ;
163 |             if (pyx > 0.0)
164 |                cix += pyx * log ( pyx ) ;
165 |             }
166 |          }
167 |       CI += cix * marginal_x[ix] / ncases ;
168 |       }
169 | 
170 |    free ( marginal_x ) ;
171 |    free ( grid ) ;
172 | 
173 |    return -CI ;
174 | }
175 | 
176 | /*
177 | --------------------------------------------------------------------------------
178 | 
179 |    mut_inf ( bins_x ) - Compute the mutual information I(X;Y)
180 | 
181 | --------------------------------------------------------------------------------
182 | */
183 | 
184 | double MutualInformationDiscrete::mut_inf ( short int *bins_x )
185 | {
186 |    int i, j, ix, nbins_x, *grid, *marginal_x ;
187 |    double MI, px, py, pxy ;
188 | 
189 | /*
190 |    Compute the number of bins
191 | */
192 | 
193 |    nbins_x = 0 ;
194 |    for (i=0 ; i<ncases ; i++) {
195 |       if (bins_x[i] > nbins_x)
196 |          nbins_x = bins_x[i] ;
197 |       }
198 |    ++nbins_x ;  // Number of bins is one greater than max bin because org=0
199 | 
200 | /*
201 |    Compute the marginal of x and the counts in the nbins_x by nbins_y grid
202 | */
203 | 
204 |    marginal_x = (int *) malloc ( nbins_x * sizeof(int) ) ;
205 |    assert (marginal_x != NULL) ;
206 | 
207 |    grid = (int *) malloc ( nbins_x * nbins_y * sizeof(int) ) ;
208 |    assert ( grid != NULL ) ;
209 | 
210 |    for (i=0 ; i<nbins_x ; i++) {
211 |       marginal_x[i] = 0 ;
212 |       for (j=0 ; j<nbins_y ; j++)
213 |          grid[i*nbins_y+j] = 0 ;
214 |       }
215 | 
216 |    for (i=0 ; i<ncases ; i++) {
217 |       ix = bins_x[i] ;
218 |       ++marginal_x[ix] ;
219 |       ++grid[ix*nbins_y+bins_y[i]] ;
220 |       }
221 | 
222 | /*
223 |    Compute the mutual information
224 | */
225 | 
226 |    MI = 0.0 ;
227 |    for (i=0 ; i<nbins_x ; i++) {
228 |       px = (double) marginal_x[i] / (double) ncases ;
229 |       for (j=0 ; j<nbins_y ; j++) {
230 |          py = (double) marginal_y[j] / (double) ncases ;
231 |          pxy = (double) grid[i*nbins_y+j] / (double) ncases ;
232 |          if (pxy > 0.0)
233 |             MI += pxy * log ( pxy / (px * py) ) ;
234 |          }
235 |       }
236 | 
237 |    free ( marginal_x ) ;
238 |    free ( grid ) ;
239 | 
240 |    return MI ;
241 | }
242 | 
243 | /*
244 | --------------------------------------------------------------------------------
245 | 
246 |    hPe ( bins_x ) - Compute the Shannon entropy of the probability of error
247 |                     This only makes sense if X and Y have the same number of
248 |                     bins, and the bin of X is a prediction of the bin of Y.
249 | 
250 | --------------------------------------------------------------------------------
251 | */
252 | 
253 | double MutualInformationDiscrete::hPe ( short int *bins_x )
254 | {
255 |    int i, err ;
256 |    double p ;
257 | 
258 |    err = 0 ;
259 |    for (i=0 ; i<ncases ; i++) {
260 |       if (bins_x[i] != bins_y[i])
261 |          ++err ;
262 |       }
263 | 
264 |    if (err == 0  ||  err == ncases)
265 |       return 0.0 ;
266 | 
267 |    p = (double) err / (double) ncases ;
268 |    return -p * log ( p ) - (1.0 - p) * log ( 1.0 - p ) ;
269 | }
270 | 
271 | /*
272 | --------------------------------------------------------------------------------
273 | 
274 |    conditional_error ( bins_x ) - Compute the conditional error entropy given X
275 | 
276 | --------------------------------------------------------------------------------
277 | */
278 | 
279 | double MutualInformationDiscrete::conditional_error ( short int *bins_x )
280 | {
281 |    int i, ix, nbins_x, *error_count, *marginal_x ;
282 |    double CI, pyx ;
283 | 
284 | /*
285 |    Compute the number of bins
286 | */
287 | 
288 |    nbins_x = 0 ;
289 |    for (i=0 ; i<ncases ; i++) {
290 |       if (bins_x[i] > nbins_x)
291 |          nbins_x = bins_x[i] ;
292 |       }
293 |    ++nbins_x ;  // Number of bins is one greater than max bin because org=0
294 | 
295 | /*
296 |    Compute the marginal of x and the error counts
297 | */
298 | 
299 |    marginal_x = (int *) malloc ( nbins_x * sizeof(int) ) ;
300 |    assert (marginal_x != NULL) ;
301 | 
302 |    error_count = (int *) malloc ( nbins_x * sizeof(int) ) ;
303 |    assert ( error_count != NULL ) ;
304 | 
305 |    for (ix=0 ; ix<nbins_x ; ix++) {
306 |       marginal_x[ix] = 0 ;
307 |       error_count[ix] = 0 ;
308 |       }
309 | 
310 |    for (i=0 ; i<ncases ; i++) {
311 |       ix = bins_x[i] ;
312 |       ++marginal_x[ix] ;
313 |       if (bins_y[i] != ix)
314 |          ++error_count[ix] ;
315 |       }
316 | 
317 | /*
318 |    Compute the conditional error entropy
319 | */
320 | 
321 |    CI = 0.0 ;
322 |    for (ix=0 ; ix<nbins_x ; ix++) {
323 |       if (error_count[ix] > 0  &&  error_count[ix] < marginal_x[ix]) {
324 |          pyx = (double) error_count[ix] / (double) marginal_x[ix] ;
325 |          CI += (pyx * log(pyx) + (1.0-pyx) * log(1.0-pyx)) * marginal_x[ix] / ncases ;
326 |          }
327 |       }
328 | 
329 |    free ( marginal_x ) ;
330 |    free ( error_count ) ;
331 | 
332 |    return -CI ;
333 | }
334 | 
335 | /*
336 | --------------------------------------------------------------------------------
337 | 
338 |    HYe ( bins_x ) - Compute the minimum (over bins of X) conditional entropy
339 |                     H(Y|error,X).  In other words, for each X bin, compute the
340 |                     conditional entropy of Y given that this X is an incorrect
341 |                     decision.  Return the minimum of this value across X bins.
342 |                     This only makes sense if X and Y have the same number of
343 |                     bins, and the bin of X is a prediction of the bin of Y.
344 | 
345 | --------------------------------------------------------------------------------
346 | */
347 | 
348 | double MutualInformationDiscrete::HYe ( short int *bins_x )
349 | {
350 |    int i, ix, iy, nbins_x, nerr, *grid, *marginal_x ;
351 |    double minCI, pyx, cix ;
352 | 
353 | /*
354 |    Compute the number of bins
355 | */
356 | 
357 |    nbins_x = 0 ;
358 |    for (i=0 ; i<ncases ; i++) {
359 |       if (bins_x[i] > nbins_x)
360 |          nbins_x = bins_x[i] ;
361 |       }
362 |    ++nbins_x ;  // Number of bins is one greater than max bin because org=0
363 | 
364 | /*
365 |    This algorithm makes sense only if nbins_x equals nbins_y.
366 |    Return an error flag that will get the user's attention if this is violated.
367 | */
368 | 
369 |    if (nbins_x != nbins_y)
370 |       return -1.e60 ;
371 | 
372 | /*
373 |    Compute the marginal of x and the counts in the nbins_x by nbins_y grid
374 | */
375 | 
376 |    marginal_x = (int *) malloc ( nbins_x * sizeof(int) ) ;
377 |    assert (marginal_x != NULL) ;
378 | 
379 |    grid = (int *) malloc ( nbins_x * nbins_y * sizeof(int) ) ;
380 |    assert ( grid != NULL ) ;
381 | 
382 |    for (ix=0 ; ix<nbins_x ; ix++) {
383 |       marginal_x[ix] = 0 ;
384 |       for (iy=0 ; iy<nbins_y ; iy++)
385 |          grid[ix*nbins_y+iy] = 0 ;
386 |       }
387 | 
388 |    for (i=0 ; i<ncases ; i++) {
389 |       ix = bins_x[i] ;
390 |       ++marginal_x[ix] ;
391 |       ++grid[ix*nbins_y+bins_y[i]] ;
392 |       }
393 | 
394 | /*
395 |    Compute the minimum entropy, conditional on error and each X
396 |    Note that the computation in the inner loop is almost the same as in the
397 |    conditional entropy.  The only difference is that since we are also
398 |    conditioning on the classification being in error, we must remove from
399 |    the X marginal the diagonal element, which is the correct decision.
400 |    The outer loop looks for the minimum, rather than summing.
401 | */
402 | 
403 |    minCI = 1.e60 ;
404 |    for (ix=0 ; ix<nbins_x ; ix++) {
405 |       nerr = marginal_x[ix] - grid[ix*nbins_y+ix] ; // Marginal that is in error
406 |       if (nerr > 0) {
407 |          cix = 0.0 ;
408 |          for (iy=0 ; iy<nbins_y ; iy++) {
409 |             if (iy == ix)  // This is the correct decision
410 |                continue ;  // So we exclude it; we are summing over errors
411 |             pyx = (double) grid[ix*nbins_y+iy] / (double) nerr ;
412 |             if (pyx > 0.0)
413 |                cix -= pyx * log ( pyx ) ;
414 |             }
415 |          if (cix < minCI)
416 |             minCI = cix ;
417 |          }
418 |       }
419 | 
420 |    free ( marginal_x ) ;
421 |    free ( grid ) ;
422 | 
423 |    return minCI ;
424 | }
425 | 


--------------------------------------------------------------------------------
/PART.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  PART - Partition an array into roughly equal size bins, avoiding ties     */
  4 | /*                                                                            */
  5 | /*  I make no special claims of optimality for this algorithm, largely        */
  6 | /*  because there is no single optimality criterion!  All algorithms involve  */
  7 | /*  tradeoffs.  However, I am reasonably certain that this algorithm has two  */
  8 | /*  valuable properties:                                                      */
  9 | /*                                                                            */
 10 | /*  1) If the user inputs npart at least as large as the number of distinct   */
 11 | /*     values in the dataset, npart will be returned equal to the number of   */
 12 | /*     distinct values in the dataset, and each bin will correspond exactly   */
 13 | /*     to a distinct value.                                                   */
 14 | /*                                                                            */
 15 | /*  2) If the data has few or no ties, and the user inputs npart much less    */
 16 | /*     than n, the dataset will be partitioned into npart bins, all of which  */
 17 | /*     have equal or very nearly equal size.                                  */
 18 | /*                                                                            */
 19 | /******************************************************************************/
 20 | 
 21 | #include <assert.h>
 22 | #include <stdio.h>
 23 | #include <string.h>
 24 | #include <math.h>
 25 | #include <stdlib.h>
 26 | 
 27 | extern void qsortdsi ( int first , int last , double *data , int *slave ) ;
 28 | 
 29 | void partition (
 30 |    int n ,         // Input: Number of cases in the data array
 31 |    double *data ,  // Input: The data array
 32 |    int *npart ,    // Input/Output: Number of partitions to find; Returned as
 33 |                    // actual number of partitions, which happens if massive ties
 34 |    double *bnds ,  // Output: Upper bound (inclusive) of each partition
 35 |                    // If the user inputs this NULL, bounds are not returned
 36 |    short int *bins // Output: Bin id (0 through npart-1) for each case
 37 |    )
 38 | {
 39 |    int i, j, k, np, *ix, *indices, *bin_end, ibound, tie_found ;
 40 |    int istart, istop, nleft, nright, nbest, ibound_best, isplit_best ;
 41 |    double *x ;
 42 | 
 43 |    if (*npart > n)  // Defend against a careless user
 44 |       *npart = n ;
 45 | 
 46 |    np = *npart ;    // Will be number of partitions
 47 | 
 48 |    x = (double *) malloc ( n * sizeof(double) ) ;
 49 |    ix = (int *) malloc ( n * sizeof(int) ) ;
 50 |    indices = (int *) malloc ( n * sizeof(int) ) ;
 51 |    bin_end = (int *) malloc ( np * sizeof(int) ) ;
 52 | 
 53 | /*
 54 |    Sort the data and compute an integer rank array that identifies ties.
 55 |    We could use the x array, but the code later will run faster if it can
 56 |    work with integers instead of reals.
 57 |    Also keep the indices of the original data points, as we will need this
 58 |    information at the end of this code to assign cases to bins.
 59 | */
 60 | 
 61 |    for (i=0 ; i<n ; i++) {
 62 |       x[i] = data[i] ;
 63 |       indices[i] = i ;
 64 |       }
 65 | 
 66 |    qsortdsi ( 0 , n-1 , x , indices ) ;
 67 | 
 68 |    ix[0] = k = 0 ;
 69 |    for (i=1 ; i<n ; i++) {
 70 |       if (x[i] - x[i-1] >= 1.e-12 * (1.0 + fabs(x[i]) + fabs(x[i-1])))
 71 |          ++k ;     // If not a tie, advance the counter of unique values
 72 |       ix[i] = k ;
 73 |       }
 74 | 
 75 | /*
 76 |    Compute initial bounds based strictly on equal number of cases in each bin.
 77 |    Ignore ties for now.
 78 | */
 79 | 
 80 |    k = 0 ;                              // Will be start of next bin up
 81 |    for (i=0 ; i<np ; i++) {             // For all partitions
 82 |       j = (n - k) / (np - i) ;          // Number of cases in this partition
 83 |       k += j ;                          // Advance the index of next one up
 84 |       bin_end[i] = k-1 ;                // Store upper bound of this bin
 85 |       }
 86 | 
 87 | /*
 88 |    If the data has no ties, we are done.  But if there are ties, we must iterate
 89 |    until no partition boundary splits a tie.
 90 |    Note that the upper bound of the last partition is always the last case
 91 |    in the sorted array, so we don't need to worry about it splitting a tie.
 92 |    There are no cases above it!  All we care about are the np-1 internal
 93 |    boundaries.
 94 | */
 95 | 
 96 |    for (;;) {
 97 | 
 98 |       tie_found = 0 ;
 99 | 
100 |       for (ibound=0 ; ibound<np-1 ; ibound++) {
101 |          if (ix[bin_end[ibound]] == ix[bin_end[ibound]+1]) { // Splits a tie?
102 |             // This bound splits a tie.  Remove this bound.
103 |             for (i=ibound+1 ; i<np ; i++)
104 |                bin_end[i-1] = bin_end[i] ;
105 |             --np ;
106 |             tie_found = 1 ;
107 |             break ;
108 |             }
109 |          } // For all bounds, looking for a split across a tie
110 | 
111 |       if (! tie_found)  // If we got all the way through the loop
112 |          break ;        // without finding a bad bound, we are done
113 | 
114 |       // The offending bound is now gone.  Try splitting each remaining
115 |       // bin.  For each split, check the size of the smaller resulting bin.
116 |       // Choose the split that gives the largest of the smaller.
117 |       // Note that np has been decremented, so now np < *npart.
118 | 
119 |       istart = 0 ;
120 |       nbest = -1 ;
121 |       for (ibound=0 ; ibound<np ; ibound++) {
122 |          istop = bin_end[ibound] ;
123 |          // Now processing a bin from istart through istop, inclusive
124 |          for (i=istart ; i<istop ; i++) { // Try all possible splits of this bin
125 |             if (ix[i] == ix[i+1])         // If this splits a tie
126 |                continue ;                 // Don't check
127 |             nleft = i - istart + 1 ;      // Number of cases in left half
128 |             nright = istop - i ;          // And right half
129 |             if (nleft < nright) {
130 |                if (nleft > nbest) {
131 |                   nbest = nleft ;
132 |                   ibound_best = ibound ;
133 |                   isplit_best = i ;
134 |                   }
135 |                }
136 |             else {
137 |                if (nright > nbest) {
138 |                   nbest = nright ;
139 |                   ibound_best = ibound ;
140 |                   isplit_best = i ;
141 |                   }
142 |                }
143 |             }
144 |          istart = istop + 1 ;
145 |          } // For all bounds, looking for the best bin to split
146 | 
147 |       // The search is done.  It may (rarely) be the case that no further
148 |       // splits are possible.  This will happen if the user requests more
149 |       // partitions than there are unique values in the dataset.
150 |       // We know that this has happened if nbest is still -1.  In this case
151 |       // we (obviously) cannot do a split to make up for the one lost above.
152 | 
153 |       if (nbest < 0)
154 |          continue ;
155 | 
156 |       // We get here when the best split of an existing partition has been
157 |       // found.  Save it.  The bin that we are splitting is ibound_best,
158 |       // and the split for a new bound is at isplit_best.
159 | 
160 |       for (ibound=np-1 ; ibound>=ibound_best ; ibound--)
161 |          bin_end[ibound+1] = bin_end[ibound] ;
162 |       bin_end[ibound_best] = isplit_best ;
163 |       ++np ;
164 | 
165 |       } // Endless search loop
166 | 
167 | /*
168 |    The partition bounds are found.
169 |    Return them to the user if requested.
170 | */
171 | 
172 |    *npart = np ;   // Return the final number of partitions
173 | 
174 |    if (bnds != NULL) {  // Does the user want the boundary values?
175 |       for (ibound=0 ; ibound<np ; ibound++)
176 |          bnds[ibound] = x[bin_end[ibound]] ;
177 |       }
178 | 
179 | /*
180 |    Return the bin membership of each case in the dataset
181 | */
182 | 
183 |    istart = 0 ;                            // The current bin starts here
184 |    for (ibound=0 ; ibound<np ; ibound++) { // Process all bins
185 |       istop = bin_end[ibound] ;            // Inclusive end of this bin
186 |       for (i=istart ; i<=istop ; i++)
187 |          bins[indices[i]] = (short int) ibound ;
188 |       istart = istop + 1 ;
189 |       }
190 | 
191 |    free ( x ) ;
192 |    free ( ix ) ;
193 |    free ( indices ) ;
194 |    free ( bin_end ) ;
195 | }
196 | 


--------------------------------------------------------------------------------
/PARZDENS.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  PARZDENS - ParzDens_? used for mutual information continuous method       */
  4 | /*                                                                            */
  5 | /*  These routines transform the raw input data to a normal distribution,     */
  6 | /*  so these density estimators are NOT suitable for general use.             */
  7 | /*  They are for use by integration routines for computing mutual information.*/
  8 | /*  For general use, remove the normal transformation and compute scale       */
  9 | /*  factors appropriately.                                                    */
 10 | /*                                                                            */
 11 | /******************************************************************************/
 12 | 
 13 | #include <assert.h>
 14 | #include <stdio.h>
 15 | #include <string.h>
 16 | #include <math.h>
 17 | #include <stdlib.h>
 18 | 
 19 | #if ! defined ( PI )
 20 | #define PI 3.141592653589793
 21 | #endif
 22 | 
 23 | extern double inverse_normal_cdf ( double p ) ;
 24 | extern void qsortdsi ( int first , int last , double *data , int *slave ) ;
 25 | 
 26 | class CubicSpline {
 27 | 
 28 | public:
 29 |    CubicSpline ( int n , double *xin , double *yin ) ;
 30 |    ~CubicSpline () ;
 31 |    double evaluate ( double x ) ;
 32 | 
 33 | private:
 34 |    int n ;
 35 |    double *x ;
 36 |    double *y ;
 37 |    double *y2 ;
 38 | } ;
 39 | 
 40 | class Bilinear {
 41 | 
 42 | public:
 43 |    Bilinear ( int nxin , double *xin , int nyin , double *yin , double *zin ,
 44 |               int extra ) ;
 45 |    ~Bilinear () ;
 46 |    double evaluate ( double x , double y ) ;
 47 | 
 48 | private:
 49 |    int quadratic ;
 50 |    int nx ;
 51 |    int ny ;
 52 |    double *x ;
 53 |    double *y ;
 54 |    double *z ;
 55 | } ;
 56 | 
 57 | /*
 58 | --------------------------------------------------------------------------------
 59 | 
 60 |    ParzenDensities ParzDens_? used for continuous mutual information
 61 | 
 62 | --------------------------------------------------------------------------------
 63 | */
 64 | 
 65 | class ParzDens_1 {
 66 | 
 67 | public:
 68 |    ParzDens_1 ( int n_tset , double *tset , int n_div ) ;
 69 |    ~ParzDens_1 () ;
 70 |    double density ( double x ) ;
 71 |    double low ;     // Lowest value with significant density
 72 |    double high ;    // And highest
 73 | 
 74 | private:
 75 |    int nd ;         // Number of points in array below
 76 |    double *d ;      // The data on which the density is based
 77 |    double var ;     // Presumed variance
 78 |    double factor ;  // Normalizing factor to make it a density
 79 |    CubicSpline *spline ; // Used only if interpolation
 80 | } ;
 81 | 
 82 | class ParzDens_2 {
 83 | 
 84 | public:
 85 |    ParzDens_2 ( int n_tset , double *tset0 , double *tset1 , int n_div ) ;
 86 |    ~ParzDens_2 () ;
 87 |    double density ( double x0 , double x1 ) ;
 88 | 
 89 | private:
 90 |    int nd ;         // Number of points in arrays below
 91 |    double *d0 ;     // The data on which the density is based; first variable
 92 |    double *d1 ;     // And second
 93 |    double var0 ;    // Presumed variance of first variable
 94 |    double var1 ;    // And second
 95 |    double factor ;  // Normalizing factor to make it a density
 96 |    Bilinear *bilin ; // Used only for bilinear interpolation
 97 | } ;
 98 | 
 99 | class ParzDens_3 {
100 | 
101 | public:
102 |    ParzDens_3 ( int n_tset , double *tset0 , double *tset1 , double *tset2 , int n_div ) ;
103 |    ~ParzDens_3 () ;
104 |    double density ( double x0 , double x1 , double x2 ) ;
105 | 
106 | private:
107 |    int nd ;         // Number of points in arrays below
108 |    double *d0 ;     // The data on which the density is based; first variable
109 |    double *d1 ;     // And second
110 |    double *d2 ;     // And third
111 |    double var0 ;    // Presumed variance of first variable
112 |    double var1 ;    // And second
113 |    double var2 ;    // And third
114 |    double factor ;  // Normalizing factor to make it a density
115 | } ;
116 | 
117 | /*
118 | --------------------------------------------------------------------------------
119 | 
120 |    MutualInformation
121 | 
122 | --------------------------------------------------------------------------------
123 | */
124 | 
125 | class MutualInformationParzen {  // Parzen window method
126 | 
127 | public:
128 |    MutualInformationParzen ( int nn , double *dep_vals , int ndiv ) ;
129 |    ~MutualInformationParzen () ;
130 |    double mut_inf ( double *x ) ;
131 | 
132 | private:
133 |    int n ;             // Number of cases
134 |    int n_div ;         // Number of divisions of range, typically 5-10
135 |    double *depvals ;   // 'Dependent' variable
136 |    ParzDens_1 *dens_dep ;   // Marginal density of 'dependent' variable
137 | } ;
138 | 
/*
   MutualInformationAdaptive - Mutual information by adaptive partitioning.
   Declaration only; the member functions are not defined in this file.
*/
class MutualInformationAdaptive {

public:
   MutualInformationAdaptive ( int nn , double *dep_vals ,
                               int respect_ties , double crit ) ;
   ~MutualInformationAdaptive () ;

   // Mutual information between x and the 'dependent' variable
   double mut_inf ( double *x , int respect_ties ) ;

private:
   int n ;            // Number of cases
   int *y ;           // Ranks of the 'dependent' variable
   int *y_tied ;      // y_tied[i] != 0 if the case with rank i is tied with
                      // the case with rank i+1
   double chi_crit ;  // Chi-square test criterion
} ;
153 | 
154 | 
155 | /*
156 | --------------------------------------------------------------------------------
157 | 
158 |    ParzDens_1 - Parzen density of a single variable
159 | 
160 | --------------------------------------------------------------------------------
161 | */
162 | 
163 | ParzDens_1::ParzDens_1 ( int n_tset , double *tset , int n_div )
164 | {
165 |    int i, j, *indices ;
166 |    double std, *x, *y, xbot, xinc, diff, sum ;
167 | 
168 |    nd = n_tset ;
169 |    spline = NULL ;
170 | 
171 |    d = (double *) malloc ( nd * sizeof(double) ) ;
172 | 
173 |    indices = (int *) malloc ( nd * sizeof(int) ) ;
174 | 
175 | /*
176 |    Convert the data to a normal distribution
177 | */
178 | 
179 |    for (i=0 ; i<nd ; i++) {
180 |       indices[i] = i ;
181 |       d[i] = tset[i] ;
182 |       }
183 |    qsortdsi ( 0 , nd-1 , d , indices ) ;
184 |    for (i=0 ; i<nd ; i++)
185 |       d[indices[i]] = inverse_normal_cdf ( (i + 1.0) / (nd + 1) ) ;
186 |    free ( indices ) ;
187 | 
188 |    std = 2.0 / n_div ;
189 |    var = std * std ;
190 |    high = 3.0 + 3.0 * std ;
191 |    low = -high ;
192 | 
193 |    factor = 1.0 / (nd * sqrt (2.0 * PI * var) ) ;
194 | 
195 |    if (nd <= 100)
196 |       return ;
197 | 
198 |    // We have a lot of cases, so prepare for cubic spline interpolation
199 |    x = (double *) malloc ( 1001 * sizeof(double) ) ;
200 |    y = (double *) malloc ( 1001 * sizeof(double) ) ;
201 | 
202 |    xinc = (-1.5 - low) / 100.0 ;
203 | 
204 |    for (i=0 ; i<100 ; i++)
205 |       x[i] = low + i * xinc ;
206 | 
207 |    xbot = x[99] ;
208 |    xinc = (1.5 - xbot) / 801.0 ;
209 |    for (i=0 ; i<800 ; i++)
210 |       x[i+100] = xbot + (i+1) * xinc ;
211 | 
212 |    xbot = x[899] ;
213 |    xinc = (high - xbot) / 101.0 ;
214 |    for (i=0 ; i<101 ; i++)
215 |       x[i+900] = xbot + (i+1) * xinc ;
216 | 
217 |    for (i=0 ; i<1001 ; i++) {
218 |       sum = 0.0 ;
219 |       for (j=0 ; j<nd ; j++) {
220 |          diff = x[i] - d[j] ;
221 |          sum += exp ( -0.5 * diff * diff / var ) ;
222 |          }
223 |       y[i] = factor * sum ;
224 |       }
225 | 
226 |    spline = new CubicSpline ( 1001 , x , y ) ;
227 | 
228 |    free ( x ) ;
229 |    free ( y ) ;
230 | }
231 | 
232 | ParzDens_1::~ParzDens_1 ()
233 | {
234 |    if (d != NULL)
235 |       free ( d ) ;
236 |    if (spline != NULL)
237 |       delete spline ;
238 | }
239 | 
240 | double ParzDens_1::density ( double x )
241 | {
242 |    int i ;
243 |    double sum, diff ;
244 | 
245 |    if (spline != NULL)
246 |       return spline->evaluate ( x ) ;
247 | 
248 |    sum = 0.0 ;
249 |    for (i=0 ; i<nd ; i++) {
250 |       diff = x - d[i];
251 |       sum += exp ( -0.5 * diff * diff / var ) ;
252 |       }
253 | 
254 |    return sum * factor ;
255 | }
256 | 
257 | /*
258 | --------------------------------------------------------------------------------
259 | 
260 |    ParzDens_2 - Parzen density of a bivariate pair
261 | 
262 | --------------------------------------------------------------------------------
263 | */
264 | 
265 | #define P2RES 200
266 | 
267 | ParzDens_2::ParzDens_2 ( int n_tset , double *tset0 , double *tset1 , int n_div )
268 | {
269 |    int i, j, k, k0, k1, k2, *indices ;
270 |    double *x, *y, *z, xbot, xinc, ybot, yinc, xlow, xhigh, ylow, yhigh, std ;
271 |    double diff0, diff1, sum ;
272 | 
273 |    nd = n_tset ;
274 | 
275 |    bilin = NULL ;
276 |    d0 = (double *) malloc ( 2 * nd * sizeof(double) ) ;
277 |    indices = (int *) malloc ( nd * sizeof(int) ) ;
278 |    d1 = d0 + nd ;
279 | 
280 | 
281 | /*
282 |    Convert the data to a normal distribution
283 | */
284 | 
285 |    for (i=0 ; i<nd ; i++) {
286 |       indices[i] = i ;
287 |       d0[i] = tset0[i] ;
288 |       }
289 |    qsortdsi ( 0 , nd-1 , d0 , indices ) ;
290 |    for (i=0 ; i<nd ; i++)
291 |       d0[indices[i]] = inverse_normal_cdf ( (i + 1.0) / (nd + 1) ) ;
292 | 
293 |    for (i=0 ; i<nd ; i++) {
294 |       indices[i] = i ;
295 |       d1[i] = tset1[i] ;
296 |       }
297 |    qsortdsi ( 0 , nd-1 , d1 , indices ) ;
298 |    for (i=0 ; i<nd ; i++)
299 |       d1[indices[i]] = inverse_normal_cdf ( (i + 1.0) / (nd + 1) ) ;
300 | 
301 |    free ( indices ) ;
302 | 
303 |    std = 2.0 / n_div ;
304 |    var0 = var1 = std * std ;
305 |    xhigh = yhigh = 3.0 + 2.0 * std ;
306 |    xlow = ylow = -xhigh ;
307 | 
308 |    factor = 1.0 / (nd * 2.0 * PI * sqrt ( var0 * var1 ) ) ;
309 | 
310 |    if (nd <= 100)
311 |       return ;
312 | 
313 |    // We have a lot of cases, so prepare for bilinear interpolation
314 |    x = (double *) malloc ( P2RES * sizeof(double) ) ;
315 |    y = (double *) malloc ( P2RES * sizeof(double) ) ;
316 |    z = (double *) malloc ( P2RES * P2RES * sizeof(double) ) ;
317 | 
318 |    if (x == NULL  ||  y == NULL  ||  z == NULL) {
319 |       if (x != NULL)
320 |          free ( x ) ;
321 |       if (y != NULL)
322 |          free ( y ) ;
323 |       if (z != NULL)
324 |          free ( z ) ;
325 |       return ;  // If insufficient memory, do not interpolate
326 |       }
327 | 
328 |    k0 = (int) (0.1 * P2RES) ;
329 |    xinc = (-1.5 - xlow) / k0 ;
330 |    for (i=0 ; i<k0 ; i++)
331 |       x[i] = xlow + i * xinc ;
332 | 
333 |    k1 = (int) (0.8 * P2RES) ;
334 |    xbot = x[k0-1] ;
335 |    xinc = (1.5 - xbot) / (k1 + 1) ;
336 |    for (i=0 ; i<k1 ; i++)
337 |       x[i+k0] = xbot + (i+1) * xinc ;
338 | 
339 |    xbot = x[k0+k1-1] ;
340 |    k2 = P2RES - k0 - k1 ;
341 |    xinc = (xhigh - xbot) / k2 ;
342 |    for (i=0 ; i<k2 ; i++)
343 |       x[i+k0+k1] = xbot + (i+1) * xinc ;
344 | 
345 | 
346 |    k0 = (int) (0.1 * P2RES) ;
347 |    yinc = (-1.5 - ylow) / k0 ;
348 |    for (i=0 ; i<k0 ; i++)
349 |       y[i] = ylow + i * yinc ;
350 | 
351 |    k1 = (int) (0.8 * P2RES) ;
352 |    ybot = y[k0-1] ;
353 |    yinc = (1.5 - ybot) / (k1 + 1) ;
354 |    for (i=0 ; i<k1 ; i++)
355 |       y[i+k0] = ybot + (i+1) * yinc ;
356 | 
357 |    ybot = y[k0+k1-1] ;
358 |    k2 = P2RES - k0 - k1 ;
359 |    yinc = (yhigh - ybot) / k2 ;
360 |    for (i=0 ; i<k2 ; i++)
361 |       y[i+k0+k1] = ybot + (i+1) * yinc ;
362 | 
363 |    for (i=0 ; i<P2RES ; i++) {
364 |       for (j=0 ; j<P2RES ; j++) {
365 |          sum = 0.0 ;
366 |          for (k=0 ; k<nd ; k++) {
367 |             diff0 = x[i] - d0[k] ;
368 |             diff1 = y[j] - d1[k] ;
369 |             sum += exp ( -0.5 * (diff0 * diff0 / var0 + diff1 * diff1 / var1 ));
370 |             }
371 |          z[i*P2RES+j] = factor * sum ;
372 |          }
373 |       }
374 | 
375 |    bilin = new Bilinear ( P2RES , x , P2RES , y , z , 1 ) ;
376 | 
377 |    free ( x ) ;
378 |    free ( y ) ;
379 |    free ( z ) ;
380 | }
381 | 
382 | ParzDens_2::~ParzDens_2 ()
383 | {
384 |    if (d0 != NULL)
385 |       free ( d0 ) ;
386 |    if (bilin != NULL)
387 |       delete bilin ;
388 | }
389 | 
390 | double ParzDens_2::density ( double x0 , double x1 )
391 | {
392 |    int i ;
393 |    double sum, diff0, diff1 ;
394 | 
395 |    if (bilin != NULL)
396 |       return bilin->evaluate ( x0 , x1 ) ;
397 | 
398 |    sum = 0.0 ;
399 |    for (i=0 ; i<nd ; i++) {
400 |       diff0 = x0 - d0[i] ;
401 |       diff1 = x1 - d1[i] ;
402 |       sum += exp ( -0.5 * (diff0 * diff0 / var0 + diff1 * diff1 / var1 ) ) ;
403 |       }
404 | 
405 |    return sum * factor ;
406 | }
407 | 
408 | /*
409 | --------------------------------------------------------------------------------
410 | 
411 |    ParzDens_3 - Parzen density of a trivariate trio
412 | 
413 | --------------------------------------------------------------------------------
414 | */
415 | 
416 | ParzDens_3::ParzDens_3 ( int n_tset , double *tset0 , double *tset1 , double *tset2 , int n_div )
417 | {
418 |    int i, *indices ;
419 |    double std ;
420 | 
421 |    nd = n_tset ;
422 | 
423 |    d0 = (double *) malloc ( 3 * nd * sizeof(double) ) ;
424 |    indices = (int *) malloc ( nd * sizeof(int) ) ;
425 |    d1 = d0 + nd ;
426 |    d2 = d1 + nd ;
427 | 
428 | /*
429 |    Convert the data to a normal distribution
430 | */
431 | 
432 |    for (i=0 ; i<nd ; i++) {
433 |       indices[i] = i ;
434 |       d0[i] = tset0[i] ;
435 |       }
436 |    qsortdsi ( 0 , nd-1 , d0 , indices ) ;
437 |    for (i=0 ; i<nd ; i++)
438 |       d0[indices[i]] = inverse_normal_cdf ( (i + 1.0) / (nd + 1) ) ;
439 | 
440 |    for (i=0 ; i<nd ; i++) {
441 |       indices[i] = i ;
442 |       d1[i] = tset1[i] ;
443 |       }
444 |    qsortdsi ( 0 , nd-1 , d1 , indices ) ;
445 |    for (i=0 ; i<nd ; i++)
446 |       d1[indices[i]] = inverse_normal_cdf ( (i + 1.0) / (nd + 1) ) ;
447 | 
448 |    for (i=0 ; i<nd ; i++) {
449 |       indices[i] = i ;
450 |       d2[i] = tset2[i] ;
451 |       }
452 |    qsortdsi ( 0 , nd-1 , d2 , indices ) ;
453 |    for (i=0 ; i<nd ; i++)
454 |       d2[indices[i]] = inverse_normal_cdf ( (i + 1.0) / (nd + 1) ) ;
455 | 
456 |    free ( indices ) ;
457 | 
458 |    std = 2.0 / n_div ;
459 |    var0 = var1 = var2 = std * std ;
460 | 
461 |    factor = 1.0 / (nd * 2.0 * PI * sqrt(2.0 * PI) * sqrt(var0 * var1 * var2) ) ;
462 | }
463 | 
464 | ParzDens_3::~ParzDens_3 ()
465 | {
466 |    if (d0 != NULL)
467 |       free ( d0 ) ;
468 | }
469 | 
470 | double ParzDens_3::density ( double x0 , double x1 , double x2 )
471 | {
472 |    int i ;
473 |    double sum, diff0, diff1, diff2 ;
474 | 
475 |    sum = 0.0 ;
476 |    for (i=0 ; i<nd ; i++) {
477 |       diff0 = x0 - d0[i] ;
478 |       diff1 = x1 - d1[i] ;
479 |       diff2 = x2 - d2[i] ;
480 |       sum += exp ( -0.5 * (diff0 * diff0 / var0 + diff1 * diff1 / var1 +
481 |                            diff2 * diff2 / var2 ) ) ;
482 |       }
483 | 
484 |    return sum * factor ;
485 | }
486 | 
487 | 


--------------------------------------------------------------------------------
/POWELL.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  POWELL - Use Powell's method to find a local minimum of a function        */
  4 | /*           Note that we must provide in y the function value at starting x. */
  5 | /*                                                                            */
  6 | /******************************************************************************/
  7 | 
  8 | #include <math.h>
  9 | 
 10 | extern int brentmin ( int itmax , double critlim , double eps ,
 11 |                       double tol , int (*criter) (double , double *) ,
 12 |                       double *x1 , double *x2 , double *x3 , double *y ) ;
 13 | extern int glob_min ( double low , double high , int npts , int log_space ,
 14 |                       double critlim , int (*criter) (double , double *) ,
 15 |                       double *x1, double *y1 , double *x2, double *y2 ,
 16 |                       double *x3, double *y3 ) ;
 17 | extern int user_pressed_escape () ;
 18 | 
 19 | /*
 20 |    This routine uses the general univariate minimizers 'glob_min' and
 21 |    'brentmin' to minimize along the gradient line. So we must have a local
 22 |    function for them to call, and it must have access to the relevant data.
 23 |    These statics handle that.
 24 | */
 25 | 
 26 | static int univar_crit ( double t , double *fval ) ; // Local univariate criterion
 27 | static double *local_x, *local_base, *local_direc ;  // It uses these
 28 | static int local_n ;
 29 | static int (*local_criter) ( double *xvec , double *fval ) ;
 30 | 
 31 | int powell (
 32 |    double scale ,         // Used for size of initial search by glob_min()
 33 |    int maxits ,           // Iteration limit (0 for no limit)
 34 |    double critlim ,       // Quit if crit drops this low
 35 |    double tol ,           // Convergence tolerance
 36 |    int (*criter) ( double * , double * ) , // Criterion func
 37 |    int n ,                // Number of variables
 38 |    double *x ,            // In/out of independent variable
 39 |    double *y ,            // In/out of function value
 40 |    double *base ,         // Work vector n long
 41 |    double *p0 ,           // Work vector n long
 42 |    double *direc ,        // Work vector n*n long
 43 |    int update_progress    // Call setpos_progress_message() to update progress bar?
 44 |    )
 45 | {
 46 |    int i, j, idir, iter, user_quit, convergence_counter, idelta, replaced ;
 47 |    double fval, fbest, f0, test, t1, t2, t3, y1, y2, y3 ;
 48 |    double prev_best, toler, delta, len, ftest, mult ;
 49 | 
 50 |    t2 = 0.0 ;  // Not needed.  Shuts up LINT.
 51 | 
 52 | /*
 53 |    Initialize for the local univariate criterion which may be called by
 54 |    'glob_min' and 'brentmin' to minimize along the search direction.
 55 | */
 56 | 
 57 | 
 58 |    local_x = x ;
 59 |    local_base = base ;
 60 |    local_n = n ;
 61 |    local_criter = criter ;
 62 | 
 63 | /*
 64 |    Initialize the direction matrix to be an identity.
 65 | */
 66 | 
 67 |    for (i=0 ; i<n ; i++) {
 68 |       for (j=0 ; j<n ; j++)
 69 |          direc[i*n+j] = (j == i) ? 1.0 : 0.0 ;
 70 |       }
 71 | 
 72 | /*
 73 |    Main loop.  For safety we impose a limit on iterations.
 74 | */
 75 | 
 76 |    replaced = -1 ;          // Assume no replacement will be done
 77 |    prev_best = 1.e90 ;
 78 |    fbest = *y ;
 79 |    iter = convergence_counter = user_quit = 0 ;
 80 | 
 81 |    for (;;) {
 82 | 
 83 |       if ((iter++ >= maxits)  &&  (maxits > 0))
 84 |          break ;
 85 | 
 86 |       if (fbest < critlim)     // Do we satisfy user yet?
 87 |          break ;
 88 | 
 89 | /*
 90 |    Convergence check
 91 | */
 92 | 
 93 |       if (fabs(prev_best) <= 1.0)            // If the function is small
 94 |          toler = tol ;                       // Work on absolutes
 95 |       else                                   // But if it is large
 96 |          toler = tol * fabs(prev_best) ;     // Keep things relative
 97 | 
 98 |       if ((prev_best - fbest)  <=  toler) {  // If little improvement
 99 |          if (++convergence_counter >= 2)     // Then count how many
100 |             break ;                          // And quit if too many
101 |          }
102 |       else                                   // But a good iteration
103 |          convergence_counter = 0 ;           // Resets this counter
104 | 
105 |       if (fbest < prev_best)                 // Always true if well behaved
106 |          prev_best = fbest ;
107 | 
108 | /*
109 |    Does the user want to quit?
110 | */
111 | 
112 |       if ((user_quit = user_pressed_escape ()) != 0)
113 |          break ;
114 | 
115 | /*
116 |    Loop over all search directions, minimizing in each.
117 |    Keep track of the direction that gave the most improvement.
118 |    For efficiency, we keep 'replaced' as the vector that just got replaced
119 |    (or -1 if no replacement was done).  Skip the first direction if that
120 |    was the one just replaced!
121 | */
122 | 
123 |       for (i=0 ; i<n ; i++)            // Preserve the starting point
124 |          p0[i] = x[i] ;
125 |       f0 = fbest ;                     // And function value at that point
126 |       delta = -1.0 ;                   // Will keep best improvement here
127 |       idelta = 0 ;                     // Shuts up LINT
128 | 
129 |       for (idir=0 ; idir<n ; idir++) { // For each direction
130 |          if ((n>1)  &&  ! idir  &&  ! replaced) // If we just replaced the
131 |             continue ;                 // first vector, avoid waste
132 |          for (i=0 ; i<n ; i++)         // Local criter steps out from here
133 |             base[i] = x[i] ;           // So it must be current point
134 |          y2 = fbest ;                  // Glob_min can use first f value
135 |          local_direc = direc + idir * n ; // This is the idir direction
136 |          for (mult=1.0 ; mult < 101.0 ; mult *= 10.0) {
137 |             user_quit = glob_min ( -mult * scale , mult * scale , 7 , 0 ,
138 |                                    critlim , univar_crit , &t1 , &y1 , &t2 ,
139 |                                    &y2 , &t3 , &y3 ) ;
140 |             if (user_quit)  // ESCape?
141 |                break ;
142 |             if ((y2 < y1)  &&  (y2 < y3)) // Loop until minimum is bounded
143 |                break ;
144 |             }
145 |          if (user_quit  ||  (y2 < critlim)) { // ESCape or good enough already?
146 |             if (y2 < fbest) {                 // If global caused improvement
147 |                for (i=0 ; i<n ; i++)          // Implement that improvement
148 |                   x[i] = base[i] + t2 * local_direc[i] ;
149 |                fbest = y2 ;
150 |                }
151 |             else {                            // Else revert to starting point
152 |                for (i=0 ; i<n ; i++)
153 |                   x[i] = base[i] ;
154 |                }
155 |             goto FINISH ;
156 |             }
157 | 
158 |          if (convergence_counter) { // If failing, try extra hard
159 |             fval = y2 ;
160 |             user_quit = brentmin ( 20 , critlim , tol , 1.e-7 ,
161 |                               univar_crit , &t1 , &t2 , &t3 , &fval ) ;
162 |             }
163 |          else {                    // But normally refine only moderately
164 |             fval = y2 ;
165 |             user_quit = brentmin ( 10 , critlim , 10.0 * tol , 1.e-5 ,
166 |                               univar_crit , &t1 , &t2 , &t3 , &fval ) ;
167 |             }
168 |          if (user_quit)
169 |             goto FINISH ;
170 | 
171 |          for (i=0 ; i<n ; i++)          // Get current point from parametric
172 |             x[i] = base[i] + t2 * local_direc[i] ;
173 | 
174 |          if (fbest - fval > delta) {    // Keep track of best direction
175 |             delta = fbest - fval ;
176 |             idelta = idir ;
177 |             }
178 |          fbest = fval ;                  // This is always the best so far
179 |          } // For all directions
180 | 
181 | /*
182 |    Before looping through all n directions, we stood at point p0 with f=f0.
183 |    We now stand at point x with f=fbest.
184 |    It is quite possible that the average direction of motion points right
185 |    along a ravine.  Thus, it behooves us to step out in that direction.
186 |    Try it.  We might luck out.
187 | */
188 | 
189 |       for (i=0 ; i<n ; i++) {
190 |          p0[i] = x[i] - p0[i] ;    // Preserve average direction here
191 |          base[i] = x[i] + p0[i] ;  // Step out to this point (borrow base)
192 |          }
193 |       user_quit = criter ( base , &fval ) ;     // Evaluate function at this test point
194 | 
195 |       if (user_quit)
196 |          goto FINISH ;
197 | 
198 | /*
199 |    If this step improved, and if a more sophisticated second derivative
200 |    test is passed, we replace the old direction of max improvement with this
201 |    average direction (after first minimizing in this great direction).
202 |    Also, set 'replaced' to the vector that was replaced if that is done.
203 |    Otherwise set it to -1.  We do that so on the next iteration we skip
204 |    minimization along the first direction if that happened to be the one
205 |    that was just replaced!
206 | */
207 | 
208 |       replaced = -1 ;                  // Assume no replacement will be done
209 |       ftest = fbest ;                  // Save for replacement test
210 |       if (fval < fbest) {              // If this trial step improved
211 |          fbest = fval ;                // Might as well keep this better point
212 |          for (i=0 ; i<n ; i++)         // Keep x current
213 |             x[i] = base[i] ;
214 |          }
215 | 
216 |       if (fval < f0) {                 // First of two tests for replacement
217 |          test = f0 - ftest - delta ;
218 |          test = 2.0 * (f0 - 2.0 * ftest + fval) * test * test ;
219 |          if (test < delta * (f0 - fval) * (f0 - fval)) { // Use this direction
220 |             replaced = idelta ;        // Record this upcoming replacement
221 |             len = 0.0 ;                // Will cumulate length of new dir
222 |             for (i=0 ; i<n ; i++)
223 |                len += p0[i] * p0[i] ;
224 |             len = sqrt ( len ) ;
225 |             for (i=0 ; i<n ; i++)
226 |                p0[i] /= len ;          // Keep direction unit length
227 |             local_direc = p0 ;         // We put the average direction here 
228 |             y2 = fbest ;               // Glob_min can use first f value
229 |             for (i=0 ; i<n ; i++)      // Set out from here
230 |                base[i] = x[i] ;
231 |             for (mult=1.0 ; mult < 101.0 ; mult *= 10.0) {
232 |                user_quit = glob_min ( -mult * scale , mult * scale , 7 , 0 ,
233 |                                       critlim , univar_crit , &t1 , &y1 , &t2 ,
234 |                                       &y2 , &t3 , &y3 ) ;
235 |                if (user_quit)  // ESCape?
236 |                   break ;
237 |                if ((y2 < y1)  &&  (y2 < y3)) // Loop until minimum is bounded
238 |                   break ;
239 |                }
240 |             if (user_quit  ||  (y2 < critlim)) { // ESCape or good enough already?
241 |                if (y2 < fbest) {                 // If global caused improvement
242 |                   for (i=0 ; i<n ; i++)          // Implement that improvement
243 |                      x[i] = base[i] + t2 * local_direc[i] ;
244 |                   fbest = y2 ;
245 |                   }
246 |                else {                            // Else revert to starting point
247 |                   for (i=0 ; i<n ; i++)
248 |                      x[i] = base[i] ;
249 |                   }
250 |                break ;
251 |                }
252 |             if (convergence_counter) {  // If failing, try extra hard
253 |                fval = y2 ;
254 |                user_quit = brentmin ( 20 , critlim , tol , 1.e-7 ,
255 |                                  univar_crit , &t1 , &t2 , &t3 , &fval ) ;
256 |                }
257 |             else {                    // But normally refine only moderately
258 |                fval = y2 ;
259 |                user_quit = brentmin ( 10 , critlim , 10.0 * tol , 1.e-5 ,
260 |                                  univar_crit , &t1 , &t2 , &t3 , &fval ) ;
261 |                }
262 |             if (user_quit) {              // If user pressed ESCape
263 |                fbest = -fval ;
264 |                user_quit = 1 ;
265 |                goto FINISH ;
266 |                }
267 | 
268 |             for (i=0 ; i<n ; i++)          // Get current point from parametric
269 |                x[i] = base[i] + t2 * local_direc[i] ;
270 |             fbest = fval ;                 // This is always the best so far
271 | 
272 |             for (i=0 ; i<n ; i++)
273 |                direc[idelta*n+i] = p0[i] ; // Replace best direction with this
274 |             } // If second derivative test passed
275 |          } // If test one passed
276 |       } // Main loop
277 | 
278 | FINISH:
279 | 
280 |       *y = fbest ;
281 |       return user_quit ;
282 | }
283 | 
284 | 
285 | /*
286 | --------------------------------------------------------------------------------
287 | 
288 |    local criterion for univariate minimization
289 | 
290 | --------------------------------------------------------------------------------
291 | */
292 | 
293 | static int univar_crit ( double t , double *fval )
294 | {
295 |    int i ;
296 | 
297 |    for (i=0 ; i<local_n ; i++)
298 |       local_x[i] = local_base[i] + t * local_direc[i] ;
299 |    return local_criter ( local_x , fval ) ;
300 | }
301 | 


--------------------------------------------------------------------------------
/QSORTD.CPP:
--------------------------------------------------------------------------------
  1 | /****************************************************************************/
  2 | /*                                                                          */
  3 | /*  QSORT - Quick sort a double array.                                      */
  4 | /*                                                                          */
  5 | /****************************************************************************/
  6 | 
  7 | #include <math.h>
  8 | 
  9 | void qsortd ( int first , int last , double *data )
 10 | {
 11 |    int lower, upper ;
 12 |    double ftemp, split ;
 13 | 
 14 |    split = data[(first+last)/2] ;
 15 |    lower = first ;
 16 |    upper = last ;
 17 | 
 18 |    do {
 19 |       while ( split > data[lower] )
 20 |          ++lower ;
 21 |       while ( split < data[upper] )
 22 |          --upper ;
 23 |       if (lower == upper) {
 24 |          ++lower ;
 25 |          --upper ;
 26 |          }
 27 |       else if (lower < upper) {
 28 |          ftemp = data[lower] ;
 29 |          data[lower++] = data[upper] ;
 30 |          data[upper--] = ftemp ;
 31 |          }
 32 |       } while ( lower <= upper ) ;
 33 | 
 34 |    if (first < upper)
 35 |       qsortd ( first , upper , data ) ;
 36 |    if (lower < last)
 37 |       qsortd ( lower , last , data ) ;
 38 | }
 39 | 
 40 | void qsortds ( int first , int last , double *data , double *slave )
 41 | {
 42 |    int lower, upper ;
 43 |    double ftemp, split ;
 44 | 
 45 |    split = data[(first+last)/2] ;
 46 |    lower = first ;
 47 |    upper = last ;
 48 | 
 49 |    do {
 50 |       while ( split > data[lower] )
 51 |          ++lower ;
 52 |       while ( split < data[upper] )
 53 |          --upper ;
 54 |       if (lower == upper) {
 55 |          ++lower ;
 56 |          --upper ;
 57 |          }
 58 |       else if (lower < upper) {
 59 |          ftemp = slave[lower] ;
 60 |          slave[lower] = slave[upper] ;
 61 |          slave[upper] = ftemp ;
 62 |          ftemp = data[lower] ;
 63 |          data[lower++] = data[upper] ;
 64 |          data[upper--] = ftemp ;
 65 |          }
 66 |       } while ( lower <= upper ) ;
 67 | 
 68 |    if (first < upper)
 69 |       qsortds ( first , upper , data , slave ) ;
 70 |    if (lower < last)
 71 |       qsortds ( lower , last , data , slave ) ;
 72 | }
 73 | 
 74 | void qsortdsi ( int first , int last , double *data , int *slave )
 75 | {
 76 |    int lower, upper, itemp ;
 77 |    double ftemp, split ;
 78 | 
 79 |    split = data[(first+last)/2] ;
 80 |    lower = first ;
 81 |    upper = last ;
 82 | 
 83 |    do {
 84 |       while ( split > data[lower] )
 85 |          ++lower ;
 86 |       while ( split < data[upper] )
 87 |          --upper ;
 88 |       if (lower == upper) {
 89 |          ++lower ;
 90 |          --upper ;
 91 |          }
 92 |       else if (lower < upper) {
 93 |          itemp = slave[lower] ;
 94 |          slave[lower] = slave[upper] ;
 95 |          slave[upper] = itemp ;
 96 |          ftemp = data[lower] ;
 97 |          data[lower++] = data[upper] ;
 98 |          data[upper--] = ftemp ;
 99 |          }
100 |       } while ( lower <= upper ) ;
101 | 
102 |    if (first < upper)
103 |       qsortdsi ( first , upper , data , slave ) ;
104 |    if (lower < last)
105 |       qsortdsi ( lower , last , data , slave ) ;
106 | }


--------------------------------------------------------------------------------
/RANDOM.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  RANDOM - Assorted non-uniform random number generators.                   */
  4 | /*           They all call an external uniform generator, unifrand().         */
  5 | /*                                                                            */
  6 | /*    normal () - Normal (mean zero, unit variance)                           */
  7 | /*    normal_pair ( double *x1 , double *x2 ) - Pair of standard normals      */
  8 | /*    beta ( int v1 , int v2 ) - Beta with parameters v1 / 2 and v2 / 2       */
  9 | /*    rand_sphere ( int nvars , double *x ) - Uniform on unit sphere surface  */
 10 | /*    cauchy ( int n , double scale , double *x ) - Multivariate Cauchy       */
 11 | /*                                                                            */
 12 | /******************************************************************************/
 13 | 
 14 | #include <math.h>
 15 | 
 16 | #if ! defined ( PI )
 17 | #define PI 3.141592653589793
 18 | #endif
 19 | 
 20 | extern double unifrand () ;
 21 | 
 22 | /*
 23 | --------------------------------------------------------------------------------
 24 | 
 25 |    Generate a standard normal random variable or a pair of them
 26 |    using the Box-Muller method.
 27 | 
 28 | --------------------------------------------------------------------------------
 29 | */
 30 | 
 31 | double normal ()
 32 | {
 33 |    double x1, x2 ;
 34 | 
 35 |    for (;;) {
 36 |       x1 = unifrand () ;
 37 |       if (x1 <= 0.0)      // Safety: log(0) is undefined
 38 |          continue ;
 39 |       x1 = sqrt ( -2.0 * log ( x1 )) ;
 40 |       x2 = cos ( 2.0 * PI * unifrand () ) ;
 41 |       return x1 * x2 ;
 42 |       }
 43 | }
 44 | 
 45 | void normal_pair ( double *x1 , double *x2 )
 46 | {
 47 |    double u1, u2 ;
 48 | 
 49 |    for (;;) {
 50 |       u1 = unifrand () ;
 51 |       if (u1 <= 0.0)      // Safety: log(0) is undefined
 52 |          continue ;
 53 |       u1 = sqrt ( -2.0 * log ( u1 )) ;
 54 |       u2 = 2.0 * PI * unifrand () ;
 55 |       *x1 = u1 * sin ( u2 ) ;
 56 |       *x2 = u1 * cos ( u2 ) ;
 57 |       return ;
 58 |       }
 59 | }
 60 | 
 61 | /*
 62 | --------------------------------------------------------------------------------
 63 | 
 64 |    Generate a Gamma random variable having parameter v/2
 65 | 
 66 | --------------------------------------------------------------------------------
 67 | */
 68 | 
 69 | double gamma ( int v )
 70 | {
 71 |    double x, y, z, vm1, root ;
 72 | 
 73 |    switch (v) {
 74 | 
 75 |       case 1:                   // Chi-square with 1 df is 2 gamma(.5)
 76 |          x = normal () ;
 77 |          return 0.5 * x * x ;
 78 | 
 79 |       case 2:                   // Gamma(1) is exponential(1)
 80 |          for (;;) {
 81 |             x = unifrand () ;
 82 |             if (x > 0.0)
 83 |                return -log ( x ) ;
 84 |             }
 85 | 
 86 |       default:                  // Valid for all real a>1 (a=v/2)
 87 |          vm1 = 0.5 * v - 1.0 ;
 88 |          root = sqrt ( v - 1.0 ) ;
 89 | 
 90 |          for (;;) {
 91 |             y = tan ( PI * unifrand () ) ;
 92 |             x = root * y + vm1 ;
 93 |             if (x <= 0.0)
 94 |                continue ;
 95 |             z = (1.0 + y * y) * exp ( vm1 * log(x/vm1) - root * y ) ;
 96 |             if (unifrand () <= z)
 97 |                return x ;
 98 |             }
 99 |       }
100 | }
101 | 
102 | 
103 | /*
104 | --------------------------------------------------------------------------------
105 | 
106 |    Generate a beta random variable with parameters v1 / 2 and v2 / 2.
107 | 
108 | --------------------------------------------------------------------------------
109 | */
110 | 
double beta ( int v1 , int v2 )
{
   // Ratio of one gamma variate to the sum of two.
   // The v1 variate is drawn first, then the v2 variate.
   const double g_first = gamma ( v1 ) ;
   const double g_second = gamma ( v2 ) ;
   const double total = g_first + g_second ;

   return g_first / total ;
}
120 | 
121 | /*
122 | --------------------------------------------------------------------------------
123 | 
124 |    Generate a random point on an n-sphere.
125 | 
126 | --------------------------------------------------------------------------------
127 | */
128 | 
129 | void rand_sphere ( int nvars , double *x )
130 | {
131 |    int i ;
132 |    double length ;
133 | 
134 |    length = 0.0 ;
135 |    for (i=0 ; i<nvars/2 ; i++) {      // Efficiently generate pairs
136 |       normal_pair ( &x[2*i] , &x[2*i+1] ) ;
137 |       length += x[2*i] * x[2*i]  +  x[2*i+1] * x[2*i+1] ;
138 |       }
139 | 
140 |    if (nvars % 2) {                   // If odd, get the last one
141 |       x[nvars-1] = normal () ;
142 |       length += x[nvars-1] * x[nvars-1] ;
143 |       }
144 | 
145 |    length = 1.0 / sqrt ( length ) ;
146 |    while (nvars--)
147 |       x[nvars] *= length ;
148 | }
149 | 
150 | 
151 | /*
152 | --------------------------------------------------------------------------------
153 | 
154 |    Generate a random vector following the n-variate Cauchy density
155 |    with specified scale.
156 | 
157 | --------------------------------------------------------------------------------
158 | */
159 | 
160 | void cauchy ( int n , double scale , double *x )
161 | {
162 |    double temp ;
163 | 
164 |    if (n == 1) {
165 |       temp = PI * unifrand () - 0.5 * PI ;
166 |       x[0] = scale * tan ( 0.99999999 * temp ) ;
167 |       return ;
168 |       }
169 | 
170 |    rand_sphere ( n , x ) ;
171 | 
172 |    temp = beta ( n , 1 ) ;
173 | 
174 |    if (temp < 1.0)
175 |       temp = scale * sqrt ( temp / (1.0 - temp) ) ;
176 |    else
177 |       temp = 1.e10 ;
178 | 
179 |    while (n--)
180 |       x[n] *= temp ;
181 | }
182 | 
183 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Apress Source Code
 2 | 
 3 | This repository accompanies Data Mining Algorithms in C++ (http://www.apress.com/9781484233146) by Timothy Masters (Apress, 2018).
 4 | 
 5 | [comment]: #cover
 6 | ![Cover image](9781484233146.png)
 7 | 
 8 | Download the files as a zip using the green button, or clone the repository to your machine using Git.
 9 | 
10 | ## Releases
11 | 
12 | Release v1.0 corresponds to the code in the published book, without corrections or updates.
13 | 
14 | ## Contributions
15 | 
16 | See the file Contributing.md for more information on how you can contribute to this repository.


--------------------------------------------------------------------------------
/SPEARMAN.CPP:
--------------------------------------------------------------------------------
 1 | /******************************************************************************/
 2 | /*                                                                            */
 3 | /*  SPEARMAN - Compute Spearman Rho                                           */
 4 | /*                                                                            */
 5 | /******************************************************************************/
 6 | 
 7 | #include <math.h>
 8 | #include <string.h>
 9 | 
10 | extern void qsortds ( int first , int last , double *data , double *slave ) ;
11 | 
12 | double spearman (  // Returns rho in range -1 to 1
13 |    int n ,         // Input: Number of cases
14 |    double *var1 ,  // Input: One variable
15 |    double *var2 ,  // Input: Other variable
16 |    double *x ,     // Work vector n long
17 |    double *y       // Work vector n long
18 |    )
19 | {
20 |    int j, k, ntied ;
21 |    double val, x_tie_correc, y_tie_correc ;
22 |    double dn, ssx, ssy, rank, diff, rankerr, rho ;
23 | 
24 |    // We need to rearrange input vectors, so copy them to work vectors
25 |    // To avoid disturbing the caller
26 |    if (x != var1)
27 |       memcpy ( x , var1 , n * sizeof(double) ) ;
28 |    if (y != var2)
29 |       memcpy ( y , var2 , n * sizeof(double) ) ;
30 | 
31 |    // Compute ties in x, compute correction as SUM ( ties**3 - ties )
32 |    // The following routine sorts x ascending and simultaneously moves y
33 |    qsortds ( 0 , n-1 , x , y ) ;
34 |    x_tie_correc = 0.0 ;
35 |    for (j=0 ; j<n ; ) { // Convert x to ranks, cumulate tie corec
36 |       val = x[j] ;
37 |       for (k=j+1 ; k<n ; k++) {  // Find all ties
38 |          if (x[k] > val)
39 |             break ;
40 |          }
41 |       ntied = k - j ;
42 |       x_tie_correc += (double) ntied * ntied * ntied - ntied ;
43 |       rank = 0.5 * ((double) j + (double) k + 1.0) ;
44 |       while (j < k)
45 |          x[j++] = rank ;
46 |       } // For each case in sorted x array
47 | 
48 |    // Now do same for y
49 |    qsortds ( 0 , n-1 , y , x ) ;
50 |    y_tie_correc = 0.0 ;
51 |    for (j=0 ; j<n ; ) { // Convert y to ranks, cumulate tie corec
52 |       val = y[j] ;
53 |       for (k=j+1 ; k<n ; k++) {  // Find all ties
54 |          if (y[k] > val)
55 |             break ;
56 |          }
57 |       ntied = k - j ;
58 |       y_tie_correc += (double) ntied * ntied * ntied - ntied ;
59 |       rank = 0.5 * ((double) j + (double) k + 1.0) ;
60 |       while (j < k)
61 |          y[j++] = rank ;
62 |       } // For each case in sorted y array
63 | 
64 |    // Final computations
65 |    dn = n ;
66 |    ssx = (dn * dn * dn - dn - x_tie_correc) / 12.0 ;
67 |    ssy = (dn * dn * dn - dn - y_tie_correc) / 12.0 ;
68 |    rankerr = 0.0 ;
69 |    for (j=0 ; j<n ; j++) { // Cumulate squared rank differences
70 |       diff = x[j] - y[j] ;
71 |       rankerr += diff * diff ;
72 |       }
73 |    rho = 0.5 * (ssx + ssy - rankerr) / sqrt (ssx * ssy + 1.e-20) ;
74 |    return rho ;
75 | }
76 | 


--------------------------------------------------------------------------------
/SPLINE.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  SPLINE - CubicSpline class                                                */
  4 | /*                                                                            */
  5 | /******************************************************************************/
  6 | 
  7 | #include <assert.h>
  8 | #include <math.h>
  9 | #include <stdlib.h>
 10 | #include <string.h>
 11 | 
 12 | extern void qsortds ( int first , int last , double *x , double *y ) ;
 13 | 
 14 | class CubicSpline {
 15 | 
 16 | public:
 17 |    CubicSpline ( int n , double *xin , double *yin ) ;
 18 |    ~CubicSpline () ;
 19 |    double evaluate ( double x ) ;
 20 | 
 21 | private:
 22 |    int n ;
 23 |    double *x ;
 24 |    double *y ;
 25 |    double *y2 ;
 26 | } ;
 27 | 
 28 | 
 29 | CubicSpline::CubicSpline (
 30 |    int nin ,       // Number of input points
 31 |    double *xin ,   // They are here, not necessarily sorted
 32 |    double *yin
 33 |    )
 34 | {
 35 |    int i ;
 36 |    double temp, p, *c ;
 37 | 
 38 |    n = nin ;
 39 |    c =  (double *) malloc ( n * sizeof(double) ) ;
 40 |    x =  (double *) malloc ( n * sizeof(double) ) ;
 41 |    y =  (double *) malloc ( n * sizeof(double) ) ;
 42 |    y2 = (double *) malloc ( n * sizeof(double) ) ;
 43 | 
 44 |    assert ( c != NULL ) ;
 45 |    assert ( x != NULL ) ;
 46 |    assert ( y != NULL ) ;
 47 |    assert ( y2 != NULL ) ;
 48 | 
 49 |    memcpy ( x , xin , n * sizeof(double) ) ;
 50 |    memcpy ( y , yin , n * sizeof(double) ) ;
 51 |    qsortds ( 0 , n-1 , x , y ) ;
 52 | 
 53 |    y2[0] = c[0] = 0.0 ;
 54 | 
 55 |    for (i=1 ; i<n-1 ; i++) {
 56 |       temp = (x[i] - x[i-1]) / (x[i+1] - x[i-1]) ;
 57 |       p = temp * y2[i-1] + 2.0 ;
 58 |       y2[i] = (temp - 1.0) / p ;
 59 |       c[i] = (y[i+1] - y[i]) / (x[i+1] - x[i]) ;
 60 |       c[i] -= (y[i] - y[i-1]) / (x[i] - x[i-1]) ;
 61 |       c[i] = (6.0 * c[i] / (x[i+1] - x[i-1]) - temp * c[i-1]) / p ;
 62 |       }
 63 | 
 64 |    y2[n-1] = 0.0 ;
 65 |    for (i=n-2 ; i>=0 ; i--)
 66 |       y2[i] = y2[i] * y2[i+1] + c[i] ;
 67 | 
 68 |    free ( c ) ;
 69 | }
 70 | 
 71 | CubicSpline::~CubicSpline ()
 72 | {
 73 |    free ( x ) ;
 74 |    free ( y ) ;
 75 |    free ( y2 ) ;
 76 | }
 77 | 
 78 | double CubicSpline::evaluate ( double xpt )
 79 | {
 80 |    int k, klo, khi ;
 81 |    double dist, a, b, aa, bb, val ;
 82 | 
 83 |    if (xpt < x[0])
 84 |       return y[0] ;
 85 | 
 86 |    if (xpt > x[n-1])
 87 |       return y[n-1] ;
 88 | 
 89 |    klo = 0 ;
 90 |    khi = n - 1 ;
 91 | 
 92 |    while (khi > klo+1) {
 93 |       k = (khi + klo) / 2 ;
 94 |       if (xpt < x[k])
 95 |          khi = k ;
 96 |       else
 97 |          klo = k ;
 98 |       }
 99 | 
100 |    dist = x[khi] - x[klo] + 1.e-60 ;
101 |    a = (x[khi] - xpt) / dist ;
102 |    b = (xpt - x[klo]) / dist ;
103 |    aa = a * (a * a - 1.0) ;
104 |    bb = b * (b * b - 1.0) ;
105 | 
106 |    val = (aa * y2[klo] + bb * y2[khi]) * dist * dist / 6.0 ;
107 |    return a * y[klo] + b * y[khi] + val ;
108 | }
109 | 


--------------------------------------------------------------------------------
/SVDCMP.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  SVDCMP  -  SingularValueDecomp class for computing the singular value     */
  4 | /*             decomposition of a rectangular matrix having at least as many  */
  5 | /*             rows as columns.                                               */
  6 | /*             This also includes a back-substitution routine for computing   */
  7 | /*             solutions to linear systems.                                   */
  8 | /*                                                                            */
  9 | /*  This is based on the implementation in Press "Numerical Recipes"          */
 10 | /*  with several bug fixes.                                                   */
 11 | /*                                                                            */
 12 | /******************************************************************************/
 13 | 
 14 | #include <math.h>
 15 | #include <malloc.h>
 16 | #include <stdlib.h>
 17 | #include <string.h>
 18 | 
 19 | /*
 20 | --------------------------------------------------------------------------------
 21 | 
 22 |    SingularValueDecomp - Singular value decomposition
 23 | 
 24 |    The following steps are needed to compute a least-squares solution
 25 |    to a (possibly overdetermined) linear system:
 26 |      1) Create a SingularValueDecomp object.  The constructor will allocate
 27 |         memory for the design matrix 'a', the right-hand-side 'b', and all
 28 |         scratch memory that it needs.  Optionally, the user can flag the
 29 |         constructor to preserve 'a' and return the decomposition in 'u'.
 30 |         Normally, 'a' is overwritten.
 31 |      2) The design matrix must be placed in 'a' and svdcmp called.
 32 |      3) Place the right-hand-side in 'b'
 33 |      4) Allocate a vector where the solution is to be placed.
 34 |         Call backsub with a pointer to this vector.
 35 | 
 36 | --------------------------------------------------------------------------------
 37 | */
 38 | 
 39 | class SingularValueDecomp {
 40 | 
 41 | public:
 42 | 
 43 |    SingularValueDecomp ( int nrows , int ncols , int save_a=0 ) ;
 44 |    ~SingularValueDecomp () ;
 45 |    void svdcmp () ;
 46 |    void backsub ( double limit , double *soln ) ;
 47 | 
 48 |    int ok ;         // Was everything legal and allocs successful?
 49 | 
 50 | /*
 51 |    These are made public to allow access if desired.
 52 |    Normally, only 'a' (the design matrix) and 'b' (the right-hand-side)
 53 |    are written by the user.  If 'save_a' is nonzero, 'a' is kept intact.
 54 | */
 55 | 
 56 |    double *a ;      // nrows by ncols input of design, output of U
 57 |    double *u ;      // unless save_a nonzero, in which case U output in 'u'
 58 |    double *w ;      // Unsorted ncols vector of singular values
 59 |    double *v ;      // Ncols by ncols output of 'v'
 60 |    double *b ;      // Nrows right-hand-side for backsub
 61 | 
 62 | 
 63 | private:
 64 | 
 65 |    void bidiag ( double *matrix ) ;
 66 |    double bid1 ( int col , double *matrix , double scale ) ;
 67 |    double bid2 ( int col , double *matrix , double scale ) ;
 68 |    void right ( double *matrix ) ;
 69 |    void left ( double *matrix ) ;
 70 |    void cancel ( int low , int high , double *matrix ) ;
 71 |    void qr ( int low , int high , double *matrix ) ;
 72 |    void qr_mrot ( int col , double sine , double cosine , double *matrix ) ;
 73 |    void qr_vrot ( int col , double sine , double cosine ) ;
 74 | 
 75 |    int rows ;       // Nrows preserved here
 76 |    int cols ;       // And ncols
 77 |    double *work ;   // Scratch vector ncols long
 78 |    double norm ;    // Norm of 'a' matrix
 79 | } ;
 80 | 
 81 | inline double root_ss ( double x , double y )
 82 | {
 83 |    double ratio ;
 84 |    if (x < 0.0)
 85 |       x = -x ;
 86 |    if (y < 0.0)
 87 |       y = -y ;
 88 | //   if (x >= y) {
 89 |    if (x > y) {                                  // Bug fix 7/26/2012
 90 |       ratio = y / x ;
 91 |       return x * sqrt ( ratio * ratio + 1.0 ) ;
 92 |       }
 93 |    else if (y == 0.0)
 94 |       return 0.0 ;
 95 |    else {
 96 |       ratio = x / y ;
 97 |       return y * sqrt ( ratio * ratio + 1.0 ) ;
 98 |       }
 99 | }
100 | 
101 | /*
102 | --------------------------------------------------------------------------------
103 | 
104 |    Constructor - Allocate input/output and scratch memory.
105 |                  Normally, this returns ok=1.  If not, the user called it with
106 |                  more columns than rows, or there was insufficient memory.
107 | 
108 | --------------------------------------------------------------------------------
109 | */
110 | 
111 | SingularValueDecomp::SingularValueDecomp ( int nr , int nc , int save_a )
112 | {
113 |    if (nc > nr) {             // Illegal
114 |       rows = cols = ok = 0 ;
115 |       return ;
116 |       }
117 | 
118 |    a = (double *) malloc ( nr * nc * sizeof(double) ) ;
119 |    w = (double *) malloc ( nc * sizeof(double) ) ;
120 |    v = (double *) malloc ( nc * nc * sizeof(double) ) ;
121 |    b = (double *) malloc ( nr * sizeof(double) ) ;
122 |    work = (double *) malloc ( nc * sizeof(double) ) ;
123 |    if (save_a)
124 |       u = (double *) malloc ( nr * nc * sizeof(double) ) ;
125 |    else
126 |       u = NULL ;
127 | 
128 |    if ((a == NULL)  ||  (w == NULL)  ||  (v == NULL)  ||  (b == NULL)  ||
129 |        (work == NULL)  ||  (save_a && (u == NULL))) {
130 |       if (a != NULL)
131 |          free ( a ) ;
132 |       if (w != NULL)
133 |          free ( w ) ;
134 |       if (v != NULL)
135 |          free ( v ) ;
136 |       if (b != NULL)
137 |          free ( b ) ;
138 |       if (work != NULL)
139 |          free ( work ) ;
140 |       if (u != NULL)
141 |          free ( u ) ;
142 |       rows = cols = ok = 0 ;
143 |       return ;
144 |       }
145 | 
146 |    ok = 1 ;       // Flag to user that all went well
147 |    rows = nr ;
148 |    cols = nc ;
149 | }
150 | 
151 | /*
152 | --------------------------------------------------------------------------------
153 | 
154 |    Destructor - Free memory
155 | 
156 | --------------------------------------------------------------------------------
157 | */
158 | 
159 | SingularValueDecomp::~SingularValueDecomp ()
160 | {
161 |    if (! ok)    // If constructor's mallocs failed
162 |       return ;  // there is nothing to free
163 | 
164 |    free ( a ) ;
165 |    free ( w ) ;
166 |    free ( v ) ;
167 |    free ( b ) ;
168 |    free ( work ) ;
169 |    if (u != NULL)
170 |       free ( u ) ;
171 | }
172 | 
173 | 
174 | /*
175 | --------------------------------------------------------------------------------
176 | 
177 |    svdcmp - Singular value decomposition of 'a'
178 | 
179 | --------------------------------------------------------------------------------
180 | */
181 | 
182 | void SingularValueDecomp::svdcmp ()
183 | {
184 |    int i, sval, split, iter_limit ;
185 |    double *matrix ;
186 | 
187 |    if (u != NULL) {   // Must we keep 'a' intact?
188 |       memcpy ( u , a , rows * cols * sizeof(double) ) ;  // If so, copy it
189 |       matrix = u ;                                       // And work on copy
190 |       }
191 |    else              // If not, operate directly on 'a'
192 |       matrix = a ;
193 | 
194 |    bidiag ( matrix ) ;       // Reduce to bidiagonal
195 |    right ( matrix ) ;        // Accumulate right transforms
196 |    left ( matrix ) ;         // And left
197 | 
198 |    sval = cols ;
199 |    while (sval--) {    // Loop over the singular values in reverse order
200 |       iter_limit = 50 ;
201 |       while (iter_limit--) {  // Avoid nearly endless loop (very rare!)
202 |          split = sval + 1 ;
203 |          while (--split) {    // Keep splitting as long as possible
204 |             if (norm + fabs (work[split]) == norm) {
205 |                break ;
206 |                }
207 |             if (norm + fabs (w[split-1]) == norm) {
208 |                cancel ( split , sval , matrix ) ;
209 |                break ;
210 |                }
211 |             }
212 |          if (split == sval) {     // Converged?
213 |             if (w[sval] < 0.0) {  // Keep them nonnegative
214 |                w[sval] = -w[sval] ;
215 |                for (i=0 ; i<cols ; i++)
216 |                   v[i*cols+sval] = -v[i*cols+sval] ;
217 |                }
218 |             break ;
219 |             }
220 |          qr ( split , sval , matrix ) ;
221 |          }
222 |       }
223 | }
224 | 
225 | /*
226 | --------------------------------------------------------------------------------
227 | 
228 |    bidiag - Householder reduction to bidiagonal
229 | 
230 | --------------------------------------------------------------------------------
231 | */
232 | 
233 | void SingularValueDecomp::bidiag ( double *matrix )
234 | {
235 |    int col, k ;
236 |    double temp, testnorm, scale ;
237 | 
238 |    norm = temp = scale = 0.0 ;
239 | 
240 |    for (col=0 ; col<cols ; col++) {
241 | 
242 |       work[col] = scale * temp ;
243 | 
244 |       scale = 0.0 ;
245 |       for (k=col ; k<rows ; k++)
246 |          scale += fabs ( matrix[k*cols+col] ) ;
247 | 
248 |       if (scale > 0.0)
249 |          w[col] = scale * bid1 ( col , matrix , scale ) ;
250 |       else 
251 |          w[col] = 0.0 ;
252 | 
253 |       scale = 0.0 ;
254 |       for (k=col+1 ; k<cols ; k++)
255 |          scale += fabs ( matrix[col*cols+k] ) ;
256 | 
257 |       if (scale > 0.0)
258 |          temp = bid2 ( col , matrix , scale ) ;
259 |       else
260 |          temp = 0.0 ;
261 | 
262 |       testnorm = fabs (w[col]) + fabs (work[col]) ;
263 |       if (testnorm > norm)
264 |          norm = testnorm ;
265 |       }
266 | }
267 | 
268 | double SingularValueDecomp::bid1 ( int col , double *matrix , double scale )
269 | {
270 |    int i, j ;
271 |    double diag, rv, fac, sum ;
272 | 
273 |    sum = 0.0 ;
274 |    for (i=col ; i<rows ; i++) {
275 |       fac = (matrix[i*cols+col] /= scale) ;
276 |       sum += fac * fac ;
277 |       }
278 |    rv = sqrt ( sum ) ;
279 |    diag = matrix[col*cols+col] ;
280 |    if (diag > 0.0)
281 |       rv = -rv ;
282 |    fac = 1.0 / (diag * rv - sum) ;
283 |    matrix[col*cols+col] = diag - rv ;
284 | 
285 |    for (j=col+1 ; j<cols ; j++) {
286 |       sum = 0.0 ;
287 |       for (i=col ; i<rows ; i++)
288 |          sum += matrix[i*cols+col] * matrix[i*cols+j] ;
289 |       sum *= fac ;
290 |       for (i=col ; i<rows ; i++)
291 |          matrix[i*cols+j] += sum * matrix[i*cols+col] ;
292 |       }
293 | 
294 |    for (i=col ; i<rows ; i++)
295 |       matrix[i*cols+col] *= scale ;
296 | 
297 |    return rv ;
298 | }
299 | 
300 | double SingularValueDecomp::bid2 ( int col , double *matrix , double scale )
301 | {
302 |    int i, j ;
303 |    double fac, diag, rv, sum ;
304 | 
305 |    sum = 0.0 ;
306 |    for (i=col+1 ; i<cols ; i++) {
307 |       fac = (matrix[col*cols+i] /= scale) ;
308 |       sum += fac * fac ;
309 |       }
310 | 
311 |    rv = sqrt ( sum ) ;
312 |    diag = matrix[col*cols+col+1] ;
313 |    if (diag > 0.0)
314 |       rv = -rv ;
315 | 
316 |    matrix[col*cols+col+1] = diag - rv ;
317 |    fac = 1.0 / (diag * rv - sum) ;
318 |    for (i=col+1 ; i<cols ; i++)
319 |       work[i] = fac * matrix[col*cols+i] ;
320 | 
321 |    for (j=col+1 ; j<rows ; j++) {
322 |       sum = 0.0 ;
323 |       for (i=col+1 ; i<cols ; i++)
324 |          sum += matrix[j*cols+i] * matrix[col*cols+i] ;
325 |       for (i=col+1 ; i<cols ; i++)
326 |          matrix[j*cols+i] += sum * work[i] ;
327 |       }
328 |    for (i=col+1 ; i<cols ; i++)
329 |       matrix[col*cols+i] *= scale ;
330 |    return rv ;
331 | }
332 | 
333 | 
334 | /*
335 | --------------------------------------------------------------------------------
336 | 
337 |    Cumulate right and left transforms
338 | 
339 | --------------------------------------------------------------------------------
340 | */
341 | 
342 | void SingularValueDecomp::right ( double *matrix )
343 | {
344 |    int col, i, j ;
345 |    double temp, denom, sum ;
346 | 
347 |    denom = 0.0 ;
348 |    col = cols ;
349 |    while (col--) {
350 |       if (denom != 0.0) {
351 |          temp = 1.0 / matrix[col*cols+col+1] ;
352 |          for (i=col+1 ; i<cols ; i++)  // Double division avoids underflow
353 |             v[i*cols+col] = temp * matrix[col*cols+i] / denom ;
354 |          for (i=col+1 ; i<cols ; i++) {
355 |             sum = 0.0 ;
356 |             for (j=col+1 ; j<cols ; j++)
357 |                sum += v[j*cols+i] * matrix[col*cols+j] ;
358 |             for (j=col+1 ; j<cols ; j++)
359 |                v[j*cols+i] += sum * v[j*cols+col] ;
360 |             }
361 |          }
362 | 
363 |       denom = work[col] ;
364 | 
365 |       for (i=col+1 ; i<cols ; i++)
366 |          v[col*cols+i] = v[i*cols+col] = 0.0 ;
367 |       v[col*cols+col] = 1.0 ;
368 |       }
369 | }
370 | 
371 | void SingularValueDecomp::left ( double *matrix )
372 | {
373 |    int col, i, j ;
374 |    double temp, fac, sum ;
375 | 
376 |    col = cols ;
377 |    while (col--) {
378 | 
379 |       for (i=col+1 ; i<cols ; i++)
380 |          matrix[col*cols+i] = 0.0 ;
381 | 
382 |       if (w[col] == 0.0) {
383 |          for (i=col ; i<rows ; i++)
384 |             matrix[i*cols+col] = 0.0 ;
385 |          }
386 | 
387 |       else {
388 |          fac = 1.0 / w[col] ;
389 |          temp = fac / matrix[col*cols+col]  ;
390 | 
391 |          for (i=col+1 ; i<cols ; i++) {
392 |             sum = 0.0 ;
393 |             for (j=col+1 ; j<rows ; j++)
394 |                sum += matrix[j*cols+col] * matrix[j*cols+i] ;
395 |             sum *= temp ;
396 |             for (j=col ; j<rows ; j++)
397 |                matrix[j*cols+i] += sum * matrix[j*cols+col] ;
398 |             }
399 |          for (i=col ; i<rows ; i++)
400 |             matrix[i*cols+col] *= fac ;
401 |          }
402 | 
403 |       matrix[col*cols+col] += 1.0 ;
404 |       }
405 | }
406 | 
407 | 
408 | /*
409 | --------------------------------------------------------------------------------
410 | 
411 |    cancel
412 | 
413 | --------------------------------------------------------------------------------
414 | */
415 | 
416 | void SingularValueDecomp::cancel (
417 |    int low ,
418 |    int high ,
419 |    double *matrix
420 |    )
421 | {
422 |    int col, row, lm1 ;
423 |    double sine, cosine, leg1, leg2, svhypot, y, x, *mpt1, *mpt2 ;
424 | 
425 |    lm1 = low - 1 ;
426 |    sine = 1.0 ;
427 |    for (col=low ; col<=high ; col++) {
428 |       leg1 = sine * work[col] ;
429 |       if (fabs (leg1) + norm != norm) {
430 |          leg2 = w[col] ;
431 |          w[col] = svhypot = root_ss ( leg1 , leg2 ) ;
432 |          sine = -leg1 / svhypot ;
433 |          cosine =  leg2 / svhypot ;
434 |          for (row=0 ; row<rows ; row++) {
435 |             mpt1 = matrix + row * cols + col ;
436 |             mpt2 = matrix + row * cols + lm1 ;
437 |             x = *mpt1 ;
438 |             y = *mpt2 ;
439 |             *mpt1 = x * cosine  -  y * sine ;
440 |             *mpt2 = x * sine  +  y * cosine ;
441 |             }
442 |          }
443 |       }
444 | }
445 | 
446 | /*
447 | --------------------------------------------------------------------------------
448 | 
449 |    qr
450 | 
451 | --------------------------------------------------------------------------------
452 | */
453 | 
454 | void SingularValueDecomp::qr (
455 |    int low ,
456 |    int high ,
457 |    double *matrix )
458 | {
459 |    int col ;
460 |    double sine, cosine, wk, tx, ty, x, y, svhypot, temp, ww, wh, wkh, whm1, wkhm1;
461 | 
462 |    wh = w[high] ;
463 |    whm1 = w[high-1] ;
464 |    wkh = work[high] ;
465 |    wkhm1 = work[high-1] ;
466 |    temp = 2.0 * wkh * whm1 ;
467 |    if (temp != 0.0)
468 |       temp = ((whm1+wh) * (whm1-wh) + (wkhm1+wkh) * (wkhm1-wkh)) / temp ;
469 |    else
470 |       temp = 0.0 ;
471 | 
472 |    svhypot = root_ss ( temp , 1.0 ) ;
473 |    if (temp < 0.0)
474 |       svhypot = -svhypot ;
475 | 
476 |    ww = w[low] ;
477 |    wk = wkh * (whm1 / (temp + svhypot) - wkh)  +  (ww+wh) * (ww-wh) ;
478 |    if (ww != 0.0)
479 |       wk /= ww ;
480 |    else
481 |       wk = 0.0 ;
482 | 
483 |    sine = cosine = 1.0 ;
484 | 
485 |    for (col=low ; col<high ; col++) {
486 |       x = work[col+1] ;
487 |       ty = sine * x ;
488 |       x *= cosine ;
489 |       svhypot = root_ss ( wk , ty ) ;
490 |       work[col] = svhypot ;
491 |       cosine = wk / svhypot ;
492 |       sine = ty / svhypot ;
493 |       tx = ww * cosine  +  x * sine ;
494 |       x = x * cosine  -  ww * sine ;
495 |       y = w[col+1] ;
496 |       ty = y * sine ;
497 |       y *= cosine ;
498 |       qr_vrot ( col , sine , cosine ) ;
499 |       w[col] = svhypot = root_ss ( tx , ty ) ;
500 |       if (svhypot != 0.0) {
501 |          cosine = tx / svhypot ;
502 |          sine = ty / svhypot ;
503 |          }
504 |       qr_mrot ( col , sine , cosine , matrix ) ;
505 |       wk = cosine * x  +  sine * y ;
506 |       ww = cosine * y  -  sine * x ;
507 |       }
508 |    work[low] = 0.0 ;
509 |    work[high] = wk ;
510 |    w[high] = ww ;
511 | }
512 | 
513 | void SingularValueDecomp::qr_vrot ( int col , double sine , double cosine )
514 | {
515 |    int row ;
516 |    double x, y, *vptr ;
517 | 
518 |    for (row=0 ; row<cols ; row++) {
519 |       vptr = v + row * cols + col ;
520 |       x = *vptr ;
521 |       y = *(vptr+1) ;
522 |       *vptr = x * cosine  +  y * sine ;
523 |       *(vptr+1) = y * cosine  -  x * sine ;
524 |       }
525 | }
526 | 
527 | void SingularValueDecomp::qr_mrot ( int col , double sine , double cosine ,
528 |                                     double *matrix )
529 | {
530 |    int row ;
531 |    double x, y, *mptr ;
532 | 
533 |    for (row=0 ; row<rows ; row++) {
534 |       mptr = matrix + row * cols + col ;
535 |       x = *mptr ;
536 |       y = *(mptr+1) ;
537 |       *mptr = x * cosine  +  y * sine ;
538 |       *(mptr+1) = y * cosine  -  x * sine ;
539 |       }
540 | }
541 | 
542 | /*
543 | --------------------------------------------------------------------------------
544 | 
545 |    Backsubstitution algorithm for solving Ax=b where A generated u, w, v
546 |    Inputs are not destroyed, so it may be called with several b's.
547 |    The user must have filled in the public RHS 'b' before calling this.
548 | 
549 | --------------------------------------------------------------------------------
550 | */
551 | 
552 | void SingularValueDecomp::backsub (
553 |    double limit ,  // SV limit (about sqrt machine precision is good)
554 |    double *soln    // Output: solution
555 |    )
556 | {
557 |    int i, j ;
558 |    double sum, wmax, *matrix ;
559 | 
560 |    if (u != NULL)    // If we preserved 'a', use 'u'
561 |       matrix = u ;
562 |    else              // Else 'u' is in 'a'
563 |       matrix = a ;
564 | 
565 | /*
566 |    Scale the threshold to make it relative to the norm
567 | */
568 | 
569 |    wmax = -1.e40 ;
570 |    for (i=0 ; i<cols ; i++) {
571 |       if ((i == 0)  ||  (w[i] > wmax))
572 |          wmax = w[i] ;
573 |       }
574 | 
575 |    limit = limit * wmax  +  1.e-60 ;
576 | 
577 | /*
578 |    Find U'b
579 | */
580 | 
581 |    for (i=0 ; i<cols ; i++) {
582 |       sum = 0.0 ;
583 |       if (w[i] > limit) {
584 |          for (j=0 ; j<rows ; j++)
585 |             sum += matrix[j*cols+i] * b[j] ;
586 |          sum /= w[i] ;
587 |          }
588 |       work[i] = sum ;
589 |       }
590 | 
591 | /*
592 |    Multiply by V to complete the solution
593 | */
594 | 
595 |    for (i=0 ; i<cols ; i++) {
596 |       sum = 0.0 ;
597 |       for (j=0 ; j<cols ; j++)
598 |          sum += v[i*cols+j] * work[j] ;
599 |       soln[i] = sum ;
600 |       }
601 | }
602 | 


--------------------------------------------------------------------------------
/TEST_CON.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  TEST_CON - Test the continuous mutual information methods                 */
  4 | /*                                                                            */
  5 | /******************************************************************************/
  6 | 
  7 | #include <stdio.h>
  8 | #include <string.h>
  9 | #include <math.h>
 10 | #include <conio.h>
 11 | #include <ctype.h>
 12 | #include <stdlib.h>
 13 | 
 14 | extern double normal () ;
 15 | extern double unifrand () ;
 16 | 
 17 | class CubicSpline {
 18 | 
 19 | public:
 20 |    CubicSpline ( int n , double *xin , double *yin ) ;
 21 |    ~CubicSpline () ;
 22 |    double evaluate ( double x ) ;
 23 | 
 24 | private:
 25 |    int n ;
 26 |    double *x ;
 27 |    double *y ;
 28 |    double *y2 ;
 29 | } ;
 30 | 
 31 | class ParzDens_1 {
 32 | 
 33 | public:
 34 |    ParzDens_1 ( int n_tset , double *tset , int n_div ) ;
 35 |    ~ParzDens_1 () ;
 36 |    double density ( double x ) ;
 37 |    double low ;     // Lowest value with significant density
 38 |    double high ;    // And highest
 39 | 
 40 | private:
 41 |    int nd ;         // Number of points in array below
 42 |    double *d ;      // The data on which the density is based
 43 |    double var ;     // Presumed variance
 44 |    double factor ;  // Normalizing factor to make it a density
 45 |    CubicSpline *spline ; // Used only if interpolation
 46 | } ;
 47 | 
 48 | class MutualInformationParzen {  // Parzen window method
 49 | 
 50 | public:
 51 |    MutualInformationParzen ( int nn , double *dep_vals , int ndiv ) ;
 52 |    ~MutualInformationParzen () ;
 53 |    double mut_inf ( double *x ) ;
 54 | 
 55 | private:
 56 |    int n ;             // Number of cases
 57 |    int n_div ;         // Number of divisions of range, typically 5-10
 58 |    double *depvals ;   // 'Dependent' variable
 59 |    ParzDens_1 *dens_dep ;   // Marginal density of 'dependent' variable
 60 | } ;
 61 | 
 62 | class MutualInformationAdaptive {  // Adaptive partitioning method
 63 | 
 64 | public:
 65 |    MutualInformationAdaptive ( int nn , double *dep_vals ,
 66 |                                int respect_ties , double crit ) ;
 67 |    ~MutualInformationAdaptive () ;
 68 |    double mut_inf ( double *x , int respect_ties ) ;
 69 | 
 70 | private:
 71 |    int n ;             // Number of cases
 72 |    int *y ;            // 'Dependent' variable ranks
 73 |    int *y_tied ;       // tied[i] != 0 if case with rank i == case with rank i+1
 74 |    double chi_crit ;   // Chi-square test criterion
 75 | } ;
 76 | 
 77 | 
 78 | int main (
 79 |    int argc ,    // Number of command line arguments (includes prog name)
 80 |    char *argv[]  // Arguments (prog name is argv[0])
 81 |    )
 82 | 
 83 | {
 84 |    int i, nsamps, ntries, ndiv, divisor, itry, respect_ties ;
 85 |    double corr, correct, ptie, *x, *y, x1, x2, result, prior_x1 ;
 86 |    double total_parzen, bias_parzen, stderr_parzen ;
 87 |    double total_adapt, bias_adapt, stderr_adapt ;
 88 |    double chi_test ;
 89 |    MutualInformationParzen *mi_parzen ;
 90 |    MutualInformationAdaptive *mi_adapt ;
 91 | 
 92 | /*
 93 |    Process command line parameters
 94 | */
 95 | 
 96 | #if 1
 97 |    if (argc != 8) {
 98 |       printf (
 99 |          "\nUsage: TEST_CON nsamples ntries correlation ptie respect_ties ndiv chi_test" ) ;
100 |       exit ( 1 ) ;
101 |       }
102 | 
103 |    nsamps = atoi ( argv[1] ) ;
104 |    ntries = atoi ( argv[2] ) ;
105 |    corr = atof ( argv[3] ) ;
106 |    ptie = atof ( argv[4] ) ;
107 |    respect_ties = atoi ( argv[5] ) ;
108 |    ndiv = atoi ( argv[6] ) ;
109 |    chi_test = atof ( argv[7] ) ;
110 | #else
111 |    nsamps = 101 ;
112 |    ntries = 10 ;
113 |    corr = 0.9 ;
114 |    ptie = 0.0 ;
115 |    respect_ties = 0 ;
116 |    ndiv = 5 ;
117 |    chi_test = 6.0 ;
118 | #endif
119 | 
120 |    if ((nsamps <= 0)  ||  (ntries <= 0)  || (corr < -1.0)  ||  (corr > 1.0)
121 |     || (ptie < 0.0)  || (ptie > 1.0)  || (ndiv < 2)  || (chi_test < 0.0)) {
122 |       printf (
123 |          "\nUsage: TEST_CON nsamples ntries correlation ptie respect_ties ndiv chi_test" ) ;
124 |       exit ( 1 ) ;
125 |       }
126 | 
127 | 
128 | /*
129 |    Allocate memory and initialize
130 | */
131 | 
132 |    divisor = ntries / 100 ;  // This is for progress reports only
133 |    if (divisor < 1)
134 |       divisor = 1 ;
135 | 
136 |    x = (double *) malloc ( nsamps * sizeof(double) ) ;
137 |    y = (double *) malloc ( nsamps * sizeof(double) ) ;
138 | 
139 | 
140 | /*
141 |    Main outer loop does all tries
142 | */
143 | 
144 |    correct = -0.5 * log ( 1.0 - corr * corr ) ;
145 |    total_parzen = bias_parzen = stderr_parzen = 0.0 ;
146 |    total_adapt = bias_adapt = stderr_adapt = 0.0 ;
147 | 
148 |    for (itry=1 ; itry<=ntries ; itry++) {
149 | 
150 |       if (((itry-1) % divisor) == 0)
151 |          printf ( "\n\n\nTry %d of %d", itry, ntries ) ;
152 | 
153 |       prior_x1 = 0.5 ;             // Arbitrary
154 |       for (i=0 ; i<nsamps ; i++) { // Create bivariate sample with known correlation
155 |          if (unifrand() < ptie)    // Duplicate the prior observation for a tie?
156 |             x1 = prior_x1 ;
157 |          else {
158 |             x1 = normal () ;
159 |             prior_x1 = x1 ;
160 |             }
161 |          x2 = normal () ;
162 |          if (i < nsamps/2) {       // Equally split ties between X and Y
163 |             x[i] = x1 ;
164 |             y[i] = corr * x1 + sqrt ( 1.0 - corr * corr ) * x2 ;
165 |             }
166 |          else {
167 |             y[i] = x1 ;
168 |             x[i] = corr * x1 + sqrt ( 1.0 - corr * corr ) * x2 ;
169 |             }
170 |          }
171 | 
172 |       mi_adapt = new MutualInformationAdaptive ( nsamps , y , respect_ties , chi_test ) ;
173 |       result = mi_adapt->mut_inf ( x , respect_ties ) ;
174 |       delete mi_adapt ;
175 |       total_adapt += result ;
176 |       bias_adapt += result - correct ;
177 |       stderr_adapt += (result - correct) * (result - correct) ;
178 | 
179 |       mi_parzen = new MutualInformationParzen ( nsamps , y , ndiv ) ;
180 |       result = mi_parzen->mut_inf ( x ) ;
181 |       delete mi_parzen ;
182 |       total_parzen += result ;
183 |       bias_parzen += result - correct ;
184 |       stderr_parzen += (result - correct) * (result - correct) ;
185 | 
186 |       if ((((itry-1) % divisor) == 0)
187 |        || (itry == ntries) ) {        // Don't do this every try!  Too slow.
188 |          printf ( "\nParzen Mean = %.5lf   Bias = %.5lf   StdErr = %.5lf",
189 |             total_parzen/itry, bias_parzen/itry, sqrt ( stderr_parzen/itry )) ;
190 |          printf ( "\nAdapt  Mean = %.5lf   Bias = %.5lf   StdErr = %.5lf",
191 |                total_adapt/itry, bias_adapt/itry, sqrt ( stderr_adapt/itry )) ;
192 |          }
193 | 
194 |       if (_kbhit ()) {         // Has the user pressed a key?
195 |          if (_getch() == 27)   // The ESCape key?
196 |             break ;
197 |          }
198 | 
199 |       } // For all tries
200 | 
201 |    free ( x ) ;
202 |    free ( y ) ;
203 |    return EXIT_SUCCESS ;
204 | }
205 | 


--------------------------------------------------------------------------------
/TRANS_ENT.CPP:
--------------------------------------------------------------------------------
  1 | /******************************************************************************/
  2 | /*                                                                            */
  3 | /*  TRANS_ENT - Schreiber's transfer entropy (information transfer)           */
  4 | /*                                                                            */
  5 | /******************************************************************************/
  6 | 
  7 | #include <assert.h>
  8 | #include <stdio.h>
  9 | #include <string.h>
 10 | #include <math.h>
 11 | #include <stdlib.h>
 12 | 
 13 | /*
 14 | --------------------------------------------------------------------------------
 15 | 
 16 |    We are given two series, x and y, each having n cases.
 17 |    It is assumed that p(y[i]) is a function of y[i-1], y[i-2], ..., y[i-yhist].
 18 |    But does x[i-xlag], x[i-xlag-1], ..., x[i-xlag-xhist+1] influence the y transition
 19 |    probabilities?  This function measures the extent to which this occurs.
 20 | 
 21 |    The traditional version has xlag=1, meaning that the value of x concurrent
 22 |    with y is not allowed to participate in influencing y.
 23 |    Many models want the historical x influence to come up to y,
 24 |    allowing concurrent influence.  For this, xlag=0.
 25 |    This happens, for example, in developing model-based market trading systems
 26 |    in which the indicator/target data is such that indicators are computed
 27 |    based strictly on the past and targets strictly on the future.
 28 |    So the data already has X inherently lagged to Y, and you would not want
 29 |    to lag it still further.
 30 | 
 31 |    Note that we have nbins_x ^ xhist * nbins_y ^ (yhist+1) bins.
 32 |    In order to get decent probability  estimates, these bins must contain
 33 |    a decent number of cases.  The number of bins will blow up fast as
 34 |    xhist and yhist grow!  Keep them small unless n is gigantic.
 35 | 
 36 |    Suppose 'a' represents the current y, 'b' represents y history, and
 37 |    'c' represents x history.  Then the information transfer is:
 38 | 
  39 |    SUM [ p(a,b,c) log ( p(a|b,c) / p(a|b) ) ]
 40 | 
 41 |    So it's a sum of logs, weighted by the probability of each possible outcome.
 42 |    The log term is the ratio of the conditional probability of the current y
 43 |    given both its history and x history, over the conditional given just
 44 |    its own (y) history.  If c, the x history, has no impact, this ratio will
 45 |    be 1, and its log will be zero.
 46 | 
 47 |    Note that p(a|b,c) = p(a,b,c) / p(b,c) and p(a|b) = p(a,b) / p(b)
 48 | 
 49 |    To speed calculations, after cumulating p(a,b,c) we compute and save
 50 |    the marginals p(b,c), p(a,b), and p(b).
 51 | 
 52 |    Four work vectors must be supplied.
 53 |    Let nx = nbins_x ^ xhist  and  ny = nbins_y ^ yhist.  The lengths are:
 54 |       counts = nx * ny * nbins_y
 55 |       ab = nbins_y * ny
 56 |       bc = nx * ny
 57 |       b = ny
 58 | 
 59 | --------------------------------------------------------------------------------
 60 | */
 61 | 
 62 | double trans_ent (
 63 |    int n ,          // Length of x and y
 64 |    int nbins_x ,    // Number of x bins.  Beware if greater than 2.
 65 |    int nbins_y ,    // Ditto y
 66 |    short int *x ,   // Independent variable, which impacts y transitions
 67 |    short int *y ,   // Dependent variable
 68 |    int xlag ,       // Lag of most recent predictive x: 1 for traditional, 0 for concurrent
 69 |    int xhist ,      // Length of x history.  At least 1; Beware if greater than 1.
 70 |    int yhist ,      // Ditto y
 71 |    int *counts ,    // Work vector (see comment above)
 72 |    double *ab ,     // Ditto
 73 |    double *bc ,     // Ditto
 74 |    double *b        // Ditto
 75 |    )
 76 | {
 77 |    int i, j, nx, ny, nxy, istart, ix, iy, ia, total ;
 78 |    double p, trans, numer, denom ;
 79 | 
 80 | /*
 81 |    Compute key constants.
 82 | */
 83 | 
 84 |    nx = nbins_x ;
 85 |    for (i=1 ; i<xhist ; i++)   // Number of bins for X history
 86 |       nx *= nbins_x ;
 87 | 
 88 |    ny = nbins_y ;
 89 |    for (i=1 ; i<yhist ; i++)   // Number of bins for Y history
 90 |       ny *= nbins_y ;
 91 | 
 92 |    nxy = nx * ny ;             // Total number of history bins
 93 | 
 94 | /*
 95 |    Pass through the data, cumulating the bin counts
 96 |    The counts will be kept in an array with X history changing fastest,
 97 |    then Y history, and current Y changing last.
 98 | */
 99 | 
100 |    memset ( counts , 0 , nxy * nbins_y * sizeof(int) ) ;
101 | 
102 |    istart = xhist + xlag - 1 ;
103 |    if (yhist > istart)
104 |       istart = yhist ;
105 | 
106 |    for (i=istart ; i<n ; i++) {
107 | 
108 |       // Which of the nbins_x ^ xhist X history bins does this case lie in?
109 |       ix = x[i-xlag] ;
110 |       for (j=1 ; j<xhist ; j++)
111 |          ix = nbins_x * ix + x[i-j-xlag] ;
112 | 
113 |       // Which of the nbins_y ^ yhist Y history bins does this case lie in?
114 |       iy = y[i-1] ;
115 |       for (j=2 ; j<=yhist ; j++)
116 |          iy = nbins_y * iy + y[i-j] ;
117 | 
118 |       ++counts [ y[i] * nxy + iy * nx + ix ] ;  // Increment the correct bin
119 |       }
120 | 
121 |    total = n - istart ;
122 | 
123 | 
124 | /*
125 |    Compute and save the marginals
126 | */
127 | 
128 |    for (i=0 ; i<nbins_y*ny ; i++)
129 |       ab[i] = 0.0 ;
130 |    for (i=0 ; i<nx*ny ; i++)
131 |       bc[i] = 0.0 ;
132 |    for (i=0 ; i<ny ; i++)
133 |       b[i] = 0.0 ;
134 | 
135 |    for (ia=0 ; ia<nbins_y ; ia++) {
136 |       for (iy=0 ; iy<ny ; iy++) {
137 |          for (ix=0 ; ix<nx ; ix++) {
138 |             p = (double) counts [ ia * nxy + iy * nx + ix ] / (double) total ;
139 |             ab[ia*ny+iy] += p ;
140 |             bc[iy*nx+ix] += p ;
141 |             b[iy] += p ;
142 |             }
143 |          }
144 |       }
145 | 
146 | 
147 | /*
148 |    Compute the information transfer
149 | */
150 | 
151 |    trans = 0.0 ;
152 |    for (ia=0 ; ia<nbins_y ; ia++) {
153 |       for (iy=0 ; iy<ny ; iy++) {
154 |          for (ix=0 ; ix<nx ; ix++) {
155 |             p = (double) counts [ ia * nxy + iy * nx + ix ] / (double) total ;
156 |             if (p <= 0.0)
157 |                continue ;
158 |             numer = p / bc[iy*nx+ix] ;
159 |             denom = ab[ia*ny+iy] / b[iy] ;
160 |             trans += p * log ( numer / denom ) ;
161 |             }
162 |          }
163 |       }
164 | 
165 |    return trans ;
166 | }
167 | 


--------------------------------------------------------------------------------
/errata.md:
--------------------------------------------------------------------------------
 1 | # Errata for Data Mining Algorithms in C++
 2 | 
 3 | On **page xx** [Summary of error]:
 4 |  
 5 | Details of error here. Highlight key pieces in **bold**.
 6 | 
 7 | ***
 8 | 
 9 | On **page xx** [Summary of error]:
10 |  
11 | Details of error here. Highlight key pieces in **bold**.
12 | 
13 | ***


--------------------------------------------------------------------------------