//--------------------------------------------------------------------- // An ugly maximal likelihood estimator, version 1.00 //--------------------------------------------------------------------- //--------------------------------------------------------------------- // input - input file with format // "data" file name // "MC" file name // output file name // id # of sought particle // flag (1 or 0) // number of different numbers of NN // # of nn_1 // .... // # of nn_last //_____________________________________________________________________ #include #include #include #include #include void ReadInput ( char* InputFileName, char *DataName, char* MCName, char *OutName, double &dx, int &idt, double &lowb ) ; void CheckInput ( char* DataName, char* MCName, int &ndata, int &nMC, int &d ) ; void ReadArrays ( char* DataName, char* MCName, double** &Data, double** &MC, int ndata, int nMC, int d ); void FindNeighbours ( double **Data, double **MC, char *OutName, int ndata, int nMC, int d, double dx, int SoughtIdt, double lowb ) ; void panic (char* str) ; inline INT (double val) { return val < 0 ? (int) (val - 0.5) : int (val + 0.5) ; } // ---------------------- // MAIN // ---------------------- int main (int argc, char* argv[]) { int idt; /* idt being sought */ int d; /* number of variables */ int ndata, nMC ; /* number of lines in data and MC */ char* DataName = new char[30]; // data file name */ char* MCName = new char[30]; // MC file name */ char* OutName = new char[30]; // output file name */ double **Data, **MC; double dx; // total number of different nns double lowb; if (argc < 2) panic ("neighbours ") ; ReadInput ( argv[1], // reads command-line arguments DataName, MCName, OutName, dx, idt, lowb ); CheckInput ( DataName, MCName, // half-checks compatibility ndata, nMC, d ); // of data & MC files ReadArrays ( DataName, MCName, Data, MC, ndata, nMC, d ); FindNeighbours ( Data, MC, OutName, // finds nns[i] nearest MCnbrs ndata, nMC, d, // for each pt. in data dx, idt, lowb ); // and prints results out return 1; } /* Transforms program arguments into corresponding variables */ void ReadInput ( char *FileName, char *DataName, char* MCName, char *OutName, double &dx, int &idt, double &lowb ) { ifstream InFile ; InFile.open (FileName, ios::nocreate); if (InFile.fail () ) panic ("No input file" ); InFile >> DataName ; InFile >> MCName; InFile >> OutName; InFile >> idt ; InFile >> dx; InFile >> lowb; InFile.close(); } /* 1. compares number of columns in MC and data files. (#column(MC) - #coulmn(data) should be 1 ) 2. counts number of variables and number of points in data and MC */ void CheckInput ( char* DataName, char* MCName, int &ndata, int &nMC, int &d ) { int nwords, nlines; double DataNcolumns, MCNcolumns; char *com = (char*) malloc (512); // command ifstream dummy; int cnt ; /* counter */ for (cnt = 0 ; cnt++ <= 1; ) { if (cnt == 1) sprintf (com, "wc %s > dummyfile", DataName); else sprintf (com, "wc %s > dummyfile", MCName); system (com); dummy.open ("dummyfile", ios::nocreate); dummy >> nlines >> nwords; if (cnt == 1) { DataNcolumns = (double) nwords / nlines ; ndata = nlines ; } else { MCNcolumns = (double) nwords / nlines ; nMC = nlines ; } dummy.close(); } if ( fabs ( DataNcolumns - INT (DataNcolumns) > 0.0001 ) || fabs ( MCNcolumns - INT (MCNcolumns + 0.5) > 0.0001 ) || INT (MCNcolumns) - INT (DataNcolumns) != 1 ) panic ("Bad input files or unmatched number of variables\n") ; d = INT (DataNcolumns) ; } // ------------------------------------------------------------------------ // in the simplest and most naive version of this program // this function just reads the data and MC files // INTO RAM!!! // ------------------------------------------------------------------------ void ReadArrays ( char* DataName, char* MCName, double** &Data, double** &MC, int ndata, int nMC, int d ) { ifstream DataFile, MCFile; long cnt1; DataFile.open (DataName, ios::nocreate); MCFile.open (MCName, ios::nocreate); Data = new double* [ndata]; MC = new double* [nMC]; // reading from data file to data array for ( cnt1 = 0 ; cnt1 < ndata; cnt1++ ) { Data[cnt1] = new double [d]; for (int cnt2 = 0; cnt2 < d; cnt2++ ) { DataFile >> Data[cnt1][cnt2]; if ( DataFile.eof() ) panic ( "Unexpected data EOF in FindNeighbours" ); } } // reading from MC file to MC array for ( cnt1 = 0 ; cnt1 < nMC; cnt1++ ) { MC[cnt1] = new double [d+1]; for (int cnt2 = 0; cnt2 <= d; cnt2++ ) { MCFile >> MC[cnt1][cnt2]; if (MCFile.eof()) panic( "Unexpected MC EOF in FindNeighbours\n"); } } DataFile.close (); MCFile.close (); } // ------------------------------------------------------------------------ // The major procedure - finding nn-s for each point & // printing the results. // Complexity = O (D * M * d) // D - # of data points, M - # of MC points, d - dimensionality // ------------------------------------------------------------------------ void FindNeighbours ( double **Data, double **MC, char *OutName, int ndata, int nMC, int d, double dx, int SoughtIdt, double lowb ) { ofstream OutputFile; int cnt1, cnt2, cnt3; // counters double prob[200]; // likelihood on each var long nsig[200], nnoise[200]; // noise and sig on each var if (d > 198) panic (" Too much output nn-s" ) ; OutputFile.open (OutName); for (cnt1 = 0 ; cnt1 < ndata; cnt1++ ) // loop on all data point { for (cnt2 = 0 ; cnt2 < d ; cnt2++ ) { if (fabs(Data[cnt1][cnt2]-lowb) < 0.001) { nsig[cnt2] = 139; // for P_b = 0.139 nnoise[cnt2] = 861; continue ; } for (cnt3 = 0 ; cnt3 < d ; cnt3++ ) nnoise[cnt3] = nsig [cnt3] = 0 ; for (cnt3 = 0; cnt3 < nMC; cnt3++) // loop on all MC points { if (fabs (MC[cnt3][cnt2]-Data[cnt1][cnt3]) < dx) { if (MC[cnt2][d] == SoughtIdt) nsig[cnt2]++; else nnoise[cnt2]++; } } } prob[199] = 1; for (cnt3 = 0 ; cnt3 < d ; cnt3++ ) { prob[cnt3] = (double) nsig[cnt3] / (nsig[cnt3]+nnoise[cnt3]); prob[199] *= prob[cnt3]; } OutputFile << prob[199] << endl; // writing output } OutputFile.close (); } //panic void panic (char* str) { cerr << str << endl ; exit (0); }