Commit 53863fc1fc8d2efea4e4e5ed2a81265a6490022c

Authored by Zhenghua Gong
1 parent 7f982ea62a
Exists in master

Get structures from orders

Showing 1 changed file with 1030 additions and 0 deletions   Show diff stats
File was created 1 #include <string>
2 #include <vector>
3 #include <numeric>
4 #include <fstream>
5 #include <iterator>
6 #include <iostream>
7 #include <utility>
8 #include <iomanip>
9 #include <ctime>
10 #include <sstream>
11 #include <boost/math/special_functions.hpp>
12 #include <boost/lexical_cast.hpp>
13 #include <boost/algorithm/string/replace.hpp>
14 #include <boost/multiprecision/cpp_int.hpp>
15
16 #include <stdio.h>
17 #include <math.h>
18 #include <algorithm> // std::find
19 #include <map>
20 #include <iterator>
21 #include <bits/stdc++.h>
22
23 using namespace std;
24
25 //Function for calculating the binomial coefficient
26 boost::multiprecision::cpp_int BinomialCoefficient(unsigned int n, unsigned int k) {
27 if (k == 0) { return 1; }
28 else { return (n * BinomialCoefficient(n - 1, k - 1)) / k; }
29 }
// Ordering predicate for strings: shorter strings come first, and strings of
// equal length are ordered lexicographically.
struct compare {
    // Returns true when `first` must be placed before `second`.
    // The call operator is const so the predicate can be invoked through a
    // const object, e.g. when used as the ordering of std::set / std::map.
    bool operator()(const std::string& first, const std::string& second) const {
        if (first.size() != second.size()) {
            return first.size() < second.size();
        }
        return first < second;
    }
};
38
// Formats an integer as its decimal text representation.
std::string int_to_str(int num)
{
    std::ostringstream formatter;
    formatter << num;
    const std::string text = formatter.str();
    return text;
}
47
48
// Parses the leading decimal integer found in `st` (leading whitespace is
// skipped by the stream extraction).
//
// Returns the parsed value, or 0 when `st` contains nothing to read
// (for example an empty or all-blank string).
int str_to_int(std::string st)
{
    // Start from a defined value: when the stream is already exhausted the
    // extraction never writes to its target.
    int result = 0;

    std::stringstream reader(st);
    reader >> result;

    return result;
}
57
// Parses the leading floating-point number found in `str` (leading
// whitespace is skipped by the stream extraction).
//
// Returns the parsed value, or 0.0 when `str` contains nothing to read
// (for example an empty or all-blank string).
double str2doub(std::string str)
{
    // Start from a defined value: when the stream is already exhausted the
    // extraction never writes to its target.
    double d = 0.0;
    std::stringstream reader(str);
    reader >> d;
    return d;
}
64
// Ascending order on the numeric (second) member of a (label, value) pair.
bool compareI(const std::pair<std::string, double>& i, const std::pair<std::string, double>& j)
{
    const double lhs = i.second;
    const double rhs = j.second;
    return lhs < rhs;
}
69
// Descending order on the numeric (second) member of a (label, value) pair.
bool compareD(const std::pair<std::string, double>& i, const std::pair<std::string, double>& j)
{
    const double lhs = i.second;
    const double rhs = j.second;
    return lhs > rhs;
}
74
// Ascending order on the numeric (first) member of a (value, label) pair.
bool compareIn(const std::pair<double, std::string>& i, const std::pair<double, std::string>& j)
{
    const double lhs = i.first;
    const double rhs = j.first;
    return lhs < rhs;
}
79
// Descending order on the numeric (first) member of a (value, label) pair.
bool compareDe(const std::pair<double, std::string>& i, const std::pair<double, std::string>& j)
{
    const double lhs = i.first;
    const double rhs = j.first;
    return lhs > rhs;
}
84
// Log-space addition: given x = log(a) and y = log(b), returns log(a + b)
// without leaving log space.
//
// The larger operand is factored out so the exponential never overflows:
//   log(e^x + e^y) = max + log(1 + e^(min - max)).
double logAB (double x, double y)
{
    const double maxVal = std::max(x, y);
    // The operand that was not selected as the maximum.
    const double other = (maxVal == x) ? y : x;

    // Equal operands: the sum is exactly twice either term. Handling this
    // separately also covers both operands being the same infinity, where
    // other - maxVal would be inf - inf = NaN.
    if (other == maxVal)
    {
        return maxVal + log(2.0);
    }

    // log1p keeps precision when e^(other - maxVal) is tiny.
    return maxVal + log1p(exp(other - maxVal));
}
100
// Returns exp(oldS - newS) scaled by 100, i.e. the ratio of two quantities
// given by their logarithms, expressed as a percentage.
double persentageXofY (double newS, double oldS)
{
    const double ratio = exp(oldS - newS);
    return ratio * 100;
}
107
// Replaces every occurrence of `toSearch` inside `data` with `replaceStr`,
// modifying `data` in place.
//
// data:       string that is edited.
// toSearch:   text to look for; an empty search string leaves `data` as is.
// replaceStr: text written in place of each occurrence.
//
// Text inserted by a replacement is never searched again, so the call
// terminates even when `replaceStr` itself contains `toSearch`.
void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr)
{
    // An empty pattern matches at every position and would never finish.
    if (toSearch.empty())
    {
        return;
    }

    // Get the first occurrence
    size_t pos = data.find(toSearch);

    // Repeat till end is reached
    while (pos != std::string::npos)
    {
        // Replace this occurrence of the search string
        data.replace(pos, toSearch.size(), replaceStr);
        // Resume right after the text just written: skipping by the length of
        // the search string instead would miss matches when the replacement
        // is shorter and re-match the replacement when it is longer.
        pos = data.find(toSearch, pos + replaceStr.size());
    }
}
122
// Placeholder: both arguments are currently ignored and an empty string is
// returned.
std::string findMB(std::string structures, std::string variable){
    return std::string();
}
129
130 int main() {
131
132 string FILENAME; // Downloads/D50C9v.txt
133 int COLS; // 9
134 vector <int> max_cat; // 1 2 1 3 2 1 2 2 3
135 vector <int> min_cat; // 0 0 0 0 0 0 0 0 0
136 unsigned int maxparents; // 4
137 vector <int> order; //2 6 8 4 1 0 3 5 7
138 double PERCENT;
139 string filepath; // output file name
140
141 vector<string> settings;
142 vector<bool> settingsFlag;
143
144 settings.push_back("DataFile");
145 settingsFlag.push_back(false);
146 settings.push_back("TotalVariables");
147 settingsFlag.push_back(false);
148 settings.push_back("MaximumCategory");
149 settingsFlag.push_back(false);
150 settings.push_back("MinimumCategory");
151 settingsFlag.push_back(false);
152 settings.push_back("MaximumParents");
153 settingsFlag.push_back(false);
154 settings.push_back("Order");
155 settingsFlag.push_back(false);
156 settings.push_back("Percentage");
157 settingsFlag.push_back(false);
158 settings.push_back("OutputFileName");
159 settingsFlag.push_back(false);
160
161 fstream infile;
162 string ConfigFile;
163 std::cout << "What is the location of the configuration file that you edited?: ";
164 std::cin >> ConfigFile;
165 std::cout << std::endl;
166 //string ConfigFile = "/home/efraingonzalez0/Downloads/new.config";
167 infile.open(ConfigFile.c_str(),ios::in);
168 vector<string> line;
169 if(infile.is_open()) {
170 std::cout << "Configuration file correctly opened" << std::endl;
171 while(!infile.eof()) {
172 string tmp;
173 getline(infile,tmp);
174 line.push_back(tmp);
175 }
176 }
177 else {
178 std::cout << "Unable to open configuration file" << std::endl;
179 exit(0);
180 }
181
182 infile.close();
183 line.erase(line.end());
184 for(size_t k = 0; k < settings.size(); ++k) {
185 for(size_t i = 0; i < line.size(); ++i) {
186 size_t foundit = line[i].find(settings[k]);
187 if (foundit != std::string::npos) {
188 //find position of "="
189 unsigned int poseq = line[i].find("=");
190 unsigned int posneqfir,posneqlas;
191 //first position of relevant string
192 for(unsigned int j = poseq + 1; j < line[i].length(); ++j) {
193 if(line[i].at(j) != ' ') {
194 posneqfir = j;
195 break;
196 }
197 }
198 //last position of relevant string
199 for(unsigned int j2 = line[i].length() - 1; j2 >= posneqfir; ++j2) {
200 if((line[i].at(j2) != ' ') && (line[i].at(j2) != '\n')){
201 posneqlas = j2;
202 break;
203 }
204 }
205
206 if(settings[k] == "DataFile") {
207 FILENAME = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
208 }
209 else if(settings[k] == "TotalVariables") {
210 COLS = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1));
211 }
212 else if(settings[k] == "MaximumCategory") {//still need to edit
213 string max_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
214 std::stringstream macs(max_cat_string);
215 int macsti;
216 while( macs >> macsti) {
217 max_cat.push_back(macsti);
218 }
219 }
220 else if(settings[k] == "MinimumCategory") {//still need to edit
221 string min_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
222 std::stringstream mics(min_cat_string);
223 int micsti;
224 while( mics >> micsti) {
225 min_cat.push_back(micsti);
226 }
227 }
228
229 else if(settings[k] == "MaximumParents") {
230 maxparents = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1));
231 }
232
233 else if(settings[k] == "Order") {//still need to edit
234 string st_ord_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
235 std::stringstream starordr(st_ord_string);
236 int StOrti;
237 while( starordr >> StOrti) {
238 order.push_back(StOrti);
239 }
240 }
241
242 else if(settings[k] == "Percentage") {
243 PERCENT = str2doub(line[i].substr(posneqfir,posneqlas - posneqfir + 1));
244 }
245
246 else if(settings[k] == "OutputFileName") {
247 filepath = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
248 }
249
250 settingsFlag[k] = true;
251 break;
252 }
253 }
254 }
255
256 for(int k=0;k<settings.size();k++) {
257 if(!settingsFlag[k]) {
258 std::cout << "Error: "<< settings[k] << " is not configured." << std::endl;
259 exit(0);
260 }
261 else if(max_cat.size() != COLS) {
262 std::cout << "Error: Size of MaximumCategory and the number of TotalVariables do not match" << std::endl;
263 exit(0);
264 }
265 else if(min_cat.size() != COLS) {
266 std::cout << "Error: Size of MinimumCategory and the number of TotalVariables do not match" << std::endl;
267 exit(0);
268 }
269 else if(max_cat.size() != min_cat.size()) {
270 std::cout << "Error: Size of MaximumCategory and MinimumCategory do not match" << std::endl;
271 exit(0);
272 }
273 else if(PERCENT > 100.0){
274 std::cout << "Error: Percentage may not be larger than 100.0 which represents 100%. If you need a difference larger than 100% then use a negative number." << std::endl;
275 exit(0);
276 }
277 }
278
279
280
281 fstream file;
282 std::cout << endl;
283 vector < vector <int> > DAT; // 2d array as a vector of vectors
284 vector <int> rowVector(COLS); // vector to add into 'array' (represents a row)
285 int row = 0; // Row counter
286
287 // Read file
288 file.open(FILENAME.c_str(), ios::in); // Open file
289 if (file.is_open()) { // If file has correctly opened...
290 // Output debug message
291 cout << "File correctly opened" << endl;
292
293 // Dynamically store data into array
294 while (file.good()) { // ... and while there are no errors,
295 DAT.push_back(rowVector); // add a new row,
296 for (int col = 0; col<COLS; col++) {
297 file >> DAT[row][col]; // fill the row with col elements
298 }
299 row++; // Keep track of actual row
300 }
301 }
302 else cout << "Unable to open file" << endl;
303 file.close();
304 DAT.erase(DAT.end());
305
306 size_t totvars = DAT[1].size(); //column number
307 size_t tottuples = DAT.size();//row number
308
309
310 std::cout << endl;
311
312
313 //Time that program begins
314 time_t start = time(0);
315 char* dt_start = ctime(&start);
316 std::cout << "The local date and time is: " << dt_start << std::endl;
317
318
319 //Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category:
320 //categories in i
321 vector <int> catsi;
322 //total combinations of variables
323 //int totcombos = 1;
324 //how many catagori for every variable
325 for ( int i = 0; i < totvars; ++i) {
326 catsi.push_back((max_cat[i] - min_cat[i]) + 1);
327 //totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1);
328 }
329
330 //print out catsi
331 for (size_t i = 0; i < catsi.size();++i) {
332 std::cout << catsi[i] << " ";
333 }
334 std::cout << endl;
335
336 //Total Families Ui,alpha for a particular variable in the order
337 //Total Families Ui,alpha for a particular variable in the order
338 vector <boost::multiprecision::cpp_int> families;
339 for (unsigned int i = 0; i < totvars; ++i) {
340 int numparents = i;
341 if (numparents == 0) {
342 families.push_back(1);
343 }
344 else {
345
346 boost::multiprecision::cpp_int numfams = 0;
347 for (unsigned int j = 0; j <= i; ++j) {
348 if (j <= maxparents) {
349 unsigned long long jFactorial = 1;
350 unsigned long long ijFactorial = 1;
351 unsigned long long iFactorial = 1;
352 //Calculate j!
353 for (unsigned int g = 0; g <= j; ++g) {
354 if (g != 0) {
355 jFactorial *= g;
356 }
357 }
358 //Calculate i!
359 for (unsigned int g = 0; g <= i; ++g) {
360 if (g != 0) {
361 iFactorial *= g;
362 }
363 }
364 //Calculate (i-j)!
365 for (unsigned int g = 0; g <= (i - j); ++g) {
366 if (g != 0) {
367 ijFactorial *= g;
368 }
369 }
370 numfams += BinomialCoefficient(i,j);
371 }
372 else {
373 break;
374 }
375 }
376 families.push_back(numfams);
377 }
378 }
379
380
381 //How many parent combinations for each step? As well as there counts
382 vector< vector <int> > ParentCombos;
383 vector< vector <int> > fullNijkvector;
384 vector< vector <string> > indexofvar; // This is label or index for each variable.
385 for (size_t i = 0; i < order.size(); ++i) {
386 //i represents the order of the variable
387 if (i == 0) {
388 vector <int> tmp,Nijkovercombos1;
389 vector <string> tempstring;
390 // tempstring.push_back("[0]");
391 tempstring.push_back("[" + int_to_str(order[0])+ "]");
392 tmp.push_back(1);
393 ParentCombos.push_back(tmp);
394 //counting the amount of times that a value of the first variable in the order occurs
395 //this starts with the maximum value for that variable
396 for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) {
397 //hello cycles through the categories of the first variable in the order
398 int Nijk1 = 0;
399 //green cycles through tuples
400 for (int green = 0; green < tottuples; ++green){
401 if (DAT[green][order[0]] == hello) {
402 Nijk1 += 1;
403 }
404 }
405 Nijkovercombos1.push_back(Nijk1);
406 }
407 fullNijkvector.push_back(Nijkovercombos1);
408 indexofvar.push_back(tempstring);
409
410 }
411 else {
412 vector <int> tmp,Nijkovercombos1;
413 vector <string> tempstring;
414 // string tempa = "[" + int_to_str(i) +"]";
415 string tempa = "[" + int_to_str(order[i]) +"]";
416 tempstring.push_back(tempa);
417 tmp.push_back(1);
418 int numparnts = i;
419 //counting the amount of times that a value of the last variable in the current order size occurs
420 //this starts with the maximum value for that variable
421 for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) {
422 //hello cycles through the categories of the first variable in the order
423 int Nijk1 = 0;
424 //green cycles through tuples
425 for (int green = 0; green < tottuples; ++green) {
426 if (DAT[green][order[numparnts]] == hello) {
427 Nijk1 += 1;
428 }
429 }
430 Nijkovercombos1.push_back(Nijk1);
431 }
432 fullNijkvector.push_back(Nijkovercombos1);
433
434 //j representing the number of parents
435 for (int it = 1; it <= numparnts; ++it) {
436 //(333)Creating a vector that uses the right combination
437 boost::multiprecision::cpp_int Nloopy = 0;
438 //, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1;
439 /*std::cout << "This is for " << numparnts << " choose " << it << endl;
440 std::cout << "The iteration number is: " << it << endl;*/
441 //Accounting for the limit of parent quantity
442 if (it > maxparents) {
443 break;
444 }
445 else {
446 vector <int> NewMat(numparnts, 0);
447 for (int p = 0; p < it; ++p) {
448 NewMat[p] = 1;
449 }
450 /*for (int g = 2; g <= numparnts; ++g) {
451 NcolFactorial *= g;
452 }
453 for (int g = 2; g <= it; ++g) {
454 iFactorial *= g;
455 }
456 for (int g = 2; g <= (numparnts - it); ++g) {
457 NiFactorial *= g;
458 }*/
459 //Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts
460 Nloopy = BinomialCoefficient(numparnts,it);
461 for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) {
462 int combsparents = 1;
463 vector <int> parsetv;
464 for (int par = 0; par < NewMat.size(); ++par){
465 if (NewMat[par] == 1){
466 parsetv.push_back(par);
467 }
468 }
469
470 string tempstring2;
471
472 for (int par2 = 0; par2 < parsetv.size(); ++par2){
473 if(par2+1==parsetv.size()){
474 // tempstring2 = tempstring2 + int_to_str(parsetv[par2]);
475 tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]]);
476 //tempstring2 = tempstring2 +","+"|" + int_to_str(i);
477 // tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]";
478 tempstring2 = "[" + int_to_str(order[i])+"|"+tempstring2 +"]";
479 }
480 else{
481 //tempstring2 = tempstring2 + int_to_str(parsetv[par2])+",";
482 // tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":";
483 tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]])+":";
484 }
485
486
487 }
488 tempstring.push_back(tempstring2);
489
490 /*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl;
491 std::cout << "Here comes the NewMat:" << endl;*/
492 //(444)This sets up the process for changing
493 //PosOne tells me the position of the last one in the vector
494 //We want to change when the position is the last position available in the vector
495 int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0;
496 int SumOnes3 = 0, SumOnes4 = 0, SumY = 0;
497 for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) {
498 if (NewMat[PosOne] == 1) {
499 break;
500 }
501 }
502 for (int y = (numparnts - 1); y >= (numparnts - it); --y) {
503 //SumOnes tells you the amount of ones in the last i columns
504 //These are the last columns being considered
505 SumOnes += NewMat[y];
506 }
507 for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) {
508 //SumOnes2 tells you the amount of ones before you reach the next zero
509 //PosOne2 keeps track of the position of the coming zero
510 SumOnes2 += NewMat[PosOne2];
511 if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) {
512 break;
513 }
514 }
515 for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) {
516 //FrstOne tells you the position of the first number 1 starting from the left hand side
517 if (NewMat[FrstOne] == 1) {
518 break;
519 }
520 }
521 for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) {
522 //SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions
523 SumOnes4 += NewMat[x];
524 }
525 //Prints out NewMat
526 /*for (int u = 0; u < numparnts; ++u) {
527 std::cout << NewMat[u] << " ";
528 }
529 std::cout << endl;*/
530
531
532 //Adding in the code that will allow counts parent combinations for this particular variable
533 for (int q = 0; q < i; ++q) {
534 if (NewMat[q] == 1) {
535 combsparents *= catsi[order[q]];
536 }
537 }
538 tmp.push_back(combsparents);//made the whole parentset configure for a variable
539 /*std::cout << "This is combsparents: " << combsparents << endl;
540 std::cout << endl;*/
541 vector <int> hvect;
542 //hvect tells us which variables are being considered always the last variable is being considered
543 //e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only
544 //continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 >
545 //for A C hvect will be < 0 2 >
546 for (int h = 0; h < i; ++h) {
547 if (NewMat[h] == 1) {
548 hvect.push_back(h);
549 }
550 }
551 hvect.push_back(numparnts);
552 size_t shvect = hvect.size();
553 //Prints out hvect
554 /*for (int u = 0; u < shvect; ++u) {
555 std::cout << hvect[u] << " ";
556 }*/
557 //std::cout << endl;
558 //Counting the amount of values in the data that have that particular parent combination
559 vector <int> Nijkovercombos;
560 for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) {
561 //(333)Creating a vector that uses the right combination
562 /*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/
563 vector <int> Test(shvect, last), maxtest;
564 for (int p = 0; p < (shvect-1); ++p) {
565 Test[p] = max_cat[order[hvect[p]]];
566 }
567 maxtest = Test;
568 for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) {
569 //std::cout << endl;
570 /*std::cout << endl;
571 std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl;
572 std::cout << "Here comes the Test:" << endl;*/
573 //(444)This sets up the process for changing
574 //NMpos tells me the position of the last non minimum value in the vector
575 //We want to change when the position is the last position available in the vector
576 int NMpos = 0, minpos = 0;
577 for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) {
578 if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) {
579 break;
580 }
581 }
582 for (minpos = (shvect - 2); minpos >= 0; --minpos) {
583 //minpos tells you the position of the last minimum value
584 if (Test[minpos] == min_cat[order[hvect[minpos]]]) {
585 break;
586 }
587 }
588 //Prints out Test
589 /*for (int u = 0; u < shvect; ++u) {
590 std::cout << Test[u] << " ";
591 }
592 std::cout << endl;
593 std::cout << endl;
594 std::cout << endl;*/
595 //Count how many occurrences of the value are present in the data
596 int Nijk = 0;
597 for (int num2size = 0; num2size < tottuples; ++num2size) {
598 int countcorrect = 0;
599 for (size_t g = 0; g < Test.size(); ++g) {
600 //num2size cycles through tuples
601 //order[hvect[g]] represents the variable in the order that we are considering as a parent
602 if (DAT[num2size][order[hvect[g]]] == Test[g]) {
603 countcorrect += 1;
604 }
605 }
606 if (countcorrect == Test.size()) {
607 Nijk += 1;
608 }
609 }
610 //Nijkovercombos displays data as follows
611 //it starts with the smallest value for the last variable in hvect
612 //and the largest values in the first n-1 variables in hvect
613 //max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0
614 //count,count,count,count e.g. 13, 2, 2, 3, 4, 10
615 Nijkovercombos.push_back(Nijk);
616 //(666)Now that the values have been calculated find out what the next combination of variables should be
617 if ((NMpos == -1) & (minpos == (shvect - 2))) {
618 //break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000
619 break;
620 }
621 if (minpos < NMpos) {
622 Test[NMpos] = Test[NMpos] - 1;
623 }
624 else if (NMpos < minpos) {
625 Test[NMpos] = Test[NMpos] - 1;
626 for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) {
627 Test[filler] = maxtest[filler];
628 }
629 }
630 }
631 }
632 fullNijkvector.push_back(Nijkovercombos);
633 //(666)Now that the unique values have been calculated find out what the next combination of variables should be
634 if ((PosOne == (numparnts - 1)) & (SumOnes == it)) {
635 break;
636 }
637 else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) {
638 for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
639 //NxtOne tells you the position of the next closest number 1 that we would
640 //like to change the position of (we will call it the important number one)
641 //SumOnes3 helps keep track of the sum of all ones between now and the next important number one
642 SumOnes3 += NewMat[NxtOne];
643 if (SumOnes3 == (SumOnes2 + 1)) {
644 break;
645 }
646 }
647 if (SumOnes4 == (it - 1)) {
648 //If all except one of the 1's are found in the last it - 1 columns
649 for (int x = 0; x < numparnts; ++x) {
650 if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) {
651 //If
652 NewMat[x] = 1;
653 }
654 else {
655 NewMat[x] = 0;
656 }
657 }
658 }
659 else {
660 for (int x = 0; x < numparnts; ++x) {
661 if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) {
662 //If the position is that of the first 1 or it falls between the changed number one and the total
663 //amount of ones that are on that side of the zero 10111
664 NewMat[x] = 1;
665 }
666 else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) {
667 //If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing
668 //and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right
669 NewMat[x] = 1;
670 }
671 else {
672 NewMat[x] = 0;
673 }
674 }
675 }
676 }
677 else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) {
678 for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
679 //NxtOne tells you the position of the next closest number 1 that we would
680 //like to change the position of (we will call it the important number one)
681 //SumOnes3 helps keep track of the sum of all ones between now and the next important number one
682 SumOnes3 += NewMat[NxtOne];
683 if (SumOnes3 == 1) {
684 break;
685 }
686 }
687 if (it != 1) {
688 for (int x = 0; x < numparnts; ++x) {
689 if (x == (NxtOne + 1)) {
690 NewMat[x] = 1;
691 }
692 else if (x == NxtOne) {
693 NewMat[x] = 0;
694 }
695 else if ((NewMat[x] == 1) & (x != NxtOne)) {
696 NewMat[x] = 1;
697 }
698 else {
699 NewMat[x] = 0;
700 }
701 }
702 }
703 else {
704 for (int x = 0; x < numparnts; ++x) {
705 if ((x == (NxtOne + 1))) {
706 NewMat[x] = 1;
707 }
708 else {
709 NewMat[x] = 0;
710 }
711 }
712 }
713 }
714 }
715
716 }
717 }
718 ParentCombos.push_back(tmp);
719 indexofvar.push_back(tempstring);
720 }
721
722 }
723 //std::cout << endl;
724 std::cout << endl;
725 //printing out the ParentCombos matrix just created above
726 /*for (size_t i = 0; i < ParentCombos.size(); ++i) {
727 for (size_t j = 0; j < ParentCombos[i].size(); ++j) {
728 std::cout << ParentCombos[i][j] << " ";
729 }
730 std::cout << endl;
731 }*/
732 std::cout << endl;
733 //printing out the fullNijkvector matrix just created above
734 /*for (size_t i = 0; i < fullNijkvector.size(); ++i) {
735 for (size_t j = 0; j < fullNijkvector[i].size(); ++j) {
736 std::cout << fullNijkvector[i][j] << " ";
737 }
738 std::cout << endl;
739 }
740 std::cout << endl;*/
741 //Print out Data
742 /*for (int i = 0; i < DAT.size(); ++i) {
743 for (int j = 0; j < DAT[i].size(); ++j) {
744 std::cout << DAT[i][j] << " ";
745 }
746 std::cout << endl;
747 }*/
748 //Print out the families size
749 /*for (size_t i = 0; i < families.size(); ++i) {
750 std::cout << families[i] << " ";
751 }*/
752
753 //Obtaining the actual score from this information
754 //varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting
755 //the first variable in the order)
756 //keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha
757 int posinfull = 0;
758 //finlogscore is the final score in natural log format
759 double finlogscore = 0.0;
760 vector< vector <double> > vecvarparset;
761 for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) {
762 //sumovUialpha is the the sum over all parent sets for a particular variable
763 double sumovUialpha = 0.0;
764 //vector of all values of seclastgamma
765 vector <double> vec2ndlastgamma;
766 double maxseclastgamma;
767 //Uialpha cycles through all the parent sets for a particular family
768 for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) {
769 // nijkprime represents the value of 1/(ri * qi)
770 double nijkprime, nijprime;
771 double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha];
772
773 nijprime = 1.0 / (PCs);
774 nijkprime = 1.0 / (rij * PCs);
775 //seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic
776 double seclastgamma = 0.0;
777 //qi_Uialpha cycles through the combinations for the parents in a set
778 for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) {
779 double lastgamma = 0.0;
780 double nij = 0.0;
781 //countijk cycles through the categories of the variable with a particular order
782 //catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to
783 //and then find the categories for it
784 for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) {
785 double topy;
786 //rightcol lets you find the right column/position of the value that you need for a particular category within the
787 int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]);
788 nij += fullNijkvector[posinfull][rightcol];
789 topy = (nijkprime + fullNijkvector[posinfull][rightcol]);
790
791 //Using boost lgamma function for the product over categories and parent combinations
792 lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime);
793
794 }
795 double boty = nij + nijprime;
796 seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty);
797
798 }
799 vec2ndlastgamma.push_back(seclastgamma);
800
801
802
803 //Calculate sumovUialpha based on the logsumexp concept
804 if (Uialpha + 1 == families[varinorder]) {
805
806 for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) {
807 //change the value of maxseclastgamma if new value is larger than the previous value
808 if (que == 0) {
809 maxseclastgamma = vec2ndlastgamma[0];
810 }
811 else {
812 if (maxseclastgamma < vec2ndlastgamma[que]) {
813 maxseclastgamma = vec2ndlastgamma[que];
814 }
815 }
816 }
817 for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) {
818 sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma);
819 }
820 //add info on parent set scores for each variable to this vector of vectors
821 vecvarparset.push_back(vec2ndlastgamma);
822 }
823
824 /*std::cout << endl;
825 std::cout << seclastgamma;
826 std::cout << endl;*/
827 posinfull += 1;
828 //std::cout << posinfull << endl;
829 }
830 finlogscore += log(sumovUialpha) + maxseclastgamma;
831 }
832
833 vector < vector<string> > parSet;
834 vector< map <double, string, greater <double> > > parSetScoreSorted;
835 vector< map <double, string, greater <double> > > strucScore;
836
837
838 //Below is another way to match the index or label sets with the scores sets, and store the (score, label) into a map vector. And for each vector element the map is a sorted map.
839
840 for (unsigned i = 0; i < indexofvar.size(); ++i){
841
842 map <double, string, greater <double> > tempMap;
843 for (unsigned j=0; j< indexofvar[i].size(); ++j){
844 tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j]));
845 }
846 parSetScoreSorted.push_back(tempMap);
847
848 }
849
850
851 pair <double, string> bestStrScore;
852 double bestScore = 0;
853 string bestLable;
854
855
856 for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){
857 map <double, string> :: iterator itr;
858 itr = parSetScoreSorted[i].begin();
859 bestScore = bestScore + (itr->first);
860 bestLable = bestLable +(itr->second);
861 }
862
863 bestStrScore = make_pair(bestScore, bestLable);//This is the best score.
864
865
866 vector < pair <double, string> > sortedStru;//This store all the structures in the percentage.
867 vector < vector < pair <double, string > > > deltaC;
868
869 for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){
870 map <double, string> :: iterator itr0, itr1;
871 vector < pair <double, string > > tempDelta;
872 double tempDeltaS;
873 string tempDeltaL;
874 itr0 = parSetScoreSorted[l].begin();
875 itr1 = parSetScoreSorted[l].begin();
876 double tem1=itr1->first, tem2 = itr1->first;
877 // cout << tem1 << " : " << tem2 << endl;
878 for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){
879 // tem1 = tem2;
880 itr1 = ++itr1;
881 tem2 = itr1->first;
882 double tem = -log(exp(tem2-tem1)+1);
883 tem1 = tem1-tem;
884 if(tem <= log(PERCENT/10)){
885 tempDeltaS = (itr1->first)-(itr0->first);
886 tempDeltaL = itr1->second;
887 tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL));
888 }
889 }
890
891 deltaC.push_back( tempDelta);
892 }
893
894
895 for(unsigned i=0; i< deltaC.size(); ++i){
896 for (unsigned j=0; j< deltaC[i].size(); ++j)
897 {
898 double score = bestStrScore.first + deltaC[i][j].first;
899 string lab = bestStrScore.second;
900 findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second);
901 sortedStru.push_back(make_pair(score, lab));
902 }
903 }
904
905 sort(sortedStru.begin(),sortedStru.end(),compareDe);
906
907 vector <double> strP;
908 double s = bestScore;
909
910 for(unsigned i=0; i< sortedStru.size(); ++i ){
911 double s0 = sortedStru[i].first;
912 // s = s + exp(sortedStru[i].first);
913 double te = -log(exp(s0-s)+1);
914
915 strP.push_back(te);
916 s = s - te;
917
918 }
919
920 /* double fi = s/exp(finlogscore)*100;
921 strP.push_back(fi);*/
922
923 std::cout << std::endl;
924 std::cout << "Total Score: "<< boost::lexical_cast<string>(finlogscore) << std::endl;
925 std::cout << std::endl;
926
927
928
929
930 cout << "Best several structures are:" << endl;
931 cout << bestScore << " : " << bestLable << " "<< exp(strP[0])*100 << endl;
932
933 for (unsigned i=0; i< sortedStru.size(); ++i )
934 {
935 if(exp(strP[i+1]) > (PERCENT/100)){
936 break;
937 }
938 cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl;
939
940 }
941
942 /* for (unsigned i=0; i< 5; ++i )
943 {
944
945 cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl;
946
947 }*/
948
949 /* cout << "Best several structures are:" << endl;
950 cout << bestScore << " : " << bestLable << " " << endl;
951
952 for (unsigned i=0; i< sortedStru.size(); ++i )
953 {
954 cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< endl;
955 }*/
956
957 std::cout << std::endl;
958 std::cout << std::endl;
959
960 /* for(unsigned i=0; i< parSetScoreSorted.size(); ++i){
961 map <double, string> :: iterator itr;
962 std::cout << "For variable "<< order[i] <<":"<< endl;
963 for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr)
964 {
965 cout << itr->second << " : " << itr->first << "; ";
966 }
967 cout << endl;
968 cout << endl;
969
970 }*/
971
972
973 string orderstring;
974 for (unsigned i=0; i< order.size(); ++i){
975 orderstring.append(int_to_str(order[i])+" ");
976 }
977
978 ofstream myfile;
979 myfile.open( filepath.c_str(), ios::out | ios::app );
980 // cout << path << endl;
981 if (myfile.is_open())
982 {
983 myfile << orderstring << " " << boost::lexical_cast<string>(finlogscore) << " "<< bestLable << " " << bestScore << " " << exp(strP[0])*100;
984 myfile << "\n";
985
986 for(unsigned i=0; i< sortedStru.size(); ++i){
987 if(exp(strP[i+1]) > (PERCENT/100)){
988 break;
989 }
990 myfile << orderstring << " " << boost::lexical_cast<string>(finlogscore) << " " << sortedStru[i].second << " " << sortedStru[i].first<< " " << exp(strP[i+1])*100;
991 myfile << "\n";
992
993 }
994 }
995 else cout << "Unable to open file";
996 myfile.close();
997
998 /* ofstream myfile;
999 myfile.open("/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2RO.txt");
1000 if (myfile.is_open())
1001 {
1002 for(unsigned i=0; i< parSetScoreSorted.size(); ++i){
1003 map <double, string> :: iterator itr;
1004 for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr){
1005 myfile << itr->second << " : " << itr->first << "; " ;
1006 }
1007 myfile << "\n";
1008
1009 }
1010 }
1011 else cout << "Unable to open file";
1012 myfile.close();*/
1013
1014
1015 //time after completion
1016 time_t later = time(0);
1017 char* dt_later = ctime(&later);
1018 std::cout << "The local date and time is: " << dt_later << std::endl;
1019
1020 std::cout << std::endl;
1021 std::cout << std::endl;
1022
1023
1024 std::cin.clear();
1025 std::cin.ignore();
1026 std::cin.get();
1027 return 0;
1028 }
1029
1030
1031