Commit c643e466cef282135e09dbb1e5fb7342cbb61864

Authored by Zhenghua Gong
1 parent 92636adddb
Exists in master

This added file finds the best structures and the Markov blanket of each of the best structures.

Showing 1 changed file with 993 additions and 0 deletions   Show diff stats
File was created 1 #include <string>
2 #include <vector>
3 #include <numeric>
4 #include <fstream>
5 #include <iterator>
6 #include <iostream>
7 #include <utility>
8 #include <iomanip>
9 #include <ctime>
10 #include <boost/math/special_functions.hpp>
11 #include <boost/lexical_cast.hpp>
12 #include <boost/algorithm/string/replace.hpp>
13
14 #include <stdio.h>
15 #include <math.h>
16 #include <algorithm> // std::find
17 #include <map>
18 #include <iterator>
19 using namespace std;
20
21
// Strict-weak ordering for strings: shorter strings sort first, and strings
// of equal length are ordered lexicographically.
struct compare {
    // Returns true when `first` must be placed before `second`.
    // Declared const so the comparator can be used through const objects
    // (e.g. as the ordering of an associative container).
    bool operator()(const std::string& first, const std::string& second) const {
        if (first.size() != second.size()) {
            return first.size() < second.size();
        }
        return first < second;
    }
};
30
// Formats an integer as its decimal text representation using stream insertion.
std::string int_to_str(int num)
{
    std::ostringstream out;
    out << num;
    return out.str();
}
39
40
// Parses the leading integer from `st` (leading whitespace is skipped by the
// stream extraction). Returns 0 when no integer can be read.
int str_to_int(std::string st)
{
    // Start from a defined value: for empty or whitespace-only input the
    // extraction fails before anything is stored, which previously left the
    // result uninitialized.
    int result = 0;
    std::istringstream in(st);
    if (!(in >> result)) {
        return 0;
    }
    return result;
}
49
// Ascending comparison of (label, value) pairs by their value (second member).
bool compareI(const std::pair<std::string, double>& lhs, const std::pair<std::string, double>& rhs)
{
    const double left = lhs.second;
    const double right = rhs.second;
    return left < right;
}
54
// Descending comparison of (label, value) pairs by their value (second member).
bool compareD(const std::pair<std::string, double>& lhs, const std::pair<std::string, double>& rhs)
{
    const double left = lhs.second;
    const double right = rhs.second;
    return right < left;
}
59
// Ascending comparison of (value, label) pairs by their value (first member).
bool compareIn(const std::pair<double, std::string>& lhs, const std::pair<double, std::string>& rhs)
{
    const double left = lhs.first;
    const double right = rhs.first;
    return left < right;
}
64
// Descending comparison of (value, label) pairs by their value (first member).
bool compareDe(const std::pair<double, std::string>& lhs, const std::pair<double, std::string>& rhs)
{
    const double left = lhs.first;
    const double right = rhs.first;
    return right < left;
}
69
// Returns log(exp(x) + exp(y)) computed without overflowing exp():
// the larger argument is factored out so only exp(smaller - larger) <= 1
// is ever evaluated.
double logAB (double x, double y)
{
    const double hi = (x > y) ? x : y;
    const double lo = (x > y) ? y : x;

    // Equal arguments (this includes both being the same infinity) give
    // hi + ln 2. Handling it here avoids evaluating inf - inf, which would
    // turn the result into NaN.
    if (hi == lo)
    {
        return hi + log(2.0);
    }

    // log1p keeps precision when exp(lo - hi) is tiny compared with 1.
    return hi + log1p(exp(lo - hi));
}
85
// Given two values on a natural-log scale, returns exp(oldS - newS) expressed
// as a percentage (i.e. multiplied by 100).
double persentageXofY (double newS, double oldS)
{
    const double ratio = exp(oldS - newS);
    return ratio * 100;
}
92
// Replaces, in place, every non-overlapping occurrence of `toSearch` in `data`
// with `replaceStr`, scanning left to right.
//
// data       : string modified in place.
// toSearch   : pattern to look for; an empty pattern leaves `data` unchanged.
// replaceStr : text substituted for each occurrence.
//
// The pattern and replacement are taken by value so the call stays safe even
// if a caller passes `data` itself as one of them.
void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr)
{
    // An empty pattern matches at every position and would never terminate.
    if (toSearch.empty())
    {
        return;
    }

    size_t pos = data.find(toSearch);
    while (pos != std::string::npos)
    {
        data.replace(pos, toSearch.size(), replaceStr);
        // Resume right after the inserted text: this neither skips input when
        // the replacement is shorter than the pattern nor re-matches inside
        // the replacement when it contains the pattern.
        pos = data.find(toSearch, pos + replaceStr.size());
    }
}
107
// Interactive driver.
// 1. Prompts for: the data file name, the number of variables (columns), the
//    variable order to test, the maximum and minimum categorical value of each
//    variable, the maximum number of parents, and a percentage threshold.
// 2. Reads the file as whitespace-separated integers, COLS values per row.
// 3. For every position in the order, enumerates the candidate parent sets
//    drawn from the earlier positions (up to maxparents members) and counts
//    how often each (parent configuration, child value) occurs in the data.
// 4. Computes a log score for every parent set with boost::math::lgamma and
//    combines the scores of each variable with a log-sum-exp into the total
//    score (finlogscore).
// 5. Prints the total score, the best-scoring structure, alternative
//    structures selected with the percentage threshold, and the sorted
//    parent-set scores of every variable, then waits for input before exiting.
// Returns 0.
108 int main() {
109 /*
110 //TEMPORARY INPUT FILE
111 vector< vector<int> > DAT;
112 vector <int> temp, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
113 //Filling up some of the test variables needed for the data set
114 temp.push_back(0);
115 temp.push_back(0);
116 temp.push_back(1);
117 //set 2
118 temp2.push_back(0);
119 temp2.push_back(1);
120 temp2.push_back(0);
121 //set3
122 temp3.push_back(0);
123 temp3.push_back(1);
124 temp3.push_back(1);
125 //set4
126 temp4.push_back(1);
127 temp4.push_back(0);
128 temp4.push_back(0);
129 //set5
130 temp5.push_back(1);
131 temp5.push_back(1);
132 temp5.push_back(1);
133 //set6
134 temp6.push_back(1);
135 temp6.push_back(1);
136 temp6.push_back(0);
137 //set7
138 temp7.push_back(1);
139 temp7.push_back(0);
140 temp7.push_back(1);
141 //set8
142 temp8.push_back(1);
143 temp8.push_back(0);
144 temp8.push_back(1);
145 //set9
146 temp9.push_back(1);
147 temp9.push_back(0);
148 temp9.push_back(1);
149 //set10
150 temp10.push_back(1);
151 temp10.push_back(0);
152 temp10.push_back(1);
153 //set11
154 temp11.push_back(1);
155 temp11.push_back(1);
156 temp11.push_back(1);
157 //set12
158 temp12.push_back(1);
159 temp12.push_back(0);
160 temp12.push_back(1);
161 //set13
162 temp13.push_back(1);
163 temp13.push_back(0);
164 temp13.push_back(1);
165 //set14
166 temp14.push_back(1);
167 temp14.push_back(1);
168 temp14.push_back(0);
169 //set15
170 temp15.push_back(0);
171 temp15.push_back(1);
172 temp15.push_back(1);
173 //set16
174 temp16.push_back(1);
175 temp16.push_back(0);
176 temp16.push_back(1);
177 std::cout << endl;
178 //Filling up test DATASET
179 DAT.push_back(temp);
180 DAT.push_back(temp2);
181 DAT.push_back(temp3);
182 DAT.push_back(temp4);
183 DAT.push_back(temp5);
184 DAT.push_back(temp6);
185 DAT.push_back(temp7);
186 DAT.push_back(temp8);
187 DAT.push_back(temp9);
188 DAT.push_back(temp10);
189 DAT.push_back(temp11);
190 DAT.push_back(temp12);
191 DAT.push_back(temp13);
192 DAT.push_back(temp14);
193 DAT.push_back(temp15);
194 DAT.push_back(temp16);
195 std::cout << endl;
196 size_t totvars = DAT[1].size();
197 size_t tottuples = DAT.size();
198 */
199 //Time before user input
200 time_t now = time(0);
201 char* dt = ctime(&now);
202 std::cout << "The local date and time is: " << dt << std::endl;
203 string FILENAME;
204 std::cout << "What is the name of your file?: ";
205 std::cin >> FILENAME;
206 // Variable declarations
207 fstream file;
208 int COLS;
209 std::cout << "How many variables in your data file?: ";
210 std::cin >> COLS;
211 std::cout << endl;
212 vector < vector <int> > DAT; // 2d array as a vector of vectors
213 vector <int> rowVector(COLS); // vector to add into 'array' (represents a row)
214 int row = 0; // Row counter
215
216 // Read file
217 file.open(FILENAME.c_str(), ios::in); // Open file
218 if (file.is_open()) { // If file has correctly opened...
219 // Output debug message
220 cout << "File correctly opened" << endl;
221
222 // Dynamically store data into array
// NOTE(review): good() is tested before the row is read, so a row is pushed
// even when the following extractions fail. If the file ends with trailing
// whitespace or a newline this probably appends one extra, unfilled row and
// makes tottuples one too large -- TODO confirm against a real data file.
223 while (file.good()) { // ... and while there are no errors,
224 DAT.push_back(rowVector); // add a new row,
225 for (int col = 0; col<COLS; col++) {
226 file >> DAT[row][col]; // fill the row with col elements
227 }
228 row++; // Keep track of actual row
229 }
230 }
// NOTE(review): after this message execution continues with an empty DAT.
231 else cout << "Unable to open file" << endl;
232 file.close();
233
// NOTE(review): DAT[1] is indexed without a size check, so fewer than two
// rows (including the failed-open case above) is an out-of-range access.
234 size_t totvars = DAT[1].size(); //column number
235 size_t tottuples = DAT.size();//row number
236
237
238
239
240 //Ask User for the order they would like to test Order starts at 0 e.g. 0,1,2 is valid order for three variables
241 int order_i;
242 vector <int> order;
243 std::cout << "Which is the order you would like to test?: ";
244 for (int i = 0; i < totvars;++i) {
245 std::cin >> order_i;
246 order.push_back(order_i);
247 }
248 std::cout << endl;
249 //Ask user for max value and min value of each variable being inputed
250 vector <int> max_cat;
251 vector <int> min_cat;
252 int max_cat_input;
253 int min_cat_input;
254
255 std::cout << "What is the maximum categorical value of each variable?: ";
256 for (int i = 0; i < totvars; ++i) {
257 std::cin >> max_cat_input;
258 max_cat.push_back(max_cat_input);
259 }
260 for (size_t i = 0; i < max_cat.size();++i) {
261 std::cout << max_cat[i] << " ";
262 }
263 std::cout << endl;
264 std::cout << endl;
265 std::cout << "What is the minimum categorical value of each variable?: ";
266 for (int i = 0; i < totvars; ++i) {
267 std::cin >> min_cat_input;
268 min_cat.push_back(min_cat_input);
269 }
270 for (size_t i = 0; i < min_cat.size();++i) {
271 std::cout << min_cat[i] << " ";
272 }
273 std::cout << endl;
274 //Set the maximum for the amount of parents for any given variable
275 unsigned int maxparents;
276 std::cout << "What is the maximum amount of parents to be considered for any given variable?: ";
277 std::cin >> maxparents;
278 std::cout << endl;
279
280 double PERCENT;
281 std::cout << "What is the percentage (Please use the format of 98.99, not 0.9899)?: ";
282 std::cin >> PERCENT;
283 std::cout << endl;
284
285 //Time that program begins
286 time_t start = time(0);
287 char* dt_start = ctime(&start);
288 std::cout << "The local date and time is: " << dt_start << std::endl;
289
290
291 //Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category:
292 //categories in i
293 vector <int> catsi;
294 //total combinations of variables
295 //int totcombos = 1;
296 //how many catagori for every variable
297 for ( int i = 0; i < totvars; ++i) {
298 catsi.push_back((max_cat[i] - min_cat[i]) + 1);
299 //totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1);
300 }
301 /*size_t mincatsi = 10000, mincatvar;
302 for (size_t i = 0; i < catsi.size(); ++i) {
303 if (catsi[i] < mincatsi) {
304 mincatsi = catsi[i];
305 mincatvar = i;
306 }
307 }*/
308 //print out catsi
309 for (size_t i = 0; i < catsi.size();++i) {
310 std::cout << catsi[i] << " ";
311 }
312 std::cout << endl;
313
// families[i] is the number of candidate parent sets for the variable at
// position i of the order: 1 for position 0, otherwise the sum over
// j = 0..min(i, maxparents) of i! / (j! * (i-j)!).
// NOTE(review): the factorials are held in unsigned long long; assuming a
// 64-bit type, i! no longer fits once i exceeds 20 -- TODO confirm the
// intended maximum number of variables.
314 //Total Families Ui,alpha for a particular variable in the order
315 vector <unsigned long long> families;//is vector, first element is the number of parentset for the first variabel......
316 for (unsigned int i = 0; i < totvars; ++i) {
317 int numparents = i;
318 if (numparents == 0) {
319 families.push_back(1);
320 }
321 else {
322
323 unsigned long long numfams = 0;
324 for (unsigned int j = 0; j <= i; ++j) {
325 if (j <= maxparents) {
326 unsigned long long jFactorial = 1;
327 unsigned long long ijFactorial = 1;
328 unsigned long long iFactorial = 1;
329 //Calculate j!
330 for (unsigned int g = 0; g <= j; ++g) {
331 if (g != 0) {
332 jFactorial *= g;
333 }
334 }
335 //Calculate i!
336 for (unsigned int g = 0; g <= i; ++g) {
337 if (g != 0) {
338 iFactorial *= g;
339 }
340 }
341 //Calculate (i-j)!
342 for (unsigned int g = 0; g <= (i - j); ++g) {
343 if (g != 0) {
344 ijFactorial *= g;
345 }
346 }
347 numfams += (iFactorial) / (jFactorial * ijFactorial);
348 }
349 else {
350 break;
351 }
352 }
353 families.push_back(numfams);
354 }
355 }
356
357
358 //How many parent combinations for each step? As well as there counts
// Three structures are filled by the loop below:
//  - ParentCombos[i][s]: number of joint value configurations of parent set s
//    of the variable at position i (1 for the empty parent set).
//  - fullNijkvector: one vector of occurrence counts per (position, parent
//    set), appended in the order the parent sets are enumerated.
//  - indexofvar[i][s]: text label of that parent set, "[i]" for no parents
//    and "[i|a:b:...]" otherwise, where a, b, ... are positions in the order.
359 vector< vector <int> > ParentCombos;
360 vector< vector <int> > fullNijkvector;
361 vector< vector <string> > indexofvar; // This is label or index for each variable.
362 for (size_t i = 0; i < order.size(); ++i) {
363 //i represents the order of the variable
364 if (i == 0) {
365 vector <int> tmp,Nijkovercombos1;
366 vector <string> tempstring;
367 tempstring.push_back("[0]");
368 tmp.push_back(1);
369 ParentCombos.push_back(tmp);
370 //counting the amount of times that a value of the first variable in the order occurs
371 //this starts with the maximum value for that variable
372 for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) {
373 //hello cycles through the categories of the first variable in the order
374 int Nijk1 = 0;
375 //green cycles through tuples
376 for (int green = 0; green < tottuples; ++green){
377 if (DAT[green][order[0]] == hello) {
378 Nijk1 += 1;
379 }
380 }
381 Nijkovercombos1.push_back(Nijk1);
382 }
383 fullNijkvector.push_back(Nijkovercombos1);
384 indexofvar.push_back(tempstring);
385
386 }
387 else {
388 vector <int> tmp,Nijkovercombos1;
389 vector <string> tempstring;
390 string tempa = "[" + int_to_str(i) +"]";
391 tempstring.push_back(tempa);
392 tmp.push_back(1);
393 int numparnts = i;
394 //counting the amount of times that a value of the last variable in the current order size occurs
395 //this starts with the maximum value for that variable
396 for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) {
397 //hello cycles through the categories of the first variable in the order
398 int Nijk1 = 0;
399 //green cycles through tuples
400 for (int green = 0; green < tottuples; ++green) {
401 if (DAT[green][order[numparnts]] == hello) {
402 Nijk1 += 1;
403 }
404 }
405 Nijkovercombos1.push_back(Nijk1);
406 }
407 fullNijkvector.push_back(Nijkovercombos1);
408
// For each parent-set size `it` (1..numparnts, stopping once it exceeds
// maxparents) the subsets of the earlier positions are enumerated with
// NewMat, a 0/1 indicator vector that starts with its first `it` entries set
// and is advanced to the next subset at the end of every iNloopy iteration.
409 //j representing the number of parents
410 for (int it = 1; it <= numparnts; ++it) {
411 //(333)Creating a vector that uses the right combination
412 double Nloopy = 0, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1;
413 /*std::cout << "This is for " << numparnts << " choose " << it << endl;
414 std::cout << "The iteration number is: " << it << endl;*/
415 //Accounting for the limit of parent quantity
416 if (it > maxparents) {
417 break;
418 }
419 else {
420 vector <int> NewMat(numparnts, 0);
421 for (int p = 0; p < it; ++p) {
422 NewMat[p] = 1;
423 }
424 for (int g = 2; g <= numparnts; ++g) {
425 NcolFactorial *= g;
426 }
427 for (int g = 2; g <= it; ++g) {
428 iFactorial *= g;
429 }
430 for (int g = 2; g <= (numparnts - it); ++g) {
431 NiFactorial *= g;
432 }
433 //Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts
434 Nloopy = NcolFactorial / (iFactorial * NiFactorial);
435 for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) {
436 int combsparents = 1;
437 vector <int> parsetv;
438 for (int par = 0; par < NewMat.size(); ++par){
439 if (NewMat[par] == 1){
440 parsetv.push_back(par);
441 }
442 }
443
444 string tempstring2;
445
446 for (int par2 = 0; par2 < parsetv.size(); ++par2){
447 if(par2+1==parsetv.size()){
448 tempstring2 = tempstring2 + int_to_str(parsetv[par2]);
449 //tempstring2 = tempstring2 +","+"|" + int_to_str(i);
450 tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]";
451 }
452 else{
453 //tempstring2 = tempstring2 + int_to_str(parsetv[par2])+",";
454 tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":";
455 }
456
457
458 }
459 tempstring.push_back(tempstring2);
460
461 /*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl;
462 std::cout << "Here comes the NewMat:" << endl;*/
463 //(444)This sets up the process for changing
464 //PosOne tells me the position of the last one in the vector
465 //We want to change when the position is the last position available in the vector
466 int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0;
467 int SumOnes3 = 0, SumOnes4 = 0, SumY = 0;
468 for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) {
469 if (NewMat[PosOne] == 1) {
470 break;
471 }
472 }
473 for (int y = (numparnts - 1); y >= (numparnts - it); --y) {
474 //SumOnes tells you the amount of ones in the last i columns
475 //These are the last columns being considered
476 SumOnes += NewMat[y];
477 }
478 for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) {
479 //SumOnes2 tells you the amount of ones before you reach the next zero
480 //PosOne2 keeps track of the position of the coming zero
481 SumOnes2 += NewMat[PosOne2];
// Here and in the conditions further down, `&` and `|` are the bitwise
// operators applied to bool operands, so both sides are always evaluated.
482 if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) {
483 break;
484 }
485 }
486 for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) {
487 //FrstOne tells you the position of the first number 1 starting from the left hand side
488 if (NewMat[FrstOne] == 1) {
489 break;
490 }
491 }
492 for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) {
493 //SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions
494 SumOnes4 += NewMat[x];
495 }
496 //Prints out NewMat
497 /*for (int u = 0; u < numparnts; ++u) {
498 std::cout << NewMat[u] << " ";
499 }
500 std::cout << endl;*/
501
502
503 //Adding in the code that will allow counts parent combinations for this particular variable
504 for (int q = 0; q < i; ++q) {
505 if (NewMat[q] == 1) {
506 combsparents *= catsi[order[q]];
507 }
508 }
509 tmp.push_back(combsparents);//made the whole parentset configure for a variable
510 /*std::cout << "This is combsparents: " << combsparents << endl;
511 std::cout << endl;*/
512 vector <int> hvect;
513 //hvect tells us which variables are being considered always the last variable is being considered
514 //e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only
515 //continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 >
516 //for A C hvect will be < 0 2 >
517 for (int h = 0; h < i; ++h) {
518 if (NewMat[h] == 1) {
519 hvect.push_back(h);
520 }
521 }
522 hvect.push_back(numparnts);
523 size_t shvect = hvect.size();
524 //Prints out hvect
525 /*for (int u = 0; u < shvect; ++u) {
526 std::cout << hvect[u] << " ";
527 }*/
528 //std::cout << endl;
529 //Counting the amount of values in the data that have that particular parent combination
530 vector <int> Nijkovercombos;
// The child's value (`last`) runs from its minimum up to its maximum here,
// whereas the no-parent counts above run from maximum down to minimum.
// For every child value, Test walks the parent value combinations starting
// from all-maximum and counting down, one count being appended per step.
531 for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) {
532 //(333)Creating a vector that uses the right combination
533 /*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/
534 vector <int> Test(shvect, last), maxtest;
535 for (int p = 0; p < (shvect-1); ++p) {
536 Test[p] = max_cat[order[hvect[p]]];
537 }
538 maxtest = Test;
539 for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) {
540 //std::cout << endl;
541 /*std::cout << endl;
542 std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl;
543 std::cout << "Here comes the Test:" << endl;*/
544 //(444)This sets up the process for changing
545 //NMpos tells me the position of the last non minimum value in the vector
546 //We want to change when the position is the last position available in the vector
547 int NMpos = 0, minpos = 0;
548 for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) {
549 if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) {
550 break;
551 }
552 }
553 for (minpos = (shvect - 2); minpos >= 0; --minpos) {
554 //minpos tells you the position of the last minimum value
555 if (Test[minpos] == min_cat[order[hvect[minpos]]]) {
556 break;
557 }
558 }
559 //Prints out Test
560 /*for (int u = 0; u < shvect; ++u) {
561 std::cout << Test[u] << " ";
562 }
563 std::cout << endl;
564 std::cout << endl;
565 std::cout << endl;*/
566 //Count how many occurrences of the value are present in the data
567 int Nijk = 0;
568 for (int num2size = 0; num2size < tottuples; ++num2size) {
569 int countcorrect = 0;
570 for (size_t g = 0; g < Test.size(); ++g) {
571 //num2size cycles through tuples
572 //order[hvect[g]] represents the variable in the order that we are considering as a parent
573 if (DAT[num2size][order[hvect[g]]] == Test[g]) {
574 countcorrect += 1;
575 }
576 }
577 if (countcorrect == Test.size()) {
578 Nijk += 1;
579 }
580 }
581 //Nijkovercombos displays data as follows
582 //it starts with the smallest value for the last variable in hvect
583 //and the largest values in the first n-1 variables in hvect
584 //max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0
585 //count,count,count,count e.g. 13, 2, 2, 3, 4, 10
586 Nijkovercombos.push_back(Nijk);
587 //(666)Now that the values have been calculated find out what the next combination of variables should be
588 if ((NMpos == -1) & (minpos == (shvect - 2))) {
589 //break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000
590 break;
591 }
592 if (minpos < NMpos) {
593 Test[NMpos] = Test[NMpos] - 1;
594 }
595 else if (NMpos < minpos) {
596 Test[NMpos] = Test[NMpos] - 1;
597 for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) {
598 Test[filler] = maxtest[filler];
599 }
600 }
601 }
602 }
603 fullNijkvector.push_back(Nijkovercombos);
604 //(666)Now that the unique values have been calculated find out what the next combination of variables should be
605 if ((PosOne == (numparnts - 1)) & (SumOnes == it)) {
606 break;
607 }
608 else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) {
609 for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
610 //NxtOne tells you the position of the next closest number 1 that we would
611 //like to change the position of (we will call it the important number one)
612 //SumOnes3 helps keep track of the sum of all ones between now and the next important number one
613 SumOnes3 += NewMat[NxtOne];
614 if (SumOnes3 == (SumOnes2 + 1)) {
615 break;
616 }
617 }
618 if (SumOnes4 == (it - 1)) {
619 //If all except one of the 1's are found in the last it - 1 columns
620 for (int x = 0; x < numparnts; ++x) {
621 if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) {
622 //If
623 NewMat[x] = 1;
624 }
625 else {
626 NewMat[x] = 0;
627 }
628 }
629 }
630 else {
631 for (int x = 0; x < numparnts; ++x) {
632 if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) {
633 //If the position is that of the first 1 or it falls between the changed number one and the total
634 //amount of ones that are on that side of the zero 10111
635 NewMat[x] = 1;
636 }
637 else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) {
638 //If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing
639 //and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right
640 NewMat[x] = 1;
641 }
642 else {
643 NewMat[x] = 0;
644 }
645 }
646 }
647 }
648 else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) {
649 for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
650 //NxtOne tells you the position of the next closest number 1 that we would
651 //like to change the position of (we will call it the important number one)
652 //SumOnes3 helps keep track of the sum of all ones between now and the next important number one
653 SumOnes3 += NewMat[NxtOne];
654 if (SumOnes3 == 1) {
655 break;
656 }
657 }
658 if (it != 1) {
659 for (int x = 0; x < numparnts; ++x) {
660 if (x == (NxtOne + 1)) {
661 NewMat[x] = 1;
662 }
663 else if (x == NxtOne) {
664 NewMat[x] = 0;
665 }
666 else if ((NewMat[x] == 1) & (x != NxtOne)) {
667 NewMat[x] = 1;
668 }
669 else {
670 NewMat[x] = 0;
671 }
672 }
673 }
674 else {
675 for (int x = 0; x < numparnts; ++x) {
676 if ((x == (NxtOne + 1))) {
677 NewMat[x] = 1;
678 }
679 else {
680 NewMat[x] = 0;
681 }
682 }
683 }
684 }
685 }
686
687 }
688 }
689 ParentCombos.push_back(tmp);
690 indexofvar.push_back(tempstring);
691 }
692
693 }
694 //std::cout << endl;
695 std::cout << endl;
696 //printing out the ParentCombos matrix just created above
697 /*for (size_t i = 0; i < ParentCombos.size(); ++i) {
698 for (size_t j = 0; j < ParentCombos[i].size(); ++j) {
699 std::cout << ParentCombos[i][j] << " ";
700 }
701 std::cout << endl;
702 }*/
703 std::cout << endl;
704 //printing out the fullNijkvector matrix just created above
705 /*for (size_t i = 0; i < fullNijkvector.size(); ++i) {
706 for (size_t j = 0; j < fullNijkvector[i].size(); ++j) {
707 std::cout << fullNijkvector[i][j] << " ";
708 }
709 std::cout << endl;
710 }
711 std::cout << endl;*/
712 //Print out Data
713 /*for (int i = 0; i < DAT.size(); ++i) {
714 for (int j = 0; j < DAT[i].size(); ++j) {
715 std::cout << DAT[i][j] << " ";
716 }
717 std::cout << endl;
718 }*/
719 //Print out the families size
720 /*for (size_t i = 0; i < families.size(); ++i) {
721 std::cout << families[i] << " ";
722 }*/
723
// Scoring pass. posinfull walks fullNijkvector in the same order in which the
// count vectors were appended above, one entry per (variable, parent set).
// For a parent set with PCs parent configurations and a child with rij
// categories, the score is the sum over parent configurations of
//   sum_k [lgamma(1/(rij*PCs) + N_ijk) - lgamma(1/(rij*PCs))]
//   + lgamma(1/PCs) - lgamma(N_ij + 1/PCs),
// where N_ijk is the stored count and N_ij its sum over the child categories.
// NOTE(review): this looks like a BDeu-style score with equivalent sample
// size 1 -- TODO confirm.
724 //Obtaining the actual score from this information
725 //varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting
726 //the first variable in the order)
727 //keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha
728 int posinfull = 0;
729 //finlogscore is the final score in natural log format
730 long double finlogscore = 0.0;
731 vector< vector <double> > vecvarparset;
732 for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) {
733 //sumovUialpha is the the sum over all parent sets for a particular variable
734 long double sumovUialpha = 0.0;
735 //vector of all values of seclastgamma
736 vector <double> vec2ndlastgamma;
737 long double maxseclastgamma;
738 //Uialpha cycles through all the parent sets for a particular family
739 for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) {
740 // nijkprime represents the value of 1/(ri * qi)
741 long double nijkprime, nijprime;
742 long double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha];
743
744 nijprime = 1.0 / (PCs);
745 nijkprime = 1.0 / (rij * PCs);
746 //seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic
747 long double seclastgamma = 0.0;
748 //qi_Uialpha cycles through the combinations for the parents in a set
749 for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) {
750 long double lastgamma = 0.0;
751 long double nij = 0.0;
752 //countijk cycles through the categories of the variable with a particular order
753 //catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to
754 //and then find the categories for it
755 for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) {
756 long double topy;
757 //rightcol lets you find the right column/position of the value that you need for a particular category within the
758 int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]);
759 nij += fullNijkvector[posinfull][rightcol];
760 topy = (nijkprime + fullNijkvector[posinfull][rightcol]);
761
762 //Using boost lgamma function for the product over categories and parent combinations
763 lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime);
764
765 }
766 long double boty = nij + nijprime;
767 seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty);
768
769 }
770 vec2ndlastgamma.push_back(seclastgamma);
771
772
773
// Runs once per variable, after its last parent set has been scored: finds
// the largest score and accumulates sum(exp(score - largest)), so that
// log(sum) + largest (added to finlogscore below) is the log of the summed
// exponentiated scores without overflowing exp().
774 //Calculate sumovUialpha based on the logsumexp concept
775 if (Uialpha + 1 == families[varinorder]) {
776
777 for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) {
778 //change the value of maxseclastgamma if new value is larger than the previous value
779 if (que == 0) {
780 maxseclastgamma = vec2ndlastgamma[0];
781 }
782 else {
783 if (maxseclastgamma < vec2ndlastgamma[que]) {
784 maxseclastgamma = vec2ndlastgamma[que];
785 }
786 }
787 }
788 for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) {
789 sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma);
790 }
791 //add info on parent set scores for each variable to this vector of vectors
792 vecvarparset.push_back(vec2ndlastgamma);
793 }
794
795 /*std::cout << endl;
796 std::cout << seclastgamma;
797 std::cout << endl;*/
798 posinfull += 1;
799 //std::cout << posinfull << endl;
800 }
801 finlogscore += log(sumovUialpha) + maxseclastgamma;
802 }
803
804 vector < vector<string> > parSet;
805 vector< map <double, string, greater <double> > > parSetScoreSorted;
806 vector< map <double, string, greater <double> > > strucScore;
807
808
809 //Below is another way to match the index or label sets with the scores sets, and store the (score, label) into a map vector. And for each vector element the map is a sorted map.
810
// NOTE(review): the map is keyed by the score itself and insert() keeps only
// the first entry for a given key, so two parent sets of one variable with
// exactly equal scores collapse into a single entry.
// NOTE(review): vecvarparset[i][j] is indexed with the size of indexofvar[i];
// this assumes the number of labels built during enumeration equals
// families[i] -- TODO confirm.
811 for (unsigned i = 0; i < indexofvar.size(); ++i){
812
813 map <double, string, greater <double> > tempMap;
814 for (unsigned j=0; j< indexofvar[i].size(); ++j){
815 tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j]));
816 }
817 parSetScoreSorted.push_back(tempMap);
818
819 }
820
821
822 pair <double, string> bestStrScore;
823 double bestScore = 0;
824 string bestLable;
825
826
// The best structure takes, for every variable, the first (highest-score)
// entry of its sorted map: scores are summed and labels concatenated.
// NOTE(review): the iterators below are declared for map<double, string>
// (default comparator) but assigned from maps using greater<double>; this
// presumably relies on the library using one iterator type for both --
// TODO confirm on the target compilers.
827 for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){
828 map <double, string> :: iterator itr;
829 itr = parSetScoreSorted[i].begin();
830 bestScore = bestScore + (itr->first);
831 bestLable = bestLable +(itr->second);
832 }
833
834 bestStrScore = make_pair(bestScore, bestLable);//This is the best score.
835
836 vector < pair <double, string> > sortedStru;//This store all the structures in the percentage.
837 vector < vector < pair <double, string > > > deltaC;
838
// For every variable except the first in the order, walk its parent sets from
// best to worst while accumulating exp(score). tem1 is the cumulative sum
// before the current entry and tem2 the sum including it; when
// tem1 / tem2 * 100 <= PERCENT the entry is recorded as (score difference to
// the best parent set, label).
// NOTE(review): exp() is applied to the raw log scores; for strongly negative
// scores this presumably underflows to 0 and makes tem1 / tem2 a 0/0 --
// TODO confirm with realistic data sizes.
839 for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){
840 map <double, string> :: iterator itr0, itr1;
841 vector < pair <double, string > > tempDelta;
842 double tempDeltaS;
843 string tempDeltaL;
844 itr0 = parSetScoreSorted[l].begin();
845 itr1 = parSetScoreSorted[l].begin();
846 double tem1=exp(itr1->first), tem2 = exp(itr1->first);
847 for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){
848 tem1 = tem2;
849 itr1 = ++itr1;
850 tem2 = tem1 + exp(itr1->first);
851 double tem = (tem1/tem2)*100;
852 if(tem <= PERCENT){
853 tempDeltaS = (itr1->first)-(itr0->first);
854 tempDeltaL = itr1->second;
855 tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL));
856 }
857 }
858
859 deltaC.push_back( tempDelta);
860 }
861
862
// Each recorded alternative yields one structure: the best structure's label
// with the best parent-set label of variable i+1 replaced by the alternative
// label, scored as the best score plus the recorded difference.
863 for(unsigned i=0; i< deltaC.size(); ++i){
864 for (unsigned j=0; j< deltaC[i].size(); ++j)
865 {
866 double score = bestStrScore.first + deltaC[i][j].first;
867 string lab = bestStrScore.second;
868 findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second);
869 sortedStru.push_back(make_pair(score, lab));
870 }
871 }
872
// Highest score first.
873 sort(sortedStru.begin(),sortedStru.end(),compareDe);
874
875
876
877 /* for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ //l means each variable
878 for (unsigned m = contl; m < parSetScoreSorted[l].size(); ++m){ //m means the number of parents sets
879 contl =m;
880 vector < pair <string, double > > delta; // store the different of the highest score and the second highest score
881 for (unsigned i = l; i < parSetScoreSorted.size(); ++i){
882 map <double, string> :: iterator itr0, itr1;
883 double tempDeltaS;
884 string tempDeltaL;
885 itr0 = parSetScoreSorted[i].begin();
886 itr1 = itr0;
887 for (int kr = 0; kr < contl; ++kr){
888 itr1 = itr0++;
889 }
890 tempDeltaS = (itr1->first)-(itr0->first);
891 tempDeltaL = int_to_str(i);
892 delta.push_back(make_pair(tempDeltaL, tempDeltaS));
893 }
894
895 sort(delta.begin(),delta.end(),compareI);
896
897 double const stopLimit = PERCENT;
898 double tempbeforeS = bestStrScore.first, temafterS;
899
900 for (unsigned i = 0; i < delta.size(); ++i){
901 pair <double, string > temppair;
902 double tempLime;
903 int ind = str_to_int(delta[i].first);
904 string s = parSetScoreSorted[ind].begin()->second;
905 string sRep = (++parSetScoreSorted[ind].begin())->second;
906 temppair = bestStrScore;
907 findAndReplaceAll(temppair.second, s, sRep);
908 temppair.first = temppair.first - delta[i].second;
909
910 temafterS = logAB(tempbeforeS, temppair.first);
911
912 tempLime = persentageXofY(temafterS, tempbeforeS) ;
913
914 tempbeforeS = temafterS;
915
916 if(tempLime > stopLimit){
917 goto finish;
918 }
919
920 sortedStru.push_back(temppair);
921
922 // std::cout << ind << " "<< s << " "<< sRep << " "<< temppair.first << " " << temppair.second <<endl;
923 // std::cout << temppair.first << " " << temppair.second <<endl;
924 // std::cout << std::endl;
925
926 }
927
928 }
929
930 }
931
932 finish:*/
933
934
935 std::cout << std::endl;
936 std::cout << "Total Score: "<< boost::lexical_cast<string>(finlogscore) << std::endl;
937 std::cout << std::endl;
938
939
940 cout << "Best several structures are:" << endl;
941 cout << bestScore << " : " << bestLable << endl;
942
943 for (unsigned i=0; i< sortedStru.size(); ++i )
944 {
945 cout << sortedStru[i].first << " : " << sortedStru[i].second << endl;
946 }
947
948 std::cout << std::endl;
949 std::cout << std::endl;
950
951 for(unsigned i=0; i< parSetScoreSorted.size(); ++i){
952 map <double, string> :: iterator itr;
953 std::cout << "For variable "<< i <<":"<< endl;
954 for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr)
955 {
956 cout << itr->second << " : " << itr->first << "; ";
957 }
958 cout << endl;
959 cout << endl;
960
961 }
962
963
964 /* ofstream myfile;
965 myfile.open("/home/zgong001/Documents/SprinklerDataset/bestStructure.txt");
966 if (myfile.is_open())
967 {
968 myfile << bestLable << " : " << bestScore << sortedStru.size();
969 myfile << "\n";
970
971 for(unsigned i=0; i< sortedStru.size(); ++i){
972 myfile << sortedStru[i].second << " : " << sortedStru[i].first;
973 myfile << "\n";
974 }
975 }
976 else cout << "Unable to open file";
977 myfile.close();*/
978
979
980 //time after completion
981 time_t later = time(0);
982 char* dt_later = ctime(&later);
983 std::cout << "The local date and time is: " << dt_later << std::endl;
984
985 std::cout << std::endl;
986 std::cout << std::endl;
987
988
// Clear any stream error state, discard one pending character, then block on
// one more character of input before returning.
989 std::cin.clear();
990 std::cin.ignore();
991 std::cin.get();
992 return 0;
993 }
994