Commit c643e466cef282135e09dbb1e5fb7342cbb61864
1 parent
92636adddb
Exists in
master
This added file is to get the best structures and markov blanket of each best structures.
Showing
1 changed file
with
993 additions
and
0 deletions
Show diff stats
MyOrder.cpp
File was created | 1 | #include <string> | |
2 | #include <vector> | ||
3 | #include <numeric> | ||
4 | #include <fstream> | ||
5 | #include <iterator> | ||
6 | #include <iostream> | ||
7 | #include <utility> | ||
8 | #include <iomanip> | ||
9 | #include <ctime> | ||
10 | #include <boost/math/special_functions.hpp> | ||
11 | #include <boost/lexical_cast.hpp> | ||
12 | #include <boost/algorithm/string/replace.hpp> | ||
13 | |||
14 | #include <stdio.h> | ||
15 | #include <math.h> | ||
16 | #include <algorithm> // std::find | ||
17 | #include <map> | ||
18 | #include <iterator> | ||
19 | using namespace std; | ||
20 | |||
21 | |||
22 | struct compare { | ||
23 | bool operator()(const std::string& first, const std::string& second) { | ||
24 | if(first.size() == second.size()) | ||
25 | return first < second; | ||
26 | else | ||
27 | return first.size() < second.size(); | ||
28 | } | ||
29 | }; | ||
30 | |||
31 | string int_to_str(int num) | ||
32 | { | ||
33 | stringstream ss; | ||
34 | |||
35 | ss << num; | ||
36 | |||
37 | return ss.str(); | ||
38 | }; | ||
39 | |||
40 | |||
41 | int str_to_int(string st) | ||
42 | { | ||
43 | int result; | ||
44 | |||
45 | stringstream(st) >> result; | ||
46 | |||
47 | return result; | ||
48 | }; | ||
49 | |||
50 | bool compareI(const pair<string, double>&i, const pair<string, double>&j) | ||
51 | { | ||
52 | return i.second < j.second; | ||
53 | } | ||
54 | |||
55 | bool compareD(const pair<string, double>&i, const pair<string, double>&j) | ||
56 | { | ||
57 | return i.second > j.second; | ||
58 | } | ||
59 | |||
60 | bool compareIn(const pair<double, string>&i, const pair<double, string>&j) | ||
61 | { | ||
62 | return i.first < j.first; | ||
63 | } | ||
64 | |||
65 | bool compareDe(const pair<double, string>&i, const pair<double, string>&j) | ||
66 | { | ||
67 | return i.first > j.first; | ||
68 | } | ||
69 | |||
70 | double logAB (double x, double y) | ||
71 | { | ||
72 | double result; | ||
73 | double maxVal = max(x,y); | ||
74 | |||
75 | if(maxVal == x) | ||
76 | { | ||
77 | result = maxVal + log(1+exp(y-maxVal)); | ||
78 | } | ||
79 | else | ||
80 | { | ||
81 | result = maxVal + log(1+exp(x-maxVal)); | ||
82 | } | ||
83 | return result; | ||
84 | } | ||
85 | |||
86 | double persentageXofY (double newS, double oldS) | ||
87 | { | ||
88 | double result; | ||
89 | result = exp(oldS-newS)*100; | ||
90 | return result; | ||
91 | } | ||
92 | |||
93 | void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr) | ||
94 | { | ||
95 | // Get the first occurrence | ||
96 | size_t pos = data.find(toSearch); | ||
97 | |||
98 | // Repeat till end is reached | ||
99 | while( pos != std::string::npos) | ||
100 | { | ||
101 | // Replace this occurrence of Sub String | ||
102 | data.replace(pos, toSearch.size(), replaceStr); | ||
103 | // Get the next occurrence from the current position | ||
104 | pos =data.find(toSearch, pos + toSearch.size()); | ||
105 | } | ||
106 | } | ||
107 | |||
108 | int main() { | ||
109 | /* | ||
110 | //TEMPORARY INPUT FILE | ||
111 | vector< vector<int> > DAT; | ||
112 | vector <int> temp, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16; | ||
113 | //Filling up some of the test variables needed for the data set | ||
114 | temp.push_back(0); | ||
115 | temp.push_back(0); | ||
116 | temp.push_back(1); | ||
117 | //set 2 | ||
118 | temp2.push_back(0); | ||
119 | temp2.push_back(1); | ||
120 | temp2.push_back(0); | ||
121 | //set3 | ||
122 | temp3.push_back(0); | ||
123 | temp3.push_back(1); | ||
124 | temp3.push_back(1); | ||
125 | //set4 | ||
126 | temp4.push_back(1); | ||
127 | temp4.push_back(0); | ||
128 | temp4.push_back(0); | ||
129 | //set5 | ||
130 | temp5.push_back(1); | ||
131 | temp5.push_back(1); | ||
132 | temp5.push_back(1); | ||
133 | //set6 | ||
134 | temp6.push_back(1); | ||
135 | temp6.push_back(1); | ||
136 | temp6.push_back(0); | ||
137 | //set7 | ||
138 | temp7.push_back(1); | ||
139 | temp7.push_back(0); | ||
140 | temp7.push_back(1); | ||
141 | //set8 | ||
142 | temp8.push_back(1); | ||
143 | temp8.push_back(0); | ||
144 | temp8.push_back(1); | ||
145 | //set9 | ||
146 | temp9.push_back(1); | ||
147 | temp9.push_back(0); | ||
148 | temp9.push_back(1); | ||
149 | //set10 | ||
150 | temp10.push_back(1); | ||
151 | temp10.push_back(0); | ||
152 | temp10.push_back(1); | ||
153 | //set11 | ||
154 | temp11.push_back(1); | ||
155 | temp11.push_back(1); | ||
156 | temp11.push_back(1); | ||
157 | //set12 | ||
158 | temp12.push_back(1); | ||
159 | temp12.push_back(0); | ||
160 | temp12.push_back(1); | ||
161 | //set13 | ||
162 | temp13.push_back(1); | ||
163 | temp13.push_back(0); | ||
164 | temp13.push_back(1); | ||
165 | //set14 | ||
166 | temp14.push_back(1); | ||
167 | temp14.push_back(1); | ||
168 | temp14.push_back(0); | ||
169 | //set15 | ||
170 | temp15.push_back(0); | ||
171 | temp15.push_back(1); | ||
172 | temp15.push_back(1); | ||
173 | //set16 | ||
174 | temp16.push_back(1); | ||
175 | temp16.push_back(0); | ||
176 | temp16.push_back(1); | ||
177 | std::cout << endl; | ||
178 | //Filling up test DATASET | ||
179 | DAT.push_back(temp); | ||
180 | DAT.push_back(temp2); | ||
181 | DAT.push_back(temp3); | ||
182 | DAT.push_back(temp4); | ||
183 | DAT.push_back(temp5); | ||
184 | DAT.push_back(temp6); | ||
185 | DAT.push_back(temp7); | ||
186 | DAT.push_back(temp8); | ||
187 | DAT.push_back(temp9); | ||
188 | DAT.push_back(temp10); | ||
189 | DAT.push_back(temp11); | ||
190 | DAT.push_back(temp12); | ||
191 | DAT.push_back(temp13); | ||
192 | DAT.push_back(temp14); | ||
193 | DAT.push_back(temp15); | ||
194 | DAT.push_back(temp16); | ||
195 | std::cout << endl; | ||
196 | size_t totvars = DAT[1].size(); | ||
197 | size_t tottuples = DAT.size(); | ||
198 | */ | ||
199 | //Time before user input | ||
200 | time_t now = time(0); | ||
201 | char* dt = ctime(&now); | ||
202 | std::cout << "The local date and time is: " << dt << std::endl; | ||
203 | string FILENAME; | ||
204 | std::cout << "What is the name of your file?: "; | ||
205 | std::cin >> FILENAME; | ||
206 | // Variable declarations | ||
207 | fstream file; | ||
208 | int COLS; | ||
209 | std::cout << "How many variables in your data file?: "; | ||
210 | std::cin >> COLS; | ||
211 | std::cout << endl; | ||
212 | vector < vector <int> > DAT; // 2d array as a vector of vectors | ||
213 | vector <int> rowVector(COLS); // vector to add into 'array' (represents a row) | ||
214 | int row = 0; // Row counter | ||
215 | |||
216 | // Read file | ||
217 | file.open(FILENAME.c_str(), ios::in); // Open file | ||
218 | if (file.is_open()) { // If file has correctly opened... | ||
219 | // Output debug message | ||
220 | cout << "File correctly opened" << endl; | ||
221 | |||
222 | // Dynamically store data into array | ||
223 | while (file.good()) { // ... and while there are no errors, | ||
224 | DAT.push_back(rowVector); // add a new row, | ||
225 | for (int col = 0; col<COLS; col++) { | ||
226 | file >> DAT[row][col]; // fill the row with col elements | ||
227 | } | ||
228 | row++; // Keep track of actual row | ||
229 | } | ||
230 | } | ||
231 | else cout << "Unable to open file" << endl; | ||
232 | file.close(); | ||
233 | |||
234 | size_t totvars = DAT[1].size(); //column number | ||
235 | size_t tottuples = DAT.size();//row number | ||
236 | |||
237 | |||
238 | |||
239 | |||
240 | //Ask User for the order they would like to test Order starts at 0 e.g. 0,1,2 is valid order for three variables | ||
241 | int order_i; | ||
242 | vector <int> order; | ||
243 | std::cout << "Which is the order you would like to test?: "; | ||
244 | for (int i = 0; i < totvars;++i) { | ||
245 | std::cin >> order_i; | ||
246 | order.push_back(order_i); | ||
247 | } | ||
248 | std::cout << endl; | ||
249 | //Ask user for max value and min value of each variable being inputed | ||
250 | vector <int> max_cat; | ||
251 | vector <int> min_cat; | ||
252 | int max_cat_input; | ||
253 | int min_cat_input; | ||
254 | |||
255 | std::cout << "What is the maximum categorical value of each variable?: "; | ||
256 | for (int i = 0; i < totvars; ++i) { | ||
257 | std::cin >> max_cat_input; | ||
258 | max_cat.push_back(max_cat_input); | ||
259 | } | ||
260 | for (size_t i = 0; i < max_cat.size();++i) { | ||
261 | std::cout << max_cat[i] << " "; | ||
262 | } | ||
263 | std::cout << endl; | ||
264 | std::cout << endl; | ||
265 | std::cout << "What is the minimum categorical value of each variable?: "; | ||
266 | for (int i = 0; i < totvars; ++i) { | ||
267 | std::cin >> min_cat_input; | ||
268 | min_cat.push_back(min_cat_input); | ||
269 | } | ||
270 | for (size_t i = 0; i < min_cat.size();++i) { | ||
271 | std::cout << min_cat[i] << " "; | ||
272 | } | ||
273 | std::cout << endl; | ||
274 | //Set the maximum for the amount of parents for any given variable | ||
275 | unsigned int maxparents; | ||
276 | std::cout << "What is the maximum amount of parents to be considered for any given variable?: "; | ||
277 | std::cin >> maxparents; | ||
278 | std::cout << endl; | ||
279 | |||
280 | double PERCENT; | ||
281 | std::cout << "What is the percentage (Please use the format of 98.99, not 0.9899)?: "; | ||
282 | std::cin >> PERCENT; | ||
283 | std::cout << endl; | ||
284 | |||
285 | //Time that program begins | ||
286 | time_t start = time(0); | ||
287 | char* dt_start = ctime(&start); | ||
288 | std::cout << "The local date and time is: " << dt_start << std::endl; | ||
289 | |||
290 | |||
291 | //Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category: | ||
292 | //categories in i | ||
293 | vector <int> catsi; | ||
294 | //total combinations of variables | ||
295 | //int totcombos = 1; | ||
296 | //how many catagori for every variable | ||
297 | for ( int i = 0; i < totvars; ++i) { | ||
298 | catsi.push_back((max_cat[i] - min_cat[i]) + 1); | ||
299 | //totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1); | ||
300 | } | ||
301 | /*size_t mincatsi = 10000, mincatvar; | ||
302 | for (size_t i = 0; i < catsi.size(); ++i) { | ||
303 | if (catsi[i] < mincatsi) { | ||
304 | mincatsi = catsi[i]; | ||
305 | mincatvar = i; | ||
306 | } | ||
307 | }*/ | ||
308 | //print out catsi | ||
309 | for (size_t i = 0; i < catsi.size();++i) { | ||
310 | std::cout << catsi[i] << " "; | ||
311 | } | ||
312 | std::cout << endl; | ||
313 | |||
314 | //Total Families Ui,alpha for a particular variable in the order | ||
315 | vector <unsigned long long> families;//is vector, first element is the number of parentset for the first variabel...... | ||
316 | for (unsigned int i = 0; i < totvars; ++i) { | ||
317 | int numparents = i; | ||
318 | if (numparents == 0) { | ||
319 | families.push_back(1); | ||
320 | } | ||
321 | else { | ||
322 | |||
323 | unsigned long long numfams = 0; | ||
324 | for (unsigned int j = 0; j <= i; ++j) { | ||
325 | if (j <= maxparents) { | ||
326 | unsigned long long jFactorial = 1; | ||
327 | unsigned long long ijFactorial = 1; | ||
328 | unsigned long long iFactorial = 1; | ||
329 | //Calculate j! | ||
330 | for (unsigned int g = 0; g <= j; ++g) { | ||
331 | if (g != 0) { | ||
332 | jFactorial *= g; | ||
333 | } | ||
334 | } | ||
335 | //Calculate i! | ||
336 | for (unsigned int g = 0; g <= i; ++g) { | ||
337 | if (g != 0) { | ||
338 | iFactorial *= g; | ||
339 | } | ||
340 | } | ||
341 | //Calculate (i-j)! | ||
342 | for (unsigned int g = 0; g <= (i - j); ++g) { | ||
343 | if (g != 0) { | ||
344 | ijFactorial *= g; | ||
345 | } | ||
346 | } | ||
347 | numfams += (iFactorial) / (jFactorial * ijFactorial); | ||
348 | } | ||
349 | else { | ||
350 | break; | ||
351 | } | ||
352 | } | ||
353 | families.push_back(numfams); | ||
354 | } | ||
355 | } | ||
356 | |||
357 | |||
358 | //How many parent combinations for each step? As well as there counts | ||
359 | vector< vector <int> > ParentCombos; | ||
360 | vector< vector <int> > fullNijkvector; | ||
361 | vector< vector <string> > indexofvar; // This is label or index for each variable. | ||
362 | for (size_t i = 0; i < order.size(); ++i) { | ||
363 | //i represents the order of the variable | ||
364 | if (i == 0) { | ||
365 | vector <int> tmp,Nijkovercombos1; | ||
366 | vector <string> tempstring; | ||
367 | tempstring.push_back("[0]"); | ||
368 | tmp.push_back(1); | ||
369 | ParentCombos.push_back(tmp); | ||
370 | //counting the amount of times that a value of the first variable in the order occurs | ||
371 | //this starts with the maximum value for that variable | ||
372 | for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) { | ||
373 | //hello cycles through the categories of the first variable in the order | ||
374 | int Nijk1 = 0; | ||
375 | //green cycles through tuples | ||
376 | for (int green = 0; green < tottuples; ++green){ | ||
377 | if (DAT[green][order[0]] == hello) { | ||
378 | Nijk1 += 1; | ||
379 | } | ||
380 | } | ||
381 | Nijkovercombos1.push_back(Nijk1); | ||
382 | } | ||
383 | fullNijkvector.push_back(Nijkovercombos1); | ||
384 | indexofvar.push_back(tempstring); | ||
385 | |||
386 | } | ||
387 | else { | ||
388 | vector <int> tmp,Nijkovercombos1; | ||
389 | vector <string> tempstring; | ||
390 | string tempa = "[" + int_to_str(i) +"]"; | ||
391 | tempstring.push_back(tempa); | ||
392 | tmp.push_back(1); | ||
393 | int numparnts = i; | ||
394 | //counting the amount of times that a value of the last variable in the current order size occurs | ||
395 | //this starts with the maximum value for that variable | ||
396 | for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) { | ||
397 | //hello cycles through the categories of the first variable in the order | ||
398 | int Nijk1 = 0; | ||
399 | //green cycles through tuples | ||
400 | for (int green = 0; green < tottuples; ++green) { | ||
401 | if (DAT[green][order[numparnts]] == hello) { | ||
402 | Nijk1 += 1; | ||
403 | } | ||
404 | } | ||
405 | Nijkovercombos1.push_back(Nijk1); | ||
406 | } | ||
407 | fullNijkvector.push_back(Nijkovercombos1); | ||
408 | |||
409 | //j representing the number of parents | ||
410 | for (int it = 1; it <= numparnts; ++it) { | ||
411 | //(333)Creating a vector that uses the right combination | ||
412 | double Nloopy = 0, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1; | ||
413 | /*std::cout << "This is for " << numparnts << " choose " << it << endl; | ||
414 | std::cout << "The iteration number is: " << it << endl;*/ | ||
415 | //Accounting for the limit of parent quantity | ||
416 | if (it > maxparents) { | ||
417 | break; | ||
418 | } | ||
419 | else { | ||
420 | vector <int> NewMat(numparnts, 0); | ||
421 | for (int p = 0; p < it; ++p) { | ||
422 | NewMat[p] = 1; | ||
423 | } | ||
424 | for (int g = 2; g <= numparnts; ++g) { | ||
425 | NcolFactorial *= g; | ||
426 | } | ||
427 | for (int g = 2; g <= it; ++g) { | ||
428 | iFactorial *= g; | ||
429 | } | ||
430 | for (int g = 2; g <= (numparnts - it); ++g) { | ||
431 | NiFactorial *= g; | ||
432 | } | ||
433 | //Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts | ||
434 | Nloopy = NcolFactorial / (iFactorial * NiFactorial); | ||
435 | for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) { | ||
436 | int combsparents = 1; | ||
437 | vector <int> parsetv; | ||
438 | for (int par = 0; par < NewMat.size(); ++par){ | ||
439 | if (NewMat[par] == 1){ | ||
440 | parsetv.push_back(par); | ||
441 | } | ||
442 | } | ||
443 | |||
444 | string tempstring2; | ||
445 | |||
446 | for (int par2 = 0; par2 < parsetv.size(); ++par2){ | ||
447 | if(par2+1==parsetv.size()){ | ||
448 | tempstring2 = tempstring2 + int_to_str(parsetv[par2]); | ||
449 | //tempstring2 = tempstring2 +","+"|" + int_to_str(i); | ||
450 | tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]"; | ||
451 | } | ||
452 | else{ | ||
453 | //tempstring2 = tempstring2 + int_to_str(parsetv[par2])+","; | ||
454 | tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":"; | ||
455 | } | ||
456 | |||
457 | |||
458 | } | ||
459 | tempstring.push_back(tempstring2); | ||
460 | |||
461 | /*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl; | ||
462 | std::cout << "Here comes the NewMat:" << endl;*/ | ||
463 | //(444)This sets up the process for changing | ||
464 | //PosOne tells me the position of the last one in the vector | ||
465 | //We want to change when the position is the last position available in the vector | ||
466 | int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0; | ||
467 | int SumOnes3 = 0, SumOnes4 = 0, SumY = 0; | ||
468 | for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) { | ||
469 | if (NewMat[PosOne] == 1) { | ||
470 | break; | ||
471 | } | ||
472 | } | ||
473 | for (int y = (numparnts - 1); y >= (numparnts - it); --y) { | ||
474 | //SumOnes tells you the amount of ones in the last i columns | ||
475 | //These are the last columns being considered | ||
476 | SumOnes += NewMat[y]; | ||
477 | } | ||
478 | for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) { | ||
479 | //SumOnes2 tells you the amount of ones before you reach the next zero | ||
480 | //PosOne2 keeps track of the position of the coming zero | ||
481 | SumOnes2 += NewMat[PosOne2]; | ||
482 | if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) { | ||
483 | break; | ||
484 | } | ||
485 | } | ||
486 | for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) { | ||
487 | //FrstOne tells you the position of the first number 1 starting from the left hand side | ||
488 | if (NewMat[FrstOne] == 1) { | ||
489 | break; | ||
490 | } | ||
491 | } | ||
492 | for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) { | ||
493 | //SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions | ||
494 | SumOnes4 += NewMat[x]; | ||
495 | } | ||
496 | //Prints out NewMat | ||
497 | /*for (int u = 0; u < numparnts; ++u) { | ||
498 | std::cout << NewMat[u] << " "; | ||
499 | } | ||
500 | std::cout << endl;*/ | ||
501 | |||
502 | |||
503 | //Adding in the code that will allow counts parent combinations for this particular variable | ||
504 | for (int q = 0; q < i; ++q) { | ||
505 | if (NewMat[q] == 1) { | ||
506 | combsparents *= catsi[order[q]]; | ||
507 | } | ||
508 | } | ||
509 | tmp.push_back(combsparents);//made the whole parentset configure for a variable | ||
510 | /*std::cout << "This is combsparents: " << combsparents << endl; | ||
511 | std::cout << endl;*/ | ||
512 | vector <int> hvect; | ||
513 | //hvect tells us which variables are being considered always the last variable is being considered | ||
514 | //e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only | ||
515 | //continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 > | ||
516 | //for A C hvect will be < 0 2 > | ||
517 | for (int h = 0; h < i; ++h) { | ||
518 | if (NewMat[h] == 1) { | ||
519 | hvect.push_back(h); | ||
520 | } | ||
521 | } | ||
522 | hvect.push_back(numparnts); | ||
523 | size_t shvect = hvect.size(); | ||
524 | //Prints out hvect | ||
525 | /*for (int u = 0; u < shvect; ++u) { | ||
526 | std::cout << hvect[u] << " "; | ||
527 | }*/ | ||
528 | //std::cout << endl; | ||
529 | //Counting the amount of values in the data that have that particular parent combination | ||
530 | vector <int> Nijkovercombos; | ||
531 | for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) { | ||
532 | //(333)Creating a vector that uses the right combination | ||
533 | /*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/ | ||
534 | vector <int> Test(shvect, last), maxtest; | ||
535 | for (int p = 0; p < (shvect-1); ++p) { | ||
536 | Test[p] = max_cat[order[hvect[p]]]; | ||
537 | } | ||
538 | maxtest = Test; | ||
539 | for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) { | ||
540 | //std::cout << endl; | ||
541 | /*std::cout << endl; | ||
542 | std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl; | ||
543 | std::cout << "Here comes the Test:" << endl;*/ | ||
544 | //(444)This sets up the process for changing | ||
545 | //NMpos tells me the position of the last non minimum value in the vector | ||
546 | //We want to change when the position is the last position available in the vector | ||
547 | int NMpos = 0, minpos = 0; | ||
548 | for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) { | ||
549 | if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) { | ||
550 | break; | ||
551 | } | ||
552 | } | ||
553 | for (minpos = (shvect - 2); minpos >= 0; --minpos) { | ||
554 | //minpos tells you the position of the last minimum value | ||
555 | if (Test[minpos] == min_cat[order[hvect[minpos]]]) { | ||
556 | break; | ||
557 | } | ||
558 | } | ||
559 | //Prints out Test | ||
560 | /*for (int u = 0; u < shvect; ++u) { | ||
561 | std::cout << Test[u] << " "; | ||
562 | } | ||
563 | std::cout << endl; | ||
564 | std::cout << endl; | ||
565 | std::cout << endl;*/ | ||
566 | //Count how many occurrences of the value are present in the data | ||
567 | int Nijk = 0; | ||
568 | for (int num2size = 0; num2size < tottuples; ++num2size) { | ||
569 | int countcorrect = 0; | ||
570 | for (size_t g = 0; g < Test.size(); ++g) { | ||
571 | //num2size cycles through tuples | ||
572 | //order[hvect[g]] represents the variable in the order that we are considering as a parent | ||
573 | if (DAT[num2size][order[hvect[g]]] == Test[g]) { | ||
574 | countcorrect += 1; | ||
575 | } | ||
576 | } | ||
577 | if (countcorrect == Test.size()) { | ||
578 | Nijk += 1; | ||
579 | } | ||
580 | } | ||
581 | //Nijkovercombos displays data as follows | ||
582 | //it starts with the smallest value for the last variable in hvect | ||
583 | //and the largest values in the first n-1 variables in hvect | ||
584 | //max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0 | ||
585 | //count,count,count,count e.g. 13, 2, 2, 3, 4, 10 | ||
586 | Nijkovercombos.push_back(Nijk); | ||
587 | //(666)Now that the values have been calculated find out what the next combination of variables should be | ||
588 | if ((NMpos == -1) & (minpos == (shvect - 2))) { | ||
589 | //break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000 | ||
590 | break; | ||
591 | } | ||
592 | if (minpos < NMpos) { | ||
593 | Test[NMpos] = Test[NMpos] - 1; | ||
594 | } | ||
595 | else if (NMpos < minpos) { | ||
596 | Test[NMpos] = Test[NMpos] - 1; | ||
597 | for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) { | ||
598 | Test[filler] = maxtest[filler]; | ||
599 | } | ||
600 | } | ||
601 | } | ||
602 | } | ||
603 | fullNijkvector.push_back(Nijkovercombos); | ||
604 | //(666)Now that the unique values have been calculated find out what the next combination of variables should be | ||
605 | if ((PosOne == (numparnts - 1)) & (SumOnes == it)) { | ||
606 | break; | ||
607 | } | ||
608 | else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) { | ||
609 | for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { | ||
610 | //NxtOne tells you the position of the next closest number 1 that we would | ||
611 | //like to change the position of (we will call it the important number one) | ||
612 | //SumOnes3 helps keep track of the sum of all ones between now and the next important number one | ||
613 | SumOnes3 += NewMat[NxtOne]; | ||
614 | if (SumOnes3 == (SumOnes2 + 1)) { | ||
615 | break; | ||
616 | } | ||
617 | } | ||
618 | if (SumOnes4 == (it - 1)) { | ||
619 | //If all except one of the 1's are found in the last it - 1 columns | ||
620 | for (int x = 0; x < numparnts; ++x) { | ||
621 | if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) { | ||
622 | //If | ||
623 | NewMat[x] = 1; | ||
624 | } | ||
625 | else { | ||
626 | NewMat[x] = 0; | ||
627 | } | ||
628 | } | ||
629 | } | ||
630 | else { | ||
631 | for (int x = 0; x < numparnts; ++x) { | ||
632 | if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) { | ||
633 | //If the position is that of the first 1 or it falls between the changed number one and the total | ||
634 | //amount of ones that are on that side of the zero 10111 | ||
635 | NewMat[x] = 1; | ||
636 | } | ||
637 | else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) { | ||
638 | //If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing | ||
639 | //and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right | ||
640 | NewMat[x] = 1; | ||
641 | } | ||
642 | else { | ||
643 | NewMat[x] = 0; | ||
644 | } | ||
645 | } | ||
646 | } | ||
647 | } | ||
648 | else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) { | ||
649 | for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { | ||
650 | //NxtOne tells you the position of the next closest number 1 that we would | ||
651 | //like to change the position of (we will call it the important number one) | ||
652 | //SumOnes3 helps keep track of the sum of all ones between now and the next important number one | ||
653 | SumOnes3 += NewMat[NxtOne]; | ||
654 | if (SumOnes3 == 1) { | ||
655 | break; | ||
656 | } | ||
657 | } | ||
658 | if (it != 1) { | ||
659 | for (int x = 0; x < numparnts; ++x) { | ||
660 | if (x == (NxtOne + 1)) { | ||
661 | NewMat[x] = 1; | ||
662 | } | ||
663 | else if (x == NxtOne) { | ||
664 | NewMat[x] = 0; | ||
665 | } | ||
666 | else if ((NewMat[x] == 1) & (x != NxtOne)) { | ||
667 | NewMat[x] = 1; | ||
668 | } | ||
669 | else { | ||
670 | NewMat[x] = 0; | ||
671 | } | ||
672 | } | ||
673 | } | ||
674 | else { | ||
675 | for (int x = 0; x < numparnts; ++x) { | ||
676 | if ((x == (NxtOne + 1))) { | ||
677 | NewMat[x] = 1; | ||
678 | } | ||
679 | else { | ||
680 | NewMat[x] = 0; | ||
681 | } | ||
682 | } | ||
683 | } | ||
684 | } | ||
685 | } | ||
686 | |||
687 | } | ||
688 | } | ||
689 | ParentCombos.push_back(tmp); | ||
690 | indexofvar.push_back(tempstring); | ||
691 | } | ||
692 | |||
693 | } | ||
694 | //std::cout << endl; | ||
695 | std::cout << endl; | ||
696 | //printing out the ParentCombos matrix just created above | ||
697 | /*for (size_t i = 0; i < ParentCombos.size(); ++i) { | ||
698 | for (size_t j = 0; j < ParentCombos[i].size(); ++j) { | ||
699 | std::cout << ParentCombos[i][j] << " "; | ||
700 | } | ||
701 | std::cout << endl; | ||
702 | }*/ | ||
703 | std::cout << endl; | ||
704 | //printing out the fullNijkvector matrix just created above | ||
705 | /*for (size_t i = 0; i < fullNijkvector.size(); ++i) { | ||
706 | for (size_t j = 0; j < fullNijkvector[i].size(); ++j) { | ||
707 | std::cout << fullNijkvector[i][j] << " "; | ||
708 | } | ||
709 | std::cout << endl; | ||
710 | } | ||
711 | std::cout << endl;*/ | ||
712 | //Print out Data | ||
713 | /*for (int i = 0; i < DAT.size(); ++i) { | ||
714 | for (int j = 0; j < DAT[i].size(); ++j) { | ||
715 | std::cout << DAT[i][j] << " "; | ||
716 | } | ||
717 | std::cout << endl; | ||
718 | }*/ | ||
719 | //Print out the families size | ||
720 | /*for (size_t i = 0; i < families.size(); ++i) { | ||
721 | std::cout << families[i] << " "; | ||
722 | }*/ | ||
723 | |||
724 | //Obtaining the actual score from this information | ||
725 | //varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting | ||
726 | //the first variable in the order) | ||
727 | //keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha | ||
728 | int posinfull = 0; | ||
729 | //finlogscore is the final score in natural log format | ||
730 | long double finlogscore = 0.0; | ||
731 | vector< vector <double> > vecvarparset; | ||
732 | for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) { | ||
733 | //sumovUialpha is the the sum over all parent sets for a particular variable | ||
734 | long double sumovUialpha = 0.0; | ||
735 | //vector of all values of seclastgamma | ||
736 | vector <double> vec2ndlastgamma; | ||
737 | long double maxseclastgamma; | ||
738 | //Uialpha cycles through all the parent sets for a particular family | ||
739 | for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) { | ||
740 | // nijkprime represents the value of 1/(ri * qi) | ||
741 | long double nijkprime, nijprime; | ||
742 | long double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha]; | ||
743 | |||
744 | nijprime = 1.0 / (PCs); | ||
745 | nijkprime = 1.0 / (rij * PCs); | ||
746 | //seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic | ||
747 | long double seclastgamma = 0.0; | ||
748 | //qi_Uialpha cycles through the combinations for the parents in a set | ||
749 | for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) { | ||
750 | long double lastgamma = 0.0; | ||
751 | long double nij = 0.0; | ||
752 | //countijk cycles through the categories of the variable with a particular order | ||
753 | //catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to | ||
754 | //and then find the categories for it | ||
755 | for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) { | ||
756 | long double topy; | ||
757 | //rightcol lets you find the right column/position of the value that you need for a particular category within the | ||
758 | int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]); | ||
759 | nij += fullNijkvector[posinfull][rightcol]; | ||
760 | topy = (nijkprime + fullNijkvector[posinfull][rightcol]); | ||
761 | |||
762 | //Using boost lgamma function for the product over categories and parent combinations | ||
763 | lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime); | ||
764 | |||
765 | } | ||
766 | long double boty = nij + nijprime; | ||
767 | seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty); | ||
768 | |||
769 | } | ||
770 | vec2ndlastgamma.push_back(seclastgamma); | ||
771 | |||
772 | |||
773 | |||
774 | //Calculate sumovUialpha based on the logsumexp concept | ||
775 | if (Uialpha + 1 == families[varinorder]) { | ||
776 | |||
777 | for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) { | ||
778 | //change the value of maxseclastgamma if new value is larger than the previous value | ||
779 | if (que == 0) { | ||
780 | maxseclastgamma = vec2ndlastgamma[0]; | ||
781 | } | ||
782 | else { | ||
783 | if (maxseclastgamma < vec2ndlastgamma[que]) { | ||
784 | maxseclastgamma = vec2ndlastgamma[que]; | ||
785 | } | ||
786 | } | ||
787 | } | ||
788 | for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) { | ||
789 | sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma); | ||
790 | } | ||
791 | //add info on parent set scores for each variable to this vector of vectors | ||
792 | vecvarparset.push_back(vec2ndlastgamma); | ||
793 | } | ||
794 | |||
795 | /*std::cout << endl; | ||
796 | std::cout << seclastgamma; | ||
797 | std::cout << endl;*/ | ||
798 | posinfull += 1; | ||
799 | //std::cout << posinfull << endl; | ||
800 | } | ||
801 | finlogscore += log(sumovUialpha) + maxseclastgamma; | ||
802 | } | ||
803 | |||
804 | vector < vector<string> > parSet; | ||
805 | vector< map <double, string, greater <double> > > parSetScoreSorted; | ||
806 | vector< map <double, string, greater <double> > > strucScore; | ||
807 | |||
808 | |||
809 | //Below is another way to match the index or label sets with the scores sets, and store the (score, label) into a map vector. And for each vector element the map is a sorted map. | ||
810 | |||
811 | for (unsigned i = 0; i < indexofvar.size(); ++i){ | ||
812 | |||
813 | map <double, string, greater <double> > tempMap; | ||
814 | for (unsigned j=0; j< indexofvar[i].size(); ++j){ | ||
815 | tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j])); | ||
816 | } | ||
817 | parSetScoreSorted.push_back(tempMap); | ||
818 | |||
819 | } | ||
820 | |||
821 | |||
822 | pair <double, string> bestStrScore; | ||
823 | double bestScore = 0; | ||
824 | string bestLable; | ||
825 | |||
826 | |||
827 | for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){ | ||
828 | map <double, string> :: iterator itr; | ||
829 | itr = parSetScoreSorted[i].begin(); | ||
830 | bestScore = bestScore + (itr->first); | ||
831 | bestLable = bestLable +(itr->second); | ||
832 | } | ||
833 | |||
834 | bestStrScore = make_pair(bestScore, bestLable);//This is the best score. | ||
835 | |||
836 | vector < pair <double, string> > sortedStru;//This store all the structures in the percentage. | ||
837 | vector < vector < pair <double, string > > > deltaC; | ||
838 | |||
839 | for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ | ||
840 | map <double, string> :: iterator itr0, itr1; | ||
841 | vector < pair <double, string > > tempDelta; | ||
842 | double tempDeltaS; | ||
843 | string tempDeltaL; | ||
844 | itr0 = parSetScoreSorted[l].begin(); | ||
845 | itr1 = parSetScoreSorted[l].begin(); | ||
846 | double tem1=exp(itr1->first), tem2 = exp(itr1->first); | ||
847 | for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){ | ||
848 | tem1 = tem2; | ||
849 | itr1 = ++itr1; | ||
850 | tem2 = tem1 + exp(itr1->first); | ||
851 | double tem = (tem1/tem2)*100; | ||
852 | if(tem <= PERCENT){ | ||
853 | tempDeltaS = (itr1->first)-(itr0->first); | ||
854 | tempDeltaL = itr1->second; | ||
855 | tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL)); | ||
856 | } | ||
857 | } | ||
858 | |||
859 | deltaC.push_back( tempDelta); | ||
860 | } | ||
861 | |||
862 | |||
863 | for(unsigned i=0; i< deltaC.size(); ++i){ | ||
864 | for (unsigned j=0; j< deltaC[i].size(); ++j) | ||
865 | { | ||
866 | double score = bestStrScore.first + deltaC[i][j].first; | ||
867 | string lab = bestStrScore.second; | ||
868 | findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second); | ||
869 | sortedStru.push_back(make_pair(score, lab)); | ||
870 | } | ||
871 | } | ||
872 | |||
873 | sort(sortedStru.begin(),sortedStru.end(),compareDe); | ||
874 | |||
875 | |||
876 | |||
877 | /* for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ //l means each variable | ||
878 | for (unsigned m = contl; m < parSetScoreSorted[l].size(); ++m){ //m means the number of parents sets | ||
879 | contl =m; | ||
880 | vector < pair <string, double > > delta; // store the different of the highest score and the second highest score | ||
881 | for (unsigned i = l; i < parSetScoreSorted.size(); ++i){ | ||
882 | map <double, string> :: iterator itr0, itr1; | ||
883 | double tempDeltaS; | ||
884 | string tempDeltaL; | ||
885 | itr0 = parSetScoreSorted[i].begin(); | ||
886 | itr1 = itr0; | ||
887 | for (int kr = 0; kr < contl; ++kr){ | ||
888 | itr1 = itr0++; | ||
889 | } | ||
890 | tempDeltaS = (itr1->first)-(itr0->first); | ||
891 | tempDeltaL = int_to_str(i); | ||
892 | delta.push_back(make_pair(tempDeltaL, tempDeltaS)); | ||
893 | } | ||
894 | |||
895 | sort(delta.begin(),delta.end(),compareI); | ||
896 | |||
897 | double const stopLimit = PERCENT; | ||
898 | double tempbeforeS = bestStrScore.first, temafterS; | ||
899 | |||
900 | for (unsigned i = 0; i < delta.size(); ++i){ | ||
901 | pair <double, string > temppair; | ||
902 | double tempLime; | ||
903 | int ind = str_to_int(delta[i].first); | ||
904 | string s = parSetScoreSorted[ind].begin()->second; | ||
905 | string sRep = (++parSetScoreSorted[ind].begin())->second; | ||
906 | temppair = bestStrScore; | ||
907 | findAndReplaceAll(temppair.second, s, sRep); | ||
908 | temppair.first = temppair.first - delta[i].second; | ||
909 | |||
910 | temafterS = logAB(tempbeforeS, temppair.first); | ||
911 | |||
912 | tempLime = persentageXofY(temafterS, tempbeforeS) ; | ||
913 | |||
914 | tempbeforeS = temafterS; | ||
915 | |||
916 | if(tempLime > stopLimit){ | ||
917 | goto finish; | ||
918 | } | ||
919 | |||
920 | sortedStru.push_back(temppair); | ||
921 | |||
922 | // std::cout << ind << " "<< s << " "<< sRep << " "<< temppair.first << " " << temppair.second <<endl; | ||
923 | // std::cout << temppair.first << " " << temppair.second <<endl; | ||
924 | // std::cout << std::endl; | ||
925 | |||
926 | } | ||
927 | |||
928 | } | ||
929 | |||
930 | } | ||
931 | |||
932 | finish:*/ | ||
933 | |||
934 | |||
935 | std::cout << std::endl; | ||
936 | std::cout << "Total Score: "<< boost::lexical_cast<string>(finlogscore) << std::endl; | ||
937 | std::cout << std::endl; | ||
938 | |||
939 | |||
940 | cout << "Best several structures are:" << endl; | ||
941 | cout << bestScore << " : " << bestLable << endl; | ||
942 | |||
943 | for (unsigned i=0; i< sortedStru.size(); ++i ) | ||
944 | { | ||
945 | cout << sortedStru[i].first << " : " << sortedStru[i].second << endl; | ||
946 | } | ||
947 | |||
948 | std::cout << std::endl; | ||
949 | std::cout << std::endl; | ||
950 | |||
951 | for(unsigned i=0; i< parSetScoreSorted.size(); ++i){ | ||
952 | map <double, string> :: iterator itr; | ||
953 | std::cout << "For variable "<< i <<":"<< endl; | ||
954 | for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr) | ||
955 | { | ||
956 | cout << itr->second << " : " << itr->first << "; "; | ||
957 | } | ||
958 | cout << endl; | ||
959 | cout << endl; | ||
960 | |||
961 | } | ||
962 | |||
963 | |||
964 | /* ofstream myfile; | ||
965 | myfile.open("/home/zgong001/Documents/SprinklerDataset/bestStructure.txt"); | ||
966 | if (myfile.is_open()) | ||
967 | { | ||
968 | myfile << bestLable << " : " << bestScore << sortedStru.size(); | ||
969 | myfile << "\n"; | ||
970 | |||
971 | for(unsigned i=0; i< sortedStru.size(); ++i){ | ||
972 | myfile << sortedStru[i].second << " : " << sortedStru[i].first; | ||
973 | myfile << "\n"; | ||
974 | } | ||
975 | } | ||
976 | else cout << "Unable to open file"; | ||
977 | myfile.close();*/ | ||
978 | |||
979 | |||
980 | //time after completion | ||
981 | time_t later = time(0); | ||
982 | char* dt_later = ctime(&later); | ||
983 | std::cout << "The local date and time is: " << dt_later << std::endl; | ||
984 | |||
985 | std::cout << std::endl; | ||
986 | std::cout << std::endl; | ||
987 | |||
988 | |||
989 | std::cin.clear(); | ||
990 | std::cin.ignore(); | ||
991 | std::cin.get(); | ||
992 | return 0; | ||
993 | } | ||
994 |