Commit c643e466cef282135e09dbb1e5fb7342cbb61864
1 parent
92636adddb
Exists in
master
This added file finds the best structures and the Markov blanket of each best structure.
Showing
1 changed file
with
993 additions
and
0 deletions
Show diff stats
MyOrder.cpp
... | ... | @@ -0,0 +1,993 @@ |
1 | +#include <string> | |
2 | +#include <vector> | |
3 | +#include <numeric> | |
4 | +#include <fstream> | |
5 | +#include <iterator> | |
6 | +#include <iostream> | |
7 | +#include <utility> | |
8 | +#include <iomanip> | |
9 | +#include <ctime> | |
10 | +#include <boost/math/special_functions.hpp> | |
11 | +#include <boost/lexical_cast.hpp> | |
12 | +#include <boost/algorithm/string/replace.hpp> | |
13 | + | |
14 | +#include <stdio.h> | |
15 | +#include <math.h> | |
16 | +#include <algorithm> // std::find | |
17 | +#include <map> | |
18 | +#include <iterator> | |
19 | +using namespace std; | |
20 | + | |
21 | + | |
// Strict weak ordering for strings: shorter strings sort first, and strings
// of equal length are ordered lexicographically.
struct compare {
    // Declared const so the functor can be invoked through a const object,
    // which ordered containers such as std::set / std::map require of their
    // comparator.
    bool operator()(const std::string& first, const std::string& second) const {
        if (first.size() != second.size())
            return first.size() < second.size();
        return first < second;
    }
};
30 | + | |
// Returns the decimal text representation of `num` (with a leading '-' for
// negative values).
// Uses std::to_string from <string> instead of a temporary string stream.
std::string int_to_str(int num)
{
    return std::to_string(num);
}
39 | + | |
40 | + | |
// Parses a leading decimal integer from `st` using stream extraction
// (leading whitespace is skipped, trailing non-digit text is ignored).
// Returns 0 when no integer can be read.
int str_to_int(const std::string& st)
{
    // Start from 0: for empty or whitespace-only input the extraction never
    // writes to `result`, so without this the function would return an
    // indeterminate value.
    int result = 0;

    std::stringstream parser(st);
    parser >> result;

    return result;
}
49 | + | |
// Ordering predicate for (string, double) pairs: true when the numeric
// member of `i` is strictly smaller than that of `j` (ascending order).
bool compareI(const std::pair<std::string, double>& i, const std::pair<std::string, double>& j)
{
    const double leftValue = i.second;
    const double rightValue = j.second;
    return leftValue < rightValue;
}
54 | + | |
// Ordering predicate for (string, double) pairs: true when the numeric
// member of `i` is strictly larger than that of `j` (descending order).
bool compareD(const std::pair<std::string, double>& i, const std::pair<std::string, double>& j)
{
    const double leftValue = i.second;
    const double rightValue = j.second;
    return leftValue > rightValue;
}
59 | + | |
// Ordering predicate for (double, string) pairs: true when the numeric
// member of `i` is strictly smaller than that of `j` (ascending order).
// The string member is not consulted.
bool compareIn(const std::pair<double, std::string>& i, const std::pair<double, std::string>& j)
{
    const double leftKey = i.first;
    const double rightKey = j.first;
    return leftKey < rightKey;
}
64 | + | |
// Ordering predicate for (double, string) pairs: true when the numeric
// member of `i` is strictly larger than that of `j` (descending order).
// The string member is not consulted.
bool compareDe(const std::pair<double, std::string>& i, const std::pair<double, std::string>& j)
{
    const double leftKey = i.first;
    const double rightKey = j.first;
    return leftKey > rightKey;
}
69 | + | |
// Computes log(exp(x) + exp(y)) without overflowing, by factoring out the
// larger argument: max + log(1 + exp(min - max)).
//
// x, y : values in natural-log space.
// Returns the natural log of the sum of their exponentials. NaN inputs
// propagate to the result.
double logAB (double x, double y)
{
    // Equal inputs give exactly x + log(2). Handling them up front also
    // covers x == y == -infinity (and +infinity), where (min - max) would be
    // inf - inf = NaN and poison the result.
    if (x == y)
    {
        return x + log(2.0);
    }

    const double larger  = (x < y) ? y : x;
    const double smaller = (x < y) ? x : y;

    // log1p keeps precision when exp(smaller - larger) is far below 1,
    // where log(1 + tiny) would round to 0.
    return larger + log1p(exp(smaller - larger));
}
85 | + | |
// Returns exp(oldS - newS) scaled to a percentage (multiplied by 100).
// NOTE(review): presumably both arguments are natural-log scores, making the
// result the ratio old/new in percent -- confirm against the callers.
double persentageXofY (double newS, double oldS)
{
    const double ratio = exp(oldS - newS);
    return ratio * 100;
}
92 | + | |
// Replaces every non-overlapping occurrence of `toSearch` in `data` with
// `replaceStr`, scanning left to right. Text inserted by a replacement is
// never rescanned.
//
// data       : string modified in place.
// toSearch   : substring to look for; an empty value leaves `data` untouched.
// replaceStr : text substituted for each occurrence.
//
// toSearch / replaceStr are taken by value so the call stays well-defined
// even if a caller passes `data` itself for one of them.
void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr)
{
    // An empty pattern matches at every position and would never terminate.
    if (toSearch.empty())
    {
        return;
    }

    std::string::size_type pos = data.find(toSearch);

    while (pos != std::string::npos)
    {
        data.replace(pos, toSearch.size(), replaceStr);
        // Resume just past the inserted text. Advancing by toSearch.size()
        // instead would skip characters when the replacement is shorter and
        // re-match inside the replacement when it is longer.
        pos = data.find(toSearch, pos + replaceStr.size());
    }
}
107 | + | |
108 | +int main() { | |
109 | + /* | |
110 | + //TEMPORARY INPUT FILE | |
111 | + vector< vector<int> > DAT; | |
112 | + vector <int> temp, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16; | |
113 | + //Filling up some of the test variables needed for the data set | |
114 | + temp.push_back(0); | |
115 | + temp.push_back(0); | |
116 | + temp.push_back(1); | |
117 | + //set 2 | |
118 | + temp2.push_back(0); | |
119 | + temp2.push_back(1); | |
120 | + temp2.push_back(0); | |
121 | + //set3 | |
122 | + temp3.push_back(0); | |
123 | + temp3.push_back(1); | |
124 | + temp3.push_back(1); | |
125 | + //set4 | |
126 | + temp4.push_back(1); | |
127 | + temp4.push_back(0); | |
128 | + temp4.push_back(0); | |
129 | + //set5 | |
130 | + temp5.push_back(1); | |
131 | + temp5.push_back(1); | |
132 | + temp5.push_back(1); | |
133 | + //set6 | |
134 | + temp6.push_back(1); | |
135 | + temp6.push_back(1); | |
136 | + temp6.push_back(0); | |
137 | + //set7 | |
138 | + temp7.push_back(1); | |
139 | + temp7.push_back(0); | |
140 | + temp7.push_back(1); | |
141 | + //set8 | |
142 | + temp8.push_back(1); | |
143 | + temp8.push_back(0); | |
144 | + temp8.push_back(1); | |
145 | + //set9 | |
146 | + temp9.push_back(1); | |
147 | + temp9.push_back(0); | |
148 | + temp9.push_back(1); | |
149 | + //set10 | |
150 | + temp10.push_back(1); | |
151 | + temp10.push_back(0); | |
152 | + temp10.push_back(1); | |
153 | + //set11 | |
154 | + temp11.push_back(1); | |
155 | + temp11.push_back(1); | |
156 | + temp11.push_back(1); | |
157 | + //set12 | |
158 | + temp12.push_back(1); | |
159 | + temp12.push_back(0); | |
160 | + temp12.push_back(1); | |
161 | + //set13 | |
162 | + temp13.push_back(1); | |
163 | + temp13.push_back(0); | |
164 | + temp13.push_back(1); | |
165 | + //set14 | |
166 | + temp14.push_back(1); | |
167 | + temp14.push_back(1); | |
168 | + temp14.push_back(0); | |
169 | + //set15 | |
170 | + temp15.push_back(0); | |
171 | + temp15.push_back(1); | |
172 | + temp15.push_back(1); | |
173 | + //set16 | |
174 | + temp16.push_back(1); | |
175 | + temp16.push_back(0); | |
176 | + temp16.push_back(1); | |
177 | + std::cout << endl; | |
178 | + //Filling up test DATASET | |
179 | + DAT.push_back(temp); | |
180 | + DAT.push_back(temp2); | |
181 | + DAT.push_back(temp3); | |
182 | + DAT.push_back(temp4); | |
183 | + DAT.push_back(temp5); | |
184 | + DAT.push_back(temp6); | |
185 | + DAT.push_back(temp7); | |
186 | + DAT.push_back(temp8); | |
187 | + DAT.push_back(temp9); | |
188 | + DAT.push_back(temp10); | |
189 | + DAT.push_back(temp11); | |
190 | + DAT.push_back(temp12); | |
191 | + DAT.push_back(temp13); | |
192 | + DAT.push_back(temp14); | |
193 | + DAT.push_back(temp15); | |
194 | + DAT.push_back(temp16); | |
195 | + std::cout << endl; | |
196 | + size_t totvars = DAT[1].size(); | |
197 | + size_t tottuples = DAT.size(); | |
198 | + */ | |
199 | + //Time before user input | |
200 | + time_t now = time(0); | |
201 | + char* dt = ctime(&now); | |
202 | + std::cout << "The local date and time is: " << dt << std::endl; | |
203 | + string FILENAME; | |
204 | + std::cout << "What is the name of your file?: "; | |
205 | + std::cin >> FILENAME; | |
206 | + // Variable declarations | |
207 | + fstream file; | |
208 | + int COLS; | |
209 | + std::cout << "How many variables in your data file?: "; | |
210 | + std::cin >> COLS; | |
211 | + std::cout << endl; | |
212 | + vector < vector <int> > DAT; // 2d array as a vector of vectors | |
213 | + vector <int> rowVector(COLS); // vector to add into 'array' (represents a row) | |
214 | + int row = 0; // Row counter | |
215 | + | |
216 | + // Read file | |
217 | + file.open(FILENAME.c_str(), ios::in); // Open file | |
218 | + if (file.is_open()) { // If file has correctly opened... | |
219 | + // Output debug message | |
220 | + cout << "File correctly opened" << endl; | |
221 | + | |
222 | + // Dynamically store data into array | |
223 | + while (file.good()) { // ... and while there are no errors, | |
224 | + DAT.push_back(rowVector); // add a new row, | |
225 | + for (int col = 0; col<COLS; col++) { | |
226 | + file >> DAT[row][col]; // fill the row with col elements | |
227 | + } | |
228 | + row++; // Keep track of actual row | |
229 | + } | |
230 | + } | |
231 | + else cout << "Unable to open file" << endl; | |
232 | + file.close(); | |
233 | + | |
234 | + size_t totvars = DAT[1].size(); //column number | |
235 | + size_t tottuples = DAT.size();//row number | |
236 | + | |
237 | + | |
238 | + | |
239 | + | |
240 | + //Ask User for the order they would like to test Order starts at 0 e.g. 0,1,2 is valid order for three variables | |
241 | + int order_i; | |
242 | + vector <int> order; | |
243 | + std::cout << "Which is the order you would like to test?: "; | |
244 | + for (int i = 0; i < totvars;++i) { | |
245 | + std::cin >> order_i; | |
246 | + order.push_back(order_i); | |
247 | + } | |
248 | + std::cout << endl; | |
249 | + //Ask user for max value and min value of each variable being inputed | |
250 | + vector <int> max_cat; | |
251 | + vector <int> min_cat; | |
252 | + int max_cat_input; | |
253 | + int min_cat_input; | |
254 | + | |
255 | + std::cout << "What is the maximum categorical value of each variable?: "; | |
256 | + for (int i = 0; i < totvars; ++i) { | |
257 | + std::cin >> max_cat_input; | |
258 | + max_cat.push_back(max_cat_input); | |
259 | + } | |
260 | + for (size_t i = 0; i < max_cat.size();++i) { | |
261 | + std::cout << max_cat[i] << " "; | |
262 | + } | |
263 | + std::cout << endl; | |
264 | + std::cout << endl; | |
265 | + std::cout << "What is the minimum categorical value of each variable?: "; | |
266 | + for (int i = 0; i < totvars; ++i) { | |
267 | + std::cin >> min_cat_input; | |
268 | + min_cat.push_back(min_cat_input); | |
269 | + } | |
270 | + for (size_t i = 0; i < min_cat.size();++i) { | |
271 | + std::cout << min_cat[i] << " "; | |
272 | + } | |
273 | + std::cout << endl; | |
274 | + //Set the maximum for the amount of parents for any given variable | |
275 | + unsigned int maxparents; | |
276 | + std::cout << "What is the maximum amount of parents to be considered for any given variable?: "; | |
277 | + std::cin >> maxparents; | |
278 | + std::cout << endl; | |
279 | + | |
280 | + double PERCENT; | |
281 | + std::cout << "What is the percentage (Please use the format of 98.99, not 0.9899)?: "; | |
282 | + std::cin >> PERCENT; | |
283 | + std::cout << endl; | |
284 | + | |
285 | + //Time that program begins | |
286 | + time_t start = time(0); | |
287 | + char* dt_start = ctime(&start); | |
288 | + std::cout << "The local date and time is: " << dt_start << std::endl; | |
289 | + | |
290 | + | |
291 | + //Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category: | |
292 | + //categories in i | |
293 | + vector <int> catsi; | |
294 | + //total combinations of variables | |
295 | + //int totcombos = 1; | |
296 | + //how many catagori for every variable | |
297 | + for ( int i = 0; i < totvars; ++i) { | |
298 | + catsi.push_back((max_cat[i] - min_cat[i]) + 1); | |
299 | + //totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1); | |
300 | + } | |
301 | + /*size_t mincatsi = 10000, mincatvar; | |
302 | + for (size_t i = 0; i < catsi.size(); ++i) { | |
303 | + if (catsi[i] < mincatsi) { | |
304 | + mincatsi = catsi[i]; | |
305 | + mincatvar = i; | |
306 | + } | |
307 | + }*/ | |
308 | + //print out catsi | |
309 | + for (size_t i = 0; i < catsi.size();++i) { | |
310 | + std::cout << catsi[i] << " "; | |
311 | + } | |
312 | + std::cout << endl; | |
313 | + | |
314 | + //Total Families Ui,alpha for a particular variable in the order | |
315 | + vector <unsigned long long> families;//is vector, first element is the number of parentset for the first variabel...... | |
316 | + for (unsigned int i = 0; i < totvars; ++i) { | |
317 | + int numparents = i; | |
318 | + if (numparents == 0) { | |
319 | + families.push_back(1); | |
320 | + } | |
321 | + else { | |
322 | + | |
323 | + unsigned long long numfams = 0; | |
324 | + for (unsigned int j = 0; j <= i; ++j) { | |
325 | + if (j <= maxparents) { | |
326 | + unsigned long long jFactorial = 1; | |
327 | + unsigned long long ijFactorial = 1; | |
328 | + unsigned long long iFactorial = 1; | |
329 | + //Calculate j! | |
330 | + for (unsigned int g = 0; g <= j; ++g) { | |
331 | + if (g != 0) { | |
332 | + jFactorial *= g; | |
333 | + } | |
334 | + } | |
335 | + //Calculate i! | |
336 | + for (unsigned int g = 0; g <= i; ++g) { | |
337 | + if (g != 0) { | |
338 | + iFactorial *= g; | |
339 | + } | |
340 | + } | |
341 | + //Calculate (i-j)! | |
342 | + for (unsigned int g = 0; g <= (i - j); ++g) { | |
343 | + if (g != 0) { | |
344 | + ijFactorial *= g; | |
345 | + } | |
346 | + } | |
347 | + numfams += (iFactorial) / (jFactorial * ijFactorial); | |
348 | + } | |
349 | + else { | |
350 | + break; | |
351 | + } | |
352 | + } | |
353 | + families.push_back(numfams); | |
354 | + } | |
355 | + } | |
356 | + | |
357 | + | |
358 | + //How many parent combinations for each step? As well as there counts | |
359 | + vector< vector <int> > ParentCombos; | |
360 | + vector< vector <int> > fullNijkvector; | |
361 | + vector< vector <string> > indexofvar; // This is label or index for each variable. | |
362 | + for (size_t i = 0; i < order.size(); ++i) { | |
363 | + //i represents the order of the variable | |
364 | + if (i == 0) { | |
365 | + vector <int> tmp,Nijkovercombos1; | |
366 | + vector <string> tempstring; | |
367 | + tempstring.push_back("[0]"); | |
368 | + tmp.push_back(1); | |
369 | + ParentCombos.push_back(tmp); | |
370 | + //counting the amount of times that a value of the first variable in the order occurs | |
371 | + //this starts with the maximum value for that variable | |
372 | + for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) { | |
373 | + //hello cycles through the categories of the first variable in the order | |
374 | + int Nijk1 = 0; | |
375 | + //green cycles through tuples | |
376 | + for (int green = 0; green < tottuples; ++green){ | |
377 | + if (DAT[green][order[0]] == hello) { | |
378 | + Nijk1 += 1; | |
379 | + } | |
380 | + } | |
381 | + Nijkovercombos1.push_back(Nijk1); | |
382 | + } | |
383 | + fullNijkvector.push_back(Nijkovercombos1); | |
384 | + indexofvar.push_back(tempstring); | |
385 | + | |
386 | + } | |
387 | + else { | |
388 | + vector <int> tmp,Nijkovercombos1; | |
389 | + vector <string> tempstring; | |
390 | + string tempa = "[" + int_to_str(i) +"]"; | |
391 | + tempstring.push_back(tempa); | |
392 | + tmp.push_back(1); | |
393 | + int numparnts = i; | |
394 | + //counting the amount of times that a value of the last variable in the current order size occurs | |
395 | + //this starts with the maximum value for that variable | |
396 | + for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) { | |
397 | + //hello cycles through the categories of the first variable in the order | |
398 | + int Nijk1 = 0; | |
399 | + //green cycles through tuples | |
400 | + for (int green = 0; green < tottuples; ++green) { | |
401 | + if (DAT[green][order[numparnts]] == hello) { | |
402 | + Nijk1 += 1; | |
403 | + } | |
404 | + } | |
405 | + Nijkovercombos1.push_back(Nijk1); | |
406 | + } | |
407 | + fullNijkvector.push_back(Nijkovercombos1); | |
408 | + | |
409 | + //j representing the number of parents | |
410 | + for (int it = 1; it <= numparnts; ++it) { | |
411 | + //(333)Creating a vector that uses the right combination | |
412 | + double Nloopy = 0, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1; | |
413 | + /*std::cout << "This is for " << numparnts << " choose " << it << endl; | |
414 | + std::cout << "The iteration number is: " << it << endl;*/ | |
415 | + //Accounting for the limit of parent quantity | |
416 | + if (it > maxparents) { | |
417 | + break; | |
418 | + } | |
419 | + else { | |
420 | + vector <int> NewMat(numparnts, 0); | |
421 | + for (int p = 0; p < it; ++p) { | |
422 | + NewMat[p] = 1; | |
423 | + } | |
424 | + for (int g = 2; g <= numparnts; ++g) { | |
425 | + NcolFactorial *= g; | |
426 | + } | |
427 | + for (int g = 2; g <= it; ++g) { | |
428 | + iFactorial *= g; | |
429 | + } | |
430 | + for (int g = 2; g <= (numparnts - it); ++g) { | |
431 | + NiFactorial *= g; | |
432 | + } | |
433 | + //Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts | |
434 | + Nloopy = NcolFactorial / (iFactorial * NiFactorial); | |
435 | + for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) { | |
436 | + int combsparents = 1; | |
437 | + vector <int> parsetv; | |
438 | + for (int par = 0; par < NewMat.size(); ++par){ | |
439 | + if (NewMat[par] == 1){ | |
440 | + parsetv.push_back(par); | |
441 | + } | |
442 | + } | |
443 | + | |
444 | + string tempstring2; | |
445 | + | |
446 | + for (int par2 = 0; par2 < parsetv.size(); ++par2){ | |
447 | + if(par2+1==parsetv.size()){ | |
448 | + tempstring2 = tempstring2 + int_to_str(parsetv[par2]); | |
449 | + //tempstring2 = tempstring2 +","+"|" + int_to_str(i); | |
450 | + tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]"; | |
451 | + } | |
452 | + else{ | |
453 | + //tempstring2 = tempstring2 + int_to_str(parsetv[par2])+","; | |
454 | + tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":"; | |
455 | + } | |
456 | + | |
457 | + | |
458 | + } | |
459 | + tempstring.push_back(tempstring2); | |
460 | + | |
461 | + /*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl; | |
462 | + std::cout << "Here comes the NewMat:" << endl;*/ | |
463 | + //(444)This sets up the process for changing | |
464 | + //PosOne tells me the position of the last one in the vector | |
465 | + //We want to change when the position is the last position available in the vector | |
466 | + int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0; | |
467 | + int SumOnes3 = 0, SumOnes4 = 0, SumY = 0; | |
468 | + for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) { | |
469 | + if (NewMat[PosOne] == 1) { | |
470 | + break; | |
471 | + } | |
472 | + } | |
473 | + for (int y = (numparnts - 1); y >= (numparnts - it); --y) { | |
474 | + //SumOnes tells you the amount of ones in the last i columns | |
475 | + //These are the last columns being considered | |
476 | + SumOnes += NewMat[y]; | |
477 | + } | |
478 | + for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) { | |
479 | + //SumOnes2 tells you the amount of ones before you reach the next zero | |
480 | + //PosOne2 keeps track of the position of the coming zero | |
481 | + SumOnes2 += NewMat[PosOne2]; | |
482 | + if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) { | |
483 | + break; | |
484 | + } | |
485 | + } | |
486 | + for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) { | |
487 | + //FrstOne tells you the position of the first number 1 starting from the left hand side | |
488 | + if (NewMat[FrstOne] == 1) { | |
489 | + break; | |
490 | + } | |
491 | + } | |
492 | + for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) { | |
493 | + //SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions | |
494 | + SumOnes4 += NewMat[x]; | |
495 | + } | |
496 | + //Prints out NewMat | |
497 | + /*for (int u = 0; u < numparnts; ++u) { | |
498 | + std::cout << NewMat[u] << " "; | |
499 | + } | |
500 | + std::cout << endl;*/ | |
501 | + | |
502 | + | |
503 | + //Adding in the code that will allow counts parent combinations for this particular variable | |
504 | + for (int q = 0; q < i; ++q) { | |
505 | + if (NewMat[q] == 1) { | |
506 | + combsparents *= catsi[order[q]]; | |
507 | + } | |
508 | + } | |
509 | + tmp.push_back(combsparents);//made the whole parentset configure for a variable | |
510 | + /*std::cout << "This is combsparents: " << combsparents << endl; | |
511 | + std::cout << endl;*/ | |
512 | + vector <int> hvect; | |
513 | + //hvect tells us which variables are being considered always the last variable is being considered | |
514 | + //e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only | |
515 | + //continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 > | |
516 | + //for A C hvect will be < 0 2 > | |
517 | + for (int h = 0; h < i; ++h) { | |
518 | + if (NewMat[h] == 1) { | |
519 | + hvect.push_back(h); | |
520 | + } | |
521 | + } | |
522 | + hvect.push_back(numparnts); | |
523 | + size_t shvect = hvect.size(); | |
524 | + //Prints out hvect | |
525 | + /*for (int u = 0; u < shvect; ++u) { | |
526 | + std::cout << hvect[u] << " "; | |
527 | + }*/ | |
528 | + //std::cout << endl; | |
529 | + //Counting the amount of values in the data that have that particular parent combination | |
530 | + vector <int> Nijkovercombos; | |
531 | + for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) { | |
532 | + //(333)Creating a vector that uses the right combination | |
533 | + /*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/ | |
534 | + vector <int> Test(shvect, last), maxtest; | |
535 | + for (int p = 0; p < (shvect-1); ++p) { | |
536 | + Test[p] = max_cat[order[hvect[p]]]; | |
537 | + } | |
538 | + maxtest = Test; | |
539 | + for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) { | |
540 | + //std::cout << endl; | |
541 | + /*std::cout << endl; | |
542 | + std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl; | |
543 | + std::cout << "Here comes the Test:" << endl;*/ | |
544 | + //(444)This sets up the process for changing | |
545 | + //NMpos tells me the position of the last non minimum value in the vector | |
546 | + //We want to change when the position is the last position available in the vector | |
547 | + int NMpos = 0, minpos = 0; | |
548 | + for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) { | |
549 | + if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) { | |
550 | + break; | |
551 | + } | |
552 | + } | |
553 | + for (minpos = (shvect - 2); minpos >= 0; --minpos) { | |
554 | + //minpos tells you the position of the last minimum value | |
555 | + if (Test[minpos] == min_cat[order[hvect[minpos]]]) { | |
556 | + break; | |
557 | + } | |
558 | + } | |
559 | + //Prints out Test | |
560 | + /*for (int u = 0; u < shvect; ++u) { | |
561 | + std::cout << Test[u] << " "; | |
562 | + } | |
563 | + std::cout << endl; | |
564 | + std::cout << endl; | |
565 | + std::cout << endl;*/ | |
566 | + //Count how many occurrences of the value are present in the data | |
567 | + int Nijk = 0; | |
568 | + for (int num2size = 0; num2size < tottuples; ++num2size) { | |
569 | + int countcorrect = 0; | |
570 | + for (size_t g = 0; g < Test.size(); ++g) { | |
571 | + //num2size cycles through tuples | |
572 | + //order[hvect[g]] represents the variable in the order that we are considering as a parent | |
573 | + if (DAT[num2size][order[hvect[g]]] == Test[g]) { | |
574 | + countcorrect += 1; | |
575 | + } | |
576 | + } | |
577 | + if (countcorrect == Test.size()) { | |
578 | + Nijk += 1; | |
579 | + } | |
580 | + } | |
581 | + //Nijkovercombos displays data as follows | |
582 | + //it starts with the smallest value for the last variable in hvect | |
583 | + //and the largest values in the first n-1 variables in hvect | |
584 | + //max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0 | |
585 | + //count,count,count,count e.g. 13, 2, 2, 3, 4, 10 | |
586 | + Nijkovercombos.push_back(Nijk); | |
587 | + //(666)Now that the values have been calculated find out what the next combination of variables should be | |
588 | + if ((NMpos == -1) & (minpos == (shvect - 2))) { | |
589 | + //break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000 | |
590 | + break; | |
591 | + } | |
592 | + if (minpos < NMpos) { | |
593 | + Test[NMpos] = Test[NMpos] - 1; | |
594 | + } | |
595 | + else if (NMpos < minpos) { | |
596 | + Test[NMpos] = Test[NMpos] - 1; | |
597 | + for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) { | |
598 | + Test[filler] = maxtest[filler]; | |
599 | + } | |
600 | + } | |
601 | + } | |
602 | + } | |
603 | + fullNijkvector.push_back(Nijkovercombos); | |
604 | + //(666)Now that the unique values have been calculated find out what the next combination of variables should be | |
605 | + if ((PosOne == (numparnts - 1)) & (SumOnes == it)) { | |
606 | + break; | |
607 | + } | |
608 | + else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) { | |
609 | + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { | |
610 | + //NxtOne tells you the position of the next closest number 1 that we would | |
611 | + //like to change the position of (we will call it the important number one) | |
612 | + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one | |
613 | + SumOnes3 += NewMat[NxtOne]; | |
614 | + if (SumOnes3 == (SumOnes2 + 1)) { | |
615 | + break; | |
616 | + } | |
617 | + } | |
618 | + if (SumOnes4 == (it - 1)) { | |
619 | + //If all except one of the 1's are found in the last it - 1 columns | |
620 | + for (int x = 0; x < numparnts; ++x) { | |
621 | + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) { | |
622 | + //If | |
623 | + NewMat[x] = 1; | |
624 | + } | |
625 | + else { | |
626 | + NewMat[x] = 0; | |
627 | + } | |
628 | + } | |
629 | + } | |
630 | + else { | |
631 | + for (int x = 0; x < numparnts; ++x) { | |
632 | + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) { | |
633 | + //If the position is that of the first 1 or it falls between the changed number one and the total | |
634 | + //amount of ones that are on that side of the zero 10111 | |
635 | + NewMat[x] = 1; | |
636 | + } | |
637 | + else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) { | |
638 | + //If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing | |
639 | + //and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right | |
640 | + NewMat[x] = 1; | |
641 | + } | |
642 | + else { | |
643 | + NewMat[x] = 0; | |
644 | + } | |
645 | + } | |
646 | + } | |
647 | + } | |
648 | + else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) { | |
649 | + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { | |
650 | + //NxtOne tells you the position of the next closest number 1 that we would | |
651 | + //like to change the position of (we will call it the important number one) | |
652 | + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one | |
653 | + SumOnes3 += NewMat[NxtOne]; | |
654 | + if (SumOnes3 == 1) { | |
655 | + break; | |
656 | + } | |
657 | + } | |
658 | + if (it != 1) { | |
659 | + for (int x = 0; x < numparnts; ++x) { | |
660 | + if (x == (NxtOne + 1)) { | |
661 | + NewMat[x] = 1; | |
662 | + } | |
663 | + else if (x == NxtOne) { | |
664 | + NewMat[x] = 0; | |
665 | + } | |
666 | + else if ((NewMat[x] == 1) & (x != NxtOne)) { | |
667 | + NewMat[x] = 1; | |
668 | + } | |
669 | + else { | |
670 | + NewMat[x] = 0; | |
671 | + } | |
672 | + } | |
673 | + } | |
674 | + else { | |
675 | + for (int x = 0; x < numparnts; ++x) { | |
676 | + if ((x == (NxtOne + 1))) { | |
677 | + NewMat[x] = 1; | |
678 | + } | |
679 | + else { | |
680 | + NewMat[x] = 0; | |
681 | + } | |
682 | + } | |
683 | + } | |
684 | + } | |
685 | + } | |
686 | + | |
687 | + } | |
688 | + } | |
689 | + ParentCombos.push_back(tmp); | |
690 | + indexofvar.push_back(tempstring); | |
691 | + } | |
692 | + | |
693 | + } | |
694 | + //std::cout << endl; | |
695 | + std::cout << endl; | |
696 | + //printing out the ParentCombos matrix just created above | |
697 | + /*for (size_t i = 0; i < ParentCombos.size(); ++i) { | |
698 | + for (size_t j = 0; j < ParentCombos[i].size(); ++j) { | |
699 | + std::cout << ParentCombos[i][j] << " "; | |
700 | + } | |
701 | + std::cout << endl; | |
702 | + }*/ | |
703 | + std::cout << endl; | |
704 | + //printing out the fullNijkvector matrix just created above | |
705 | + /*for (size_t i = 0; i < fullNijkvector.size(); ++i) { | |
706 | + for (size_t j = 0; j < fullNijkvector[i].size(); ++j) { | |
707 | + std::cout << fullNijkvector[i][j] << " "; | |
708 | + } | |
709 | + std::cout << endl; | |
710 | + } | |
711 | + std::cout << endl;*/ | |
712 | + //Print out Data | |
713 | + /*for (int i = 0; i < DAT.size(); ++i) { | |
714 | + for (int j = 0; j < DAT[i].size(); ++j) { | |
715 | + std::cout << DAT[i][j] << " "; | |
716 | + } | |
717 | + std::cout << endl; | |
718 | + }*/ | |
719 | + //Print out the families size | |
720 | + /*for (size_t i = 0; i < families.size(); ++i) { | |
721 | + std::cout << families[i] << " "; | |
722 | + }*/ | |
723 | + | |
724 | + //Obtaining the actual score from this information | |
725 | + //varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting | |
726 | + //the first variable in the order) | |
727 | + //keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha | |
728 | + int posinfull = 0; | |
729 | + //finlogscore is the final score in natural log format | |
730 | + long double finlogscore = 0.0; | |
731 | + vector< vector <double> > vecvarparset; | |
732 | + for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) { | |
733 | + //sumovUialpha is the the sum over all parent sets for a particular variable | |
734 | + long double sumovUialpha = 0.0; | |
735 | + //vector of all values of seclastgamma | |
736 | + vector <double> vec2ndlastgamma; | |
737 | + long double maxseclastgamma; | |
738 | + //Uialpha cycles through all the parent sets for a particular family | |
739 | + for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) { | |
740 | + // nijkprime represents the value of 1/(ri * qi) | |
741 | + long double nijkprime, nijprime; | |
742 | + long double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha]; | |
743 | + | |
744 | + nijprime = 1.0 / (PCs); | |
745 | + nijkprime = 1.0 / (rij * PCs); | |
746 | + //seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic | |
747 | + long double seclastgamma = 0.0; | |
748 | + //qi_Uialpha cycles through the combinations for the parents in a set | |
749 | + for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) { | |
750 | + long double lastgamma = 0.0; | |
751 | + long double nij = 0.0; | |
752 | + //countijk cycles through the categories of the variable with a particular order | |
753 | + //catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to | |
754 | + //and then find the categories for it | |
755 | + for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) { | |
756 | + long double topy; | |
757 | + //rightcol lets you find the right column/position of the value that you need for a particular category within the | |
758 | + int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]); | |
759 | + nij += fullNijkvector[posinfull][rightcol]; | |
760 | + topy = (nijkprime + fullNijkvector[posinfull][rightcol]); | |
761 | + | |
762 | + //Using boost lgamma function for the product over categories and parent combinations | |
763 | + lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime); | |
764 | + | |
765 | + } | |
766 | + long double boty = nij + nijprime; | |
767 | + seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty); | |
768 | + | |
769 | + } | |
770 | + vec2ndlastgamma.push_back(seclastgamma); | |
771 | + | |
772 | + | |
773 | + | |
774 | + //Calculate sumovUialpha based on the logsumexp concept | |
775 | + if (Uialpha + 1 == families[varinorder]) { | |
776 | + | |
777 | + for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) { | |
778 | + //change the value of maxseclastgamma if new value is larger than the previous value | |
779 | + if (que == 0) { | |
780 | + maxseclastgamma = vec2ndlastgamma[0]; | |
781 | + } | |
782 | + else { | |
783 | + if (maxseclastgamma < vec2ndlastgamma[que]) { | |
784 | + maxseclastgamma = vec2ndlastgamma[que]; | |
785 | + } | |
786 | + } | |
787 | + } | |
788 | + for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) { | |
789 | + sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma); | |
790 | + } | |
791 | + //add info on parent set scores for each variable to this vector of vectors | |
792 | + vecvarparset.push_back(vec2ndlastgamma); | |
793 | + } | |
794 | + | |
795 | + /*std::cout << endl; | |
796 | + std::cout << seclastgamma; | |
797 | + std::cout << endl;*/ | |
798 | + posinfull += 1; | |
799 | + //std::cout << posinfull << endl; | |
800 | + } | |
801 | + finlogscore += log(sumovUialpha) + maxseclastgamma; | |
802 | + } | |
803 | + | |
804 | + vector < vector<string> > parSet; | |
805 | + vector< map <double, string, greater <double> > > parSetScoreSorted; | |
806 | + vector< map <double, string, greater <double> > > strucScore; | |
807 | + | |
808 | + | |
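809 | +//Below is another way to match the label sets with the score sets: for each variable, every parent-set label in indexofvar is paired with its score in vecvarparset and the (score, label) pair is stored in a map. The maps, one per variable, are collected in a vector, and each map is sorted by score in descending order. | |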
810 | + | |
811 | + for (unsigned i = 0; i < indexofvar.size(); ++i){ | |
812 | + | |
813 | + map <double, string, greater <double> > tempMap; | |
814 | + for (unsigned j=0; j< indexofvar[i].size(); ++j){ | |
815 | + tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j])); | |
816 | + } | |
817 | + parSetScoreSorted.push_back(tempMap); | |
818 | + | |
819 | + } | |
820 | + | |
821 | + | |
822 | + pair <double, string> bestStrScore; | |
823 | + double bestScore = 0; | |
824 | + string bestLable; | |
825 | + | |
826 | + | |
827 | + for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){ | |
828 | + map <double, string> :: iterator itr; | |
829 | + itr = parSetScoreSorted[i].begin(); | |
830 | + bestScore = bestScore + (itr->first); | |
831 | + bestLable = bestLable +(itr->second); | |
832 | + } | |
833 | + | |
834 | + bestStrScore = make_pair(bestScore, bestLable);//This is the best score. | |
835 | + | |
836 | + vector < pair <double, string> > sortedStru;//This stores all the structures that fall within the PERCENT threshold. | |
837 | + vector < vector < pair <double, string > > > deltaC; | |
838 | + | |
839 | + for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ | |
840 | + map <double, string> :: iterator itr0, itr1; | |
841 | + vector < pair <double, string > > tempDelta; | |
842 | + double tempDeltaS; | |
843 | + string tempDeltaL; | |
844 | + itr0 = parSetScoreSorted[l].begin(); | |
845 | + itr1 = parSetScoreSorted[l].begin(); | |
846 | + double tem1=exp(itr1->first), tem2 = exp(itr1->first); | |
847 | + for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){ | |
848 | + tem1 = tem2; | |
849 | + itr1 = ++itr1; | |
850 | + tem2 = tem1 + exp(itr1->first); | |
851 | + double tem = (tem1/tem2)*100; | |
852 | + if(tem <= PERCENT){ | |
853 | + tempDeltaS = (itr1->first)-(itr0->first); | |
854 | + tempDeltaL = itr1->second; | |
855 | + tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL)); | |
856 | + } | |
857 | + } | |
858 | + | |
859 | + deltaC.push_back( tempDelta); | |
860 | + } | |
861 | + | |
862 | + | |
863 | + for(unsigned i=0; i< deltaC.size(); ++i){ | |
864 | + for (unsigned j=0; j< deltaC[i].size(); ++j) | |
865 | + { | |
866 | + double score = bestStrScore.first + deltaC[i][j].first; | |
867 | + string lab = bestStrScore.second; | |
868 | + findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second); | |
869 | + sortedStru.push_back(make_pair(score, lab)); | |
870 | + } | |
871 | + } | |
872 | + | |
873 | + sort(sortedStru.begin(),sortedStru.end(),compareDe); | |
874 | + | |
875 | + | |
876 | + | |
877 | +/* for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ //l means each variable | |
878 | + for (unsigned m = contl; m < parSetScoreSorted[l].size(); ++m){ //m means the number of parents sets | |
879 | + contl =m; | |
880 | + vector < pair <string, double > > delta; // store the different of the highest score and the second highest score | |
881 | + for (unsigned i = l; i < parSetScoreSorted.size(); ++i){ | |
882 | + map <double, string> :: iterator itr0, itr1; | |
883 | + double tempDeltaS; | |
884 | + string tempDeltaL; | |
885 | + itr0 = parSetScoreSorted[i].begin(); | |
886 | + itr1 = itr0; | |
887 | + for (int kr = 0; kr < contl; ++kr){ | |
888 | + itr1 = itr0++; | |
889 | + } | |
890 | + tempDeltaS = (itr1->first)-(itr0->first); | |
891 | + tempDeltaL = int_to_str(i); | |
892 | + delta.push_back(make_pair(tempDeltaL, tempDeltaS)); | |
893 | + } | |
894 | + | |
895 | + sort(delta.begin(),delta.end(),compareI); | |
896 | + | |
897 | + double const stopLimit = PERCENT; | |
898 | + double tempbeforeS = bestStrScore.first, temafterS; | |
899 | + | |
900 | + for (unsigned i = 0; i < delta.size(); ++i){ | |
901 | + pair <double, string > temppair; | |
902 | + double tempLime; | |
903 | + int ind = str_to_int(delta[i].first); | |
904 | + string s = parSetScoreSorted[ind].begin()->second; | |
905 | + string sRep = (++parSetScoreSorted[ind].begin())->second; | |
906 | + temppair = bestStrScore; | |
907 | + findAndReplaceAll(temppair.second, s, sRep); | |
908 | + temppair.first = temppair.first - delta[i].second; | |
909 | + | |
910 | + temafterS = logAB(tempbeforeS, temppair.first); | |
911 | + | |
912 | + tempLime = persentageXofY(temafterS, tempbeforeS) ; | |
913 | + | |
914 | + tempbeforeS = temafterS; | |
915 | + | |
916 | + if(tempLime > stopLimit){ | |
917 | + goto finish; | |
918 | + } | |
919 | + | |
920 | + sortedStru.push_back(temppair); | |
921 | + | |
922 | + // std::cout << ind << " "<< s << " "<< sRep << " "<< temppair.first << " " << temppair.second <<endl; | |
923 | + // std::cout << temppair.first << " " << temppair.second <<endl; | |
924 | + // std::cout << std::endl; | |
925 | + | |
926 | + } | |
927 | + | |
928 | + } | |
929 | + | |
930 | + } | |
931 | + | |
932 | + finish:*/ | |
933 | + | |
934 | + | |
935 | + std::cout << std::endl; | |
936 | + std::cout << "Total Score: "<< boost::lexical_cast<string>(finlogscore) << std::endl; | |
937 | + std::cout << std::endl; | |
938 | + | |
939 | + | |
940 | + cout << "Best several structures are:" << endl; | |
941 | + cout << bestScore << " : " << bestLable << endl; | |
942 | + | |
943 | + for (unsigned i=0; i< sortedStru.size(); ++i ) | |
944 | + { | |
945 | + cout << sortedStru[i].first << " : " << sortedStru[i].second << endl; | |
946 | + } | |
947 | + | |
948 | + std::cout << std::endl; | |
949 | + std::cout << std::endl; | |
950 | + | |
951 | + for(unsigned i=0; i< parSetScoreSorted.size(); ++i){ | |
952 | + map <double, string> :: iterator itr; | |
953 | + std::cout << "For variable "<< i <<":"<< endl; | |
954 | + for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr) | |
955 | + { | |
956 | + cout << itr->second << " : " << itr->first << "; "; | |
957 | + } | |
958 | + cout << endl; | |
959 | + cout << endl; | |
960 | + | |
961 | + } | |
962 | + | |
963 | + | |
964 | +/* ofstream myfile; | |
965 | + myfile.open("/home/zgong001/Documents/SprinklerDataset/bestStructure.txt"); | |
966 | + if (myfile.is_open()) | |
967 | + { | |
968 | + myfile << bestLable << " : " << bestScore << sortedStru.size(); | |
969 | + myfile << "\n"; | |
970 | + | |
971 | + for(unsigned i=0; i< sortedStru.size(); ++i){ | |
972 | + myfile << sortedStru[i].second << " : " << sortedStru[i].first; | |
973 | + myfile << "\n"; | |
974 | + } | |
975 | + } | |
976 | + else cout << "Unable to open file"; | |
977 | + myfile.close();*/ | |
978 | + | |
979 | + | |
980 | + //time after completion | |
981 | + time_t later = time(0); | |
982 | + char* dt_later = ctime(&later); | |
983 | + std::cout << "The local date and time is: " << dt_later << std::endl; | |
984 | + | |
985 | + std::cout << std::endl; | |
986 | + std::cout << std::endl; | |
987 | + | |
988 | + | |
989 | + std::cin.clear(); | |
990 | + std::cin.ignore(); | |
991 | + std::cin.get(); | |
992 | + return 0; | |
993 | +} |