Commit 53863fc1fc8d2efea4e4e5ed2a81265a6490022c
1 parent
7f982ea62a
Exists in
master
Get structures from orders
Showing
1 changed file
with
1030 additions
and
0 deletions
Show diff stats
GetStructure.cpp
File was created | 1 | #include <string> | |
2 | #include <vector> | ||
3 | #include <numeric> | ||
4 | #include <fstream> | ||
5 | #include <iterator> | ||
6 | #include <iostream> | ||
7 | #include <utility> | ||
8 | #include <iomanip> | ||
9 | #include <ctime> | ||
10 | #include <sstream> | ||
11 | #include <boost/math/special_functions.hpp> | ||
12 | #include <boost/lexical_cast.hpp> | ||
13 | #include <boost/algorithm/string/replace.hpp> | ||
14 | #include <boost/multiprecision/cpp_int.hpp> | ||
15 | |||
16 | #include <stdio.h> | ||
17 | #include <math.h> | ||
18 | #include <algorithm> // std::find | ||
19 | #include <map> | ||
20 | #include <iterator> | ||
21 | #include <bits/stdc++.h> | ||
22 | |||
23 | using namespace std; | ||
24 | |||
25 | //Function for calculating the binomial coefficient | ||
26 | boost::multiprecision::cpp_int BinomialCoefficient(unsigned int n, unsigned int k) { | ||
27 | if (k == 0) { return 1; } | ||
28 | else { return (n * BinomialCoefficient(n - 1, k - 1)) / k; } | ||
29 | } | ||
// Ordering functor for strings: shorter strings sort first; strings of equal
// length are ordered lexicographically.
// The call operator is const so the functor can be used as the comparator of
// ordered containers (std::set / std::map) and through const references.
struct compare {
    bool operator()(const std::string& first, const std::string& second) const {
        if (first.size() != second.size())
            return first.size() < second.size();
        return first < second;
    }
};
38 | |||
// Formats an int as its decimal text representation via stream insertion.
std::string int_to_str(int num)
{
    std::ostringstream formatter;
    formatter << num;
    return formatter.str();
}
47 | |||
48 | |||
// Parses the leading integer from `st` using stream extraction (leading
// whitespace is skipped, trailing characters are ignored).
// Returns 0 when nothing can be extracted. The result is zero-initialised
// because an empty or whitespace-only string makes the extraction fail before
// the target is ever written, which previously returned an indeterminate value.
int str_to_int(std::string st)
{
    int result = 0;
    std::istringstream parser(st);
    parser >> result;
    return result;
}
57 | |||
// Parses the leading floating-point number from `str` using stream extraction
// (leading whitespace is skipped, trailing characters are ignored).
// Returns 0.0 when nothing can be extracted. The result is zero-initialised
// because an empty or whitespace-only string makes the extraction fail before
// the target is ever written, which previously returned an indeterminate value.
double str2doub(std::string str)
{
    double d = 0.0;
    std::istringstream parser(str);
    parser >> d;
    return d;
}
64 | |||
// Ascending-order predicate on the numeric (second) member of a (label, value) pair.
bool compareI(const std::pair<std::string, double>& i, const std::pair<std::string, double>& j)
{
    return j.second > i.second;
}
69 | |||
// Descending-order predicate on the numeric (second) member of a (label, value) pair.
bool compareD(const std::pair<std::string, double>& i, const std::pair<std::string, double>& j)
{
    return j.second < i.second;
}
74 | |||
// Ascending-order predicate on the numeric (first) member of a (value, label) pair.
bool compareIn(const std::pair<double, std::string>& i, const std::pair<double, std::string>& j)
{
    return j.first > i.first;
}
79 | |||
// Descending-order predicate on the numeric (first) member of a (value, label) pair.
bool compareDe(const std::pair<double, std::string>& i, const std::pair<double, std::string>& j)
{
    return j.first < i.first;
}
84 | |||
// Log-sum-exp of two values: returns log(exp(x) + exp(y)) without overflowing,
// by factoring out the larger argument.
//
// Differences from the naive max + log(1 + exp(min - max)) form:
//  * log1p keeps the contribution of the smaller term when exp(min - max) is
//    below machine epsilon (log(1 + tiny) would round to exactly 0).
//  * When both arguments are equal the result is max + log(2); this also covers
//    x == y == -inf (and +inf), where min - max would be NaN.
// NaN in either argument still yields NaN.
double logAB (double x, double y)
{
    const double hi = std::max(x, y);
    // Pick "the other one" explicitly (rather than std::min) so a NaN argument
    // always ends up in the arithmetic below and propagates.
    const double lo = (hi == x) ? y : x;

    if (hi == lo)
    {
        return hi + std::log(2.0);
    }
    return hi + std::log1p(std::exp(lo - hi));
}
100 | |||
// Returns exp(oldS - newS) * 100.
// NOTE(review): presumably both arguments are natural-log scores, making the
// result "old as a percentage of new" - confirm against callers.
double persentageXofY (double newS, double oldS)
{
    const double logRatio = oldS - newS;
    return std::exp(logRatio) * 100;
}
107 | |||
// Replaces, in place, every non-overlapping occurrence of `toSearch` in `data`
// with `replaceStr`, scanning left to right.
//  * An empty `toSearch` is a no-op (it would otherwise match at every
//    position and never terminate).
//  * After each replacement the scan resumes just past the inserted text, so
//    text coming from `replaceStr` is never re-scanned (safe when replaceStr
//    contains toSearch) and no original text is skipped when the replacement
//    is shorter than the pattern.
void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr)
{
    if (toSearch.empty())
    {
        return;
    }

    std::size_t pos = data.find(toSearch);
    while (pos != std::string::npos)
    {
        data.replace(pos, toSearch.size(), replaceStr);
        // Continue after the replacement text, not after the old pattern length.
        pos = data.find(toSearch, pos + replaceStr.size());
    }
}
122 | |||
// Stub: both arguments are currently ignored and an empty string is returned.
std::string findMB(std::string structures, std::string variable){
    return std::string();
}
129 | |||
130 | int main() { | ||
131 | |||
132 | string FILENAME; // Downloads/D50C9v.txt | ||
133 | int COLS; // 9 | ||
134 | vector <int> max_cat; // 1 2 1 3 2 1 2 2 3 | ||
135 | vector <int> min_cat; // 0 0 0 0 0 0 0 0 0 | ||
136 | unsigned int maxparents; // 4 | ||
137 | vector <int> order; //2 6 8 4 1 0 3 5 7 | ||
138 | double PERCENT; | ||
139 | string filepath; // output file name | ||
140 | |||
141 | vector<string> settings; | ||
142 | vector<bool> settingsFlag; | ||
143 | |||
144 | settings.push_back("DataFile"); | ||
145 | settingsFlag.push_back(false); | ||
146 | settings.push_back("TotalVariables"); | ||
147 | settingsFlag.push_back(false); | ||
148 | settings.push_back("MaximumCategory"); | ||
149 | settingsFlag.push_back(false); | ||
150 | settings.push_back("MinimumCategory"); | ||
151 | settingsFlag.push_back(false); | ||
152 | settings.push_back("MaximumParents"); | ||
153 | settingsFlag.push_back(false); | ||
154 | settings.push_back("Order"); | ||
155 | settingsFlag.push_back(false); | ||
156 | settings.push_back("Percentage"); | ||
157 | settingsFlag.push_back(false); | ||
158 | settings.push_back("OutputFileName"); | ||
159 | settingsFlag.push_back(false); | ||
160 | |||
161 | fstream infile; | ||
162 | string ConfigFile; | ||
163 | std::cout << "What is the location of the configuration file that you edited?: "; | ||
164 | std::cin >> ConfigFile; | ||
165 | std::cout << std::endl; | ||
166 | //string ConfigFile = "/home/efraingonzalez0/Downloads/new.config"; | ||
167 | infile.open(ConfigFile.c_str(),ios::in); | ||
168 | vector<string> line; | ||
169 | if(infile.is_open()) { | ||
170 | std::cout << "Configuration file correctly opened" << std::endl; | ||
171 | while(!infile.eof()) { | ||
172 | string tmp; | ||
173 | getline(infile,tmp); | ||
174 | line.push_back(tmp); | ||
175 | } | ||
176 | } | ||
177 | else { | ||
178 | std::cout << "Unable to open configuration file" << std::endl; | ||
179 | exit(0); | ||
180 | } | ||
181 | |||
182 | infile.close(); | ||
183 | line.erase(line.end()); | ||
184 | for(size_t k = 0; k < settings.size(); ++k) { | ||
185 | for(size_t i = 0; i < line.size(); ++i) { | ||
186 | size_t foundit = line[i].find(settings[k]); | ||
187 | if (foundit != std::string::npos) { | ||
188 | //find position of "=" | ||
189 | unsigned int poseq = line[i].find("="); | ||
190 | unsigned int posneqfir,posneqlas; | ||
191 | //first position of relevant string | ||
192 | for(unsigned int j = poseq + 1; j < line[i].length(); ++j) { | ||
193 | if(line[i].at(j) != ' ') { | ||
194 | posneqfir = j; | ||
195 | break; | ||
196 | } | ||
197 | } | ||
198 | //last position of relevant string | ||
199 | for(unsigned int j2 = line[i].length() - 1; j2 >= posneqfir; ++j2) { | ||
200 | if((line[i].at(j2) != ' ') && (line[i].at(j2) != '\n')){ | ||
201 | posneqlas = j2; | ||
202 | break; | ||
203 | } | ||
204 | } | ||
205 | |||
206 | if(settings[k] == "DataFile") { | ||
207 | FILENAME = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | ||
208 | } | ||
209 | else if(settings[k] == "TotalVariables") { | ||
210 | COLS = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1)); | ||
211 | } | ||
212 | else if(settings[k] == "MaximumCategory") {//still need to edit | ||
213 | string max_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | ||
214 | std::stringstream macs(max_cat_string); | ||
215 | int macsti; | ||
216 | while( macs >> macsti) { | ||
217 | max_cat.push_back(macsti); | ||
218 | } | ||
219 | } | ||
220 | else if(settings[k] == "MinimumCategory") {//still need to edit | ||
221 | string min_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | ||
222 | std::stringstream mics(min_cat_string); | ||
223 | int micsti; | ||
224 | while( mics >> micsti) { | ||
225 | min_cat.push_back(micsti); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | else if(settings[k] == "MaximumParents") { | ||
230 | maxparents = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1)); | ||
231 | } | ||
232 | |||
233 | else if(settings[k] == "Order") {//still need to edit | ||
234 | string st_ord_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | ||
235 | std::stringstream starordr(st_ord_string); | ||
236 | int StOrti; | ||
237 | while( starordr >> StOrti) { | ||
238 | order.push_back(StOrti); | ||
239 | } | ||
240 | } | ||
241 | |||
242 | else if(settings[k] == "Percentage") { | ||
243 | PERCENT = str2doub(line[i].substr(posneqfir,posneqlas - posneqfir + 1)); | ||
244 | } | ||
245 | |||
246 | else if(settings[k] == "OutputFileName") { | ||
247 | filepath = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | ||
248 | } | ||
249 | |||
250 | settingsFlag[k] = true; | ||
251 | break; | ||
252 | } | ||
253 | } | ||
254 | } | ||
255 | |||
256 | for(int k=0;k<settings.size();k++) { | ||
257 | if(!settingsFlag[k]) { | ||
258 | std::cout << "Error: "<< settings[k] << " is not configured." << std::endl; | ||
259 | exit(0); | ||
260 | } | ||
261 | else if(max_cat.size() != COLS) { | ||
262 | std::cout << "Error: Size of MaximumCategory and the number of TotalVariables do not match" << std::endl; | ||
263 | exit(0); | ||
264 | } | ||
265 | else if(min_cat.size() != COLS) { | ||
266 | std::cout << "Error: Size of MinimumCategory and the number of TotalVariables do not match" << std::endl; | ||
267 | exit(0); | ||
268 | } | ||
269 | else if(max_cat.size() != min_cat.size()) { | ||
270 | std::cout << "Error: Size of MaximumCategory and MinimumCategory do not match" << std::endl; | ||
271 | exit(0); | ||
272 | } | ||
273 | else if(PERCENT > 100.0){ | ||
274 | std::cout << "Error: Percentage may not be larger than 100.0 which represents 100%. If you need a difference larger than 100% then use a negative number." << std::endl; | ||
275 | exit(0); | ||
276 | } | ||
277 | } | ||
278 | |||
279 | |||
280 | |||
281 | fstream file; | ||
282 | std::cout << endl; | ||
283 | vector < vector <int> > DAT; // 2d array as a vector of vectors | ||
284 | vector <int> rowVector(COLS); // vector to add into 'array' (represents a row) | ||
285 | int row = 0; // Row counter | ||
286 | |||
287 | // Read file | ||
288 | file.open(FILENAME.c_str(), ios::in); // Open file | ||
289 | if (file.is_open()) { // If file has correctly opened... | ||
290 | // Output debug message | ||
291 | cout << "File correctly opened" << endl; | ||
292 | |||
293 | // Dynamically store data into array | ||
294 | while (file.good()) { // ... and while there are no errors, | ||
295 | DAT.push_back(rowVector); // add a new row, | ||
296 | for (int col = 0; col<COLS; col++) { | ||
297 | file >> DAT[row][col]; // fill the row with col elements | ||
298 | } | ||
299 | row++; // Keep track of actual row | ||
300 | } | ||
301 | } | ||
302 | else cout << "Unable to open file" << endl; | ||
303 | file.close(); | ||
304 | DAT.erase(DAT.end()); | ||
305 | |||
306 | size_t totvars = DAT[1].size(); //column number | ||
307 | size_t tottuples = DAT.size();//row number | ||
308 | |||
309 | |||
310 | std::cout << endl; | ||
311 | |||
312 | |||
313 | //Time that program begins | ||
314 | time_t start = time(0); | ||
315 | char* dt_start = ctime(&start); | ||
316 | std::cout << "The local date and time is: " << dt_start << std::endl; | ||
317 | |||
318 | |||
319 | //Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category: | ||
320 | //categories in i | ||
321 | vector <int> catsi; | ||
322 | //total combinations of variables | ||
323 | //int totcombos = 1; | ||
324 | //how many catagori for every variable | ||
325 | for ( int i = 0; i < totvars; ++i) { | ||
326 | catsi.push_back((max_cat[i] - min_cat[i]) + 1); | ||
327 | //totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1); | ||
328 | } | ||
329 | |||
330 | //print out catsi | ||
331 | for (size_t i = 0; i < catsi.size();++i) { | ||
332 | std::cout << catsi[i] << " "; | ||
333 | } | ||
334 | std::cout << endl; | ||
335 | |||
336 | //Total Families Ui,alpha for a particular variable in the order | ||
337 | //Total Families Ui,alpha for a particular variable in the order | ||
338 | vector <boost::multiprecision::cpp_int> families; | ||
339 | for (unsigned int i = 0; i < totvars; ++i) { | ||
340 | int numparents = i; | ||
341 | if (numparents == 0) { | ||
342 | families.push_back(1); | ||
343 | } | ||
344 | else { | ||
345 | |||
346 | boost::multiprecision::cpp_int numfams = 0; | ||
347 | for (unsigned int j = 0; j <= i; ++j) { | ||
348 | if (j <= maxparents) { | ||
349 | unsigned long long jFactorial = 1; | ||
350 | unsigned long long ijFactorial = 1; | ||
351 | unsigned long long iFactorial = 1; | ||
352 | //Calculate j! | ||
353 | for (unsigned int g = 0; g <= j; ++g) { | ||
354 | if (g != 0) { | ||
355 | jFactorial *= g; | ||
356 | } | ||
357 | } | ||
358 | //Calculate i! | ||
359 | for (unsigned int g = 0; g <= i; ++g) { | ||
360 | if (g != 0) { | ||
361 | iFactorial *= g; | ||
362 | } | ||
363 | } | ||
364 | //Calculate (i-j)! | ||
365 | for (unsigned int g = 0; g <= (i - j); ++g) { | ||
366 | if (g != 0) { | ||
367 | ijFactorial *= g; | ||
368 | } | ||
369 | } | ||
370 | numfams += BinomialCoefficient(i,j); | ||
371 | } | ||
372 | else { | ||
373 | break; | ||
374 | } | ||
375 | } | ||
376 | families.push_back(numfams); | ||
377 | } | ||
378 | } | ||
379 | |||
380 | |||
381 | //How many parent combinations for each step? As well as there counts | ||
382 | vector< vector <int> > ParentCombos; | ||
383 | vector< vector <int> > fullNijkvector; | ||
384 | vector< vector <string> > indexofvar; // This is label or index for each variable. | ||
385 | for (size_t i = 0; i < order.size(); ++i) { | ||
386 | //i represents the order of the variable | ||
387 | if (i == 0) { | ||
388 | vector <int> tmp,Nijkovercombos1; | ||
389 | vector <string> tempstring; | ||
390 | // tempstring.push_back("[0]"); | ||
391 | tempstring.push_back("[" + int_to_str(order[0])+ "]"); | ||
392 | tmp.push_back(1); | ||
393 | ParentCombos.push_back(tmp); | ||
394 | //counting the amount of times that a value of the first variable in the order occurs | ||
395 | //this starts with the maximum value for that variable | ||
396 | for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) { | ||
397 | //hello cycles through the categories of the first variable in the order | ||
398 | int Nijk1 = 0; | ||
399 | //green cycles through tuples | ||
400 | for (int green = 0; green < tottuples; ++green){ | ||
401 | if (DAT[green][order[0]] == hello) { | ||
402 | Nijk1 += 1; | ||
403 | } | ||
404 | } | ||
405 | Nijkovercombos1.push_back(Nijk1); | ||
406 | } | ||
407 | fullNijkvector.push_back(Nijkovercombos1); | ||
408 | indexofvar.push_back(tempstring); | ||
409 | |||
410 | } | ||
411 | else { | ||
412 | vector <int> tmp,Nijkovercombos1; | ||
413 | vector <string> tempstring; | ||
414 | // string tempa = "[" + int_to_str(i) +"]"; | ||
415 | string tempa = "[" + int_to_str(order[i]) +"]"; | ||
416 | tempstring.push_back(tempa); | ||
417 | tmp.push_back(1); | ||
418 | int numparnts = i; | ||
419 | //counting the amount of times that a value of the last variable in the current order size occurs | ||
420 | //this starts with the maximum value for that variable | ||
421 | for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) { | ||
422 | //hello cycles through the categories of the first variable in the order | ||
423 | int Nijk1 = 0; | ||
424 | //green cycles through tuples | ||
425 | for (int green = 0; green < tottuples; ++green) { | ||
426 | if (DAT[green][order[numparnts]] == hello) { | ||
427 | Nijk1 += 1; | ||
428 | } | ||
429 | } | ||
430 | Nijkovercombos1.push_back(Nijk1); | ||
431 | } | ||
432 | fullNijkvector.push_back(Nijkovercombos1); | ||
433 | |||
434 | //j representing the number of parents | ||
435 | for (int it = 1; it <= numparnts; ++it) { | ||
436 | //(333)Creating a vector that uses the right combination | ||
437 | boost::multiprecision::cpp_int Nloopy = 0; | ||
438 | //, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1; | ||
439 | /*std::cout << "This is for " << numparnts << " choose " << it << endl; | ||
440 | std::cout << "The iteration number is: " << it << endl;*/ | ||
441 | //Accounting for the limit of parent quantity | ||
442 | if (it > maxparents) { | ||
443 | break; | ||
444 | } | ||
445 | else { | ||
446 | vector <int> NewMat(numparnts, 0); | ||
447 | for (int p = 0; p < it; ++p) { | ||
448 | NewMat[p] = 1; | ||
449 | } | ||
450 | /*for (int g = 2; g <= numparnts; ++g) { | ||
451 | NcolFactorial *= g; | ||
452 | } | ||
453 | for (int g = 2; g <= it; ++g) { | ||
454 | iFactorial *= g; | ||
455 | } | ||
456 | for (int g = 2; g <= (numparnts - it); ++g) { | ||
457 | NiFactorial *= g; | ||
458 | }*/ | ||
459 | //Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts | ||
460 | Nloopy = BinomialCoefficient(numparnts,it); | ||
461 | for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) { | ||
462 | int combsparents = 1; | ||
463 | vector <int> parsetv; | ||
464 | for (int par = 0; par < NewMat.size(); ++par){ | ||
465 | if (NewMat[par] == 1){ | ||
466 | parsetv.push_back(par); | ||
467 | } | ||
468 | } | ||
469 | |||
470 | string tempstring2; | ||
471 | |||
472 | for (int par2 = 0; par2 < parsetv.size(); ++par2){ | ||
473 | if(par2+1==parsetv.size()){ | ||
474 | // tempstring2 = tempstring2 + int_to_str(parsetv[par2]); | ||
475 | tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]]); | ||
476 | //tempstring2 = tempstring2 +","+"|" + int_to_str(i); | ||
477 | // tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]"; | ||
478 | tempstring2 = "[" + int_to_str(order[i])+"|"+tempstring2 +"]"; | ||
479 | } | ||
480 | else{ | ||
481 | //tempstring2 = tempstring2 + int_to_str(parsetv[par2])+","; | ||
482 | // tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":"; | ||
483 | tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]])+":"; | ||
484 | } | ||
485 | |||
486 | |||
487 | } | ||
488 | tempstring.push_back(tempstring2); | ||
489 | |||
490 | /*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl; | ||
491 | std::cout << "Here comes the NewMat:" << endl;*/ | ||
492 | //(444)This sets up the process for changing | ||
493 | //PosOne tells me the position of the last one in the vector | ||
494 | //We want to change when the position is the last position available in the vector | ||
495 | int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0; | ||
496 | int SumOnes3 = 0, SumOnes4 = 0, SumY = 0; | ||
497 | for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) { | ||
498 | if (NewMat[PosOne] == 1) { | ||
499 | break; | ||
500 | } | ||
501 | } | ||
502 | for (int y = (numparnts - 1); y >= (numparnts - it); --y) { | ||
503 | //SumOnes tells you the amount of ones in the last i columns | ||
504 | //These are the last columns being considered | ||
505 | SumOnes += NewMat[y]; | ||
506 | } | ||
507 | for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) { | ||
508 | //SumOnes2 tells you the amount of ones before you reach the next zero | ||
509 | //PosOne2 keeps track of the position of the coming zero | ||
510 | SumOnes2 += NewMat[PosOne2]; | ||
511 | if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) { | ||
512 | break; | ||
513 | } | ||
514 | } | ||
515 | for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) { | ||
516 | //FrstOne tells you the position of the first number 1 starting from the left hand side | ||
517 | if (NewMat[FrstOne] == 1) { | ||
518 | break; | ||
519 | } | ||
520 | } | ||
521 | for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) { | ||
522 | //SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions | ||
523 | SumOnes4 += NewMat[x]; | ||
524 | } | ||
525 | //Prints out NewMat | ||
526 | /*for (int u = 0; u < numparnts; ++u) { | ||
527 | std::cout << NewMat[u] << " "; | ||
528 | } | ||
529 | std::cout << endl;*/ | ||
530 | |||
531 | |||
532 | //Adding in the code that will allow counts parent combinations for this particular variable | ||
533 | for (int q = 0; q < i; ++q) { | ||
534 | if (NewMat[q] == 1) { | ||
535 | combsparents *= catsi[order[q]]; | ||
536 | } | ||
537 | } | ||
538 | tmp.push_back(combsparents);//made the whole parentset configure for a variable | ||
539 | /*std::cout << "This is combsparents: " << combsparents << endl; | ||
540 | std::cout << endl;*/ | ||
541 | vector <int> hvect; | ||
542 | //hvect tells us which variables are being considered always the last variable is being considered | ||
543 | //e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only | ||
544 | //continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 > | ||
545 | //for A C hvect will be < 0 2 > | ||
546 | for (int h = 0; h < i; ++h) { | ||
547 | if (NewMat[h] == 1) { | ||
548 | hvect.push_back(h); | ||
549 | } | ||
550 | } | ||
551 | hvect.push_back(numparnts); | ||
552 | size_t shvect = hvect.size(); | ||
553 | //Prints out hvect | ||
554 | /*for (int u = 0; u < shvect; ++u) { | ||
555 | std::cout << hvect[u] << " "; | ||
556 | }*/ | ||
557 | //std::cout << endl; | ||
558 | //Counting the amount of values in the data that have that particular parent combination | ||
559 | vector <int> Nijkovercombos; | ||
560 | for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) { | ||
561 | //(333)Creating a vector that uses the right combination | ||
562 | /*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/ | ||
563 | vector <int> Test(shvect, last), maxtest; | ||
564 | for (int p = 0; p < (shvect-1); ++p) { | ||
565 | Test[p] = max_cat[order[hvect[p]]]; | ||
566 | } | ||
567 | maxtest = Test; | ||
568 | for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) { | ||
569 | //std::cout << endl; | ||
570 | /*std::cout << endl; | ||
571 | std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl; | ||
572 | std::cout << "Here comes the Test:" << endl;*/ | ||
573 | //(444)This sets up the process for changing | ||
574 | //NMpos tells me the position of the last non minimum value in the vector | ||
575 | //We want to change when the position is the last position available in the vector | ||
576 | int NMpos = 0, minpos = 0; | ||
577 | for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) { | ||
578 | if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) { | ||
579 | break; | ||
580 | } | ||
581 | } | ||
582 | for (minpos = (shvect - 2); minpos >= 0; --minpos) { | ||
583 | //minpos tells you the position of the last minimum value | ||
584 | if (Test[minpos] == min_cat[order[hvect[minpos]]]) { | ||
585 | break; | ||
586 | } | ||
587 | } | ||
588 | //Prints out Test | ||
589 | /*for (int u = 0; u < shvect; ++u) { | ||
590 | std::cout << Test[u] << " "; | ||
591 | } | ||
592 | std::cout << endl; | ||
593 | std::cout << endl; | ||
594 | std::cout << endl;*/ | ||
595 | //Count how many occurrences of the value are present in the data | ||
596 | int Nijk = 0; | ||
597 | for (int num2size = 0; num2size < tottuples; ++num2size) { | ||
598 | int countcorrect = 0; | ||
599 | for (size_t g = 0; g < Test.size(); ++g) { | ||
600 | //num2size cycles through tuples | ||
601 | //order[hvect[g]] represents the variable in the order that we are considering as a parent | ||
602 | if (DAT[num2size][order[hvect[g]]] == Test[g]) { | ||
603 | countcorrect += 1; | ||
604 | } | ||
605 | } | ||
606 | if (countcorrect == Test.size()) { | ||
607 | Nijk += 1; | ||
608 | } | ||
609 | } | ||
610 | //Nijkovercombos displays data as follows | ||
611 | //it starts with the smallest value for the last variable in hvect | ||
612 | //and the largest values in the first n-1 variables in hvect | ||
613 | //max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0 | ||
614 | //count,count,count,count e.g. 13, 2, 2, 3, 4, 10 | ||
615 | Nijkovercombos.push_back(Nijk); | ||
616 | //(666)Now that the values have been calculated find out what the next combination of variables should be | ||
617 | if ((NMpos == -1) & (minpos == (shvect - 2))) { | ||
618 | //break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000 | ||
619 | break; | ||
620 | } | ||
621 | if (minpos < NMpos) { | ||
622 | Test[NMpos] = Test[NMpos] - 1; | ||
623 | } | ||
624 | else if (NMpos < minpos) { | ||
625 | Test[NMpos] = Test[NMpos] - 1; | ||
626 | for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) { | ||
627 | Test[filler] = maxtest[filler]; | ||
628 | } | ||
629 | } | ||
630 | } | ||
631 | } | ||
632 | fullNijkvector.push_back(Nijkovercombos); | ||
633 | //(666)Now that the unique values have been calculated find out what the next combination of variables should be | ||
634 | if ((PosOne == (numparnts - 1)) & (SumOnes == it)) { | ||
635 | break; | ||
636 | } | ||
637 | else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) { | ||
638 | for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { | ||
639 | //NxtOne tells you the position of the next closest number 1 that we would | ||
640 | //like to change the position of (we will call it the important number one) | ||
641 | //SumOnes3 helps keep track of the sum of all ones between now and the next important number one | ||
642 | SumOnes3 += NewMat[NxtOne]; | ||
643 | if (SumOnes3 == (SumOnes2 + 1)) { | ||
644 | break; | ||
645 | } | ||
646 | } | ||
647 | if (SumOnes4 == (it - 1)) { | ||
648 | //If all except one of the 1's are found in the last it - 1 columns | ||
649 | for (int x = 0; x < numparnts; ++x) { | ||
650 | if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) { | ||
651 | //If | ||
652 | NewMat[x] = 1; | ||
653 | } | ||
654 | else { | ||
655 | NewMat[x] = 0; | ||
656 | } | ||
657 | } | ||
658 | } | ||
659 | else { | ||
660 | for (int x = 0; x < numparnts; ++x) { | ||
661 | if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) { | ||
662 | //If the position is that of the first 1 or it falls between the changed number one and the total | ||
663 | //amount of ones that are on that side of the zero 10111 | ||
664 | NewMat[x] = 1; | ||
665 | } | ||
666 | else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) { | ||
667 | //If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing | ||
668 | //and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right | ||
669 | NewMat[x] = 1; | ||
670 | } | ||
671 | else { | ||
672 | NewMat[x] = 0; | ||
673 | } | ||
674 | } | ||
675 | } | ||
676 | } | ||
677 | else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) { | ||
678 | for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { | ||
679 | //NxtOne tells you the position of the next closest number 1 that we would | ||
680 | //like to change the position of (we will call it the important number one) | ||
681 | //SumOnes3 helps keep track of the sum of all ones between now and the next important number one | ||
682 | SumOnes3 += NewMat[NxtOne]; | ||
683 | if (SumOnes3 == 1) { | ||
684 | break; | ||
685 | } | ||
686 | } | ||
687 | if (it != 1) { | ||
688 | for (int x = 0; x < numparnts; ++x) { | ||
689 | if (x == (NxtOne + 1)) { | ||
690 | NewMat[x] = 1; | ||
691 | } | ||
692 | else if (x == NxtOne) { | ||
693 | NewMat[x] = 0; | ||
694 | } | ||
695 | else if ((NewMat[x] == 1) & (x != NxtOne)) { | ||
696 | NewMat[x] = 1; | ||
697 | } | ||
698 | else { | ||
699 | NewMat[x] = 0; | ||
700 | } | ||
701 | } | ||
702 | } | ||
703 | else { | ||
704 | for (int x = 0; x < numparnts; ++x) { | ||
705 | if ((x == (NxtOne + 1))) { | ||
706 | NewMat[x] = 1; | ||
707 | } | ||
708 | else { | ||
709 | NewMat[x] = 0; | ||
710 | } | ||
711 | } | ||
712 | } | ||
713 | } | ||
714 | } | ||
715 | |||
716 | } | ||
717 | } | ||
718 | ParentCombos.push_back(tmp); | ||
719 | indexofvar.push_back(tempstring); | ||
720 | } | ||
721 | |||
722 | } | ||
723 | //std::cout << endl; | ||
724 | std::cout << endl; | ||
725 | //printing out the ParentCombos matrix just created above | ||
726 | /*for (size_t i = 0; i < ParentCombos.size(); ++i) { | ||
727 | for (size_t j = 0; j < ParentCombos[i].size(); ++j) { | ||
728 | std::cout << ParentCombos[i][j] << " "; | ||
729 | } | ||
730 | std::cout << endl; | ||
731 | }*/ | ||
732 | std::cout << endl; | ||
733 | //printing out the fullNijkvector matrix just created above | ||
734 | /*for (size_t i = 0; i < fullNijkvector.size(); ++i) { | ||
735 | for (size_t j = 0; j < fullNijkvector[i].size(); ++j) { | ||
736 | std::cout << fullNijkvector[i][j] << " "; | ||
737 | } | ||
738 | std::cout << endl; | ||
739 | } | ||
740 | std::cout << endl;*/ | ||
741 | //Print out Data | ||
742 | /*for (int i = 0; i < DAT.size(); ++i) { | ||
743 | for (int j = 0; j < DAT[i].size(); ++j) { | ||
744 | std::cout << DAT[i][j] << " "; | ||
745 | } | ||
746 | std::cout << endl; | ||
747 | }*/ | ||
748 | //Print out the families size | ||
749 | /*for (size_t i = 0; i < families.size(); ++i) { | ||
750 | std::cout << families[i] << " "; | ||
751 | }*/ | ||
752 | |||
753 | //Obtaining the actual score from this information | ||
754 | //varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting | ||
755 | //the first variable in the order) | ||
756 | //keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha | ||
757 | int posinfull = 0; | ||
758 | //finlogscore is the final score in natural log format | ||
759 | double finlogscore = 0.0; | ||
760 | vector< vector <double> > vecvarparset; | ||
761 | for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) { | ||
762 | //sumovUialpha is the the sum over all parent sets for a particular variable | ||
763 | double sumovUialpha = 0.0; | ||
764 | //vector of all values of seclastgamma | ||
765 | vector <double> vec2ndlastgamma; | ||
766 | double maxseclastgamma; | ||
767 | //Uialpha cycles through all the parent sets for a particular family | ||
768 | for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) { | ||
769 | // nijkprime represents the value of 1/(ri * qi) | ||
770 | double nijkprime, nijprime; | ||
771 | double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha]; | ||
772 | |||
773 | nijprime = 1.0 / (PCs); | ||
774 | nijkprime = 1.0 / (rij * PCs); | ||
775 | //seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic | ||
776 | double seclastgamma = 0.0; | ||
777 | //qi_Uialpha cycles through the combinations for the parents in a set | ||
778 | for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) { | ||
779 | double lastgamma = 0.0; | ||
780 | double nij = 0.0; | ||
781 | //countijk cycles through the categories of the variable with a particular order | ||
782 | //catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to | ||
783 | //and then find the categories for it | ||
784 | for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) { | ||
785 | double topy; | ||
786 | //rightcol lets you find the right column/position of the value that you need for a particular category within the | ||
787 | int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]); | ||
788 | nij += fullNijkvector[posinfull][rightcol]; | ||
789 | topy = (nijkprime + fullNijkvector[posinfull][rightcol]); | ||
790 | |||
791 | //Using boost lgamma function for the product over categories and parent combinations | ||
792 | lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime); | ||
793 | |||
794 | } | ||
795 | double boty = nij + nijprime; | ||
796 | seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty); | ||
797 | |||
798 | } | ||
799 | vec2ndlastgamma.push_back(seclastgamma); | ||
800 | |||
801 | |||
802 | |||
803 | //Calculate sumovUialpha based on the logsumexp concept | ||
804 | if (Uialpha + 1 == families[varinorder]) { | ||
805 | |||
806 | for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) { | ||
807 | //change the value of maxseclastgamma if new value is larger than the previous value | ||
808 | if (que == 0) { | ||
809 | maxseclastgamma = vec2ndlastgamma[0]; | ||
810 | } | ||
811 | else { | ||
812 | if (maxseclastgamma < vec2ndlastgamma[que]) { | ||
813 | maxseclastgamma = vec2ndlastgamma[que]; | ||
814 | } | ||
815 | } | ||
816 | } | ||
817 | for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) { | ||
818 | sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma); | ||
819 | } | ||
820 | //add info on parent set scores for each variable to this vector of vectors | ||
821 | vecvarparset.push_back(vec2ndlastgamma); | ||
822 | } | ||
823 | |||
824 | /*std::cout << endl; | ||
825 | std::cout << seclastgamma; | ||
826 | std::cout << endl;*/ | ||
827 | posinfull += 1; | ||
828 | //std::cout << posinfull << endl; | ||
829 | } | ||
830 | finlogscore += log(sumovUialpha) + maxseclastgamma; | ||
831 | } | ||
832 | |||
833 | vector < vector<string> > parSet; | ||
834 | vector< map <double, string, greater <double> > > parSetScoreSorted; | ||
835 | vector< map <double, string, greater <double> > > strucScore; | ||
836 | |||
837 | |||
838 | //Below is another way to match the index or label sets (indexofvar) with the score sets (vecvarparset): each (score, label) pair is stored in a map, and the maps are collected in a vector. Each map is keyed by score and sorted in descending order. | |
839 | |||
840 | for (unsigned i = 0; i < indexofvar.size(); ++i){ | ||
841 | |||
842 | map <double, string, greater <double> > tempMap; | ||
843 | for (unsigned j=0; j< indexofvar[i].size(); ++j){ | ||
844 | tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j])); | ||
845 | } | ||
846 | parSetScoreSorted.push_back(tempMap); | ||
847 | |||
848 | } | ||
849 | |||
850 | |||
851 | pair <double, string> bestStrScore; | ||
852 | double bestScore = 0; | ||
853 | string bestLable; | ||
854 | |||
855 | |||
856 | for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){ | ||
857 | map <double, string> :: iterator itr; | ||
858 | itr = parSetScoreSorted[i].begin(); | ||
859 | bestScore = bestScore + (itr->first); | ||
860 | bestLable = bestLable +(itr->second); | ||
861 | } | ||
862 | |||
863 | bestStrScore = make_pair(bestScore, bestLable);//This is the best score. | ||
864 | |||
865 | |||
866 | vector < pair <double, string> > sortedStru;//This store all the structures in the percentage. | ||
867 | vector < vector < pair <double, string > > > deltaC; | ||
868 | |||
869 | for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ | ||
870 | map <double, string> :: iterator itr0, itr1; | ||
871 | vector < pair <double, string > > tempDelta; | ||
872 | double tempDeltaS; | ||
873 | string tempDeltaL; | ||
874 | itr0 = parSetScoreSorted[l].begin(); | ||
875 | itr1 = parSetScoreSorted[l].begin(); | ||
876 | double tem1=itr1->first, tem2 = itr1->first; | ||
877 | // cout << tem1 << " : " << tem2 << endl; | ||
878 | for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){ | ||
879 | // tem1 = tem2; | ||
880 | itr1 = ++itr1; | ||
881 | tem2 = itr1->first; | ||
882 | double tem = -log(exp(tem2-tem1)+1); | ||
883 | tem1 = tem1-tem; | ||
884 | if(tem <= log(PERCENT/10)){ | ||
885 | tempDeltaS = (itr1->first)-(itr0->first); | ||
886 | tempDeltaL = itr1->second; | ||
887 | tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL)); | ||
888 | } | ||
889 | } | ||
890 | |||
891 | deltaC.push_back( tempDelta); | ||
892 | } | ||
893 | |||
894 | |||
895 | for(unsigned i=0; i< deltaC.size(); ++i){ | ||
896 | for (unsigned j=0; j< deltaC[i].size(); ++j) | ||
897 | { | ||
898 | double score = bestStrScore.first + deltaC[i][j].first; | ||
899 | string lab = bestStrScore.second; | ||
900 | findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second); | ||
901 | sortedStru.push_back(make_pair(score, lab)); | ||
902 | } | ||
903 | } | ||
904 | |||
905 | sort(sortedStru.begin(),sortedStru.end(),compareDe); | ||
906 | |||
907 | vector <double> strP; | ||
908 | double s = bestScore; | ||
909 | |||
910 | for(unsigned i=0; i< sortedStru.size(); ++i ){ | ||
911 | double s0 = sortedStru[i].first; | ||
912 | // s = s + exp(sortedStru[i].first); | ||
913 | double te = -log(exp(s0-s)+1); | ||
914 | |||
915 | strP.push_back(te); | ||
916 | s = s - te; | ||
917 | |||
918 | } | ||
919 | |||
920 | /* double fi = s/exp(finlogscore)*100; | ||
921 | strP.push_back(fi);*/ | ||
922 | |||
923 | std::cout << std::endl; | ||
924 | std::cout << "Total Score: "<< boost::lexical_cast<string>(finlogscore) << std::endl; | ||
925 | std::cout << std::endl; | ||
926 | |||
927 | |||
928 | |||
929 | |||
930 | cout << "Best several structures are:" << endl; | ||
931 | cout << bestScore << " : " << bestLable << " "<< exp(strP[0])*100 << endl; | ||
932 | |||
933 | for (unsigned i=0; i< sortedStru.size(); ++i ) | ||
934 | { | ||
935 | if(exp(strP[i+1]) > (PERCENT/100)){ | ||
936 | break; | ||
937 | } | ||
938 | cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl; | ||
939 | |||
940 | } | ||
941 | |||
942 | /* for (unsigned i=0; i< 5; ++i ) | ||
943 | { | ||
944 | |||
945 | cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl; | ||
946 | |||
947 | }*/ | ||
948 | |||
949 | /* cout << "Best several structures are:" << endl; | ||
950 | cout << bestScore << " : " << bestLable << " " << endl; | ||
951 | |||
952 | for (unsigned i=0; i< sortedStru.size(); ++i ) | ||
953 | { | ||
954 | cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< endl; | ||
955 | }*/ | ||
956 | |||
957 | std::cout << std::endl; | ||
958 | std::cout << std::endl; | ||
959 | |||
960 | /* for(unsigned i=0; i< parSetScoreSorted.size(); ++i){ | ||
961 | map <double, string> :: iterator itr; | ||
962 | std::cout << "For variable "<< order[i] <<":"<< endl; | ||
963 | for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr) | ||
964 | { | ||
965 | cout << itr->second << " : " << itr->first << "; "; | ||
966 | } | ||
967 | cout << endl; | ||
968 | cout << endl; | ||
969 | |||
970 | }*/ | ||
971 | |||
972 | |||
973 | string orderstring; | ||
974 | for (unsigned i=0; i< order.size(); ++i){ | ||
975 | orderstring.append(int_to_str(order[i])+" "); | ||
976 | } | ||
977 | |||
978 | ofstream myfile; | ||
979 | myfile.open( filepath.c_str(), ios::out | ios::app ); | ||
980 | // cout << path << endl; | ||
981 | if (myfile.is_open()) | ||
982 | { | ||
983 | myfile << orderstring << " " << boost::lexical_cast<string>(finlogscore) << " "<< bestLable << " " << bestScore << " " << exp(strP[0])*100; | ||
984 | myfile << "\n"; | ||
985 | |||
986 | for(unsigned i=0; i< sortedStru.size(); ++i){ | ||
987 | if(exp(strP[i+1]) > (PERCENT/100)){ | ||
988 | break; | ||
989 | } | ||
990 | myfile << orderstring << " " << boost::lexical_cast<string>(finlogscore) << " " << sortedStru[i].second << " " << sortedStru[i].first<< " " << exp(strP[i+1])*100; | ||
991 | myfile << "\n"; | ||
992 | |||
993 | } | ||
994 | } | ||
995 | else cout << "Unable to open file"; | ||
996 | myfile.close(); | ||
997 | |||
998 | /* ofstream myfile; | ||
999 | myfile.open("/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2RO.txt"); | ||
1000 | if (myfile.is_open()) | ||
1001 | { | ||
1002 | for(unsigned i=0; i< parSetScoreSorted.size(); ++i){ | ||
1003 | map <double, string> :: iterator itr; | ||
1004 | for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr){ | ||
1005 | myfile << itr->second << " : " << itr->first << "; " ; | ||
1006 | } | ||
1007 | myfile << "\n"; | ||
1008 | |||
1009 | } | ||
1010 | } | ||
1011 | else cout << "Unable to open file"; | ||
1012 | myfile.close();*/ | ||
1013 | |||
1014 | |||
1015 | //time after completion | ||
1016 | time_t later = time(0); | ||
1017 | char* dt_later = ctime(&later); | ||
1018 | std::cout << "The local date and time is: " << dt_later << std::endl; | ||
1019 | |||
1020 | std::cout << std::endl; | ||
1021 | std::cout << std::endl; | ||
1022 | |||
1023 | |||
1024 | std::cin.clear(); | ||
1025 | std::cin.ignore(); | ||
1026 | std::cin.get(); | ||
1027 | return 0; | ||
1028 | } | ||
1029 | |||
1030 | |||
1031 |