Commit 53863fc1fc8d2efea4e4e5ed2a81265a6490022c
1 parent
7f982ea62a
Exists in
master
Get structures from orders
Showing
1 changed file
with
1030 additions
and
0 deletions
Show diff stats
GetStructure.cpp
... | ... | @@ -0,0 +1,1030 @@ |
1 | +#include <string> | |
2 | +#include <vector> | |
3 | +#include <numeric> | |
4 | +#include <fstream> | |
5 | +#include <iterator> | |
6 | +#include <iostream> | |
7 | +#include <utility> | |
8 | +#include <iomanip> | |
9 | +#include <ctime> | |
10 | +#include <sstream> | |
11 | +#include <boost/math/special_functions.hpp> | |
12 | +#include <boost/lexical_cast.hpp> | |
13 | +#include <boost/algorithm/string/replace.hpp> | |
14 | +#include <boost/multiprecision/cpp_int.hpp> | |
15 | + | |
16 | +#include <stdio.h> | |
17 | +#include <math.h> | |
18 | +#include <algorithm> // std::find | |
19 | +#include <map> | |
20 | +#include <iterator> | |
21 | +#include <bits/stdc++.h> | |
22 | + | |
23 | +using namespace std; | |
24 | + | |
25 | +//Function for calculating the binomial coefficient | |
26 | +boost::multiprecision::cpp_int BinomialCoefficient(unsigned int n, unsigned int k) { | |
27 | + if (k == 0) { return 1; } | |
28 | + else { return (n * BinomialCoefficient(n - 1, k - 1)) / k; } | |
29 | +} | |
30 | +struct compare { | |
31 | + bool operator()(const std::string& first, const std::string& second) { | |
32 | + if(first.size() == second.size()) | |
33 | + return first < second; | |
34 | + else | |
35 | + return first.size() < second.size(); | |
36 | + } | |
37 | +}; | |
38 | + | |
39 | +string int_to_str(int num) | |
40 | +{ | |
41 | + stringstream ss; | |
42 | + | |
43 | + ss << num; | |
44 | + | |
45 | + return ss.str(); | |
46 | +}; | |
47 | + | |
48 | + | |
49 | +int str_to_int(string st) | |
50 | +{ | |
51 | + int result; | |
52 | + | |
53 | + stringstream(st) >> result; | |
54 | + | |
55 | + return result; | |
56 | +}; | |
57 | + | |
58 | +double str2doub(std::string str) | |
59 | +{ | |
60 | + double d; | |
61 | + std::stringstream(str) >> d; | |
62 | + return d; | |
63 | +} | |
64 | + | |
65 | +bool compareI(const pair<string, double>&i, const pair<string, double>&j) | |
66 | +{ | |
67 | + return i.second < j.second; | |
68 | +} | |
69 | + | |
70 | +bool compareD(const pair<string, double>&i, const pair<string, double>&j) | |
71 | +{ | |
72 | + return i.second > j.second; | |
73 | +} | |
74 | + | |
75 | +bool compareIn(const pair<double, string>&i, const pair<double, string>&j) | |
76 | +{ | |
77 | + return i.first < j.first; | |
78 | +} | |
79 | + | |
80 | +bool compareDe(const pair<double, string>&i, const pair<double, string>&j) | |
81 | +{ | |
82 | + return i.first > j.first; | |
83 | +} | |
84 | + | |
85 | +double logAB (double x, double y) | |
86 | +{ | |
87 | + double result; | |
88 | + double maxVal = max(x,y); | |
89 | + | |
90 | + if(maxVal == x) | |
91 | + { | |
92 | + result = maxVal + log(1+exp(y-maxVal)); | |
93 | + } | |
94 | + else | |
95 | + { | |
96 | + result = maxVal + log(1+exp(x-maxVal)); | |
97 | + } | |
98 | + return result; | |
99 | +} | |
100 | + | |
101 | +double persentageXofY (double newS, double oldS) | |
102 | +{ | |
103 | + double result; | |
104 | + result = exp(oldS-newS)*100; | |
105 | + return result; | |
106 | +} | |
107 | + | |
108 | +void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr) | |
109 | +{ | |
110 | + // Get the first occurrence | |
111 | + size_t pos = data.find(toSearch); | |
112 | + | |
113 | + // Repeat till end is reached | |
114 | + while( pos != std::string::npos) | |
115 | + { | |
116 | + // Replace this occurrence of Sub String | |
117 | + data.replace(pos, toSearch.size(), replaceStr); | |
118 | + // Get the next occurrence from the current position | |
119 | + pos =data.find(toSearch, pos + toSearch.size()); | |
120 | + } | |
121 | +} | |
122 | + | |
123 | +string findMB(string structures, string variable){ | |
124 | + string MB; | |
125 | + | |
126 | + | |
127 | + return MB; | |
128 | +} | |
129 | + | |
130 | +int main() { | |
131 | + | |
132 | + string FILENAME; // Downloads/D50C9v.txt | |
133 | + int COLS; // 9 | |
134 | + vector <int> max_cat; // 1 2 1 3 2 1 2 2 3 | |
135 | + vector <int> min_cat; // 0 0 0 0 0 0 0 0 0 | |
136 | + unsigned int maxparents; // 4 | |
137 | + vector <int> order; //2 6 8 4 1 0 3 5 7 | |
138 | + double PERCENT; | |
139 | + string filepath; // output file name | |
140 | + | |
141 | + vector<string> settings; | |
142 | + vector<bool> settingsFlag; | |
143 | + | |
144 | + settings.push_back("DataFile"); | |
145 | + settingsFlag.push_back(false); | |
146 | + settings.push_back("TotalVariables"); | |
147 | + settingsFlag.push_back(false); | |
148 | + settings.push_back("MaximumCategory"); | |
149 | + settingsFlag.push_back(false); | |
150 | + settings.push_back("MinimumCategory"); | |
151 | + settingsFlag.push_back(false); | |
152 | + settings.push_back("MaximumParents"); | |
153 | + settingsFlag.push_back(false); | |
154 | + settings.push_back("Order"); | |
155 | + settingsFlag.push_back(false); | |
156 | + settings.push_back("Percentage"); | |
157 | + settingsFlag.push_back(false); | |
158 | + settings.push_back("OutputFileName"); | |
159 | + settingsFlag.push_back(false); | |
160 | + | |
161 | + fstream infile; | |
162 | + string ConfigFile; | |
163 | + std::cout << "What is the location of the configuration file that you edited?: "; | |
164 | + std::cin >> ConfigFile; | |
165 | + std::cout << std::endl; | |
166 | + //string ConfigFile = "/home/efraingonzalez0/Downloads/new.config"; | |
167 | + infile.open(ConfigFile.c_str(),ios::in); | |
168 | + vector<string> line; | |
169 | + if(infile.is_open()) { | |
170 | + std::cout << "Configuration file correctly opened" << std::endl; | |
171 | + while(!infile.eof()) { | |
172 | + string tmp; | |
173 | + getline(infile,tmp); | |
174 | + line.push_back(tmp); | |
175 | + } | |
176 | + } | |
177 | + else { | |
178 | + std::cout << "Unable to open configuration file" << std::endl; | |
179 | + exit(0); | |
180 | + } | |
181 | + | |
182 | + infile.close(); | |
183 | + line.erase(line.end()); | |
184 | + for(size_t k = 0; k < settings.size(); ++k) { | |
185 | + for(size_t i = 0; i < line.size(); ++i) { | |
186 | + size_t foundit = line[i].find(settings[k]); | |
187 | + if (foundit != std::string::npos) { | |
188 | + //find position of "=" | |
189 | + unsigned int poseq = line[i].find("="); | |
190 | + unsigned int posneqfir,posneqlas; | |
191 | + //first position of relevant string | |
192 | + for(unsigned int j = poseq + 1; j < line[i].length(); ++j) { | |
193 | + if(line[i].at(j) != ' ') { | |
194 | + posneqfir = j; | |
195 | + break; | |
196 | + } | |
197 | + } | |
198 | + //last position of relevant string | |
199 | + for(unsigned int j2 = line[i].length() - 1; j2 >= posneqfir; ++j2) { | |
200 | + if((line[i].at(j2) != ' ') && (line[i].at(j2) != '\n')){ | |
201 | + posneqlas = j2; | |
202 | + break; | |
203 | + } | |
204 | + } | |
205 | + | |
206 | + if(settings[k] == "DataFile") { | |
207 | + FILENAME = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | |
208 | + } | |
209 | + else if(settings[k] == "TotalVariables") { | |
210 | + COLS = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1)); | |
211 | + } | |
212 | + else if(settings[k] == "MaximumCategory") {//still need to edit | |
213 | + string max_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | |
214 | + std::stringstream macs(max_cat_string); | |
215 | + int macsti; | |
216 | + while( macs >> macsti) { | |
217 | + max_cat.push_back(macsti); | |
218 | + } | |
219 | + } | |
220 | + else if(settings[k] == "MinimumCategory") {//still need to edit | |
221 | + string min_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | |
222 | + std::stringstream mics(min_cat_string); | |
223 | + int micsti; | |
224 | + while( mics >> micsti) { | |
225 | + min_cat.push_back(micsti); | |
226 | + } | |
227 | + } | |
228 | + | |
229 | + else if(settings[k] == "MaximumParents") { | |
230 | + maxparents = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1)); | |
231 | + } | |
232 | + | |
233 | + else if(settings[k] == "Order") {//still need to edit | |
234 | + string st_ord_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | |
235 | + std::stringstream starordr(st_ord_string); | |
236 | + int StOrti; | |
237 | + while( starordr >> StOrti) { | |
238 | + order.push_back(StOrti); | |
239 | + } | |
240 | + } | |
241 | + | |
242 | + else if(settings[k] == "Percentage") { | |
243 | + PERCENT = str2doub(line[i].substr(posneqfir,posneqlas - posneqfir + 1)); | |
244 | + } | |
245 | + | |
246 | + else if(settings[k] == "OutputFileName") { | |
247 | + filepath = line[i].substr(posneqfir,posneqlas - posneqfir + 1); | |
248 | + } | |
249 | + | |
250 | + settingsFlag[k] = true; | |
251 | + break; | |
252 | + } | |
253 | + } | |
254 | + } | |
255 | + | |
256 | + for(int k=0;k<settings.size();k++) { | |
257 | + if(!settingsFlag[k]) { | |
258 | + std::cout << "Error: "<< settings[k] << " is not configured." << std::endl; | |
259 | + exit(0); | |
260 | + } | |
261 | + else if(max_cat.size() != COLS) { | |
262 | + std::cout << "Error: Size of MaximumCategory and the number of TotalVariables do not match" << std::endl; | |
263 | + exit(0); | |
264 | + } | |
265 | + else if(min_cat.size() != COLS) { | |
266 | + std::cout << "Error: Size of MinimumCategory and the number of TotalVariables do not match" << std::endl; | |
267 | + exit(0); | |
268 | + } | |
269 | + else if(max_cat.size() != min_cat.size()) { | |
270 | + std::cout << "Error: Size of MaximumCategory and MinimumCategory do not match" << std::endl; | |
271 | + exit(0); | |
272 | + } | |
273 | + else if(PERCENT > 100.0){ | |
274 | + std::cout << "Error: Percentage may not be larger than 100.0 which represents 100%. If you need a difference larger than 100% then use a negative number." << std::endl; | |
275 | + exit(0); | |
276 | + } | |
277 | + } | |
278 | + | |
279 | + | |
280 | + | |
281 | + fstream file; | |
282 | + std::cout << endl; | |
283 | + vector < vector <int> > DAT; // 2d array as a vector of vectors | |
284 | + vector <int> rowVector(COLS); // vector to add into 'array' (represents a row) | |
285 | + int row = 0; // Row counter | |
286 | + | |
287 | + // Read file | |
288 | + file.open(FILENAME.c_str(), ios::in); // Open file | |
289 | + if (file.is_open()) { // If file has correctly opened... | |
290 | + // Output debug message | |
291 | + cout << "File correctly opened" << endl; | |
292 | + | |
293 | + // Dynamically store data into array | |
294 | + while (file.good()) { // ... and while there are no errors, | |
295 | + DAT.push_back(rowVector); // add a new row, | |
296 | + for (int col = 0; col<COLS; col++) { | |
297 | + file >> DAT[row][col]; // fill the row with col elements | |
298 | + } | |
299 | + row++; // Keep track of actual row | |
300 | + } | |
301 | + } | |
302 | + else cout << "Unable to open file" << endl; | |
303 | + file.close(); | |
304 | + DAT.erase(DAT.end()); | |
305 | + | |
306 | + size_t totvars = DAT[1].size(); //column number | |
307 | + size_t tottuples = DAT.size();//row number | |
308 | + | |
309 | + | |
310 | + std::cout << endl; | |
311 | + | |
312 | + | |
313 | + //Time that program begins | |
314 | + time_t start = time(0); | |
315 | + char* dt_start = ctime(&start); | |
316 | + std::cout << "The local date and time is: " << dt_start << std::endl; | |
317 | + | |
318 | + | |
319 | + //Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category: | |
320 | + //categories in i | |
321 | + vector <int> catsi; | |
322 | + //total combinations of variables | |
323 | + //int totcombos = 1; | |
324 | + //how many catagori for every variable | |
325 | + for ( int i = 0; i < totvars; ++i) { | |
326 | + catsi.push_back((max_cat[i] - min_cat[i]) + 1); | |
327 | + //totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1); | |
328 | + } | |
329 | + | |
330 | + //print out catsi | |
331 | + for (size_t i = 0; i < catsi.size();++i) { | |
332 | + std::cout << catsi[i] << " "; | |
333 | + } | |
334 | + std::cout << endl; | |
335 | + | |
336 | + //Total Families Ui,alpha for a particular variable in the order | |
337 | + //Total Families Ui,alpha for a particular variable in the order | |
338 | + vector <boost::multiprecision::cpp_int> families; | |
339 | + for (unsigned int i = 0; i < totvars; ++i) { | |
340 | + int numparents = i; | |
341 | + if (numparents == 0) { | |
342 | + families.push_back(1); | |
343 | + } | |
344 | + else { | |
345 | + | |
346 | + boost::multiprecision::cpp_int numfams = 0; | |
347 | + for (unsigned int j = 0; j <= i; ++j) { | |
348 | + if (j <= maxparents) { | |
349 | + unsigned long long jFactorial = 1; | |
350 | + unsigned long long ijFactorial = 1; | |
351 | + unsigned long long iFactorial = 1; | |
352 | + //Calculate j! | |
353 | + for (unsigned int g = 0; g <= j; ++g) { | |
354 | + if (g != 0) { | |
355 | + jFactorial *= g; | |
356 | + } | |
357 | + } | |
358 | + //Calculate i! | |
359 | + for (unsigned int g = 0; g <= i; ++g) { | |
360 | + if (g != 0) { | |
361 | + iFactorial *= g; | |
362 | + } | |
363 | + } | |
364 | + //Calculate (i-j)! | |
365 | + for (unsigned int g = 0; g <= (i - j); ++g) { | |
366 | + if (g != 0) { | |
367 | + ijFactorial *= g; | |
368 | + } | |
369 | + } | |
370 | + numfams += BinomialCoefficient(i,j); | |
371 | + } | |
372 | + else { | |
373 | + break; | |
374 | + } | |
375 | + } | |
376 | + families.push_back(numfams); | |
377 | + } | |
378 | + } | |
379 | + | |
380 | + | |
381 | + //How many parent combinations for each step? As well as there counts | |
382 | + vector< vector <int> > ParentCombos; | |
383 | + vector< vector <int> > fullNijkvector; | |
384 | + vector< vector <string> > indexofvar; // This is label or index for each variable. | |
385 | + for (size_t i = 0; i < order.size(); ++i) { | |
386 | + //i represents the order of the variable | |
387 | + if (i == 0) { | |
388 | + vector <int> tmp,Nijkovercombos1; | |
389 | + vector <string> tempstring; | |
390 | +// tempstring.push_back("[0]"); | |
391 | + tempstring.push_back("[" + int_to_str(order[0])+ "]"); | |
392 | + tmp.push_back(1); | |
393 | + ParentCombos.push_back(tmp); | |
394 | + //counting the amount of times that a value of the first variable in the order occurs | |
395 | + //this starts with the maximum value for that variable | |
396 | + for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) { | |
397 | + //hello cycles through the categories of the first variable in the order | |
398 | + int Nijk1 = 0; | |
399 | + //green cycles through tuples | |
400 | + for (int green = 0; green < tottuples; ++green){ | |
401 | + if (DAT[green][order[0]] == hello) { | |
402 | + Nijk1 += 1; | |
403 | + } | |
404 | + } | |
405 | + Nijkovercombos1.push_back(Nijk1); | |
406 | + } | |
407 | + fullNijkvector.push_back(Nijkovercombos1); | |
408 | + indexofvar.push_back(tempstring); | |
409 | + | |
410 | + } | |
411 | + else { | |
412 | + vector <int> tmp,Nijkovercombos1; | |
413 | + vector <string> tempstring; | |
414 | +// string tempa = "[" + int_to_str(i) +"]"; | |
415 | + string tempa = "[" + int_to_str(order[i]) +"]"; | |
416 | + tempstring.push_back(tempa); | |
417 | + tmp.push_back(1); | |
418 | + int numparnts = i; | |
419 | + //counting the amount of times that a value of the last variable in the current order size occurs | |
420 | + //this starts with the maximum value for that variable | |
421 | + for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) { | |
422 | + //hello cycles through the categories of the first variable in the order | |
423 | + int Nijk1 = 0; | |
424 | + //green cycles through tuples | |
425 | + for (int green = 0; green < tottuples; ++green) { | |
426 | + if (DAT[green][order[numparnts]] == hello) { | |
427 | + Nijk1 += 1; | |
428 | + } | |
429 | + } | |
430 | + Nijkovercombos1.push_back(Nijk1); | |
431 | + } | |
432 | + fullNijkvector.push_back(Nijkovercombos1); | |
433 | + | |
434 | + //j representing the number of parents | |
435 | + for (int it = 1; it <= numparnts; ++it) { | |
436 | + //(333)Creating a vector that uses the right combination | |
437 | + boost::multiprecision::cpp_int Nloopy = 0; | |
438 | + //, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1; | |
439 | + /*std::cout << "This is for " << numparnts << " choose " << it << endl; | |
440 | + std::cout << "The iteration number is: " << it << endl;*/ | |
441 | + //Accounting for the limit of parent quantity | |
442 | + if (it > maxparents) { | |
443 | + break; | |
444 | + } | |
445 | + else { | |
446 | + vector <int> NewMat(numparnts, 0); | |
447 | + for (int p = 0; p < it; ++p) { | |
448 | + NewMat[p] = 1; | |
449 | + } | |
450 | + /*for (int g = 2; g <= numparnts; ++g) { | |
451 | + NcolFactorial *= g; | |
452 | + } | |
453 | + for (int g = 2; g <= it; ++g) { | |
454 | + iFactorial *= g; | |
455 | + } | |
456 | + for (int g = 2; g <= (numparnts - it); ++g) { | |
457 | + NiFactorial *= g; | |
458 | + }*/ | |
459 | + //Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts | |
460 | + Nloopy = BinomialCoefficient(numparnts,it); | |
461 | + for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) { | |
462 | + int combsparents = 1; | |
463 | + vector <int> parsetv; | |
464 | + for (int par = 0; par < NewMat.size(); ++par){ | |
465 | + if (NewMat[par] == 1){ | |
466 | + parsetv.push_back(par); | |
467 | + } | |
468 | + } | |
469 | + | |
470 | + string tempstring2; | |
471 | + | |
472 | + for (int par2 = 0; par2 < parsetv.size(); ++par2){ | |
473 | + if(par2+1==parsetv.size()){ | |
474 | + // tempstring2 = tempstring2 + int_to_str(parsetv[par2]); | |
475 | + tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]]); | |
476 | + //tempstring2 = tempstring2 +","+"|" + int_to_str(i); | |
477 | + // tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]"; | |
478 | + tempstring2 = "[" + int_to_str(order[i])+"|"+tempstring2 +"]"; | |
479 | + } | |
480 | + else{ | |
481 | + //tempstring2 = tempstring2 + int_to_str(parsetv[par2])+","; | |
482 | +// tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":"; | |
483 | + tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]])+":"; | |
484 | + } | |
485 | + | |
486 | + | |
487 | + } | |
488 | + tempstring.push_back(tempstring2); | |
489 | + | |
490 | + /*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl; | |
491 | + std::cout << "Here comes the NewMat:" << endl;*/ | |
492 | + //(444)This sets up the process for changing | |
493 | + //PosOne tells me the position of the last one in the vector | |
494 | + //We want to change when the position is the last position available in the vector | |
495 | + int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0; | |
496 | + int SumOnes3 = 0, SumOnes4 = 0, SumY = 0; | |
497 | + for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) { | |
498 | + if (NewMat[PosOne] == 1) { | |
499 | + break; | |
500 | + } | |
501 | + } | |
502 | + for (int y = (numparnts - 1); y >= (numparnts - it); --y) { | |
503 | + //SumOnes tells you the amount of ones in the last i columns | |
504 | + //These are the last columns being considered | |
505 | + SumOnes += NewMat[y]; | |
506 | + } | |
507 | + for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) { | |
508 | + //SumOnes2 tells you the amount of ones before you reach the next zero | |
509 | + //PosOne2 keeps track of the position of the coming zero | |
510 | + SumOnes2 += NewMat[PosOne2]; | |
511 | + if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) { | |
512 | + break; | |
513 | + } | |
514 | + } | |
515 | + for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) { | |
516 | + //FrstOne tells you the position of the first number 1 starting from the left hand side | |
517 | + if (NewMat[FrstOne] == 1) { | |
518 | + break; | |
519 | + } | |
520 | + } | |
521 | + for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) { | |
522 | + //SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions | |
523 | + SumOnes4 += NewMat[x]; | |
524 | + } | |
525 | + //Prints out NewMat | |
526 | + /*for (int u = 0; u < numparnts; ++u) { | |
527 | + std::cout << NewMat[u] << " "; | |
528 | + } | |
529 | + std::cout << endl;*/ | |
530 | + | |
531 | + | |
532 | + //Adding in the code that will allow counts parent combinations for this particular variable | |
533 | + for (int q = 0; q < i; ++q) { | |
534 | + if (NewMat[q] == 1) { | |
535 | + combsparents *= catsi[order[q]]; | |
536 | + } | |
537 | + } | |
538 | + tmp.push_back(combsparents);//made the whole parentset configure for a variable | |
539 | + /*std::cout << "This is combsparents: " << combsparents << endl; | |
540 | + std::cout << endl;*/ | |
541 | + vector <int> hvect; | |
542 | + //hvect tells us which variables are being considered always the last variable is being considered | |
543 | + //e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only | |
544 | + //continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 > | |
545 | + //for A C hvect will be < 0 2 > | |
546 | + for (int h = 0; h < i; ++h) { | |
547 | + if (NewMat[h] == 1) { | |
548 | + hvect.push_back(h); | |
549 | + } | |
550 | + } | |
551 | + hvect.push_back(numparnts); | |
552 | + size_t shvect = hvect.size(); | |
553 | + //Prints out hvect | |
554 | + /*for (int u = 0; u < shvect; ++u) { | |
555 | + std::cout << hvect[u] << " "; | |
556 | + }*/ | |
557 | + //std::cout << endl; | |
558 | + //Counting the amount of values in the data that have that particular parent combination | |
559 | + vector <int> Nijkovercombos; | |
560 | + for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) { | |
561 | + //(333)Creating a vector that uses the right combination | |
562 | + /*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/ | |
563 | + vector <int> Test(shvect, last), maxtest; | |
564 | + for (int p = 0; p < (shvect-1); ++p) { | |
565 | + Test[p] = max_cat[order[hvect[p]]]; | |
566 | + } | |
567 | + maxtest = Test; | |
568 | + for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) { | |
569 | + //std::cout << endl; | |
570 | + /*std::cout << endl; | |
571 | + std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl; | |
572 | + std::cout << "Here comes the Test:" << endl;*/ | |
573 | + //(444)This sets up the process for changing | |
574 | + //NMpos tells me the position of the last non minimum value in the vector | |
575 | + //We want to change when the position is the last position available in the vector | |
576 | + int NMpos = 0, minpos = 0; | |
577 | + for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) { | |
578 | + if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) { | |
579 | + break; | |
580 | + } | |
581 | + } | |
582 | + for (minpos = (shvect - 2); minpos >= 0; --minpos) { | |
583 | + //minpos tells you the position of the last minimum value | |
584 | + if (Test[minpos] == min_cat[order[hvect[minpos]]]) { | |
585 | + break; | |
586 | + } | |
587 | + } | |
588 | + //Prints out Test | |
589 | + /*for (int u = 0; u < shvect; ++u) { | |
590 | + std::cout << Test[u] << " "; | |
591 | + } | |
592 | + std::cout << endl; | |
593 | + std::cout << endl; | |
594 | + std::cout << endl;*/ | |
595 | + //Count how many occurrences of the value are present in the data | |
596 | + int Nijk = 0; | |
597 | + for (int num2size = 0; num2size < tottuples; ++num2size) { | |
598 | + int countcorrect = 0; | |
599 | + for (size_t g = 0; g < Test.size(); ++g) { | |
600 | + //num2size cycles through tuples | |
601 | + //order[hvect[g]] represents the variable in the order that we are considering as a parent | |
602 | + if (DAT[num2size][order[hvect[g]]] == Test[g]) { | |
603 | + countcorrect += 1; | |
604 | + } | |
605 | + } | |
606 | + if (countcorrect == Test.size()) { | |
607 | + Nijk += 1; | |
608 | + } | |
609 | + } | |
610 | + //Nijkovercombos displays data as follows | |
611 | + //it starts with the smallest value for the last variable in hvect | |
612 | + //and the largest values in the first n-1 variables in hvect | |
613 | + //max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0 | |
614 | + //count,count,count,count e.g. 13, 2, 2, 3, 4, 10 | |
615 | + Nijkovercombos.push_back(Nijk); | |
616 | + //(666)Now that the values have been calculated find out what the next combination of variables should be | |
617 | + if ((NMpos == -1) & (minpos == (shvect - 2))) { | |
618 | + //break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000 | |
619 | + break; | |
620 | + } | |
621 | + if (minpos < NMpos) { | |
622 | + Test[NMpos] = Test[NMpos] - 1; | |
623 | + } | |
624 | + else if (NMpos < minpos) { | |
625 | + Test[NMpos] = Test[NMpos] - 1; | |
626 | + for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) { | |
627 | + Test[filler] = maxtest[filler]; | |
628 | + } | |
629 | + } | |
630 | + } | |
631 | + } | |
632 | + fullNijkvector.push_back(Nijkovercombos); | |
633 | + //(666)Now that the unique values have been calculated find out what the next combination of variables should be | |
634 | + if ((PosOne == (numparnts - 1)) & (SumOnes == it)) { | |
635 | + break; | |
636 | + } | |
637 | + else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) { | |
638 | + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { | |
639 | + //NxtOne tells you the position of the next closest number 1 that we would | |
640 | + //like to change the position of (we will call it the important number one) | |
641 | + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one | |
642 | + SumOnes3 += NewMat[NxtOne]; | |
643 | + if (SumOnes3 == (SumOnes2 + 1)) { | |
644 | + break; | |
645 | + } | |
646 | + } | |
647 | + if (SumOnes4 == (it - 1)) { | |
648 | + //If all except one of the 1's are found in the last it - 1 columns | |
649 | + for (int x = 0; x < numparnts; ++x) { | |
650 | + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) { | |
651 | + //If | |
652 | + NewMat[x] = 1; | |
653 | + } | |
654 | + else { | |
655 | + NewMat[x] = 0; | |
656 | + } | |
657 | + } | |
658 | + } | |
659 | + else { | |
660 | + for (int x = 0; x < numparnts; ++x) { | |
661 | + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) { | |
662 | + //If the position is that of the first 1 or it falls between the changed number one and the total | |
663 | + //amount of ones that are on that side of the zero 10111 | |
664 | + NewMat[x] = 1; | |
665 | + } | |
666 | + else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) { | |
667 | + //If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing | |
668 | + //and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right | |
669 | + NewMat[x] = 1; | |
670 | + } | |
671 | + else { | |
672 | + NewMat[x] = 0; | |
673 | + } | |
674 | + } | |
675 | + } | |
676 | + } | |
677 | + else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) { | |
678 | + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { | |
679 | + //NxtOne tells you the position of the next closest number 1 that we would | |
680 | + //like to change the position of (we will call it the important number one) | |
681 | + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one | |
682 | + SumOnes3 += NewMat[NxtOne]; | |
683 | + if (SumOnes3 == 1) { | |
684 | + break; | |
685 | + } | |
686 | + } | |
687 | + if (it != 1) { | |
688 | + for (int x = 0; x < numparnts; ++x) { | |
689 | + if (x == (NxtOne + 1)) { | |
690 | + NewMat[x] = 1; | |
691 | + } | |
692 | + else if (x == NxtOne) { | |
693 | + NewMat[x] = 0; | |
694 | + } | |
695 | + else if ((NewMat[x] == 1) & (x != NxtOne)) { | |
696 | + NewMat[x] = 1; | |
697 | + } | |
698 | + else { | |
699 | + NewMat[x] = 0; | |
700 | + } | |
701 | + } | |
702 | + } | |
703 | + else { | |
704 | + for (int x = 0; x < numparnts; ++x) { | |
705 | + if ((x == (NxtOne + 1))) { | |
706 | + NewMat[x] = 1; | |
707 | + } | |
708 | + else { | |
709 | + NewMat[x] = 0; | |
710 | + } | |
711 | + } | |
712 | + } | |
713 | + } | |
714 | + } | |
715 | + | |
716 | + } | |
717 | + } | |
718 | + ParentCombos.push_back(tmp); | |
719 | + indexofvar.push_back(tempstring); | |
720 | + } | |
721 | + | |
722 | + } | |
723 | + //std::cout << endl; | |
724 | + std::cout << endl; | |
725 | + //printing out the ParentCombos matrix just created above | |
726 | + /*for (size_t i = 0; i < ParentCombos.size(); ++i) { | |
727 | + for (size_t j = 0; j < ParentCombos[i].size(); ++j) { | |
728 | + std::cout << ParentCombos[i][j] << " "; | |
729 | + } | |
730 | + std::cout << endl; | |
731 | + }*/ | |
732 | + std::cout << endl; | |
733 | + //printing out the fullNijkvector matrix just created above | |
734 | + /*for (size_t i = 0; i < fullNijkvector.size(); ++i) { | |
735 | + for (size_t j = 0; j < fullNijkvector[i].size(); ++j) { | |
736 | + std::cout << fullNijkvector[i][j] << " "; | |
737 | + } | |
738 | + std::cout << endl; | |
739 | + } | |
740 | + std::cout << endl;*/ | |
741 | + //Print out Data | |
742 | + /*for (int i = 0; i < DAT.size(); ++i) { | |
743 | + for (int j = 0; j < DAT[i].size(); ++j) { | |
744 | + std::cout << DAT[i][j] << " "; | |
745 | + } | |
746 | + std::cout << endl; | |
747 | + }*/ | |
748 | + //Print out the families size | |
749 | + /*for (size_t i = 0; i < families.size(); ++i) { | |
750 | + std::cout << families[i] << " "; | |
751 | + }*/ | |
752 | + | |
753 | + //Obtaining the actual score from this information | |
754 | + //varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting | |
755 | + //the first variable in the order) | |
756 | + //keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha | |
757 | + int posinfull = 0; | |
758 | + //finlogscore is the final score in natural log format | |
759 | + double finlogscore = 0.0; | |
760 | + vector< vector <double> > vecvarparset; | |
761 | + for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) { | |
762 | + //sumovUialpha is the the sum over all parent sets for a particular variable | |
763 | + double sumovUialpha = 0.0; | |
764 | + //vector of all values of seclastgamma | |
765 | + vector <double> vec2ndlastgamma; | |
766 | + double maxseclastgamma; | |
767 | + //Uialpha cycles through all the parent sets for a particular family | |
768 | + for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) { | |
769 | + // nijkprime represents the value of 1/(ri * qi) | |
770 | + double nijkprime, nijprime; | |
771 | + double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha]; | |
772 | + | |
773 | + nijprime = 1.0 / (PCs); | |
774 | + nijkprime = 1.0 / (rij * PCs); | |
775 | + //seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic | |
776 | + double seclastgamma = 0.0; | |
777 | + //qi_Uialpha cycles through the combinations for the parents in a set | |
778 | + for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) { | |
779 | + double lastgamma = 0.0; | |
780 | + double nij = 0.0; | |
781 | + //countijk cycles through the categories of the variable with a particular order | |
782 | + //catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to | |
783 | + //and then find the categories for it | |
784 | + for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) { | |
785 | + double topy; | |
786 | + //rightcol lets you find the right column/position of the value that you need for a particular category within the | |
787 | + int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]); | |
788 | + nij += fullNijkvector[posinfull][rightcol]; | |
789 | + topy = (nijkprime + fullNijkvector[posinfull][rightcol]); | |
790 | + | |
791 | + //Using boost lgamma function for the product over categories and parent combinations | |
792 | + lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime); | |
793 | + | |
794 | + } | |
795 | + double boty = nij + nijprime; | |
796 | + seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty); | |
797 | + | |
798 | + } | |
799 | + vec2ndlastgamma.push_back(seclastgamma); | |
800 | + | |
801 | + | |
802 | + | |
803 | + //Calculate sumovUialpha based on the logsumexp concept | |
804 | + if (Uialpha + 1 == families[varinorder]) { | |
805 | + | |
806 | + for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) { | |
807 | + //change the value of maxseclastgamma if new value is larger than the previous value | |
808 | + if (que == 0) { | |
809 | + maxseclastgamma = vec2ndlastgamma[0]; | |
810 | + } | |
811 | + else { | |
812 | + if (maxseclastgamma < vec2ndlastgamma[que]) { | |
813 | + maxseclastgamma = vec2ndlastgamma[que]; | |
814 | + } | |
815 | + } | |
816 | + } | |
817 | + for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) { | |
818 | + sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma); | |
819 | + } | |
820 | + //add info on parent set scores for each variable to this vector of vectors | |
821 | + vecvarparset.push_back(vec2ndlastgamma); | |
822 | + } | |
823 | + | |
824 | + /*std::cout << endl; | |
825 | + std::cout << seclastgamma; | |
826 | + std::cout << endl;*/ | |
827 | + posinfull += 1; | |
828 | + //std::cout << posinfull << endl; | |
829 | + } | |
830 | + finlogscore += log(sumovUialpha) + maxseclastgamma; | |
831 | + } | |
832 | + | |
833 | + vector < vector<string> > parSet; | |
834 | + vector< map <double, string, greater <double> > > parSetScoreSorted; | |
835 | + vector< map <double, string, greater <double> > > strucScore; | |
836 | + | |
837 | + | |
838 | +//Below is another way to match the index or label sets with the scores sets, and store the (score, label) into a map vector. And for each vector element the map is a sorted map. | |
839 | + | |
840 | + for (unsigned i = 0; i < indexofvar.size(); ++i){ | |
841 | + | |
842 | + map <double, string, greater <double> > tempMap; | |
843 | + for (unsigned j=0; j< indexofvar[i].size(); ++j){ | |
844 | + tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j])); | |
845 | + } | |
846 | + parSetScoreSorted.push_back(tempMap); | |
847 | + | |
848 | + } | |
849 | + | |
850 | + | |
851 | + pair <double, string> bestStrScore; | |
852 | + double bestScore = 0; | |
853 | + string bestLable; | |
854 | + | |
855 | + | |
856 | + for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){ | |
857 | + map <double, string> :: iterator itr; | |
858 | + itr = parSetScoreSorted[i].begin(); | |
859 | + bestScore = bestScore + (itr->first); | |
860 | + bestLable = bestLable +(itr->second); | |
861 | + } | |
862 | + | |
863 | + bestStrScore = make_pair(bestScore, bestLable);//This is the best score. | |
864 | + | |
865 | + | |
866 | + vector < pair <double, string> > sortedStru;//This store all the structures in the percentage. | |
867 | + vector < vector < pair <double, string > > > deltaC; | |
868 | + | |
869 | + for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ | |
870 | + map <double, string> :: iterator itr0, itr1; | |
871 | + vector < pair <double, string > > tempDelta; | |
872 | + double tempDeltaS; | |
873 | + string tempDeltaL; | |
874 | + itr0 = parSetScoreSorted[l].begin(); | |
875 | + itr1 = parSetScoreSorted[l].begin(); | |
876 | + double tem1=itr1->first, tem2 = itr1->first; | |
877 | +// cout << tem1 << " : " << tem2 << endl; | |
878 | + for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){ | |
879 | +// tem1 = tem2; | |
880 | + itr1 = ++itr1; | |
881 | + tem2 = itr1->first; | |
882 | + double tem = -log(exp(tem2-tem1)+1); | |
883 | + tem1 = tem1-tem; | |
884 | + if(tem <= log(PERCENT/10)){ | |
885 | + tempDeltaS = (itr1->first)-(itr0->first); | |
886 | + tempDeltaL = itr1->second; | |
887 | + tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL)); | |
888 | + } | |
889 | + } | |
890 | + | |
891 | + deltaC.push_back( tempDelta); | |
892 | + } | |
893 | + | |
894 | + | |
895 | + for(unsigned i=0; i< deltaC.size(); ++i){ | |
896 | + for (unsigned j=0; j< deltaC[i].size(); ++j) | |
897 | + { | |
898 | + double score = bestStrScore.first + deltaC[i][j].first; | |
899 | + string lab = bestStrScore.second; | |
900 | + findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second); | |
901 | + sortedStru.push_back(make_pair(score, lab)); | |
902 | + } | |
903 | + } | |
904 | + | |
905 | + sort(sortedStru.begin(),sortedStru.end(),compareDe); | |
906 | + | |
907 | + vector <double> strP; | |
908 | + double s = bestScore; | |
909 | + | |
910 | + for(unsigned i=0; i< sortedStru.size(); ++i ){ | |
911 | + double s0 = sortedStru[i].first; | |
912 | +// s = s + exp(sortedStru[i].first); | |
913 | + double te = -log(exp(s0-s)+1); | |
914 | + | |
915 | + strP.push_back(te); | |
916 | + s = s - te; | |
917 | + | |
918 | + } | |
919 | + | |
920 | +/* double fi = s/exp(finlogscore)*100; | |
921 | + strP.push_back(fi);*/ | |
922 | + | |
923 | + std::cout << std::endl; | |
924 | + std::cout << "Total Score: "<< boost::lexical_cast<string>(finlogscore) << std::endl; | |
925 | + std::cout << std::endl; | |
926 | + | |
927 | + | |
928 | + | |
929 | + | |
930 | + cout << "Best several structures are:" << endl; | |
931 | + cout << bestScore << " : " << bestLable << " "<< exp(strP[0])*100 << endl; | |
932 | + | |
933 | + for (unsigned i=0; i< sortedStru.size(); ++i ) | |
934 | + { | |
935 | + if(exp(strP[i+1]) > (PERCENT/100)){ | |
936 | + break; | |
937 | + } | |
938 | + cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl; | |
939 | + | |
940 | + } | |
941 | + | |
942 | +/* for (unsigned i=0; i< 5; ++i ) | |
943 | + { | |
944 | + | |
945 | + cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl; | |
946 | + | |
947 | + }*/ | |
948 | + | |
949 | +/* cout << "Best several structures are:" << endl; | |
950 | + cout << bestScore << " : " << bestLable << " " << endl; | |
951 | + | |
952 | + for (unsigned i=0; i< sortedStru.size(); ++i ) | |
953 | + { | |
954 | + cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< endl; | |
955 | + }*/ | |
956 | + | |
957 | + std::cout << std::endl; | |
958 | + std::cout << std::endl; | |
959 | + | |
960 | +/* for(unsigned i=0; i< parSetScoreSorted.size(); ++i){ | |
961 | + map <double, string> :: iterator itr; | |
962 | + std::cout << "For variable "<< order[i] <<":"<< endl; | |
963 | + for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr) | |
964 | + { | |
965 | + cout << itr->second << " : " << itr->first << "; "; | |
966 | + } | |
967 | + cout << endl; | |
968 | + cout << endl; | |
969 | + | |
970 | + }*/ | |
971 | + | |
972 | + | |
973 | + string orderstring; | |
974 | + for (unsigned i=0; i< order.size(); ++i){ | |
975 | + orderstring.append(int_to_str(order[i])+" "); | |
976 | + } | |
977 | + | |
978 | + ofstream myfile; | |
979 | + myfile.open( filepath.c_str(), ios::out | ios::app ); | |
980 | +// cout << path << endl; | |
981 | + if (myfile.is_open()) | |
982 | + { | |
983 | + myfile << orderstring << " " << boost::lexical_cast<string>(finlogscore) << " "<< bestLable << " " << bestScore << " " << exp(strP[0])*100; | |
984 | + myfile << "\n"; | |
985 | + | |
986 | + for(unsigned i=0; i< sortedStru.size(); ++i){ | |
987 | + if(exp(strP[i+1]) > (PERCENT/100)){ | |
988 | + break; | |
989 | + } | |
990 | + myfile << orderstring << " " << boost::lexical_cast<string>(finlogscore) << " " << sortedStru[i].second << " " << sortedStru[i].first<< " " << exp(strP[i+1])*100; | |
991 | + myfile << "\n"; | |
992 | + | |
993 | + } | |
994 | + } | |
995 | + else cout << "Unable to open file"; | |
996 | + myfile.close(); | |
997 | + | |
998 | +/* ofstream myfile; | |
999 | + myfile.open("/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2RO.txt"); | |
1000 | + if (myfile.is_open()) | |
1001 | + { | |
1002 | + for(unsigned i=0; i< parSetScoreSorted.size(); ++i){ | |
1003 | + map <double, string> :: iterator itr; | |
1004 | + for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr){ | |
1005 | + myfile << itr->second << " : " << itr->first << "; " ; | |
1006 | + } | |
1007 | + myfile << "\n"; | |
1008 | + | |
1009 | + } | |
1010 | + } | |
1011 | + else cout << "Unable to open file"; | |
1012 | + myfile.close();*/ | |
1013 | + | |
1014 | + | |
1015 | + //time after completion | |
1016 | + time_t later = time(0); | |
1017 | + char* dt_later = ctime(&later); | |
1018 | + std::cout << "The local date and time is: " << dt_later << std::endl; | |
1019 | + | |
1020 | + std::cout << std::endl; | |
1021 | + std::cout << std::endl; | |
1022 | + | |
1023 | + | |
1024 | + std::cin.clear(); | |
1025 | + std::cin.ignore(); | |
1026 | + std::cin.get(); | |
1027 | + return 0; | |
1028 | +} | |
1029 | + | |
1030 | + |