Commit c643e466cef282135e09dbb1e5fb7342cbb61864

Authored by Zhenghua Gong
1 parent 92636adddb
Exists in master

This added file is to get the best structures and markov blanket of each best structures.

Showing 1 changed file with 993 additions and 0 deletions   Show diff stats
... ... @@ -0,0 +1,993 @@
  1 +#include <string>
  2 +#include <vector>
  3 +#include <numeric>
  4 +#include <fstream>
  5 +#include <iterator>
  6 +#include <iostream>
  7 +#include <utility>
  8 +#include <iomanip>
  9 +#include <ctime>
  10 +#include <boost/math/special_functions.hpp>
  11 +#include <boost/lexical_cast.hpp>
  12 +#include <boost/algorithm/string/replace.hpp>
  13 +
  14 +#include <stdio.h>
  15 +#include <math.h>
  16 +#include <algorithm> // std::find
  17 +#include <map>
  18 +#include <iterator>
  19 +using namespace std;
  20 +
  21 +
  22 +struct compare {
  23 + bool operator()(const std::string& first, const std::string& second) {
  24 + if(first.size() == second.size())
  25 + return first < second;
  26 + else
  27 + return first.size() < second.size();
  28 + }
  29 +};
  30 +
  31 +string int_to_str(int num)
  32 +{
  33 + stringstream ss;
  34 +
  35 + ss << num;
  36 +
  37 + return ss.str();
  38 +};
  39 +
  40 +
  41 +int str_to_int(string st)
  42 +{
  43 + int result;
  44 +
  45 + stringstream(st) >> result;
  46 +
  47 + return result;
  48 +};
  49 +
  50 +bool compareI(const pair<string, double>&i, const pair<string, double>&j)
  51 +{
  52 + return i.second < j.second;
  53 +}
  54 +
  55 +bool compareD(const pair<string, double>&i, const pair<string, double>&j)
  56 +{
  57 + return i.second > j.second;
  58 +}
  59 +
  60 +bool compareIn(const pair<double, string>&i, const pair<double, string>&j)
  61 +{
  62 + return i.first < j.first;
  63 +}
  64 +
  65 +bool compareDe(const pair<double, string>&i, const pair<double, string>&j)
  66 +{
  67 + return i.first > j.first;
  68 +}
  69 +
  70 +double logAB (double x, double y)
  71 +{
  72 + double result;
  73 + double maxVal = max(x,y);
  74 +
  75 + if(maxVal == x)
  76 + {
  77 + result = maxVal + log(1+exp(y-maxVal));
  78 + }
  79 + else
  80 + {
  81 + result = maxVal + log(1+exp(x-maxVal));
  82 + }
  83 + return result;
  84 +}
  85 +
  86 +double persentageXofY (double newS, double oldS)
  87 +{
  88 + double result;
  89 + result = exp(oldS-newS)*100;
  90 + return result;
  91 +}
  92 +
  93 +void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr)
  94 +{
  95 + // Get the first occurrence
  96 + size_t pos = data.find(toSearch);
  97 +
  98 + // Repeat till end is reached
  99 + while( pos != std::string::npos)
  100 + {
  101 + // Replace this occurrence of Sub String
  102 + data.replace(pos, toSearch.size(), replaceStr);
  103 + // Get the next occurrence from the current position
  104 + pos =data.find(toSearch, pos + toSearch.size());
  105 + }
  106 +}
  107 +
  108 +int main() {
  109 + /*
  110 + //TEMPORARY INPUT FILE
  111 + vector< vector<int> > DAT;
  112 + vector <int> temp, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
  113 + //Filling up some of the test variables needed for the data set
  114 + temp.push_back(0);
  115 + temp.push_back(0);
  116 + temp.push_back(1);
  117 + //set 2
  118 + temp2.push_back(0);
  119 + temp2.push_back(1);
  120 + temp2.push_back(0);
  121 + //set3
  122 + temp3.push_back(0);
  123 + temp3.push_back(1);
  124 + temp3.push_back(1);
  125 + //set4
  126 + temp4.push_back(1);
  127 + temp4.push_back(0);
  128 + temp4.push_back(0);
  129 + //set5
  130 + temp5.push_back(1);
  131 + temp5.push_back(1);
  132 + temp5.push_back(1);
  133 + //set6
  134 + temp6.push_back(1);
  135 + temp6.push_back(1);
  136 + temp6.push_back(0);
  137 + //set7
  138 + temp7.push_back(1);
  139 + temp7.push_back(0);
  140 + temp7.push_back(1);
  141 + //set8
  142 + temp8.push_back(1);
  143 + temp8.push_back(0);
  144 + temp8.push_back(1);
  145 + //set9
  146 + temp9.push_back(1);
  147 + temp9.push_back(0);
  148 + temp9.push_back(1);
  149 + //set10
  150 + temp10.push_back(1);
  151 + temp10.push_back(0);
  152 + temp10.push_back(1);
  153 + //set11
  154 + temp11.push_back(1);
  155 + temp11.push_back(1);
  156 + temp11.push_back(1);
  157 + //set12
  158 + temp12.push_back(1);
  159 + temp12.push_back(0);
  160 + temp12.push_back(1);
  161 + //set13
  162 + temp13.push_back(1);
  163 + temp13.push_back(0);
  164 + temp13.push_back(1);
  165 + //set14
  166 + temp14.push_back(1);
  167 + temp14.push_back(1);
  168 + temp14.push_back(0);
  169 + //set15
  170 + temp15.push_back(0);
  171 + temp15.push_back(1);
  172 + temp15.push_back(1);
  173 + //set16
  174 + temp16.push_back(1);
  175 + temp16.push_back(0);
  176 + temp16.push_back(1);
  177 + std::cout << endl;
  178 + //Filling up test DATASET
  179 + DAT.push_back(temp);
  180 + DAT.push_back(temp2);
  181 + DAT.push_back(temp3);
  182 + DAT.push_back(temp4);
  183 + DAT.push_back(temp5);
  184 + DAT.push_back(temp6);
  185 + DAT.push_back(temp7);
  186 + DAT.push_back(temp8);
  187 + DAT.push_back(temp9);
  188 + DAT.push_back(temp10);
  189 + DAT.push_back(temp11);
  190 + DAT.push_back(temp12);
  191 + DAT.push_back(temp13);
  192 + DAT.push_back(temp14);
  193 + DAT.push_back(temp15);
  194 + DAT.push_back(temp16);
  195 + std::cout << endl;
  196 + size_t totvars = DAT[1].size();
  197 + size_t tottuples = DAT.size();
  198 + */
  199 + //Time before user input
  200 + time_t now = time(0);
  201 + char* dt = ctime(&now);
  202 + std::cout << "The local date and time is: " << dt << std::endl;
  203 + string FILENAME;
  204 + std::cout << "What is the name of your file?: ";
  205 + std::cin >> FILENAME;
  206 + // Variable declarations
  207 + fstream file;
  208 + int COLS;
  209 + std::cout << "How many variables in your data file?: ";
  210 + std::cin >> COLS;
  211 + std::cout << endl;
  212 + vector < vector <int> > DAT; // 2d array as a vector of vectors
  213 + vector <int> rowVector(COLS); // vector to add into 'array' (represents a row)
  214 + int row = 0; // Row counter
  215 +
  216 + // Read file
  217 + file.open(FILENAME.c_str(), ios::in); // Open file
  218 + if (file.is_open()) { // If file has correctly opened...
  219 + // Output debug message
  220 + cout << "File correctly opened" << endl;
  221 +
  222 + // Dynamically store data into array
  223 + while (file.good()) { // ... and while there are no errors,
  224 + DAT.push_back(rowVector); // add a new row,
  225 + for (int col = 0; col<COLS; col++) {
  226 + file >> DAT[row][col]; // fill the row with col elements
  227 + }
  228 + row++; // Keep track of actual row
  229 + }
  230 + }
  231 + else cout << "Unable to open file" << endl;
  232 + file.close();
  233 +
  234 + size_t totvars = DAT[1].size(); //column number
  235 + size_t tottuples = DAT.size();//row number
  236 +
  237 +
  238 +
  239 +
  240 + //Ask User for the order they would like to test Order starts at 0 e.g. 0,1,2 is valid order for three variables
  241 + int order_i;
  242 + vector <int> order;
  243 + std::cout << "Which is the order you would like to test?: ";
  244 + for (int i = 0; i < totvars;++i) {
  245 + std::cin >> order_i;
  246 + order.push_back(order_i);
  247 + }
  248 + std::cout << endl;
  249 + //Ask user for max value and min value of each variable being inputed
  250 + vector <int> max_cat;
  251 + vector <int> min_cat;
  252 + int max_cat_input;
  253 + int min_cat_input;
  254 +
  255 + std::cout << "What is the maximum categorical value of each variable?: ";
  256 + for (int i = 0; i < totvars; ++i) {
  257 + std::cin >> max_cat_input;
  258 + max_cat.push_back(max_cat_input);
  259 + }
  260 + for (size_t i = 0; i < max_cat.size();++i) {
  261 + std::cout << max_cat[i] << " ";
  262 + }
  263 + std::cout << endl;
  264 + std::cout << endl;
  265 + std::cout << "What is the minimum categorical value of each variable?: ";
  266 + for (int i = 0; i < totvars; ++i) {
  267 + std::cin >> min_cat_input;
  268 + min_cat.push_back(min_cat_input);
  269 + }
  270 + for (size_t i = 0; i < min_cat.size();++i) {
  271 + std::cout << min_cat[i] << " ";
  272 + }
  273 + std::cout << endl;
  274 + //Set the maximum for the amount of parents for any given variable
  275 + unsigned int maxparents;
  276 + std::cout << "What is the maximum amount of parents to be considered for any given variable?: ";
  277 + std::cin >> maxparents;
  278 + std::cout << endl;
  279 +
  280 + double PERCENT;
  281 + std::cout << "What is the percentage (Please use the format of 98.99, not 0.9899)?: ";
  282 + std::cin >> PERCENT;
  283 + std::cout << endl;
  284 +
  285 + //Time that program begins
  286 + time_t start = time(0);
  287 + char* dt_start = ctime(&start);
  288 + std::cout << "The local date and time is: " << dt_start << std::endl;
  289 +
  290 +
  291 + //Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category:
  292 + //categories in i
  293 + vector <int> catsi;
  294 + //total combinations of variables
  295 + //int totcombos = 1;
  296 + //how many catagori for every variable
  297 + for ( int i = 0; i < totvars; ++i) {
  298 + catsi.push_back((max_cat[i] - min_cat[i]) + 1);
  299 + //totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1);
  300 + }
  301 + /*size_t mincatsi = 10000, mincatvar;
  302 + for (size_t i = 0; i < catsi.size(); ++i) {
  303 + if (catsi[i] < mincatsi) {
  304 + mincatsi = catsi[i];
  305 + mincatvar = i;
  306 + }
  307 + }*/
  308 + //print out catsi
  309 + for (size_t i = 0; i < catsi.size();++i) {
  310 + std::cout << catsi[i] << " ";
  311 + }
  312 + std::cout << endl;
  313 +
  314 + //Total Families Ui,alpha for a particular variable in the order
  315 + vector <unsigned long long> families;//is vector, first element is the number of parentset for the first variabel......
  316 + for (unsigned int i = 0; i < totvars; ++i) {
  317 + int numparents = i;
  318 + if (numparents == 0) {
  319 + families.push_back(1);
  320 + }
  321 + else {
  322 +
  323 + unsigned long long numfams = 0;
  324 + for (unsigned int j = 0; j <= i; ++j) {
  325 + if (j <= maxparents) {
  326 + unsigned long long jFactorial = 1;
  327 + unsigned long long ijFactorial = 1;
  328 + unsigned long long iFactorial = 1;
  329 + //Calculate j!
  330 + for (unsigned int g = 0; g <= j; ++g) {
  331 + if (g != 0) {
  332 + jFactorial *= g;
  333 + }
  334 + }
  335 + //Calculate i!
  336 + for (unsigned int g = 0; g <= i; ++g) {
  337 + if (g != 0) {
  338 + iFactorial *= g;
  339 + }
  340 + }
  341 + //Calculate (i-j)!
  342 + for (unsigned int g = 0; g <= (i - j); ++g) {
  343 + if (g != 0) {
  344 + ijFactorial *= g;
  345 + }
  346 + }
  347 + numfams += (iFactorial) / (jFactorial * ijFactorial);
  348 + }
  349 + else {
  350 + break;
  351 + }
  352 + }
  353 + families.push_back(numfams);
  354 + }
  355 + }
  356 +
  357 +
  358 + //How many parent combinations for each step? As well as there counts
  359 + vector< vector <int> > ParentCombos;
  360 + vector< vector <int> > fullNijkvector;
  361 + vector< vector <string> > indexofvar; // This is label or index for each variable.
  362 + for (size_t i = 0; i < order.size(); ++i) {
  363 + //i represents the order of the variable
  364 + if (i == 0) {
  365 + vector <int> tmp,Nijkovercombos1;
  366 + vector <string> tempstring;
  367 + tempstring.push_back("[0]");
  368 + tmp.push_back(1);
  369 + ParentCombos.push_back(tmp);
  370 + //counting the amount of times that a value of the first variable in the order occurs
  371 + //this starts with the maximum value for that variable
  372 + for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) {
  373 + //hello cycles through the categories of the first variable in the order
  374 + int Nijk1 = 0;
  375 + //green cycles through tuples
  376 + for (int green = 0; green < tottuples; ++green){
  377 + if (DAT[green][order[0]] == hello) {
  378 + Nijk1 += 1;
  379 + }
  380 + }
  381 + Nijkovercombos1.push_back(Nijk1);
  382 + }
  383 + fullNijkvector.push_back(Nijkovercombos1);
  384 + indexofvar.push_back(tempstring);
  385 +
  386 + }
  387 + else {
  388 + vector <int> tmp,Nijkovercombos1;
  389 + vector <string> tempstring;
  390 + string tempa = "[" + int_to_str(i) +"]";
  391 + tempstring.push_back(tempa);
  392 + tmp.push_back(1);
  393 + int numparnts = i;
  394 + //counting the amount of times that a value of the last variable in the current order size occurs
  395 + //this starts with the maximum value for that variable
  396 + for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) {
  397 + //hello cycles through the categories of the first variable in the order
  398 + int Nijk1 = 0;
  399 + //green cycles through tuples
  400 + for (int green = 0; green < tottuples; ++green) {
  401 + if (DAT[green][order[numparnts]] == hello) {
  402 + Nijk1 += 1;
  403 + }
  404 + }
  405 + Nijkovercombos1.push_back(Nijk1);
  406 + }
  407 + fullNijkvector.push_back(Nijkovercombos1);
  408 +
  409 + //j representing the number of parents
  410 + for (int it = 1; it <= numparnts; ++it) {
  411 + //(333)Creating a vector that uses the right combination
  412 + double Nloopy = 0, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1;
  413 + /*std::cout << "This is for " << numparnts << " choose " << it << endl;
  414 + std::cout << "The iteration number is: " << it << endl;*/
  415 + //Accounting for the limit of parent quantity
  416 + if (it > maxparents) {
  417 + break;
  418 + }
  419 + else {
  420 + vector <int> NewMat(numparnts, 0);
  421 + for (int p = 0; p < it; ++p) {
  422 + NewMat[p] = 1;
  423 + }
  424 + for (int g = 2; g <= numparnts; ++g) {
  425 + NcolFactorial *= g;
  426 + }
  427 + for (int g = 2; g <= it; ++g) {
  428 + iFactorial *= g;
  429 + }
  430 + for (int g = 2; g <= (numparnts - it); ++g) {
  431 + NiFactorial *= g;
  432 + }
  433 + //Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts
  434 + Nloopy = NcolFactorial / (iFactorial * NiFactorial);
  435 + for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) {
  436 + int combsparents = 1;
  437 + vector <int> parsetv;
  438 + for (int par = 0; par < NewMat.size(); ++par){
  439 + if (NewMat[par] == 1){
  440 + parsetv.push_back(par);
  441 + }
  442 + }
  443 +
  444 + string tempstring2;
  445 +
  446 + for (int par2 = 0; par2 < parsetv.size(); ++par2){
  447 + if(par2+1==parsetv.size()){
  448 + tempstring2 = tempstring2 + int_to_str(parsetv[par2]);
  449 + //tempstring2 = tempstring2 +","+"|" + int_to_str(i);
  450 + tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]";
  451 + }
  452 + else{
  453 + //tempstring2 = tempstring2 + int_to_str(parsetv[par2])+",";
  454 + tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":";
  455 + }
  456 +
  457 +
  458 + }
  459 + tempstring.push_back(tempstring2);
  460 +
  461 + /*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl;
  462 + std::cout << "Here comes the NewMat:" << endl;*/
  463 + //(444)This sets up the process for changing
  464 + //PosOne tells me the position of the last one in the vector
  465 + //We want to change when the position is the last position available in the vector
  466 + int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0;
  467 + int SumOnes3 = 0, SumOnes4 = 0, SumY = 0;
  468 + for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) {
  469 + if (NewMat[PosOne] == 1) {
  470 + break;
  471 + }
  472 + }
  473 + for (int y = (numparnts - 1); y >= (numparnts - it); --y) {
  474 + //SumOnes tells you the amount of ones in the last i columns
  475 + //These are the last columns being considered
  476 + SumOnes += NewMat[y];
  477 + }
  478 + for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) {
  479 + //SumOnes2 tells you the amount of ones before you reach the next zero
  480 + //PosOne2 keeps track of the position of the coming zero
  481 + SumOnes2 += NewMat[PosOne2];
  482 + if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) {
  483 + break;
  484 + }
  485 + }
  486 + for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) {
  487 + //FrstOne tells you the position of the first number 1 starting from the left hand side
  488 + if (NewMat[FrstOne] == 1) {
  489 + break;
  490 + }
  491 + }
  492 + for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) {
  493 + //SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions
  494 + SumOnes4 += NewMat[x];
  495 + }
  496 + //Prints out NewMat
  497 + /*for (int u = 0; u < numparnts; ++u) {
  498 + std::cout << NewMat[u] << " ";
  499 + }
  500 + std::cout << endl;*/
  501 +
  502 +
  503 + //Adding in the code that will allow counts parent combinations for this particular variable
  504 + for (int q = 0; q < i; ++q) {
  505 + if (NewMat[q] == 1) {
  506 + combsparents *= catsi[order[q]];
  507 + }
  508 + }
  509 + tmp.push_back(combsparents);//made the whole parentset configure for a variable
  510 + /*std::cout << "This is combsparents: " << combsparents << endl;
  511 + std::cout << endl;*/
  512 + vector <int> hvect;
  513 + //hvect tells us which variables are being considered always the last variable is being considered
  514 + //e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only
  515 + //continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 >
  516 + //for A C hvect will be < 0 2 >
  517 + for (int h = 0; h < i; ++h) {
  518 + if (NewMat[h] == 1) {
  519 + hvect.push_back(h);
  520 + }
  521 + }
  522 + hvect.push_back(numparnts);
  523 + size_t shvect = hvect.size();
  524 + //Prints out hvect
  525 + /*for (int u = 0; u < shvect; ++u) {
  526 + std::cout << hvect[u] << " ";
  527 + }*/
  528 + //std::cout << endl;
  529 + //Counting the amount of values in the data that have that particular parent combination
  530 + vector <int> Nijkovercombos;
  531 + for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) {
  532 + //(333)Creating a vector that uses the right combination
  533 + /*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/
  534 + vector <int> Test(shvect, last), maxtest;
  535 + for (int p = 0; p < (shvect-1); ++p) {
  536 + Test[p] = max_cat[order[hvect[p]]];
  537 + }
  538 + maxtest = Test;
  539 + for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) {
  540 + //std::cout << endl;
  541 + /*std::cout << endl;
  542 + std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl;
  543 + std::cout << "Here comes the Test:" << endl;*/
  544 + //(444)This sets up the process for changing
  545 + //NMpos tells me the position of the last non minimum value in the vector
  546 + //We want to change when the position is the last position available in the vector
  547 + int NMpos = 0, minpos = 0;
  548 + for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) {
  549 + if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) {
  550 + break;
  551 + }
  552 + }
  553 + for (minpos = (shvect - 2); minpos >= 0; --minpos) {
  554 + //minpos tells you the position of the last minimum value
  555 + if (Test[minpos] == min_cat[order[hvect[minpos]]]) {
  556 + break;
  557 + }
  558 + }
  559 + //Prints out Test
  560 + /*for (int u = 0; u < shvect; ++u) {
  561 + std::cout << Test[u] << " ";
  562 + }
  563 + std::cout << endl;
  564 + std::cout << endl;
  565 + std::cout << endl;*/
  566 + //Count how many occurrences of the value are present in the data
  567 + int Nijk = 0;
  568 + for (int num2size = 0; num2size < tottuples; ++num2size) {
  569 + int countcorrect = 0;
  570 + for (size_t g = 0; g < Test.size(); ++g) {
  571 + //num2size cycles through tuples
  572 + //order[hvect[g]] represents the variable in the order that we are considering as a parent
  573 + if (DAT[num2size][order[hvect[g]]] == Test[g]) {
  574 + countcorrect += 1;
  575 + }
  576 + }
  577 + if (countcorrect == Test.size()) {
  578 + Nijk += 1;
  579 + }
  580 + }
  581 + //Nijkovercombos displays data as follows
  582 + //it starts with the smallest value for the last variable in hvect
  583 + //and the largest values in the first n-1 variables in hvect
  584 + //max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0
  585 + //count,count,count,count e.g. 13, 2, 2, 3, 4, 10
  586 + Nijkovercombos.push_back(Nijk);
  587 + //(666)Now that the values have been calculated find out what the next combination of variables should be
  588 + if ((NMpos == -1) & (minpos == (shvect - 2))) {
  589 + //break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000
  590 + break;
  591 + }
  592 + if (minpos < NMpos) {
  593 + Test[NMpos] = Test[NMpos] - 1;
  594 + }
  595 + else if (NMpos < minpos) {
  596 + Test[NMpos] = Test[NMpos] - 1;
  597 + for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) {
  598 + Test[filler] = maxtest[filler];
  599 + }
  600 + }
  601 + }
  602 + }
  603 + fullNijkvector.push_back(Nijkovercombos);
  604 + //(666)Now that the unique values have been calculated find out what the next combination of variables should be
  605 + if ((PosOne == (numparnts - 1)) & (SumOnes == it)) {
  606 + break;
  607 + }
  608 + else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) {
  609 + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
  610 + //NxtOne tells you the position of the next closest number 1 that we would
  611 + //like to change the position of (we will call it the important number one)
  612 + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one
  613 + SumOnes3 += NewMat[NxtOne];
  614 + if (SumOnes3 == (SumOnes2 + 1)) {
  615 + break;
  616 + }
  617 + }
  618 + if (SumOnes4 == (it - 1)) {
  619 + //If all except one of the 1's are found in the last it - 1 columns
  620 + for (int x = 0; x < numparnts; ++x) {
  621 + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) {
  622 + //If
  623 + NewMat[x] = 1;
  624 + }
  625 + else {
  626 + NewMat[x] = 0;
  627 + }
  628 + }
  629 + }
  630 + else {
  631 + for (int x = 0; x < numparnts; ++x) {
  632 + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) {
  633 + //If the position is that of the first 1 or it falls between the changed number one and the total
  634 + //amount of ones that are on that side of the zero 10111
  635 + NewMat[x] = 1;
  636 + }
  637 + else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) {
  638 + //If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing
  639 + //and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right
  640 + NewMat[x] = 1;
  641 + }
  642 + else {
  643 + NewMat[x] = 0;
  644 + }
  645 + }
  646 + }
  647 + }
  648 + else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) {
  649 + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
  650 + //NxtOne tells you the position of the next closest number 1 that we would
  651 + //like to change the position of (we will call it the important number one)
  652 + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one
  653 + SumOnes3 += NewMat[NxtOne];
  654 + if (SumOnes3 == 1) {
  655 + break;
  656 + }
  657 + }
  658 + if (it != 1) {
  659 + for (int x = 0; x < numparnts; ++x) {
  660 + if (x == (NxtOne + 1)) {
  661 + NewMat[x] = 1;
  662 + }
  663 + else if (x == NxtOne) {
  664 + NewMat[x] = 0;
  665 + }
  666 + else if ((NewMat[x] == 1) & (x != NxtOne)) {
  667 + NewMat[x] = 1;
  668 + }
  669 + else {
  670 + NewMat[x] = 0;
  671 + }
  672 + }
  673 + }
  674 + else {
  675 + for (int x = 0; x < numparnts; ++x) {
  676 + if ((x == (NxtOne + 1))) {
  677 + NewMat[x] = 1;
  678 + }
  679 + else {
  680 + NewMat[x] = 0;
  681 + }
  682 + }
  683 + }
  684 + }
  685 + }
  686 +
  687 + }
  688 + }
  689 + ParentCombos.push_back(tmp);
  690 + indexofvar.push_back(tempstring);
  691 + }
  692 +
  693 + }
  694 + //std::cout << endl;
  695 + std::cout << endl;
  696 + //printing out the ParentCombos matrix just created above
  697 + /*for (size_t i = 0; i < ParentCombos.size(); ++i) {
  698 + for (size_t j = 0; j < ParentCombos[i].size(); ++j) {
  699 + std::cout << ParentCombos[i][j] << " ";
  700 + }
  701 + std::cout << endl;
  702 + }*/
  703 + std::cout << endl;
  704 + //printing out the fullNijkvector matrix just created above
  705 + /*for (size_t i = 0; i < fullNijkvector.size(); ++i) {
  706 + for (size_t j = 0; j < fullNijkvector[i].size(); ++j) {
  707 + std::cout << fullNijkvector[i][j] << " ";
  708 + }
  709 + std::cout << endl;
  710 + }
  711 + std::cout << endl;*/
  712 + //Print out Data
  713 + /*for (int i = 0; i < DAT.size(); ++i) {
  714 + for (int j = 0; j < DAT[i].size(); ++j) {
  715 + std::cout << DAT[i][j] << " ";
  716 + }
  717 + std::cout << endl;
  718 + }*/
  719 + //Print out the families size
  720 + /*for (size_t i = 0; i < families.size(); ++i) {
  721 + std::cout << families[i] << " ";
  722 + }*/
  723 +
  724 + //Obtaining the actual score from this information
  725 + //varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting
  726 + //the first variable in the order)
  727 + //keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha
  728 + int posinfull = 0;
  729 + //finlogscore is the final score in natural log format
  730 + long double finlogscore = 0.0;
  731 + vector< vector <double> > vecvarparset;
  732 + for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) {
  733 + //sumovUialpha is the the sum over all parent sets for a particular variable
  734 + long double sumovUialpha = 0.0;
  735 + //vector of all values of seclastgamma
  736 + vector <double> vec2ndlastgamma;
  737 + long double maxseclastgamma;
  738 + //Uialpha cycles through all the parent sets for a particular family
  739 + for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) {
  740 + // nijkprime represents the value of 1/(ri * qi)
  741 + long double nijkprime, nijprime;
  742 + long double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha];
  743 +
  744 + nijprime = 1.0 / (PCs);
  745 + nijkprime = 1.0 / (rij * PCs);
  746 + //seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic
  747 + long double seclastgamma = 0.0;
  748 + //qi_Uialpha cycles through the combinations for the parents in a set
  749 + for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) {
  750 + long double lastgamma = 0.0;
  751 + long double nij = 0.0;
  752 + //countijk cycles through the categories of the variable with a particular order
  753 + //catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to
  754 + //and then find the categories for it
  755 + for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) {
  756 + long double topy;
  757 + //rightcol lets you find the right column/position of the value that you need for a particular category within the
  758 + int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]);
  759 + nij += fullNijkvector[posinfull][rightcol];
  760 + topy = (nijkprime + fullNijkvector[posinfull][rightcol]);
  761 +
  762 + //Using boost lgamma function for the product over categories and parent combinations
  763 + lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime);
  764 +
  765 + }
  766 + long double boty = nij + nijprime;
  767 + seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty);
  768 +
  769 + }
  770 + vec2ndlastgamma.push_back(seclastgamma);
  771 +
  772 +
  773 +
  774 + //Calculate sumovUialpha based on the logsumexp concept
  775 + if (Uialpha + 1 == families[varinorder]) {
  776 +
  777 + for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) {
  778 + //change the value of maxseclastgamma if new value is larger than the previous value
  779 + if (que == 0) {
  780 + maxseclastgamma = vec2ndlastgamma[0];
  781 + }
  782 + else {
  783 + if (maxseclastgamma < vec2ndlastgamma[que]) {
  784 + maxseclastgamma = vec2ndlastgamma[que];
  785 + }
  786 + }
  787 + }
  788 + for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) {
  789 + sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma);
  790 + }
  791 + //add info on parent set scores for each variable to this vector of vectors
  792 + vecvarparset.push_back(vec2ndlastgamma);
  793 + }
  794 +
  795 + /*std::cout << endl;
  796 + std::cout << seclastgamma;
  797 + std::cout << endl;*/
  798 + posinfull += 1;
  799 + //std::cout << posinfull << endl;
  800 + }
  801 + finlogscore += log(sumovUialpha) + maxseclastgamma;
  802 + }
  803 +
  804 + vector < vector<string> > parSet;
  805 + vector< map <double, string, greater <double> > > parSetScoreSorted;
  806 + vector< map <double, string, greater <double> > > strucScore;
  807 +
  808 +
  809 +//Below is another way to match the index or label sets with the scores sets, and store the (score, label) into a map vector. And for each vector element the map is a sorted map.
  810 +
  811 + for (unsigned i = 0; i < indexofvar.size(); ++i){
  812 +
  813 + map <double, string, greater <double> > tempMap;
  814 + for (unsigned j=0; j< indexofvar[i].size(); ++j){
  815 + tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j]));
  816 + }
  817 + parSetScoreSorted.push_back(tempMap);
  818 +
  819 + }
  820 +
  821 +
  822 + pair <double, string> bestStrScore;
  823 + double bestScore = 0;
  824 + string bestLable;
  825 +
  826 +
  827 + for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){
  828 + map <double, string> :: iterator itr;
  829 + itr = parSetScoreSorted[i].begin();
  830 + bestScore = bestScore + (itr->first);
  831 + bestLable = bestLable +(itr->second);
  832 + }
  833 +
  834 + bestStrScore = make_pair(bestScore, bestLable);//This is the best score.
  835 +
  836 + vector < pair <double, string> > sortedStru;//This store all the structures in the percentage.
  837 + vector < vector < pair <double, string > > > deltaC;
  838 +
  839 + for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){
  840 + map <double, string> :: iterator itr0, itr1;
  841 + vector < pair <double, string > > tempDelta;
  842 + double tempDeltaS;
  843 + string tempDeltaL;
  844 + itr0 = parSetScoreSorted[l].begin();
  845 + itr1 = parSetScoreSorted[l].begin();
  846 + double tem1=exp(itr1->first), tem2 = exp(itr1->first);
  847 + for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){
  848 + tem1 = tem2;
  849 + itr1 = ++itr1;
  850 + tem2 = tem1 + exp(itr1->first);
  851 + double tem = (tem1/tem2)*100;
  852 + if(tem <= PERCENT){
  853 + tempDeltaS = (itr1->first)-(itr0->first);
  854 + tempDeltaL = itr1->second;
  855 + tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL));
  856 + }
  857 + }
  858 +
  859 + deltaC.push_back( tempDelta);
  860 + }
  861 +
  862 +
  863 + for(unsigned i=0; i< deltaC.size(); ++i){
  864 + for (unsigned j=0; j< deltaC[i].size(); ++j)
  865 + {
  866 + double score = bestStrScore.first + deltaC[i][j].first;
  867 + string lab = bestStrScore.second;
  868 + findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second);
  869 + sortedStru.push_back(make_pair(score, lab));
  870 + }
  871 + }
  872 +
  873 + sort(sortedStru.begin(),sortedStru.end(),compareDe);
  874 +
  875 +
  876 +
  877 +/* for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ //l means each variable
  878 + for (unsigned m = contl; m < parSetScoreSorted[l].size(); ++m){ //m means the number of parents sets
  879 + contl =m;
  880 + vector < pair <string, double > > delta; // store the different of the highest score and the second highest score
  881 + for (unsigned i = l; i < parSetScoreSorted.size(); ++i){
  882 + map <double, string> :: iterator itr0, itr1;
  883 + double tempDeltaS;
  884 + string tempDeltaL;
  885 + itr0 = parSetScoreSorted[i].begin();
  886 + itr1 = itr0;
  887 + for (int kr = 0; kr < contl; ++kr){
  888 + itr1 = itr0++;
  889 + }
  890 + tempDeltaS = (itr1->first)-(itr0->first);
  891 + tempDeltaL = int_to_str(i);
  892 + delta.push_back(make_pair(tempDeltaL, tempDeltaS));
  893 + }
  894 +
  895 + sort(delta.begin(),delta.end(),compareI);
  896 +
  897 + double const stopLimit = PERCENT;
  898 + double tempbeforeS = bestStrScore.first, temafterS;
  899 +
  900 + for (unsigned i = 0; i < delta.size(); ++i){
  901 + pair <double, string > temppair;
  902 + double tempLime;
  903 + int ind = str_to_int(delta[i].first);
  904 + string s = parSetScoreSorted[ind].begin()->second;
  905 + string sRep = (++parSetScoreSorted[ind].begin())->second;
  906 + temppair = bestStrScore;
  907 + findAndReplaceAll(temppair.second, s, sRep);
  908 + temppair.first = temppair.first - delta[i].second;
  909 +
  910 + temafterS = logAB(tempbeforeS, temppair.first);
  911 +
  912 + tempLime = persentageXofY(temafterS, tempbeforeS) ;
  913 +
  914 + tempbeforeS = temafterS;
  915 +
  916 + if(tempLime > stopLimit){
  917 + goto finish;
  918 + }
  919 +
  920 + sortedStru.push_back(temppair);
  921 +
  922 + // std::cout << ind << " "<< s << " "<< sRep << " "<< temppair.first << " " << temppair.second <<endl;
  923 + // std::cout << temppair.first << " " << temppair.second <<endl;
  924 + // std::cout << std::endl;
  925 +
  926 + }
  927 +
  928 + }
  929 +
  930 + }
  931 +
  932 + finish:*/
  933 +
  934 +
  935 + std::cout << std::endl;
  936 + std::cout << "Total Score: "<< boost::lexical_cast<string>(finlogscore) << std::endl;
  937 + std::cout << std::endl;
  938 +
  939 +
  940 + cout << "Best several structures are:" << endl;
  941 + cout << bestScore << " : " << bestLable << endl;
  942 +
  943 + for (unsigned i=0; i< sortedStru.size(); ++i )
  944 + {
  945 + cout << sortedStru[i].first << " : " << sortedStru[i].second << endl;
  946 + }
  947 +
  948 + std::cout << std::endl;
  949 + std::cout << std::endl;
  950 +
  951 + for(unsigned i=0; i< parSetScoreSorted.size(); ++i){
  952 + map <double, string> :: iterator itr;
  953 + std::cout << "For variable "<< i <<":"<< endl;
  954 + for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr)
  955 + {
  956 + cout << itr->second << " : " << itr->first << "; ";
  957 + }
  958 + cout << endl;
  959 + cout << endl;
  960 +
  961 + }
  962 +
  963 +
  964 +/* ofstream myfile;
  965 + myfile.open("/home/zgong001/Documents/SprinklerDataset/bestStructure.txt");
  966 + if (myfile.is_open())
  967 + {
  968 + myfile << bestLable << " : " << bestScore << sortedStru.size();
  969 + myfile << "\n";
  970 +
  971 + for(unsigned i=0; i< sortedStru.size(); ++i){
  972 + myfile << sortedStru[i].second << " : " << sortedStru[i].first;
  973 + myfile << "\n";
  974 + }
  975 + }
  976 + else cout << "Unable to open file";
  977 + myfile.close();*/
  978 +
  979 +
  980 + //time after completion
  981 + time_t later = time(0);
  982 + char* dt_later = ctime(&later);
  983 + std::cout << "The local date and time is: " << dt_later << std::endl;
  984 +
  985 + std::cout << std::endl;
  986 + std::cout << std::endl;
  987 +
  988 +
  989 + std::cin.clear();
  990 + std::cin.ignore();
  991 + std::cin.get();
  992 + return 0;
  993 +}