Commit 53863fc1fc8d2efea4e4e5ed2a81265a6490022c

Authored by Zhenghua Gong
1 parent 7f982ea62a
Exists in master

Get structures from orders

Showing 1 changed file with 1030 additions and 0 deletions   Show diff stats
... ... @@ -0,0 +1,1030 @@
  1 +#include <string>
  2 +#include <vector>
  3 +#include <numeric>
  4 +#include <fstream>
  5 +#include <iterator>
  6 +#include <iostream>
  7 +#include <utility>
  8 +#include <iomanip>
  9 +#include <ctime>
  10 +#include <sstream>
  11 +#include <boost/math/special_functions.hpp>
  12 +#include <boost/lexical_cast.hpp>
  13 +#include <boost/algorithm/string/replace.hpp>
  14 +#include <boost/multiprecision/cpp_int.hpp>
  15 +
  16 +#include <stdio.h>
  17 +#include <math.h>
  18 +#include <algorithm> // std::find
  19 +#include <map>
  20 +#include <iterator>
  21 +#include <bits/stdc++.h>
  22 +
  23 +using namespace std;
  24 +
  25 +//Function for calculating the binomial coefficient
  26 +boost::multiprecision::cpp_int BinomialCoefficient(unsigned int n, unsigned int k) {
  27 + if (k == 0) { return 1; }
  28 + else { return (n * BinomialCoefficient(n - 1, k - 1)) / k; }
  29 +}
  30 +struct compare {
  31 + bool operator()(const std::string& first, const std::string& second) {
  32 + if(first.size() == second.size())
  33 + return first < second;
  34 + else
  35 + return first.size() < second.size();
  36 + }
  37 +};
  38 +
  39 +string int_to_str(int num)
  40 +{
  41 + stringstream ss;
  42 +
  43 + ss << num;
  44 +
  45 + return ss.str();
  46 +};
  47 +
  48 +
  49 +int str_to_int(string st)
  50 +{
  51 + int result;
  52 +
  53 + stringstream(st) >> result;
  54 +
  55 + return result;
  56 +};
  57 +
  58 +double str2doub(std::string str)
  59 +{
  60 + double d;
  61 + std::stringstream(str) >> d;
  62 + return d;
  63 +}
  64 +
  65 +bool compareI(const pair<string, double>&i, const pair<string, double>&j)
  66 +{
  67 + return i.second < j.second;
  68 +}
  69 +
  70 +bool compareD(const pair<string, double>&i, const pair<string, double>&j)
  71 +{
  72 + return i.second > j.second;
  73 +}
  74 +
  75 +bool compareIn(const pair<double, string>&i, const pair<double, string>&j)
  76 +{
  77 + return i.first < j.first;
  78 +}
  79 +
  80 +bool compareDe(const pair<double, string>&i, const pair<double, string>&j)
  81 +{
  82 + return i.first > j.first;
  83 +}
  84 +
  85 +double logAB (double x, double y)
  86 +{
  87 + double result;
  88 + double maxVal = max(x,y);
  89 +
  90 + if(maxVal == x)
  91 + {
  92 + result = maxVal + log(1+exp(y-maxVal));
  93 + }
  94 + else
  95 + {
  96 + result = maxVal + log(1+exp(x-maxVal));
  97 + }
  98 + return result;
  99 +}
  100 +
  101 +double persentageXofY (double newS, double oldS)
  102 +{
  103 + double result;
  104 + result = exp(oldS-newS)*100;
  105 + return result;
  106 +}
  107 +
  108 +void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr)
  109 +{
  110 + // Get the first occurrence
  111 + size_t pos = data.find(toSearch);
  112 +
  113 + // Repeat till end is reached
  114 + while( pos != std::string::npos)
  115 + {
  116 + // Replace this occurrence of Sub String
  117 + data.replace(pos, toSearch.size(), replaceStr);
  118 + // Get the next occurrence from the current position
  119 + pos =data.find(toSearch, pos + toSearch.size());
  120 + }
  121 +}
  122 +
  123 +string findMB(string structures, string variable){
  124 + string MB;
  125 +
  126 +
  127 + return MB;
  128 +}
  129 +
  130 +int main() {
  131 +
  132 + string FILENAME; // Downloads/D50C9v.txt
  133 + int COLS; // 9
  134 + vector <int> max_cat; // 1 2 1 3 2 1 2 2 3
  135 + vector <int> min_cat; // 0 0 0 0 0 0 0 0 0
  136 + unsigned int maxparents; // 4
  137 + vector <int> order; //2 6 8 4 1 0 3 5 7
  138 + double PERCENT;
  139 + string filepath; // output file name
  140 +
  141 + vector<string> settings;
  142 + vector<bool> settingsFlag;
  143 +
  144 + settings.push_back("DataFile");
  145 + settingsFlag.push_back(false);
  146 + settings.push_back("TotalVariables");
  147 + settingsFlag.push_back(false);
  148 + settings.push_back("MaximumCategory");
  149 + settingsFlag.push_back(false);
  150 + settings.push_back("MinimumCategory");
  151 + settingsFlag.push_back(false);
  152 + settings.push_back("MaximumParents");
  153 + settingsFlag.push_back(false);
  154 + settings.push_back("Order");
  155 + settingsFlag.push_back(false);
  156 + settings.push_back("Percentage");
  157 + settingsFlag.push_back(false);
  158 + settings.push_back("OutputFileName");
  159 + settingsFlag.push_back(false);
  160 +
  161 + fstream infile;
  162 + string ConfigFile;
  163 + std::cout << "What is the location of the configuration file that you edited?: ";
  164 + std::cin >> ConfigFile;
  165 + std::cout << std::endl;
  166 + //string ConfigFile = "/home/efraingonzalez0/Downloads/new.config";
  167 + infile.open(ConfigFile.c_str(),ios::in);
  168 + vector<string> line;
  169 + if(infile.is_open()) {
  170 + std::cout << "Configuration file correctly opened" << std::endl;
  171 + while(!infile.eof()) {
  172 + string tmp;
  173 + getline(infile,tmp);
  174 + line.push_back(tmp);
  175 + }
  176 + }
  177 + else {
  178 + std::cout << "Unable to open configuration file" << std::endl;
  179 + exit(0);
  180 + }
  181 +
  182 + infile.close();
  183 + line.erase(line.end());
  184 + for(size_t k = 0; k < settings.size(); ++k) {
  185 + for(size_t i = 0; i < line.size(); ++i) {
  186 + size_t foundit = line[i].find(settings[k]);
  187 + if (foundit != std::string::npos) {
  188 + //find position of "="
  189 + unsigned int poseq = line[i].find("=");
  190 + unsigned int posneqfir,posneqlas;
  191 + //first position of relevant string
  192 + for(unsigned int j = poseq + 1; j < line[i].length(); ++j) {
  193 + if(line[i].at(j) != ' ') {
  194 + posneqfir = j;
  195 + break;
  196 + }
  197 + }
  198 + //last position of relevant string
  199 + for(unsigned int j2 = line[i].length() - 1; j2 >= posneqfir; ++j2) {
  200 + if((line[i].at(j2) != ' ') && (line[i].at(j2) != '\n')){
  201 + posneqlas = j2;
  202 + break;
  203 + }
  204 + }
  205 +
  206 + if(settings[k] == "DataFile") {
  207 + FILENAME = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
  208 + }
  209 + else if(settings[k] == "TotalVariables") {
  210 + COLS = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1));
  211 + }
  212 + else if(settings[k] == "MaximumCategory") {//still need to edit
  213 + string max_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
  214 + std::stringstream macs(max_cat_string);
  215 + int macsti;
  216 + while( macs >> macsti) {
  217 + max_cat.push_back(macsti);
  218 + }
  219 + }
  220 + else if(settings[k] == "MinimumCategory") {//still need to edit
  221 + string min_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
  222 + std::stringstream mics(min_cat_string);
  223 + int micsti;
  224 + while( mics >> micsti) {
  225 + min_cat.push_back(micsti);
  226 + }
  227 + }
  228 +
  229 + else if(settings[k] == "MaximumParents") {
  230 + maxparents = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1));
  231 + }
  232 +
  233 + else if(settings[k] == "Order") {//still need to edit
  234 + string st_ord_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
  235 + std::stringstream starordr(st_ord_string);
  236 + int StOrti;
  237 + while( starordr >> StOrti) {
  238 + order.push_back(StOrti);
  239 + }
  240 + }
  241 +
  242 + else if(settings[k] == "Percentage") {
  243 + PERCENT = str2doub(line[i].substr(posneqfir,posneqlas - posneqfir + 1));
  244 + }
  245 +
  246 + else if(settings[k] == "OutputFileName") {
  247 + filepath = line[i].substr(posneqfir,posneqlas - posneqfir + 1);
  248 + }
  249 +
  250 + settingsFlag[k] = true;
  251 + break;
  252 + }
  253 + }
  254 + }
  255 +
  256 + for(int k=0;k<settings.size();k++) {
  257 + if(!settingsFlag[k]) {
  258 + std::cout << "Error: "<< settings[k] << " is not configured." << std::endl;
  259 + exit(0);
  260 + }
  261 + else if(max_cat.size() != COLS) {
  262 + std::cout << "Error: Size of MaximumCategory and the number of TotalVariables do not match" << std::endl;
  263 + exit(0);
  264 + }
  265 + else if(min_cat.size() != COLS) {
  266 + std::cout << "Error: Size of MinimumCategory and the number of TotalVariables do not match" << std::endl;
  267 + exit(0);
  268 + }
  269 + else if(max_cat.size() != min_cat.size()) {
  270 + std::cout << "Error: Size of MaximumCategory and MinimumCategory do not match" << std::endl;
  271 + exit(0);
  272 + }
  273 + else if(PERCENT > 100.0){
  274 + std::cout << "Error: Percentage may not be larger than 100.0 which represents 100%. If you need a difference larger than 100% then use a negative number." << std::endl;
  275 + exit(0);
  276 + }
  277 + }
  278 +
  279 +
  280 +
  281 + fstream file;
  282 + std::cout << endl;
  283 + vector < vector <int> > DAT; // 2d array as a vector of vectors
  284 + vector <int> rowVector(COLS); // vector to add into 'array' (represents a row)
  285 + int row = 0; // Row counter
  286 +
  287 + // Read file
  288 + file.open(FILENAME.c_str(), ios::in); // Open file
  289 + if (file.is_open()) { // If file has correctly opened...
  290 + // Output debug message
  291 + cout << "File correctly opened" << endl;
  292 +
  293 + // Dynamically store data into array
  294 + while (file.good()) { // ... and while there are no errors,
  295 + DAT.push_back(rowVector); // add a new row,
  296 + for (int col = 0; col<COLS; col++) {
  297 + file >> DAT[row][col]; // fill the row with col elements
  298 + }
  299 + row++; // Keep track of actual row
  300 + }
  301 + }
  302 + else cout << "Unable to open file" << endl;
  303 + file.close();
  304 + DAT.erase(DAT.end());
  305 +
  306 + size_t totvars = DAT[1].size(); //column number
  307 + size_t tottuples = DAT.size();//row number
  308 +
  309 +
  310 + std::cout << endl;
  311 +
  312 +
  313 + //Time that program begins
  314 + time_t start = time(0);
  315 + char* dt_start = ctime(&start);
  316 + std::cout << "The local date and time is: " << dt_start << std::endl;
  317 +
  318 +
  319 + //Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category:
  320 + //categories in i
  321 + vector <int> catsi;
  322 + //total combinations of variables
  323 + //int totcombos = 1;
  324 + //how many catagori for every variable
  325 + for ( int i = 0; i < totvars; ++i) {
  326 + catsi.push_back((max_cat[i] - min_cat[i]) + 1);
  327 + //totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1);
  328 + }
  329 +
  330 + //print out catsi
  331 + for (size_t i = 0; i < catsi.size();++i) {
  332 + std::cout << catsi[i] << " ";
  333 + }
  334 + std::cout << endl;
  335 +
  336 + //Total Families Ui,alpha for a particular variable in the order
  337 + //Total Families Ui,alpha for a particular variable in the order
  338 + vector <boost::multiprecision::cpp_int> families;
  339 + for (unsigned int i = 0; i < totvars; ++i) {
  340 + int numparents = i;
  341 + if (numparents == 0) {
  342 + families.push_back(1);
  343 + }
  344 + else {
  345 +
  346 + boost::multiprecision::cpp_int numfams = 0;
  347 + for (unsigned int j = 0; j <= i; ++j) {
  348 + if (j <= maxparents) {
  349 + unsigned long long jFactorial = 1;
  350 + unsigned long long ijFactorial = 1;
  351 + unsigned long long iFactorial = 1;
  352 + //Calculate j!
  353 + for (unsigned int g = 0; g <= j; ++g) {
  354 + if (g != 0) {
  355 + jFactorial *= g;
  356 + }
  357 + }
  358 + //Calculate i!
  359 + for (unsigned int g = 0; g <= i; ++g) {
  360 + if (g != 0) {
  361 + iFactorial *= g;
  362 + }
  363 + }
  364 + //Calculate (i-j)!
  365 + for (unsigned int g = 0; g <= (i - j); ++g) {
  366 + if (g != 0) {
  367 + ijFactorial *= g;
  368 + }
  369 + }
  370 + numfams += BinomialCoefficient(i,j);
  371 + }
  372 + else {
  373 + break;
  374 + }
  375 + }
  376 + families.push_back(numfams);
  377 + }
  378 + }
  379 +
  380 +
  381 + //How many parent combinations for each step? As well as there counts
  382 + vector< vector <int> > ParentCombos;
  383 + vector< vector <int> > fullNijkvector;
  384 + vector< vector <string> > indexofvar; // This is label or index for each variable.
  385 + for (size_t i = 0; i < order.size(); ++i) {
  386 + //i represents the order of the variable
  387 + if (i == 0) {
  388 + vector <int> tmp,Nijkovercombos1;
  389 + vector <string> tempstring;
  390 +// tempstring.push_back("[0]");
  391 + tempstring.push_back("[" + int_to_str(order[0])+ "]");
  392 + tmp.push_back(1);
  393 + ParentCombos.push_back(tmp);
  394 + //counting the amount of times that a value of the first variable in the order occurs
  395 + //this starts with the maximum value for that variable
  396 + for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) {
  397 + //hello cycles through the categories of the first variable in the order
  398 + int Nijk1 = 0;
  399 + //green cycles through tuples
  400 + for (int green = 0; green < tottuples; ++green){
  401 + if (DAT[green][order[0]] == hello) {
  402 + Nijk1 += 1;
  403 + }
  404 + }
  405 + Nijkovercombos1.push_back(Nijk1);
  406 + }
  407 + fullNijkvector.push_back(Nijkovercombos1);
  408 + indexofvar.push_back(tempstring);
  409 +
  410 + }
  411 + else {
  412 + vector <int> tmp,Nijkovercombos1;
  413 + vector <string> tempstring;
  414 +// string tempa = "[" + int_to_str(i) +"]";
  415 + string tempa = "[" + int_to_str(order[i]) +"]";
  416 + tempstring.push_back(tempa);
  417 + tmp.push_back(1);
  418 + int numparnts = i;
  419 + //counting the amount of times that a value of the last variable in the current order size occurs
  420 + //this starts with the maximum value for that variable
  421 + for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) {
  422 + //hello cycles through the categories of the first variable in the order
  423 + int Nijk1 = 0;
  424 + //green cycles through tuples
  425 + for (int green = 0; green < tottuples; ++green) {
  426 + if (DAT[green][order[numparnts]] == hello) {
  427 + Nijk1 += 1;
  428 + }
  429 + }
  430 + Nijkovercombos1.push_back(Nijk1);
  431 + }
  432 + fullNijkvector.push_back(Nijkovercombos1);
  433 +
  434 + //j representing the number of parents
  435 + for (int it = 1; it <= numparnts; ++it) {
  436 + //(333)Creating a vector that uses the right combination
  437 + boost::multiprecision::cpp_int Nloopy = 0;
  438 + //, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1;
  439 + /*std::cout << "This is for " << numparnts << " choose " << it << endl;
  440 + std::cout << "The iteration number is: " << it << endl;*/
  441 + //Accounting for the limit of parent quantity
  442 + if (it > maxparents) {
  443 + break;
  444 + }
  445 + else {
  446 + vector <int> NewMat(numparnts, 0);
  447 + for (int p = 0; p < it; ++p) {
  448 + NewMat[p] = 1;
  449 + }
  450 + /*for (int g = 2; g <= numparnts; ++g) {
  451 + NcolFactorial *= g;
  452 + }
  453 + for (int g = 2; g <= it; ++g) {
  454 + iFactorial *= g;
  455 + }
  456 + for (int g = 2; g <= (numparnts - it); ++g) {
  457 + NiFactorial *= g;
  458 + }*/
  459 + //Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts
  460 + Nloopy = BinomialCoefficient(numparnts,it);
  461 + for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) {
  462 + int combsparents = 1;
  463 + vector <int> parsetv;
  464 + for (int par = 0; par < NewMat.size(); ++par){
  465 + if (NewMat[par] == 1){
  466 + parsetv.push_back(par);
  467 + }
  468 + }
  469 +
  470 + string tempstring2;
  471 +
  472 + for (int par2 = 0; par2 < parsetv.size(); ++par2){
  473 + if(par2+1==parsetv.size()){
  474 + // tempstring2 = tempstring2 + int_to_str(parsetv[par2]);
  475 + tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]]);
  476 + //tempstring2 = tempstring2 +","+"|" + int_to_str(i);
  477 + // tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]";
  478 + tempstring2 = "[" + int_to_str(order[i])+"|"+tempstring2 +"]";
  479 + }
  480 + else{
  481 + //tempstring2 = tempstring2 + int_to_str(parsetv[par2])+",";
  482 +// tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":";
  483 + tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]])+":";
  484 + }
  485 +
  486 +
  487 + }
  488 + tempstring.push_back(tempstring2);
  489 +
  490 + /*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl;
  491 + std::cout << "Here comes the NewMat:" << endl;*/
  492 + //(444)This sets up the process for changing
  493 + //PosOne tells me the position of the last one in the vector
  494 + //We want to change when the position is the last position available in the vector
  495 + int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0;
  496 + int SumOnes3 = 0, SumOnes4 = 0, SumY = 0;
  497 + for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) {
  498 + if (NewMat[PosOne] == 1) {
  499 + break;
  500 + }
  501 + }
  502 + for (int y = (numparnts - 1); y >= (numparnts - it); --y) {
  503 + //SumOnes tells you the amount of ones in the last i columns
  504 + //These are the last columns being considered
  505 + SumOnes += NewMat[y];
  506 + }
  507 + for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) {
  508 + //SumOnes2 tells you the amount of ones before you reach the next zero
  509 + //PosOne2 keeps track of the position of the coming zero
  510 + SumOnes2 += NewMat[PosOne2];
  511 + if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) {
  512 + break;
  513 + }
  514 + }
  515 + for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) {
  516 + //FrstOne tells you the position of the first number 1 starting from the left hand side
  517 + if (NewMat[FrstOne] == 1) {
  518 + break;
  519 + }
  520 + }
  521 + for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) {
  522 + //SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions
  523 + SumOnes4 += NewMat[x];
  524 + }
  525 + //Prints out NewMat
  526 + /*for (int u = 0; u < numparnts; ++u) {
  527 + std::cout << NewMat[u] << " ";
  528 + }
  529 + std::cout << endl;*/
  530 +
  531 +
  532 + //Adding in the code that will allow counts parent combinations for this particular variable
  533 + for (int q = 0; q < i; ++q) {
  534 + if (NewMat[q] == 1) {
  535 + combsparents *= catsi[order[q]];
  536 + }
  537 + }
  538 + tmp.push_back(combsparents);//made the whole parentset configure for a variable
  539 + /*std::cout << "This is combsparents: " << combsparents << endl;
  540 + std::cout << endl;*/
  541 + vector <int> hvect;
  542 + //hvect tells us which variables are being considered always the last variable is being considered
  543 + //e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only
  544 + //continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 >
  545 + //for A C hvect will be < 0 2 >
  546 + for (int h = 0; h < i; ++h) {
  547 + if (NewMat[h] == 1) {
  548 + hvect.push_back(h);
  549 + }
  550 + }
  551 + hvect.push_back(numparnts);
  552 + size_t shvect = hvect.size();
  553 + //Prints out hvect
  554 + /*for (int u = 0; u < shvect; ++u) {
  555 + std::cout << hvect[u] << " ";
  556 + }*/
  557 + //std::cout << endl;
  558 + //Counting the amount of values in the data that have that particular parent combination
  559 + vector <int> Nijkovercombos;
  560 + for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) {
  561 + //(333)Creating a vector that uses the right combination
  562 + /*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/
  563 + vector <int> Test(shvect, last), maxtest;
  564 + for (int p = 0; p < (shvect-1); ++p) {
  565 + Test[p] = max_cat[order[hvect[p]]];
  566 + }
  567 + maxtest = Test;
  568 + for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) {
  569 + //std::cout << endl;
  570 + /*std::cout << endl;
  571 + std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl;
  572 + std::cout << "Here comes the Test:" << endl;*/
  573 + //(444)This sets up the process for changing
  574 + //NMpos tells me the position of the last non minimum value in the vector
  575 + //We want to change when the position is the last position available in the vector
  576 + int NMpos = 0, minpos = 0;
  577 + for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) {
  578 + if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) {
  579 + break;
  580 + }
  581 + }
  582 + for (minpos = (shvect - 2); minpos >= 0; --minpos) {
  583 + //minpos tells you the position of the last minimum value
  584 + if (Test[minpos] == min_cat[order[hvect[minpos]]]) {
  585 + break;
  586 + }
  587 + }
  588 + //Prints out Test
  589 + /*for (int u = 0; u < shvect; ++u) {
  590 + std::cout << Test[u] << " ";
  591 + }
  592 + std::cout << endl;
  593 + std::cout << endl;
  594 + std::cout << endl;*/
  595 + //Count how many occurrences of the value are present in the data
  596 + int Nijk = 0;
  597 + for (int num2size = 0; num2size < tottuples; ++num2size) {
  598 + int countcorrect = 0;
  599 + for (size_t g = 0; g < Test.size(); ++g) {
  600 + //num2size cycles through tuples
  601 + //order[hvect[g]] represents the variable in the order that we are considering as a parent
  602 + if (DAT[num2size][order[hvect[g]]] == Test[g]) {
  603 + countcorrect += 1;
  604 + }
  605 + }
  606 + if (countcorrect == Test.size()) {
  607 + Nijk += 1;
  608 + }
  609 + }
  610 + //Nijkovercombos displays data as follows
  611 + //it starts with the smallest value for the last variable in hvect
  612 + //and the largest values in the first n-1 variables in hvect
  613 + //max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0
  614 + //count,count,count,count e.g. 13, 2, 2, 3, 4, 10
  615 + Nijkovercombos.push_back(Nijk);
  616 + //(666)Now that the values have been calculated find out what the next combination of variables should be
  617 + if ((NMpos == -1) & (minpos == (shvect - 2))) {
  618 + //break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000
  619 + break;
  620 + }
  621 + if (minpos < NMpos) {
  622 + Test[NMpos] = Test[NMpos] - 1;
  623 + }
  624 + else if (NMpos < minpos) {
  625 + Test[NMpos] = Test[NMpos] - 1;
  626 + for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) {
  627 + Test[filler] = maxtest[filler];
  628 + }
  629 + }
  630 + }
  631 + }
  632 + fullNijkvector.push_back(Nijkovercombos);
  633 + //(666)Now that the unique values have been calculated find out what the next combination of variables should be
  634 + if ((PosOne == (numparnts - 1)) & (SumOnes == it)) {
  635 + break;
  636 + }
  637 + else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) {
  638 + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
  639 + //NxtOne tells you the position of the next closest number 1 that we would
  640 + //like to change the position of (we will call it the important number one)
  641 + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one
  642 + SumOnes3 += NewMat[NxtOne];
  643 + if (SumOnes3 == (SumOnes2 + 1)) {
  644 + break;
  645 + }
  646 + }
  647 + if (SumOnes4 == (it - 1)) {
  648 + //If all except one of the 1's are found in the last it - 1 columns
  649 + for (int x = 0; x < numparnts; ++x) {
  650 + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) {
  651 + //If
  652 + NewMat[x] = 1;
  653 + }
  654 + else {
  655 + NewMat[x] = 0;
  656 + }
  657 + }
  658 + }
  659 + else {
  660 + for (int x = 0; x < numparnts; ++x) {
  661 + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) {
  662 + //If the position is that of the first 1 or it falls between the changed number one and the total
  663 + //amount of ones that are on that side of the zero 10111
  664 + NewMat[x] = 1;
  665 + }
  666 + else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) {
  667 + //If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing
  668 + //and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right
  669 + NewMat[x] = 1;
  670 + }
  671 + else {
  672 + NewMat[x] = 0;
  673 + }
  674 + }
  675 + }
  676 + }
  677 + else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) {
  678 + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
  679 + //NxtOne tells you the position of the next closest number 1 that we would
  680 + //like to change the position of (we will call it the important number one)
  681 + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one
  682 + SumOnes3 += NewMat[NxtOne];
  683 + if (SumOnes3 == 1) {
  684 + break;
  685 + }
  686 + }
  687 + if (it != 1) {
  688 + for (int x = 0; x < numparnts; ++x) {
  689 + if (x == (NxtOne + 1)) {
  690 + NewMat[x] = 1;
  691 + }
  692 + else if (x == NxtOne) {
  693 + NewMat[x] = 0;
  694 + }
  695 + else if ((NewMat[x] == 1) & (x != NxtOne)) {
  696 + NewMat[x] = 1;
  697 + }
  698 + else {
  699 + NewMat[x] = 0;
  700 + }
  701 + }
  702 + }
  703 + else {
  704 + for (int x = 0; x < numparnts; ++x) {
  705 + if ((x == (NxtOne + 1))) {
  706 + NewMat[x] = 1;
  707 + }
  708 + else {
  709 + NewMat[x] = 0;
  710 + }
  711 + }
  712 + }
  713 + }
  714 + }
  715 +
  716 + }
  717 + }
  718 + ParentCombos.push_back(tmp);
  719 + indexofvar.push_back(tempstring);
  720 + }
  721 +
  722 + }
  723 + //std::cout << endl;
  724 + std::cout << endl;
  725 + //printing out the ParentCombos matrix just created above
  726 + /*for (size_t i = 0; i < ParentCombos.size(); ++i) {
  727 + for (size_t j = 0; j < ParentCombos[i].size(); ++j) {
  728 + std::cout << ParentCombos[i][j] << " ";
  729 + }
  730 + std::cout << endl;
  731 + }*/
  732 + std::cout << endl;
  733 + //printing out the fullNijkvector matrix just created above
  734 + /*for (size_t i = 0; i < fullNijkvector.size(); ++i) {
  735 + for (size_t j = 0; j < fullNijkvector[i].size(); ++j) {
  736 + std::cout << fullNijkvector[i][j] << " ";
  737 + }
  738 + std::cout << endl;
  739 + }
  740 + std::cout << endl;*/
  741 + //Print out Data
  742 + /*for (int i = 0; i < DAT.size(); ++i) {
  743 + for (int j = 0; j < DAT[i].size(); ++j) {
  744 + std::cout << DAT[i][j] << " ";
  745 + }
  746 + std::cout << endl;
  747 + }*/
  748 + //Print out the families size
  749 + /*for (size_t i = 0; i < families.size(); ++i) {
  750 + std::cout << families[i] << " ";
  751 + }*/
  752 +
  753 + //Obtaining the actual score from this information
  754 + //varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting
  755 + //the first variable in the order)
  756 + //keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha
  757 + int posinfull = 0;
  758 + //finlogscore is the final score in natural log format
  759 + double finlogscore = 0.0;
  760 + vector< vector <double> > vecvarparset;
  761 + for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) {
  762 + //sumovUialpha is the the sum over all parent sets for a particular variable
  763 + double sumovUialpha = 0.0;
  764 + //vector of all values of seclastgamma
  765 + vector <double> vec2ndlastgamma;
  766 + double maxseclastgamma;
  767 + //Uialpha cycles through all the parent sets for a particular family
  768 + for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) {
  769 + // nijkprime represents the value of 1/(ri * qi)
  770 + double nijkprime, nijprime;
  771 + double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha];
  772 +
  773 + nijprime = 1.0 / (PCs);
  774 + nijkprime = 1.0 / (rij * PCs);
  775 + //seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic
  776 + double seclastgamma = 0.0;
  777 + //qi_Uialpha cycles through the combinations for the parents in a set
  778 + for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) {
  779 + double lastgamma = 0.0;
  780 + double nij = 0.0;
  781 + //countijk cycles through the categories of the variable with a particular order
  782 + //catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to
  783 + //and then find the categories for it
  784 + for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) {
  785 + double topy;
  786 + //rightcol lets you find the right column/position of the value that you need for a particular category within the
  787 + int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]);
  788 + nij += fullNijkvector[posinfull][rightcol];
  789 + topy = (nijkprime + fullNijkvector[posinfull][rightcol]);
  790 +
  791 + //Using boost lgamma function for the product over categories and parent combinations
  792 + lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime);
  793 +
  794 + }
  795 + double boty = nij + nijprime;
  796 + seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty);
  797 +
  798 + }
  799 + vec2ndlastgamma.push_back(seclastgamma);
  800 +
  801 +
  802 +
  803 + //Calculate sumovUialpha based on the logsumexp concept
  804 + if (Uialpha + 1 == families[varinorder]) {
  805 +
  806 + for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) {
  807 + //change the value of maxseclastgamma if new value is larger than the previous value
  808 + if (que == 0) {
  809 + maxseclastgamma = vec2ndlastgamma[0];
  810 + }
  811 + else {
  812 + if (maxseclastgamma < vec2ndlastgamma[que]) {
  813 + maxseclastgamma = vec2ndlastgamma[que];
  814 + }
  815 + }
  816 + }
  817 + for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) {
  818 + sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma);
  819 + }
  820 + //add info on parent set scores for each variable to this vector of vectors
  821 + vecvarparset.push_back(vec2ndlastgamma);
  822 + }
  823 +
  824 + /*std::cout << endl;
  825 + std::cout << seclastgamma;
  826 + std::cout << endl;*/
  827 + posinfull += 1;
  828 + //std::cout << posinfull << endl;
  829 + }
  830 + finlogscore += log(sumovUialpha) + maxseclastgamma;
  831 + }
  832 +
  833 + vector < vector<string> > parSet;
  834 + vector< map <double, string, greater <double> > > parSetScoreSorted;
  835 + vector< map <double, string, greater <double> > > strucScore;
  836 +
  837 +
  838 +//Below is another way to match the index or label sets with the scores sets, and store the (score, label) into a map vector. And for each vector element the map is a sorted map.
  839 +
  840 + for (unsigned i = 0; i < indexofvar.size(); ++i){
  841 +
  842 + map <double, string, greater <double> > tempMap;
  843 + for (unsigned j=0; j< indexofvar[i].size(); ++j){
  844 + tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j]));
  845 + }
  846 + parSetScoreSorted.push_back(tempMap);
  847 +
  848 + }
  849 +
  850 +
  851 + pair <double, string> bestStrScore;
  852 + double bestScore = 0;
  853 + string bestLable;
  854 +
  855 +
  856 + for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){
  857 + map <double, string> :: iterator itr;
  858 + itr = parSetScoreSorted[i].begin();
  859 + bestScore = bestScore + (itr->first);
  860 + bestLable = bestLable +(itr->second);
  861 + }
  862 +
  863 + bestStrScore = make_pair(bestScore, bestLable);//This is the best score.
  864 +
  865 +
  866 + vector < pair <double, string> > sortedStru;//This store all the structures in the percentage.
  867 + vector < vector < pair <double, string > > > deltaC;
  868 +
  869 + for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){
  870 + map <double, string> :: iterator itr0, itr1;
  871 + vector < pair <double, string > > tempDelta;
  872 + double tempDeltaS;
  873 + string tempDeltaL;
  874 + itr0 = parSetScoreSorted[l].begin();
  875 + itr1 = parSetScoreSorted[l].begin();
  876 + double tem1=itr1->first, tem2 = itr1->first;
  877 +// cout << tem1 << " : " << tem2 << endl;
  878 + for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){
  879 +// tem1 = tem2;
  880 + itr1 = ++itr1;
  881 + tem2 = itr1->first;
  882 + double tem = -log(exp(tem2-tem1)+1);
  883 + tem1 = tem1-tem;
  884 + if(tem <= log(PERCENT/10)){
  885 + tempDeltaS = (itr1->first)-(itr0->first);
  886 + tempDeltaL = itr1->second;
  887 + tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL));
  888 + }
  889 + }
  890 +
  891 + deltaC.push_back( tempDelta);
  892 + }
  893 +
  894 +
  895 + for(unsigned i=0; i< deltaC.size(); ++i){
  896 + for (unsigned j=0; j< deltaC[i].size(); ++j)
  897 + {
  898 + double score = bestStrScore.first + deltaC[i][j].first;
  899 + string lab = bestStrScore.second;
  900 + findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second);
  901 + sortedStru.push_back(make_pair(score, lab));
  902 + }
  903 + }
  904 +
  905 + sort(sortedStru.begin(),sortedStru.end(),compareDe);
  906 +
  907 + vector <double> strP;
  908 + double s = bestScore;
  909 +
  910 + for(unsigned i=0; i< sortedStru.size(); ++i ){
  911 + double s0 = sortedStru[i].first;
  912 +// s = s + exp(sortedStru[i].first);
  913 + double te = -log(exp(s0-s)+1);
  914 +
  915 + strP.push_back(te);
  916 + s = s - te;
  917 +
  918 + }
  919 +
  920 +/* double fi = s/exp(finlogscore)*100;
  921 + strP.push_back(fi);*/
  922 +
  923 + std::cout << std::endl;
  924 + std::cout << "Total Score: "<< boost::lexical_cast<string>(finlogscore) << std::endl;
  925 + std::cout << std::endl;
  926 +
  927 +
  928 +
  929 +
  930 + cout << "Best several structures are:" << endl;
  931 + cout << bestScore << " : " << bestLable << " "<< exp(strP[0])*100 << endl;
  932 +
  933 + for (unsigned i=0; i< sortedStru.size(); ++i )
  934 + {
  935 + if(exp(strP[i+1]) > (PERCENT/100)){
  936 + break;
  937 + }
  938 + cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl;
  939 +
  940 + }
  941 +
  942 +/* for (unsigned i=0; i< 5; ++i )
  943 + {
  944 +
  945 + cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl;
  946 +
  947 + }*/
  948 +
  949 +/* cout << "Best several structures are:" << endl;
  950 + cout << bestScore << " : " << bestLable << " " << endl;
  951 +
  952 + for (unsigned i=0; i< sortedStru.size(); ++i )
  953 + {
  954 + cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< endl;
  955 + }*/
  956 +
  957 + std::cout << std::endl;
  958 + std::cout << std::endl;
  959 +
  960 +/* for(unsigned i=0; i< parSetScoreSorted.size(); ++i){
  961 + map <double, string> :: iterator itr;
  962 + std::cout << "For variable "<< order[i] <<":"<< endl;
  963 + for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr)
  964 + {
  965 + cout << itr->second << " : " << itr->first << "; ";
  966 + }
  967 + cout << endl;
  968 + cout << endl;
  969 +
  970 + }*/
  971 +
  972 +
  973 + string orderstring;
  974 + for (unsigned i=0; i< order.size(); ++i){
  975 + orderstring.append(int_to_str(order[i])+" ");
  976 + }
  977 +
  978 + ofstream myfile;
  979 + myfile.open( filepath.c_str(), ios::out | ios::app );
  980 +// cout << path << endl;
  981 + if (myfile.is_open())
  982 + {
  983 + myfile << orderstring << " " << boost::lexical_cast<string>(finlogscore) << " "<< bestLable << " " << bestScore << " " << exp(strP[0])*100;
  984 + myfile << "\n";
  985 +
  986 + for(unsigned i=0; i< sortedStru.size(); ++i){
  987 + if(exp(strP[i+1]) > (PERCENT/100)){
  988 + break;
  989 + }
  990 + myfile << orderstring << " " << boost::lexical_cast<string>(finlogscore) << " " << sortedStru[i].second << " " << sortedStru[i].first<< " " << exp(strP[i+1])*100;
  991 + myfile << "\n";
  992 +
  993 + }
  994 + }
  995 + else cout << "Unable to open file";
  996 + myfile.close();
  997 +
  998 +/* ofstream myfile;
  999 + myfile.open("/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2RO.txt");
  1000 + if (myfile.is_open())
  1001 + {
  1002 + for(unsigned i=0; i< parSetScoreSorted.size(); ++i){
  1003 + map <double, string> :: iterator itr;
  1004 + for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr){
  1005 + myfile << itr->second << " : " << itr->first << "; " ;
  1006 + }
  1007 + myfile << "\n";
  1008 +
  1009 + }
  1010 + }
  1011 + else cout << "Unable to open file";
  1012 + myfile.close();*/
  1013 +
  1014 +
  1015 + //time after completion
  1016 + time_t later = time(0);
  1017 + char* dt_later = ctime(&later);
  1018 + std::cout << "The local date and time is: " << dt_later << std::endl;
  1019 +
  1020 + std::cout << std::endl;
  1021 + std::cout << std::endl;
  1022 +
  1023 +
  1024 + std::cin.clear();
  1025 + std::cin.ignore();
  1026 + std::cin.get();
  1027 + return 0;
  1028 +}
  1029 +
  1030 +