From 53863fc1fc8d2efea4e4e5ed2a81265a6490022c Mon Sep 17 00:00:00 2001 From: Zhenghua Gong Date: Fri, 6 Jul 2018 16:55:23 -0400 Subject: [PATCH] Get structures from orders --- GetStructure.cpp | 1030 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1030 insertions(+) create mode 100644 GetStructure.cpp diff --git a/GetStructure.cpp b/GetStructure.cpp new file mode 100644 index 0000000..016f79b --- /dev/null +++ b/GetStructure.cpp @@ -0,0 +1,1030 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include // std::find +#include +#include +#include + +using namespace std; + +//Function for calculating the binomial coefficient +boost::multiprecision::cpp_int BinomialCoefficient(unsigned int n, unsigned int k) { + if (k == 0) { return 1; } + else { return (n * BinomialCoefficient(n - 1, k - 1)) / k; } +} +struct compare { + bool operator()(const std::string& first, const std::string& second) { + if(first.size() == second.size()) + return first < second; + else + return first.size() < second.size(); + } +}; + +string int_to_str(int num) +{ + stringstream ss; + + ss << num; + + return ss.str(); +}; + + +int str_to_int(string st) +{ + int result; + + stringstream(st) >> result; + + return result; +}; + +double str2doub(std::string str) +{ + double d; + std::stringstream(str) >> d; + return d; +} + +bool compareI(const pair&i, const pair&j) +{ + return i.second < j.second; +} + +bool compareD(const pair&i, const pair&j) +{ + return i.second > j.second; +} + +bool compareIn(const pair&i, const pair&j) +{ + return i.first < j.first; +} + +bool compareDe(const pair&i, const pair&j) +{ + return i.first > j.first; +} + +double logAB (double x, double y) +{ + double result; + double maxVal = max(x,y); + + if(maxVal == x) + { + result = maxVal + log(1+exp(y-maxVal)); + } + else + { + result = maxVal + log(1+exp(x-maxVal)); + } + return result; +} + +double persentageXofY (double newS, double oldS) +{ + double result; + result = exp(oldS-newS)*100; + return result; +} + +void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr) +{ + // Get the first occurrence + size_t pos = data.find(toSearch); + + // Repeat till end is reached + while( pos != std::string::npos) + { + // Replace this occurrence of Sub String + data.replace(pos, toSearch.size(), replaceStr); + // Get the next occurrence from the current position + pos =data.find(toSearch, pos + toSearch.size()); + } +} + +string findMB(string structures, string variable){ + string MB; + + + return MB; +} + +int main() { + + string FILENAME; // Downloads/D50C9v.txt + int COLS; // 9 + vector max_cat; // 1 2 1 3 2 1 2 2 3 + vector min_cat; // 0 0 0 0 0 0 0 0 0 + unsigned int maxparents; // 4 + vector order; //2 6 8 4 1 0 3 5 7 + double PERCENT; + string filepath; // output file name + + vector settings; + vector settingsFlag; + + settings.push_back("DataFile"); + settingsFlag.push_back(false); + settings.push_back("TotalVariables"); + settingsFlag.push_back(false); + settings.push_back("MaximumCategory"); + settingsFlag.push_back(false); + settings.push_back("MinimumCategory"); + settingsFlag.push_back(false); + settings.push_back("MaximumParents"); + settingsFlag.push_back(false); + settings.push_back("Order"); + settingsFlag.push_back(false); + settings.push_back("Percentage"); + settingsFlag.push_back(false); + settings.push_back("OutputFileName"); + settingsFlag.push_back(false); + + fstream infile; + string ConfigFile; + std::cout << "What is the location of the configuration file that you edited?: "; + std::cin >> ConfigFile; + std::cout << std::endl; + //string ConfigFile = "/home/efraingonzalez0/Downloads/new.config"; + infile.open(ConfigFile.c_str(),ios::in); + vector line; + if(infile.is_open()) { + std::cout << "Configuration file correctly opened" << std::endl; + while(!infile.eof()) { + string tmp; + getline(infile,tmp); + line.push_back(tmp); + } + } + else { + std::cout << "Unable to open configuration file" << std::endl; + exit(0); + } + + infile.close(); + line.erase(line.end()); + for(size_t k = 0; k < settings.size(); ++k) { + for(size_t i = 0; i < line.size(); ++i) { + size_t foundit = line[i].find(settings[k]); + if (foundit != std::string::npos) { + //find position of "=" + unsigned int poseq = line[i].find("="); + unsigned int posneqfir,posneqlas; + //first position of relevant string + for(unsigned int j = poseq + 1; j < line[i].length(); ++j) { + if(line[i].at(j) != ' ') { + posneqfir = j; + break; + } + } + //last position of relevant string + for(unsigned int j2 = line[i].length() - 1; j2 >= posneqfir; ++j2) { + if((line[i].at(j2) != ' ') && (line[i].at(j2) != '\n')){ + posneqlas = j2; + break; + } + } + + if(settings[k] == "DataFile") { + FILENAME = line[i].substr(posneqfir,posneqlas - posneqfir + 1); + } + else if(settings[k] == "TotalVariables") { + COLS = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1)); + } + else if(settings[k] == "MaximumCategory") {//still need to edit + string max_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1); + std::stringstream macs(max_cat_string); + int macsti; + while( macs >> macsti) { + max_cat.push_back(macsti); + } + } + else if(settings[k] == "MinimumCategory") {//still need to edit + string min_cat_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1); + std::stringstream mics(min_cat_string); + int micsti; + while( mics >> micsti) { + min_cat.push_back(micsti); + } + } + + else if(settings[k] == "MaximumParents") { + maxparents = str_to_int(line[i].substr(posneqfir,posneqlas - posneqfir + 1)); + } + + else if(settings[k] == "Order") {//still need to edit + string st_ord_string = line[i].substr(posneqfir,posneqlas - posneqfir + 1); + std::stringstream starordr(st_ord_string); + int StOrti; + while( starordr >> StOrti) { + order.push_back(StOrti); + } + } + + else if(settings[k] == "Percentage") { + PERCENT = str2doub(line[i].substr(posneqfir,posneqlas - posneqfir + 1)); + } + + else if(settings[k] == "OutputFileName") { + filepath = line[i].substr(posneqfir,posneqlas - posneqfir + 1); + } + + settingsFlag[k] = true; + break; + } + } + } + + for(int k=0;k 100.0){ + std::cout << "Error: Percentage may not be larger than 100.0 which represents 100%. If you need a difference larger than 100% then use a negative number." << std::endl; + exit(0); + } + } + + + + fstream file; + std::cout << endl; + vector < vector > DAT; // 2d array as a vector of vectors + vector rowVector(COLS); // vector to add into 'array' (represents a row) + int row = 0; // Row counter + + // Read file + file.open(FILENAME.c_str(), ios::in); // Open file + if (file.is_open()) { // If file has correctly opened... + // Output debug message + cout << "File correctly opened" << endl; + + // Dynamically store data into array + while (file.good()) { // ... and while there are no errors, + DAT.push_back(rowVector); // add a new row, + for (int col = 0; col> DAT[row][col]; // fill the row with col elements + } + row++; // Keep track of actual row + } + } + else cout << "Unable to open file" << endl; + file.close(); + DAT.erase(DAT.end()); + + size_t totvars = DAT[1].size(); //column number + size_t tottuples = DAT.size();//row number + + + std::cout << endl; + + + //Time that program begins + time_t start = time(0); + char* dt_start = ctime(&start); + std::cout << "The local date and time is: " << dt_start << std::endl; + + + //Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category: + //categories in i + vector catsi; + //total combinations of variables + //int totcombos = 1; + //how many catagori for every variable + for ( int i = 0; i < totvars; ++i) { + catsi.push_back((max_cat[i] - min_cat[i]) + 1); + //totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1); + } + + //print out catsi + for (size_t i = 0; i < catsi.size();++i) { + std::cout << catsi[i] << " "; + } + std::cout << endl; + + //Total Families Ui,alpha for a particular variable in the order + //Total Families Ui,alpha for a particular variable in the order + vector families; + for (unsigned int i = 0; i < totvars; ++i) { + int numparents = i; + if (numparents == 0) { + families.push_back(1); + } + else { + + boost::multiprecision::cpp_int numfams = 0; + for (unsigned int j = 0; j <= i; ++j) { + if (j <= maxparents) { + unsigned long long jFactorial = 1; + unsigned long long ijFactorial = 1; + unsigned long long iFactorial = 1; + //Calculate j! + for (unsigned int g = 0; g <= j; ++g) { + if (g != 0) { + jFactorial *= g; + } + } + //Calculate i! + for (unsigned int g = 0; g <= i; ++g) { + if (g != 0) { + iFactorial *= g; + } + } + //Calculate (i-j)! + for (unsigned int g = 0; g <= (i - j); ++g) { + if (g != 0) { + ijFactorial *= g; + } + } + numfams += BinomialCoefficient(i,j); + } + else { + break; + } + } + families.push_back(numfams); + } + } + + + //How many parent combinations for each step? As well as there counts + vector< vector > ParentCombos; + vector< vector > fullNijkvector; + vector< vector > indexofvar; // This is label or index for each variable. + for (size_t i = 0; i < order.size(); ++i) { + //i represents the order of the variable + if (i == 0) { + vector tmp,Nijkovercombos1; + vector tempstring; +// tempstring.push_back("[0]"); + tempstring.push_back("[" + int_to_str(order[0])+ "]"); + tmp.push_back(1); + ParentCombos.push_back(tmp); + //counting the amount of times that a value of the first variable in the order occurs + //this starts with the maximum value for that variable + for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) { + //hello cycles through the categories of the first variable in the order + int Nijk1 = 0; + //green cycles through tuples + for (int green = 0; green < tottuples; ++green){ + if (DAT[green][order[0]] == hello) { + Nijk1 += 1; + } + } + Nijkovercombos1.push_back(Nijk1); + } + fullNijkvector.push_back(Nijkovercombos1); + indexofvar.push_back(tempstring); + + } + else { + vector tmp,Nijkovercombos1; + vector tempstring; +// string tempa = "[" + int_to_str(i) +"]"; + string tempa = "[" + int_to_str(order[i]) +"]"; + tempstring.push_back(tempa); + tmp.push_back(1); + int numparnts = i; + //counting the amount of times that a value of the last variable in the current order size occurs + //this starts with the maximum value for that variable + for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) { + //hello cycles through the categories of the first variable in the order + int Nijk1 = 0; + //green cycles through tuples + for (int green = 0; green < tottuples; ++green) { + if (DAT[green][order[numparnts]] == hello) { + Nijk1 += 1; + } + } + Nijkovercombos1.push_back(Nijk1); + } + fullNijkvector.push_back(Nijkovercombos1); + + //j representing the number of parents + for (int it = 1; it <= numparnts; ++it) { + //(333)Creating a vector that uses the right combination + boost::multiprecision::cpp_int Nloopy = 0; + //, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1; + /*std::cout << "This is for " << numparnts << " choose " << it << endl; + std::cout << "The iteration number is: " << it << endl;*/ + //Accounting for the limit of parent quantity + if (it > maxparents) { + break; + } + else { + vector NewMat(numparnts, 0); + for (int p = 0; p < it; ++p) { + NewMat[p] = 1; + } + /*for (int g = 2; g <= numparnts; ++g) { + NcolFactorial *= g; + } + for (int g = 2; g <= it; ++g) { + iFactorial *= g; + } + for (int g = 2; g <= (numparnts - it); ++g) { + NiFactorial *= g; + }*/ + //Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts + Nloopy = BinomialCoefficient(numparnts,it); + for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) { + int combsparents = 1; + vector parsetv; + for (int par = 0; par < NewMat.size(); ++par){ + if (NewMat[par] == 1){ + parsetv.push_back(par); + } + } + + string tempstring2; + + for (int par2 = 0; par2 < parsetv.size(); ++par2){ + if(par2+1==parsetv.size()){ + // tempstring2 = tempstring2 + int_to_str(parsetv[par2]); + tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]]); + //tempstring2 = tempstring2 +","+"|" + int_to_str(i); + // tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]"; + tempstring2 = "[" + int_to_str(order[i])+"|"+tempstring2 +"]"; + } + else{ + //tempstring2 = tempstring2 + int_to_str(parsetv[par2])+","; +// tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":"; + tempstring2 = tempstring2 + int_to_str(order[parsetv[par2]])+":"; + } + + + } + tempstring.push_back(tempstring2); + + /*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl; + std::cout << "Here comes the NewMat:" << endl;*/ + //(444)This sets up the process for changing + //PosOne tells me the position of the last one in the vector + //We want to change when the position is the last position available in the vector + int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0; + int SumOnes3 = 0, SumOnes4 = 0, SumY = 0; + for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) { + if (NewMat[PosOne] == 1) { + break; + } + } + for (int y = (numparnts - 1); y >= (numparnts - it); --y) { + //SumOnes tells you the amount of ones in the last i columns + //These are the last columns being considered + SumOnes += NewMat[y]; + } + for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) { + //SumOnes2 tells you the amount of ones before you reach the next zero + //PosOne2 keeps track of the position of the coming zero + SumOnes2 += NewMat[PosOne2]; + if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) { + break; + } + } + for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) { + //FrstOne tells you the position of the first number 1 starting from the left hand side + if (NewMat[FrstOne] == 1) { + break; + } + } + for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) { + //SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions + SumOnes4 += NewMat[x]; + } + //Prints out NewMat + /*for (int u = 0; u < numparnts; ++u) { + std::cout << NewMat[u] << " "; + } + std::cout << endl;*/ + + + //Adding in the code that will allow counts parent combinations for this particular variable + for (int q = 0; q < i; ++q) { + if (NewMat[q] == 1) { + combsparents *= catsi[order[q]]; + } + } + tmp.push_back(combsparents);//made the whole parentset configure for a variable + /*std::cout << "This is combsparents: " << combsparents << endl; + std::cout << endl;*/ + vector hvect; + //hvect tells us which variables are being considered always the last variable is being considered + //e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only + //continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 > + //for A C hvect will be < 0 2 > + for (int h = 0; h < i; ++h) { + if (NewMat[h] == 1) { + hvect.push_back(h); + } + } + hvect.push_back(numparnts); + size_t shvect = hvect.size(); + //Prints out hvect + /*for (int u = 0; u < shvect; ++u) { + std::cout << hvect[u] << " "; + }*/ + //std::cout << endl; + //Counting the amount of values in the data that have that particular parent combination + vector Nijkovercombos; + for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) { + //(333)Creating a vector that uses the right combination + /*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/ + vector Test(shvect, last), maxtest; + for (int p = 0; p < (shvect-1); ++p) { + Test[p] = max_cat[order[hvect[p]]]; + } + maxtest = Test; + for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) { + //std::cout << endl; + /*std::cout << endl; + std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl; + std::cout << "Here comes the Test:" << endl;*/ + //(444)This sets up the process for changing + //NMpos tells me the position of the last non minimum value in the vector + //We want to change when the position is the last position available in the vector + int NMpos = 0, minpos = 0; + for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) { + if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) { + break; + } + } + for (minpos = (shvect - 2); minpos >= 0; --minpos) { + //minpos tells you the position of the last minimum value + if (Test[minpos] == min_cat[order[hvect[minpos]]]) { + break; + } + } + //Prints out Test + /*for (int u = 0; u < shvect; ++u) { + std::cout << Test[u] << " "; + } + std::cout << endl; + std::cout << endl; + std::cout << endl;*/ + //Count how many occurrences of the value are present in the data + int Nijk = 0; + for (int num2size = 0; num2size < tottuples; ++num2size) { + int countcorrect = 0; + for (size_t g = 0; g < Test.size(); ++g) { + //num2size cycles through tuples + //order[hvect[g]] represents the variable in the order that we are considering as a parent + if (DAT[num2size][order[hvect[g]]] == Test[g]) { + countcorrect += 1; + } + } + if (countcorrect == Test.size()) { + Nijk += 1; + } + } + //Nijkovercombos displays data as follows + //it starts with the smallest value for the last variable in hvect + //and the largest values in the first n-1 variables in hvect + //max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0 + //count,count,count,count e.g. 13, 2, 2, 3, 4, 10 + Nijkovercombos.push_back(Nijk); + //(666)Now that the values have been calculated find out what the next combination of variables should be + if ((NMpos == -1) & (minpos == (shvect - 2))) { + //break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000 + break; + } + if (minpos < NMpos) { + Test[NMpos] = Test[NMpos] - 1; + } + else if (NMpos < minpos) { + Test[NMpos] = Test[NMpos] - 1; + for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) { + Test[filler] = maxtest[filler]; + } + } + } + } + fullNijkvector.push_back(Nijkovercombos); + //(666)Now that the unique values have been calculated find out what the next combination of variables should be + if ((PosOne == (numparnts - 1)) & (SumOnes == it)) { + break; + } + else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) { + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { + //NxtOne tells you the position of the next closest number 1 that we would + //like to change the position of (we will call it the important number one) + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one + SumOnes3 += NewMat[NxtOne]; + if (SumOnes3 == (SumOnes2 + 1)) { + break; + } + } + if (SumOnes4 == (it - 1)) { + //If all except one of the 1's are found in the last it - 1 columns + for (int x = 0; x < numparnts; ++x) { + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) { + //If + NewMat[x] = 1; + } + else { + NewMat[x] = 0; + } + } + } + else { + for (int x = 0; x < numparnts; ++x) { + if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) { + //If the position is that of the first 1 or it falls between the changed number one and the total + //amount of ones that are on that side of the zero 10111 + NewMat[x] = 1; + } + else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) { + //If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing + //and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right + NewMat[x] = 1; + } + else { + NewMat[x] = 0; + } + } + } + } + else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) { + for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) { + //NxtOne tells you the position of the next closest number 1 that we would + //like to change the position of (we will call it the important number one) + //SumOnes3 helps keep track of the sum of all ones between now and the next important number one + SumOnes3 += NewMat[NxtOne]; + if (SumOnes3 == 1) { + break; + } + } + if (it != 1) { + for (int x = 0; x < numparnts; ++x) { + if (x == (NxtOne + 1)) { + NewMat[x] = 1; + } + else if (x == NxtOne) { + NewMat[x] = 0; + } + else if ((NewMat[x] == 1) & (x != NxtOne)) { + NewMat[x] = 1; + } + else { + NewMat[x] = 0; + } + } + } + else { + for (int x = 0; x < numparnts; ++x) { + if ((x == (NxtOne + 1))) { + NewMat[x] = 1; + } + else { + NewMat[x] = 0; + } + } + } + } + } + + } + } + ParentCombos.push_back(tmp); + indexofvar.push_back(tempstring); + } + + } + //std::cout << endl; + std::cout << endl; + //printing out the ParentCombos matrix just created above + /*for (size_t i = 0; i < ParentCombos.size(); ++i) { + for (size_t j = 0; j < ParentCombos[i].size(); ++j) { + std::cout << ParentCombos[i][j] << " "; + } + std::cout << endl; + }*/ + std::cout << endl; + //printing out the fullNijkvector matrix just created above + /*for (size_t i = 0; i < fullNijkvector.size(); ++i) { + for (size_t j = 0; j < fullNijkvector[i].size(); ++j) { + std::cout << fullNijkvector[i][j] << " "; + } + std::cout << endl; + } + std::cout << endl;*/ + //Print out Data + /*for (int i = 0; i < DAT.size(); ++i) { + for (int j = 0; j < DAT[i].size(); ++j) { + std::cout << DAT[i][j] << " "; + } + std::cout << endl; + }*/ + //Print out the families size + /*for (size_t i = 0; i < families.size(); ++i) { + std::cout << families[i] << " "; + }*/ + + //Obtaining the actual score from this information + //varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting + //the first variable in the order) + //keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha + int posinfull = 0; + //finlogscore is the final score in natural log format + double finlogscore = 0.0; + vector< vector > vecvarparset; + for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) { + //sumovUialpha is the the sum over all parent sets for a particular variable + double sumovUialpha = 0.0; + //vector of all values of seclastgamma + vector vec2ndlastgamma; + double maxseclastgamma; + //Uialpha cycles through all the parent sets for a particular family + for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) { + // nijkprime represents the value of 1/(ri * qi) + double nijkprime, nijprime; + double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha]; + + nijprime = 1.0 / (PCs); + nijkprime = 1.0 / (rij * PCs); + //seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic + double seclastgamma = 0.0; + //qi_Uialpha cycles through the combinations for the parents in a set + for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) { + double lastgamma = 0.0; + double nij = 0.0; + //countijk cycles through the categories of the variable with a particular order + //catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to + //and then find the categories for it + for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) { + double topy; + //rightcol lets you find the right column/position of the value that you need for a particular category within the + int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]); + nij += fullNijkvector[posinfull][rightcol]; + topy = (nijkprime + fullNijkvector[posinfull][rightcol]); + + //Using boost lgamma function for the product over categories and parent combinations + lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime); + + } + double boty = nij + nijprime; + seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty); + + } + vec2ndlastgamma.push_back(seclastgamma); + + + + //Calculate sumovUialpha based on the logsumexp concept + if (Uialpha + 1 == families[varinorder]) { + + for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) { + //change the value of maxseclastgamma if new value is larger than the previous value + if (que == 0) { + maxseclastgamma = vec2ndlastgamma[0]; + } + else { + if (maxseclastgamma < vec2ndlastgamma[que]) { + maxseclastgamma = vec2ndlastgamma[que]; + } + } + } + for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) { + sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma); + } + //add info on parent set scores for each variable to this vector of vectors + vecvarparset.push_back(vec2ndlastgamma); + } + + /*std::cout << endl; + std::cout << seclastgamma; + std::cout << endl;*/ + posinfull += 1; + //std::cout << posinfull << endl; + } + finlogscore += log(sumovUialpha) + maxseclastgamma; + } + + vector < vector > parSet; + vector< map > > parSetScoreSorted; + vector< map > > strucScore; + + +//Below is another way to match the index or label sets with the scores sets, and store the (score, label) into a map vector. And for each vector element the map is a sorted map. + + for (unsigned i = 0; i < indexofvar.size(); ++i){ + + map > tempMap; + for (unsigned j=0; j< indexofvar[i].size(); ++j){ + tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j])); + } + parSetScoreSorted.push_back(tempMap); + + } + + + pair bestStrScore; + double bestScore = 0; + string bestLable; + + + for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){ + map :: iterator itr; + itr = parSetScoreSorted[i].begin(); + bestScore = bestScore + (itr->first); + bestLable = bestLable +(itr->second); + } + + bestStrScore = make_pair(bestScore, bestLable);//This is the best score. + + + vector < pair > sortedStru;//This store all the structures in the percentage. + vector < vector < pair > > deltaC; + + for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ + map :: iterator itr0, itr1; + vector < pair > tempDelta; + double tempDeltaS; + string tempDeltaL; + itr0 = parSetScoreSorted[l].begin(); + itr1 = parSetScoreSorted[l].begin(); + double tem1=itr1->first, tem2 = itr1->first; +// cout << tem1 << " : " << tem2 << endl; + for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){ +// tem1 = tem2; + itr1 = ++itr1; + tem2 = itr1->first; + double tem = -log(exp(tem2-tem1)+1); + tem1 = tem1-tem; + if(tem <= log(PERCENT/10)){ + tempDeltaS = (itr1->first)-(itr0->first); + tempDeltaL = itr1->second; + tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL)); + } + } + + deltaC.push_back( tempDelta); + } + + + for(unsigned i=0; i< deltaC.size(); ++i){ + for (unsigned j=0; j< deltaC[i].size(); ++j) + { + double score = bestStrScore.first + deltaC[i][j].first; + string lab = bestStrScore.second; + findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second); + sortedStru.push_back(make_pair(score, lab)); + } + } + + sort(sortedStru.begin(),sortedStru.end(),compareDe); + + vector strP; + double s = bestScore; + + for(unsigned i=0; i< sortedStru.size(); ++i ){ + double s0 = sortedStru[i].first; +// s = s + exp(sortedStru[i].first); + double te = -log(exp(s0-s)+1); + + strP.push_back(te); + s = s - te; + + } + +/* double fi = s/exp(finlogscore)*100; + strP.push_back(fi);*/ + + std::cout << std::endl; + std::cout << "Total Score: "<< boost::lexical_cast(finlogscore) << std::endl; + std::cout << std::endl; + + + + + cout << "Best several structures are:" << endl; + cout << bestScore << " : " << bestLable << " "<< exp(strP[0])*100 << endl; + + for (unsigned i=0; i< sortedStru.size(); ++i ) + { + if(exp(strP[i+1]) > (PERCENT/100)){ + break; + } + cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl; + + } + +/* for (unsigned i=0; i< 5; ++i ) + { + + cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< exp(strP[i+1])*100<< endl; + + }*/ + +/* cout << "Best several structures are:" << endl; + cout << bestScore << " : " << bestLable << " " << endl; + + for (unsigned i=0; i< sortedStru.size(); ++i ) + { + cout << sortedStru[i].first << " : " << sortedStru[i].second << " "<< endl; + }*/ + + std::cout << std::endl; + std::cout << std::endl; + +/* for(unsigned i=0; i< parSetScoreSorted.size(); ++i){ + map :: iterator itr; + std::cout << "For variable "<< order[i] <<":"<< endl; + for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr) + { + cout << itr->second << " : " << itr->first << "; "; + } + cout << endl; + cout << endl; + + }*/ + + + string orderstring; + for (unsigned i=0; i< order.size(); ++i){ + orderstring.append(int_to_str(order[i])+" "); + } + + ofstream myfile; + myfile.open( filepath.c_str(), ios::out | ios::app ); +// cout << path << endl; + if (myfile.is_open()) + { + myfile << orderstring << " " << boost::lexical_cast(finlogscore) << " "<< bestLable << " " << bestScore << " " << exp(strP[0])*100; + myfile << "\n"; + + for(unsigned i=0; i< sortedStru.size(); ++i){ + if(exp(strP[i+1]) > (PERCENT/100)){ + break; + } + myfile << orderstring << " " << boost::lexical_cast(finlogscore) << " " << sortedStru[i].second << " " << sortedStru[i].first<< " " << exp(strP[i+1])*100; + myfile << "\n"; + + } + } + else cout << "Unable to open file"; + myfile.close(); + +/* ofstream myfile; + myfile.open("/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2RO.txt"); + if (myfile.is_open()) + { + for(unsigned i=0; i< parSetScoreSorted.size(); ++i){ + map :: iterator itr; + for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr){ + myfile << itr->second << " : " << itr->first << "; " ; + } + myfile << "\n"; + + } + } + else cout << "Unable to open file"; + myfile.close();*/ + + + //time after completion + time_t later = time(0); + char* dt_later = ctime(&later); + std::cout << "The local date and time is: " << dt_later << std::endl; + + std::cout << std::endl; + std::cout << std::endl; + + + std::cin.clear(); + std::cin.ignore(); + std::cin.get(); + return 0; +} + + -- 2.29.0