MyOrder.cpp 30.9 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993
#include <string>
#include <vector>
#include <numeric>
#include <fstream>
#include <iterator>
#include <iostream>
#include <utility>
#include <iomanip>
#include <ctime>
#include <boost/math/special_functions.hpp>
#include <boost/lexical_cast.hpp>
#include <boost/algorithm/string/replace.hpp>

#include <stdio.h>
#include <math.h>
#include <algorithm>    // std::find
#include <map>
#include <iterator>
using namespace std;


struct compare {
    bool operator()(const std::string& first, const std::string& second) {
    	if(first.size() == second.size())
    		return first < second;
    	else
    		return first.size() < second.size();
    }
};

string int_to_str(int num)
{
    stringstream ss;

    ss << num;

    return ss.str();
};


int str_to_int(string st)
{
    int result;

    stringstream(st) >> result;

    return result;
};

bool compareI(const pair<string, double>&i, const pair<string, double>&j)
{
    return i.second < j.second;
}

bool compareD(const pair<string, double>&i, const pair<string, double>&j)
{
    return i.second > j.second;
}

bool compareIn(const pair<double, string>&i, const pair<double, string>&j)
{
    return i.first < j.first;
}

bool compareDe(const pair<double, string>&i, const pair<double, string>&j)
{
    return i.first > j.first;
}

double logAB (double x, double y)
{
	double result;
	double maxVal = max(x,y);

	if(maxVal == x)
	{
		result = maxVal + log(1+exp(y-maxVal));
	}
	else
	{
		result = maxVal + log(1+exp(x-maxVal));
	}
	return result;
}

double persentageXofY (double newS, double oldS)
{
	double result;
	result = exp(oldS-newS)*100;
	return result;
}

void findAndReplaceAll(std::string & data, std::string toSearch, std::string replaceStr)
{
	// Get the first occurrence
	size_t pos = data.find(toSearch);

	// Repeat till end is reached
	while( pos != std::string::npos)
	{
		// Replace this occurrence of Sub String
		data.replace(pos, toSearch.size(), replaceStr);
		// Get the next occurrence from the current position
		pos =data.find(toSearch, pos + toSearch.size());
	}
}

int main() {
	/*
	//TEMPORARY INPUT FILE
	vector< vector<int> > DAT;
	vector <int> temp, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11, temp12, temp13, temp14, temp15, temp16;
	//Filling up some of the test variables needed for the data set
	temp.push_back(0);
	temp.push_back(0);
	temp.push_back(1);
	//set 2
	temp2.push_back(0);
	temp2.push_back(1);
	temp2.push_back(0);
	//set3
	temp3.push_back(0);
	temp3.push_back(1);
	temp3.push_back(1);
	//set4
	temp4.push_back(1);
	temp4.push_back(0);
	temp4.push_back(0);
	//set5
	temp5.push_back(1);
	temp5.push_back(1);
	temp5.push_back(1);
	//set6
	temp6.push_back(1);
	temp6.push_back(1);
	temp6.push_back(0);
	//set7
	temp7.push_back(1);
	temp7.push_back(0);
	temp7.push_back(1);
	//set8
	temp8.push_back(1);
	temp8.push_back(0);
	temp8.push_back(1);
	//set9
	temp9.push_back(1);
	temp9.push_back(0);
	temp9.push_back(1);
	//set10
	temp10.push_back(1);
	temp10.push_back(0);
	temp10.push_back(1);
	//set11
	temp11.push_back(1);
	temp11.push_back(1);
	temp11.push_back(1);
	//set12
	temp12.push_back(1);
	temp12.push_back(0);
	temp12.push_back(1);
	//set13
	temp13.push_back(1);
	temp13.push_back(0);
	temp13.push_back(1);
	//set14
	temp14.push_back(1);
	temp14.push_back(1);
	temp14.push_back(0);
	//set15
	temp15.push_back(0);
	temp15.push_back(1);
	temp15.push_back(1);
	//set16
	temp16.push_back(1);
	temp16.push_back(0);
	temp16.push_back(1);
	std::cout << endl;
	//Filling up test DATASET
	DAT.push_back(temp);
	DAT.push_back(temp2);
	DAT.push_back(temp3);
	DAT.push_back(temp4);
	DAT.push_back(temp5);
	DAT.push_back(temp6);
	DAT.push_back(temp7);
	DAT.push_back(temp8);
	DAT.push_back(temp9);
	DAT.push_back(temp10);
	DAT.push_back(temp11);
	DAT.push_back(temp12);
	DAT.push_back(temp13);
	DAT.push_back(temp14);
	DAT.push_back(temp15);
	DAT.push_back(temp16);
	std::cout << endl;
	size_t totvars = DAT[1].size();
	size_t tottuples = DAT.size();
	*/
	//Time before user input
	time_t now = time(0);
	char* dt = ctime(&now);
	std::cout << "The local date and time is: " << dt << std::endl;
	string FILENAME;
	std::cout << "What is the name of your file?: ";
	std::cin >> FILENAME;
	// Variable declarations
	fstream file;
	int COLS;
	std::cout << "How many variables in your data file?: ";
	std::cin >> COLS;
	std::cout << endl;
	vector < vector <int> > DAT; // 2d array as a vector of vectors
	vector <int> rowVector(COLS); // vector to add into 'array' (represents a row)
	int row = 0; // Row counter

				 // Read file
	file.open(FILENAME.c_str(), ios::in); // Open file
	if (file.is_open()) { // If file has correctly opened...
						  // Output debug message
		cout << "File correctly opened" << endl;

		// Dynamically store data into array
		while (file.good()) { // ... and while there are no errors,
			DAT.push_back(rowVector); // add a new row,
			for (int col = 0; col<COLS; col++) {
				file >> DAT[row][col]; // fill the row with col elements
			}
			row++; // Keep track of actual row 
		}
	}
	else cout << "Unable to open file" << endl;
	file.close();

	size_t totvars = DAT[1].size();	//column number
	size_t tottuples = DAT.size();//row number
	



	//Ask User for the order they would like to test Order starts at 0 e.g. 0,1,2 is valid order for three variables
	int order_i;
	vector <int> order;
	std::cout << "Which is the order you would like to test?: ";
	for (int i = 0; i < totvars;++i) {
		std::cin >> order_i;
		order.push_back(order_i);
	}
	std::cout << endl;
	//Ask user for max value and min value of each variable being inputed
	vector <int> max_cat;
	vector <int> min_cat;
	int max_cat_input;
	int min_cat_input;

	std::cout << "What is the maximum categorical value of each variable?: ";
	for (int i = 0; i < totvars; ++i) {
		std::cin >> max_cat_input;
		max_cat.push_back(max_cat_input);
	}
	for (size_t i = 0; i < max_cat.size();++i) {
		std::cout << max_cat[i] << " ";
	}
	std::cout << endl;
	std::cout << endl;
	std::cout << "What is the minimum categorical value of each variable?: ";
	for (int i = 0; i < totvars; ++i) {
		std::cin >> min_cat_input;
		min_cat.push_back(min_cat_input);
	}
	for (size_t i = 0; i < min_cat.size();++i) {
		std::cout << min_cat[i] << " ";
	}
	std::cout << endl;
	//Set the maximum for the amount of parents for any given variable
	unsigned int maxparents;
	std::cout << "What is the maximum amount of parents to be considered for any given variable?: ";
	std::cin >> maxparents;
	std::cout << endl;

	double PERCENT;
	std::cout << "What is the percentage (Please use the format of 98.99, not 0.9899)?: ";
	std::cin >> PERCENT;
	std::cout << endl;

	//Time that program begins
	time_t start = time(0);
	char* dt_start = ctime(&start);
	std::cout << "The local date and time is: " << dt_start << std::endl;


	//Lets convert to counts for every variable combination which would be 2^n in the case of binary variables starting with the minimum in each category:
	//categories in i
	vector <int> catsi;
	//total combinations of variables
	//int totcombos = 1;
	//how many catagori for every variable
	for ( int i = 0; i < totvars; ++i) {
		catsi.push_back((max_cat[i] - min_cat[i]) + 1);
		//totcombos = totcombos * ((max_cat[i] - min_cat[i]) + 1);
	}
	/*size_t mincatsi = 10000, mincatvar;
	for (size_t i = 0; i < catsi.size(); ++i) {
		if (catsi[i] < mincatsi) {
			mincatsi = catsi[i];
			mincatvar = i;
		}
	}*/
	//print out catsi
	for (size_t i = 0; i < catsi.size();++i) {
		std::cout << catsi[i] << " ";
	}
	std::cout << endl;

	//Total Families Ui,alpha for a particular variable in the order
	vector <unsigned long long> families;//is vector, first element is the number of parentset for the first variabel......
	for (unsigned int i = 0; i < totvars; ++i) {
		int numparents = i;
		if (numparents == 0) {
			families.push_back(1);
		}
		else {

			unsigned long long numfams = 0;
			for (unsigned int j = 0; j <= i; ++j) {
				if (j <= maxparents) {
					unsigned long long jFactorial = 1;
					unsigned long long ijFactorial = 1;
					unsigned long long iFactorial = 1;
					//Calculate j!
					for (unsigned int g = 0; g <= j; ++g) {
						if (g != 0) {
							jFactorial *= g;
						}
					}
					//Calculate i!
					for (unsigned int g = 0; g <= i; ++g) {
						if (g != 0) {
							iFactorial *= g;
						}
					}
					//Calculate (i-j)!
					for (unsigned int g = 0; g <= (i - j); ++g) {
						if (g != 0) {
							ijFactorial *= g;
						}
					}
					numfams += (iFactorial) / (jFactorial * ijFactorial);
				}
				else {
					break;
				}
			}
			families.push_back(numfams);
		}
	}

		
	//How many parent combinations for each step? As well as there counts
	vector< vector <int> > ParentCombos;
	vector< vector <int> > fullNijkvector;
	vector< vector <string> > indexofvar; // This is label or index for each variable.
	for (size_t i = 0; i < order.size(); ++i) {
		//i represents the order of the variable
		if (i == 0) {
			vector <int> tmp,Nijkovercombos1;
			vector <string> tempstring;
			tempstring.push_back("[0]");
			tmp.push_back(1);
			ParentCombos.push_back(tmp);
			//counting the amount of times that a value of the first variable in the order occurs
			//this starts with the maximum value for that variable
			for (int hello = max_cat[order[0]];hello >= min_cat[order[0]];--hello) {
				//hello cycles through the categories of the first variable in the order
				int Nijk1 = 0;
				//green cycles through tuples
				for (int green = 0; green < tottuples; ++green){
					if (DAT[green][order[0]] == hello) {
						Nijk1 += 1;
					}
				}
				Nijkovercombos1.push_back(Nijk1);
			}
			fullNijkvector.push_back(Nijkovercombos1);
			indexofvar.push_back(tempstring);

		}
		else {
			vector <int> tmp,Nijkovercombos1;
			vector <string> tempstring;
			string tempa = "[" + int_to_str(i) +"]";
			tempstring.push_back(tempa);
			tmp.push_back(1);
			int numparnts = i;
			//counting the amount of times that a value of the last variable in the current order size occurs
			//this starts with the maximum value for that variable
			for (int hello = max_cat[order[numparnts]];hello >= min_cat[order[numparnts]];--hello) {
				//hello cycles through the categories of the first variable in the order
				int Nijk1 = 0;
				//green cycles through tuples
				for (int green = 0; green < tottuples; ++green) {
					if (DAT[green][order[numparnts]] == hello) {
						Nijk1 += 1;
					}
				}
				Nijkovercombos1.push_back(Nijk1);
			}
			fullNijkvector.push_back(Nijkovercombos1);
			
			//j representing the number of parents
			for (int it = 1; it <= numparnts; ++it) {
				//(333)Creating a vector that uses the right combination
				double Nloopy = 0, NcolFactorial = 1, iFactorial = 1, NiFactorial = 1;
				/*std::cout << "This is for " << numparnts << " choose " << it << endl;
				std::cout << "The iteration number is: " << it << endl;*/
				//Accounting for the limit of parent quantity
				if (it > maxparents) {
					break;
				}
				else {
					vector <int> NewMat(numparnts, 0);
					for (int p = 0; p < it; ++p) {
						NewMat[p] = 1;
					}
					for (int g = 2; g <= numparnts; ++g) {
						NcolFactorial *= g;
					}
					for (int g = 2; g <= it; ++g) {
						iFactorial *= g;
					}
					for (int g = 2; g <= (numparnts - it); ++g) {
						NiFactorial *= g;
					}
					//Nloopy represents the result of numparnts choose i e.g. numparnts choose 1 equals numparnts
					Nloopy = NcolFactorial / (iFactorial * NiFactorial);
					for (int iNloopy = 0; iNloopy < Nloopy; ++iNloopy) {
						int combsparents = 1;
                        vector <int> parsetv;
                        for (int par = 0; par < NewMat.size(); ++par){
                        	if (NewMat[par] == 1){
                        		parsetv.push_back(par);
                        	}
                        }

                        string tempstring2;

                        for (int par2 = 0; par2 < parsetv.size(); ++par2){
                        	if(par2+1==parsetv.size()){
                        		tempstring2 = tempstring2 + int_to_str(parsetv[par2]);
                        		//tempstring2 = tempstring2 +","+"|" + int_to_str(i);
                        		tempstring2 = "[" + int_to_str(i)+"|"+tempstring2 +"]";
                        	}
                        	else{
                        		//tempstring2 = tempstring2 + int_to_str(parsetv[par2])+",";
                        		tempstring2 = tempstring2 + int_to_str(parsetv[par2])+":";
                        	}


                        }
                        tempstring.push_back(tempstring2);

						/*std::cout << "This is iNloopy: " << (iNloopy + 1) << endl;
						std::cout << "Here comes the NewMat:" << endl;*/
						//(444)This sets up the process for changing
						//PosOne tells me the position of the last one in the vector
						//We want to change when the position is the last position available in the vector
						int SumOnes = 0, PosOne = 0, SumOnes2 = 0, PosOne2, NxtOne = 0, FrstOne = 0;
						int SumOnes3 = 0, SumOnes4 = 0, SumY = 0;
						for (PosOne = (numparnts - 1); PosOne >= 0; --PosOne) {
							if (NewMat[PosOne] == 1) {
								break;
							}
						}
						for (int y = (numparnts - 1); y >= (numparnts - it); --y) {
							//SumOnes tells you the amount of ones in the last i columns
							//These are the last columns being considered
							SumOnes += NewMat[y];
						}
						for (PosOne2 = (numparnts - 1); PosOne2 >= 0; --PosOne2) {
							//SumOnes2 tells you the amount of ones before you reach the next zero
							//PosOne2 keeps track of the position of the coming zero
							SumOnes2 += NewMat[PosOne2];
							if ((SumOnes2 > 0) & (NewMat[PosOne2] == 0)) {
								break;
							}
						}
						for (FrstOne = 0; FrstOne < numparnts; ++FrstOne) {
							//FrstOne tells you the position of the first number 1 starting from the left hand side
							if (NewMat[FrstOne] == 1) {
								break;
							}
						}
						for (int x = (numparnts - 1); x >= (numparnts - it + 1); --x) {
							//SumOnes4 helps keep track of the sum of all ones located in the last i - 1 positions
							SumOnes4 += NewMat[x];
						}
						//Prints out NewMat
						/*for (int u = 0; u < numparnts; ++u) {
							std::cout << NewMat[u] << " ";
						}
						std::cout << endl;*/


						//Adding in the code that will allow counts parent combinations for this particular variable
						for (int q = 0; q < i; ++q) {
							if (NewMat[q] == 1) {
								combsparents *= catsi[order[q]];
							}
						}
						tmp.push_back(combsparents);//made the whole parentset configure for a variable
						/*std::cout << "This is combsparents: " << combsparents << endl;
						std::cout << endl;*/
						vector <int> hvect;
						//hvect tells us which variables are being considered always the last variable is being considered
						//e.g if ABC is our order and we are on i equals 1 then we are looking at relationships between A and B only
						//continued: A is the only one that is either a parent or isn't a parent so hvect will be < 0 1 > 
						//for A C hvect will be < 0 2 >
						for (int h = 0; h < i; ++h) {
							if (NewMat[h] == 1) {
								hvect.push_back(h);
							}
						}
						hvect.push_back(numparnts);
						size_t shvect = hvect.size();
						//Prints out hvect
						/*for (int u = 0; u < shvect; ++u) {
							std::cout << hvect[u] << " ";
						}*/
						//std::cout << endl;
						//Counting the amount of values in the data that have that particular parent combination
						vector <int> Nijkovercombos;
						for (int last = min_cat[order[numparnts]]; last <= max_cat[order[numparnts]]; ++last) {
							//(333)Creating a vector that uses the right combination
							/*std::cout << "This is for " << i << " place in the order with value of variable equal to" << last << endl;*/
							vector <int> Test(shvect, last), maxtest;
							for (int p = 0; p < (shvect-1); ++p) {
								Test[p] = max_cat[order[hvect[p]]];
							}
							maxtest = Test;
							for (int i2Nloopy = 0; i2Nloopy < combsparents; ++i2Nloopy) {
								//std::cout << endl;
								/*std::cout << endl;
								std::cout << "This is i2Nloopy: " << (i2Nloopy + 1) << endl;
								std::cout << "Here comes the Test:" << endl;*/
								//(444)This sets up the process for changing
								//NMpos tells me the position of the last non minimum value in the vector
								//We want to change when the position is the last position available in the vector
								int NMpos = 0, minpos = 0;
								for (NMpos = (shvect - 2); NMpos >= 0; --NMpos) {
									if (Test[NMpos] != min_cat[order[hvect[NMpos]]]) {
										break;
									}
								}
								for (minpos = (shvect - 2); minpos >= 0; --minpos) {
									//minpos tells you the position of the last minimum value
									if (Test[minpos] == min_cat[order[hvect[minpos]]]) {
										break;
									}
								}
								//Prints out Test
								/*for (int u = 0; u < shvect; ++u) {
									std::cout << Test[u] << " ";
								}
								std::cout << endl;
								std::cout << endl;
								std::cout << endl;*/
								//Count how many occurrences of the value are present in the data
								int Nijk = 0;
								for (int num2size = 0; num2size < tottuples; ++num2size) {
									int countcorrect = 0;
									for (size_t g = 0; g < Test.size(); ++g) {
										//num2size cycles through tuples
										//order[hvect[g]] represents the variable in the order that we are considering as a parent
										if (DAT[num2size][order[hvect[g]]] == Test[g]) {
											countcorrect += 1;
										}
									}
									if (countcorrect == Test.size()) {
										Nijk += 1;
									}
								}
								//Nijkovercombos displays data as follows
								//it starts with the smallest value for the last variable in hvect
								//and the largest values in the first n-1 variables in hvect
								//max,max-1,max-2,max-3 e.g. 2, 1, 0, 2, 1, 0  
								//count,count,count,count e.g. 13, 2, 2, 3, 4, 10
								Nijkovercombos.push_back(Nijk);
								//(666)Now that the values have been calculated find out what the next combination of variables should be
								if ((NMpos == -1) & (minpos == (shvect - 2))) {
									//break when the 1st non minimum does not exist and the first minimum is found in the last position e.g. 0000
									break;
								}
								if (minpos < NMpos) {
									Test[NMpos] = Test[NMpos] - 1;
								}
								else if (NMpos < minpos) {
									Test[NMpos] = Test[NMpos] - 1;
									for (int filler = NMpos + 1; filler < (shvect - 1); ++filler) {
										Test[filler] = maxtest[filler];
									}
								}
							}
						}
						fullNijkvector.push_back(Nijkovercombos);
						//(666)Now that the unique values have been calculated find out what the next combination of variables should be
						if ((PosOne == (numparnts - 1)) & (SumOnes == it)) {
							break;
						}
						else if ((PosOne == (numparnts - 1)) & (SumOnes != it)) {
							for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
								//NxtOne tells you the position of the next closest number 1 that we would
								//like to change the position of (we will call it the important number one)
								//SumOnes3 helps keep track of the sum of all ones between now and the next important number one
								SumOnes3 += NewMat[NxtOne];
								if (SumOnes3 == (SumOnes2 + 1)) {
									break;
								}
							}
							if (SumOnes4 == (it - 1)) {
								//If all except one of the 1's are found in the last it - 1 columns
								for (int x = 0; x < numparnts; ++x) {
									if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == (FrstOne + 1))) {
										//If 
										NewMat[x] = 1;
									}
									else {
										NewMat[x] = 0;
									}
								}
							}
							else {
								for (int x = 0; x < numparnts; ++x) {
									if (((x <= (NxtOne + SumOnes3)) & (x > NxtOne)) | (x == FrstOne)) {
										//If the position is that of the first 1 or it falls between the changed number one and the total 
										//amount of ones that are on that side of the zero 10111
										NewMat[x] = 1;
									}
									else if ((x != FrstOne) & (x != NxtOne) & (NewMat[x] == 1) & (x < PosOne2)) {
										//If it is not the position of the first 1 and it is not the position of the 1 whose position we are interested in changing
										//and the previous value at this position was 1 and the postion is below the value of the first zero spotted from the right
										NewMat[x] = 1;
									}
									else {
										NewMat[x] = 0;
									}
								}
							}
						}
						else if ((PosOne != (numparnts - 1)) & (SumOnes != it)) {
							for (NxtOne = (numparnts - 1); NxtOne >= 0; --NxtOne) {
								//NxtOne tells you the position of the next closest number 1 that we would
								//like to change the position of (we will call it the important number one)
								//SumOnes3 helps keep track of the sum of all ones between now and the next important number one
								SumOnes3 += NewMat[NxtOne];
								if (SumOnes3 == 1) {
									break;
								}
							}
							if (it != 1) {
								for (int x = 0; x < numparnts; ++x) {
									if (x == (NxtOne + 1)) {
										NewMat[x] = 1;
									}
									else if (x == NxtOne) {
										NewMat[x] = 0;
									}
									else if ((NewMat[x] == 1) & (x != NxtOne)) {
										NewMat[x] = 1;
									}
									else {
										NewMat[x] = 0;
									}
								}
							}
							else {
								for (int x = 0; x < numparnts; ++x) {
									if ((x == (NxtOne + 1))) {
										NewMat[x] = 1;
									}
									else {
										NewMat[x] = 0;
									}
								}
							}
						}
					}

				}
			}
			ParentCombos.push_back(tmp);
			indexofvar.push_back(tempstring);
		}

	}
	//std::cout << endl;
	std::cout << endl;
	//printing out the ParentCombos matrix just created above
	/*for (size_t i = 0; i < ParentCombos.size(); ++i) {
		for (size_t j = 0; j < ParentCombos[i].size(); ++j) {
			std::cout << ParentCombos[i][j] << " ";
		}
		std::cout << endl;
	}*/
	std::cout << endl;
	//printing out the fullNijkvector matrix just created above
	/*for (size_t i = 0; i < fullNijkvector.size(); ++i) {
		for (size_t j = 0; j < fullNijkvector[i].size(); ++j) {
			std::cout << fullNijkvector[i][j] << " ";
		}
		std::cout << endl;
	}
	std::cout << endl;*/
	//Print out Data
	/*for (int i = 0; i < DAT.size(); ++i) {
		for (int j = 0; j < DAT[i].size(); ++j) {
			std::cout << DAT[i][j] << " ";
		}
		std::cout << endl;
	}*/
	//Print out the families size
	/*for (size_t i = 0; i < families.size(); ++i) {
		std::cout << families[i] << " ";
	}*/

	//Obtaining the actual score from this information
	//varinorder cycles through families (the amount of parent families that should be considered for the variable with a particular order starting
	//the first variable in the order)
	//keeping track of the position within the fullNijkvector associated with the varinorder and the qi_Uialpha
	int posinfull = 0;
	//finlogscore is the final score in natural log format
	long double finlogscore = 0.0;
	vector< vector <double> > vecvarparset;
	for (size_t varinorder = 0; varinorder < families.size(); ++varinorder) {
		//sumovUialpha is the the sum over all parent sets for a particular variable
		long double sumovUialpha = 0.0;
		//vector of all values of seclastgamma
		vector <double> vec2ndlastgamma;
		long double maxseclastgamma;
		//Uialpha cycles through all the parent sets for a particular family
		for (int Uialpha = 0; Uialpha < families[varinorder]; ++Uialpha) {
			// nijkprime represents the value of 1/(ri * qi)
			long double nijkprime, nijprime;
			long double rij = catsi[order[varinorder]], PCs = ParentCombos[varinorder][Uialpha];

			nijprime = 1.0 / (PCs);
			nijkprime = 1.0 / (rij * PCs);
			//seclastgamma is the sum over all combinations for the parents in a set sum because it is logarithmic
			long double seclastgamma = 0.0;
			//qi_Uialpha cycles through the combinations for the parents in a set
			for (int qi_Uialpha = 0; qi_Uialpha < ParentCombos[varinorder][Uialpha];++qi_Uialpha) {
				long double lastgamma = 0.0;
				long double nij = 0.0;
				//countijk cycles through the categories of the variable with a particular order
				//catsi is in the order that data is input and so one must use the order[varinorder] to first obtain the variable that we are referring to
				//and then find the categories for it
				for (int countijk = 0; countijk < catsi[order[varinorder]]; ++countijk) {
					long double topy;
					//rightcol lets you find the right column/position of the value that you need for a particular category within the
					int rightcol = qi_Uialpha + (countijk * ParentCombos[varinorder][Uialpha]);
					nij += fullNijkvector[posinfull][rightcol];
					topy = (nijkprime + fullNijkvector[posinfull][rightcol]);

					//Using boost lgamma function for the product over categories and parent combinations
					lastgamma += boost::math::lgamma(topy) - boost::math::lgamma(nijkprime);

				}
				long double boty = nij + nijprime;
				seclastgamma += lastgamma + boost::math::lgamma(nijprime) - boost::math::lgamma(boty);

			}
			vec2ndlastgamma.push_back(seclastgamma);



			//Calculate sumovUialpha based on the logsumexp concept
			if (Uialpha + 1 == families[varinorder]) {
				
				for (size_t que = 0; que < vec2ndlastgamma.size(); ++que) {
					//change the value of maxseclastgamma if new value is larger than the previous value
					if (que == 0) {
						maxseclastgamma = vec2ndlastgamma[0];
					}
					else {
						if (maxseclastgamma < vec2ndlastgamma[que]) {
							maxseclastgamma = vec2ndlastgamma[que];
						}
					}
				}
				for (size_t what = 0; what < vec2ndlastgamma.size(); ++what) {
					sumovUialpha += exp(vec2ndlastgamma[what] - maxseclastgamma);
				}
				//add info on parent set scores for each variable to this vector of vectors
				vecvarparset.push_back(vec2ndlastgamma);
			}
			
			/*std::cout << endl;
			std::cout << seclastgamma;
			std::cout << endl;*/
			posinfull += 1;
			//std::cout << posinfull << endl;
		}
		finlogscore += log(sumovUialpha) + maxseclastgamma;
	}

	vector < vector<string> > parSet;
	vector< map <double, string, greater <double> > > parSetScoreSorted;
	vector< map <double, string, greater <double> > > strucScore;


//Below is another way to match the index or label sets with the scores sets, and store the (score, label) into a map vector. And for each vector element the map is a sorted map.

	for (unsigned i = 0; i < indexofvar.size(); ++i){

		map <double, string, greater <double> > tempMap;
		for (unsigned j=0; j< indexofvar[i].size(); ++j){
		    tempMap.insert(make_pair(vecvarparset[i][j], indexofvar[i][j]));
		   }
		parSetScoreSorted.push_back(tempMap);

	}


	pair <double, string> bestStrScore;
	double bestScore = 0;
	string bestLable;


	for (unsigned i = 0; i < parSetScoreSorted.size(); ++i){
		map <double, string> :: iterator itr;
		itr = parSetScoreSorted[i].begin();
		bestScore = bestScore + (itr->first);
		bestLable = bestLable +(itr->second);
	}

	bestStrScore = make_pair(bestScore, bestLable);//This is the best score.

	vector < pair <double, string> > sortedStru;//This store all the structures in the percentage.
	vector < vector < pair <double, string > > > deltaC;

	for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){
		map <double, string> :: iterator itr0, itr1;
		vector < pair <double, string > > tempDelta;
		double tempDeltaS;
		string tempDeltaL;
		itr0 = parSetScoreSorted[l].begin();
		itr1 = parSetScoreSorted[l].begin();
		double tem1=exp(itr1->first), tem2 = exp(itr1->first);
		for (unsigned m = 1; m< parSetScoreSorted[l].size(); ++m){
			tem1 = tem2;
			itr1 = ++itr1;
			tem2 = tem1 + exp(itr1->first);
			double tem = (tem1/tem2)*100;
			if(tem <= PERCENT){
				tempDeltaS = (itr1->first)-(itr0->first);
				tempDeltaL = itr1->second;
				tempDelta.push_back(make_pair(tempDeltaS, tempDeltaL));
			}
		}

		deltaC.push_back( tempDelta);
	}


	for(unsigned i=0; i< deltaC.size(); ++i){
		for (unsigned j=0; j< deltaC[i].size(); ++j)
		{
			double score = bestStrScore.first + deltaC[i][j].first;
			string lab = bestStrScore.second;
			findAndReplaceAll(lab, parSetScoreSorted[i+1].begin()->second, deltaC[i][j].second);
			sortedStru.push_back(make_pair(score, lab));
		}
	}

	sort(sortedStru.begin(),sortedStru.end(),compareDe);



/*	for (unsigned l = 1; l< parSetScoreSorted.size(); ++l){ //l means each variable
		for (unsigned m = contl; m < parSetScoreSorted[l].size(); ++m){ //m means the number of parents sets
			contl =m;
			vector < pair <string, double > > delta; // store the different of the highest score and the second highest score
			for (unsigned i = l; i < parSetScoreSorted.size(); ++i){
				map <double, string> :: iterator itr0, itr1;
				double tempDeltaS;
				string tempDeltaL;
				itr0 = parSetScoreSorted[i].begin();
				itr1 = itr0;
				for (int kr = 0; kr < contl; ++kr){
					itr1 = itr0++;
				}
				tempDeltaS = (itr1->first)-(itr0->first);
				tempDeltaL = int_to_str(i);
				delta.push_back(make_pair(tempDeltaL, tempDeltaS));
			}

			sort(delta.begin(),delta.end(),compareI);

            double const stopLimit = PERCENT;
			double tempbeforeS = bestStrScore.first, temafterS;

			for (unsigned i = 0; i < delta.size(); ++i){
				pair <double, string > temppair;
			    double tempLime;
			    int ind = str_to_int(delta[i].first);
			    string s = parSetScoreSorted[ind].begin()->second;
				string sRep = (++parSetScoreSorted[ind].begin())->second;
				temppair = bestStrScore;
				findAndReplaceAll(temppair.second, s, sRep);
				temppair.first = temppair.first - delta[i].second;

				temafterS = logAB(tempbeforeS, temppair.first);

				tempLime = persentageXofY(temafterS, tempbeforeS) ;

				tempbeforeS = temafterS;

				if(tempLime > stopLimit){
					goto finish;
			}

				sortedStru.push_back(temppair);

					//		std::cout << ind << "    "<< s << "     "<< sRep << "     "<< temppair.first  << "    " << temppair.second <<endl;
					//		std::cout << temppair.first  << "    " << temppair.second <<endl;
					//		std::cout << std::endl;

			}

		}

	}

	finish:*/


	std::cout << std::endl;
	std::cout << "Total Score: "<< boost::lexical_cast<string>(finlogscore) << std::endl;
	std::cout << std::endl;


	cout << "Best several structures are:" << endl;
	cout  << bestScore << " : " << bestLable << endl;

	for (unsigned i=0; i< sortedStru.size(); ++i )
	{
		cout  << sortedStru[i].first << " : " << sortedStru[i].second << endl;
	}

	std::cout << std::endl;
	std::cout << std::endl;

	for(unsigned i=0; i< parSetScoreSorted.size(); ++i){
		map <double, string> :: iterator itr;
		std::cout << "For variable "<<  i <<":"<< endl;
		for (itr = parSetScoreSorted[i].begin(); itr != parSetScoreSorted[i].end(); ++itr)
		{
			cout  << itr->second << " : " << itr->first << ";   ";
		}
		cout << endl;
		cout << endl;

	}


/*	ofstream myfile;
	myfile.open("/home/zgong001/Documents/SprinklerDataset/bestStructure.txt");
	if (myfile.is_open())
	{
		myfile << bestLable << " : " << bestScore << sortedStru.size();
		myfile << "\n";

		for(unsigned i=0; i< sortedStru.size(); ++i){
			myfile  << sortedStru[i].second << " : " << sortedStru[i].first;
            myfile << "\n";
		}
	 }
	 else cout << "Unable to open file";
	 myfile.close();*/


	//time after completion
	time_t later = time(0);
	char* dt_later = ctime(&later);
	std::cout << "The local date and time is: " << dt_later << std::endl;

	std::cout << std::endl;
	std::cout << std::endl;


	std::cin.clear();
	std::cin.ignore();
	std::cin.get();
	return 0;
}