library(dplyr)
library(bnlearn)
#library(Hmisc)
library(BiDAG)
library("writexl")

#use below to download "graph" package, if needed; required for BiDAG
#note: use BiocManager v 3.14 with R v.4.1
#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("graph", version = "3.14")

mydata <-read.csv(file.choose(),header=T)

#randomly select 3 genes from 45,818 available genes (excludes last 5 rows: "no_feature", "ambiguous","too_low_aQual", "not_aligned", "aligment_not_unique")
set.seed(123)
indices3 <-sort(sample.int(45818,3))
indices3 
mydata3 <-mydata[indices3,]

#clean and reformat data
mydata3 <-t(mydata3[,-c(1,3)])
colnames(mydata3)<-c(mydata3[1,1],mydata3[1,2],mydata3[1,3])
mydata3.genes<-colnames(mydata3)
nrows <-nrow(mydata3)
mydata3 <-mydata3[-c(1,nrows),]
nrows <-nrow(mydata3)
ncols <-ncol(mydata3)
mydata3 <-as.data.frame(mydata3)

#Create new dataframe to hold continuous standardized values
mydata3new <-data.frame(matrix(ncol=ncols, nrow=nrows))
colnames(mydata3new)<-colnames(mydata3)

for (j in c(1:ncols)){
   for (i in c(1:nrows)){
      mydata3new[i,j] <-as.numeric(mydata3[i,j])
   }
}

for (j in c(1:ncols)){
   AVG <-mean(mydata3new[,j])
   SD <-sd(mydata3new[,j])
   
   for (i in c(1:nrows)){
      mydata3new[i,j+3] <-(mydata3new[i,j]-AVG)/SD
   }
}

colnames(mydata3new)[4] <-"gene1";colnames(mydata3new)[5] <-"gene2";colnames(mydata3new)[6] <-"gene3"

mydata3final <-mydata3new[,-c(1:3)]

#Create new dataframe to hold categorical values
mydata3d <-mydata3final
nrows <-nrow(mydata3d)
ncols <-ncol(mydata3d)
for (j in c(1:ncols)){  
   for (i in c(1:nrows)){
      if(mydata3d[i,j]<(-1)) mydata3d[i,j+3] <-1
      if(between(mydata3d[i,j],-1,1)) mydata3d[i,j+3] <-2
      if(mydata3d[i,j]>1)    mydata3d[i,j+3] <-3
   }
}
colnames(mydata3d)[4] <-"gene1";colnames(mydata3d)[5] <-"gene2";colnames(mydata3d)[6] <-"gene3"
mydata3d <-mydata3d[,-c(1:3)]
mydata3d[1]<-lapply(mydata3d[1],factor)
mydata3d[2]<-lapply(mydata3d[2],factor)
mydata3d[3]<-lapply(mydata3d[3],factor)


################################################################################
#FOR BNLEARN: 
################################################################################
#create all possible BNs for 3 variables
dag1 <-empty.graph(colnames(mydata3final))

dag2 <-model2network("[gene1|gene2][gene2][gene3]")
dag3 <-model2network("[gene1][gene2|gene1][gene3]")

dag4 <-model2network("[gene1|gene3][gene2][gene3]")
dag5 <-model2network("[gene1][gene2][gene3|gene1]")

dag6 <-model2network("[gene1][gene2|gene3][gene3]")
dag7 <-model2network("[gene1][gene2][gene3|gene2]")

dag8 <-model2network("[gene1|gene2:gene3][gene2][gene3]")

dag9<-model2network("[gene1][gene2|gene1][gene3|gene1]")
dag10<-model2network("[gene1|gene2][gene2][gene3|gene1]")
dag11<-model2network("[gene1|gene3][gene2|gene1][gene3]")

dag12<-model2network("[gene1][gene2|gene1:gene3][gene3]")

dag13<-model2network("[gene1|gene2][gene2][gene3|gene2]")
dag14<-model2network("[gene1][gene2|gene1][gene3|gene2]")
dag15<-model2network("[gene1|gene2][gene2|gene3][gene3]")

dag16<-model2network("[gene1][gene2][gene3|gene1:gene2]")

dag17<-model2network("[gene1|gene3][gene2|gene3][gene3]")
dag18<-model2network("[gene1][gene2|gene3][gene3|gene1]")
dag19<-model2network("[gene1|gene3][gene2][gene3|gene2]")

dag20 <-model2network("[gene1|gene2:gene3][gene2|gene3][gene3]")
dag21<-model2network("[gene1|gene2:gene3][gene2][gene3|gene2]")

dag22<-model2network("[gene1][gene2|gene1:gene3][gene3|gene1]")
dag23<-model2network("[gene1|gene3][gene2|gene1:gene3][gene3]")

dag24<-model2network("[gene1][gene2|gene1][gene3|gene1:gene2]")
dag25<-model2network("[gene1|gene2][gene2][gene3|gene2:gene1]")

#compute BGe scores using bnlearn (1st element in vector "scores")
scores <-vector("list",25) 
names(scores) <-c("dag1","dag2","dag3","dag4","dag5","dag6","dag7","dag8","dag9",
"dag10","dag11","dag12","dag13","dag14","dag15","dag16","dag17","dag18","dag19","dag20",
"dag21","dag22","dag23","dag24","dag25")
scores[1] <-score(dag1,mydata3final,type="bge")
scores[2] <-score(dag2,mydata3final,type="bge")
scores[3] <-score(dag3,mydata3final,type="bge")
scores[4] <-score(dag4,mydata3final,type="bge")
scores[5] <-score(dag5,mydata3final,type="bge")
scores[6] <-score(dag6,mydata3final,type="bge")
scores[7] <-score(dag7,mydata3final,type="bge")
scores[8] <-score(dag8,mydata3final,type="bge")
scores[9] <-score(dag9,mydata3final,type="bge")
scores[10] <-score(dag10,mydata3final,type="bge")
scores[11] <-score(dag11,mydata3final,type="bge")
scores[12] <-score(dag12,mydata3final,type="bge")
scores[13] <-score(dag13,mydata3final,type="bge")
scores[14] <-score(dag14,mydata3final,type="bge")
scores[15] <-score(dag15,mydata3final,type="bge")
scores[16] <-score(dag16,mydata3final,type="bge")
scores[17] <-score(dag17,mydata3final,type="bge")
scores[18] <-score(dag18,mydata3final,type="bge")
scores[19] <-score(dag19,mydata3final,type="bge")
scores[20] <-score(dag20,mydata3final,type="bge")
scores[21] <-score(dag21,mydata3final,type="bge")
scores[22] <-score(dag22,mydata3final,type="bge")
scores[23] <-score(dag23,mydata3final,type="bge")
scores[24] <-score(dag24,mydata3final,type="bge")
scores[25] <-score(dag25,mydata3final,type="bge")

#compute BDe scores using bnlearn (2nd element in vector "scores")
scores$dag1[2] <-score(dag1,mydata3d,type="bde")
scores$dag2[2] <-score(dag2,mydata3d,type="bde")
scores$dag3[2] <-score(dag3,mydata3d,type="bde")
scores$dag4[2] <-score(dag4,mydata3d,type="bde")
scores$dag5[2] <-score(dag5,mydata3d,type="bde")
scores$dag6[2] <-score(dag6,mydata3d,type="bde")
scores$dag7[2] <-score(dag7,mydata3d,type="bde")
scores$dag8[2] <-score(dag8,mydata3d,type="bde")
scores$dag9[2] <-score(dag9,mydata3d,type="bde")
scores$dag10[2] <-score(dag10,mydata3d,type="bde")
scores$dag11[2] <-score(dag11,mydata3d,type="bde")
scores$dag12[2] <-score(dag12,mydata3d,type="bde")
scores$dag13[2] <-score(dag13,mydata3d,type="bde")
scores$dag14[2] <-score(dag14,mydata3d,type="bde")
scores$dag15[2] <-score(dag15,mydata3d,type="bde")
scores$dag16[2] <-score(dag16,mydata3d,type="bde")
scores$dag17[2] <-score(dag17,mydata3d,type="bde")
scores$dag18[2] <-score(dag18,mydata3d,type="bde")
scores$dag19[2] <-score(dag19,mydata3d,type="bde")
scores$dag20[2] <-score(dag20,mydata3d,type="bde")
scores$dag21[2] <-score(dag21,mydata3d,type="bde")
scores$dag22[2] <-score(dag22,mydata3d,type="bde")
scores$dag23[2] <-score(dag23,mydata3d,type="bde")
scores$dag24[2] <-score(dag24,mydata3d,type="bde")
scores$dag25[2] <-score(dag25,mydata3d,type="bde")


################################################################################
#FOR BiDAG: 
################################################################################
#create all possible BNs for 3 variables
dagb1 <-matrix(c(0,0,0,0,0,0,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb2 <-matrix(c(0,0,0,1,0,0,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb3 <-matrix(c(0,1,0,0,0,0,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb4 <-matrix(c(0,0,0,0,0,0,1,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb5 <-matrix(c(0,0,1,0,0,0,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb6 <-matrix(c(0,0,0,0,0,0,0,1,0),nrow=3,ncol=3,byrow=TRUE)
dagb7 <-matrix(c(0,0,0,0,0,1,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb8 <-matrix(c(0,0,0,1,0,0,1,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb9 <-matrix(c(0,1,1,0,0,0,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb10 <-matrix(c(0,0,1,1,0,0,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb11 <-matrix(c(0,1,0,0,0,0,1,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb12 <-matrix(c(0,1,0,0,0,0,0,1,0),nrow=3,ncol=3,byrow=TRUE)
dagb13 <-matrix(c(0,0,0,1,0,1,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb14 <-matrix(c(0,1,0,0,0,1,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb15 <-matrix(c(0,0,0,1,0,0,0,1,0),nrow=3,ncol=3,byrow=TRUE)
dagb16 <-matrix(c(0,0,1,0,0,1,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb17 <-matrix(c(0,0,0,0,0,0,1,1,0),nrow=3,ncol=3,byrow=TRUE)
dagb18 <-matrix(c(0,0,1,0,0,0,0,1,0),nrow=3,ncol=3,byrow=TRUE)
dagb19 <-matrix(c(0,0,0,0,0,1,1,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb20 <-matrix(c(0,0,0,1,0,0,1,1,0),nrow=3,ncol=3,byrow=TRUE)
dagb21 <-matrix(c(0,0,0,1,0,1,1,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb22 <-matrix(c(0,1,1,0,0,0,0,1,0),nrow=3,ncol=3,byrow=TRUE)
dagb23 <-matrix(c(0,1,0,0,0,0,1,1,0),nrow=3,ncol=3,byrow=TRUE)
dagb24 <-matrix(c(0,1,1,0,0,1,0,0,0),nrow=3,ncol=3,byrow=TRUE)
dagb25 <-matrix(c(0,0,1,1,0,1,0,0,0),nrow=3,ncol=3,byrow=TRUE)

#compute BGe scores using BiDAG
myp <-scoreparameters("bge",mydata3final)
scores$dag1[3]<-DAGscore(myp,dagb1)
scores$dag2[3]<-DAGscore(myp,dagb2)
scores$dag3[3]<-DAGscore(myp,dagb3)
scores$dag4[3]<-DAGscore(myp,dagb4)
scores$dag5[3]<-DAGscore(myp,dagb5)
scores$dag6[3]<-DAGscore(myp,dagb6)
scores$dag7[3]<-DAGscore(myp,dagb7)
scores$dag8[3]<-DAGscore(myp,dagb8)
scores$dag9[3]<-DAGscore(myp,dagb9)
scores$dag10[3]<-DAGscore(myp,dagb10)
scores$dag11[3]<-DAGscore(myp,dagb11)
scores$dag12[3]<-DAGscore(myp,dagb12)
scores$dag13[3]<-DAGscore(myp,dagb13)
scores$dag14[3]<-DAGscore(myp,dagb14)
scores$dag15[3]<-DAGscore(myp,dagb15)
scores$dag16[3]<-DAGscore(myp,dagb16)
scores$dag17[3]<-DAGscore(myp,dagb17)
scores$dag18[3]<-DAGscore(myp,dagb18)
scores$dag19[3]<-DAGscore(myp,dagb19)
scores$dag20[3]<-DAGscore(myp,dagb20)
scores$dag21[3]<-DAGscore(myp,dagb21)
scores$dag22[3]<-DAGscore(myp,dagb22)
scores$dag23[3]<-DAGscore(myp,dagb23)
scores$dag24[3]<-DAGscore(myp,dagb24)
scores$dag25[3]<-DAGscore(myp,dagb25)

#compute BDe scores using BiDAG
myp <-scoreparameters("bdecat",mydata3d)
scores$dag1[4]<-DAGscore(myp,dagb1)
scores$dag2[4]<-DAGscore(myp,dagb2)
scores$dag3[4]<-DAGscore(myp,dagb3)
scores$dag4[4]<-DAGscore(myp,dagb4)
scores$dag5[4]<-DAGscore(myp,dagb5)
scores$dag6[4]<-DAGscore(myp,dagb6)
scores$dag7[4]<-DAGscore(myp,dagb7)
scores$dag8[4]<-DAGscore(myp,dagb8)
scores$dag9[4]<-DAGscore(myp,dagb9)
scores$dag10[4]<-DAGscore(myp,dagb10)
scores$dag11[4]<-DAGscore(myp,dagb11)
scores$dag12[4]<-DAGscore(myp,dagb12)
scores$dag13[4]<-DAGscore(myp,dagb13)
scores$dag14[4]<-DAGscore(myp,dagb14)
scores$dag15[4]<-DAGscore(myp,dagb15)
scores$dag16[4]<-DAGscore(myp,dagb16)
scores$dag17[4]<-DAGscore(myp,dagb17)
scores$dag18[4]<-DAGscore(myp,dagb18)
scores$dag19[4]<-DAGscore(myp,dagb19)
scores$dag20[4]<-DAGscore(myp,dagb20)
scores$dag21[4]<-DAGscore(myp,dagb21)
scores$dag22[4]<-DAGscore(myp,dagb22)
scores$dag23[4]<-DAGscore(myp,dagb23)
scores$dag24[4]<-DAGscore(myp,dagb24)
scores$dag25[4]<-DAGscore(myp,dagb25)

################################################################################
#Plot all 25 BNs (grouped by i-equivalent structures)
################################################################################

par(mfrow=c(1,3))
plot(dag1);mtext("S1")
plot.new()
plot.new()

plot(dag2);mtext("S2")
plot(dag3);mtext("S3")
plot.new()

plot(dag4);mtext("S4")
plot(dag5);mtext("S5")
plot.new()

plot(dag6);mtext("S6")
plot(dag7);mtext("S7")
plot.new()

plot(dag8);mtext("S8")
plot.new()
plot.new()

plot(dag9);mtext("S9")
plot(dag10);mtext("S10")
plot(dag11);mtext("S11")

plot(dag12);mtext("S12")
plot.new()
plot.new()

plot(dag13);mtext("S13")
plot(dag14);mtext("S14")
plot(dag15);mtext("S15")

plot(dag16);mtext("S6")
plot.new()
plot.new()

plot(dag17);mtext("S17")
plot(dag18);mtext("S18")
plot(dag19);mtext("S19")

plot(dag20);mtext("S20")
plot(dag21);mtext("S21")
plot.new()

plot(dag22);mtext("S22")
plot(dag23);mtext("S23")
plot.new()

plot(dag24);mtext("S24")
plot(dag25);mtext("S25")
plot.new()
par(mfrow=c(1,1))

################################################################################
#Organize scores into table
################################################################################

#Create table to output scores
tabscores <-matrix(rep(NA, times=200),ncol=8, byrow=TRUE)
colnames(tabscores) <-c("Strct","I-Eq","BGe-bnlearn","BDe-bnlearn","BGe-BiDAG","BDe-BiDAG","BGe Diff","BDe Diff")
rownames(tabscores) <-c("S1","S2","S3","S4","S5","S6","S7","S8","S9","S10","S11","S12",
"S13","S14","S15","S16","S17","S18","S19","S20","S21","S22","S23","S24","S25")

#List structure # in first column
for (i in c(1:25)) tabscores[i,1]<-i

#List I-Equivalnce symbol in second column (structures with same letter are i-equivalent)
tabscores[1,2]<-"a"
tabscores[2,2]<-tabscores[3,2]<-"b"
tabscores[4,2]<-tabscores[5,2]<-"c"
tabscores[6,2]<-tabscores[7,2]<-"e"
tabscores[8,2]<-"f"
tabscores[9,2]<-tabscores[10,2]<-tabscores[11,2]<-"g"
tabscores[12,2]<-"h"
tabscores[13,2]<-tabscores[14,2]<-tabscores[15,2]<-"i"
tabscores[16,2]<-"j"
tabscores[17,2]<-tabscores[18,2]<-tabscores[19,2]<-"k"
tabscores[20,2]<-tabscores[21,2]<-"l"
tabscores[22,2]<-tabscores[23,2]<-"m"
tabscores[24,2]<-tabscores[25,2]<-"n"

#Store scores and score differences in columns 3-8
#Column 3= bnlearn BGe socre
#Column 4= bnlearn BDe score
#Column 5= BiDAG BGe sccore
#Column 6= BiDAG BiDAG score
#Column 7= BGe Difference (bnlearn score - BiDAG score)
#Column 8= BDe Difference (bnlearn score - BiDAG score)
for (i in c(1:25)){
   for (j in c(3:8)){
   if(j<7) tabscores[i,j]<-scores[[i]][j-2]
   if(j==7) tabscores[i,j]<-as.numeric(tabscores[i,3])-as.numeric(tabscores[i,5])
   if(j==8) tabscores[i,j]<-as.numeric(tabscores[i,4])-as.numeric(tabscores[i,6])
   }
}
#Export tabscores results to Excel file ***Set file directory in R before exporting
dfscores<-as.data.frame(tabscores)
write_xlsx(dfscores,"Wilcox_PHC7083_Project_Routput_Scores25Strcts.xlsx")


#Create table to output pairwise structure scores comparisons
tabcomp <-matrix(rep(NA, times=3600),ncol=12, byrow=TRUE)
colnames(tabcomp) <-c("Structure1","Structure2","Bnlearn_BGe_Ratio","Bnlearn_BGe_Strct","Bnlearn_BDe_Ratio","Bnlearn_BDe_Strct","BiDAG_BGe_Ratio","BiDAG_BGe_Strct","BiDAG_BDe_Ratio","BiDAG_BDe_Strct","BGe_Strcts_Disagree","BDe_Strcts_Disagree")

count<-1
for (i in c(1:24)){
   for (m in c((i+1):25)) {
   tabcomp[count,1]<-i                                        #List structure number of first structure (COL 1)
   tabcomp[count,2]<-m                                        #List structure number of second structure (COL 2)
   tabcomp[count,3]<-scores[[i]][1]/scores[[m]][1]            #Compute & store bnlearn BGe score ratio (COL 3)
   if(tabcomp[count,3]==1) tabcomp[count,4]<-"-"              #List bnlearn BGe preferred strct  (COL 4)
   if(tabcomp[count,3]<1) tabcomp[count,4]<-m
   if(tabcomp[count,3]>1) tabcomp[count,4]<-i
   tabcomp[count,5]<-scores[[i]][2]/scores[[m]][2]            #Compute & store bnlearn BDe score ratio (COL 5)
   if(tabcomp[count,5]==1) tabcomp[count,6]<-"-"              #List bnlearn BDe preferred strct  (COL 6)
   if(tabcomp[count,5]<1) tabcomp[count,6]<-m
   if(tabcomp[count,5]>1) tabcomp[count,6]<-i   
   tabcomp[count,7]<-scores[[i]][3]/scores[[m]][3]            #Compute & store BiDAG BGe score ratio (COL 7)
   if(tabcomp[count,7]==1) tabcomp[count,8]<-"-"              #List BiDAG BGe preferred strct  (COL 8)
   if(tabcomp[count,7]<1) tabcomp[count,8]<-m
   if(tabcomp[count,7]>1) tabcomp[count,8]<-i
   tabcomp[count,9]<-scores[[i]][4]/scores[[m]][4]            #Compute & store BiDAG BDe score ratio (COL 9)
   if(tabcomp[count,9]==1) tabcomp[count,10]<-"-"             #List BiDAG BDe preferred strct  (COL 10)
   if(tabcomp[count,9]<1) tabcomp[count,10]<-m
   if(tabcomp[count,9]>1) tabcomp[count,10]<-i
   
   {if(tabcomp[count,4]==tabcomp[count,8]) tabcomp[count,11]<-0     #Check is preferred BGe strcts agree, bnlearn vs. BiDAG (COL 11)
   else tabcomp[count,11]<-1}                                       #0= agree; 1= disagrees
   {if(tabcomp[count,6]==tabcomp[count,10]) tabcomp[count,12]<-0    #Check is preferred BDe strcts agree, bnlearn vs. BiDAG (COL 12)
   else tabcomp[count,12]<-1}                                       #0= agree; 1= disagrees
   
   count<-count+1
}}

#Export tabcomp results to Excel file ***Set file directory in R before exporting
dfcomp<-as.data.frame(tabcomp)
write_xlsx(dfcomp,"Wilcox_PHC7083_Project_Routput_Comp25Strcts.xlsx")


 

################################################################################


#randomly select 100 genes from 45,818 available genes (excludes last 5 rows: "no_feature", "ambiguous","too_low_aQual", "not_aligned", "aligment_not_unique")
#set.seed(125)
#indices100 <-sort(sample.int(45818,100))
#indices100 
#mydata100 <-mydata[indices100,]

