mlw.bn.code <-function(score1, n.restart, n.perturb, n.max.iter= Inf, n.maxp, method1, iss1) {

#########################################################
#DATA PREP

#Load required packaages
library(bnlearn)
library(dplyr)

#Import and prep data
mydata <-read.csv(file.choose(),header=T)

#If data file with all OTUs is imported, extract only OTUs to include in models
mydata2 <-data.frame(mydata$diagnosis,mydata$Gt8Met68,mydata$Unc02j5n,mydata$Unc00rn9,mydata$Unc04zvf,mydata$Unc99918,mydata$Unc056y6,mydata$UncBac25,mydata$Unc03u2r,mydata$Unc03tc5,mydata$Unc01ll8,mydata$Unc75554,mydata$Unc03iof,mydata$Unc055sp,mydata$Unc03e44,mydata$Unc05lxa,mydata$VeeSemin,mydata$Unc00qrn,mydata$Unc00v6w,mydata$Unc03mo5,mydata$Unc05n7t,mydata$Unc64172,mydata$Unc05bd1,mydata$Unc054vi,mydata$Unc01pk4,mydata$UncO8895,mydata$Unc91005,mydata$UncG3786,mydata$Unc00y95,mydata$Unc69508,mydata$UncC1868,mydata$Unc053gf,mydata$Unc05mrd,mydata$Unc01c0q,mydata$Unc04zq9,mydata$Unc02q6j,mydata$Unc01w0v,mydata$Unc00kv7,mydata$Unc48286,mydata$Unc91512)

#Rename columns
colnames(mydata2) <-c("diagnosis","Gt8Met68","Unc02j5n","Unc00rn9","Unc04zvf","Unc99918","Unc056y6","UncBac25","Unc03u2r","Unc03tc5","Unc01ll8","Unc75554","Unc03iof","Unc055sp","Unc03e44","Unc05lxa","VeeSemin","Unc00qrn","Unc00v6w","Unc03mo5","Unc05n7t","Unc64172","Unc05bd1","Unc054vi","Unc01pk4","UncO8895","Unc91005","UncG3786","Unc00y95","Unc69508","UncC1868","Unc053gf","Unc05mrd","Unc01c0q","Unc04zq9","Unc02q6j","Unc01w0v","Unc00kv7","Unc48286","Unc91512") 

#Specify all variables as factors 
mydata2[]<-lapply(mydata2,as.factor)

#########################################################
#Naive Bayes Classifier

#Create the network structure
start.time1 <-Sys.time()    
bn1 <<-naive.bayes(mydata2,training="diagnosis")
end.time1 <-Sys.time()
elasped.time1 <- end.time1-start.time1

#Extract and save goodness-of-fit scores
score1.loglik <-logLik(bn1,mydata2)
score1.AIC <-AIC(bn1,mydata2)
score1.BIC <-BIC(bn1,mydata2)

#Run 3-fold cross-validation to assess prediction performance
cv1 <<-bn.cv(mydata2,bn1, fit="bayes", fit.args=list(iss=iss1), loss="pred-lw", loss.args=list(target="diagnosis"),method="k-fold", k=3, runs=100)

#Save structure as file that can be opened in GeNIe
#write.net("bn1_GeNIe.net", fitted.bn1)

######################################################### 
#LEARN BN USING HILL-CLIMBING ALGORITHM 
     
#Learn the network structure     
start.time2 <-Sys.time()    
bn2 <<-hc(mydata2,score=score1,restart=n.restart, perturb=n.perturb, max.iter=n.max.iter, maxp=n.maxp)
end.time2 <-Sys.time()
elasped.time2 <- end.time2-start.time2

#Extract and save goodness-of-fit scores
score2.loglik <-logLik(bn2,mydata2)
score2.AIC <-AIC(bn2,mydata2)
score2.BIC <-BIC(bn2,mydata2)

#Run 3-fold cross-validation to assess prediction performance
cv2 <<-bn.cv(mydata2,bn2, fit="bayes", fit.args=list(iss=iss1), loss="pred-lw", loss.args=list(target="diagnosis"),method="k-fold", k=3, runs=100)

#Save structure as file that can be opened in GeNIe
#write.net("bn2_GeNIe.net", fitted.bn2)


#########################################################
#PRINT RESULTS

cat("***Naive Bayes Classifier***\n\n")
cat("Start time: ")
print(start.time1)
cat("End time: ")
print(end.time1)
cat("Elasped time: ")
print(elasped.time1)
cat("\n")
print(bn1)
cat("\nGoodness-of-Fit\nloglike Score: ",score1.loglik)
cat("\nAIC Score: ",score1.AIC)
cat("\nBIC Score: ",score1.BIC,"\n\n")
print(cv1)
plot(bn1)

cat("\n\n***Learned Bayesian Network***\n\n")
cat("Start time: ")
print(start.time2)
cat("End time: ")
print(end.time2)
cat("Elasped time: ")
print(elasped.time2)
cat("\n")
print(bn2)
cat("\nGoodness-of-Fit\nloglike Score: ",score2.loglik)
cat("\nAIC Score: ",score2.AIC)
cat("\nBIC Score: ",score2.BIC,"\n\n")
print(cv2)
plot(bn2)

plot(cv1, cv2, xlab=c("NBC","HC"),connect=TRUE)

########################################################
#Estimate parameters from the selected network structure
if(score1.loglik>score2.loglik){
     fitted.bn1 <-bn.fit(bn1, mydata2, method=method1, iss=iss1)
     cat("\n\nSelected Model: Naive Bayes Classifier\n")
     print(fitted.bn1)
}
if(score1.loglik<score2.loglik){
     fitted.bn2 <-bn.fit(bn2, mydata2, method=method1, iss=iss1)
     cat("\n\nSelected Model: BN learned from Hill-Climbing algorithm\n")
     print(fitted.bn2)
}

################## END OF FUNCTION ####################
}

#Call function
mlw.bn.code(score1="loglik", n.restart=1000, n.perturb=10000, n.max.iter= Inf, n.maxp=5, method1="bayes", iss1=1)





#Below not used yet


cpquery(bn.fit,n=100,event=(SIBDQScore_cat="0"),evidence=list(diagnosis="2",Shann_Index_exp_cat="0",SleepSatisfaction="2"),method="lw")

library(Rgraphviz)
library(gRain)
junction <-as.grain(fitted)
j.diag.SI.sSat <-setEvidence(junction,nodes=c("diagnosis","Shann_Index_exp_cat","SleepSatisfaction"),states=c("2","0","2"))
SIBDQ.cpt <-querygrain(j.diag.SI.sSat,nodes=c("SIBDQScore_cat"),type="marginal")
SIBDQ.cpt


