setwd("/home/qja0428/Dropbox/research/Yoo") #setwd("/Users/jinganqu/Dropbox/research/Yoo/code") library(bnlearn) data(learning.test) dataset = learning.test a = hc(dataset, score='bde') plot(a) from <- a$arcs[,1] to <- a$arcs[,2] m <- ncol(dataset) n <- nrow(dataset) #a function that return the variable position position <- function(x, data) { result <- which(x == names(data)) return(result) } from <- sapply(from,position,data=dataset) to <- sapply(to,position,data=dataset) score(a,data=dataset,type="bde") #############simple test sample############ test <- learning.test[1:5,1:3] test[2,3] <- "b" test <- data.frame(apply(test,2,as.character)) dag <- hc(test, score = "bic") score(dag,data=test,type="bde") #-10.37759 #bde score by hand score <- log(gamma(.5)/gamma(3.5)) + log(gamma(3.25)/gamma(.25)) + log(gamma(.25)/gamma(.25)) + log(gamma(.5)/gamma(2.5)) + log(gamma(1.25)/gamma(.25)) + log(gamma(1.25)/gamma(.25)) + log(gamma(.5)/gamma(3.5)) + log(gamma(2.25)/gamma(.25)) + log(gamma(1.25)/gamma(.25)) + log(gamma(.5)/gamma(2.5)) + log(gamma(.25)/gamma(.25)) + log(gamma(2.25)/gamma(.25)) write.csv(test,"./code/test.csv",row.names = F) ################################################ ######check all the graph of 3 variables######## ################################################ ######generate data set.seed(123) A <- sample(1:2,5000,replace = T) set.seed(428) B <- sample(1:2,5000,replace = T) set.seed(403) C <- sample(1:2,5000,replace = T) dataset <- data.frame(A,B,C) #save the dataset write.csv(dataset,"test_three_variables.csv",row.names = F) dataset <- read.csv("test_three_variables.csv") #make the variable as factor dataset <- apply(dataset,2,as.factor) dataset <- as.data.frame(dataset) score_bde <- numeric(25) #graph 1 A B C g1 <- empty.graph(names(dataset)) score_bde[1] <- score(g1,data = dataset, type="bde") #-10405.17 #graph 2 A->B C g2 <- set.arc(g1,"A","B") score_bde[2] <- score(g2,data = dataset, type="bde") #-10408.04 #graph 3 C->A->B g3 <- set.arc(g2,"C","A") score_bde[3] <- score(g3,data = dataset, type="bde") #-10411.15 #graph 4 B->A<-C g4 <- empty.graph(names(dataset)) modelstring(g4) <- "[B][C][A|B:C]" score_bde[4] <- score(g4,data = dataset, type="bde") #-10414.47 #graph 5 B->C->A g5 <- empty.graph(names(dataset)) modelstring(g5) <- "[B][C|B][A|C]" score_bde[5] <- score(g5,data = dataset, type="bde") #-10411.13 #graph 6 g6 <- empty.graph(names(dataset)) modelstring(g6) <- "[B][C|B][A|C:B]" score_bde[6] <- score(g6,data = dataset, type="bde") #-10417.33 #graph 7 g7 <- empty.graph(names(dataset)) modelstring(g7) <- "[C][A|C][B|C]" score_bde[7] <- score(g7,data = dataset, type="bde") #-10411.13 #graph 8 g8 <- empty.graph(names(dataset)) modelstring(g8) <- "[C][A|C][B|C:A]" score_bde[8] <- score(g8,data = dataset, type="bde") #-10417.33 #graph 9 g9 <- empty.graph(names(dataset)) modelstring(g9) <- "[C][A|C:B][B|C]" score_bde[9] <- score(g9,data = dataset, type="bde") #-10417.33 #graph 10 g10 <- empty.graph(names(dataset)) modelstring(g10) <- "[C|A][A][B]" score_bde[10] <- score(g10,data = dataset, type="bde") #-10408.27 #graph 11 g11 <- empty.graph(names(dataset)) modelstring(g11) <- "[C|A][A][B|A]" score_bde[11] <- score(g11,data = dataset, type="bde") #-10411.15 #graph 12 g12 <- empty.graph(names(dataset)) modelstring(g12) <- "[A|B][B][C|A]" score_bde[12] <- score(g12,data = dataset, type="bde") #-10411.15 #graph 13 g13 <- empty.graph(names(dataset)) modelstring(g13) <- "[A|B][B][C]" score_bde[13] <- score(g13,data = dataset, type="bde") #-10408.04 #graph 14 g14 <- empty.graph(names(dataset)) modelstring(g14) <- "[A][B][C|A:B]" score_bde[14] <- score(g14,data = dataset, type="bde") #-10414.45 #graph 15 g15 <- empty.graph(names(dataset)) modelstring(g15) <- "[A][B|A][C|A:B]" score_bde[15] <- score(g15,data = dataset, type="bde") #-10417.33 #graph 16 g16 <- empty.graph(names(dataset)) modelstring(g16) <- "[A|B][B][C|A:B]" score_bde[16] <- score(g16,data = dataset, type="bde") #-10417.33 #graph 17 g17 <- empty.graph(names(dataset)) modelstring(g17) <- "[A][B|C][C|A]" score_bde[17] <- score(g17,data = dataset, type="bde") #-10411.13 #graph 18 g18 <- empty.graph(names(dataset)) modelstring(g18) <- "[A][B|C:A][C|A]" score_bde[18] <- score(g18,data = dataset, type="bde") #-10417.33 #graph 19 g19 <- empty.graph(names(dataset)) modelstring(g19) <- "[A][B][C|B]" score_bde[19] <- score(g19,data = dataset, type="bde") #-10408.03 #graph 20 g20 <- empty.graph(names(dataset)) modelstring(g20) <- "[A][B|A][C|B]" score_bde[20] <- score(g20,data = dataset, type="bde") #-10410.9 #graph 21 g21 <- empty.graph(names(dataset)) modelstring(g21) <- "[A|B][B][C|B]" score_bde[21] <- score(g21,data = dataset, type="bde") #-10410.9 #graph 22 g22 <- empty.graph(names(dataset)) modelstring(g22) <- "[A][B|C][C]" score_bde[22] <- score(g22,data = dataset, type="bde") #-10408.03 #graph 23 g23 <- empty.graph(names(dataset)) modelstring(g23) <- "[A][B|C:A][C]" score_bde[23] <- score(g23,data = dataset, type="bde") #-10414.22 #graph 24 g24 <- empty.graph(names(dataset)) modelstring(g24) <- "[A|B][B|C][C]" score_bde[24] <- score(g24,data = dataset, type="bde") #-10410.9 #graph 25 g25 <- empty.graph(names(dataset)) modelstring(g25) <- "[A|C][B][C]" score_bde[25] <- score(g25,data = dataset, type="bde") #-10408.27 m <- ncol(dataset) n <- nrow(dataset) #a function that return the variable position position <- function(x, data) { result <- which(x == names(data)) return(result) } #bde score from python score_bde_python <- c(-10409.216534618594, -10413.72828956955, -10418.467521622148, -10423.856345541608, -10418.448531487986, -10418.44853148799, -10418.44853148799, -10428.34911035841, -10428.34911035841, -10413.95576667119, -10418.467521622146, -10418.467521622146, -10413.72828956955, -10423.837355407451, -10428.349110358407, -10428.349110358407, -10418.448531487988, -10428.349110358407, -10413.709299435392, -10418.221054386348, -10418.221054386348, -10413.709299435392, -10423.609878305811, -10418.221054386351, -10413.955766671188) arc <- data.frame() #output a pdf file pdf("plot.pdf") for (i in 1:25) { s <- paste0("g",i) tmp <- eval(parse(text=s)) #plot plot(tmp,main=paste("BDe score (python): ", score_bde_python[i], "\nBDe score (r): ", score_bde[i])) } dev.off() score_bde - score_bde_python for (i in 1:25) { s <- paste0("g",i) tmp <- eval(parse(text=s)) #plot plot(tmp,main=paste("BDe score: ", score_bde_python[i])) from <- tmp$arcs[,1] to <- tmp$arcs[,2] if (length(from) == 0) next else { from <- sapply(from,position,data=dataset) - 1 to <- sapply(to,position,data=dataset) - 1 l <- rep(i,length(from)) t <- data.frame(from, to, l) arc <- rbind(arc, t) } } names(arc) <- c("from","to","graph") write.csv(arc,"arc.csv",row.names = F)