bnlearn.R 7.24 KB
setwd("/home/qja0428/Dropbox/research/Yoo")
#setwd("/Users/jinganqu/Dropbox/research/Yoo/code")

library(bnlearn)

data(learning.test)

dataset = learning.test

a = hc(dataset, score='bde')
plot(a)

from <- a$arcs[,1]
to <- a$arcs[,2]
m <- ncol(dataset)
n <- nrow(dataset)
#a function that return the variable position
position <- function(x, data) {
  result <- which(x == names(data))
  return(result)
}

from <- sapply(from,position,data=dataset)
to <- sapply(to,position,data=dataset)

score(a,data=dataset,type="bde")









#############simple test sample############
test <- learning.test[1:5,1:3]
test[2,3] <- "b"
test <- data.frame(apply(test,2,as.character))

dag <- hc(test, score = "bic")

score(dag,data=test,type="bde")
#-10.37759
#bde score by hand
score <- log(gamma(.5)/gamma(3.5)) + log(gamma(3.25)/gamma(.25)) + log(gamma(.25)/gamma(.25)) +
  log(gamma(.5)/gamma(2.5)) + log(gamma(1.25)/gamma(.25)) + log(gamma(1.25)/gamma(.25)) + 
  log(gamma(.5)/gamma(3.5)) + log(gamma(2.25)/gamma(.25)) + log(gamma(1.25)/gamma(.25)) +
  log(gamma(.5)/gamma(2.5)) + log(gamma(.25)/gamma(.25)) + log(gamma(2.25)/gamma(.25))

write.csv(test,"./code/test.csv",row.names = F)


################################################
######check all the graph of 3 variables########
################################################

######generate data
set.seed(123)
A <- sample(1:2,5000,replace = T)
set.seed(428)
B <- sample(1:2,5000,replace = T)
set.seed(403)
C <- sample(1:2,5000,replace = T)

dataset <- data.frame(A,B,C)
#save the dataset
write.csv(dataset,"test_three_variables.csv",row.names = F)

dataset <- read.csv("test_three_variables.csv")
#make the variable as factor
dataset <- apply(dataset,2,as.factor)
dataset <- as.data.frame(dataset)

score_bde <- numeric(25)

#graph 1  A B C
g1 <- empty.graph(names(dataset))
score_bde[1] <- score(g1,data = dataset, type="bde") #-10405.17

#graph 2 A->B C
g2 <- set.arc(g1,"A","B")
score_bde[2] <- score(g2,data = dataset, type="bde") #-10408.04

#graph 3  C->A->B
g3 <- set.arc(g2,"C","A")
score_bde[3] <- score(g3,data = dataset, type="bde") #-10411.15


#graph 4 B->A<-C
g4 <- empty.graph(names(dataset))
modelstring(g4) <- "[B][C][A|B:C]"
score_bde[4] <- score(g4,data = dataset, type="bde") #-10414.47

#graph 5 B->C->A
g5 <- empty.graph(names(dataset))
modelstring(g5) <- "[B][C|B][A|C]"
score_bde[5] <- score(g5,data = dataset, type="bde") #-10411.13

#graph 6 
g6 <- empty.graph(names(dataset))
modelstring(g6) <- "[B][C|B][A|C:B]"
score_bde[6] <- score(g6,data = dataset, type="bde") #-10417.33

#graph 7
g7 <- empty.graph(names(dataset))
modelstring(g7) <- "[C][A|C][B|C]"
score_bde[7] <- score(g7,data = dataset, type="bde") #-10411.13

#graph 8
g8 <- empty.graph(names(dataset))
modelstring(g8) <- "[C][A|C][B|C:A]"
score_bde[8] <- score(g8,data = dataset, type="bde") #-10417.33

#graph 9
g9 <- empty.graph(names(dataset))
modelstring(g9) <- "[C][A|C:B][B|C]"
score_bde[9] <- score(g9,data = dataset, type="bde") #-10417.33

#graph 10
g10 <- empty.graph(names(dataset))
modelstring(g10) <- "[C|A][A][B]"
score_bde[10] <- score(g10,data = dataset, type="bde") #-10408.27

#graph 11
g11 <- empty.graph(names(dataset))
modelstring(g11) <- "[C|A][A][B|A]"
score_bde[11] <- score(g11,data = dataset, type="bde") #-10411.15

#graph 12
g12 <- empty.graph(names(dataset))
modelstring(g12) <- "[A|B][B][C|A]"
score_bde[12] <- score(g12,data = dataset, type="bde") #-10411.15

#graph 13
g13 <- empty.graph(names(dataset))
modelstring(g13) <- "[A|B][B][C]"
score_bde[13] <- score(g13,data = dataset, type="bde") #-10408.04


#graph 14
g14 <- empty.graph(names(dataset))
modelstring(g14) <- "[A][B][C|A:B]"
score_bde[14] <- score(g14,data = dataset, type="bde") #-10414.45


#graph 15
g15 <- empty.graph(names(dataset))
modelstring(g15) <- "[A][B|A][C|A:B]"
score_bde[15] <- score(g15,data = dataset, type="bde") #-10417.33


#graph 16
g16 <- empty.graph(names(dataset))
modelstring(g16) <- "[A|B][B][C|A:B]"
score_bde[16] <- score(g16,data = dataset, type="bde") #-10417.33


#graph 17
g17 <- empty.graph(names(dataset))
modelstring(g17) <- "[A][B|C][C|A]"
score_bde[17] <- score(g17,data = dataset, type="bde") #-10411.13


#graph 18
g18 <- empty.graph(names(dataset))
modelstring(g18) <- "[A][B|C:A][C|A]"
score_bde[18] <- score(g18,data = dataset, type="bde") #-10417.33


#graph 19
g19 <- empty.graph(names(dataset))
modelstring(g19) <- "[A][B][C|B]"
score_bde[19] <- score(g19,data = dataset, type="bde") #-10408.03


#graph 20
g20 <- empty.graph(names(dataset))
modelstring(g20) <- "[A][B|A][C|B]"
score_bde[20] <- score(g20,data = dataset, type="bde") #-10410.9


#graph 21
g21 <- empty.graph(names(dataset))
modelstring(g21) <- "[A|B][B][C|B]"
score_bde[21] <- score(g21,data = dataset, type="bde") #-10410.9

#graph 22
g22 <- empty.graph(names(dataset))
modelstring(g22) <- "[A][B|C][C]"
score_bde[22] <- score(g22,data = dataset, type="bde") #-10408.03


#graph 23
g23 <- empty.graph(names(dataset))
modelstring(g23) <- "[A][B|C:A][C]"
score_bde[23] <- score(g23,data = dataset, type="bde") #-10414.22


#graph 24
g24 <- empty.graph(names(dataset))
modelstring(g24) <- "[A|B][B|C][C]"
score_bde[24] <- score(g24,data = dataset, type="bde") #-10410.9


#graph 25
g25 <- empty.graph(names(dataset))
modelstring(g25) <- "[A|C][B][C]"
score_bde[25] <- score(g25,data = dataset, type="bde") #-10408.27

m <- ncol(dataset)
n <- nrow(dataset)
#a function that return the variable position
position <- function(x, data) {
  result <- which(x == names(data))
  return(result)
}
#bde score from python
score_bde_python <- c(-10409.216534618594,
                      -10413.72828956955,
                      -10418.467521622148,
                      -10423.856345541608,
                      -10418.448531487986,
                      -10418.44853148799,
                      -10418.44853148799,
                      -10428.34911035841,
                      -10428.34911035841,
                      -10413.95576667119,
                      -10418.467521622146,
                      -10418.467521622146,
                      -10413.72828956955,
                      -10423.837355407451,
                      -10428.349110358407,
                      -10428.349110358407,
                      -10418.448531487988,
                      -10428.349110358407,
                      -10413.709299435392,
                      -10418.221054386348,
                      -10418.221054386348,
                      -10413.709299435392,
                      -10423.609878305811,
                      -10418.221054386351,
                      -10413.955766671188)

arc <- data.frame()

#output a pdf file
pdf("plot.pdf")
for (i in 1:25) {
  s <- paste0("g",i)
  tmp <- eval(parse(text=s))
  #plot
  plot(tmp,main=paste("BDe score (python): ", score_bde_python[i], 
       "\nBDe score (r): ", score_bde[i]))
}
dev.off()


score_bde - score_bde_python


for (i in 1:25) {
  s <- paste0("g",i)
  tmp <- eval(parse(text=s))
  #plot
  plot(tmp,main=paste("BDe score: ", score_bde_python[i]))
  from <- tmp$arcs[,1]
  to <- tmp$arcs[,2]
  if (length(from) == 0) next
  else {
    from <- sapply(from,position,data=dataset) - 1
    to <- sapply(to,position,data=dataset) - 1
    l <- rep(i,length(from))
    t <- data.frame(from, to, l)
    arc <- rbind(arc, t)
  }
}
names(arc) <- c("from","to","graph")
write.csv(arc,"arc.csv",row.names = F)