score.py
2.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import itertools
import math
class score(object):
    """Count-based scoring of a graph structure against discrete data.

    ``graph`` must expose ``graph.parents[i]``: the column positions of the
    parents of variable ``i``.  ``data`` is a pandas DataFrame (for example
    loaded from a CSV file) with one column per variable and one row per
    observation.

    After ``getNijk`` (or ``BDe``) has run:
      * ``self.ri[i]`` is the number of distinct states of variable ``i``;
      * ``self.qi[i]`` is the number of parent configurations of variable
        ``i`` (1 when the variable has no parents).
    """

    def __init__(self, graph, data):
        """
        graph is a Graph class as we define
        data should be a pandas data frame imported from a csv format file
        """
        self.graph = graph
        self.data = data
        # n observations (rows), m variables (columns)
        self.n, self.m = data.shape
        # Per-instance tables; class-level dicts would be shared (and
        # silently overwritten) by every instance of the class.
        self.ri = {}
        self.qi = {}

    def state(self):
        """
        return the unique states for each variable
        return a dict in which the key is i, the ith
        number of variable in data
        """
        # .iloc is the positional indexer; the old .ix indexer no longer
        # exists in current pandas.
        return {i: list(set(self.data.iloc[:, i])) for i in range(self.m)}

    def getNijk(self):
        """
        the output is a dictionary in which the keys are i,
        and the values are another dict in which the keys are j,
        and the values are dicts mapping k to Nijk

        Nijk is the number of rows in which variable i takes its kth state
        (in the order returned by ``state()``) while its parents take their
        jth configuration (in ``itertools.product`` order over the parents'
        states).  Also refreshes ``self.ri`` and ``self.qi``.
        """
        states = self.state()
        ri = {i: len(values) for i, values in states.items()}
        qi = {}
        Nijk = {}
        # Materialise the rows once instead of re-walking the frame for
        # every (i, j, k) combination.
        rows = self.data.values.tolist()

        for i in range(self.m):
            parents = list(self.graph.parents[i])
            # With no parents, product() yields exactly one empty tuple, so
            # a root variable gets a single configuration (q = 1) rather
            # than being dropped from the counts.
            configs = list(itertools.product(*[states[p] for p in parents]))
            qi[i] = len(configs)

            config_index = {c: j for j, c in enumerate(configs)}
            state_index = {s: k for k, s in enumerate(states[i])}
            counts = {
                j: dict.fromkeys(range(ri[i]), 0) for j in range(len(configs))
            }

            for row in rows:
                j = config_index.get(tuple(row[p] for p in parents))
                k = state_index.get(row[i])
                # Rows that match no known configuration/state are skipped.
                if j is not None and k is not None:
                    counts[j][k] += 1
            Nijk[i] = counts

        self.ri = ri
        self.qi = qi
        return Nijk

    def BDe(self, prior=None, ess=None):
        """Return the log score of the graph given the data.

        For every variable i and parent configuration j this accumulates
            sum_k [lgamma(Nijk + a_ijk) - lgamma(a_ijk)]
            + lgamma(a_ij) - lgamma(Nij + a_ij)
        with a_ij = ess / qi and a_ijk = ess / (ri * qi).

        prior -- accepted for interface compatibility; not used.
        ess   -- equivalent sample size; defaults to the number of
                 variables (columns) in the data.
        """
        if ess is None:
            ess = self.m
        ess = float(ess)  # avoid integer division on Python 2

        Nijk = self.getNijk()
        result = 0.0
        for i in range(self.m):
            # Nothing to score when there are no states or configurations
            # (e.g. empty data); also avoids dividing by zero.
            if not self.ri[i] or not self.qi[i]:
                continue
            a_ij = ess / self.qi[i]
            a_ijk = a_ij / self.ri[i]
            for counts in Nijk[i].values():
                # lgamma differences instead of log(gamma/gamma):
                # math.gamma overflows for arguments above roughly 171.
                for n_ijk in counts.values():
                    result += math.lgamma(n_ijk + a_ijk) - math.lgamma(a_ijk)
                n_ij = sum(counts.values())
                result += math.lgamma(a_ij) - math.lgamma(n_ij + a_ij)
        return result