# score.py (2.73 KB)
import itertools
import math
class score(object):
    """Score a graph structure against discrete data using count statistics.

    For every variable i (a column position of ``data``) the class counts
    how often each state k of i occurs together with each joint
    configuration j of i's parents (``getNijk``), and combines those counts
    into a log Bayesian-Dirichlet score with uniform hyperparameters
    (``BDe``).
    """

    def __init__(self, graph, data):
        """
        graph: object exposing ``parents``; ``graph.parents[i]`` must give
            the collection of parent column positions of variable i.
        data: pandas DataFrame (e.g. loaded from a CSV file) with one row
            per observation and one column per variable.
        """
        self.graph = graph
        self.data = data
        self.n, self.m = data.shape
        # Per-instance bookkeeping filled in by getNijk(). These must not be
        # class attributes: a class-level dict would be shared (and
        # overwritten) by every instance.
        self.ri = {}  # i -> number of states of variable i
        self.qi = {}  # i -> number of parent configurations of variable i

    def state(self):
        """
        Return the unique states of each variable.

        The result is a dict whose key is i, the position of the column in
        ``data``, and whose value is the list of distinct values seen in
        that column. The list is sorted when the values are orderable so
        that the state index k is reproducible between runs; otherwise the
        (arbitrary) set order is kept.
        """
        states = {}
        for i in range(self.m):
            # Columns are addressed by position everywhere in this class.
            distinct = set(self.data.iloc[:, i])
            try:
                states[i] = sorted(distinct)
            except TypeError:
                # Mixed, unorderable values: fall back to set order.
                states[i] = list(distinct)
        return states

    def getNijk(self):
        """
        Count the co-occurrences of every variable with its parents.

        Returns a dict keyed by i (variable), whose values are dicts keyed
        by j (index of the parent configuration), whose values are dicts
        keyed by k (index of the state of i, as ordered by ``state()``)
        holding the count Nijk.

        As a side effect ``self.ri`` (states per variable) and ``self.qi``
        (parent configurations per variable) are rebuilt.
        """
        # States of each variable and how many there are.
        S = self.state()
        num_state = {i: len(values) for i, values in S.items()}
        self.ri = num_state
        self.qi = {}

        # Materialise the rows once instead of re-reading the frame for
        # every (i, j, k) cell.
        rows = [row.tolist() for _, row in self.data.iterrows()]

        Nijk = {}
        for i in range(self.m):
            # Parents of vertex i.
            parent_ids = list(self.graph.parents[i])
            # All possible joint configurations of the parents of i. With
            # no parents, product() yields a single empty configuration,
            # so qi is 1 and every row falls into j == 0.
            conf = list(itertools.product(*[S[x] for x in parent_ids]))
            self.qi[i] = len(conf)

            Nijk[i] = {}
            for j in range(len(conf)):
                Nijk[i][j] = {k: 0 for k in range(num_state[i])}

            # Lookup tables: parent configuration -> j and state value -> k.
            conf_index = {c: j for j, c in enumerate(conf)}
            state_index = {value: k for k, value in enumerate(S[i])}

            for values in rows:
                j = conf_index.get(tuple(values[x] for x in parent_ids))
                k = state_index.get(values[i])
                # Values that equal no known state (e.g. NaN, which never
                # compares equal to itself) are not counted anywhere.
                if j is not None and k is not None:
                    Nijk[i][j][k] += 1
        return Nijk

    def BDe(self, prior=None, ess=None):
        """
        Return the log Bayesian-Dirichlet score of the graph given the data.

        prior: accepted for interface compatibility; currently unused.
        ess: equivalent sample size (defaults to 1.0). The hyperparameters
            are ess / (ri * qi) per cell and ess / qi per parent
            configuration.

        The score is accumulated with ``math.lgamma`` (log-gamma) rather
        than ``log(gamma(...))`` so that large counts do not overflow.
        """
        ess = 1.0 if ess is None else float(ess)
        result = 0.0
        Nijk = self.getNijk()
        for i in range(len(Nijk)):
            r_i = self.ri[i]
            q_i = self.qi[i]
            for j in range(len(Nijk[i])):
                counts = Nijk[i][j]
                for k in range(len(counts)):
                    # Computed inside the loop so a variable without any
                    # state never triggers a division by zero.
                    alpha_ijk = ess / (r_i * q_i)
                    result += (math.lgamma(counts[k] + alpha_ijk)
                               - math.lgamma(alpha_ijk))
                alpha_ij = ess / q_i
                N_ij = sum(counts.values())
                result += math.lgamma(alpha_ij) - math.lgamma(N_ij + alpha_ij)
        return result