# score.py
# (repository-viewer header and line-number gutter below are scrape residue, not code)



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137


import itertools
import math

class score(object):
    """Log BDe-style score of a graph structure against discrete data.

    For every variable ``i`` (a column of ``data``), every configuration
    ``j`` of its parents and every state ``k`` of the variable, ``N_ijk``
    is the number of rows in which variable ``i`` is in state ``k`` while
    its parents are in configuration ``j``.  :meth:`BDe` combines these
    counts as::

        sum_i sum_j [ lgamma(a_ij) - lgamma(N_ij + a_ij)
                      + sum_k ( lgamma(N_ijk + a_ijk) - lgamma(a_ijk) ) ]

    with ``a_ijk = ess / (r_i * q_i)`` and ``a_ij = ess / q_i``, where
    ``r_i`` is the number of states of variable ``i`` and ``q_i`` the
    number of parent configurations.

    Attributes:
        graph: object exposing ``parents[i]``, an iterable of the column
            positions of the parents of variable ``i``.
        data: pandas DataFrame; variables are addressed by column position.
        n, m: number of rows and columns of ``data``.
        ri: dict ``i -> r_i``; filled by :meth:`getNijk`.
        qi: dict ``i -> q_i``; filled by :meth:`getNijk`.
    """

    def __init__(self, graph, data):
        """
        graph is a Graph object exposing ``parents[i]`` (iterable of the
        column positions of the parents of variable i)
        data should be a pandas DataFrame, e.g. loaded from a CSV file
        """
        self.graph = graph
        self.data = data
        self.n, self.m = data.shape

        # Per-instance dicts.  They used to be mutable class attributes,
        # which made every instance write into the same shared ``qi``.
        self.ri = {}
        self.qi = {}

    def state(self):
        """
        Return the unique states of each variable.

        The result is a dict whose key is i, the position of the variable
        (column) in data, and whose value is a list of the distinct values
        found in that column.  The order of the list is the iteration
        order of a ``set`` and defines the state index k used by getNijk.
        """
        states = {}

        for i in range(self.m):
            # .iloc: strictly positional, matching how rows are indexed
            # elsewhere in this class (and .ix no longer exists in pandas).
            states[i] = list(set(self.data.iloc[:, i].tolist()))

        return states

    def getNijk(self):
        """
        Count the observations for every (variable, parent config, state).

        Returns a dict keyed by i whose values are dicts keyed by j
        (index of the parent configuration, in ``itertools.product`` order
        over the parents' states) whose values are dicts keyed by k (index
        of the state of variable i in ``self.state()[i]``) holding the
        count Nijk.  A variable without parents has the single
        configuration j = 0.

        As a side effect ``self.ri`` and ``self.qi`` are rebound to fresh
        dicts holding the number of states / parent configurations of
        each variable.
        """
        states = self.state()
        ri = {i: len(values) for i, values in states.items()}
        qi = {}

        # Extract every column once instead of re-materialising each row
        # for every (i, j, k) cell.
        columns = [self.data.iloc[:, i].tolist() for i in range(self.m)]

        Nijk = {}
        for i in range(self.m):
            parents = list(self.graph.parents[i])

            # All parent configurations.  With no parents product() yields
            # exactly one configuration, the empty tuple, so qi is 1.
            configs = list(itertools.product(*[states[p] for p in parents]))
            qi[i] = len(configs)

            counts = {
                j: {k: 0 for k in range(ri[i])}
                for j in range(len(configs))
            }
            config_index = {config: j for j, config in enumerate(configs)}
            state_index = {value: k for k, value in enumerate(states[i])}
            parent_columns = [columns[p] for p in parents]

            # Single pass over the rows of this variable.
            for row, value in enumerate(columns[i]):
                j = config_index.get(tuple(col[row] for col in parent_columns))
                k = state_index.get(value)
                # Values that equal no recorded state (e.g. NaN, which is
                # unequal to itself) are not counted, as before.
                if j is not None and k is not None:
                    counts[j][k] += 1

            Nijk[i] = counts

        self.ri = ri
        self.qi = qi
        return Nijk

    def BDe(self, prior=None, ess=None):
        """
        Return the log score described in the class docstring.

        prior: currently unused; kept so existing callers keep working.
        ess:   equivalent sample size spread uniformly over the cells of
               each variable; defaults to 1.0.

        Raises ValueError if ess is not a positive number.
        """
        ess = 1.0 if ess is None else float(ess)
        if not ess > 0:
            raise ValueError("ess must be a positive number")

        result = 0.0
        Nijk = self.getNijk()

        for i in range(len(Nijk)):
            r, q = self.ri[i], self.qi[i]
            if not r or not q:
                # No states or no parent configurations (empty data):
                # the variable contributes nothing to the sum.
                continue

            nijk = ess / (r * q)
            nij = ess / q

            for j in range(len(Nijk[i])):
                cell_counts = Nijk[i][j]

                for k in range(len(cell_counts)):
                    result += math.lgamma(cell_counts[k] + nijk) - math.lgamma(nijk)

                Nij = sum(cell_counts.values())
                result += math.lgamma(nij) - math.lgamma(Nij + nij)

        return result