score.py 2.34 KB
edit raw blame history



1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121


import itertools
import math

class score(object):
    """
    Scoring helper for a Bayesian network structure.

    Counts the sufficient statistics Nijk from the data and turns them
    into a log score (see BDe).

    After getNijk() has run:
    ri maps variable index i -> number of observed states of variable i
    qi maps variable index i -> number of parent configurations of i
    """

    def __init__(self, graph, data):
        """
        graph is a Graph class as we define; only graph.parents[i] is read
        here, and its entries are used as positional column indices
        data should be a Pandas data frame imported from a csv format file
        """

        self.graph = graph
        self.data = data
        self.n, self.m = data.shape

        # kept per instance: a class-level dict would be shared (and
        # overwritten) by every score object
        self.ri = {}
        self.qi = {}

    def state(self):
        """
        return the unique states for each variable
        return a dict in which the key is i, the position of the
        variable (column) in data, and the value is the list of its
        distinct values in order of first appearance
        """

        states = {}

        for i in range(self.m):
            # iloc is purely positional, matching how rows are indexed
            # when the counts are collected
            states[i] = self.data.iloc[:, i].unique().tolist()

        return states

    def getNijk(self):
        """
        the output is a dictionary in which the keys are i,
        and the values are another dict in which the keys are j,
        and the values are dicts mapping k to Nijk, the number of rows
        where variable i is in its kth state while its parents are in
        their jth configuration

        a variable without parents has exactly one (empty) parent
        configuration, so its counts are stored under j = 0

        as a side effect self.ri and self.qi are rebuilt
        """
        states = self.state()
        ri = {i: len(values) for (i, values) in states.items()}
        qi = {}
        Nijk = {}

        # per variable: (parents, configuration -> j, state -> k)
        lookups = []

        for i in range(self.m):
            parents = list(self.graph.parents[i])

            # all possible configurations of the parents of i; with no
            # parents product() yields the single empty configuration
            configs = list(itertools.product(*[states[p] for p in parents]))
            qi[i] = len(configs)

            Nijk[i] = {
                j: {k: 0 for k in range(ri[i])}
                for j in range(len(configs))
            }
            lookups.append((
                parents,
                {config: j for (j, config) in enumerate(configs)},
                {value: k for (k, value) in enumerate(states[i])},
            ))

        # a single pass over the data instead of one scan per (i, j, k)
        for row in self.data.itertuples(index=False, name=None):
            for i, (parents, config_index, state_index) in enumerate(lookups):
                j = config_index.get(tuple(row[p] for p in parents))
                k = state_index.get(row[i])
                # a value missing from the lookup is skipped, not an error
                if j is not None and k is not None:
                    Nijk[i][j][k] += 1

        self.ri = ri
        self.qi = qi

        return Nijk

    def BDe(self, prior=None, ess=None):
        """
        return the log score of the graph given the data, using the
        uniform pseudo-counts ess/(ri*qi) for every Nijk

        prior is accepted for compatibility but is not used
        ess is the equivalent sample size; it defaults to the number of
        variables and must be positive (ValueError otherwise)
        """
        if ess is None:
            ess = float(self.m)
        else:
            # avoid integer division under Python 2
            ess = float(ess)

        if ess <= 0:
            raise ValueError("ess must be positive, got %r" % (ess,))

        result = 0.0
        Nijk = self.getNijk()

        for i in range(self.m):
            r_i = self.ri[i]
            q_i = self.qi[i]

            # nothing to score for a variable with no observed states
            # or no parent configurations (e.g. empty data)
            if not r_i or not q_i:
                continue

            nij = ess / q_i
            nijk = ess / (r_i * q_i)

            for j in range(q_i):
                counts = Nijk[i][j]

                # lgamma differences instead of log(gamma/gamma):
                # math.gamma overflows once its argument passes ~171
                for k in range(r_i):
                    result += math.lgamma(counts[k] + nijk) - math.lgamma(nijk)

                Nij = sum(counts.values())
                result += math.lgamma(nij) - math.lgamma(Nij + nij)

        return result