# score.py (2.34 KB)
import itertools
import math

class score(object):
    """Bayesian Dirichlet equivalent (BDe) score of a graph on discrete data.

    ``graph`` must expose ``graph.parents[i]``: a sized iterable holding the
    column positions of the parents of variable ``i``. ``data`` is a pandas
    DataFrame with one row per observation and one column per variable;
    variables are addressed by column position throughout.

    Attributes:
        graph: the graph passed to the constructor.
        data: the DataFrame passed to the constructor.
        n: number of rows of ``data``.
        m: number of columns (variables) of ``data``.
        ri: dict ``i -> number of distinct states of variable i``;
            filled by :meth:`getNijk`.
        qi: dict ``i -> number of parent configurations of variable i``;
            filled by :meth:`getNijk`.
    """

    def __init__(self, graph, data):
        """Store the graph and the data and record the data dimensions.

        graph is a Graph class as we define (only ``graph.parents`` is used).
        data should be a pandas DataFrame, e.g. imported from a CSV file.
        """
        self.graph = graph
        self.data = data
        self.n, self.m = data.shape

        # Per-instance containers: as class attributes these dicts would be
        # shared (and mutated) by every instance of the class.
        self.ri = {}
        self.qi = {}

    def state(self):
        """Return the unique states of each variable.

        Returns:
            dict whose key ``i`` is the position of a column of ``data`` and
            whose value is the list of distinct values in that column. The
            order inside each list is the iteration order of a ``set``.
        """
        # ``iloc`` is purely positional, which is what "the i-th variable"
        # means here (``DataFrame.ix`` no longer exists in current pandas).
        return {i: list(set(self.data.iloc[:, i])) for i in range(self.m)}

    def getNijk(self):
        """Count the observations for every (variable, parent config, state).

        Also refreshes ``self.ri`` and ``self.qi``.

        Returns:
            dict ``Nijk`` such that ``Nijk[i][j][k]`` is the number of rows
            in which variable ``i`` takes its ``k``-th state (as ordered by
            :meth:`state`) while its parents take their ``j``-th joint
            configuration. Configurations are enumerated with
            ``itertools.product`` over the parents' state lists, in the
            order the parents appear in ``graph.parents[i]``. A variable
            without parents has exactly one (empty) configuration, ``j = 0``.
        """
        states = self.state()
        self.ri = {i: len(values) for i, values in states.items()}
        self.qi = {}

        # Materialise every column once so the counting loop below only
        # touches plain Python lists instead of re-scanning the DataFrame.
        columns = [self.data.iloc[:, c].tolist() for c in range(self.m)]

        Nijk = {}
        for i in range(self.m):
            parents = list(self.graph.parents[i])

            # With no parents, product() yields the single empty tuple, so a
            # root variable gets q == 1 rather than being skipped.
            configurations = list(
                itertools.product(*[states[p] for p in parents])
            )
            q = len(configurations)
            self.qi[i] = q

            Nijk[i] = {
                j: {k: 0 for k in range(self.ri[i])} for j in range(q)
            }

            # Lookup tables turn "which j / which k is this row?" into a
            # dictionary access, giving a single pass over the data.
            conf_index = {conf: j for j, conf in enumerate(configurations)}
            state_index = {value: k for k, value in enumerate(states[i])}

            for row in range(self.n):
                j = conf_index.get(tuple(columns[p][row] for p in parents))
                k = state_index.get(columns[i][row])
                # Values that compare unequal to every known state (e.g.
                # NaN) match nothing and are therefore not counted.
                if j is not None and k is not None:
                    Nijk[i][j][k] += 1

        return Nijk

    def BDe(self, prior=None, ess=None):
        """Return the log BDe score of ``graph`` given ``data``.

        Args:
            prior: accepted for interface compatibility; currently unused.
            ess: equivalent sample size. Defaults to ``float(self.m)``, the
                number of variables.

        Returns:
            float: the sum over all i, j of
            ``lgamma(a_ij) - lgamma(N_ij + a_ij)`` plus, over all k,
            ``lgamma(N_ijk + a_ijk) - lgamma(a_ijk)``, where
            ``a_ij = ess / q_i`` and ``a_ijk = ess / (r_i * q_i)``.
        """
        if ess is None:
            ess = float(self.m)

        result = 0.0
        Nijk = self.getNijk()

        for i in range(len(Nijk)):
            by_configuration = Nijk[i]
            if not by_configuration:
                # No parent configuration exists (a parent has no observed
                # state, i.e. empty data): nothing to add for this variable.
                continue

            r, q = self.ri[i], self.qi[i]
            alpha_ij = ess / q
            alpha_ijk = ess / (r * q) if r else 0.0

            for j in range(len(by_configuration)):
                counts = by_configuration[j]

                # lgamma works in log space; gamma() itself overflows for
                # arguments above roughly 171, i.e. for modest data sizes.
                for k in range(len(counts)):
                    result += (math.lgamma(counts[k] + alpha_ijk)
                               - math.lgamma(alpha_ijk))

                N_ij = sum(counts.values())
                result += math.lgamma(alpha_ij) - math.lgamma(N_ij + alpha_ij)

        return result