# score_with_tree.py
import collections
import itertools
import math



class score(object):
    """Score a graph structure against a table of discrete observations.

    Variables are identified by their column position in ``data``
    (0 .. m-1).  On construction the instance caches, for every variable,
    its observed states, its parents in ``graph`` and every possible joint
    configuration of those parents.

    Attributes:
        graph: the graph object passed in; ``graph.parents[i]`` must be an
            iterable of the column positions of the parents of variable i.
        data: the pandas DataFrame passed in (one column per variable).
        n, m: number of rows and columns of ``data``.
        s: dict, ``s[i]`` is the list of distinct values seen in column i,
            in order of first appearance.
        ri: dict, ``ri[i] == len(s[i])``.
        pi: dict, ``pi[i]`` is the sorted list of parents of variable i.
        conf: dict, ``conf[i]`` is the list of parent configurations of
            variable i (tuples ordered like ``pi[i]``); variables without
            parents have no entry.
        qi: dict, ``qi[i]`` is the number of parent configurations of
            variable i (1 when it has no parents).
    """

    def __init__(self, graph, data):
        """
        graph is a Graph object exposing ``parents[i]`` for every column i
        data is a pandas DataFrame, e.g. one loaded from a CSV file
        """
        self.graph = graph
        self.data = data
        self.n, self.m = data.shape
        self.s, self.ri = self.state()
        self.qi, self.pi, self.conf = self.get_parents()

    def state(self):
        """
        Return ``(s, ri)``.

        s is a dict keyed by column position i holding the list of unique
        values of column i (order of first appearance, not sorted).
        ri is a dict in which ri[i] is the number of states of variable i.
        """
        s = {}
        ri = {}

        for i in range(self.m):
            # Positional access: ``.ix`` has been removed from pandas.
            s[i] = self.data.iloc[:, i].unique().tolist()
            ri[i] = len(s[i])

        return s, ri

    def get_parents(self):
        """
        Return ``(qi, pi, conf)``.

        pi is a dict keyed by i holding the sorted parents of variable i.
        conf is a dict keyed by i holding every joint configuration of the
        parents' states (Cartesian product, tuples ordered like pi[i]);
        variables without parents get no entry.
        qi is a dict keyed by i holding the number of such configurations,
        or 1 for a variable without parents.
        """
        qi = {}
        pi = {}
        conf = {}

        for i in range(self.m):
            pi[i] = sorted(self.graph.parents[i])

            # A variable without parents has a single (empty) configuration.
            if not pi[i]:
                qi[i] = 1
                continue

            conf[i] = list(itertools.product(*[self.s[k] for k in pi[i]]))
            qi[i] = len(conf[i])

        return qi, pi, conf

    def get_Nijk(self):
        """
        Count the observations of every (variable, parent config, state).

        The output is a dict keyed by i whose values are dicts keyed by j
        whose values are dicts keyed by k, so that ``Nijk[i][j][k]`` is the
        number of rows in which variable i takes its k-th state
        (``self.s[i][k]``) while its parents take their j-th configuration
        (``self.conf[i][j]``).  Combinations that never occur in the data
        are reported as 0.

        NOTE(review): cell values are used as dict keys, so they are
        assumed to be hashable and free of missing values (NaN) -- confirm
        against the data being loaded.
        """
        # Pull each column out once as plain Python values so that they
        # compare equal to the states stored in self.s / self.conf.
        columns = [self.data.iloc[:, c].tolist() for c in range(self.m)]

        Nijk = {}
        for i in range(self.m):
            # Count joint occurrences of (parent values..., own value).
            family = [columns[a] for a in self.pi[i]] + [columns[i]]
            observed = collections.Counter(zip(*family))

            # Parentless variables have exactly one, empty, configuration.
            parent_configs = self.conf.get(i, [()])

            Nijk[i] = {}
            for j in range(self.qi[i]):
                config = tuple(parent_configs[j])
                # Counter yields 0 for combinations absent from the data.
                Nijk[i][j] = {
                    k: observed[config + (value,)]
                    for k, value in enumerate(self.s[i])
                }

        return Nijk

    def BDe(self, ess=None, prior=None):
        """
        Return the log Bayesian-Dirichlet score of the graph given the data.

        Uniform Dirichlet hyper-parameters are used: ``ess / (ri * qi)``
        for every (i, j, k) cell and ``ess / qi`` for every (i, j) row.
        The result is the sum over i and j of::

            lgamma(a_ij) - lgamma(N_ij + a_ij)
                + sum_k [lgamma(N_ijk + a_ijk) - lgamma(a_ijk)]

        ess is the equivalent sample size; None means 1.0.  It must be
        strictly positive, otherwise ValueError is raised.
        prior is accepted for interface compatibility and is not used.
        """
        ess = 1.0 if ess is None else float(ess)
        if ess <= 0:
            raise ValueError("ess must be strictly positive, got %r" % (ess,))

        result = 0.0
        Nijk = self.get_Nijk()

        for i in range(self.m):
            # No observed states (empty data): the variable contributes
            # nothing, and skipping it avoids dividing by zero below.
            if not self.ri[i] or not self.qi[i]:
                continue

            # Pseudo-counts depend only on i, so compute them once.
            alpha_ijk = ess / (self.ri[i] * self.qi[i])
            alpha_ij = ess / self.qi[i]

            for j in range(self.qi[i]):
                counts = Nijk[i][j]

                for k in range(self.ri[i]):
                    result += math.lgamma(counts[k] + alpha_ijk) - math.lgamma(alpha_ijk)

                Nij = sum(counts.values())
                result += math.lgamma(alpha_ij) - math.lgamma(Nij + alpha_ij)

        return result