#RMarkovBlanket.r 12.2 KB
#Efrain Gonzalez
#8/25/2017
#Code for Markov Blanket


#The required libraries
library(pryr)
library(MASS)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)


#Have the user choose an original Dot file that they want to use
DotFile <- file.choose()
#Part 1 of the Dot file: the node declarations.
#Keep only lines that declare a node: drop missing lines, edge lines and any
#other line containing ">", brace lines, and the Banjo label/labeljust header
#lines. (Lines containing ">" used to be kept as placeholder "0","0" rows;
#they are now removed before parsing instead.)
TheDotP1 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>%
	dplyr::filter(!is.na(X1)) %>%
	dplyr::filter(!grepl(">|[{}]|Banjo|labeljust",X1))
sizeDotP1 <- nrow(TheDotP1)
#NewDotP1 has one row per node: column 1 = variable name, column 2 = node number
NewDotP1 <- matrix("0",ncol = 2, nrow = sizeDotP1)
#Pull the column out as a plain character vector before any string handling
nodeLinesP1 <- str_trim(as.character(TheDotP1[[1]]))
#seq_len() so that a file without node lines simply yields an empty matrix
for(counterP1 in seq_len(sizeDotP1)){
	coldataP1 <- nodeLinesP1[counterP1]
	tokensP1 <- strsplit(coldataP1," ")[[1]]
	#The first space-separated token is the node number
	NumberP1 <- tokensP1[1]
	#The second token carries the quoted name: split it on the quotes and keep
	#the piece that starts with a word character
	piecesP1 <- strsplit(tokensP1[2],"\"")[[1]]
	VarNameP1 <- piecesP1[grep("^\\w|^\\d",piecesP1)]
	#Anything other than exactly one name cannot be stored in a single cell;
	#fail with a message that shows the offending line
	if(length(VarNameP1) != 1){
		stop("Could not read exactly one variable name from Dot line: ",coldataP1,call. = FALSE)
	}
	NewDotP1[counterP1,1] <- VarNameP1
	NewDotP1[counterP1,2] <- NumberP1
}


#Part 2 of the Dot file: the edges. Keep only the lines that contain "->"
TheDotP2 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>%
	dplyr::filter(grepl("->",X1))
sizeDotP2 <- nrow(TheDotP2)
#Pull the column out as a plain character vector before any string handling
edgeLinesP2 <- str_trim(as.character(TheDotP2[[1]]))
#Everything before the first "->" is the parent; what follows it, up to the
#first ";", is the child
edgePartsP2 <- strsplit(edgeLinesP2,"->")
ParentNumP2 <- vapply(edgePartsP2,function(p) p[1],character(1))
ChildNumP2 <- vapply(edgePartsP2,function(p) strsplit(p[2],";")[[1]][1],character(1))
#NewDotP2 has one row per edge. Both ends are trimmed so that "0 -> 1;" gives
#the same node numbers as "0->1;". A file without edges gives a 0-row matrix.
NewDotP2 <- matrix(str_trim(c(ParentNumP2,ChildNumP2)),ncol = 2,
	dimnames = list(NULL,c("Parents","Children")))

#Matching numbers to variable names
#Each node number in the edge list is looked up exactly in NewDotP1
#(column 2 = number, column 1 = name). The lookup always reads the untouched
#numbers from NewDotP2, so a variable whose name happens to look like another
#node's number is never translated a second time. Numbers that have no entry
#in NewDotP1 are left as they are; when a number is listed more than once the
#first entry is used.
numbersToNames <- function(ids){
	hit <- match(ids,NewDotP1[,2])
	known <- !is.na(hit)
	ids[known] <- NewDotP1[hit[known],1]
	ids
}
NewDotP2_2 <- NewDotP2
#Where is the variable located within NewDotP2 (column one only)?
NewDotP2_2[,1] <- numbersToNames(NewDotP2[,1])
#Show the edge list once the parent column has been translated
NewDotP2_2
#Where is the variable located within NewDotP2 (column two only)?
NewDotP2_2[,2] <- numbersToNames(NewDotP2[,2])

#Number of edges in the translated edge list
lrgMarkov <- dim(NewDotP2_2)[1]
#Blanky: the Markov-blanket degree at which VariableEndName is first reached
#when starting from VariableStartName.
#
#Degree 1 holds three sets for VariableStartName: its parents, its children
#and the other parents of its children (co-parents). Every further degree
#turns each set of the previous degree into three new sets, in this order:
#the parents of its members, the children of its members, and the parents of
#those children. The search stops at the first degree in which any set
#contains VariableEndName.
#
#Arguments:
#	MarkovDegree      - largest degree to expand; a single number >= 1
#	VariableStartName - name of the variable the search starts from
#	VariableEndName   - name of the variable that is being looked for
#Reads the global edge matrix NewDotP2_2 (column 1 = parent, column 2 = child).
#Names are compared as exact strings, never as regular expressions.
#
#Returns the degree at which VariableEndName was first seen. If it is never
#seen the loop runs to its end and MarkovDegree is returned, so a result equal
#to MarkovDegree does not by itself show that the variable was found.
#
#NOTE: the number of sets triples with every degree (3, 9, 27, ...), so a
#search that has to go deep becomes expensive quickly.
Blanky <- function(MarkovDegree = 20, VariableStartName = "Alzheimer", VariableEndName = "GRIN2A"){
	stopifnot(is.numeric(MarkovDegree), length(MarkovDegree) == 1, MarkovDegree >= 1)

	##Work on a local copy of the edge list without column names
	edges <- NewDotP2_2
	colnames(edges) <- NULL

	##For every variable in vars (in order) collect the toCol entries of the
	##rows whose fromCol entry equals it, then drop duplicates.
	##Always returns a character vector, possibly of length zero.
	linkedTo <- function(vars, fromCol, toCol){
		hits <- lapply(vars, function(v) edges[which(edges[,fromCol] == v),toCol])
		unique(as.character(unlist(hits)))
	}
	parentsOf <- function(vars) linkedTo(vars, 2, 1)
	childrenOf <- function(vars) linkedTo(vars, 1, 2)

	##A list of lists: the outer index is the degree, the inner index the set
	AllVarList <- list()
	d <- 1
	for(d in seq_len(MarkovDegree)){
		if(d == 1){
			##Parents, Children and Co-Parents of the start variable
			PofVar <- parentsOf(VariableStartName)
			CofVar <- childrenOf(VariableStartName)
			##The start variable is a parent of its own children, so take it out
			COPlist <- parentsOf(CofVar)
			COPlist <- COPlist[!(COPlist %in% VariableStartName)]
			AllVarList[[1]] <- list(PofVar, CofVar, COPlist)
		} else {
			##Every degree is built as a complete list before it is stored, so
			##no degree depends on assigning into a list that does not exist yet
			prevSets <- AllVarList[[d-1]]
			newSets <- vector("list", length(prevSets) * 3)
			for(ef in seq_along(prevSets)){
				##Set ef of the previous degree fills positions 3*ef-2 (Parents),
				##3*ef-1 (Children) and 3*ef (Co-Parents) of this degree
				PCCP <- (ef - 1) * 3
				CofVlist <- childrenOf(prevSets[[ef]])
				newSets[[PCCP + 1]] <- parentsOf(prevSets[[ef]])
				newSets[[PCCP + 2]] <- CofVlist
				##Unlike degree 1, the members themselves are not removed here
				newSets[[PCCP + 3]] <- parentsOf(CofVlist)
			}
			AllVarList[[d]] <- newSets
		}

		##Stop if you have found the VariableEndName value
		varfound <- any(vapply(AllVarList[[d]], function(s) any(s %in% VariableEndName), logical(1)))
		if(varfound){
			break
		}
	}
	##The Markov Degree is that found below
	d
}



#############Methods for finding paths#############
###Beyond Here Everything is still experimental###
#This section walks back from the degree d at which VariableEndName was seen
#towards degree 1, using the per-degree sets stored in AllVarList.
#NOTE(review): in the code visible in this file, d, AllVarList and
#VariableEndName are only assigned inside Blanky(), so they do not exist at
#top level when the script is run from start to end - confirm how this
#section is meant to be executed (e.g. pasted into the body of Blanky()).
#
#Layout this section relies on: in Blanky() every set ef of degree l produces
#three consecutive sets of degree l + 1 (parents, children, co-parents), so
#set number ship of one degree descends from set number ceiling(ship / 3) of
#the degree before it.

#Find the path to the variable in question
#RshipList[[d]] is the end variable itself; RshipList[[l]] for l < d is the
#whole set of degree l from which the set one degree up was derived
RshipList <- vector("list",length = d)
l = d
for(l in d:1){
	if(l == d){
		RshipList[[l]] <- VariableEndName
		#Locate the first set of degree d that holds the end variable; ship keeps
		#that index after the break (or the last index if no set holds it)
		for(ship in 1:length(AllVarList[[d]])){
			if(sum(grepl(paste0("^",VariableEndName,"$"),AllVarList[[d]][[ship]])) > 0){
				break
			}
		}
			
	} else {
		#intship = ceiling(ship / 3): index of the originating set one degree down
		modship <- ship %% 3
		intship <- as.integer(ship/3)
		if(modship > 0){
			intship <- intship + 1
		}
		RshipList[[l]] <- AllVarList[[l]][[intship]]
		ship <- intship
	}
	#l <- l - 1
}
#Full actual path
#Pathlisting[[k]] is filled with c(variable names, set index) for degree k,
#starting with the end variable at degree d and narrowing the candidates one
#degree at a time. prevar holds the candidate variables of the current degree.
#NOTE(review): d:2 counts upwards (1, 2) when d is 1, which would index
#AllVarList[[0]] below - confirm that d >= 2 is guaranteed here.
k = d
prevar <- VariableEndName
Pathlisting <- vector("list",length = d)
Pathlisting[[d]] <- prevar
for(k in d:2){
	if(k == d){
		#Same search as above: first set of degree d that holds the end variable
		for(ship in 1:length(AllVarList[[d]])){
			if(sum(grepl(paste0("^",VariableEndName,"$"),AllVarList[[d]][[ship]])) > 0){
				break
			}
		}
		Pathlisting[[d]] <- append(Pathlisting[[d]],ship)
			
	}
	#modship tells which kind of set ship is (1 = parents, 2 = children,
	#0 = co-parents); intship = ceiling(ship / 3) is the set of degree k - 1
	#that it was derived from
	modship <- ship %% 3
	intship <- as.integer(ship/3)
	if(modship > 0){
		intship <- intship + 1
	}
	if(modship == 0){
		##When modship == 0 then we are refering to a CoParent
		##Set ship - 1 is the children set that this co-parent set was built from
		##Gives how many children the full set of CoParents has
		lenCoP <- length(AllVarList[[k]][[(ship - 1)]])
		##variables that are actually children of the coparent
		actvar <- vector("character",length = 0)
		##Parents of actvar
		Pactvar <- vector("character",length =0)
		##variables that could have led to the previous set of variables
		Wanvar <- vector("character",length = 0)
		m <- 1
		for(m in 1:lenCoP){
			##Parents of child number m
			LCPoVar <- grep(paste0("^",AllVarList[[k]][[(ship - 1)]][m],"$"),NewDotP2_2[,2])
			CPoVar <- NewDotP2_2[LCPoVar,1]
			lenprevar <- length(prevar)
			y <- 1
			for(y in 1:lenprevar){
		
				##Keep the child (and all of its parents) when one of the current
				##candidates is among its parents
				##NOTE(review): prevar[y] is used as an unanchored regular
				##expression here, unlike the "^...$" lookups elsewhere - confirm
				if(sum(grepl(prevar[y],CPoVar)) >= 1){
					actvar <- append(actvar,AllVarList[[k]][[(ship - 1)]][m])
					Pactvar <- append(Pactvar,CPoVar)
				}
				#y <- y + 1
			}
			#m <- m + 1

		}
		Pactvar <- Pactvar[!duplicated(Pactvar)]
		##Searching to see if any of the parents are in the previous degree
		##The right children will have both the current variable and a previous degrees variable as parents 
		##NOTE(review): 1:length(Pactvar) also runs when Pactvar is empty - confirm
		o <- 1
		for(o in 1:length(Pactvar)){
			##NOTE(review): unanchored regular-expression match, see above
			if(sum(grepl(Pactvar[o],AllVarList[[(k - 1)]][[intship]])) >= 1){
				Wanvar <- append(Wanvar,Pactvar[o])
			}
			#o <- o + 1
		}
		Wanvar <- Wanvar[!duplicated(Wanvar)]
		prevar <- Wanvar
			
	} else if(modship == 1){
		#When modship == 1 then we are referring to a Parent
		lengprevar <- length(prevar)
		t <- 1
		##All children of these parents
		TCoVar <- vector("character",length = 0)
		##variables that could have led to these parents
		leadVar <- vector("character",length = 0)
		for(t in 1: lengprevar){
			LCofVar <- grep(paste0("^",prevar[t],"$"),NewDotP2_2[,1])
			CoVar <- NewDotP2_2[LCofVar,2]
			TCoVar <- append(TCoVar,CoVar)
		}
		TCoVar <- TCoVar[!duplicated(TCoVar)] 
		s <- 1
		##Searching for the children that are also in the previous degree
		for(s in 1:length(TCoVar)){
			##NOTE(review): unanchored regular-expression match - confirm
			if(sum(grepl(TCoVar[s],AllVarList[[(k - 1)]][[intship]])) >= 1){
				leadVar <- append(leadVar,TCoVar[s])
			}
		}
		leadVar <- leadVar[!duplicated(leadVar)]
		prevar <- leadVar	
			
	} else if(modship == 2){
		#When modship == 2 then we are referring to a Child
		lenprevar <- length(prevar)
		x <- 1
		##All parents of these children
		TPoVar <- vector("character",length = 0)
		##variables of the previous degree that could have led to these children
		drctVar <- vector("character",length = 0)
		for(x in 1:lenprevar){
			LPoVar <- grep(paste0("^",prevar[x],"$"),NewDotP2_2[,2])
			PoVar <- NewDotP2_2[LPoVar,1]
			TPoVar <- append(TPoVar,PoVar)
		}
		TPoVar <- TPoVar[!duplicated(TPoVar)]
		y <- 1
		##Searching for the parents that are also in the previous degree
		for(y in 1:length(TPoVar)){
			##NOTE(review): unanchored regular-expression match - confirm
			if(sum(grepl(TPoVar[y],AllVarList[[(k - 1)]][[intship]])) >= 1){
				drctVar <- append(drctVar,TPoVar[y])
			}
		}
		drctVar <- drctVar[!duplicated(drctVar)]
		prevar <- drctVar
		
	
	}
	
	##Move one degree down: the originating set becomes the current set, and
	##the surviving candidates are recorded together with that set index
	ship <- intship
	Pathlisting[[(k - 1)]] <- c(prevar, ship)
	
}