#RMarkovBlanket.r 6.43 KB
#Efrain Gonzalez
#7/25/2017
#Code for Markov Blanket


#The required libraries
library(pryr)
library(MASS)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)


#Have the user choose an original Dot file that they want to use
DotFile <- file.choose()
#Part 1: the node declarations.
#Drop the edge lines ("->"), the braces, and the Banjo/labeljust header lines
#so that only the lines declaring a numbered node with its label remain.
TheDotP1 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>%
	dplyr::filter(!grepl("->|[{}]",X1)) %>%
	dplyr::filter(!grepl("Banjo",X1)) %>%
	dplyr::filter(!grepl("labeljust",X1))
sizeDotP1 <- dim(TheDotP1)[1]
#NewDotP1: column 1 = variable name, column 2 = node number.
#Rows that cannot be parsed keep the placeholder "0" in both columns.
NewDotP1 <- matrix("0",ncol = 2, nrow = sizeDotP1)
#seq_len() skips the loop when there are no node lines (1:0 would run twice)
for(counterP1 in seq_len(sizeDotP1)){
	#Index the column vector itself; TheDotP1[counterP1,1] is a 1x1 tibble,
	#not a character string
	coldataP1 <- str_trim(as.character(TheDotP1[[1]][counterP1]))
	#Guard against header-like lines that slipped through the filters
	if(grepl("Banjo|labeljust|>",coldataP1)){
		next
	}
	#First space-separated token is the node number, the second one holds
	#the quoted variable name
	PiecesP1 <- strsplit(coldataP1," ")[[1]]
	NumberP1 <- PiecesP1[1]
	QuotedP1 <- strsplit(PiecesP1[2],"\"")[[1]]
	VarNameP1 <- QuotedP1[grep("^\\w|^\\d",QuotedP1)]
	#A line without a usable name used to abort the script with
	#"replacement has length zero"; report it and keep the placeholder row
	if(length(VarNameP1) == 0){
		warning("No variable name found in Dot line: ",coldataP1,call. = FALSE)
		next
	}
	NewDotP1[counterP1,1] <- VarNameP1[1]
	NewDotP1[counterP1,2] <- NumberP1
}


#Part 2: the edges. Keep only the "parent->child;" lines of the Dot file
TheDotP2 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>%
	dplyr::filter(grepl("->",X1))
sizeDotP2 <- dim(TheDotP2)[1]
#NewDotP2: one row per edge, column 1 = parent number, column 2 = child number
NewDotP2 <- matrix("0",ncol = 2, nrow = sizeDotP2)
#seq_len() skips the loop for a graph without edges (1:0 would index row 1
#of an empty matrix and fail)
for(counterP2 in seq_len(sizeDotP2)){
	#Index the column vector itself; TheDotP2[counterP2,1] is a 1x1 tibble
	coldataP2 <- str_trim(as.character(TheDotP2[[1]][counterP2]))
	#Text before the arrow is the parent, text after it (up to ";") the child
	EndsP2 <- strsplit(coldataP2,"->")[[1]]
	#Trim both ends so that "0 -> 1;" yields "0" and "1", which is what the
	#number-to-name matching further down compares against
	ParentNumP2 <- str_trim(EndsP2[1])
	ChildNumP2 <- str_trim(strsplit(EndsP2[2],";")[[1]][1])
	NewDotP2[counterP2,1] <- ParentNumP2
	NewDotP2[counterP2,2] <- ChildNumP2
}

colnames(NewDotP2) <- c("Parents","Children")

#Matching numbers to variable names
#NewDotP1 holds the lookup table (column 1 = name, column 2 = node number);
#NewDotP2 holds the edges as node numbers. NewDotP2_2 is the same edge list
#with every known node number replaced by its variable name.
NodeNamesP1 <- NewDotP1[,1]
NodeNumbersP1 <- NewDotP1[,2]
#Rows whose name equals their number (e.g. the "0"/"0" placeholder rows)
#translate nothing, so leave them out of the lookup
RealMapP1 <- NodeNumbersP1 != NodeNamesP1
NodeNamesP1 <- NodeNamesP1[RealMapP1]
NodeNumbersP1 <- NodeNumbersP1[RealMapP1]

NewDotP2_2 <- NewDotP2
for(ColP2 in 1:2){
	#Always look the numbers up in the untouched NewDotP2: substituting one
	#node after the other in NewDotP2_2 could translate an already inserted
	#name a second time when that name looks like another node's number
	HitsP2 <- match(trimws(NewDotP2[,ColP2]),NodeNumbersP1)
	FoundP2 <- !is.na(HitsP2)
	#Numbers without an entry in NewDotP1 are left as they are
	NewDotP2_2[FoundP2,ColP2] <- NodeNamesP1[HitsP2[FoundP2]]
}
#Show the translated edge list
NewDotP2_2

#Number of edges; used as the default upper bound for MBlanky()
lrgMarkov <- dim(NewDotP2_2)[1]
#MBlanky: how many Markov-blanket expansions does it take to get from one
#variable to another?
#
#Degree 1 is the Markov blanket of the start variable: its parents, its
#children and the other parents (co-parents) of those children. Every further
#degree collects the parents, the children and the parents of those children
#for all variables found at the previous degree. The search stops at the
#first degree at which the end variable shows up.
#
#Arguments
#	MarkovDegree      - largest degree to try (default: the number of edges)
#	VariableStartName - regular expression (as used by grepl) selecting the
#	                    start variable; a plain name also matches longer
#	                    names that contain it
#	VariableEndName   - regular expression selecting the variable to reach
#	EdgeList          - two-column character matrix of edges, column 1 the
#	                    parent and column 2 the child (default: NewDotP2_2)
#
#Value
#	The degree at which VariableEndName was first found. If it is never
#	found, a warning is raised and MarkovDegree is returned, which is the
#	value the loop counter ended on before.
MBlanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age", EdgeList = NewDotP2_2){
	ParentCol <- as.character(EdgeList[,1])
	ChildCol <- as.character(EdgeList[,2])
	MaxDegree <- as.integer(MarkovDegree)

	#Each name once, without missing values
	Tidy <- function(Vars) unique(Vars[!is.na(Vars)])
	#Names taken from the edge list are compared exactly; using them as
	#regular expressions would let "Age" also pick up "AgeGroup"
	ParentsOf <- function(Vars) Tidy(ParentCol[ChildCol %in% Vars])
	ChildrenOf <- function(Vars) Tidy(ChildCol[ParentCol %in% Vars])

	#Variables found at the current degree / at any degree so far.
	#Only the set of names matters for the result, so one de-duplicated
	#vector per degree replaces the list of lists that tripled in length
	#with every degree.
	Frontier <- character(0)
	Seen <- character(0)

	#seq_len() so that MarkovDegree = 0 (an empty edge list) runs no degree
	for(d in seq_len(MaxDegree)){
		if(d == 1){
			##Parents and Children of the start variable
			PofVar <- ParentCol[grepl(VariableStartName,ChildCol)]
			CofVar <- ChildCol[grepl(VariableStartName,ParentCol)]
			##Co-Parents: the parents of the Children, without the start
			##variable itself (every child has it as a parent)
			COPofVar <- ParentsOf(CofVar)
			COPofVar <- COPofVar[!grepl(VariableStartName,COPofVar)]
			Frontier <- Tidy(c(PofVar,CofVar,COPofVar))
		} else {
			##Parents, Children and Co-Parents of everything from degree d-1
			CofVlist <- ChildrenOf(Frontier)
			Frontier <- Tidy(c(ParentsOf(Frontier),CofVlist,ParentsOf(CofVlist)))
		}
		##Stop if you have found the VariableEndName value
		if(any(grepl(VariableEndName,Frontier))){
			return(d)
		}
		##Nothing new at this degree means later degrees cannot add
		##anything either, so there is no point in continuing
		if(all(Frontier %in% Seen)){
			break
		}
		Seen <- union(Seen,Frontier)
	}
	warning("VariableEndName '",VariableEndName,"' was not found within ",
		MaxDegree," Markov degree(s) of '",VariableStartName,"'",call. = FALSE)
	MaxDegree
}
#Now use the command MBlanky() with the appropriate settings