# Efrain Gonzalez
# 8/25/2017
# Code for Markov Blanket
#
# Overview:
#   1. Ask the user for a DOT file and split it into node-declaration lines
#      and edge ("->") lines.
#   2. Build NewDotP1 (variable name, node id) and NewDotP2 (parent id,
#      child id), then NewDotP2_2, the edge list expressed in variable names.
#   3. Blanky() expands parents / children / co-parents level by level from a
#      start variable until an end variable shows up.
#   4. An experimental section back-tracks a path through the levels.

# The required libraries
library(pryr)
library(MASS)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)

# ---- Edge-list helpers -------------------------------------------------------

# Parents of every variable in `vars`, looked up by exact string equality in a
# two-column (parent, child) matrix. Results are concatenated in the order of
# `vars` (then row order) and may contain duplicates.
.parents_of <- function(edges, vars) {
  hits <- lapply(vars, function(v) edges[which(edges[, 2] == v), 1])
  as.character(unlist(hits, use.names = FALSE))
}

# Children of every variable in `vars`; same conventions as .parents_of().
.children_of <- function(edges, vars) {
  hits <- lapply(vars, function(v) edges[which(edges[, 1] == v), 2])
  as.character(unlist(hits, use.names = FALSE))
}

# ---- Reading the DOT file ----------------------------------------------------

# Have the user choose an original Dot file that they want to use
DotFile <- file.choose()

# The file is read once; only the first tab-delimited column (X1) is used.
DotLines <- read_delim(DotFile, delim = "\t", col_names = FALSE)

# Node declarations: everything that is not an edge, a brace line, or one of
# the "Banjo" / "labeljust" header lines.
TheDotP1 <- DotLines %>%
  dplyr::filter(!grepl("->|[{}]", X1)) %>%
  dplyr::filter(!grepl("Banjo", X1)) %>%
  dplyr::filter(!grepl("labeljust", X1))

sizeDotP1 <- dim(TheDotP1)[1]
# Column 1 = variable name, column 2 = node id; "0" marks an unparsed row.
NewDotP1 <- matrix("0", ncol = 2, nrow = sizeDotP1)
NodeLinesP1 <- str_trim(as.character(TheDotP1[["X1"]]))

# seq_len() keeps the loop from running when no node lines were found
# (1:0 would iterate over c(1, 0) and index out of bounds).
for (counterP1 in seq_len(sizeDotP1)) {
  coldataP1 <- NodeLinesP1[counterP1]
  if (!is.na(coldataP1) && !grepl("Banjo|labeljust|>", coldataP1)) {
    fieldsP1 <- strsplit(coldataP1, " ")[[1]]
    # First space-separated field is the node id.
    NumberP1 <- fieldsP1[1]
    # Second field is split on double quotes; the piece starting with a word
    # character is taken as the variable name.
    labelPartsP1 <- strsplit(fieldsP1[2], "\"")[[1]]
    VarNameP1 <- labelPartsP1[grep("^\\w|^\\d", labelPartsP1)]
    if (length(VarNameP1) != 1) {
      # The matrix assignment below would fail anyway; fail with the
      # offending line instead of an obscure replacement-length error.
      stop("Could not extract exactly one variable name from DOT line: ",
           coldataP1, call. = FALSE)
    }
    NewDotP1[counterP1, 1] <- VarNameP1
    NewDotP1[counterP1, 2] <- NumberP1
  }
}

# Edge lines: "<parent id>-><child id>;"
TheDotP2 <- DotLines %>%
  dplyr::filter(grepl("->", X1))

sizeDotP2 <- dim(TheDotP2)[1]
NewDotP2 <- matrix("0", ncol = 2, nrow = sizeDotP2)
EdgeLinesP2 <- str_trim(as.character(TheDotP2[["X1"]]))

for (counterP2 in seq_len(sizeDotP2)) {
  endsP2 <- strsplit(EdgeLinesP2[counterP2], "->")[[1]]
  ParentNumP2 <- endsP2[1]
  # Keep only what precedes the first ";" on the child side.
  ChildNumP2 <- strsplit(endsP2[2], ";")[[1]][1]
  # Trim so that "0 -> 1;" yields the same ids as "0->1;".
  NewDotP2[counterP2, 1] <- str_trim(ParentNumP2)
  NewDotP2[counterP2, 2] <- str_trim(ChildNumP2)
}
colnames(NewDotP2) <- c("Parents", "Children")

# ---- Matching numbers to variable names --------------------------------------

# Rows whose name equals their id (e.g. unparsed "0"/"0" placeholders) would
# be no-op substitutions, so they are left out of the lookup table.
NodeLookup <- NewDotP1[NewDotP1[, 1] != NewDotP1[, 2], , drop = FALSE]

# Translate node ids to variable names by exact lookup against the ORIGINAL
# ids. Ids without a matching node are kept as they are. Unlike an in-place
# regex substitution per node, a name that happens to equal another node's id
# is never substituted a second time.
.ids_to_names <- function(ids) {
  hit <- match(ids, NodeLookup[, 2])
  known <- !is.na(hit)
  ids[known] <- NodeLookup[hit[known], 1]
  ids
}

NewDotP2_2 <- NewDotP2
NewDotP2_2[, 1] <- .ids_to_names(NewDotP2[, 1])
# Bare expression kept from the original script: shows the matrix when the
# script is run with auto-printing (parent column translated, children not yet).
NewDotP2_2
NewDotP2_2[, 2] <- .ids_to_names(NewDotP2[, 2])

lrgMarkov <- dim(NewDotP2_2)[1]

# ---- Markov blanket search ---------------------------------------------------

# Expand parents, children and co-parents outward from VariableStartName until
# VariableEndName appears.
#
# Level 1 holds three character vectors for the start variable: its parents,
# its children, and the other parents of its children (the start variable
# itself removed). Every later level holds, for each vector of the previous
# level and in that order, the parents / children / co-parents of all the
# variables in that vector. Level d therefore has 3^d slots, so large degrees
# are only practical when the end variable is found early.
#
# Arguments:
#   MarkovDegree       Maximum number of levels to expand (>= 1).
#   VariableStartName  Variable to start from.
#   VariableEndName    Variable to look for.
#   EdgeList           Two-column (parent, child) matrix of variable names;
#                      defaults to the global NewDotP2_2.
#   ReturnTree         If TRUE, return the level list as well (see below).
#
# Returns:
#   By default the level at which the search stopped. NOTE: when the end
#   variable is never found this equals MarkovDegree, which is
#   indistinguishable from "found at the last level".
#   With ReturnTree = TRUE, a list with MarkovDegree (that level), Found
#   (logical) and AllVarList (the levels built so far).
Blanky <- function(MarkovDegree = 20, VariableStartName = "Alzheimer",
                   VariableEndName = "GRIN2A", EdgeList = NewDotP2_2,
                   ReturnTree = FALSE) {
  if (!is.numeric(MarkovDegree) || length(MarkovDegree) != 1 ||
      is.na(MarkovDegree) || MarkovDegree < 1) {
    stop("MarkovDegree must be a single number >= 1", call. = FALSE)
  }
  if (length(VariableStartName) != 1 || length(VariableEndName) != 1) {
    stop("VariableStartName and VariableEndName must each be a single name",
         call. = FALSE)
  }

  edges <- as.matrix(EdgeList)
  colnames(edges) <- NULL

  # A list of lists; outer index = degree of the Markov blanket.
  AllVarList <- list()
  varfound <- FALSE

  for (d in 1:MarkovDegree) {
    if (d == 1) {
      ParentsOfStart <- .parents_of(edges, VariableStartName)
      ChildrenOfStart <- .children_of(edges, VariableStartName)
      # Co-parents: the other parents of each child.
      CoParents <- .parents_of(edges, ChildrenOfStart)
      CoParents <- CoParents[CoParents != VariableStartName]
      level <- list(unique(ParentsOfStart),
                    unique(ChildrenOfStart),
                    unique(CoParents))
    } else {
      previous <- AllVarList[[d - 1]]
      # Always start from a real list of the final size. The original only
      # initialised levels above 3; levels 2 and 3 relied on `[[<-`
      # promoting a NULL, which can fail for length-0 or length-1 values.
      level <- vector("list", length = 3 * length(previous))
      slot <- 0
      for (ef in seq_along(previous)) {
        vars <- previous[[ef]]
        ChildrenOfVars <- .children_of(edges, vars)
        level[[slot + 1]] <- unique(.parents_of(edges, vars))
        level[[slot + 2]] <- unique(ChildrenOfVars)
        level[[slot + 3]] <- unique(.parents_of(edges, ChildrenOfVars))
        slot <- slot + 3
      }
    }
    AllVarList[[d]] <- level

    # Stop once VariableEndName shows up anywhere in the finished level.
    # Exact comparison: names containing regex metacharacters are handled.
    varfound <- any(vapply(level,
                           function(v) VariableEndName %in% v,
                           logical(1)))
    if (varfound) {
      break
    }
  }

  if (ReturnTree) {
    return(list(MarkovDegree = d, Found = varfound, AllVarList = AllVarList))
  }
  # The Markov Degree is that found below
  d
}

############# Methods for finding paths #############
### Beyond Here Everything is still experimental ###

# Back-track from the slot of level `d` that contains VariableEndName towards
# level 1.
#
# Slot numbering follows Blanky(): slot s of level k was derived from slot
# ceiling(s / 3) of level k - 1, and s %% 3 tells how: 1 = parents,
# 2 = children, 0 = co-parents.
#
# Arguments:
#   d                Level in which VariableEndName was found.
#   AllVarList       Level list as built by Blanky().
#   VariableEndName  Variable the path should end at.
#   EdgeList         Two-column (parent, child) matrix of variable names.
#
# Returns a list with
#   RshipList    for each level, the variable set on the chain of slots that
#                leads to the end variable (level d holds the end variable);
#   Pathlisting  for each level, the candidate variables followed by the slot
#                number, as one character vector.
.trace_markov_path <- function(d, AllVarList, VariableEndName, EdgeList) {
  edges <- as.matrix(EdgeList)

  holds_end <- vapply(AllVarList[[d]],
                      function(v) VariableEndName %in% v,
                      logical(1))
  if (!any(holds_end)) {
    stop("'", VariableEndName, "' is not present in level ", d,
         call. = FALSE)
  }
  end_slot <- which(holds_end)[1]

  # Find the path to the variable in question
  RshipList <- vector("list", length = d)
  RshipList[[d]] <- VariableEndName
  ship <- end_slot
  for (l in rev(seq_len(d - 1))) {
    ship <- (ship + 2) %/% 3   # ceiling(ship / 3)
    RshipList[[l]] <- AllVarList[[l]][[ship]]
  }

  # Full actual path
  prevar <- VariableEndName
  Pathlisting <- vector("list", length = d)
  ship <- end_slot
  Pathlisting[[d]] <- append(prevar, ship)

  # k runs d, d - 1, ..., 2 and does nothing when d == 1 (d:2 would count
  # upwards and index level 0).
  for (k in rev(seq_len(d)[-1])) {
    modship <- ship %% 3
    intship <- (ship + 2) %/% 3
    prior <- AllVarList[[k - 1]][[intship]]

    if (modship == 0) {
      # Co-parent slot: look at the children in the neighbouring slot. A
      # child qualifies when one of the current variables is among its
      # parents; the parents of qualifying children that also sit in the
      # previous level's set are the candidates.
      kids <- AllVarList[[k]][[ship - 1]]
      kid_parents <- character(0)
      for (kid in kids) {
        parents_here <- .parents_of(edges, kid)
        if (any(prevar %in% parents_here)) {
          kid_parents <- c(kid_parents, parents_here)
        }
      }
      prevar <- unique(kid_parents[kid_parents %in% prior])
    } else if (modship == 1) {
      # Parent slot: children of the current variables that are also in the
      # previous level's set.
      kids <- .children_of(edges, prevar)
      prevar <- unique(kids[kids %in% prior])
    } else {
      # Child slot (modship == 2): parents of the current variables that are
      # also in the previous level's set.
      folks <- .parents_of(edges, prevar)
      prevar <- unique(folks[folks %in% prior])
    }

    ship <- intship
    Pathlisting[[k - 1]] <- c(prevar, ship)
  }

  list(RshipList = RshipList, Pathlisting = Pathlisting)
}

# This section reads `d`, `AllVarList` and `VariableEndName`, none of which is
# assigned at top level by this script (inside Blanky() they are local). It
# therefore only runs when they already exist in the workspace, for example:
#   res <- Blanky(ReturnTree = TRUE)
#   d <- res$MarkovDegree; AllVarList <- res$AllVarList
#   VariableEndName <- "GRIN2A"
if (exists("d") && exists("AllVarList") && exists("VariableEndName")) {
  PathResult <- .trace_markov_path(d, AllVarList, VariableEndName, NewDotP2_2)
  RshipList <- PathResult$RshipList
  Pathlisting <- PathResult$Pathlisting
} else {
  message("Skipping the experimental path search: 'd', 'AllVarList' and ",
          "'VariableEndName' are not defined in the workspace.")
}