# Provenance: this script was recovered from a git format-patch email.
#   From eb003daf3f0f341bdd9abd261c94ca4a1339cd58 Mon Sep 17 00:00:00 2001
#   From: Efrain Gonzalez
#   Date: Thu, 3 Aug 2017 10:01:45 -0400
#   Subject: [PATCH] A yet untested code
#   File: RMarkovBlanket.r (new file, index 0000000..d601bce), git 2.29.0

#Efrain Gonzalez
#7/25/2017
#Code for Markov Blanket


#The required libraries
library(pryr)
library(MASS)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)


# ---- Read the Dot file ------------------------------------------------------

#Have the user choose an original Dot file that they want to use
DotFile <- file.choose()

# Read the file once and reuse it for both the node and the edge sections.
# Each row of column X1 holds the text of one line up to its first tab.
DotLines <- read_delim(DotFile, delim = "\t", col_names = FALSE)


# ---- Part 1: node declarations (number -> variable name) --------------------

# Keep only lines that are neither edges ("->"), braces, nor lines mentioning
# "Banjo" or "labeljust".
TheDotP1 <- DotLines %>%
  dplyr::filter(!grepl("->|[{}]", X1)) %>%
  dplyr::filter(!grepl("Banjo", X1)) %>%
  dplyr::filter(!grepl("labeljust", X1))
sizeDotP1 <- nrow(TheDotP1)

# NewDotP1: column 1 = variable name, column 2 = node number.
# Rows that cannot be parsed keep the placeholder "0" in both columns and are
# flagged FALSE in ParsedP1 so they never take part in the name lookup.
NewDotP1 <- matrix("0", ncol = 2, nrow = sizeDotP1)
ParsedP1 <- logical(sizeDotP1)
# Work on the column vector, not on 1x1 tibble slices.
NodeLinesP1 <- str_trim(as.character(TheDotP1[["X1"]]))

for (counterP1 in seq_len(sizeDotP1)) {
  coldataP1 <- NodeLinesP1[counterP1]
  # Lines that still contain ">" are left as placeholders.
  if (is.na(coldataP1) || grepl(">", coldataP1, fixed = TRUE)) {
    next
  }
  # A node line is expected to look like: <number> [label="<name>"];
  tokensP1 <- strsplit(coldataP1, " ")[[1]]
  NumberP1 <- tokensP1[1]
  # Split the second token on double quotes and keep the pieces that start
  # with a word character; the first such piece is taken as the name.
  piecesP1 <- strsplit(tokensP1[2], "\"")[[1]]
  VarNameP1 <- piecesP1[grepl("^\\w", piecesP1)]
  if (is.na(NumberP1) || length(VarNameP1) == 0) {
    next
  }
  NewDotP1[counterP1, 1] <- VarNameP1[1]
  NewDotP1[counterP1, 2] <- NumberP1
  ParsedP1[counterP1] <- TRUE
}


# ---- Part 2: edges (parent number -> child number) --------------------------

TheDotP2 <- DotLines %>%
  dplyr::filter(grepl("->", X1))
sizeDotP2 <- nrow(TheDotP2)

EdgeLinesP2 <- str_trim(as.character(TheDotP2[["X1"]]))
EdgeEndsP2 <- strsplit(EdgeLinesP2, "->", fixed = TRUE)
# Parent = text before the first "->"; child = text after it, up to the
# first ";". Both are trimmed so that "0 -> 1;" parses like "0->1;".
ParentNumP2 <- vapply(EdgeEndsP2, function(ends) ends[1], character(1))
ChildNumP2 <- vapply(EdgeEndsP2, function(ends) ends[2], character(1))
ChildNumP2 <- sub(";.*$", "", ChildNumP2)
NewDotP2 <- matrix(
  c(str_trim(ParentNumP2), str_trim(ChildNumP2)),
  ncol = 2
)
colnames(NewDotP2) <- c("Parents", "Children")


# ---- Matching numbers to variable names -------------------------------------

# Both columns are translated from the untouched numeric table NewDotP2 with
# an exact lookup, so a variable whose name happens to equal another node's
# number is never replaced a second time.
KnownP1 <- NewDotP1[ParsedP1, , drop = FALSE]
NewDotP2_2 <- NewDotP2
for (colP2 in 1:2) {
  hitP1 <- match(NewDotP2[, colP2], KnownP1[, 2])
  foundP1 <- !is.na(hitP1)
  NewDotP2_2[foundP1, colP2] <- KnownP1[hitP1[foundP1], 1]
}

lrgMarkov <- nrow(NewDotP2_2)


# ---- Markov blanket degree --------------------------------------------------

#' Markov degree between two variables of the graph
#'
#' Starting from `VariableStartName`, repeatedly expands the Markov blanket
#' (parents, children and the children's other parents) and reports after how
#' many expansions `VariableEndName` is first reached.
#'
#' Degree 1 is the blanket of the start variable itself (the start variable is
#' not counted as its own co-parent). Every further degree is the union of the
#' parents, children and children's parents of all variables reached at the
#' previous degree. Variable names are compared exactly, not as regular
#' expressions.
#'
#' @param MarkovDegree Largest degree to try (defaults to the number of edges).
#' @param VariableStartName Name of the variable to start from.
#' @param VariableEndName Name of the variable to look for.
#' @param EdgeTable Two-column character matrix of edges, parent names in
#'   column 1 and child names in column 2 (defaults to `NewDotP2_2`).
#' @return The degree at which `VariableEndName` is first reached, or
#'   `MarkovDegree + 1` when it is not reached within `MarkovDegree` degrees.
MBlanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer",
                    VariableEndName = "Age", EdgeTable = NewDotP2_2) {
  stopifnot(
    is.numeric(MarkovDegree), length(MarkovDegree) == 1, MarkovDegree >= 0,
    is.character(VariableStartName), length(VariableStartName) == 1,
    is.character(VariableEndName), length(VariableEndName) == 1
  )

  ParentCol <- as.character(EdgeTable[, 1])
  ChildCol <- as.character(EdgeTable[, 2])
  # Exact-name lookups over the edge table.
  ParentsOf <- function(vars) ParentCol[ChildCol %in% vars]
  ChildrenOf <- function(vars) ChildCol[ParentCol %in% vars]

  if (!VariableStartName %in% c(ParentCol, ChildCol)) {
    warning(
      "VariableStartName '", VariableStartName,
      "' does not appear in the edge table",
      call. = FALSE
    )
  }

  Reached <- character(0)     # every variable seen at any earlier degree
  Level <- VariableStartName  # variables the next expansion starts from
  for (d in seq_len(MarkovDegree)) {
    Kids <- ChildrenOf(Level)
    CoParents <- ParentsOf(Kids)
    if (d == 1) {
      # The start variable is always a parent of its own children; it is not
      # one of its own co-parents.
      CoParents <- setdiff(CoParents, VariableStartName)
    }
    Level <- unique(c(ParentsOf(Level), Kids, CoParents))

    ##Stop if you have found the VariableEndName value
    if (VariableEndName %in% Level) {
      return(d)
    }
    # No new variable at this degree means no later degree can add one
    # either, so the end variable is unreachable.
    if (all(Level %in% Reached)) {
      break
    }
    Reached <- union(Reached, Level)
  }

  # Not found within MarkovDegree degrees.
  MarkovDegree + 1
}
#Now use the command MBlanky() with the appropriate settings