Commit eb003daf3f0f341bdd9abd261c94ca4a1339cd58
1 parent
556b97bfaf
Exists in
master
As-yet-untested code
Showing
1 changed file
with
227 additions
and
0 deletions
 
Show diff stats
RMarkovBlanket.r
| File was created | 1 | #Efrain Gonzalez | |
| 2 | #7/25/2017 | ||
| 3 | #Code for Markov Blanket | ||
| 4 | |||
| 5 | |||
| 6 | #The required libraries | ||
| 7 | library(pryr) | ||
| 8 | library(MASS) | ||
| 9 | library(dplyr) | ||
| 10 | library(tidyr) | ||
| 11 | library(readr) | ||
| 12 | library(stringr) | ||
| 13 | |||
| 14 | |||
# Have the user choose the original Dot file that they want to use.
DotFile <- file.choose()

# Node declarations: keep every line that is not an edge ("->"), a brace,
# or one of the "Banjo" / "labeljust" header lines.
TheDotP1 <- read_delim(DotFile, delim = "\t", col_names = FALSE) %>%
  dplyr::filter(!grepl("->|[{}]|Banjo|labeljust", X1))
sizeDotP1 <- nrow(TheDotP1)

# NewDotP1: one row per node line; column 1 = variable name, column 2 = node
# number. A row whose line cannot be parsed keeps the "0" placeholder in both
# columns, so the matrix always has sizeDotP1 rows.
NewDotP1 <- matrix("0", ncol = 2, nrow = sizeDotP1)
for (counterP1 in seq_len(sizeDotP1)) {
  # `[[1]]` pulls the column out as a plain vector; `[i, 1]` on a tibble
  # would hand a 1x1 tibble to str_trim().
  coldataP1 <- str_trim(as.character(TheDotP1[[1]][counterP1]))

  # First space-separated token is the node number, the second one holds the
  # quoted variable name.
  PiecesP1 <- strsplit(coldataP1, " ")[[1]]
  NumberP1 <- PiecesP1[1]
  QuotedP1 <- strsplit(PiecesP1[2], "\"")[[1]]
  VarNameP1 <- QuotedP1[grep("^\\w|^\\d", QuotedP1)]

  # Skip lines without a number or without a usable name instead of failing
  # on a zero-length assignment.
  if (is.na(NumberP1) || length(VarNameP1) == 0) {
    next
  }
  # Several quoted pieces may qualify; the first one is taken as the name.
  NewDotP1[counterP1, 1] <- VarNameP1[1]
  NewDotP1[counterP1, 2] <- NumberP1
}
| 44 | |||
| 45 | |||
# Edge declarations: keep only the lines that contain "->".
TheDotP2 <- read_delim(DotFile, delim = "\t", col_names = FALSE) %>%
  dplyr::filter(grepl("->", X1))
sizeDotP2 <- nrow(TheDotP2)

# NewDotP2: one row per edge; column "Parents" = node number left of "->",
# column "Children" = node number right of it (up to the first ";").
NewDotP2 <- matrix("0", ncol = 2, nrow = sizeDotP2)
for (counterP2 in seq_len(sizeDotP2)) {
  # `[[1]]` pulls the column out as a plain vector rather than a 1x1 tibble.
  coldataP2 <- str_trim(as.character(TheDotP2[[1]][counterP2]))
  EndsP2 <- strsplit(coldataP2, "->")[[1]]

  # Trim both ends so "0 -> 1;" and "0->1;" give the same node numbers;
  # untrimmed ids would never match the number-to-name lookup that follows.
  ParentNumP2 <- str_trim(EndsP2[1])
  ChildNumP2 <- str_trim(strsplit(EndsP2[2], ";")[[1]][1])

  NewDotP2[counterP2, 1] <- ParentNumP2
  NewDotP2[counterP2, 2] <- ChildNumP2
}

colnames(NewDotP2) <- c("Parents", "Children")
| 68 | |||
# Matching numbers to variable names.
# NewDotP2_2 is NewDotP2 with every node number replaced by its variable name.
# The lookup is done once per column with match(), so each cell is translated
# exactly once: a variable whose *name* happens to equal another node's number
# is not translated a second time (which chained gsub() calls would do).

# Ignore placeholder rows ("0", "0") left by node lines that were not parsed,
# so they cannot shadow a real node numbered 0.
KnownP1 <- NewDotP1[
  !(NewDotP1[, 1] == "0" & NewDotP1[, 2] == "0"), ,
  drop = FALSE
]

NewDotP2_2 <- NewDotP2
for (colP2 in seq_len(ncol(NewDotP2_2))) {
  # Row of KnownP1 holding each edge end's number (NA when the number is
  # unknown; such cells are left as they are).
  hitP1 <- match(NewDotP2[, colP2], KnownP1[, 2], incomparables = NA)
  foundP1 <- !is.na(hitP1)
  NewDotP2_2[foundP1, colP2] <- KnownP1[hitP1[foundP1], 1]
}
# Show the translated edge list.
NewDotP2_2

# Number of edges; used as the default search depth of MBlanky().
lrgMarkov <- nrow(NewDotP2_2)
#' Markov-blanket degree at which one variable is reached from another.
#'
#' Starting from `VariableStartName`, repeatedly expands the Markov blanket
#' (parents, children, and the other parents of those children) and reports
#' the first expansion in which `VariableEndName` appears.
#'
#' Variable names are compared exactly (not as regular expressions), so
#' "Age" does not match "Agent".
#'
#' @param MarkovDegree Maximum number of blanket expansions to try. Defaults
#'   to the global `lrgMarkov`.
#' @param VariableStartName Name of the variable the search starts from.
#' @param VariableEndName Name of the variable being looked for.
#' @param Edges Two-column table of edges, column 1 = parent name,
#'   column 2 = child name. Defaults to the global `NewDotP2_2`.
#' @return The degree (1, 2, ...) at which `VariableEndName` was found, or
#'   `MarkovDegree + 1` when it was not found within `MarkovDegree`
#'   expansions.
MBlanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer",
                    VariableEndName = "Age", Edges = NewDotP2_2) {
  Edges <- as.matrix(Edges)
  parent_col <- as.character(Edges[, 1])
  child_col <- as.character(Edges[, 2])

  # Union of the Markov blankets of `vars`: their parents, their children,
  # and every parent of those children.
  blanket_of <- function(vars) {
    parents <- parent_col[child_col %in% vars]
    children <- child_col[parent_col %in% vars]
    co_parents <- parent_col[child_col %in% children]
    unique(c(parents, children, co_parents))
  }

  seen <- character(0)
  frontier <- VariableStartName
  for (d in seq_len(MarkovDegree)) {
    found <- blanket_of(frontier)
    if (d == 1) {
      # A variable is not part of its own blanket: the parents of its
      # children always include the variable itself, so drop it here.
      found <- setdiff(found, VariableStartName)
    }

    ## Stop if you have found the VariableEndName value
    if (VariableEndName %in% found) {
      return(d)
    }

    # Only variables not expanded before can contribute anything new, which
    # keeps the work bounded by the graph size instead of tripling per degree.
    frontier <- setdiff(found, seen)
    seen <- c(seen, frontier)
    if (length(frontier) == 0) {
      # Nothing new is reachable, so deeper degrees cannot find it either.
      break
    }
  }

  ## Not found within MarkovDegree expansions
  MarkovDegree + 1
}
| 227 | #Now use the command MBlanky() with the appropriate settings |