Commit 8f1c6201bb5cf6f1c0432bb580ee6422e468030c
1 parent: 4d40f27465
Exists in: master
Updated Version
Showing 1 changed file with 21 additions and 9 deletions
Show diff stats
RMarkovBlanket.r
| 1 | #Efrain Gonzalez | 1 | #Efrain Gonzalez |
| 2 | #7/25/2017 | 2 | #8/25/2017 |
| 3 | #Code for Markov Blanket | 3 | #Code for Markov Blanket |
| 4 | 4 | ||
| 5 | 5 | ||
| 6 | #The required libraries | 6 | #The required libraries |
| 7 | library(pryr) | 7 | library(pryr) |
| 8 | library(MASS) | 8 | library(MASS) |
| 9 | library(dplyr) | 9 | library(dplyr) |
| 10 | library(tidyr) | 10 | library(tidyr) |
| 11 | library(readr) | 11 | library(readr) |
| 12 | library(stringr) | 12 | library(stringr) |
| 13 | 13 | ||
| 14 | 14 | ||
| 15 | #Have the user choose an original Dot file that they want to use | 15 | #Have the user choose an original Dot file that they want to use |
| 16 | DotFile <- file.choose() | 16 | DotFile <- file.choose() |
| 17 | TheDotP1 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% | 17 | TheDotP1 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% |
| 18 | dplyr::filter(!grepl("->|[{}]",X1)) %>% | 18 | dplyr::filter(!grepl("->|[{}]",X1)) %>% |
| 19 | dplyr::filter(!grepl("Banjo",X1)) %>% | 19 | dplyr::filter(!grepl("Banjo",X1)) %>% |
| 20 | dplyr::filter(!grepl("labeljust",X1)) | 20 | dplyr::filter(!grepl("labeljust",X1)) |
| 21 | counterP1 <- 1 | 21 | counterP1 <- 1 |
| 22 | sizeDotP1 <- dim(TheDotP1)[1] | 22 | sizeDotP1 <- dim(TheDotP1)[1] |
| 23 | NewDotP1 <- matrix("0",ncol = 2, nrow = sizeDotP1) | 23 | NewDotP1 <- matrix("0",ncol = 2, nrow = sizeDotP1) |
| 24 | for(counterP1 in 1:sizeDotP1){ | 24 | for(counterP1 in 1:sizeDotP1){ |
| 25 | coldataP1 <- str_trim(TheDotP1[counterP1,1]) %>% | 25 | coldataP1 <- str_trim(TheDotP1[counterP1,1]) %>% |
| 26 | as.character(.,stringsAsFactors = FALSE) | 26 | as.character(.,stringsAsFactors = FALSE) |
| 27 | if(grepl("Banjo|labeljust|>",coldataP1)==FALSE){ | 27 | if(grepl("Banjo|labeljust|>",coldataP1)==FALSE){ |
| 28 | NumberP1 <- strsplit(coldataP1," ") %>% | 28 | NumberP1 <- strsplit(coldataP1," ") %>% |
| 29 | .[[1]]%>% | 29 | .[[1]]%>% |
| 30 | .[1] | 30 | .[1] |
| 31 | VarNameP1 <- strsplit(coldataP1," ") %>% | 31 | VarNameP1 <- strsplit(coldataP1," ") %>% |
| 32 | .[[1]] %>% | 32 | .[[1]] %>% |
| 33 | .[2] %>% | 33 | .[2] %>% |
| 34 | strsplit(.,"\"") %>% | 34 | strsplit(.,"\"") %>% |
| 35 | .[[1]] %>% | 35 | .[[1]] %>% |
| 36 | .[grep("^\\w|^\\d",.)] | 36 | .[grep("^\\w|^\\d",.)] |
| 37 | NewDotP1[counterP1,1] <- VarNameP1 | 37 | NewDotP1[counterP1,1] <- VarNameP1 |
| 38 | NewDotP1[counterP1,2] <- NumberP1 | 38 | NewDotP1[counterP1,2] <- NumberP1 |
| 39 | } | 39 | } |
| 40 | if(grepl("->",coldataP1) == TRUE){ | 40 | if(grepl("->",coldataP1) == TRUE){ |
| 41 | break | 41 | break |
| 42 | } | 42 | } |
| 43 | } | 43 | } |
| 44 | 44 | ||
| 45 | 45 | ||
| 46 | TheDotP2 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% | 46 | TheDotP2 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% |
| 47 | dplyr::filter(grepl("->",X1)) | 47 | dplyr::filter(grepl("->",X1)) |
| 48 | counterP2 <- 1 | 48 | counterP2 <- 1 |
| 49 | sizeDotP2 <- dim(TheDotP2)[1] | 49 | sizeDotP2 <- dim(TheDotP2)[1] |
| 50 | NewDotP2 <- matrix("0",ncol = 2, nrow = sizeDotP2) | 50 | NewDotP2 <- matrix("0",ncol = 2, nrow = sizeDotP2) |
| 51 | for(counterP2 in 1:sizeDotP2){ | 51 | for(counterP2 in 1:sizeDotP2){ |
| 52 | coldataP2 <- str_trim(TheDotP2[counterP2,1]) %>% | 52 | coldataP2 <- str_trim(TheDotP2[counterP2,1]) %>% |
| 53 | as.character(.,stringsAsFactors = FALSE) | 53 | as.character(.,stringsAsFactors = FALSE) |
| 54 | ParentNumP2 <- strsplit(coldataP2,"->") %>% | 54 | ParentNumP2 <- strsplit(coldataP2,"->") %>% |
| 55 | .[[1]]%>% | 55 | .[[1]]%>% |
| 56 | .[1] | 56 | .[1] |
| 57 | ChildNumP2 <- strsplit(coldataP2,"->") %>% | 57 | ChildNumP2 <- strsplit(coldataP2,"->") %>% |
| 58 | .[[1]] %>% | 58 | .[[1]] %>% |
| 59 | .[2] %>% | 59 | .[2] %>% |
| 60 | strsplit(.,";") %>% | 60 | strsplit(.,";") %>% |
| 61 | .[[1]] %>% | 61 | .[[1]] %>% |
| 62 | .[1] | 62 | .[1] |
| 63 | NewDotP2[counterP2,1] <- ParentNumP2 | 63 | NewDotP2[counterP2,1] <- ParentNumP2 |
| 64 | NewDotP2[counterP2,2] <- ChildNumP2 | 64 | NewDotP2[counterP2,2] <- ChildNumP2 |
| 65 | } | 65 | } |
| 66 | 66 | ||
| 67 | colnames(NewDotP2) <- c("Parents","Children") | 67 | colnames(NewDotP2) <- c("Parents","Children") |
| 68 | 68 | ||
| 69 | #Matching numbers to variable names | 69 | #Matching numbers to variable names |
| 70 | NewDotP2_2 <- NewDotP2 | 70 | NewDotP2_2 <- NewDotP2 |
| 71 | for(i in 1:sizeDotP1){ | 71 | for(i in 1:sizeDotP1){ |
| 72 | #Where is the variable located within NewDotP2 (column one only)? | 72 | #Where is the variable located within NewDotP2 (column one only)? |
| 73 | chngreq <- grep(paste0("^",NewDotP1[i,2],"$"),NewDotP2_2[,1]) | 73 | chngreq <- grep(paste0("^",NewDotP1[i,2],"$"),NewDotP2_2[,1]) |
| 74 | if(is.na(sum(chngreq)) == FALSE){ | 74 | if(is.na(sum(chngreq)) == FALSE){ |
| 75 | if(sum(chngreq) > 0){ | 75 | if(sum(chngreq) > 0){ |
| 76 | NewDotP2_2[chngreq,1] <- gsub(paste0("^",NewDotP1[i,2],"$"),NewDotP1[i,1],NewDotP2_2[chngreq,1]) | 76 | NewDotP2_2[chngreq,1] <- gsub(paste0("^",NewDotP1[i,2],"$"),NewDotP1[i,1],NewDotP2_2[chngreq,1]) |
| 77 | } | 77 | } |
| 78 | } | 78 | } |
| 79 | i <- i + 1 | 79 | i <- i + 1 |
| 80 | } | 80 | } |
| 81 | NewDotP2_2 | 81 | NewDotP2_2 |
| 82 | for(j in 1:sizeDotP1){ | 82 | for(j in 1:sizeDotP1){ |
| 83 | #Where is the variable located within NewDotP2 (column two only)? | 83 | #Where is the variable located within NewDotP2 (column two only)? |
| 84 | chngreq <- grep(paste0("^",NewDotP1[j,2],"$"),NewDotP2_2[,2]) | 84 | chngreq <- grep(paste0("^",NewDotP1[j,2],"$"),NewDotP2_2[,2]) |
| 85 | if(is.na(sum(chngreq)) == FALSE){ | 85 | if(is.na(sum(chngreq)) == FALSE){ |
| 86 | if(sum(chngreq) > 0){ | 86 | if(sum(chngreq) > 0){ |
| 87 | NewDotP2_2[chngreq,2] <- gsub(paste0("^",NewDotP1[j,2],"$"),NewDotP1[j,1],NewDotP2_2[chngreq,2]) | 87 | NewDotP2_2[chngreq,2] <- gsub(paste0("^",NewDotP1[j,2],"$"),NewDotP1[j,1],NewDotP2_2[chngreq,2]) |
| 88 | } | 88 | } |
| 89 | } | 89 | } |
| 90 | j <- j + 1 | 90 | j <- j + 1 |
| 91 | } | 91 | } |
| 92 | 92 | ||
| 93 | lrgMarkov <- dim(NewDotP2_2)[1] | 93 | lrgMarkov <- dim(NewDotP2_2)[1] |
| 94 | MBlanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age"){ | 94 | Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age"){ |
| 95 | #Finding the Parents and Children | 95 | #Finding the Parents and Children |
| 96 | d <- 1 | 96 | d <- 1 |
| 97 | AllNamList1 <- vector("list",length = 3) | 97 | AllNamList1 <- vector("list",length = 3) |
| 98 | #AllNamList <- vector("list", length = 3) | 98 | #AllNamList <- vector("list", length = 3) |
| 99 | ##Finding the Parents of the Parents | 99 | ##Finding the Parents of the Parents |
| 100 | ##A list of lists | 100 | ##A list of lists |
| 101 | ##outer set by the degree of the Markov blanket | 101 | ##outer set by the degree of the Markov blanket |
| 102 | AllVarList <- vector("list",length = 3) | 102 | AllVarList <- vector("list",length = 3) |
| 103 | for(d in 1:MarkovDegree){ | 103 | for(d in 1:MarkovDegree){ |
| 104 | colnames(NewDotP2_2) <- NULL | 104 | colnames(NewDotP2_2) <- NULL |
| 105 | ##Which variable are you looking for? | 105 | ##Which variable are you looking for? |
| 106 | ##This is the VariableEndName | 106 | ##This is the VariableEndName |
| 107 | if(d == 1){ | 107 | if(d == 1){ |
| 108 | ##Finding the Parents for the variable | 108 | ##Finding the Parents for the variable |
| 109 | LocPofVar <- grep(VariableStartName,NewDotP2_2[,2]) | 109 | LocPofVar <- grep(VariableStartName,NewDotP2_2[,2]) |
| 110 | PofVar <- NewDotP2_2[LocPofVar,1] | 110 | PofVar <- NewDotP2_2[LocPofVar,1] |
| 111 | AllNamList1[[1]] <- PofVar | 111 | AllNamList1[[1]] <- PofVar |
| 112 | 112 | ||
| 113 | ##Finding the Children for the variable | 113 | ##Finding the Children for the variable |
| 114 | LocCofVar <- grep(VariableStartName,NewDotP2_2[,1]) | 114 | LocCofVar <- grep(VariableStartName,NewDotP2_2[,1]) |
| 115 | CofVar <- NewDotP2_2[LocCofVar,2] | 115 | CofVar <- NewDotP2_2[LocCofVar,2] |
| 116 | AllNamList1[[2]] <- CofVar | 116 | AllNamList1[[2]] <- CofVar |
| 117 | 117 | ||
| 118 | ##Finding the Co-Parents of the Children for the variable | 118 | ##Finding the Co-Parents of the Children for the variable |
| 119 | NumofChild <- length(CofVar) | 119 | NumofChild <- length(CofVar) |
| 120 | if(NumofChild > 0){ | 120 | if(NumofChild > 0){ |
| 121 | ##Creating a list of the Co-Parents for each of the children | 121 | ##Creating a list of the Co-Parents for each of the children |
| 122 | ##list size is based on the amount of Children | 122 | ##list size is based on the amount of Children |
| 123 | COPlist <- vector("character", length = 0) | 123 | COPlist <- vector("character", length = 0) |
| 124 | nc <- 1 | 124 | nc <- 1 |
| 125 | for(nc in 1:NumofChild){ | 125 | for(nc in 1:NumofChild){ |
| 126 | LocCOPofVar <- grep(CofVar[nc],NewDotP2_2[,2]) | 126 | LocCOPofVar <- grep(CofVar[nc],NewDotP2_2[,2]) |
| 127 | COPofVar <- NewDotP2_2[LocCOPofVar,1] | 127 | COPofVar <- NewDotP2_2[LocCOPofVar,1] |
| 128 | if(grepl(COPofVar,VariableStartName)){ | 128 | if(sum(grepl(VariableStartName,COPofVar)) >= 1){ |
| 129 | next | 129 | #positions of variable start name within the vector of co parents |
| 130 | posoforig <- grep(VariableStartName,COPofVar) | ||
| 131 | COPofVar <- COPofVar[-posoforig] | ||
| 132 | COPlist <- append(COPlist,COPofVar) | ||
| 130 | } else{ | 133 | } else{ |
| 131 | #COPlist[[nc]] <- COPofVar[COPofVar!=VariableStartName] | 134 | #COPlist[[nc]] <- COPofVar[COPofVar!=VariableStartName] |
| 132 | COPlist <- append(COPlist,COPofVar) | 135 | COPlist <- append(COPlist,COPofVar) |
| 133 | } | 136 | } |
| 134 | nc <- nc + 1 | 137 | nc <- nc + 1 |
| 135 | } | 138 | } |
| 136 | } else { | 139 | } else { |
| 137 | ##Making COPlist empty | 140 | ##Making COPlist empty |
| 138 | COPlist <- vector("character",length = 0) | 141 | COPlist <- vector("character",length = 0) |
| 139 | } | 142 | } |
| 140 | AllNamList1[[3]] <- COPlist | 143 | AllNamList1[[3]] <- COPlist |
| 141 | AllVarList[[1]] <- AllNamList1 | 144 | AllVarList[[1]] <- AllNamList1 |
| 142 | } else if(d > 1){ | 145 | } else if(d > 1){ |
| 143 | ##inner set by the length of the previous AllVarlist we are working on | 146 | ##inner set by the length of the previous AllVarlist we are working on |
| 144 | lPreVList <- length(AllVarList[[d-1]]) | 147 | lPreVList <- length(AllVarList[[d-1]]) |
| 145 | ef <- 1 | 148 | ef <- 1 |
| 146 | ##PCCP will eventually equal the total size that we expect for the iteration (#d) | 149 | ##PCCP will eventually equal the total size that we expect for the iteration (#d) |
| 147 | ## which is just lPreVList * 3 | 150 | ## which is just lPreVList * 3 |
| 148 | PCCP <- 1 | 151 | PCCP <- 1 |
| 149 | newsize <- (lPreVList * 3) | 152 | newsize <- (lPreVList * 3) |
| 150 | if(d > 3){ | 153 | if(d > 3){ |
| 151 | AllVarList[[d]] <- vector("list") | 154 | AllVarList[[d]] <- vector("list") |
| 152 | } | 155 | } |
| 153 | for(ef in 1:lPreVList){ | 156 | for(ef in 1:lPreVList){ |
| 154 | ##Finding the Parents | 157 | ##Finding the Parents |
| 155 | NumofVars <- length(AllVarList[[d-1]][[ef]]) | 158 | NumofVars <- length(AllVarList[[d-1]][[ef]]) |
| 156 | if(NumofVars > 0){ | 159 | if(NumofVars > 0){ |
| 157 | ##Creating a list of the Parents for each of the Variables | 160 | ##Creating a list of the Parents for each of the Variables |
| 158 | ##list size is based on the amount of Previous Variables | 161 | ##list size is based on the amount of Previous Variables |
| 159 | PofVlist <- vector("character", length = 0) | 162 | PofVlist <- vector("character", length = 0) |
| 160 | np <- 1 | 163 | np <- 1 |
| 161 | for(np in 1:NumofVars){ | 164 | for(np in 1:NumofVars){ |
| 162 | LocPofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,2]) | 165 | LocPofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,2]) |
| 163 | PofVar <- NewDotP2_2[LocPofVar,1] | 166 | PofVar <- NewDotP2_2[LocPofVar,1] |
| 164 | PofVlist <- append(PofVlist,PofVar) | 167 | PofVlist <- append(PofVlist,PofVar) |
| 165 | np <- np + 1 | 168 | np <- np + 1 |
| 166 | } | 169 | } |
| 167 | } else { | 170 | } else { |
| 168 | ##Making COPlist empty | 171 | ##Making COPlist empty |
| 169 | PofVlist <- vector("character",length = 0) | 172 | PofVlist <- vector("character",length = 0) |
| 170 | } | 173 | } |
| 171 | AllVarList[[d]][[PCCP]] <- PofVlist | 174 | AllVarList[[d]][[PCCP]] <- PofVlist |
| 172 | PCCP <- PCCP + 1 | 175 | PCCP <- PCCP + 1 |
| 173 | 176 | ||
| 174 | ##Finding the Children | 177 | ##Finding the Children |
| 175 | if(NumofVars > 0){ | 178 | if(NumofVars > 0){ |
| 176 | ##Creating a list of the Children for each of the Previous Parents | 179 | ##Creating a list of the Children for each of the Previous Parents |
| 177 | ##list size is based on the amount of Previous Parents | 180 | ##list size is based on the amount of Previous Parents |
| 178 | CofVlist <- vector("character", length = 0) | 181 | CofVlist <- vector("character", length = 0) |
| 179 | np <- 1 | 182 | np <- 1 |
| 180 | for(np in 1:NumofVars){ | 183 | for(np in 1:NumofVars){ |
| 181 | LocCofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,1]) | 184 | LocCofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,1]) |
| 182 | CofVar <- NewDotP2_2[LocCofVar,2] | 185 | CofVar <- NewDotP2_2[LocCofVar,2] |
| 183 | #if(grepl(VariableStartName,CofVar)){ | 186 | #if(sum(grepl(VariableStartName,CofVar)) >= 1){ |
| 184 | # next | 187 | # #positions of variable start name within the vector of co parents |
| 188 | # posoforig <- grep(VariableStartName,COPofVar) | ||
| 189 | # COPofVar <- COPofVar[-posoforig] | ||
| 190 | # COPlist <- append(COPlist,COPofVar) | ||
| 185 | #} else{ | 191 | #} else{ |
| 186 | CofVlist <- append(CofVlist,CofVar) | 192 | CofVlist <- append(CofVlist,CofVar) |
| 187 | #} | 193 | #} |
| 188 | np <- np + 1 | 194 | np <- np + 1 |
| 189 | } | 195 | } |
| 190 | } else { | 196 | } else { |
| 191 | ##Making CofPlist empty | 197 | ##Making CofPlist empty |
| 192 | CofVlist <- vector("character",length = 0) | 198 | CofVlist <- vector("character",length = 0) |
| 193 | } | 199 | } |
| 194 | AllVarList[[d]][[PCCP]] <- CofVlist | 200 | AllVarList[[d]][[PCCP]] <- CofVlist |
| 195 | PCCP <- PCCP + 1 | 201 | PCCP <- PCCP + 1 |
| 196 | 202 | ||
| 197 | ##Finding the Co-Parents | 203 | ##Finding the Co-Parents |
| 198 | NumofCVars <- length(CofVlist) | 204 | NumofCVars <- length(CofVlist) |
| 199 | if(NumofCVars > 0){ | 205 | if(NumofCVars > 0){ |
| 200 | ncp <- 1 | 206 | ncp <- 1 |
| 201 | CPofClist <- vector("character",length = 0) | 207 | CPofClist <- vector("character",length = 0) |
| 202 | for(ncp in 1:NumofCVars){ | 208 | for(ncp in 1:NumofCVars){ |
| 203 | LocCPofCVar <- grep(CofVlist[ncp],NewDotP2_2[,2]) | 209 | LocCPofCVar <- grep(CofVlist[ncp],NewDotP2_2[,2]) |
| 204 | CPofCVar <- NewDotP2_2[LocCPofCVar,1] | 210 | CPofCVar <- NewDotP2_2[LocCPofCVar,1] |
| 205 | CPofClist <- append(CPofClist,CPofCVar) | 211 | #if(sum(grepl(,CPofCVar)) >= 1){ |
| 212 | # #positions of variable start name within the vector of co parents | ||
| 213 | # posoforig <- grep(VariableStartName,COPofVar) | ||
| 214 | # COPofVar <- COPofVar[-posoforig] | ||
| 215 | # COPlist <- append(COPlist,COPofVar) | ||
| 216 | #} else{ | ||
| 217 | CPofClist <- append(CPofClist,CPofCVar) | ||
| 218 | #} | ||
| 206 | ncp <- ncp + 1 | 219 | ncp <- ncp + 1 |
| 207 | } | 220 | } |
| 208 | 221 | ||
| 209 | } else { | 222 | } else { |
| 210 | ##Making COPlist empty | 223 | ##Making COPlist empty |
| 211 | CPofClist <- vector("character",length = 0) | 224 | CPofClist <- vector("character",length = 0) |
| 212 | } | 225 | } |
| 213 | AllVarList[[d]][[PCCP]] <- CPofClist | 226 | AllVarList[[d]][[PCCP]] <- CPofClist |
| 214 | PCCP <- PCCP + 1 | 227 | PCCP <- PCCP + 1 |
| 215 | ef <- ef + 1 | 228 | ef <- ef + 1 |
| 216 | } | 229 | } |
| 217 | } | 230 | } |
| 218 | ##Stop if you have found the VariableEndName value | 231 | ##Stop if you have found the VariableEndName value |
| 219 | if(sum(grepl(VariableEndName,AllVarList)) > 0){ | 232 | if(sum(grepl(VariableEndName,AllVarList)) > 0){ |
| 220 | break | 233 | break |
| 221 | } | 234 | } |
| 222 | d <- d + 1 | 235 | d <- d + 1 |
| 223 | } | 236 | } |
| 224 | ##The Markov Degree is that found below | 237 | ##The Markov Degree is that found below |
| 225 | d | 238 | d |
| 226 | } | 239 | } |
| 227 | #Now use the command MBlanky() with the appropriate settings |