Commit 8f1c6201bb5cf6f1c0432bb580ee6422e468030c
1 parent: 4d40f27465
Exists in: master
Updated Version
Showing 1 changed file with 21 additions and 9 deletions
Show diff stats
RMarkovBlanket.r
1 | #Efrain Gonzalez | 1 | #Efrain Gonzalez |
2 | #7/25/2017 | 2 | #8/25/2017 |
3 | #Code for Markov Blanket | 3 | #Code for Markov Blanket |
4 | 4 | ||
5 | 5 | ||
6 | #The required libraries | 6 | #The required libraries |
7 | library(pryr) | 7 | library(pryr) |
8 | library(MASS) | 8 | library(MASS) |
9 | library(dplyr) | 9 | library(dplyr) |
10 | library(tidyr) | 10 | library(tidyr) |
11 | library(readr) | 11 | library(readr) |
12 | library(stringr) | 12 | library(stringr) |
13 | 13 | ||
14 | 14 | ||
15 | #Have the user choose an original Dot file that they want to use | 15 | #Have the user choose an original Dot file that they want to use |
16 | DotFile <- file.choose() | 16 | DotFile <- file.choose() |
17 | TheDotP1 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% | 17 | TheDotP1 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% |
18 | dplyr::filter(!grepl("->|[{}]",X1)) %>% | 18 | dplyr::filter(!grepl("->|[{}]",X1)) %>% |
19 | dplyr::filter(!grepl("Banjo",X1)) %>% | 19 | dplyr::filter(!grepl("Banjo",X1)) %>% |
20 | dplyr::filter(!grepl("labeljust",X1)) | 20 | dplyr::filter(!grepl("labeljust",X1)) |
21 | counterP1 <- 1 | 21 | counterP1 <- 1 |
22 | sizeDotP1 <- dim(TheDotP1)[1] | 22 | sizeDotP1 <- dim(TheDotP1)[1] |
23 | NewDotP1 <- matrix("0",ncol = 2, nrow = sizeDotP1) | 23 | NewDotP1 <- matrix("0",ncol = 2, nrow = sizeDotP1) |
24 | for(counterP1 in 1:sizeDotP1){ | 24 | for(counterP1 in 1:sizeDotP1){ |
25 | coldataP1 <- str_trim(TheDotP1[counterP1,1]) %>% | 25 | coldataP1 <- str_trim(TheDotP1[counterP1,1]) %>% |
26 | as.character(.,stringsAsFactors = FALSE) | 26 | as.character(.,stringsAsFactors = FALSE) |
27 | if(grepl("Banjo|labeljust|>",coldataP1)==FALSE){ | 27 | if(grepl("Banjo|labeljust|>",coldataP1)==FALSE){ |
28 | NumberP1 <- strsplit(coldataP1," ") %>% | 28 | NumberP1 <- strsplit(coldataP1," ") %>% |
29 | .[[1]]%>% | 29 | .[[1]]%>% |
30 | .[1] | 30 | .[1] |
31 | VarNameP1 <- strsplit(coldataP1," ") %>% | 31 | VarNameP1 <- strsplit(coldataP1," ") %>% |
32 | .[[1]] %>% | 32 | .[[1]] %>% |
33 | .[2] %>% | 33 | .[2] %>% |
34 | strsplit(.,"\"") %>% | 34 | strsplit(.,"\"") %>% |
35 | .[[1]] %>% | 35 | .[[1]] %>% |
36 | .[grep("^\\w|^\\d",.)] | 36 | .[grep("^\\w|^\\d",.)] |
37 | NewDotP1[counterP1,1] <- VarNameP1 | 37 | NewDotP1[counterP1,1] <- VarNameP1 |
38 | NewDotP1[counterP1,2] <- NumberP1 | 38 | NewDotP1[counterP1,2] <- NumberP1 |
39 | } | 39 | } |
40 | if(grepl("->",coldataP1) == TRUE){ | 40 | if(grepl("->",coldataP1) == TRUE){ |
41 | break | 41 | break |
42 | } | 42 | } |
43 | } | 43 | } |
44 | 44 | ||
45 | 45 | ||
46 | TheDotP2 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% | 46 | TheDotP2 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% |
47 | dplyr::filter(grepl("->",X1)) | 47 | dplyr::filter(grepl("->",X1)) |
48 | counterP2 <- 1 | 48 | counterP2 <- 1 |
49 | sizeDotP2 <- dim(TheDotP2)[1] | 49 | sizeDotP2 <- dim(TheDotP2)[1] |
50 | NewDotP2 <- matrix("0",ncol = 2, nrow = sizeDotP2) | 50 | NewDotP2 <- matrix("0",ncol = 2, nrow = sizeDotP2) |
51 | for(counterP2 in 1:sizeDotP2){ | 51 | for(counterP2 in 1:sizeDotP2){ |
52 | coldataP2 <- str_trim(TheDotP2[counterP2,1]) %>% | 52 | coldataP2 <- str_trim(TheDotP2[counterP2,1]) %>% |
53 | as.character(.,stringsAsFactors = FALSE) | 53 | as.character(.,stringsAsFactors = FALSE) |
54 | ParentNumP2 <- strsplit(coldataP2,"->") %>% | 54 | ParentNumP2 <- strsplit(coldataP2,"->") %>% |
55 | .[[1]]%>% | 55 | .[[1]]%>% |
56 | .[1] | 56 | .[1] |
57 | ChildNumP2 <- strsplit(coldataP2,"->") %>% | 57 | ChildNumP2 <- strsplit(coldataP2,"->") %>% |
58 | .[[1]] %>% | 58 | .[[1]] %>% |
59 | .[2] %>% | 59 | .[2] %>% |
60 | strsplit(.,";") %>% | 60 | strsplit(.,";") %>% |
61 | .[[1]] %>% | 61 | .[[1]] %>% |
62 | .[1] | 62 | .[1] |
63 | NewDotP2[counterP2,1] <- ParentNumP2 | 63 | NewDotP2[counterP2,1] <- ParentNumP2 |
64 | NewDotP2[counterP2,2] <- ChildNumP2 | 64 | NewDotP2[counterP2,2] <- ChildNumP2 |
65 | } | 65 | } |
66 | 66 | ||
67 | colnames(NewDotP2) <- c("Parents","Children") | 67 | colnames(NewDotP2) <- c("Parents","Children") |
68 | 68 | ||
69 | #Matching numbers to variable names | 69 | #Matching numbers to variable names |
70 | NewDotP2_2 <- NewDotP2 | 70 | NewDotP2_2 <- NewDotP2 |
71 | for(i in 1:sizeDotP1){ | 71 | for(i in 1:sizeDotP1){ |
72 | #Where is the variable located within NewDotP2 (column one only)? | 72 | #Where is the variable located within NewDotP2 (column one only)? |
73 | chngreq <- grep(paste0("^",NewDotP1[i,2],"$"),NewDotP2_2[,1]) | 73 | chngreq <- grep(paste0("^",NewDotP1[i,2],"$"),NewDotP2_2[,1]) |
74 | if(is.na(sum(chngreq)) == FALSE){ | 74 | if(is.na(sum(chngreq)) == FALSE){ |
75 | if(sum(chngreq) > 0){ | 75 | if(sum(chngreq) > 0){ |
76 | NewDotP2_2[chngreq,1] <- gsub(paste0("^",NewDotP1[i,2],"$"),NewDotP1[i,1],NewDotP2_2[chngreq,1]) | 76 | NewDotP2_2[chngreq,1] <- gsub(paste0("^",NewDotP1[i,2],"$"),NewDotP1[i,1],NewDotP2_2[chngreq,1]) |
77 | } | 77 | } |
78 | } | 78 | } |
79 | i <- i + 1 | 79 | i <- i + 1 |
80 | } | 80 | } |
81 | NewDotP2_2 | 81 | NewDotP2_2 |
82 | for(j in 1:sizeDotP1){ | 82 | for(j in 1:sizeDotP1){ |
83 | #Where is the variable located within NewDotP2 (column two only)? | 83 | #Where is the variable located within NewDotP2 (column two only)? |
84 | chngreq <- grep(paste0("^",NewDotP1[j,2],"$"),NewDotP2_2[,2]) | 84 | chngreq <- grep(paste0("^",NewDotP1[j,2],"$"),NewDotP2_2[,2]) |
85 | if(is.na(sum(chngreq)) == FALSE){ | 85 | if(is.na(sum(chngreq)) == FALSE){ |
86 | if(sum(chngreq) > 0){ | 86 | if(sum(chngreq) > 0){ |
87 | NewDotP2_2[chngreq,2] <- gsub(paste0("^",NewDotP1[j,2],"$"),NewDotP1[j,1],NewDotP2_2[chngreq,2]) | 87 | NewDotP2_2[chngreq,2] <- gsub(paste0("^",NewDotP1[j,2],"$"),NewDotP1[j,1],NewDotP2_2[chngreq,2]) |
88 | } | 88 | } |
89 | } | 89 | } |
90 | j <- j + 1 | 90 | j <- j + 1 |
91 | } | 91 | } |
92 | 92 | ||
93 | lrgMarkov <- dim(NewDotP2_2)[1] | 93 | lrgMarkov <- dim(NewDotP2_2)[1] |
94 | MBlanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age"){ | 94 | Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age"){ |
95 | #Finding the Parents and Children | 95 | #Finding the Parents and Children |
96 | d <- 1 | 96 | d <- 1 |
97 | AllNamList1 <- vector("list",length = 3) | 97 | AllNamList1 <- vector("list",length = 3) |
98 | #AllNamList <- vector("list", length = 3) | 98 | #AllNamList <- vector("list", length = 3) |
99 | ##Finding the Parents of the Parents | 99 | ##Finding the Parents of the Parents |
100 | ##A list of lists | 100 | ##A list of lists |
101 | ##outer set by the degree of the Markov blanket | 101 | ##outer set by the degree of the Markov blanket |
102 | AllVarList <- vector("list",length = 3) | 102 | AllVarList <- vector("list",length = 3) |
103 | for(d in 1:MarkovDegree){ | 103 | for(d in 1:MarkovDegree){ |
104 | colnames(NewDotP2_2) <- NULL | 104 | colnames(NewDotP2_2) <- NULL |
105 | ##Which variable are you looking for? | 105 | ##Which variable are you looking for? |
106 | ##This is the VariableEndName | 106 | ##This is the VariableEndName |
107 | if(d == 1){ | 107 | if(d == 1){ |
108 | ##Finding the Parents for the variable | 108 | ##Finding the Parents for the variable |
109 | LocPofVar <- grep(VariableStartName,NewDotP2_2[,2]) | 109 | LocPofVar <- grep(VariableStartName,NewDotP2_2[,2]) |
110 | PofVar <- NewDotP2_2[LocPofVar,1] | 110 | PofVar <- NewDotP2_2[LocPofVar,1] |
111 | AllNamList1[[1]] <- PofVar | 111 | AllNamList1[[1]] <- PofVar |
112 | 112 | ||
113 | ##Finding the Children for the variable | 113 | ##Finding the Children for the variable |
114 | LocCofVar <- grep(VariableStartName,NewDotP2_2[,1]) | 114 | LocCofVar <- grep(VariableStartName,NewDotP2_2[,1]) |
115 | CofVar <- NewDotP2_2[LocCofVar,2] | 115 | CofVar <- NewDotP2_2[LocCofVar,2] |
116 | AllNamList1[[2]] <- CofVar | 116 | AllNamList1[[2]] <- CofVar |
117 | 117 | ||
118 | ##Finding the Co-Parents of the Children for the variable | 118 | ##Finding the Co-Parents of the Children for the variable |
119 | NumofChild <- length(CofVar) | 119 | NumofChild <- length(CofVar) |
120 | if(NumofChild > 0){ | 120 | if(NumofChild > 0){ |
121 | ##Creating a list of the Co-Parents for each of the children | 121 | ##Creating a list of the Co-Parents for each of the children |
122 | ##list size is based on the amount of Children | 122 | ##list size is based on the amount of Children |
123 | COPlist <- vector("character", length = 0) | 123 | COPlist <- vector("character", length = 0) |
124 | nc <- 1 | 124 | nc <- 1 |
125 | for(nc in 1:NumofChild){ | 125 | for(nc in 1:NumofChild){ |
126 | LocCOPofVar <- grep(CofVar[nc],NewDotP2_2[,2]) | 126 | LocCOPofVar <- grep(CofVar[nc],NewDotP2_2[,2]) |
127 | COPofVar <- NewDotP2_2[LocCOPofVar,1] | 127 | COPofVar <- NewDotP2_2[LocCOPofVar,1] |
128 | if(grepl(COPofVar,VariableStartName)){ | 128 | if(sum(grepl(VariableStartName,COPofVar)) >= 1){ |
129 | next | 129 | #positions of variable start name within the vector of co parents |
130 | posoforig <- grep(VariableStartName,COPofVar) | ||
131 | COPofVar <- COPofVar[-posoforig] | ||
132 | COPlist <- append(COPlist,COPofVar) | ||
130 | } else{ | 133 | } else{ |
131 | #COPlist[[nc]] <- COPofVar[COPofVar!=VariableStartName] | 134 | #COPlist[[nc]] <- COPofVar[COPofVar!=VariableStartName] |
132 | COPlist <- append(COPlist,COPofVar) | 135 | COPlist <- append(COPlist,COPofVar) |
133 | } | 136 | } |
134 | nc <- nc + 1 | 137 | nc <- nc + 1 |
135 | } | 138 | } |
136 | } else { | 139 | } else { |
137 | ##Making COPlist empty | 140 | ##Making COPlist empty |
138 | COPlist <- vector("character",length = 0) | 141 | COPlist <- vector("character",length = 0) |
139 | } | 142 | } |
140 | AllNamList1[[3]] <- COPlist | 143 | AllNamList1[[3]] <- COPlist |
141 | AllVarList[[1]] <- AllNamList1 | 144 | AllVarList[[1]] <- AllNamList1 |
142 | } else if(d > 1){ | 145 | } else if(d > 1){ |
143 | ##inner set by the length of the previous AllVarlist we are working on | 146 | ##inner set by the length of the previous AllVarlist we are working on |
144 | lPreVList <- length(AllVarList[[d-1]]) | 147 | lPreVList <- length(AllVarList[[d-1]]) |
145 | ef <- 1 | 148 | ef <- 1 |
146 | ##PCCP will eventually equal the total size that we expect for the iteration (#d) | 149 | ##PCCP will eventually equal the total size that we expect for the iteration (#d) |
147 | ## which is just lPreVList * 3 | 150 | ## which is just lPreVList * 3 |
148 | PCCP <- 1 | 151 | PCCP <- 1 |
149 | newsize <- (lPreVList * 3) | 152 | newsize <- (lPreVList * 3) |
150 | if(d > 3){ | 153 | if(d > 3){ |
151 | AllVarList[[d]] <- vector("list") | 154 | AllVarList[[d]] <- vector("list") |
152 | } | 155 | } |
153 | for(ef in 1:lPreVList){ | 156 | for(ef in 1:lPreVList){ |
154 | ##Finding the Parents | 157 | ##Finding the Parents |
155 | NumofVars <- length(AllVarList[[d-1]][[ef]]) | 158 | NumofVars <- length(AllVarList[[d-1]][[ef]]) |
156 | if(NumofVars > 0){ | 159 | if(NumofVars > 0){ |
157 | ##Creating a list of the Parents for each of the Variables | 160 | ##Creating a list of the Parents for each of the Variables |
158 | ##list size is based on the amount of Previous Variables | 161 | ##list size is based on the amount of Previous Variables |
159 | PofVlist <- vector("character", length = 0) | 162 | PofVlist <- vector("character", length = 0) |
160 | np <- 1 | 163 | np <- 1 |
161 | for(np in 1:NumofVars){ | 164 | for(np in 1:NumofVars){ |
162 | LocPofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,2]) | 165 | LocPofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,2]) |
163 | PofVar <- NewDotP2_2[LocPofVar,1] | 166 | PofVar <- NewDotP2_2[LocPofVar,1] |
164 | PofVlist <- append(PofVlist,PofVar) | 167 | PofVlist <- append(PofVlist,PofVar) |
165 | np <- np + 1 | 168 | np <- np + 1 |
166 | } | 169 | } |
167 | } else { | 170 | } else { |
168 | ##Making PofVlist empty | 171 | ##Making PofVlist empty |
169 | PofVlist <- vector("character",length = 0) | 172 | PofVlist <- vector("character",length = 0) |
170 | } | 173 | } |
171 | AllVarList[[d]][[PCCP]] <- PofVlist | 174 | AllVarList[[d]][[PCCP]] <- PofVlist |
172 | PCCP <- PCCP + 1 | 175 | PCCP <- PCCP + 1 |
173 | 176 | ||
174 | ##Finding the Children | 177 | ##Finding the Children |
175 | if(NumofVars > 0){ | 178 | if(NumofVars > 0){ |
176 | ##Creating a list of the Children for each of the Previous Parents | 179 | ##Creating a list of the Children for each of the Previous Parents |
177 | ##list size is based on the amount of Previous Parents | 180 | ##list size is based on the amount of Previous Parents |
178 | CofVlist <- vector("character", length = 0) | 181 | CofVlist <- vector("character", length = 0) |
179 | np <- 1 | 182 | np <- 1 |
180 | for(np in 1:NumofVars){ | 183 | for(np in 1:NumofVars){ |
181 | LocCofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,1]) | 184 | LocCofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,1]) |
182 | CofVar <- NewDotP2_2[LocCofVar,2] | 185 | CofVar <- NewDotP2_2[LocCofVar,2] |
183 | #if(grepl(VariableStartName,CofVar)){ | 186 | #if(sum(grepl(VariableStartName,CofVar)) >= 1){ |
184 | # next | 187 | # #positions of variable start name within the vector of co parents |
188 | # posoforig <- grep(VariableStartName,COPofVar) | ||
189 | # COPofVar <- COPofVar[-posoforig] | ||
190 | # COPlist <- append(COPlist,COPofVar) | ||
185 | #} else{ | 191 | #} else{ |
186 | CofVlist <- append(CofVlist,CofVar) | 192 | CofVlist <- append(CofVlist,CofVar) |
187 | #} | 193 | #} |
188 | np <- np + 1 | 194 | np <- np + 1 |
189 | } | 195 | } |
190 | } else { | 196 | } else { |
191 | ##Making CofVlist empty | 197 | ##Making CofVlist empty |
192 | CofVlist <- vector("character",length = 0) | 198 | CofVlist <- vector("character",length = 0) |
193 | } | 199 | } |
194 | AllVarList[[d]][[PCCP]] <- CofVlist | 200 | AllVarList[[d]][[PCCP]] <- CofVlist |
195 | PCCP <- PCCP + 1 | 201 | PCCP <- PCCP + 1 |
196 | 202 | ||
197 | ##Finding the Co-Parents | 203 | ##Finding the Co-Parents |
198 | NumofCVars <- length(CofVlist) | 204 | NumofCVars <- length(CofVlist) |
199 | if(NumofCVars > 0){ | 205 | if(NumofCVars > 0){ |
200 | ncp <- 1 | 206 | ncp <- 1 |
201 | CPofClist <- vector("character",length = 0) | 207 | CPofClist <- vector("character",length = 0) |
202 | for(ncp in 1:NumofCVars){ | 208 | for(ncp in 1:NumofCVars){ |
203 | LocCPofCVar <- grep(CofVlist[ncp],NewDotP2_2[,2]) | 209 | LocCPofCVar <- grep(CofVlist[ncp],NewDotP2_2[,2]) |
204 | CPofCVar <- NewDotP2_2[LocCPofCVar,1] | 210 | CPofCVar <- NewDotP2_2[LocCPofCVar,1] |
205 | CPofClist <- append(CPofClist,CPofCVar) | 211 | #if(sum(grepl(,CPofCVar)) >= 1){ |
212 | # #positions of variable start name within the vector of co parents | ||
213 | # posoforig <- grep(VariableStartName,COPofVar) | ||
214 | # COPofVar <- COPofVar[-posoforig] | ||
215 | # COPlist <- append(COPlist,COPofVar) | ||
216 | #} else{ | ||
217 | CPofClist <- append(CPofClist,CPofCVar) | ||
218 | #} | ||
206 | ncp <- ncp + 1 | 219 | ncp <- ncp + 1 |
207 | } | 220 | } |
208 | 221 | ||
209 | } else { | 222 | } else { |
210 | ##Making CPofClist empty | 223 | ##Making CPofClist empty |
211 | CPofClist <- vector("character",length = 0) | 224 | CPofClist <- vector("character",length = 0) |
212 | } | 225 | } |
213 | AllVarList[[d]][[PCCP]] <- CPofClist | 226 | AllVarList[[d]][[PCCP]] <- CPofClist |
214 | PCCP <- PCCP + 1 | 227 | PCCP <- PCCP + 1 |
215 | ef <- ef + 1 | 228 | ef <- ef + 1 |
216 | } | 229 | } |
217 | } | 230 | } |
218 | ##Stop if you have found the VariableEndName value | 231 | ##Stop if you have found the VariableEndName value |
219 | if(sum(grepl(VariableEndName,AllVarList)) > 0){ | 232 | if(sum(grepl(VariableEndName,AllVarList)) > 0){ |
220 | break | 233 | break |
221 | } | 234 | } |
222 | d <- d + 1 | 235 | d <- d + 1 |
223 | } | 236 | } |
224 | ##The Markov Degree is that found below | 237 | ##The Markov Degree is that found below |
225 | d | 238 | d |
226 | } | 239 | } |
227 | #Now use the command MBlanky() with the appropriate settings |