Commit eb003daf3f0f341bdd9abd261c94ca4a1339cd58
1 parent
556b97bfaf
Exists in
master
Code that is as yet untested
Showing
1 changed file
with
227 additions
and
0 deletions
Show diff stats
RMarkovBlanket.r
File was created | 1 | #Efrain Gonzalez | |
2 | #7/25/2017 | ||
3 | #Code for Markov Blanket | ||
4 | |||
5 | |||
6 | #The required libraries | ||
7 | library(pryr) | ||
8 | library(MASS) | ||
9 | library(dplyr) | ||
10 | library(tidyr) | ||
11 | library(readr) | ||
12 | library(stringr) | ||
13 | |||
14 | |||
#Have the user choose an original Dot file that they want to use
DotFile <- file.choose()

#Part 1: the node declaration lines, i.e. every line that is not an edge
#("->"), a brace, or one of the header lines ("Banjo", "labeljust").
#Blank/NA lines are dropped as well because they cannot be split into a
#node number and a variable name.
TheDotP1 <- read_delim(DotFile, delim = "\t", col_names = FALSE) %>%
  dplyr::filter(!grepl("->|[{}]", X1)) %>%
  dplyr::filter(!grepl("Banjo", X1)) %>%
  dplyr::filter(!grepl("labeljust", X1)) %>%
  dplyr::filter(!is.na(X1), nzchar(str_trim(as.character(X1))))

#Work on the first column as a plain character vector (not a tibble cell)
NodeLinesP1 <- str_trim(as.character(TheDotP1[[1]]))

#The node number is the first space-separated token of the line
NumberP1 <- sub(" .*$", "", NodeLinesP1)

#The variable name is the first quoted string of the line. When the line
#carries a label= attribute everything before it is stripped first, so the
#quoted label wins over any other quoted attribute and names that contain
#spaces are kept whole.
AfterLabelP1 <- sub("^.*label\\s*=\\s*", "", NodeLinesP1)
QuotePosP1 <- regexpr("\"[^\"]*\"", AfterLabelP1)
VarNameP1 <- rep(NA_character_, length(NodeLinesP1))
VarNameP1[QuotePosP1 > 0] <- gsub("\"", "", regmatches(AfterLabelP1, QuotePosP1))

#Lines without a usable quoted name are skipped (with a warning) instead of
#aborting the script with a zero-length replacement error
ParsedP1 <- !is.na(VarNameP1) & nzchar(VarNameP1)
if (any(!ParsedP1)) {
  warning(sum(!ParsedP1),
          " line(s) of the Dot file had no quoted variable name and were skipped",
          call. = FALSE)
}
TheDotP1 <- TheDotP1[ParsedP1, , drop = FALSE]
sizeDotP1 <- sum(ParsedP1)

#Column 1 holds the variable name, column 2 the node number
NewDotP1 <- matrix(c(VarNameP1[ParsedP1], NumberP1[ParsedP1]), ncol = 2)
44 | |||
45 | |||
#Part 2: the edge lines of the Dot file, i.e. every line containing "->"
TheDotP2 <- read_delim(DotFile, delim = "\t", col_names = FALSE) %>%
  dplyr::filter(grepl("->", X1))
sizeDotP2 <- dim(TheDotP2)[1]

#Work on the first column as a plain character vector (not a tibble cell)
EdgeLinesP2 <- as.character(TheDotP2[[1]])

#The parent is everything before the first "->". Surrounding whitespace is
#removed so that "0 -> 1;" yields "0" and not "0 ", which would never match
#the node numbers later on.
ParentNumP2 <- str_trim(sub("->.*$", "", EdgeLinesP2))

#The child is what follows the first "->", cut at the terminating ";", at
#an attribute list "[" or at a further "->", and trimmed
ChildNumP2 <- sub("^.*?->", "", EdgeLinesP2, perl = TRUE)
ChildNumP2 <- str_trim(sub("(->|;|\\[).*$", "", ChildNumP2))

#Column 1 holds the parent number, column 2 the child number; this is a
#0 x 2 matrix when the file contains no edges
NewDotP2 <- matrix(c(ParentNumP2, ChildNumP2), ncol = 2)
66 | |||
colnames(NewDotP2) <- c("Parents","Children")

#Matching numbers to variable names
#NewDotP1 holds the variable name in column 1 and the node number in
#column 2. Rows where both are equal are identity mappings (this includes
#the untouched "0"/"0" placeholder rows) and are left out of the lookup.
IdToNameP1 <- NewDotP1[which(NewDotP1[, 1] != NewDotP1[, 2]), , drop = FALSE]

#Every cell is looked up against the ORIGINAL numbers in NewDotP2 with an
#exact match, so a cell is translated at most once. Replacing in place, one
#node at a time, could translate a value twice when a variable name happens
#to be identical to another node's number.
NewDotP2_2 <- NewDotP2
for (EdgeColP2 in 1:2) {
  HitP1 <- match(NewDotP2[, EdgeColP2], IdToNameP1[, 2])
  KnownP1 <- !is.na(HitP1)
  NewDotP2_2[KnownP1, EdgeColP2] <- IdToNameP1[HitP1[KnownP1], 1]
}

#The number of edges is the default upper bound for the Markov degree
lrgMarkov <- dim(NewDotP2_2)[1]
#MBlanky: how many Markov-blanket expansions does it take to get from
#VariableStartName to VariableEndName?
#
#Degree 1 is the Markov blanket of VariableStartName (its parents, its
#children and the other parents of its children). Every further degree adds
#the parents, children and co-parents of the variables found one degree
#earlier.
#
#Arguments
#  MarkovDegree      largest degree to try (default: the number of edges)
#  VariableStartName name of the variable the search starts from
#  VariableEndName   name of the variable that is searched for
#  EdgeMatrix        two-column matrix of edges, parents in column 1 and
#                    children in column 2 (default: NewDotP2_2)
#
#Value
#  The first degree at which VariableEndName shows up, or MarkovDegree + 1
#  when it is not found within MarkovDegree expansions.
#
#Variable names are compared exactly, so looking for "Age" does not hit
#"Stage" or "Age_group".
MBlanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age", EdgeMatrix = NewDotP2_2){
  stopifnot(
    is.numeric(MarkovDegree), length(MarkovDegree) == 1,
    !is.na(MarkovDegree), MarkovDegree >= 0,
    is.character(VariableStartName), length(VariableStartName) == 1,
    is.character(VariableEndName), length(VariableEndName) == 1
  )
  EdgeMatrix <- as.matrix(EdgeMatrix)
  stopifnot(ncol(EdgeMatrix) >= 2)

  ##One entry per edge: EdgeParents[k] -> EdgeChildren[k]
  EdgeParents <- trimws(as.character(EdgeMatrix[, 1]))
  EdgeChildren <- trimws(as.character(EdgeMatrix[, 2]))

  ##Parents, children and parents-of-children of a set of variables.
  ##The variables themselves can be part of the result (each one is a
  ##parent of its own children).
  BlanketOf <- function(Vars){
    PofVars <- EdgeParents[EdgeChildren %in% Vars]
    CofVars <- EdgeChildren[EdgeParents %in% Vars]
    CPofVars <- EdgeParents[EdgeChildren %in% CofVars]
    unique(c(PofVars, CofVars, CPofVars))
  }

  ##Everything found so far, over all degrees
  FoundVars <- character(0)
  ##Variables whose neighbourhood has already been looked at
  ExpandedVars <- VariableStartName
  ##Variables whose neighbourhood is looked at in the current degree
  Frontier <- VariableStartName

  for(d in seq_len(MarkovDegree)){
    LevelVars <- BlanketOf(Frontier)
    if(d == 1){
      ##The start variable is not a member of its own Markov blanket
      LevelVars <- setdiff(LevelVars, VariableStartName)
    }
    FoundVars <- union(FoundVars, LevelVars)

    ##Stop if you have found the VariableEndName value
    if(VariableEndName %in% FoundVars){
      return(d)
    }

    ##Only variables that were not expanded before can add anything new;
    ##when there are none, further degrees cannot change the result
    Frontier <- setdiff(LevelVars, ExpandedVars)
    if(length(Frontier) == 0){
      break
    }
    ExpandedVars <- union(ExpandedVars, Frontier)
  }

  ##Not found within MarkovDegree expansions
  MarkovDegree + 1
}
227 | #Now use the command MBlanky() with the appropriate settings |