Commit 8f1c6201bb5cf6f1c0432bb580ee6422e468030c

Authored by Efrain Gonzalez
1 parent 4d40f27465
Exists in master

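Updated Version: bumps the header date to 8/25/2017, renames MBlanky to Blanky, and removes the start variable from each child's co-parent vector instead of skipping that child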

Showing 1 changed file with 21 additions and 9 deletions   Show diff stats
1 #Efrain Gonzalez 1 #Efrain Gonzalez
2 #7/25/2017 2 #8/25/2017
3 #Code for Markov Blanket 3 #Code for Markov Blanket
4 4
5 5
6 #The required libraries 6 #The required libraries
7 library(pryr) 7 library(pryr)
8 library(MASS) 8 library(MASS)
9 library(dplyr) 9 library(dplyr)
10 library(tidyr) 10 library(tidyr)
11 library(readr) 11 library(readr)
12 library(stringr) 12 library(stringr)
13 13
14 14
15 #Have the user choose an original Dot file that they want to use 15 #Have the user choose an original Dot file that they want to use
16 DotFile <- file.choose() 16 DotFile <- file.choose()
17 TheDotP1 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% 17 TheDotP1 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>%
18 dplyr::filter(!grepl("->|[{}]",X1)) %>% 18 dplyr::filter(!grepl("->|[{}]",X1)) %>%
19 dplyr::filter(!grepl("Banjo",X1)) %>% 19 dplyr::filter(!grepl("Banjo",X1)) %>%
20 dplyr::filter(!grepl("labeljust",X1)) 20 dplyr::filter(!grepl("labeljust",X1))
21 counterP1 <- 1 21 counterP1 <- 1
22 sizeDotP1 <- dim(TheDotP1)[1] 22 sizeDotP1 <- dim(TheDotP1)[1]
23 NewDotP1 <- matrix("0",ncol = 2, nrow = sizeDotP1) 23 NewDotP1 <- matrix("0",ncol = 2, nrow = sizeDotP1)
24 for(counterP1 in 1:sizeDotP1){ 24 for(counterP1 in 1:sizeDotP1){
25 coldataP1 <- str_trim(TheDotP1[counterP1,1]) %>% 25 coldataP1 <- str_trim(TheDotP1[counterP1,1]) %>%
26 as.character(.,stringsAsFactors = FALSE) 26 as.character(.,stringsAsFactors = FALSE)
27 if(grepl("Banjo|labeljust|>",coldataP1)==FALSE){ 27 if(grepl("Banjo|labeljust|>",coldataP1)==FALSE){
28 NumberP1 <- strsplit(coldataP1," ") %>% 28 NumberP1 <- strsplit(coldataP1," ") %>%
29 .[[1]]%>% 29 .[[1]]%>%
30 .[1] 30 .[1]
31 VarNameP1 <- strsplit(coldataP1," ") %>% 31 VarNameP1 <- strsplit(coldataP1," ") %>%
32 .[[1]] %>% 32 .[[1]] %>%
33 .[2] %>% 33 .[2] %>%
34 strsplit(.,"\"") %>% 34 strsplit(.,"\"") %>%
35 .[[1]] %>% 35 .[[1]] %>%
36 .[grep("^\\w|^\\d",.)] 36 .[grep("^\\w|^\\d",.)]
37 NewDotP1[counterP1,1] <- VarNameP1 37 NewDotP1[counterP1,1] <- VarNameP1
38 NewDotP1[counterP1,2] <- NumberP1 38 NewDotP1[counterP1,2] <- NumberP1
39 } 39 }
40 if(grepl("->",coldataP1) == TRUE){ 40 if(grepl("->",coldataP1) == TRUE){
41 break 41 break
42 } 42 }
43 } 43 }
44 44
45 45
46 TheDotP2 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>% 46 TheDotP2 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>%
47 dplyr::filter(grepl("->",X1)) 47 dplyr::filter(grepl("->",X1))
48 counterP2 <- 1 48 counterP2 <- 1
49 sizeDotP2 <- dim(TheDotP2)[1] 49 sizeDotP2 <- dim(TheDotP2)[1]
50 NewDotP2 <- matrix("0",ncol = 2, nrow = sizeDotP2) 50 NewDotP2 <- matrix("0",ncol = 2, nrow = sizeDotP2)
51 for(counterP2 in 1:sizeDotP2){ 51 for(counterP2 in 1:sizeDotP2){
52 coldataP2 <- str_trim(TheDotP2[counterP2,1]) %>% 52 coldataP2 <- str_trim(TheDotP2[counterP2,1]) %>%
53 as.character(.,stringsAsFactors = FALSE) 53 as.character(.,stringsAsFactors = FALSE)
54 ParentNumP2 <- strsplit(coldataP2,"->") %>% 54 ParentNumP2 <- strsplit(coldataP2,"->") %>%
55 .[[1]]%>% 55 .[[1]]%>%
56 .[1] 56 .[1]
57 ChildNumP2 <- strsplit(coldataP2,"->") %>% 57 ChildNumP2 <- strsplit(coldataP2,"->") %>%
58 .[[1]] %>% 58 .[[1]] %>%
59 .[2] %>% 59 .[2] %>%
60 strsplit(.,";") %>% 60 strsplit(.,";") %>%
61 .[[1]] %>% 61 .[[1]] %>%
62 .[1] 62 .[1]
63 NewDotP2[counterP2,1] <- ParentNumP2 63 NewDotP2[counterP2,1] <- ParentNumP2
64 NewDotP2[counterP2,2] <- ChildNumP2 64 NewDotP2[counterP2,2] <- ChildNumP2
65 } 65 }
66 66
67 colnames(NewDotP2) <- c("Parents","Children") 67 colnames(NewDotP2) <- c("Parents","Children")
68 68
69 #Matching numbers to variable names 69 #Matching numbers to variable names
70 NewDotP2_2 <- NewDotP2 70 NewDotP2_2 <- NewDotP2
71 for(i in 1:sizeDotP1){ 71 for(i in 1:sizeDotP1){
72 #Where is the variable located within NewDotP2 (column one only)? 72 #Where is the variable located within NewDotP2 (column one only)?
73 chngreq <- grep(paste0("^",NewDotP1[i,2],"$"),NewDotP2_2[,1]) 73 chngreq <- grep(paste0("^",NewDotP1[i,2],"$"),NewDotP2_2[,1])
74 if(is.na(sum(chngreq)) == FALSE){ 74 if(is.na(sum(chngreq)) == FALSE){
75 if(sum(chngreq) > 0){ 75 if(sum(chngreq) > 0){
76 NewDotP2_2[chngreq,1] <- gsub(paste0("^",NewDotP1[i,2],"$"),NewDotP1[i,1],NewDotP2_2[chngreq,1]) 76 NewDotP2_2[chngreq,1] <- gsub(paste0("^",NewDotP1[i,2],"$"),NewDotP1[i,1],NewDotP2_2[chngreq,1])
77 } 77 }
78 } 78 }
79 i <- i + 1 79 i <- i + 1
80 } 80 }
81 NewDotP2_2 81 NewDotP2_2
82 for(j in 1:sizeDotP1){ 82 for(j in 1:sizeDotP1){
83 #Where is the variable located within NewDotP2 (column two only)? 83 #Where is the variable located within NewDotP2 (column two only)?
84 chngreq <- grep(paste0("^",NewDotP1[j,2],"$"),NewDotP2_2[,2]) 84 chngreq <- grep(paste0("^",NewDotP1[j,2],"$"),NewDotP2_2[,2])
85 if(is.na(sum(chngreq)) == FALSE){ 85 if(is.na(sum(chngreq)) == FALSE){
86 if(sum(chngreq) > 0){ 86 if(sum(chngreq) > 0){
87 NewDotP2_2[chngreq,2] <- gsub(paste0("^",NewDotP1[j,2],"$"),NewDotP1[j,1],NewDotP2_2[chngreq,2]) 87 NewDotP2_2[chngreq,2] <- gsub(paste0("^",NewDotP1[j,2],"$"),NewDotP1[j,1],NewDotP2_2[chngreq,2])
88 } 88 }
89 } 89 }
90 j <- j + 1 90 j <- j + 1
91 } 91 }
92 92
93 lrgMarkov <- dim(NewDotP2_2)[1] 93 lrgMarkov <- dim(NewDotP2_2)[1]
94 MBlanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age"){ 94 Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age"){
95 #Finding the Parents and Children 95 #Finding the Parents and Children
96 d <- 1 96 d <- 1
97 AllNamList1 <- vector("list",length = 3) 97 AllNamList1 <- vector("list",length = 3)
98 #AllNamList <- vector("list", length = 3) 98 #AllNamList <- vector("list", length = 3)
99 ##Finding the Parents of the Parents 99 ##Finding the Parents of the Parents
100 ##A list of lists 100 ##A list of lists
101 ##outer set by the degree of the Markov blanket 101 ##outer set by the degree of the Markov blanket
102 AllVarList <- vector("list",length = 3) 102 AllVarList <- vector("list",length = 3)
103 for(d in 1:MarkovDegree){ 103 for(d in 1:MarkovDegree){
104 colnames(NewDotP2_2) <- NULL 104 colnames(NewDotP2_2) <- NULL
105 ##Which variable are you looking for? 105 ##Which variable are you looking for?
106 ##This is the VariableEndName 106 ##This is the VariableEndName
107 if(d == 1){ 107 if(d == 1){
108 ##Finding the Parents for the variable 108 ##Finding the Parents for the variable
109 LocPofVar <- grep(VariableStartName,NewDotP2_2[,2]) 109 LocPofVar <- grep(VariableStartName,NewDotP2_2[,2])
110 PofVar <- NewDotP2_2[LocPofVar,1] 110 PofVar <- NewDotP2_2[LocPofVar,1]
111 AllNamList1[[1]] <- PofVar 111 AllNamList1[[1]] <- PofVar
112 112
113 ##Finding the Children for the variable 113 ##Finding the Children for the variable
114 LocCofVar <- grep(VariableStartName,NewDotP2_2[,1]) 114 LocCofVar <- grep(VariableStartName,NewDotP2_2[,1])
115 CofVar <- NewDotP2_2[LocCofVar,2] 115 CofVar <- NewDotP2_2[LocCofVar,2]
116 AllNamList1[[2]] <- CofVar 116 AllNamList1[[2]] <- CofVar
117 117
118 ##Finding the Co-Parents of the Children for the variable 118 ##Finding the Co-Parents of the Children for the variable
119 NumofChild <- length(CofVar) 119 NumofChild <- length(CofVar)
120 if(NumofChild > 0){ 120 if(NumofChild > 0){
121 ##Creating a list of the Co-Parents for each of the children 121 ##Creating a list of the Co-Parents for each of the children
122 ##list size is based on the amount of Children 122 ##list size is based on the amount of Children
123 COPlist <- vector("character", length = 0) 123 COPlist <- vector("character", length = 0)
124 nc <- 1 124 nc <- 1
125 for(nc in 1:NumofChild){ 125 for(nc in 1:NumofChild){
126 LocCOPofVar <- grep(CofVar[nc],NewDotP2_2[,2]) 126 LocCOPofVar <- grep(CofVar[nc],NewDotP2_2[,2])
127 COPofVar <- NewDotP2_2[LocCOPofVar,1] 127 COPofVar <- NewDotP2_2[LocCOPofVar,1]
128 if(grepl(COPofVar,VariableStartName)){ 128 if(sum(grepl(VariableStartName,COPofVar)) >= 1){
129 next 129 #positions of variable start name within the vector of co parents
130 posoforig <- grep(VariableStartName,COPofVar)
131 COPofVar <- COPofVar[-posoforig]
132 COPlist <- append(COPlist,COPofVar)
130 } else{ 133 } else{
131 #COPlist[[nc]] <- COPofVar[COPofVar!=VariableStartName] 134 #COPlist[[nc]] <- COPofVar[COPofVar!=VariableStartName]
132 COPlist <- append(COPlist,COPofVar) 135 COPlist <- append(COPlist,COPofVar)
133 } 136 }
134 nc <- nc + 1 137 nc <- nc + 1
135 } 138 }
136 } else { 139 } else {
137 ##Making COPlist empty 140 ##Making COPlist empty
138 COPlist <- vector("character",length = 0) 141 COPlist <- vector("character",length = 0)
139 } 142 }
140 AllNamList1[[3]] <- COPlist 143 AllNamList1[[3]] <- COPlist
141 AllVarList[[1]] <- AllNamList1 144 AllVarList[[1]] <- AllNamList1
142 } else if(d > 1){ 145 } else if(d > 1){
143 ##inner set by the length of the previous AllVarlist we are working on 146 ##inner set by the length of the previous AllVarlist we are working on
144 lPreVList <- length(AllVarList[[d-1]]) 147 lPreVList <- length(AllVarList[[d-1]])
145 ef <- 1 148 ef <- 1
146 ##PCCP will eventually equal the total size that we expect for the iteration (#d) 149 ##PCCP will eventually equal the total size that we expect for the iteration (#d)
147 ## which is just lPreVList * 3 150 ## which is just lPreVList * 3
148 PCCP <- 1 151 PCCP <- 1
149 newsize <- (lPreVList * 3) 152 newsize <- (lPreVList * 3)
150 if(d > 3){ 153 if(d > 3){
151 AllVarList[[d]] <- vector("list") 154 AllVarList[[d]] <- vector("list")
152 } 155 }
153 for(ef in 1:lPreVList){ 156 for(ef in 1:lPreVList){
154 ##Finding the Parents 157 ##Finding the Parents
155 NumofVars <- length(AllVarList[[d-1]][[ef]]) 158 NumofVars <- length(AllVarList[[d-1]][[ef]])
156 if(NumofVars > 0){ 159 if(NumofVars > 0){
157 ##Creating a list of the Parents for each of the Variables 160 ##Creating a list of the Parents for each of the Variables
158 ##list size is based on the amount of Previous Variables 161 ##list size is based on the amount of Previous Variables
159 PofVlist <- vector("character", length = 0) 162 PofVlist <- vector("character", length = 0)
160 np <- 1 163 np <- 1
161 for(np in 1:NumofVars){ 164 for(np in 1:NumofVars){
162 LocPofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,2]) 165 LocPofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,2])
163 PofVar <- NewDotP2_2[LocPofVar,1] 166 PofVar <- NewDotP2_2[LocPofVar,1]
164 PofVlist <- append(PofVlist,PofVar) 167 PofVlist <- append(PofVlist,PofVar)
165 np <- np + 1 168 np <- np + 1
166 } 169 }
167 } else { 170 } else {
168 ##Making PofVlist empty 171 ##Making PofVlist empty
169 PofVlist <- vector("character",length = 0) 172 PofVlist <- vector("character",length = 0)
170 } 173 }
171 AllVarList[[d]][[PCCP]] <- PofVlist 174 AllVarList[[d]][[PCCP]] <- PofVlist
172 PCCP <- PCCP + 1 175 PCCP <- PCCP + 1
173 176
174 ##Finding the Children 177 ##Finding the Children
175 if(NumofVars > 0){ 178 if(NumofVars > 0){
176 ##Creating a list of the Children for each of the Previous Parents 179 ##Creating a list of the Children for each of the Previous Parents
177 ##list size is based on the amount of Previous Parents 180 ##list size is based on the amount of Previous Parents
178 CofVlist <- vector("character", length = 0) 181 CofVlist <- vector("character", length = 0)
179 np <- 1 182 np <- 1
180 for(np in 1:NumofVars){ 183 for(np in 1:NumofVars){
181 LocCofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,1]) 184 LocCofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,1])
182 CofVar <- NewDotP2_2[LocCofVar,2] 185 CofVar <- NewDotP2_2[LocCofVar,2]
183 #if(grepl(VariableStartName,CofVar)){ 186 #if(sum(grepl(VariableStartName,CofVar)) >= 1){
184 # next 187 # #positions of variable start name within the vector of co parents
188 # posoforig <- grep(VariableStartName,COPofVar)
189 # COPofVar <- COPofVar[-posoforig]
190 # COPlist <- append(COPlist,COPofVar)
185 #} else{ 191 #} else{
186 CofVlist <- append(CofVlist,CofVar) 192 CofVlist <- append(CofVlist,CofVar)
187 #} 193 #}
188 np <- np + 1 194 np <- np + 1
189 } 195 }
190 } else { 196 } else {
191 ##Making CofVlist empty 197 ##Making CofVlist empty
192 CofVlist <- vector("character",length = 0) 198 CofVlist <- vector("character",length = 0)
193 } 199 }
194 AllVarList[[d]][[PCCP]] <- CofVlist 200 AllVarList[[d]][[PCCP]] <- CofVlist
195 PCCP <- PCCP + 1 201 PCCP <- PCCP + 1
196 202
197 ##Finding the Co-Parents 203 ##Finding the Co-Parents
198 NumofCVars <- length(CofVlist) 204 NumofCVars <- length(CofVlist)
199 if(NumofCVars > 0){ 205 if(NumofCVars > 0){
200 ncp <- 1 206 ncp <- 1
201 CPofClist <- vector("character",length = 0) 207 CPofClist <- vector("character",length = 0)
202 for(ncp in 1:NumofCVars){ 208 for(ncp in 1:NumofCVars){
203 LocCPofCVar <- grep(CofVlist[ncp],NewDotP2_2[,2]) 209 LocCPofCVar <- grep(CofVlist[ncp],NewDotP2_2[,2])
204 CPofCVar <- NewDotP2_2[LocCPofCVar,1] 210 CPofCVar <- NewDotP2_2[LocCPofCVar,1]
205 CPofClist <- append(CPofClist,CPofCVar) 211 #if(sum(grepl(,CPofCVar)) >= 1){
212 # #positions of variable start name within the vector of co parents
213 # posoforig <- grep(VariableStartName,COPofVar)
214 # COPofVar <- COPofVar[-posoforig]
215 # COPlist <- append(COPlist,COPofVar)
216 #} else{
217 CPofClist <- append(CPofClist,CPofCVar)
218 #}
206 ncp <- ncp + 1 219 ncp <- ncp + 1
207 } 220 }
208 221
209 } else { 222 } else {
210 ##Making CPofClist empty 223 ##Making CPofClist empty
211 CPofClist <- vector("character",length = 0) 224 CPofClist <- vector("character",length = 0)
212 } 225 }
213 AllVarList[[d]][[PCCP]] <- CPofClist 226 AllVarList[[d]][[PCCP]] <- CPofClist
214 PCCP <- PCCP + 1 227 PCCP <- PCCP + 1
215 ef <- ef + 1 228 ef <- ef + 1
216 } 229 }
217 } 230 }
218 ##Stop if you have found the VariableEndName value 231 ##Stop if you have found the VariableEndName value
219 if(sum(grepl(VariableEndName,AllVarList)) > 0){ 232 if(sum(grepl(VariableEndName,AllVarList)) > 0){
220 break 233 break
221 } 234 }
222 d <- d + 1 235 d <- d + 1
223 } 236 }
224 ##The Markov Degree is that found below 237 ##The Markov Degree is that found below
225 d 238 d
226 } 239 }
227 #Now use the command MBlanky() (defined as Blanky() in the updated version) with the appropriate settings