Commit eb003daf3f0f341bdd9abd261c94ca4a1339cd58

Authored by Efrain Gonzalez
1 parent 556b97bfaf
Exists in master

As-yet-untested code

Showing 1 changed file with 227 additions and 0 deletions   Show diff stats
File was created 1 #Efrain Gonzalez
2 #7/25/2017
3 #Code for Markov Blanket
4
5
6 #The required libraries
7 library(pryr)
8 library(MASS)
9 library(dplyr)
10 library(tidyr)
11 library(readr)
12 library(stringr)
13
14
# Have the user choose the original Dot file that they want to use.
DotFile <- file.choose()

# Part 1: node declarations.
# Keep only the lines that are neither edges ("->"), braces, nor the
# Banjo / labeljust header lines.
TheDotP1 <- read_delim(DotFile, delim = "\t", col_names = FALSE) %>%
  dplyr::filter(!grepl("->|[{}]", X1)) %>%
  dplyr::filter(!grepl("Banjo", X1)) %>%
  dplyr::filter(!grepl("labeljust", X1))
sizeDotP1 <- nrow(TheDotP1)

# NewDotP1: column 1 = variable name, column 2 = node number.
# Rows that cannot be parsed keep the "0" placeholder in both columns.
NewDotP1 <- matrix("0", ncol = 2, nrow = sizeDotP1)

# Pull the first column out as a plain character vector once, instead of
# handing 1x1 tibble cells to str_trim() inside the loop.
node_lines <- str_trim(as.character(TheDotP1[[1]]))

# seq_len() so that a file without node lines runs zero iterations
# (1:0 would iterate over c(1, 0)).
for (counterP1 in seq_len(sizeDotP1)) {
  coldataP1 <- node_lines[counterP1]
  if (is.na(coldataP1) || grepl("Banjo|labeljust|>", coldataP1)) {
    next
  }

  # The line is split on single spaces: token 1 is the node number,
  # token 2 holds the quoted label.
  tokens <- strsplit(coldataP1, " ")[[1]]
  NumberP1 <- tokens[1]

  # Split token 2 on double quotes and keep the fragments that start with a
  # word character; that fragment is taken as the variable name.
  label_parts <- strsplit(tokens[2], "\"")[[1]]
  VarNameP1 <- label_parts[grep("^\\w|^\\d", label_parts)]

  # A matrix cell needs exactly one value; zero or several candidates would
  # abort the whole script, so skip the line and report it instead.
  if (length(VarNameP1) != 1) {
    warning(
      "Could not extract a single variable name from line: ", coldataP1,
      call. = FALSE
    )
    next
  }

  NewDotP1[counterP1, 1] <- VarNameP1
  NewDotP1[counterP1, 2] <- NumberP1
}
44
45
# Part 2: edge statements.
# Keep only the lines of the Dot file that contain "->".
TheDotP2 <- read_delim(DotFile, delim = "\t", col_names = FALSE) %>%
  dplyr::filter(grepl("->", X1))
sizeDotP2 <- nrow(TheDotP2)

# Work on the first column as a plain character vector.
edge_lines <- str_trim(as.character(TheDotP2[[1]]))

# Split every edge line on "->": the first piece is the parent, the second
# piece (up to the first ";") is the child.
edge_ends <- strsplit(edge_lines, "->")
parent_ids <- vapply(edge_ends, function(ends) ends[1], character(1))
child_ids <- vapply(
  edge_ends,
  function(ends) strsplit(ends[2], ";")[[1]][1],
  character(1)
)

# Trim the ids so that "0 -> 1;" gives "0" and "1"; untrimmed ids such as
# "0 " can never match the anchored number lookup that follows.
# Building the matrix in one step also copes with a file that has no edges
# (a 0 x 2 matrix) instead of indexing out of bounds.
NewDotP2 <- matrix(
  str_trim(c(parent_ids, child_ids)),
  ncol = 2,
  nrow = sizeDotP2
)

colnames(NewDotP2) <- c("Parents", "Children")
68
# Matching numbers to variable names.
# NewDotP2_2 is a copy of the edge table NewDotP2 in which every node number
# is replaced by the variable name recorded for it in NewDotP1
# (column 1 = name, column 2 = number).
NewDotP2_2 <- NewDotP2

# Rows whose name equals their number change nothing (this includes the
# "0"/"0" placeholder rows), so leave them out of the lookup.
informative <- which(NewDotP1[, 1] != NewDotP1[, 2])
node_names <- NewDotP1[informative, 1]
node_ids <- NewDotP1[informative, 2]

# Exact, single-pass lookup. match() compares whole strings, so an id is
# never interpreted as a regular expression, and a name that happens to look
# like another node's number is not substituted a second time.
# Column one (parents):
parent_pos <- match(NewDotP2_2[, 1], node_ids)
parent_known <- !is.na(parent_pos)
NewDotP2_2[parent_known, 1] <- node_names[parent_pos[parent_known]]

# Top-level print of the table after the parent column has been translated.
NewDotP2_2

# Column two (children):
child_pos <- match(NewDotP2_2[, 2], node_ids)
child_known <- !is.na(child_pos)
NewDotP2_2[child_known, 2] <- node_names[child_pos[child_known]]

# Number of edges; used as the default MarkovDegree of MBlanky().
lrgMarkov <- nrow(NewDotP2_2)
# MBlanky: smallest Markov-blanket degree at which VariableEndName is reached
# when starting from VariableStartName.
#
# The edge table NewDotP2_2 is read from the enclosing environment. It is a
# two-column character matrix with one row per edge:
# column 1 = parent, column 2 = child.
#
# Degree 1 is the Markov blanket of the start variable: its parents, its
# children, and the other parents of those children. Each further degree
# applies the same expansion (parents, children, parents of the children) to
# the variables first reached at the previous degree.
#
# Arguments:
#   MarkovDegree       Largest degree to try.
#   VariableStartName  Regular expression selecting the start variable.
#   VariableEndName    Regular expression selecting the target variable.
#
# Returns:
#   The first degree at which a reached variable matches VariableEndName,
#   or MarkovDegree + 1 when the target is not reached.
MBlanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age") {
  edge_parent <- NewDotP2_2[, 1]
  edge_child <- NewDotP2_2[, 2]

  # Drop missing names so that malformed edges are not linked to each other.
  drop_na <- function(x) x[!is.na(x)]

  # Parents, children and co-parents of `vars`.
  # `vars` are names taken from the edge table itself, so they are compared
  # as whole strings: "Age" must not pull in the edges of "Stage".
  expand <- function(vars) {
    kids <- drop_na(edge_child[edge_parent %in% vars])
    drop_na(c(
      edge_parent[edge_child %in% vars],
      kids,
      edge_parent[edge_child %in% kids]
    ))
  }

  seen <- character(0)
  frontier <- character(0)

  for (d in seq_len(MarkovDegree)) {
    if (d == 1) {
      # The start variable is supplied by the caller and is located with a
      # regular expression.
      first_parents <- edge_parent[grepl(VariableStartName, edge_child)]
      first_children <- drop_na(
        edge_child[grepl(VariableStartName, edge_parent)]
      )

      # Co-parents: every parent of those children except the start variable
      # itself. Only the start variable is removed; the remaining parents of
      # the child are kept.
      co_parents <- edge_parent[edge_child %in% first_children]
      co_parents <- co_parents[!grepl(VariableStartName, co_parents)]

      reached <- drop_na(c(first_parents, first_children, co_parents))
    } else {
      reached <- expand(frontier)
    }

    # Stop as soon as the target shows up. The test runs on the variable
    # names themselves, never on a deparsed list.
    if (any(grepl(VariableEndName, reached))) {
      return(d)
    }

    # Only variables not expanded before can lead anywhere new; expanding
    # each variable once keeps the work bounded by the size of the graph.
    frontier <- setdiff(reached, seen)
    if (length(frontier) == 0) {
      break
    }
    seen <- c(seen, frontier)
  }

  # Target not reached within MarkovDegree.
  MarkovDegree + 1
}
227 #Now use the command MBlanky() with the appropriate settings