Commit 7e98cf9561ea9d37386684ba753ddff576130a1a
1 parent
8f1c6201bb
Exists in
master
Fixed issue with similar variable names
Showing
1 changed file
with
36 additions
and
17 deletions
 
Show diff stats
RMarkovBlanket.r
| ... | ... | @@ -76,7 +76,7 @@ for(i in 1:sizeDotP1){ | 
| 76 | 76 | NewDotP2_2[chngreq,1] <- gsub(paste0("^",NewDotP1[i,2],"$"),NewDotP1[i,1],NewDotP2_2[chngreq,1]) | 
| 77 | 77 | } | 
| 78 | 78 | } | 
| 79 | - i <- i + 1 | |
| 79 | + #i <- i + 1 | |
| 80 | 80 | } | 
| 81 | 81 | NewDotP2_2 | 
| 82 | 82 | for(j in 1:sizeDotP1){ | 
| ... | ... | @@ -87,11 +87,11 @@ for(j in 1:sizeDotP1){ | 
| 87 | 87 | NewDotP2_2[chngreq,2] <- gsub(paste0("^",NewDotP1[j,2],"$"),NewDotP1[j,1],NewDotP2_2[chngreq,2]) | 
| 88 | 88 | } | 
| 89 | 89 | } | 
| 90 | - j <- j + 1 | |
| 90 | + #j <- j + 1 | |
| 91 | 91 | } | 
| 92 | 92 | |
| 93 | 93 | lrgMarkov <- dim(NewDotP2_2)[1] | 
| 94 | -Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age"){ | |
| 94 | +Blanky <- function(MarkovDegree = 20, VariableStartName = "Alzheimer", VariableEndName = "GRIN2A"){ | |
| 95 | 95 | #Finding the Parents and Children | 
| 96 | 96 | d <- 1 | 
| 97 | 97 | AllNamList1 <- vector("list",length = 3) | 
| ... | ... | @@ -100,20 +100,25 @@ Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", Va | 
| 100 | 100 | ##A list of lists | 
| 101 | 101 | ##outer set by the degree of the Markov blanket | 
| 102 | 102 | AllVarList <- vector("list",length = 3) | 
| 103 | + varfound <- 0 | |
| 103 | 104 | for(d in 1:MarkovDegree){ | 
| 104 | 105 | colnames(NewDotP2_2) <- NULL | 
| 105 | 106 | ##Which variable are you looking for? | 
| 106 | 107 | ##This is the VariableEndName | 
| 107 | 108 | if(d == 1){ | 
| 108 | 109 | ##Finding the Parents for the variable | 
| 109 | - LocPofVar <- grep(VariableStartName,NewDotP2_2[,2]) | |
| 110 | + LocPofVar <- grep(paste0("^",VariableStartName,"$"),NewDotP2_2[,2]) | |
| 110 | 111 | PofVar <- NewDotP2_2[LocPofVar,1] | 
| 111 | 112 | AllNamList1[[1]] <- PofVar | 
| 113 | + AllNamList1[[1]] <- AllNamList1[[1]][!duplicated(AllNamList1[[1]])] | |
| 114 | + varfound <- varfound + sum(grepl(paste0("^",VariableEndName,"$"),AllNamList1[[1]])) | |
| 112 | 115 | |
| 113 | 116 | ##Finding the Children for the variable | 
| 114 | - LocCofVar <- grep(VariableStartName,NewDotP2_2[,1]) | |
| 117 | + LocCofVar <- grep(paste0("^",VariableStartName,"$"),NewDotP2_2[,1]) | |
| 115 | 118 | CofVar <- NewDotP2_2[LocCofVar,2] | 
| 116 | 119 | AllNamList1[[2]] <- CofVar | 
| 120 | + AllNamList1[[2]] <- AllNamList1[[2]][!duplicated(AllNamList1[[2]])] | |
| 121 | + varfound <- varfound + sum(grepl(paste0("^",VariableEndName,"$"),AllNamList1[[2]])) | |
| 117 | 122 | |
| 118 | 123 | ##Finding the Co-Parents of the Children for the variable | 
| 119 | 124 | NumofChild <- length(CofVar) | 
| ... | ... | @@ -123,25 +128,29 @@ Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", Va | 
| 123 | 128 | COPlist <- vector("character", length = 0) | 
| 124 | 129 | nc <- 1 | 
| 125 | 130 | for(nc in 1:NumofChild){ | 
| 126 | - LocCOPofVar <- grep(CofVar[nc],NewDotP2_2[,2]) | |
| 131 | + LocCOPofVar <- grep(paste0("^",CofVar[nc],"$"),NewDotP2_2[,2]) | |
| 127 | 132 | COPofVar <- NewDotP2_2[LocCOPofVar,1] | 
| 128 | 133 | if(sum(grepl(VariableStartName,COPofVar)) >= 1){ | 
| 129 | 134 | #positions of variable start name within the vector of co parents | 
| 130 | - posoforig <- grep(VariableStartName,COPofVar) | |
| 135 | + posoforig <- grep(paste0("^",VariableStartName,"$"),COPofVar) | |
| 131 | 136 | COPofVar <- COPofVar[-posoforig] | 
| 132 | 137 | COPlist <- append(COPlist,COPofVar) | 
| 133 | 138 | } else{ | 
| 134 | 139 | #COPlist[[nc]] <- COPofVar[COPofVar!=VariableStartName] | 
| 135 | 140 | COPlist <- append(COPlist,COPofVar) | 
| 136 | 141 | } | 
| 137 | - nc <- nc + 1 | |
| 142 | + #nc <- nc + 1 | |
| 138 | 143 | } | 
| 139 | 144 | } else { | 
| 140 | 145 | ##Making COPlist empty | 
| 141 | 146 | COPlist <- vector("character",length = 0) | 
| 142 | 147 | } | 
| 143 | 148 | AllNamList1[[3]] <- COPlist | 
| 149 | + AllNamList1[[3]] <- AllNamList1[[3]][!duplicated(AllNamList1[[3]])] | |
| 150 | + varfound <- varfound + sum(grepl(paste0("^",VariableEndName,"$"),AllNamList1[[3]])) | |
| 151 | + | |
| 144 | 152 | AllVarList[[1]] <- AllNamList1 | 
| 153 | + | |
| 145 | 154 | } else if(d > 1){ | 
| 146 | 155 | ##inner set by the length of the previous AllVarlist we are working on | 
| 147 | 156 | lPreVList <- length(AllVarList[[d-1]]) | 
| ... | ... | @@ -162,16 +171,19 @@ Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", Va | 
| 162 | 171 | PofVlist <- vector("character", length = 0) | 
| 163 | 172 | np <- 1 | 
| 164 | 173 | for(np in 1:NumofVars){ | 
| 165 | - LocPofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,2]) | |
| 174 | + LocPofVar <- grep(paste0("^",AllVarList[[d-1]][[ef]][np],"$"),NewDotP2_2[,2]) | |
| 166 | 175 | PofVar <- NewDotP2_2[LocPofVar,1] | 
| 167 | 176 | PofVlist <- append(PofVlist,PofVar) | 
| 168 | - np <- np + 1 | |
| 177 | + #np <- np + 1 | |
| 169 | 178 | } | 
| 170 | 179 | } else { | 
| 171 | 180 | ##Making PofVlist empty | 
| 172 | 181 | PofVlist <- vector("character",length = 0) | 
| 173 | 182 | } | 
| 174 | 183 | AllVarList[[d]][[PCCP]] <- PofVlist | 
| 184 | + AllVarList[[d]][[PCCP]] <- AllVarList[[d]][[PCCP]][!duplicated(AllVarList[[d]][[PCCP]])] | |
| 185 | + ##Have you found the VariableEndName? | |
| 186 | + varfound <- varfound + sum(grepl(paste0("^",VariableEndName,"$"),AllVarList[[d]][[PCCP]])) | |
| 175 | 187 | PCCP <- PCCP + 1 | 
| 176 | 188 | |
| 177 | 189 | ##Finding the Children | 
| ... | ... | @@ -181,7 +193,7 @@ Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", Va | 
| 181 | 193 | CofVlist <- vector("character", length = 0) | 
| 182 | 194 | np <- 1 | 
| 183 | 195 | for(np in 1:NumofVars){ | 
| 184 | - LocCofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,1]) | |
| 196 | + LocCofVar <- grep(paste0("^",AllVarList[[d-1]][[ef]][np],"$"),NewDotP2_2[,1]) | |
| 185 | 197 | CofVar <- NewDotP2_2[LocCofVar,2] | 
| 186 | 198 | #if(sum(grepl(VariableStartName,CofVar)) >= 1){ | 
| 187 | 199 | # #positions of variable start name within the vector of co parents | 
| ... | ... | @@ -191,13 +203,16 @@ Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", Va | 
| 191 | 203 | #} else{ | 
| 192 | 204 | CofVlist <- append(CofVlist,CofVar) | 
| 193 | 205 | #} | 
| 194 | - np <- np + 1 | |
| 206 | + #np <- np + 1 | |
| 195 | 207 | } | 
| 196 | 208 | } else { | 
| 197 | 209 | ##Making CofVlist empty | 
| 198 | 210 | CofVlist <- vector("character",length = 0) | 
| 199 | 211 | } | 
| 200 | 212 | AllVarList[[d]][[PCCP]] <- CofVlist | 
| 213 | + AllVarList[[d]][[PCCP]] <- AllVarList[[d]][[PCCP]][!duplicated(AllVarList[[d]][[PCCP]])] | |
| 214 | + ##Have you found the VariableEndName yet? | |
| 215 | + varfound <- varfound + sum(grepl(paste0("^",VariableEndName,"$"),AllVarList[[d]][[PCCP]])) | |
| 201 | 216 | PCCP <- PCCP + 1 | 
| 202 | 217 | |
| 203 | 218 | ##Finding the Co-Parents | 
| ... | ... | @@ -206,7 +221,7 @@ Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", Va | 
| 206 | 221 | ncp <- 1 | 
| 207 | 222 | CPofClist <- vector("character",length = 0) | 
| 208 | 223 | for(ncp in 1:NumofCVars){ | 
| 209 | - LocCPofCVar <- grep(CofVlist[ncp],NewDotP2_2[,2]) | |
| 224 | + LocCPofCVar <- grep(paste0("^",CofVlist[ncp],"$"),NewDotP2_2[,2]) | |
| 210 | 225 | CPofCVar <- NewDotP2_2[LocCPofCVar,1] | 
| 211 | 226 | #if(sum(grepl(,CPofCVar)) >= 1){ | 
| 212 | 227 | # #positions of variable start name within the vector of co parents | 
| ... | ... | @@ -216,7 +231,7 @@ Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", Va | 
| 216 | 231 | #} else{ | 
| 217 | 232 | CPofClist <- append(CPofClist,CPofCVar) | 
| 218 | 233 | #} | 
| 219 | - ncp <- ncp + 1 | |
| 234 | + #ncp <- ncp + 1 | |
| 220 | 235 | } | 
| 221 | 236 | |
| 222 | 237 | } else { | 
| ... | ... | @@ -224,15 +239,19 @@ Blanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", Va | 
| 224 | 239 | CPofClist <- vector("character",length = 0) | 
| 225 | 240 | } | 
| 226 | 241 | AllVarList[[d]][[PCCP]] <- CPofClist | 
| 242 | + AllVarList[[d]][[PCCP]] <- AllVarList[[d]][[PCCP]][!duplicated(AllVarList[[d]][[PCCP]])] | |
| 243 | + ##Have you found VariableEndName now? | |
| 244 | + varfound <- varfound + sum(grepl(paste0("^",VariableEndName,"$"),AllVarList[[d]][[PCCP]])) | |
| 227 | 245 | PCCP <- PCCP + 1 | 
| 228 | - ef <- ef + 1 | |
| 246 | + #ef <- ef + 1 | |
| 229 | 247 | } | 
| 230 | 248 | } | 
| 249 | + | |
| 231 | 250 | ##Stop if you have found the VariableEndName value | 
| 232 | - if(sum(grepl(VariableEndName,AllVarList)) > 0){ | |
| 251 | + if(varfound > 0){ | |
| 233 | 252 | break | 
| 234 | 253 | } | 
| 235 | - d <- d + 1 | |
| 254 | + #d <- d + 1 | |
| 236 | 255 | } | 
| 237 | 256 | ##The Markov Degree is that found below | 
| 238 | 257 | d |