Commit eb003daf3f0f341bdd9abd261c94ca4a1339cd58

Authored by Efrain Gonzalez
1 parent 556b97bfaf
Exists in master

As-yet untested code

Showing 1 changed file with 227 additions and 0 deletions   Show diff stats
... ... @@ -0,0 +1,227 @@
  1 +#Efrain Gonzalez
  2 +#7/25/2017
  3 +#Code for Markov Blanket
  4 +
  5 +
  6 +#The required libraries
  7 +library(pryr)
  8 +library(MASS)
  9 +library(dplyr)
  10 +library(tidyr)
  11 +library(readr)
  12 +library(stringr)
  13 +
  14 +
  15 +#Have the user choose an original Dot file that they want to use
  16 +DotFile <- file.choose()
  17 +TheDotP1 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>%
  18 + dplyr::filter(!grepl("->|[{}]",X1)) %>%
  19 + dplyr::filter(!grepl("Banjo",X1)) %>%
  20 + dplyr::filter(!grepl("labeljust",X1))
  21 +counterP1 <- 1
  22 +sizeDotP1 <- dim(TheDotP1)[1]
  23 +NewDotP1 <- matrix("0",ncol = 2, nrow = sizeDotP1)
  24 +for(counterP1 in 1:sizeDotP1){
  25 + coldataP1 <- str_trim(TheDotP1[counterP1,1]) %>%
  26 + as.character(.,stringsAsFactors = FALSE)
  27 + if(grepl("Banjo|labeljust|>",coldataP1)==FALSE){
  28 + NumberP1 <- strsplit(coldataP1," ") %>%
  29 + .[[1]]%>%
  30 + .[1]
  31 + VarNameP1 <- strsplit(coldataP1," ") %>%
  32 + .[[1]] %>%
  33 + .[2] %>%
  34 + strsplit(.,"\"") %>%
  35 + .[[1]] %>%
  36 + .[grep("^\\w|^\\d",.)]
  37 + NewDotP1[counterP1,1] <- VarNameP1
  38 + NewDotP1[counterP1,2] <- NumberP1
  39 + }
  40 + if(grepl("->",coldataP1) == TRUE){
  41 + break
  42 + }
  43 +}
  44 +
  45 +
  46 +TheDotP2 <- read_delim(DotFile,delim = "\t",col_names = FALSE) %>%
  47 + dplyr::filter(grepl("->",X1))
  48 +counterP2 <- 1
  49 +sizeDotP2 <- dim(TheDotP2)[1]
  50 +NewDotP2 <- matrix("0",ncol = 2, nrow = sizeDotP2)
  51 +for(counterP2 in 1:sizeDotP2){
  52 + coldataP2 <- str_trim(TheDotP2[counterP2,1]) %>%
  53 + as.character(.,stringsAsFactors = FALSE)
  54 + ParentNumP2 <- strsplit(coldataP2,"->") %>%
  55 + .[[1]]%>%
  56 + .[1]
  57 + ChildNumP2 <- strsplit(coldataP2,"->") %>%
  58 + .[[1]] %>%
  59 + .[2] %>%
  60 + strsplit(.,";") %>%
  61 + .[[1]] %>%
  62 + .[1]
  63 + NewDotP2[counterP2,1] <- ParentNumP2
  64 + NewDotP2[counterP2,2] <- ChildNumP2
  65 +}
  66 +
  67 +colnames(NewDotP2) <- c("Parents","Children")
  68 +
  69 +#Matching numbers to variable names
  70 +NewDotP2_2 <- NewDotP2
  71 +for(i in 1:sizeDotP1){
  72 + #Where is the variable located within NewDotP2 (column one only)?
  73 + chngreq <- grep(paste0("^",NewDotP1[i,2],"$"),NewDotP2_2[,1])
  74 + if(is.na(sum(chngreq)) == FALSE){
  75 + if(sum(chngreq) > 0){
  76 + NewDotP2_2[chngreq,1] <- gsub(paste0("^",NewDotP1[i,2],"$"),NewDotP1[i,1],NewDotP2_2[chngreq,1])
  77 + }
  78 + }
  79 + i <- i + 1
  80 +}
  81 +NewDotP2_2
  82 +for(j in 1:sizeDotP1){
  83 + #Where is the variable located within NewDotP2 (column two only)?
  84 + chngreq <- grep(paste0("^",NewDotP1[j,2],"$"),NewDotP2_2[,2])
  85 + if(is.na(sum(chngreq)) == FALSE){
  86 + if(sum(chngreq) > 0){
  87 + NewDotP2_2[chngreq,2] <- gsub(paste0("^",NewDotP1[j,2],"$"),NewDotP1[j,1],NewDotP2_2[chngreq,2])
  88 + }
  89 + }
  90 + j <- j + 1
  91 +}
  92 +
  93 +lrgMarkov <- dim(NewDotP2_2)[1]
  94 +MBlanky <- function(MarkovDegree = lrgMarkov, VariableStartName = "Alzheimer", VariableEndName = "Age"){
  95 + #Finding the Parents and Children
  96 + d <- 1
  97 + AllNamList1 <- vector("list",length = 3)
  98 + #AllNamList <- vector("list", length = 3)
  99 + ##Finding the Parents of the Parents
  100 + ##A list of lists
  101 + ##outer set by the degree of the Markov blanket
  102 + AllVarList <- vector("list",length = 3)
  103 + for(d in 1:MarkovDegree){
  104 + colnames(NewDotP2_2) <- NULL
  105 + ##Which variable are you looking for?
  106 + ##This is the VariableEndName
  107 + if(d == 1){
  108 + ##Finding the Parents for the variable
  109 + LocPofVar <- grep(VariableStartName,NewDotP2_2[,2])
  110 + PofVar <- NewDotP2_2[LocPofVar,1]
  111 + AllNamList1[[1]] <- PofVar
  112 +
  113 + ##Finding the Children for the variable
  114 + LocCofVar <- grep(VariableStartName,NewDotP2_2[,1])
  115 + CofVar <- NewDotP2_2[LocCofVar,2]
  116 + AllNamList1[[2]] <- CofVar
  117 +
  118 + ##Finding the Co-Parents of the Children for the variable
  119 + NumofChild <- length(CofVar)
  120 + if(NumofChild > 0){
  121 + ##Creating a list of the Co-Parents for each of the children
  122 + ##list size is based on the amount of Children
  123 + COPlist <- vector("character", length = 0)
  124 + nc <- 1
  125 + for(nc in 1:NumofChild){
  126 + LocCOPofVar <- grep(CofVar[nc],NewDotP2_2[,2])
  127 + COPofVar <- NewDotP2_2[LocCOPofVar,1]
  128 + if(grepl(COPofVar,VariableStartName)){
  129 + next
  130 + } else{
  131 + #COPlist[[nc]] <- COPofVar[COPofVar!=VariableStartName]
  132 + COPlist <- append(COPlist,COPofVar)
  133 + }
  134 + nc <- nc + 1
  135 + }
  136 + } else {
  137 + ##Making COPlist empty
  138 + COPlist <- vector("character",length = 0)
  139 + }
  140 + AllNamList1[[3]] <- COPlist
  141 + AllVarList[[1]] <- AllNamList1
  142 + } else if(d > 1){
  143 + ##inner set by the length of the previous AllVarlist we are working on
  144 + lPreVList <- length(AllVarList[[d-1]])
  145 + ef <- 1
  146 + ##PCCP will eventually equal the total size that we expect for the iteration (#d)
  147 + ## which is just lPreVList * 3
  148 + PCCP <- 1
  149 + newsize <- (lPreVList * 3)
  150 + if(d > 3){
  151 + AllVarList[[d]] <- vector("list")
  152 + }
  153 + for(ef in 1:lPreVList){
  154 + ##Finding the Parents
  155 + NumofVars <- length(AllVarList[[d-1]][[ef]])
  156 + if(NumofVars > 0){
  157 + ##Creating a list of the Parents for each of the Variables
  158 + ##list size is based on the amount of Previous Variables
  159 + PofVlist <- vector("character", length = 0)
  160 + np <- 1
  161 + for(np in 1:NumofVars){
  162 + LocPofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,2])
  163 + PofVar <- NewDotP2_2[LocPofVar,1]
  164 + PofVlist <- append(PofVlist,PofVar)
  165 + np <- np + 1
  166 + }
  167 + } else {
  168 + ##Making COPlist empty
  169 + PofVlist <- vector("character",length = 0)
  170 + }
  171 + AllVarList[[d]][[PCCP]] <- PofVlist
  172 + PCCP <- PCCP + 1
  173 +
  174 + ##Finding the Children
  175 + if(NumofVars > 0){
  176 + ##Creating a list of the Children for each of the Previous Parents
  177 + ##list size is based on the amount of Previous Parents
  178 + CofVlist <- vector("character", length = 0)
  179 + np <- 1
  180 + for(np in 1:NumofVars){
  181 + LocCofVar <- grep(AllVarList[[d-1]][[ef]][np],NewDotP2_2[,1])
  182 + CofVar <- NewDotP2_2[LocCofVar,2]
  183 + #if(grepl(VariableStartName,CofVar)){
  184 + # next
  185 + #} else{
  186 + CofVlist <- append(CofVlist,CofVar)
  187 + #}
  188 + np <- np + 1
  189 + }
  190 + } else {
  191 + ##Making CofPlist empty
  192 + CofVlist <- vector("character",length = 0)
  193 + }
  194 + AllVarList[[d]][[PCCP]] <- CofVlist
  195 + PCCP <- PCCP + 1
  196 +
  197 + ##Finding the Co-Parents
  198 + NumofCVars <- length(CofVlist)
  199 + if(NumofCVars > 0){
  200 + ncp <- 1
  201 + CPofClist <- vector("character",length = 0)
  202 + for(ncp in 1:NumofCVars){
  203 + LocCPofCVar <- grep(CofVlist[ncp],NewDotP2_2[,2])
  204 + CPofCVar <- NewDotP2_2[LocCPofCVar,1]
  205 + CPofClist <- append(CPofClist,CPofCVar)
  206 + ncp <- ncp + 1
  207 + }
  208 +
  209 + } else {
  210 + ##Making COPlist empty
  211 + CPofClist <- vector("character",length = 0)
  212 + }
  213 + AllVarList[[d]][[PCCP]] <- CPofClist
  214 + PCCP <- PCCP + 1
  215 + ef <- ef + 1
  216 + }
  217 + }
  218 + ##Stop if you have found the VariableEndName value
  219 + if(sum(grepl(VariableEndName,AllVarList)) > 0){
  220 + break
  221 + }
  222 + d <- d + 1
  223 + }
  224 + ##The Markov Degree is that found below
  225 + d
  226 +}
  227 +#Now use the command MBlanky() with the appropriate settings
0 228 \ No newline at end of file