Commit 4b00e22457ceceba11ea1832884a9d7de7e69361

Authored by Zhenghua Gong
0 parents
Exists in master

Consensus Graph

Showing 1 changed file with 553 additions and 0 deletions   Show diff stats
... ... @@ -0,0 +1,553 @@
  1 +#source("/home/zgong001/Documents/Alarm/D50C9v/RCode/CombineOrders.R")
  2 +getRelation =
  3 + function(st = "[6][8][5][7|6:8:5][1|7][3|7:1][4|1][2|4][0|2]"){
  4 + re = c()
  5 + temstr = substr(st, 2, nchar(st)-1)
  6 + stlist = strsplit(chartr(old = "][", new = "##", temstr), "##")
  7 + for(i in 1:length(stlist[[1]])){
  8 + temc = chartr(old = "|", new = "g", stlist[[1]][i])
  9 +
  10 + if (grepl("g", temc)){
  11 + temc2 = strsplit(temc, "g")
  12 +
  13 + X = temc2[[1]][1]
  14 + TY = strsplit(temc2[[1]][2], ":")
  15 + for(j in 1:length(TY[[1]])){
  16 + Y = TY[[1]][j]
  17 +
  18 + cc= paste(Y,"->", X, sep = "")
  19 + re = append(re, cc)
  20 + }
  21 + }
  22 + }
  23 + return(re)
  24 + }
  25 +
  26 +creatDataframe =
  27 + function(v = c("0", "1","2","3","4","5","6","7","8"), r = 246){
  28 + cn = c()
  29 + co= combn(v,2)
  30 + for(i in 1:ncol(co)){
  31 + X = co[1,i]
  32 + Y = co[2,i]
  33 + c1 = paste(X, "->", Y, sep = "")
  34 + cn = append(cn,c1)
  35 + c2 = paste(Y, "->", X, sep = "")
  36 + cn = append(cn,c2)
  37 + c3 = paste(X, " NA ", Y, sep = "")
  38 + cn = append(cn,c3)
  39 + }
  40 +
  41 + b= ncol(co)
  42 +
  43 + re = data.frame(matrix(0, nrow=r, ncol=b*3))
  44 + colnames(re) = cn
  45 +
  46 + return(re)
  47 + }
  48 +
  49 +#---------------------------------------------------------------------------------
  50 +sortStru = function(st = "[5][6][8][7|5:8][1|5:7][3|7:8][2|6:1][4|1][0|2]"){
  51 + library(dplyr)
  52 + re = ""
  53 + temstr = substr(st, 2, nchar(st)-1)
  54 + stlist = strsplit(chartr(old = "][", new = "##", temstr), "##")
  55 + stlist = sort(stlist[[1]])
  56 + for(i in 1:length(stlist)){
  57 + if(nchar(stlist[i])>3){
  58 +
  59 + temc = chartr(old = "|", new = "g", stlist[i])
  60 +
  61 + temc2 = strsplit(temc, "g")
  62 + X = temc2[[1]][1]
  63 + TY = strsplit(temc2[[1]][2], ":")
  64 + TY = sort(TY[[1]])
  65 +
  66 + TY2 = paste(TY, collapse = ":")
  67 +
  68 + stlist[i] = paste(X,"|", TY2, sep = "")
  69 +
  70 + stlist[i] <- TY %>%
  71 + paste(., collapse=":") %>%
  72 + paste(X, "|", ., sep = "")
  73 +
  74 + }
  75 +
  76 + }
  77 +
  78 + re <- stlist %>%
  79 + paste(., collapse="][") %>%
  80 + paste("[", ., "]", sep = "")
  81 +
  82 + return(re)
  83 +}
  84 +
  85 +MB =
  86 + function (struc ="[6][8][5][7|6:8:5][3|7][1|7:3][4|1][2|4][0|2]", v ="1"){
  87 +
  88 + library(xlsx)
  89 + library(bnlearn)
  90 + library(dplyr)
  91 +
  92 +
  93 + mbs = ""
  94 +
  95 + strutem <- struc %>%
  96 + as.character(.) %>%
  97 + substr(., 2, nchar(.)-1)
  98 +
  99 + stru = strsplit(chartr(old = "][", new = "##", strutem), "##")
  100 +
  101 + for(j in 1:length(stru[[1]])){
  102 + if(grepl(v,stru[[1]][j])){
  103 + mbs = paste(mbs, "[", stru[[1]][j], "]", sep = "")
  104 + }
  105 + }
  106 + return(mbs)
  107 + }
  108 +
  109 +sumTable =
  110 + function (pathname = "/home/zgong001/Documents/Alarm/D1KC9v/D1KC9v BestOrders/Strus_D1KC9v.txt",
  111 + t = c("B-W", "AIFM1", "ATP6V1C2", "CACNA1D", "CACNB1", "CDH15", "CLDN6", "DDIT3", "EIF2AK3", "ENDOG", "HRK",
  112 + "LMNB1" ,"MAPK12", "NRF1", "PARP1" ,"PCK2", "PLA2G4C", "PPP2R3B", "VDAC2", "VDAC3"),
  113 + exfilename = "/home/zgong001/Documents/Alarm/D1KC9v/D1KC9v BestOrders/Strus_D1KC9v.xlsx"){
  114 + library(bnlearn)
  115 + library(xlsx)
  116 + library(dplyr)
  117 +
  118 + v = as.character(seq(0,length(t)-1),1)
  119 + suminput = read.table(pathname, header = FALSE)
  120 + suminput = na.omit(suminput)
  121 + sumResu = data.frame()
  122 + n = ncol(suminput)
  123 + ord = vector()
  124 +
  125 + for(l in 1:nrow(suminput)){
  126 + tem = suminput[l,1]
  127 + for(m in 2:(n-4)){
  128 + tem = paste(tem, suminput[l,m], sep = " ")
  129 + }
  130 + ord = append(ord, tem)
  131 + }
  132 +
  133 +
  134 + suminput = cbind(suminput, ord)
  135 +
  136 +
  137 + suminput = suminput[-n]
  138 +
  139 + for(j in (n-4):1){
  140 + suminput = suminput[,-j]
  141 + }
  142 +
  143 + colnames(suminput)[1] <- "OrderScores"
  144 + colnames(suminput)[2] <- "Structures"
  145 + colnames(suminput)[3] <- "StructScores"
  146 +
  147 + orders= aggregate( OrderScores ~ord, data=suminput, FUN = mean)
  148 + orders = orders[order(-orders[,2]), ]
  149 + for(i in 1:nrow(orders)){
  150 + orders$Opercentage[i] = exp(-log(sum(exp(orders$OrderScores-orders$OrderScores[i]))))
  151 + }
  152 +
  153 + orders$Ocumper = cumsum(orders$Opercentage)
  154 +
  155 + for(i in 1:nrow(orders)){
  156 + tem = suminput[which(suminput$ord== orders[i,1]), ]
  157 +
  158 + tem$Opercentage = orders[i,3]
  159 + tem$Ocumper = orders[i,4]
  160 +
  161 + for(j in 1:nrow(tem)){
  162 + tem$Spercentage[j] = exp(-log(sum(exp(tem$StructScores-tem$StructScores[j]))))
  163 + }
  164 + tem$Scumper = cumsum(tem$Spercentage)
  165 + tem = tem[c("ord", "OrderScores", "Opercentage","Ocumper", "Structures", "StructScores","Spercentage","Scumper")]
  166 + sumResu = rbind(sumResu, tem)
  167 + }
  168 +
  169 +
  170 + for(k in 1:nrow(sumResu)){
  171 + sumResu$sortS[k] = sortStru(as.character(sumResu$Structures[k]))
  172 + }
  173 +
  174 + rs = nrow(sumResu)
  175 +
  176 + rl = creatDataframe(v,rs)
  177 + sumResu= cbind(sumResu, rl)
  178 +
  179 + # sumAnce = sumResu
  180 +
  181 + co= combn(v,2)
  182 +
  183 + for(i in 1:nrow(sumResu)){
  184 + s = as.character(sumResu [i,5])
  185 + rel = getRelation(st = s)
  186 +
  187 + for(j in 1:ncol(co)){
  188 + X = co[1,j]
  189 + Y = co[2,j]
  190 + c1 = paste(X, "->", Y, sep = "")
  191 + c2 = paste(Y, "->", X, sep = "")
  192 + c3 = paste(X, " NA ", Y, sep = "")
  193 +
  194 + if (c1 %in% rel){
  195 + sumResu[i,c1] = sumResu$Opercentage[i]*sumResu$Spercentage[i]
  196 + }
  197 + else if(c2 %in% rel){
  198 + sumResu[i,c2] = sumResu$Opercentage[i]*sumResu$Spercentage[i]
  199 + }
  200 + else{
  201 + sumResu[i,c3] = sumResu$Opercentage[i]*sumResu$Spercentage[i]
  202 + }
  203 +
  204 + }
  205 +
  206 + }
  207 +
  208 + # print(sumResu)
  209 +
  210 + sumP = colSums(sumResu[, -c(1:9)])
  211 +
  212 + # print (sumP)
  213 +
  214 + ##########################################
  215 +
  216 +
  217 +
  218 + StruWithO = aggregate( StructScores ~sortS, data=sumResu, FUN = mean)
  219 +
  220 + for(l in 1:nrow(StruWithO)){
  221 + StruWithO$Spercentage[l] = exp(-log(sum(exp(StruWithO$StructScores-StruWithO$StructScores[l]))))
  222 + }
  223 +
  224 + StruWithO = StruWithO[order(-StruWithO[,3]), ]
  225 +
  226 + rss = nrow(StruWithO)
  227 +
  228 + rls = creatDataframe(v,rss)
  229 + StruWithO= cbind(StruWithO, rls)
  230 +
  231 + for(m in 1:nrow(StruWithO)){
  232 + ss = as.character(StruWithO [m,1])
  233 + rels = getRelation(st = ss)
  234 + # print(rels)
  235 +
  236 + for(n in 1:ncol(co)){
  237 + X = co[1,n]
  238 + Y = co[2,n]
  239 + c1 = paste(X, "->", Y, sep = "")
  240 + c2 = paste(Y, "->", X, sep = "")
  241 + c3 = paste(X, " NA ", Y, sep = "")
  242 +
  243 + if (c1 %in% rels){
  244 + StruWithO[m,c1] = StruWithO$Spercentage[m]
  245 + }
  246 + else if(c2 %in% rels){
  247 + StruWithO[m,c2] = StruWithO$Spercentage[m]
  248 + }
  249 + else{
  250 + StruWithO[m,c3] = StruWithO$Spercentage[m]
  251 + }
  252 +
  253 + }
  254 +
  255 + }
  256 +
  257 +
  258 + sumpS = colSums(StruWithO[, -c(1:3)])
  259 + # print(sumpS)
  260 +
  261 + # totS = (sumP+sumpS)/2
  262 +
  263 + # nam = names(totS)
  264 + nam = names(sumP)
  265 +
  266 + # nam2 = names(sumpS)
  267 +
  268 + # print(nam2)
  269 +
  270 + for(i in 1:length(nam)){
  271 + if (grepl("->", nam[i])){
  272 + TY = strsplit(nam[i], "->")
  273 + a = as.numeric(TY[[1]][1])
  274 + b = as.numeric(TY[[1]][2])
  275 + nam[i] = paste(t[a+1], "->", t[b+1], sep = "")
  276 + }
  277 +
  278 + if (grepl(" NA ", nam[i])){
  279 + TY = strsplit(nam[i], " NA ")
  280 + a = as.numeric(TY[[1]][1])
  281 + b = as.numeric(TY[[1]][2])
  282 + nam[i] = paste(t[a+1], " NA ", t[b+1], sep = "")
  283 + }
  284 +
  285 + }
  286 +
  287 + # print(nam)
  288 + names(sumP) = nam
  289 + names(sumpS) = nam
  290 +
  291 + sumP = as.data.frame(sumP)
  292 + sumpS = as.data.frame(sumpS)
  293 +
  294 + # print(sumP)
  295 +
  296 + # print(totS)
  297 +
  298 + write.xlsx(sumP, exfilename, sheetName = "Pairs", col.names = TRUE, row.names = TRUE, append = FALSE)
  299 + write.xlsx(sumpS, exfilename, sheetName = "Pairs-WO", col.names = TRUE, row.names = TRUE, append = TRUE)
  300 +
  301 + # return(sumpS)
  302 +
  303 + }
  304 +
  305 +
  306 +
  307 +AverageNet = function(pathname = "/home/zgong001/Documents/Projects/Luminal/3/SS/Pairs-A3-Luminal A-A-B.xlsx",
  308 + outfile = "/home/zgong001/Documents/dot/out.gv"){
  309 +
  310 + library(xlsx)
  311 +
  312 + res <- read.xlsx(pathname, 1) # read first sheet
  313 + res = na.omit(res)
  314 +
  315 + cat("digraph G {", file=outfile, sep="\n")
  316 +
  317 + p = round(nrow(res)/3)
  318 +
  319 + for(i in 1:p){
  320 +
  321 + t = (i-1)*3
  322 + v = as.vector(res[(t+1):(t+3),2])
  323 + names(v) = res[(t+1):(t+3),1]
  324 + m = sort(v, decreasing = TRUE)
  325 + mn = names(m[1])
  326 + if(grepl("->", mn)){
  327 + if(grepl("->", names(m[2]))){
  328 + cat("\t", mn, " [penwidth=", round(m[[1]]*2,digits = 2), ",", 'label="', round(m[[1]]*100 ,digits = 2), " (", round(m[[2]]*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  329 + }
  330 + else{
  331 + cat("\t", mn, " [penwidth=", round(m[[1]]*2,digits = 2), ",", 'label="', round(m[[1]]*100,digits = 2) , " (", round(m[[3]]*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  332 + }
  333 +
  334 + }
  335 +
  336 + }
  337 +
  338 + cat("}", file=outfile, append=TRUE)
  339 + return(res)
  340 +}
  341 +
  342 +############This is draw pairs percent on the graph.
  343 +AverageNet11 = function(pathname = "/home/zgong001/Documents/Projects/Luminal/3/SS/Pairs-A3-Luminal A-A-B.xlsx",
  344 + str = "",
  345 + outfile = "/home/zgong001/Documents/dot/out.gv"){
  346 +
  347 + library(xlsx)
  348 + library(bnlearn)
  349 +
  350 + res <- read.xlsx(pathname, 1) # read first sheet
  351 + res = na.omit(res)
  352 +
  353 + strD = model2network(as.character(str))
  354 + arcD = arcs(strD)
  355 + nodeD = nodes(strD)
  356 + arNo = unique(union(arcD[,1], arcD[,2]))
  357 + nod = setdiff(nodeD, arNo)
  358 +
  359 + cat("digraph G {", file=outfile, sep="\n")
  360 +
  361 + if (length(nod) > 0){
  362 + for(j in 1:length(nod)){
  363 + cat("\t", nod[j], "\n", file=outfile, append=TRUE)
  364 + }
  365 + }
  366 +
  367 + p = nrow(arcD)
  368 +
  369 + for(i in 1:p){
  370 +
  371 + mn = paste(arcD[i,1],"->" , arcD[i,2], sep = "")
  372 + mn2 = paste(arcD[i,2],"->" , arcD[i,1], sep = "")
  373 + s1 = res[which(res[,1]==mn),2]
  374 + s2 = res[which(res[,1]==mn2),2]
  375 +
  376 + if((s1>=0.9999)&(s2<=0.0001)){
  377 +
  378 + cat("\t", mn, " [penwidth=", round(s1*10, digits = 2), ",", 'label="', ">99", " (", "~0",')"', "];", "\n", file=outfile, append=TRUE)
  379 +
  380 + } else if (s1>=0.9999) {
  381 +
  382 + cat("\t", mn, " [penwidth=", round(s1*10, digits = 2), ",", 'label="', ">99", " (", round(s2*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  383 +
  384 + } else if (s2<=0.0001) {
  385 +
  386 + cat("\t", mn, " [penwidth=", round(s1*10, digits = 2), ",", 'label="', round(s1*100 ,digits = 2), " (", "~0",')"', "];", "\n", file=outfile, append=TRUE)
  387 +
  388 + } else {
  389 + cat("\t", mn, " [penwidth=", round(s1*10, digits = 2), ",", 'label="', round(s1*100 ,digits = 2), " (", round(s2*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  390 + }
  391 +
  392 +
  393 + }
  394 +
  395 + cat("\t", 'label="', ' ', "\\l", "Notes:", "\\l", " - >99 represents percentage of 99.99 and above", "\\l", " - ~0 represents percentage of 0.01 and below", "\\l", " - Percentage in the parentheses represents the percentage of an arc that is reversed", '\\l"', ";", "\n", file=outfile, append=TRUE)
  396 + cat("}", file=outfile, append=TRUE)
  397 + return(res)
  398 +}
  399 +
  400 +ExeAve = function(epathname = "/home/zgong001/Documents/Projects/Luminal/3/SS/Pairs-A3-Luminal A-A-B.xlsx",
  401 + strfile = "",
  402 + outfile = "/home/zgong001/Documents/dot/"){
  403 + library(xlsx)
  404 + strs = read.xlsx(strfile, sheetName = "Structures") # read first sheet
  405 + strs = na.omit(strs)
  406 + strs = as.data.frame(strs[1:2,1])
  407 +
  408 +
  409 + for(i in 1:2){
  410 + oun = paste(outfile, "-", i, ".gv", sep = "")
  411 + y = AverageNet11(pathname = epathname,
  412 + str = as.character(strs[i,1]),
  413 + outfile = oun)
  414 + }
  415 +
  416 + return (strs)
  417 +
  418 +}
  419 +
  420 +##################################################################################################################
  421 +# The following is for the Consensus version 2
  422 +AverageNet21 = function(pathname = "/home/zgong001/Documents/Projects/Luminal/3/SS/Pairs-A3-Luminal A-A-B.xlsx",
  423 + str = "",
  424 + outfile = "/home/zgong001/Documents/dot/out.gv"){
  425 +
  426 + library(xlsx)
  427 + library(bnlearn)
  428 +
  429 + res <- read.xlsx(pathname, 1) # read first sheet
  430 + res = na.omit(res)
  431 +
  432 +
  433 +
  434 + strD = model2network(as.character(str))
  435 + arcD = arcs(strD)
  436 + nodeD = nodes(strD)
  437 + arNo = unique(union(arcD[,1], arcD[,2]))
  438 + nod = setdiff(nodeD, arNo)
  439 +
  440 + cat("digraph G {", file=outfile, sep="\n")
  441 +
  442 + if (length(nod) > 0){
  443 + for(j in 1:length(nod)){
  444 + cat("\t", nod[j], "\n", file=outfile, append=TRUE)
  445 + }
  446 + }
  447 +
  448 + p = nrow(arcD)
  449 +
  450 + for(i in 1:p){
  451 +
  452 + mn = paste(arcD[i,1],"->" , arcD[i,2], sep = "")
  453 + mn2 = paste(arcD[i,2],"->" , arcD[i,1], sep = "")
  454 + s1 = res[which(res[,1]==mn),2]
  455 + s2 = res[which(res[,1]==mn2),2]
  456 +
  457 + if((s1>=0.9999)&(s2<=0.0001)){
  458 +
  459 + cat("\t", mn, " [penwidth=", round(s1*10, digits = 2), ",", 'label="', ">99", " (", "~0",')"', "];", "\n", file=outfile, append=TRUE)
  460 +
  461 + } else if (s1>=0.9999) {
  462 +
  463 + cat("\t", mn, " [penwidth=", round(s1*10, digits = 2), ",", 'label="', ">99", " (", round(s2*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  464 +
  465 + } else if (s2<=0.0001) {
  466 +
  467 + cat("\t", mn, " [penwidth=", round(s1*10, digits = 2), ",", 'label="', round(s1*100 ,digits = 2), " (", "~0",')"', "];", "\n", file=outfile, append=TRUE)
  468 +
  469 + } else {
  470 +
  471 + cat("\t", mn, " [penwidth=", round(s1*10, digits = 2), ",", 'label="', round(s1*100 ,digits = 2), " (", round(s2*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  472 +
  473 + }
  474 +
  475 +
  476 +
  477 + }
  478 +
  479 + NoNA = data.frame()
  480 +
  481 + arcD = as.data.frame(arcD)
  482 + co= combn(nodeD,2)
  483 +
  484 +
  485 + for(j in 1:ncol(co)){
  486 + X = co[1,j]
  487 + Y = co[2,j]
  488 +
  489 + pair_find1 = data.frame(from= X, to=Y)
  490 + pair_find2 = data.frame(from= Y, to=X)
  491 +
  492 + if((nrow(merge(pair_find1,arcD )) == 0) & (nrow(merge(pair_find2,arcD )) == 0)){
  493 +
  494 + TX = paste(X,"->", Y, sep = "")
  495 + TY = paste(Y,"->", X, sep = "")
  496 + t1 = res[which(res[,1]==TX),2]
  497 + t2 = res[which(res[,1]==TY),2]
  498 + if(t1>t2){
  499 +
  500 + if((t1>=0.9999)&(t2<=0.0001)){
  501 +
  502 + cat("\t", TX, " [penwidth=", round(t1*10, digits = 2), ",", 'label="', ">99", " (", "~0",')"', "];", "\n", file=outfile, append=TRUE)
  503 +
  504 + } else if (t1>=0.9999) {
  505 +
  506 + cat("\t", TX, " [penwidth=", round(t1*10, digits = 2), ",", 'label="', ">99", " (", round(t2*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  507 +
  508 + } else if (t2<=0.0001) {
  509 +
  510 + cat("\t", TX, " [penwidth=", round(t1*10, digits = 2), ",", 'label="', round(t1*100 ,digits = 2), " (", "~0",')"', "];", "\n", file=outfile, append=TRUE)
  511 +
  512 + } else {
  513 +
  514 + cat("\t", TX, " [penwidth=", round(t1*10, digits = 2), ",", 'label="', round(t1*100 ,digits = 2), " (", round(t2*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  515 +
  516 + }
  517 +
  518 + }
  519 + else if (t2 >t1){
  520 +
  521 + if((t2>=0.9999)&(t1<=0.0001)){
  522 +
  523 + cat("\t", TY, " [penwidth=", round(t2*10, digits = 2), ",", 'label="', ">99", " (", "~0",')"', "];", "\n", file=outfile, append=TRUE)
  524 +
  525 + } else if (t2>=0.9999) {
  526 +
  527 + cat("\t", TY, " [penwidth=", round(t2*10, digits = 2), ",", 'label="', ">99", " (", round(t1*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  528 +
  529 + } else if (t1<=0.0001) {
  530 +
  531 + cat("\t", TY, " [penwidth=", round(t2*10, digits = 2), ",", 'label="', round(t2*100 ,digits = 2), " (", "~0",')"', "];", "\n", file=outfile, append=TRUE)
  532 +
  533 + } else {
  534 +
  535 + cat("\t", TY, " [penwidth=", round(t2*10, digits = 2), ",", 'label="', round(t2*100 ,digits = 2), " (", round(t1*100,digits = 2),')"', "];", "\n", file=outfile, append=TRUE)
  536 +
  537 + }
  538 +
  539 + }
  540 + else{
  541 + next
  542 + }
  543 +
  544 +
  545 + }
  546 +
  547 +
  548 + }
  549 +
  550 + cat("\t", 'label="', ' ', "\\l", "Notes:", "\\l", " - >99 represents percentage of 99.99 and above", "\\l", " - ~0 represents percentage of 0.01 and below", "\\l", " - Percentage in the parentheses represents the percentage of an arc that is reversed", '\\l"', ";", "\n", file=outfile, append=TRUE)
  551 + cat("}", file=outfile, append=TRUE)
  552 + return(arcD)
  553 +}
0 554 \ No newline at end of file