# Summary.R -- summary of the orders
#
# Provenance: first added as Summary.R in commit
# 6629da35df7d8263eb33c7af8d80fd510c683aed ("Summary of the orders",
# Zhenghua Gong, 2018-07-06).
#
# Structures are handled as bracketed model strings such as
# "[6][8][7|6:8]": one "[child|parent:parent]" entry per node, with the
# "|parents" part omitted for nodes without parents.

#source("/home/zgong001/Documents/Alarm/D50C9v/RCode/CombineOrders.R")

# ---- Private helpers ----------------------------------------------------------

# Split a bracketed model string into its per-node entries (brackets removed).
.split_structure <- function(st) {
  st <- as.character(st)
  inner <- substr(st, 2, nchar(st) - 1)
  strsplit(inner, "][", fixed = TRUE)[[1]]
}

# Split one "child|p1:p2" entry into its child and (possibly empty) parents.
.split_node <- function(node) {
  parts <- strsplit(node, "|", fixed = TRUE)[[1]]
  parents <- if (length(parts) >= 2) {
    strsplit(parts[2], ":", fixed = TRUE)[[1]]
  } else {
    character(0)
  }
  list(child = parts[1], parents = parents)
}

# Column labels used for an unordered pair (x, y): x->y, y->x and "x NA y".
.relation_labels <- function(x, y) {
  c(
    forward = paste0(x, "->", y),
    backward = paste0(y, "->", x),
    none = paste0(x, " NA ", y)
  )
}

# For every score x[i], compute exp(-log(sum(exp(x - x[i])))), i.e. the share
# of exp(x[i]) in sum(exp(x)), evaluated relative to x[i].
.softmax_weights <- function(x) {
  vapply(
    seq_along(x),
    function(i) exp(-log(sum(exp(x - x[i])))),
    numeric(1)
  )
}

# ---- Structure parsing --------------------------------------------------------

#' List the parent -> child edges of a structure string
#'
#' @param st A bracketed model string, e.g. "[6][8][7|6:8]".
#' @return A character vector with one "parent->child" element per edge, in
#'   the order the entries and their parents appear in `st`. Zero-length when
#'   the structure has no edges (or is empty).
getRelation <- function(st = "[6][8][5][7|6:8:5][1|7][3|7:1][4|1][2|4][0|2]") {
  edges <- lapply(.split_structure(st), function(node) {
    parsed <- .split_node(node)
    if (length(parsed$parents) == 0) {
      return(character(0))
    }
    # The separators are split literally, so node names may contain any
    # character other than "[", "]", "|" and ":".
    paste0(parsed$parents, "->", parsed$child)
  })
  as.character(unlist(edges))
}

#' Create a zero-filled table with three columns per variable pair
#'
#' For every pair (X, Y) produced by `combn(v, 2)` the result has the columns
#' "X->Y", "Y->X" and "X NA Y", in that order.
#'
#' @param v Character vector of variable names (at least two).
#' @param r Number of rows.
#' @return A data.frame with `r` rows and `3 * choose(length(v), 2)` columns,
#'   all filled with 0.
creatDataframe <- function(v = c("0", "1", "2", "3", "4", "5", "6", "7", "8"),
                           r = 246) {
  pairs <- combn(v, 2)
  x <- pairs[1, ]
  y <- pairs[2, ]
  # rbind() + as.vector() interleaves the three labels pair by pair.
  cn <- as.vector(rbind(
    paste0(x, "->", y),
    paste0(y, "->", x),
    paste0(x, " NA ", y)
  ))

  re <- data.frame(matrix(0, nrow = r, ncol = length(cn)))
  colnames(re) <- cn
  re
}

# ---- Order / structure summary ------------------------------------------------

#' Summarise sampled orders and structures and write the result to Excel
#'
#' Reads a whitespace-separated table whose first `length(v)` columns hold an
#' order, followed by the order score, the structure string and the structure
#' score (the column after those is dropped). Orders and, within each order,
#' structures are weighted by `.softmax_weights()` of their scores. Every
#' structure then contributes `Opercentage * Spercentage` to exactly one of
#' the three columns of each variable pair:
#' * "Parents" sheets: direct edges, as reported by `getRelation()`;
#' * "Ancestor" sheets: directed paths, as reported by bnlearn.
#' The column sums are compared with columns 2 and 3 of `Sheet1` / `Sheet2`
#' of `xlsxfile` and everything is written to six sheets of `exfilename`.
#'
#' NOTE(review): the original code hard-coded nine order columns (V1..V9) and
#' dropped column 13; this is now derived from `length(v)`, assuming the file
#' layout scales with the number of variables. Behaviour for the default `v`
#' is unchanged.
#'
#' @param pathname Path of the input table (no header).
#' @param v Character vector of variable names.
#' @param exfilename Path of the Excel workbook to create.
#' @param xlsxfile Path of the reference workbook (`Sheet1`, `Sheet2`).
#' @return The value of the last `write.xlsx()` call; used for side effects.
sumTable <- function(pathname = "/home/zgong001/Documents/Alarm/D1KC9v/D1KC9v BestOrders/Strus_D1KC9v.txt",
                     v = c("0", "1", "2", "3", "4", "5", "6", "7", "8"),
                     exfilename = "/home/zgong001/Documents/Alarm/D1KC9v/D1KC9v BestOrders/Strus_D1KC9v.xlsx",
                     xlsxfile = "/home/zgong001/Documents/Alarm/D50S9v2/D50S9vRS.xlsx") {
  library(bnlearn)
  library(xlsx)
  library(dplyr)

  # Newer bnlearn releases expose the reachability test as path.exists(),
  # older ones as path(); use whichever this installation exports.
  path_exists <- if ("path.exists" %in% getNamespaceExports("bnlearn")) {
    getExportedValue("bnlearn", "path.exists")
  } else {
    getExportedValue("bnlearn", "path")
  }

  suminput <- read.table(pathname, header = FALSE)

  # Build the order key from the leading order columns, then keep only the
  # three score/structure columns plus the key.
  k <- length(v)
  suminput$ord <- do.call(
    paste,
    c(unname(as.list(suminput[seq_len(k)])), sep = " ")
  )
  suminput <- suminput[-c(seq_len(k), k + 4L)]
  colnames(suminput)[1] <- "OrderScores"
  colnames(suminput)[2] <- "Structures"
  colnames(suminput)[3] <- "StructScores"

  # One row per distinct order, best score first, with its weight.
  orders <- aggregate(OrderScores ~ ord, data = suminput, FUN = mean)
  orders <- orders[order(-orders[, 2]), ]
  orders$Opercentage <- .softmax_weights(orders$OrderScores)
  orders$Ocumper <- cumsum(orders$Opercentage)

  # Per order: attach the order weight and weight its structures.
  per_order <- lapply(seq_len(nrow(orders)), function(i) {
    tem <- suminput[which(suminput$ord == orders[i, 1]), ]
    tem$Opercentage <- orders[i, 3]
    tem$Ocumper <- orders[i, 4]
    tem$Spercentage <- .softmax_weights(tem$StructScores)
    tem$Scumper <- cumsum(tem$Spercentage)
    tem[c(
      "ord", "OrderScores", "Opercentage", "Ocumper",
      "Structures", "StructScores", "Spercentage", "Scumper"
    )]
  })
  sumResu <- if (length(per_order) > 0) {
    do.call(rbind, per_order)
  } else {
    data.frame()
  }

  rl <- creatDataframe(v, nrow(sumResu))
  sumResu <- cbind(sumResu, rl)
  sumAnce <- sumResu

  co <- combn(v, 2)
  # Joint weight of each (order, structure) row; identical for both tables.
  weight <- sumResu$Opercentage * sumResu$Spercentage

  # Parents: which direct edge (if any) links each pair.
  for (i in seq_len(nrow(sumResu))) {
    rel <- getRelation(st = as.character(sumResu[i, 5]))

    for (j in seq_len(ncol(co))) {
      lab <- .relation_labels(co[1, j], co[2, j])
      hit <- if (lab[["forward"]] %in% rel) {
        lab[["forward"]]
      } else if (lab[["backward"]] %in% rel) {
        lab[["backward"]]
      } else {
        lab[["none"]]
      }
      sumResu[i, hit] <- weight[i]
    }
  }

  # Ancestors: which directed path (if any) links each pair.
  for (i in seq_len(nrow(sumAnce))) {
    dags <- model2network(as.character(sumAnce[i, 5]))

    for (j in seq_len(ncol(co))) {
      X <- co[1, j]
      Y <- co[2, j]
      lab <- .relation_labels(X, Y)
      hit <- if (path_exists(dags, from = X, to = Y)) {
        lab[["forward"]]
      } else if (path_exists(dags, from = Y, to = X)) {
        lab[["backward"]]
      } else {
        lab[["none"]]
      }
      sumAnce[i, hit] <- weight[i]
    }
  }

  # Columns 1:8 are the order/structure bookkeeping columns.
  sumP <- colSums(sumResu[, -c(1:8)])
  sumA <- colSums(sumAnce[, -c(1:8)])

  reP <- read.xlsx(xlsxfile, sheetName = "Sheet1", colIndex = 1:3)
  reA <- read.xlsx(xlsxfile, sheetName = "Sheet2", colIndex = 1:3)

  sP <- cbind(reP, sumP, diffT = abs(sumP - reP[, 2]), diffB = abs(sumP - reP[, 3]))
  sA <- cbind(reA, sumA, diffT = abs(sumA - reA[, 2]), diffB = abs(sumA - reA[, 3]))

  # Columns 5 and 6 are diffT and diffB.
  distanceP <- colSums(sP[, c(5, 6)])
  distanceA <- colSums(sA[, c(5, 6)])

  write.xlsx(sumResu, exfilename, sheetName = "Parents Detail", col.names = TRUE, row.names = FALSE, append = FALSE)
  write.xlsx(sP, exfilename, sheetName = "Parents Summary", col.names = TRUE, row.names = TRUE, append = TRUE)
  write.xlsx(distanceP, exfilename, sheetName = "Parents Distance", col.names = TRUE, row.names = TRUE, append = TRUE)
  write.xlsx(sumAnce, exfilename, sheetName = "Ancestor Detail", col.names = TRUE, row.names = FALSE, append = TRUE)
  write.xlsx(sA, exfilename, sheetName = "Ancestor Summary", col.names = TRUE, row.names = TRUE, append = TRUE)
  write.xlsx(distanceA, exfilename, sheetName = "Ancestor Distance", col.names = TRUE, row.names = TRUE, append = TRUE)

  # return(sP)
}

#---------------------------------------------------------------------------------

#' Put a structure string into a canonical order
#'
#' Sorts the node entries and, within each entry, the parents, so that two
#' strings describing the same structure compare equal. Sorting uses `sort()`
#' and therefore the collation of the current locale.
#'
#' @param st A bracketed model string.
#' @return The reordered model string; "" when `st` is "".
sortStru <- function(st = "[5][6][8][7|5:8][1|5:7][3|7:8][2|6:1][4|1][0|2]") {
  nodes <- sort(.split_structure(st))
  if (length(nodes) == 0) {
    return("")
  }

  canonical <- vapply(nodes, function(node) {
    parsed <- .split_node(node)
    # Entries without parents are kept verbatim, whatever their length.
    if (length(parsed$parents) == 0) {
      return(node)
    }
    paste0(parsed$child, "|", paste(sort(parsed$parents), collapse = ":"))
  }, character(1), USE.NAMES = FALSE)

  paste0("[", paste(canonical, collapse = "]["), "]")
}

#' Extract the entries of a structure string that mention a variable
#'
#' Keeps, in their original order, the entries in which `v` occurs either as
#' the child or as one of the parents. The name is compared as a whole token,
#' so "1" does not match node "10".
#'
#' @param struc A bracketed model string (character or factor).
#' @param v Name of the variable; only the first element is used.
#' @return The retained entries as a bracketed string, "" if there are none.
MB <- function(struc = "[6][8][5][7|6:8:5][3|7][1|7:3][4|1][2|4][0|2]",
               v = "1") {
  nodes <- .split_structure(struc)
  target <- as.character(v)[1]

  keep <- vapply(nodes, function(node) {
    target %in% strsplit(node, "[|:]")[[1]]
  }, logical(1), USE.NAMES = FALSE)

  paste0(sprintf("[%s]", nodes[keep]), collapse = "")
}

#' Compare per-variable blanket masses with reference values
#'
#' Reads the first eight columns of the "Parents Detail" sheet of `xlsxfile`.
#' For each of the nine variables "0".."8" it reads a three-column block of
#' `Sheet3` in `xlsxfile2` (columns 1:3, 5:7, ..., 33:35), labels every
#' structure with `sortStru(MB(structure, variable))`, and sums `OP * SP` per
#' label. Two distances are accumulated per variable:
#' * true distance: if the label in the first reference row occurs among the
#'   aggregated labels, its mass p is replaced by |1 - p| and all masses are
#'   summed; otherwise the distance is 2;
#' * best distance: computed from the remaining reference rows (label and
#'   third column) against the aggregated masses, with separate handling for
#'   labels present on only one side.
#' The two totals are appended to `xlsxfile` as sheet "Blanket Distance".
#'
#' NOTE(review): the last branch of the best-distance computation selects
#' from all rows of the reference block, whereas the other branches exclude
#' the first row -- kept as in the original; confirm this is intended.
#'
#' @param xlsxfile Workbook produced by `sumTable()`; also receives the result.
#' @param xlsxfile2 Reference workbook containing `Sheet3`.
#' @return The "Parents Detail" data (renamed columns, incomplete rows removed).
sumBlanket2 <- function(xlsxfile = "/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2-newswap-0.5NONE-2-1h/Strus_D50S9v2_E30r3_o.xlsx",
                        xlsxfile2 = "/home/zgong001/Documents/Alarm/D50S9v2/D50S9vRS.xlsx") {
  library(xlsx)
  library(bnlearn)
  library(dplyr)

  re <- read.xlsx(xlsxfile, sheetName = "Parents Detail", colIndex = 1:8)
  re <- na.omit(re)

  colnames(re) <- c("Order", "OrderScore", "OP", "OPC", "Structures", "StructureScore", "SP", "SPC")

  # Joint weight of every row; does not depend on the variable.
  blankScore <- re$OP * re$SP

  truDis <- numeric(9)
  besDis <- numeric(9)

  for (l in seq_len(9)) {
    a <- l * 4 - 3
    b <- l * 4 - 1
    n <- as.character(l - 1)

    rebm <- read.xlsx(xlsxfile2, sheetName = "Sheet3", colIndex = a:b)
    rebm <- na.omit(rebm)
    colnames(rebm) <- c("LableM", "TrueB", "BestB")

    # Canonical blanket label of variable n in every structure.
    MBS <- vapply(seq_len(nrow(re)), function(i) {
      sortStru(as.character(MB(struc = re[i, 5], v = n)))
    }, character(1))

    REE <- cbind(re, MBS = MBS, blankScore = blankScore)

    ag <- aggregate(blankScore ~ MBS, data = REE, sum)

    # ---- distance to the true blanket ----
    if (rebm[1, 1] %in% ag[, 1]) {
      tem <- ag
      hit <- which(tem$MBS == as.character(rebm[1, 1]))
      tem[hit, 2] <- abs(1 - tem[hit, 2])
      truedist <- sum(tem[, 2])
    } else {
      truedist <- 2
    }

    truDis[l] <- truedist

    # ---- distance to the best blankets ----
    tem2 <- rebm[-1, 1]
    tem3 <- ag[, 1]

    di1 <- setdiff(tem2, tem3) # labels only in the reference
    di2 <- setdiff(tem3, tem2) # labels only in the aggregate

    if (length(di1) == 0 && length(di2) == 0) {
      # Same label sets: align both by label and compare.
      temm <- ag[order(ag[, 1]), ]
      temm1 <- rebm[-1, c(1, 3)]

      temmm11 <- temm1[order(temm1[, 1]), ]

      temm[, 2] <- abs(temmm11[, 2] - temm[, 2])

      bestdist <- sum(temm[, 2])
    } else if (length(di1) == 0) {
      # Aggregate has extra labels: they count in full.
      temm <- ag[which(ag[, 1] %in% di2), ]
      temm2 <- rebm[-1, c(1, 3)]
      temm1 <- ag[which(!ag[, 1] %in% di2), ]

      temmm21 <- temm1[order(temm1[, 1]), ]
      temmm22 <- temm2[order(temm2[, 1]), ]
      temmm21[, 2] <- abs(temmm21[, 2] - temmm22[, 2])
      temm <- rbind(temm, temmm21)

      bestdist <- sum(temm[, 2])
    } else if (length(di2) == 0) {
      # Reference has extra labels: they count in full.
      temm2 <- rebm[-1, c(1, 3)]

      temm <- temm2[which(temm2[, 1] %in% di1), ]

      temm1 <- temm2[which(!temm2[, 1] %in% di1), ]

      temmm21 <- temm1[order(temm1[, 1]), ]
      temmm22 <- ag[order(ag[, 1]), ]

      temmm21[, 2] <- abs(temmm21[, 2] - temmm22[, 2])
      temm <- rbind(temm, temmm21)

      bestdist <- sum(temm[, 2])
    } else {
      # Both sides have extra labels: compare the common ones, then add the
      # one-sided ones in full.
      comm <- intersect(tem2, tem3)

      temmm1 <- rebm[which(rebm$LableM %in% comm), c(1, 3)]
      temmm12 <- temmm1[order(temmm1[, 1]), ]
      temmm2 <- ag[which(ag[, 1] %in% comm), ]
      temmm22 <- temmm2[order(temmm2[, 1]), ]
      temmm12[, 2] <- abs(temmm12[, 2] - temmm22[, 2])
      temt <- rbind(temmm12, rebm[which(rebm$LableM %in% di1), c(1, 3)])

      temt <- rbind(
        data.frame(MBS = temt[, 1], blankScore = temt[, 2]),
        ag[which(ag[, 1] %in% di2), ]
      )
      bestdist <- sum(temt[, 2])
    }

    besDis[l] <- bestdist
  }

  td <- sum(truDis)
  bd <- sum(besDis)

  write.xlsx(data.frame("True dis" = td, "Best dis" = bd), xlsxfile, sheetName = "Blanket Distance", col.names = TRUE, row.names = FALSE, append = TRUE)

  re
}

#' Keep the top-scoring entries of a result file and append them to it
#'
#' Each line is split on a single space; the first field is the entry and the
#' second its score. Lines without a numeric score and duplicate rows are
#' dropped, and the rest is sorted by decreasing score. Rows are then added
#' one at a time to a running log-sum of exp(score); scanning stops at the
#' first row whose addition leaves the previous total at 99% or more of the
#' new one, and that row is not kept. A "------" separator and the retained
#' rows are appended (tab-separated) to `filepath` itself.
#'
#' @param filepath Path of the file to read and to append to.
#' @return A data.frame with columns `st` and `sc` holding the retained rows.
#'   If no line can be parsed, a warning is raised, the file is left
#'   untouched and an empty data.frame is returned.
processFile <- function(filepath = "/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2-newswap-0.5NONE-1h/D50S9v2-E15-1hr1.txt") {
  li <- readLines(filepath)

  fields <- strsplit(li, " ")
  st <- vapply(fields, function(f) f[1], character(1))
  # NOTE(review): the second field is split on a space again, as in the
  # original; with identical separators this is a no-op -- confirm whether a
  # different separator (e.g. a tab) was intended for one of the two splits.
  sc <- vapply(
    fields,
    function(f) strsplit(f[2], " ")[[1]][1],
    character(1)
  )

  re <- data.frame(st = st, sc = as.numeric(sc))
  re <- na.omit(re)
  re <- unique(re)
  re <- re[order(-re[, 2]), ]

  if (nrow(re) == 0) {
    warning("no scored entries found in ", filepath, call. = FALSE)
    return(re)
  }

  m <- re[1, 2] # running log-sum-exp of the scores kept so far
  n <- 1L # number of rows to keep; the best row is always kept

  for (j in seq_len(nrow(re))[-1]) {
    # exp(te) is the share of the previous total in the total including row j.
    te <- -log(exp(re[j, 2] - m) + 1)
    m <- m - te

    if (exp(te) >= 0.99) {
      break
    }
    n <- j
  }

  ree <- re[seq_len(n), ]

  # sink("/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2-newswap-0.5NONE-1h/myfile.txt", append=TRUE, split=TRUE)

  write.table("------", filepath, sep = "\t", row.names = FALSE, col.names = FALSE, append = TRUE)
  write.table(ree, filepath, sep = "\t", row.names = FALSE, col.names = FALSE, append = TRUE)

  ree
}