Zhenghua Gong / Structures and MB

Browse Code »

Commit 6629da35df7d8263eb33c7af8d80fd510c683aed

Authored by Zhenghua Gong 2018-07-06 16:59:18 -0400

1 parent 43faf26b77

Exists in master

Summary of the orders

Showing 1 changed file with 430 additions and 0 deletions Show diff stats

Summary.R

Diff comments View file @ 6629da3

File was created	1	#source("/home/zgong001/Documents/Alarm/D50C9v/RCode/CombineOrders.R")
	2	getRelation =
	3	function(st = "[6][8][5][7\|6:8:5][1\|7][3\|7:1][4\|1][2\|4][0\|2]"){
	4	re = c()
	5	temstr = substr(st, 2, nchar(st)-1)
	6	stlist = strsplit(chartr(old = "][", new = "##", temstr), "##")
	7	for(i in 1:length(stlist[[1]])){
	8	temc = chartr(old = "\|", new = "g", stlist[[1]][i])
	9
	10	if (grepl("g", temc)){
	11	temc2 = strsplit(temc, "g")
	12
	13	X = temc2[[1]][1]
	14	TY = strsplit(temc2[[1]][2], ":")
	15	for(j in 1:length(TY[[1]])){
	16	Y = TY[[1]][j]
	17
	18	cc= paste(Y,"->", X, sep = "")
	19	re = append(re, cc)
	20	}
	21	}
	22	}
	23	return(re)
	24	}
	25
	26	creatDataframe =
	27	function(v = c("0", "1","2","3","4","5","6","7","8"), r = 246){
	28	cn = c()
	29	co= combn(v,2)
	30	for(i in 1:ncol(co)){
	31	X = co[1,i]
	32	Y = co[2,i]
	33	c1 = paste(X, "->", Y, sep = "")
	34	cn = append(cn,c1)
	35	c2 = paste(Y, "->", X, sep = "")
	36	cn = append(cn,c2)
	37	c3 = paste(X, " NA ", Y, sep = "")
	38	cn = append(cn,c3)
	39	}
	40
	41	b= ncol(co)
	42
	43	re = data.frame(matrix(0, nrow=r, ncol=b*3))
	44	colnames(re) = cn
	45
	46	return(re)
	47	}
	48
	49
	50	sumTable =
	51	function (pathname = "/home/zgong001/Documents/Alarm/D1KC9v/D1KC9v BestOrders/Strus_D1KC9v.txt",
	52	v = c("0", "1","2","3","4","5","6","7","8"),
	53	exfilename = "/home/zgong001/Documents/Alarm/D1KC9v/D1KC9v BestOrders/Strus_D1KC9v.xlsx",
	54	xlsxfile = "/home/zgong001/Documents/Alarm/D50S9v2/D50S9vRS.xlsx"){
	55	library(bnlearn)
	56	library(xlsx)
	57	library(dplyr)
	58	suminput = read.table(pathname, header = FALSE)
	59	sumResu = data.frame()
	60	sumAnce = data.frame()
	61	suminput$ord = paste(suminput$V1,suminput$V2,suminput$V3,suminput$V4,suminput$V5,suminput$V6,suminput$V7,suminput$V8,suminput$V9, sep = " ")
	62	suminput = suminput[-c(1,2,3,4,5,6,7,8,9,13)]
	63	colnames(suminput)[1] <- "OrderScores"
	64	colnames(suminput)[2] <- "Structures"
	65	colnames(suminput)[3] <- "StructScores"
	66
	67	orders= aggregate( OrderScores ~ord, data=suminput, FUN = mean)
	68	orders = orders[order(-orders[,2]), ]
	69	for(i in 1:nrow(orders)){
	70	orders$Opercentage[i] = exp(-log(sum(exp(orders$OrderScores-orders$OrderScores[i]))))
	71	}
	72
	73	orders$Ocumper = cumsum(orders$Opercentage)
	74
	75	for(i in 1:nrow(orders)){
	76	tem = suminput[which(suminput$ord== orders[i,1]), ]
	77
	78	tem$Opercentage = orders[i,3]
	79	tem$Ocumper = orders[i,4]
	80
	81	for(j in 1:nrow(tem)){
	82	tem$Spercentage[j] = exp(-log(sum(exp(tem$StructScores-tem$StructScores[j]))))
	83	}
	84	tem$Scumper = cumsum(tem$Spercentage)
	85	tem = tem[c("ord", "OrderScores", "Opercentage","Ocumper", "Structures", "StructScores","Spercentage","Scumper")]
	86	sumResu = rbind(sumResu, tem)
	87	}
	88
	89	rs = nrow(sumResu)
	90
	91	rl = creatDataframe(v,rs)
	92	sumResu= cbind(sumResu, rl)
	93
	94	sumAnce = sumResu
	95
	96	co= combn(v,2)
	97
	98	for(i in 1:nrow(sumResu)){
	99	s = as.character(sumResu [i,5])
	100	rel = getRelation(st = s)
	101
	102	for(j in 1:ncol(co)){
	103	X = co[1,j]
	104	Y = co[2,j]
	105	c1 = paste(X, "->", Y, sep = "")
	106	c2 = paste(Y, "->", X, sep = "")
	107	c3 = paste(X, " NA ", Y, sep = "")
	108
	109	if (c1 %in% rel){
	110	sumResu[i,c1] = sumResu$Opercentage[i]*sumResu$Spercentage[i]
	111	}
	112	else if(c2 %in% rel){
	113	sumResu[i,c2] = sumResu$Opercentage[i]*sumResu$Spercentage[i]
	114	}
	115	else{
	116	sumResu[i,c3] = sumResu$Opercentage[i]*sumResu$Spercentage[i]
	117	}
	118
	119	}
	120
	121	}
	122
	123	for(i in 1:nrow(sumAnce)){
	124	s = as.character(sumAnce [i,5])
	125	dags = model2network(s)
	126
	127	for(j in 1:ncol(co)){
	128	X = co[1,j]
	129	Y = co[2,j]
	130	c1 = paste(X, "->", Y, sep = "")
	131	c2 = paste(Y, "->", X, sep = "")
	132	c3 = paste(X, " NA ", Y, sep = "")
	133
	134	pXY = path(dags, from = X, to = Y)
	135	pYX = path(dags, from = Y, to = X)
	136
	137	if (pXY){
	138	sumAnce[i,c1] = sumAnce$Opercentage[i]*sumAnce$Spercentage[i]
	139	}
	140	else if(pYX){
	141	sumAnce[i,c2] = sumAnce$Opercentage[i]*sumAnce$Spercentage[i]
	142	}
	143	else{
	144	sumAnce[i,c3] = sumAnce$Opercentage[i]*sumAnce$Spercentage[i]
	145	}
	146
	147	}
	148
	149	}
	150
	151
	152
	153	sumP = colSums(sumResu[, -c(1:8)])
	154	sumA = colSums(sumAnce[, -c(1:8)])
	155
	156	reP = read.xlsx(xlsxfile, sheetName = "Sheet1", colIndex = 1:3)
	157	reA = read.xlsx(xlsxfile, sheetName = "Sheet2", colIndex = 1:3)
	158
	159	sP = cbind(reP, sumP, diffT = abs(sumP-reP[,2]), diffB = abs(sumP-reP[,3]))
	160	sA = cbind(reA, sumA, diffT = abs(sumA-reA[,2]), diffB = abs(sumA-reA[,3]))
	161
	162	distanceP = colSums(sP[,c(5,6)])
	163	distanceA = colSums(sA[,c(5,6)])
	164
	165
	166
	167	write.xlsx(sumResu, exfilename, sheetName = "Parents Detail", col.names = TRUE, row.names = FALSE, append = FALSE)
	168	write.xlsx(sP, exfilename, sheetName = "Parents Summary", col.names = TRUE, row.names = TRUE, append = TRUE)
	169	write.xlsx(distanceP, exfilename, sheetName = "Parents Distance", col.names = TRUE, row.names = TRUE, append = TRUE)
	170	write.xlsx(sumAnce, exfilename, sheetName = "Ancestor Detail", col.names = TRUE, row.names = FALSE, append = TRUE)
	171	write.xlsx(sA, exfilename, sheetName = "Ancestor Summary", col.names = TRUE, row.names = TRUE, append = TRUE)
	172	write.xlsx(distanceA, exfilename, sheetName = "Ancestor Distance", col.names = TRUE, row.names = TRUE, append = TRUE)
	173
	174
	175	# return(sP)
	176
	177	}
	178
	179	#---------------------------------------------------------------------------------
	180	sortStru = function(st = "[5][6][8][7\|5:8][1\|5:7][3\|7:8][2\|6:1][4\|1][0\|2]"){
	181	library(dplyr)
	182	re = ""
	183	temstr = substr(st, 2, nchar(st)-1)
	184	stlist = strsplit(chartr(old = "][", new = "##", temstr), "##")
	185	stlist = sort(stlist[[1]])
	186	for(i in 1:length(stlist)){
	187	if(nchar(stlist[i])>3){
	188
	189	temc = chartr(old = "\|", new = "g", stlist[i])
	190
	191	temc2 = strsplit(temc, "g")
	192	X = temc2[[1]][1]
	193	TY = strsplit(temc2[[1]][2], ":")
	194	TY = sort(TY[[1]])
	195
	196	TY2 = paste(TY, collapse = ":")
	197
	198	stlist[i] = paste(X,"\|", TY2, sep = "")
	199
	200	stlist[i] <- TY %>%
	201	paste(., collapse=":") %>%
	202	paste(X, "\|", ., sep = "")
	203
	204	}
	205
	206	}
	207
	208	re <- stlist %>%
	209	paste(., collapse="][") %>%
	210	paste("[", ., "]", sep = "")
	211
	212	return(re)
	213	}
	214
	215	MB =
	216	function (struc ="[6][8][5][7\|6:8:5][3\|7][1\|7:3][4\|1][2\|4][0\|2]", v ="1"){
	217
	218	library(xlsx)
	219	library(bnlearn)
	220	library(dplyr)
	221
	222
	223	mbs = ""
	224
	225	strutem <- struc %>%
	226	as.character(.) %>%
	227	substr(., 2, nchar(.)-1)
	228
	229	stru = strsplit(chartr(old = "][", new = "##", strutem), "##")
	230
	231	for(j in 1:length(stru[[1]])){
	232	if(grepl(v,stru[[1]][j])){
	233	mbs = paste(mbs, "[", stru[[1]][j], "]", sep = "")
	234	}
	235	}
	236	return(mbs)
	237	}
	238
	239	sumBlanket2 =
	240	function (xlsxfile = "/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2-newswap-0.5NONE-2-1h/Strus_D50S9v2_E30r3_o.xlsx",
	241	xlsxfile2 = "/home/zgong001/Documents/Alarm/D50S9v2/D50S9vRS.xlsx"){
	242
	243	library(xlsx)
	244	library(bnlearn)
	245	library(dplyr)
	246
	247	re = read.xlsx(xlsxfile, sheetName = "Parents Detail", colIndex = 1:8)
	248	re = na.omit(re)
	249
	250	colnames(re) = c("Order", "OrderScore", "OP", "OPC", "Structures", "StructureScore", "SP", "SPC")
	251
	252	truDis = vector()
	253	besDis = vector()
	254
	255	for(l in 1:9){
	256	a = l*4-3
	257	b = l*4-1
	258	n = as.character(l-1)
	259
	260	rebm = read.xlsx(xlsxfile2, sheetName = "Sheet3", colIndex = a:b)
	261	rebm = na.omit(rebm)
	262	colnames(rebm) = c("LableM", "TrueB", "BestB")
	263
	264	MBS = vector(mode="character", length=0)
	265	for(i in 1:nrow(re)){
	266
	267	mbt = MB(struc = re[i,5], v = n)
	268	st = sortStru(as.character(mbt))
	269	MBS = append(MBS,st)
	270
	271	}
	272
	273	blankScore = re$OP*re$SP
	274	REE = cbind(re, MBS, blankScore)
	275
	276	ag = aggregate(blankScore ~ MBS, data = REE, sum )
	277
	278
	279	truedist = 0
	280
	281
	282	if(rebm[1,1] %in% ag[,1]){
	283	tem = ag
	284	tem[which(tem$MBS == as.character(rebm[1,1])),2] = abs(1-tem[which(tem$MBS == as.character(rebm[1,1])),2])
	285	truedist = sum(tem[,2])
	286	}
	287	else{
	288	truedist = 2
	289	}
	290
	291	truDis = append(truDis, truedist)
	292
	293
	294
	295	bestdist = 0
	296
	297	tem2 = rebm[-1,1]
	298	tem3 = ag[,1]
	299	tt = vector()
	300
	301	di1 = setdiff(tem2, tem3)
	302	di2 = setdiff(tem3, tem2)
	303
	304	if(length(di1) == 0 & length(di2) == 0){
	305	temm = ag[order(ag[,1]),]
	306	temm1 =rebm[-1,c(1,3)]
	307
	308	temmm11= temm1[order(temm1[,1]),]
	309
	310	temm[,2] = abs(temmm11[,2] - temm[,2])
	311
	312	bestdist = sum(temm[,2])
	313
	314	}
	315	else if(length(di1) == 0){
	316
	317	temm = ag[which(ag[,1] %in% di2),]
	318	temm2 =rebm[-1,c(1,3)]
	319	temm1 = ag[which(! ag[,1] %in% di2),]
	320
	321	temmm21= temm1[order(temm1[,1]),]
	322	temmm22= temm2[order(temm2[,1]),]
	323	temmm21[,2] = abs(temmm21[,2] - temmm22[,2])
	324	temm=rbind(temm,temmm21)
	325
	326
	327	bestdist = sum(temm[,2])
	328
	329	}else if(length(di2) == 0){
	330
	331	temm2 =rebm[-1,c(1,3)]
	332
	333	temm = temm2[which(temm2[,1] %in% di1),]
	334
	335	temm1 = temm2[which(! temm2[,1] %in% di1),]
	336
	337
	338	temmm21= temm1[order(temm1[,1]),]
	339	temmm22= ag[order(ag[,1]),]
	340
	341
	342	temmm21[,2] = abs(temmm21[,2] - temmm22[,2])
	343	temm=rbind(temm,temmm21)
	344
	345
	346	bestdist = sum(temm[,2])
	347
	348
	349	}else{
	350	comm = intersect(tem2, tem3)
	351
	352
	353	temmm1 = rebm[which(rebm$LableM %in% comm),c(1,3)]
	354	temmm12= temmm1[order(temmm1[,1]),]
	355	temmm2 = ag[which(ag[,1] %in% comm),]
	356	temmm22= temmm2[order(temmm2[,1]),]
	357	temmm12[,2] = abs(temmm12[,2] - temmm22[,2])
	358	temt = rbind(temmm12, rebm[which(rebm$LableM %in% di1),c(1,3)])
	359
	360	temt = rbind(data.frame(MBS = temt[,1], blankScore =temt[,2] ), ag[which(ag[,1] %in% di2),])
	361	bestdist = sum(temt[,2])
	362	}
	363
	364	besDis = append(besDis, bestdist)
	365
	366
	367	}
	368
	369	td = sum(truDis)
	370	bd = sum(besDis)
	371
	372	write.xlsx(data.frame("True dis"= td, "Best dis" = bd), xlsxfile, sheetName = "Blanket Distance", col.names = TRUE, row.names = FALSE, append = TRUE)
	373
	374	return(re)
	375	}
	376
	377	processFile = function(filepath = "/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2-newswap-0.5NONE-1h/D50S9v2-E15-1hr1.txt") {
	378	re = data.frame()
	379	li = vector()
	380	con = file(filepath, "r")
	381	while ( TRUE ) {
	382	line = readLines(con, n = 1)
	383	if ( length(line) == 0 ) {
	384	break
	385	}
	386	li = append(li, line)
	387	}
	388	close(con)
	389
	390	for(i in 1:length(li)){
	391	s = strsplit(li[i], " ")
	392	b = strsplit(s[[1]][2], " ")
	393	l = data.frame(st=s[[1]][1], sc= as.numeric(as.character(b[[1]][1])))
	394	re = rbind(re, l)
	395	}
	396
	397	re = na.omit(re)
	398
	399	re = unique(re)
	400	re = re[order(-re[,2]), ]
	401
	402	m = re[1,2]
	403
	404	n=0
	405
	406	for(j in 2:nrow(re) ){
	407	s0 = re[j,2]
	408
	409	te = -log(exp(s0-m)+1)
	410
	411	m = m - te;
	412
	413	if(exp(te) >= 0.99){
	414	break
	415	}
	416	n = j
	417	}
	418
	419	ree = re[1:n,]
	420
	421	# sink("/home/zgong001/Documents/Alarm/D50S9v2/D50S9v2-newswap-0.5NONE-1h/myfile.txt", append=TRUE, split=TRUE)
	422
	423	write.table("------", filepath, sep="\t", row.names=F, col.names=F, append=TRUE)
	424
	425	write.table(ree, filepath, sep="\t", row.names=F, col.names=F,append=TRUE)
	426
	427	return(ree)
	428
	429
	430	}
	431