Blame view

RCleanDscret.R 12.5 KB
234f89c9a   Efrain Gonzalez   Outputs raw and d...
1
  ##Posted 6/15/2017
c2b2c096e   Efrain Gonzalez   Update to amount ...
2
  options(digits = 11)
234f89c9a   Efrain Gonzalez   Outputs raw and d...
3

0eb342056   Efrain Gonzalez   This code combine...
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
  #Libraries required to run the code
  library(pryr)
  library(MASS)
  library(dplyr)
  library(tidyr)
  library(readr)
  library(stringr)
  
  
  #Necessary Functions
  #1#Function for handling the changing of row names and column names
  chngrownm <- function(mat){
  	row <- dim(mat)[1]
  	col <- dim(mat)[2]
  	j <- 1
  	x <- 1
  	p <- 1	
  	a <- 1 
  	b <- 1
  	g <- 1
  	for(j in 1:col){
  		if("!Sample_source_name_ch1"==mat[1,j]){
  			colnames(mat)[j] <- "Brain_Region"	
805474e1e   Efrain Gonzalez   Most recent updat...
27
  		} else if("!Sample_title" == mat[1,j]){
0eb342056   Efrain Gonzalez   This code combine...
28
  			colnames(mat)[j] <- "Title"
805474e1e   Efrain Gonzalez   Most recent updat...
29
  		} else if("!Sample_geo_accession" == mat[1,j]){
0eb342056   Efrain Gonzalez   This code combine...
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
  			colnames(mat)[j] <- "ID_REF"
  		} else{
  			if(grepl("Sex|gender|Gender|sex",mat[2,j])==TRUE){
  				colnames(mat)[j] <- paste0("Sex",x)
  				x = x + 1
  			}
  			if(grepl("postmorteminterval|PMI|pmi",mat[2,j])==TRUE){
  				colnames(mat)[j] <- paste0("PMI",p)
  				p = p + 1
  			}
  			if(grepl("age|Age|AGE",mat[2,j])==TRUE){
  				colnames(mat)[j] <- paste0("Age",a)
  				a = a + 1
  			 }
  			if(grepl("braak|b&b",mat[2,j])==TRUE){
  				colnames(mat)[j] <- paste0("Braak",b)
  				b = b + 1
  			}
51d31a335   Efrain Gonzalez   Update (UNTESTED)
48
  			if(grepl("group|disease|control|AD|normal|diagnosis|Alzheimer|Control|Normal",mat[2,j])==TRUE){
0eb342056   Efrain Gonzalez   This code combine...
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
  				colnames(mat)[j] <- paste0("Group",g)
  				g = g + 1
  			}
  			
  		}
  		j = j + 1
  	}
  	mat
  }			
  
  #2#Function for reorganizing information within the columns
  cinfo <- function(mat){
  	col <- dim(mat)[2]
  	j <-2
  	for(j in 2:col){
  		if(grepl("Group",colnames(mat)[j]) == TRUE){
  			mat[,j] <- gsub(".+:\\s|\\s.+;.+","",mat[,j])
  		}
  		if(grepl("Age",colnames(mat)[j])==TRUE){
  			mat[,j] <- gsub("\\D","",mat[,j])%>%
  				as.integer()
  		}
  		if(grepl("Sex",colnames(mat)[j])==TRUE){
  			mat[,j] <- gsub(".+:\\s","",mat[,j])
  		}
  		if(grepl("PMI",colnames(mat)[j])==TRUE){
  			mat[,j] <- gsub("[^0-9\\.]","",mat[,j])%>%
  				as.numeric() 
  		}
  		if(grepl("Braak",colnames(mat)[j])==TRUE){
  			mat[,j]<-gsub(".+:\\s","",mat[,j])%>%
  				as.roman()%>%
  				as.integer()
  		}
  	j=j+1
  	}
  	mat
  }
  
  #3#Function for labeling the gene IDs without names
  NAFIXING <- function(GIDNAM){
  	row <- dim(GIDNAM)[1]
  	i <- 1
  	for(i in 1:row){
  		if(grepl("^NA\\s*$",GIDNAM[i,2])==TRUE||is.na(GIDNAM[i,2])==TRUE){
  			GIDNAM[i,2] <- GIDNAM[i,1]
  		}
  		i <- i + 1
  	}
  	GIDNAM
  }	
  
  #4#Function for changing the gene ID to gene name
  cgeneID <- function(GeneName,DATA){
805474e1e   Efrain Gonzalez   Most recent updat...
103
104
105
106
107
108
109
110
111
112
113
114
  	nj <- t(GeneName)
  	nq <- t(DATA)
  	colGene <- dim(nj)[2]
  	colDATA <- dim(nq)[2]
  	j <- 1
  	for(j in 1:colDATA){
  		#where is that gene id located within the GPL file
  		chngreq <- grep(paste0("^",nq[1,j],"$"),nj[1,])
  		if(is.na(sum(chngreq))==FALSE){
  			if(sum(chngreq) > 0){
  			nq[1,j] <- gsub(paste0("^",nq[1,j],"$"),nj[2,chngreq],nq[1,j])
  			}
0eb342056   Efrain Gonzalez   This code combine...
115
  		}
805474e1e   Efrain Gonzalez   Most recent updat...
116
  		j <- j + 1
0eb342056   Efrain Gonzalez   This code combine...
117
  	}
805474e1e   Efrain Gonzalez   Most recent updat...
118
  	nq
0eb342056   Efrain Gonzalez   This code combine...
119
  }
805474e1e   Efrain Gonzalez   Most recent updat...
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
  #cgeneID <- function(GeneName,DATA){
  #    colGene <- dim(GeneName)[2]
  #     j <- 1
  #     for(j in 1:colGene){
  #	chngsreq <- grep(paste0("^",GeneName[1,j],"$"),DATA[1,])
  #	if(is.na(sum(chngsreq))==FALSE){
  #		if(sum(chngsreq) > 0){
  #			DATA[1,chngsreq] <- gsub(paste0("^",GeneName[1,j]),GeneName[2,j],DATA[1,chngsreq])
  #		}
  #	}
  #		#if(sum(chngsreq) > 0){
  #		##DATA[1,chngsreq] <- gsub(GeneName[1,j],GeneName[2,j],DATA[1,chngsreq])
  #		#DATA[1,chngsreq] <- gsub(paste0("^",GeneName[1,j]),GeneName[2,j],DATA[1,chngsreq])
  #		#}
  #	j = j+1
  #	}
  #	DATA
  #}
0eb342056   Efrain Gonzalez   This code combine...
138
139
140
141
142
143
144
  
  #5#Function for adjusting the gene names
  gcnames <- function(DiData,usecol=1){
  	nuruns <- dim(DiData)[2]
  	i = 1
  	nwnam <- rep("0",length.out=nuruns)
  	for(i in 1:nuruns){
62170e5f6   Efrain Gonzalez   Update to gcnames...
145
146
  		if(length(strsplit(colnames(DiData)[i],"///|//")[[1]]) >= usecol){
  			nwnam[i]=str_trim(strsplit(colnames(DiData)[i],"///|//")[[1]][usecol])
0eb342056   Efrain Gonzalez   This code combine...
147
  		} else{
62170e5f6   Efrain Gonzalez   Update to gcnames...
148
  			nwnam[i]=str_trim(strsplit(colnames(DiData)[i],"///|//")[[1]][1])
0eb342056   Efrain Gonzalez   This code combine...
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
  		}
  		
  	}
  	nwnam
  
  }
  
  #6# Function for discretizing the data 
  dndat <- function(NDATA){
  	rownd <- dim(NDATA)[1]
  	colnd <- dim(NDATA)[2]
  	DDATA <- matrix(0,nrow=rownd,ncol=colnd)
  	colnames(DDATA) <- colnames(NDATA)
  	i <- 1
  	for(i in 1:rownd){
  		j <- 1
  		for(j in 1:colnd){
  			if(is.na(NDATA[i,j])==FALSE){
  			
  				if(NDATA[i,j] < -1){
  					DDATA[i,j]=0L
805474e1e   Efrain Gonzalez   Most recent updat...
170
  				} else if(NDATA[i,j] > 1){
0eb342056   Efrain Gonzalez   This code combine...
171
  					DDATA[i,j]=2L
f2cad6d27   Efrain Gonzalez   Update to dndat f...
172
  				} else if(-1 <= NDATA[i,j] && NDATA[i,j] <= 1){
0eb342056   Efrain Gonzalez   This code combine...
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
  					DDATA[i,j]=1L
  				}
  			} else{
  				DDATA[i,j] = NDATA[i,j]
  			}
  			j = j + 1			
  		}
  		i = i + 1
  	}
  	DDATA
  }
  
  
  #The Rest of this code will be used every time you want to change a data set
  
  #Getting the series matrix file
  print("Choose the series matrix file that you want to Analyze")
  alz <- file.choose()
  
  #Getting the GPL file
  print("Choose the GPL file that correlates with the above series matrix file")
  genena <- file.choose()
  
  
  #Find out if it is a soft GPL file or not
  soft <- strsplit(genena,"[\\|/]") %>%
  	.[[1]] %>%
  	.[length(.)] %>%
  	grepl("soft|annot",.)
  
  #Working with the wordy part of the document
  alzword <- alz %>%
  	read_delim(delim ="\t",comment = "!Series",col_names = FALSE)%>%
  	filter(grepl("!Sample",X1))%>%
  	filter(!grepl("!Sample_contact",X1))
  
  ##Changing row names and column names:
  ALZWORD <- t(alzword)
  rownames(ALZWORD)=NULL
  colnames(ALZWORD) <- colnames(ALZWORD,do.NULL=FALSE)
  ALZWORD <- chngrownm(ALZWORD)[-1,]
  ALZWORD <- ALZWORD%>%
b2053f56b   Efrain Gonzalez   Updating handling...
215
  	as.data.frame(.,stringsAsFactors = FALSE)%>%
0eb342056   Efrain Gonzalez   This code combine...
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
  	dplyr::select(-starts_with("col"))
  
  ##Reorganizing information within the columns
  ALZWORDF <- cinfo(ALZWORD)
  
  
  #Working with Actual Data part of file
  alzdat <- alz %>% 
  	read_delim(delim="\t",col_names=TRUE,comment = "!",skip=1)
  ALZDAT <- t(alzdat[,-1])
  rownames(ALZDAT)=NULL
  
  ##Is there a clean version of the GPL file available?
  gplnum <- strsplit(genena,"[\\|/]") %>%
  	.[[1]] %>%
  	.[length(.)] %>%
  	gsub("\\D","",.)
  clfileex <- sum(grepl(paste0("Clean_GPL",gplnum),list.files()))
  if(clfileex >= 1){
  #use the clean version
  geneIDNam <-  paste0("Clean_GPL",gplnum,".txt") %>%
  	read_delim(delim="\t",col_names = c("ID","Symbol"), comment = "!")
805474e1e   Efrain Gonzalez   Most recent updat...
238
  } else if(clfileex == 0){
0eb342056   Efrain Gonzalez   This code combine...
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
  ##Lets Create a clean version
   
  ##Gene ID to Gene Name
  	if(soft == TRUE){
  		#Check to see if there is already a file containing information on soft files
  		fileex <- sum(grepl("GPL_ID_LOC.txt",list.files()))
  		if(fileex == 1){
  			#Check to see if this GPL soft file has been used before
  			IDF <- read_delim("GPL_ID_LOC.txt",delim = "\t",col_names = TRUE) %>%
  				.$GPL_FILE_NUM%>%
  				grepl(gplnum,.) %>%
  				sum()
  			if(IDF == 1){
  				IDLOCAL <- read_delim("GPL_ID_LOC.txt",delim = "\t",col_names = TRUE) %>%
  					.$GPL_FILE_NUM%>%
  					grep(gplnum,.)
  				idlocgpl <- read_delim("GPL_ID_LOC.txt",delim = "\t",col_names = TRUE) %>%
  					.$LOC_ID %>%
  					.[IDLOCAL]
  				geneIDNam <- genena %>%
  					read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idlocgpl) %>%
cb1063ceb   Efrain Gonzalez   Added "Gene symbo...
260
  					dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$|^Gene symbol$",colnames(.)))
805474e1e   Efrain Gonzalez   Most recent updat...
261
  			} else if(IDF == 0){
0eb342056   Efrain Gonzalez   This code combine...
262
263
264
265
266
267
268
269
270
271
  				#No information on this particular GPL file
  				idLOCGPL <- genena %>%
  					read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>%
  					t(.) %>%
  					grep("^ID\\s*$",.) %>%
  					-1
  				cbind(as.integer(gplnum),as.integer(idLOCGPL)) %>%
  					cat(file="GPL_ID_LOC.txt",sep = "\t", fill = TRUE, append = TRUE)
  				geneIDNam <- genena %>%
  					read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idLOCGPL) %>%
cb1063ceb   Efrain Gonzalez   Added "Gene symbo...
272
  					dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$|^Gene symbol$",colnames(.)))
0eb342056   Efrain Gonzalez   This code combine...
273
  			}
805474e1e   Efrain Gonzalez   Most recent updat...
274
  		} else if(fileex == 0){
0eb342056   Efrain Gonzalez   This code combine...
275
276
277
278
279
280
281
282
283
284
285
  			#We must create a file that we can access for later use
  			idLOCGPL <- genena %>%
  				read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>%
  				t(.) %>%
  				grep("^ID\\s*$",.) %>%
  				-1
  			Firstval <- cbind(as.integer(gplnum),as.integer(idLOCGPL))
  			colnames(Firstval) <- c("GPL_FILE_NUM","LOC_ID")
  			write.table(Firstval,file = "GPL_ID_LOC.txt", sep = "\t",row.names = FALSE, col.names = TRUE)
  			geneIDNam <- genena %>%
  				read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idLOCGPL) %>%
cb1063ceb   Efrain Gonzalez   Added "Gene symbo...
286
  				dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$|^Gene symbol$",colnames(.)))
0eb342056   Efrain Gonzalez   This code combine...
287
  		}
805474e1e   Efrain Gonzalez   Most recent updat...
288
  	 } else if(soft == FALSE){
0eb342056   Efrain Gonzalez   This code combine...
289
290
  		geneIDNam <- genena %>%
  		read_delim(delim="\t",comment = "#")%>%
cb1063ceb   Efrain Gonzalez   Added "Gene symbo...
291
  		dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$|^Gene symbol$",colnames(.)))
0eb342056   Efrain Gonzalez   This code combine...
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
  	}
  
  	##Labeling the gene IDs without names
  	geneIDNam <- NAFIXING(geneIDNam)	
  
  	##remove the whitespace
  	geneIDNam <- t(rbind(str_trim(t(geneIDNam)[1,]),str_trim(t(geneIDNam)[2,])))
  	
  	##Here is the clean version
  	write.table(geneIDNam,file = paste0("Clean_GPL",gplnum,".txt"),sep = "\t",row.names = FALSE, col.names = FALSE)
  }
  
  
  
  ##Changing the gene ID to gene name
805474e1e   Efrain Gonzalez   Most recent updat...
307
  ALZDAT1 <- cgeneID(geneIDNam,alzdat)
0eb342056   Efrain Gonzalez   This code combine...
308
309
310
311
312
313
314
315
316
  colnames(ALZDAT) = ALZDAT1[1,]
  
  
  ##Adjusting the column names aka the gene names
  colnames(ALZDAT) <- gcnames(ALZDAT)
  
  
  #Full RAW Data
  Fullalzdwr <- ALZDAT %>%
db021299e   Efrain Gonzalez   Updated "stringsA...
317
  	as.data.frame(.,stringsAsFactors = FALSE) %>%
0eb342056   Efrain Gonzalez   This code combine...
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
  	cbind(ALZWORDF,.)
  
  
  #Raw file is output
  nfnaex <- strsplit(alz,"[\\]") %>%
  	.[[1]] %>%
  	.[length(.)] %>%
  	gsub("\\D","",.) %>%
  	c("GSE",.,"aftexcel.txt") %>%
  	paste(collapse = "")
  write.table(t(Fullalzdwr), file = nfnaex, sep = "\t")
  
  
  #Now for the discretization part
  ##get the wordy part again
  rawword <- t(ALZWORDF)
  
  ##where is ID_REF located
689231363   Efrain Gonzalez   Update error with...
336
  hereim <- grep("ID_REF",rownames(rawword))
0eb342056   Efrain Gonzalez   This code combine...
337
338
339
340
341
342
  
  ##Subject Names GSM...
  subjnam <- rawword[hereim,]
  
  ##Getting the names for the rows
  namedarows <- rownames(rawword)[-hereim] %>%
b2053f56b   Efrain Gonzalez   Updating handling...
343
  	as.data.frame(.,stringsAsFactors = FALSE)
0eb342056   Efrain Gonzalez   This code combine...
344
  RAWWORD <- rawword[-hereim,] %>%
b2053f56b   Efrain Gonzalez   Updating handling...
345
  	as.data.frame(.,stringsAsFactors = FALSE) %>%
0eb342056   Efrain Gonzalez   This code combine...
346
347
348
349
  	bind_cols(namedarows,.)
  z <- 1
  naroww <- as.data.frame(rep(0,dim(RAWWORD)[1]),stringsAsFactors = FALSE)
  for(z in 1:dim(RAWWORD)[1]){
805474e1e   Efrain Gonzalez   Most recent updat...
350
351
352
353
354
355
356
      if(sum(is.na(RAWWORD[z,])) > 0){
          naroww[z,1] <- as.integer(sum(is.na(RAWWORD[z,])))
      }
  	if(length(grep("NA",RAWWORD[z,])) > 0){
          naroww[z,1] <- as.integer(length(grep("NA",RAWWORD[z,]))) + naroww[z,1]
      }
      z <- z + 1
0eb342056   Efrain Gonzalez   This code combine...
357
358
359
360
361
362
363
364
  }
  
  colnames(naroww) <- "ROW_NAs"
  RAWWORD <- bind_cols(RAWWORD,naroww)
  
  
  roALZna <- t(ALZDAT) %>%
  	rownames(.) %>%
b2053f56b   Efrain Gonzalez   Updating handling...
365
  	as.data.frame(.,stringsAsFactors = FALSE)
0eb342056   Efrain Gonzalez   This code combine...
366
367
368
  colnames(roALZna) <- "ID_REF"
  
  RAWDAT <- t(ALZDAT) %>%
b2053f56b   Efrain Gonzalez   Updating handling...
369
  	as.data.frame(.,stringsAsFactors = FALSE)
0eb342056   Efrain Gonzalez   This code combine...
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
  colnames(RAWDAT) <- NULL
  rownames(RAWDAT) <- NULL
  
  RAWDAT2 <- RAWDAT %>%
  	cbind(roALZna,.) %>%
  	dplyr::arrange(.,ID_REF)
  
  ##Editing the file for R processing
  RAWDATID <- RAWDAT2[,1] %>%
  	as.matrix(.)
  
  RAWDATNUM <- RAWDAT2[,-1] %>%
  	mapply(.,FUN = as.numeric) %>%
  	t(.)
  
  ##Consolidating genes with the same name
  ###create empty matrix of size equal  to tabRDATID
  tabRDATID <- table(RAWDATID)
  NuRDATN <- matrix(0, nrow = dim(RAWDATNUM)[1], ncol = length(tabRDATID))
  j <- 1
  for(j in 1:length(tabRDATID)){
  
  	##Putting the ones without duplicates in their new homes
  	if(tabRDATID[j] == 1){
  		NuRDATN[,j] <- RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])]
805474e1e   Efrain Gonzalez   Most recent updat...
395
  	} else if(tabRDATID[j] > 1){
0eb342056   Efrain Gonzalez   This code combine...
396
  	##Averaging duplicates and putting them in their new homes
0eb342056   Efrain Gonzalez   This code combine...
397
398
399
400
401
402
403
404
405
406
407
  		NuRDATN[,j] <- rowMeans(RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])],na.rm = TRUE)
  	}
  	j <- j + 1
  }
  
  ##Scaling the Data
  scrawdat <- NuRDATN%>%
  	scale()
  attr(scrawdat,"scaled:center") <- NULL
  attr(scrawdat,"scaled:scale") <- NULL
  colnames(scrawdat) <- rownames(tabRDATID)
556b97bfa   Efrain Gonzalez   Update Now output...
408
409
410
411
412
413
414
415
416
417
418
419
  #Outputting the Z-score file
  nfnzsc <- strsplit(alz,"[\\]") %>%
      .[[1]] %>%
      .[length(.)] %>%
      gsub("\\D","",.) %>%
      c("GSE",.,"zscore.txt") %>%
      paste(collapse = "")
  zscraw <- scrawdat %>%
      t()%>%
      as.data.frame(.,stringsAsFactors = FALSE)
  colnames(zscraw) <- subjnam
  write.table(zscraw, file = nfnzsc, sep = "\t",col.names = TRUE,row.names = TRUE)
0eb342056   Efrain Gonzalez   This code combine...
420
421
422
423
  ##Discretized the Data
  dialzdat <- scrawdat %>%
  	dndat(.) %>%
  	t()%>%
b2053f56b   Efrain Gonzalez   Updating handling...
424
  	as.data.frame(.,stringsAsFactors = FALSE)
0eb342056   Efrain Gonzalez   This code combine...
425
426
427
  colnames(dialzdat) <- rownames(RAWDATNUM)
  
  ##setting "ID_REF" as a new variable
b2053f56b   Efrain Gonzalez   Updating handling...
428
  geneNAM <- as.data.frame(as.matrix(rownames(dialzdat),ncol=1),stringsAsFactors = FALSE)
0eb342056   Efrain Gonzalez   This code combine...
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
  colnames(geneNAM) <- "ID_REF"
  rownames(dialzdat) <- NULL
  dialzdat <-bind_cols(geneNAM,dialzdat)
  
  ##NAs in a column
  x <- 2
  nacol <- as.data.frame(t(rep(0,dim(dialzdat)[2])),stringsAsFactors = FALSE)
  nacol[1,1] = "COL_NAs"
  for(x in 2:dim(dialzdat)[2]){
  	nacol[1,x] <- as.integer(sum(is.na(dialzdat[,x])))
  	x <- x + 1
  }
  colnames(nacol) <- colnames(dialzdat)
  dialzdat<-bind_rows(dialzdat,nacol)
  
  ##NAs in a row
  y <- 1
  narowd <- as.data.frame(rep(0,dim(dialzdat)[1]),stringsAsFactors = FALSE)
  for(y in 1:dim(dialzdat)[1]){
  	narowd[y,1] <- as.integer(sum(is.na(dialzdat[y,])))
  	y <- y + 1
  }
  colnames(narowd) <- "ROW_NAs"
  dialzdat <- bind_cols(dialzdat,narowd)
  colnames(dialzdat)[2:(dim(dialzdat)[2]-1)] <- subjnam
  colnames(RAWWORD) <- colnames(dialzdat)
  ##converting to character so that the clinical can be brought together with discrete data
  k <- 2
  for(k in 2:dim(dialzdat)[2]-1){
  	dialzdat[,k] <- as.character(dialzdat[,k])
  	k <- k + 1
  }
  #The End the full data
  Dscrtalzdw <- bind_rows(RAWWORD,dialzdat)
  
  #Produces Discrete file
689231363   Efrain Gonzalez   Update error with...
465
  nfnaex2 <- strsplit(alz,"[\\|/]") %>%
0eb342056   Efrain Gonzalez   This code combine...
466
467
468
469
470
  	.[[1]] %>%
  	.[length(.)] %>%
  	gsub("\\D","",.) %>%
  	c("GSE",.,"dscrt.txt") %>%
  	paste(collapse = "")
689231363   Efrain Gonzalez   Update error with...
471
  write.table(Dscrtalzdw, file = nfnaex2, sep = "\t",col.names = TRUE,row.names = FALSE)
0eb342056   Efrain Gonzalez   This code combine...
472