wrong extension

Efrain Gonzalez
1 parent a66a63dc50
Showing 1 changed file with 0 additions and 198 deletions Show diff stats
Rclean.txt
@@ -1,198 +0,0 @@
-#Libraries required to run the code
-library(MASS)
-library(pryr)
-library(dplyr)
-library(tidyr)
-library(readr)
-library(stringr)
-
-
-#Necessary Functions
-#1#Function for handling the changing of row names and column names
-chngrownm <- function(mat){
-	row <- dim(mat)[1]
-	col <- dim(mat)[2]
-	j <- 1
-	x <- 1
-	p <- 1	
-	a <- 1
-	b <- 1
-	g <- 1
-	for(j in 1:col){
-		if("!Sample_source_name_ch1"==mat[1,j]){
-			colnames(mat)[j] <- "Brain_Region"	
-		} 
-		if("!Sample_title" == mat[1,j]){
-			colnames(mat)[j] <- "Title"
-		} 
-		if("!Sample_geo_accession" == mat[1,j]){
-			colnames(mat)[j] <- "ID_REF"
-		} else{
-			if(grepl("Sex|gender|Gender|sex",mat[2,j])==TRUE){
-				colnames(mat)[j] <- paste0("Sex",x)
-				x = x + 1
-			}
-			if(grepl("postmorteminterval|PMI|pmi",mat[2,j])==TRUE){
-				colnames(mat)[j] <- paste0("PMI",p)
-				p = p + 1
-			}
-			if(grepl("age|Age|AGE",mat[2,j])==TRUE){
-				colnames(mat)[j] <- paste0("Age",a)
-				a = a + 1
-			 }
-			if(grepl("braak|b&b",mat[2,j])==TRUE){
-				colnames(mat)[j] <- paste0("Braak",b)
-				b = b + 1
-			}
-			if(grepl("group|disease|control|AD|normal|diagnosis|Alzheimer|Control",mat[2,j])==TRUE){
-				colnames(mat)[j] <- paste0("Group",g)
-				g = g + 1
-			}
-			
-		}
-		j = j + 1
-	}
-	mat
-}			
-
-#2#Function for reorganizing information within the columns
-cinfo <- function(mat){
-	col <- dim(mat)[2]
-	j <-2
-	for(j in 2:col){
-		if(grepl("Group",colnames(mat)[j]) == TRUE){
-			mat[,j] <- gsub(".+:\\s|\\s.+;.+","",mat[,j])
-		}
-		if(grepl("Age",colnames(mat)[j])==TRUE){
-			mat[,j] <- gsub("\\D","",mat[,j])%>%
-				as.integer()
-		}
-		if(grepl("Sex",colnames(mat)[j])==TRUE){
-			mat[,j] <- gsub(".+:\\s","",mat[,j])
-		}
-		if(grepl("PMI",colnames(mat)[j])==TRUE){
-			mat[,j] <- gsub("[^0-9\\.]","",mat[,j])%>%
-				as.numeric() 
-		}
-		if(grepl("Braak",colnames(mat)[j])==TRUE){
-			mat[,j]<-gsub(".+:\\s","",mat[,j])%>%
-				as.roman()%>%
-				as.integer()
-		}
-	j=j+1
-	}
-	mat
-}
-
-#3#Function for changing the gene ID to gene name
-cgeneID <- function(GeneName,DATA){
-	colGene <- dim(GeneName)[2]
-	j <- 1
-	for(j in 1:colGene){
-		chngsreq <- grep(GeneName[1,j],DATA[1,])
-		DATA[1,chngsreq] <- gsub(GeneName[1,j],GeneName[2,j],DATA[1,chngsreq])
-		j = j+1
-	}
-	DATA
-}
-
-#4#Function for adjusting the gene names
-gcnames <- function(DiData,usecol=1){
-	nuruns <- dim(DiData)[2]
-	i = 1
-	nwnam <- rep("0",length.out=nuruns)
-	for(i in 1:nuruns){
-		if(length(strsplit(colnames(DiData)[i],"///")[[1]]) >= usecol){
-			nwnam[i]=strsplit(colnames(DiData)[i],"///")[[1]][usecol]
-		} else{
-			nwnam[i]=strsplit(colnames(DiData)[i],"///")[[1]][1]
-		}
-		
-	}
-	nwnam
-
-}
-
-
-
-#The Rest of this code will be used every time you want to change a data set
-
-#Getting the series matrix file
-print("Choose the series matrix file that you want to Analyze")
-alz <- file.choose()
-
-#Getting the GPL file
-print("Choose the GPL file that correlates with the above series matrix file")
-genena <- file.choose()
-
-
-#Set working directory based on the directory of the series matrix file
-##strsplit(alz,"[\\]") %>%
-##	.[[1]] %>%
-##	.[-length(.)] %>%
-##	paste(.,collapse="/") %>%
-##	setwd()
-	
-
-#Working with the wordy part of the document
-alzword <- alz %>%
-	read_delim(delim ="\t",comment = "!Series",col_names = FALSE)%>%
-	filter(grepl("!Sample",X1))%>%
-	filter(!grepl("!Sample_contact",X1))
-
-##Changing row names and column names:
-ALZWORD <- t(alzword)
-rownames(ALZWORD)=NULL
-colnames(ALZWORD) <- colnames(ALZWORD,do.NULL=FALSE)
-ALZWORD <- chngrownm(ALZWORD)[-1,]
-ALZWORD <- ALZWORD%>%
-	as.data.frame()%>%
-	dplyr::select(-starts_with("col"))
-
-##Reorganizing information within the columns
-ALZWORDF <- cinfo(ALZWORD)
-
-
-#Working with Actual Data part of file
-alzdat <- alz %>% 
-	read_delim(delim="\t",col_names=TRUE,comment = "!",skip=1)
-ALZDAT <- t(alzdat[,-1])
-rownames(ALZDAT)=NULL
-
-
-##Gene ID to Gene Name
-geneIDNam <- genena %>%
-	read_delim(delim="\t",comment = "#")%>%
-	dplyr::select(.,ID,grep("Symbol|ORF",colnames(.)))
-
-##Changing the ID to a Name
-ALZDAT1 <- cgeneID(t(geneIDNam),t(alzdat))
-colnames(ALZDAT) = ALZDAT1[1,]
-
-
-##Adjusting the column names aka the gene names
-colnames(ALZDAT) <- gcnames(ALZDAT)
-
-
-#Full Data
-Fullalzdw <- ALZDAT %>%
-	as.data.frame() %>%
-	cbind(ALZWORDF,.)
-
-##since the order in which the packages are added matters I moved this package to the top 
-##library(MASS)
-nfna <- strsplit(alz,"[\\]") %>%
-	.[[1]] %>%
-	.[length(.)] %>%
-	gsub("\\D","",.) %>%
-	c("GSE",.,"after.txt") %>%
-	paste(collapse = "")
-MASS::write.matrix(Fullalzdw,file = nfna,sep = "\t")
-#Perfect for excel viewing
-nfnaex <- strsplit(alz,"[\\]") %>%
-	.[[1]] %>%
-	.[length(.)] %>%
-	gsub("\\D","",.) %>%
-	c("GSE",.,"aftexcel.txt") %>%
-	paste(collapse = "")
-write.table(t(Fullalzdw), file = nfnaex, sep = "\t")
...	...	@@ -1,198 +0,0 @@
1		-#Libraries required to run the code
2		-library(MASS)
3		-library(pryr)
4		-library(dplyr)
5		-library(tidyr)
6		-library(readr)
7		-library(stringr)
8		-
9		-
10		-#Necessary Functions
11		-#1#Function for handling the changing of row names and column names
12		-chngrownm <- function(mat){
13		- row <- dim(mat)[1]
14		- col <- dim(mat)[2]
15		- j <- 1
16		- x <- 1
17		- p <- 1
18		- a <- 1
19		- b <- 1
20		- g <- 1
21		- for(j in 1:col){
22		- if("!Sample_source_name_ch1"==mat[1,j]){
23		- colnames(mat)[j] <- "Brain_Region"
24		- }
25		- if("!Sample_title" == mat[1,j]){
26		- colnames(mat)[j] <- "Title"
27		- }
28		- if("!Sample_geo_accession" == mat[1,j]){
29		- colnames(mat)[j] <- "ID_REF"
30		- } else{
31		- if(grepl("Sex|gender|Gender|sex",mat[2,j])==TRUE){
32		- colnames(mat)[j] <- paste0("Sex",x)
33		- x = x + 1
34		- }
35		- if(grepl("postmorteminterval|PMI|pmi",mat[2,j])==TRUE){
36		- colnames(mat)[j] <- paste0("PMI",p)
37		- p = p + 1
38		- }
39		- if(grepl("age|Age|AGE",mat[2,j])==TRUE){
40		- colnames(mat)[j] <- paste0("Age",a)
41		- a = a + 1
42		- }
43		- if(grepl("braak|b&b",mat[2,j])==TRUE){
44		- colnames(mat)[j] <- paste0("Braak",b)
45		- b = b + 1
46		- }
47		- if(grepl("group|disease|control|AD|normal|diagnosis|Alzheimer|Control",mat[2,j])==TRUE){
48		- colnames(mat)[j] <- paste0("Group",g)
49		- g = g + 1
50		- }
51		-
52		- }
53		- j = j + 1
54		- }
55		- mat
56		-}
57		-
58		-#2#Function for reorganizing information within the columns
59		-cinfo <- function(mat){
60		- col <- dim(mat)[2]
61		- j <-2
62		- for(j in 2:col){
63		- if(grepl("Group",colnames(mat)[j]) == TRUE){
64		- mat[,j] <- gsub(".+:\\s|\\s.+;.+","",mat[,j])
65		- }
66		- if(grepl("Age",colnames(mat)[j])==TRUE){
67		- mat[,j] <- gsub("\\D","",mat[,j])%>%
68		- as.integer()
69		- }
70		- if(grepl("Sex",colnames(mat)[j])==TRUE){
71		- mat[,j] <- gsub(".+:\\s","",mat[,j])
72		- }
73		- if(grepl("PMI",colnames(mat)[j])==TRUE){
74		- mat[,j] <- gsub("[^0-9\\.]","",mat[,j])%>%
75		- as.numeric()
76		- }
77		- if(grepl("Braak",colnames(mat)[j])==TRUE){
78		- mat[,j]<-gsub(".+:\\s","",mat[,j])%>%
79		- as.roman()%>%
80		- as.integer()
81		- }
82		- j=j+1
83		- }
84		- mat
85		-}
86		-
87		-#3#Function for changing the gene ID to gene name
88		-cgeneID <- function(GeneName,DATA){
89		- colGene <- dim(GeneName)[2]
90		- j <- 1
91		- for(j in 1:colGene){
92		- chngsreq <- grep(GeneName[1,j],DATA[1,])
93		- DATA[1,chngsreq] <- gsub(GeneName[1,j],GeneName[2,j],DATA[1,chngsreq])
94		- j = j+1
95		- }
96		- DATA
97		-}
98		-
99		-#4#Function for adjusting the gene names
100		-gcnames <- function(DiData,usecol=1){
101		- nuruns <- dim(DiData)[2]
102		- i = 1
103		- nwnam <- rep("0",length.out=nuruns)
104		- for(i in 1:nuruns){
105		- if(length(strsplit(colnames(DiData)[i],"///")[[1]]) >= usecol){
106		- nwnam[i]=strsplit(colnames(DiData)[i],"///")[[1]][usecol]
107		- } else{
108		- nwnam[i]=strsplit(colnames(DiData)[i],"///")[[1]][1]
109		- }
110		-
111		- }
112		- nwnam
113		-
114		-}
115		-
116		-
117		-
118		-#The Rest of this code will be used every time you want to change a data set
119		-
120		-#Getting the series matrix file
121		-print("Choose the series matrix file that you want to Analyze")
122		-alz <- file.choose()
123		-
124		-#Getting the GPL file
125		-print("Choose the GPL file that correlates with the above series matrix file")
126		-genena <- file.choose()
127		-
128		-
129		-#Set working directory based on the directory of the series matrix file
130		-##strsplit(alz,"[\\]") %>%
131		-## .[[1]] %>%
132		-## .[-length(.)] %>%
133		-## paste(.,collapse="/") %>%
134		-## setwd()
135		-
136		-
137		-#Working with the wordy part of the document
138		-alzword <- alz %>%
139		- read_delim(delim ="\t",comment = "!Series",col_names = FALSE)%>%
140		- filter(grepl("!Sample",X1))%>%
141		- filter(!grepl("!Sample_contact",X1))
142		-
143		-##Changing row names and column names:
144		-ALZWORD <- t(alzword)
145		-rownames(ALZWORD)=NULL
146		-colnames(ALZWORD) <- colnames(ALZWORD,do.NULL=FALSE)
147		-ALZWORD <- chngrownm(ALZWORD)[-1,]
148		-ALZWORD <- ALZWORD%>%
149		- as.data.frame()%>%
150		- dplyr::select(-starts_with("col"))
151		-
152		-##Reorganizing information within the columns
153		-ALZWORDF <- cinfo(ALZWORD)
154		-
155		-
156		-#Working with Actual Data part of file
157		-alzdat <- alz %>%
158		- read_delim(delim="\t",col_names=TRUE,comment = "!",skip=1)
159		-ALZDAT <- t(alzdat[,-1])
160		-rownames(ALZDAT)=NULL
161		-
162		-
163		-##Gene ID to Gene Name
164		-geneIDNam <- genena %>%
165		- read_delim(delim="\t",comment = "#")%>%
166		- dplyr::select(.,ID,grep("Symbol|ORF",colnames(.)))
167		-
168		-##Changing the ID to a Name
169		-ALZDAT1 <- cgeneID(t(geneIDNam),t(alzdat))
170		-colnames(ALZDAT) = ALZDAT1[1,]
171		-
172		-
173		-##Adjusting the column names aka the gene names
174		-colnames(ALZDAT) <- gcnames(ALZDAT)
175		-
176		-
177		-#Full Data
178		-Fullalzdw <- ALZDAT %>%
179		- as.data.frame() %>%
180		- cbind(ALZWORDF,.)
181		-
182		-##since the order in which the packages are added matters I moved this package to the top
183		-##library(MASS)
184		-nfna <- strsplit(alz,"[\\]") %>%
185		- .[[1]] %>%
186		- .[length(.)] %>%
187		- gsub("\\D","",.) %>%
188		- c("GSE",.,"after.txt") %>%
189		- paste(collapse = "")
190		-MASS::write.matrix(Fullalzdw,file = nfna,sep = "\t")
191		-#Perfect for excel viewing
192		-nfnaex <- strsplit(alz,"[\\]") %>%
193		- .[[1]] %>%
194		- .[length(.)] %>%
195		- gsub("\\D","",.) %>%
196		- c("GSE",.,"aftexcel.txt") %>%
197		- paste(collapse = "")
198		-write.table(t(Fullalzdw), file = nfnaex, sep = "\t")