Commit 16b4f55de1bee74c9f9060e6884d4d27c04cfe45

Authored by Efrain Gonzalez
1 parent a66a63dc50
Exists in master

wrong extension

Showing 1 changed file with 0 additions and 198 deletions   Show diff stats
... ... @@ -1,198 +0,0 @@
1   -#Libraries required to run the code
2   -library(MASS)
3   -library(pryr)
4   -library(dplyr)
5   -library(tidyr)
6   -library(readr)
7   -library(stringr)
8   -
9   -
10   -#Necessary Functions
11   -#1#Function for handling the changing of row names and column names
#' Rename the columns of a transposed sample-metadata matrix.
#'
#' Row 1 of `mat` is compared against fixed "!Sample_..." field labels;
#' for every other column, row 2 is searched for keywords (sex, PMI, age,
#' Braak, group/diagnosis) and the column is given a numbered name.
#'
#' @param mat Character matrix whose column names are already set. Columns
#'   that match nothing keep their current name.
#' @return `mat` with matching columns renamed to "Brain_Region", "Title",
#'   "ID_REF", or "Sex<n>", "PMI<n>", "Age<n>", "Braak<n>", "Group<n>".
chngrownm <- function(mat) {
  # Field labels found in row 1 and the column name each one maps to.
  header_names <- c(
    "!Sample_source_name_ch1" = "Brain_Region",
    "!Sample_title" = "Title",
    "!Sample_geo_accession" = "ID_REF"
  )
  # Keyword patterns tested against row 2, in this order. When several
  # match the same column, each bumps its own counter and the last one
  # decides the final name (same as the original chain of `if`s).
  value_patterns <- c(
    Sex = "Sex|gender|Gender|sex",
    PMI = "postmorteminterval|PMI|pmi",
    Age = "age|Age|AGE",
    Braak = "braak|b&b",
    Group = "group|disease|control|AD|normal|diagnosis|Alzheimer|Control"
  )
  counters <- c(Sex = 1, PMI = 1, Age = 1, Braak = 1, Group = 1)

  for (j in seq_len(ncol(mat))) {
    label <- mat[1, j]
    # `%in%` is FALSE for NA, so a missing label cannot break the `if`.
    if (label %in% names(header_names)) {
      colnames(mat)[j] <- header_names[[label]]
      # A labelled column is final: skip the keyword checks so that, e.g.,
      # a title containing "AD" is not renamed to a Group column.
      next
    }
    for (prefix in names(value_patterns)) {
      if (grepl(value_patterns[[prefix]], mat[2, j])) {
        colnames(mat)[j] <- paste0(prefix, counters[[prefix]])
        counters[[prefix]] <- counters[[prefix]] + 1
      }
    }
  }
  mat
}
57   -
58   -#2#Function for reorganizing information within the columns
#' Clean the values of the annotated sample columns.
#'
#' Columns are picked by name (as assigned by `chngrownm`); the first
#' column is never touched.
#'
#' @param mat Data frame (or matrix) with column names containing "Group",
#'   "Age", "Sex", "PMI" or "Braak".
#' @return `mat` with those columns rewritten: Group/Sex lose their
#'   "label: " prefix, Age keeps only digits (integer), PMI keeps digits
#'   and dots (numeric), Braak is read as a roman numeral (integer).
cinfo <- function(mat) {
  # seq_len(n)[-1] is empty for n <= 1, whereas 2:n would count down to
  # column 1 and process the column that is supposed to be skipped.
  for (j in seq_len(ncol(mat))[-1]) {
    col_name <- colnames(mat)[j]
    if (grepl("Group", col_name)) {
      mat[, j] <- gsub(".+:\\s|\\s.+;.+", "", mat[, j])
    }
    if (grepl("Age", col_name)) {
      # Every non-digit is removed, so "80.5" would become 805.
      mat[, j] <- as.integer(gsub("\\D", "", mat[, j]))
    }
    if (grepl("Sex", col_name)) {
      mat[, j] <- gsub(".+:\\s", "", mat[, j])
    }
    if (grepl("PMI", col_name)) {
      mat[, j] <- as.numeric(gsub("[^0-9\\.]", "", mat[, j]))
    }
    if (grepl("Braak", col_name)) {
      mat[, j] <- as.integer(utils::as.roman(gsub(".+:\\s", "", mat[, j])))
    }
  }
  mat
}
86   -
87   -#3#Function for changing the gene ID to gene name
#' Replace probe IDs in the first row of `DATA` with gene names.
#'
#' @param GeneName Matrix whose row 1 holds the IDs and row 2 the names
#'   that replace them (one column per ID).
#' @param DATA Matrix whose row 1 holds the IDs to translate.
#' @return `DATA` with every row-1 entry that equals an ID in `GeneName`
#'   replaced by the corresponding name; all other entries are unchanged.
cgeneID <- function(GeneName, DATA) {
  ids <- GeneName[1, ]
  new_names <- GeneName[2, ]
  header <- DATA[1, ]
  # Exact lookup: treating the IDs as regular expressions would also
  # rewrite longer IDs that merely contain a shorter one ("1_at" inside
  # "11_at"), and would cost one pass over the header per ID.
  idx <- match(header, ids)
  hit <- !is.na(header) & !is.na(idx)
  DATA[1, hit] <- new_names[idx[hit]]
  DATA
}
98   -
99   -#4#Function for adjusting the gene names
#' Pick one entry out of "///"-separated column names.
#'
#' @param DiData Matrix or data frame whose column names may list several
#'   entries separated by "///".
#' @param usecol Position of the entry to keep. Names with fewer than
#'   `usecol` entries fall back to their first entry.
#' @return Character vector with one name per column of `DiData`
#'   (`character(0)` when there are no columns). Surrounding whitespace of
#'   the chosen entry is kept as is.
gcnames <- function(DiData, usecol = 1) {
  if (ncol(DiData) == 0) {
    return(character(0))
  }
  # Split every name once instead of re-splitting it for each test.
  pieces <- strsplit(colnames(DiData), "///")
  vapply(
    pieces,
    function(p) if (length(p) >= usecol) p[usecol] else p[1],
    character(1),
    USE.NAMES = FALSE
  )
}
115   -
116   -
117   -
118   -#The Rest of this code will be used every time you want to change a data set
119   -
120   -#Getting the series matrix file
# Input 1: the series matrix file, picked through the system file dialog.
print("Choose the series matrix file that you want to Analyze")
alz <- file.choose()

# Input 2: the GPL file that belongs to that series matrix file.
print("Choose the GPL file that correlates with the above series matrix file")
genena <- file.choose()
127   -
128   -
129   -#Set working directory based on the directory of the series matrix file
130   -##strsplit(alz,"[\\]") %>%
131   -## .[[1]] %>%
132   -## .[-length(.)] %>%
133   -## paste(.,collapse="/") %>%
134   -## setwd()
135   -
136   -
137   -#Working with the wordy part of the document
# Read the descriptive part of the series matrix file: lines starting with
# "!Series" are treated as comments, and only rows whose first field
# contains "!Sample" (but not "!Sample_contact") are kept.
alzword <- alz %>%
  read_delim(delim = "\t", comment = "!Series", col_names = FALSE) %>%
  filter(grepl("!Sample", X1), !grepl("!Sample_contact", X1))

# Transpose so that every field becomes a column, and give the columns
# placeholder names ("col1", "col2", ...) for chngrownm() to overwrite.
ALZWORD <- t(alzword)
rownames(ALZWORD) <- NULL
colnames(ALZWORD) <- colnames(ALZWORD, do.NULL = FALSE)

# Row 1 holds the field labels and is dropped after renaming. drop = FALSE
# keeps the result a matrix even when a single sample row remains.
ALZWORD <- chngrownm(ALZWORD)[-1, , drop = FALSE]

# Columns that still carry a placeholder name were not recognised: drop them.
ALZWORD <- ALZWORD %>%
  as.data.frame() %>%
  dplyr::select(-starts_with("col"))

# Clean the values inside the recognised columns.
ALZWORDF <- cinfo(ALZWORD)
154   -
155   -
156   -#Working with Actual Data part of file
# Numeric part of the series matrix file: the first line is skipped, lines
# starting with "!" are treated as comments, and the first remaining line
# supplies the column names.
alzdat <- read_delim(
  alz,
  delim = "\t",
  col_names = TRUE,
  comment = "!",
  skip = 1
)

# Everything except the first column, transposed, without row names.
ALZDAT <- t(alzdat[, -1])
rownames(ALZDAT) <- NULL

# From the GPL file keep the ID column plus every column whose name
# contains "Symbol" or "ORF".
geneIDNam <- read_delim(genena, delim = "\t", comment = "#")
geneIDNam <- dplyr::select(
  geneIDNam,
  ID,
  grep("Symbol|ORF", colnames(geneIDNam))
)

# Translate the IDs and use the translated first row as column names.
ALZDAT1 <- cgeneID(t(geneIDNam), t(alzdat))
colnames(ALZDAT) <- ALZDAT1[1, ]

# Reduce "///"-separated names to a single entry.
colnames(ALZDAT) <- gcnames(ALZDAT)
175   -
176   -
177   -#Full Data
# Full data: cleaned sample annotations followed by the expression values.
Fullalzdw <- cbind(ALZWORDF, as.data.frame(ALZDAT))

# MASS is attached at the top of the file because the order in which the
# packages are attached matters.
## library(MASS)

# Build the output names from the digits of the input *file name* only.
# Backslashes are normalised first so that basename() isolates the file
# name for both "\" and "/" separated paths; splitting on "\" alone let
# digits from directory names leak into the result on "/" paths.
gse_digits <- gsub("\\D", "", basename(gsub("\\", "/", alz, fixed = TRUE)))

nfna <- paste0("GSE", gse_digits, "after.txt")
MASS::write.matrix(Fullalzdw, file = nfna, sep = "\t")

# Transposed copy, convenient for viewing in Excel.
nfnaex <- paste0("GSE", gse_digits, "aftexcel.txt")
write.table(t(Fullalzdw), file = nfnaex, sep = "\t")