#Libraries required to run the code
library(MASS)
library(pryr)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)

#Necessary Functions

#1#Function for handling the changing of row names and column names
# Renames the columns of `mat` from its contents.
#
# mat: matrix whose first row holds the "!Sample_..." field labels and whose
#      second row holds the first value of each field. Column names must
#      already exist, because they are replaced element by element.
#
# Columns whose label is one of the three fixed "!Sample_..." fields get a
# fixed name and are not inspected any further. Every other column is named
# from keywords found in its second-row value; numbered names (Sex1, Age1, ...)
# use one running counter per category. The keyword checks are independent:
# when a value matches several of them, the last matching name is kept and
# every matching counter still advances.
#
# Returns `mat` with updated column names; the cell contents are untouched.
chngrownm <- function(mat) {
  n_col <- dim(mat)[2]
  n_sex <- 1
  n_pmi <- 1
  n_age <- 1
  n_braak <- 1
  n_group <- 1

  # seq_len() keeps the loop from running on a zero-column input.
  for (j in seq_len(n_col)) {
    label <- mat[1, j]
    if ("!Sample_source_name_ch1" == label) {
      colnames(mat)[j] <- "Brain_Region"
    } else if ("!Sample_title" == label) {
      colnames(mat)[j] <- "Title"
    } else if ("!Sample_geo_accession" == label) {
      colnames(mat)[j] <- "ID_REF"
    } else {
      # Only reached for columns that are not one of the fixed fields above,
      # so a title such as "AD case 1" can no longer be renamed to a Group.
      value <- mat[2, j]
      if (grepl("Sex|gender|Gender|sex", value)) {
        colnames(mat)[j] <- paste0("Sex", n_sex)
        n_sex <- n_sex + 1
      }
      if (grepl("postmorteminterval|PMI|pmi", value)) {
        colnames(mat)[j] <- paste0("PMI", n_pmi)
        n_pmi <- n_pmi + 1
      }
      if (grepl("age|Age|AGE", value)) {
        colnames(mat)[j] <- paste0("Age", n_age)
        n_age <- n_age + 1
      }
      if (grepl("braak|b&b", value)) {
        colnames(mat)[j] <- paste0("Braak", n_braak)
        n_braak <- n_braak + 1
      }
      if (grepl("group|disease|control|AD|normal|diagnosis|Alzheimer|Control",
                value)) {
        colnames(mat)[j] <- paste0("Group", n_group)
        n_group <- n_group + 1
      }
    }
  }
  mat
}

#2#Function for reorganizing information within the columns
# Cleans the values of every column except the first, based on its name.
#
# mat: table (the script passes a data frame) whose column names contain
#      "Group", "Age", "Sex", "PMI" or "Braak" where cleaning is wanted.
#
#   Group -> text up to and including ": " removed, as well as a trailing
#            " ...;..." part
#   Age   -> every non-digit removed, then converted to integer
#   Sex   -> text up to and including ": " removed
#   PMI   -> everything except digits and "." removed, then numeric
#   Braak -> text up to and including ": " removed, read as a roman numeral,
#            then converted to integer
#
# Returns `mat` with the cleaned columns.
cinfo <- function(mat) {
  n_col <- dim(mat)[2]
  # The first column is deliberately skipped; seq_len(...)[-1] is empty for a
  # one-column input instead of counting 2, 1.
  for (j in seq_len(n_col)[-1]) {
    col_name <- colnames(mat)[j]
    if (grepl("Group", col_name)) {
      mat[, j] <- gsub(".+:\\s|\\s.+;.+", "", mat[, j])
    }
    if (grepl("Age", col_name)) {
      # NOTE(review): "\\D" also strips a decimal point, so a value such as
      # "85.5" would become 855 - confirm ages are always whole numbers.
      mat[, j] <- as.integer(gsub("\\D", "", mat[, j]))
    }
    if (grepl("Sex", col_name)) {
      mat[, j] <- gsub(".+:\\s", "", mat[, j])
    }
    if (grepl("PMI", col_name)) {
      mat[, j] <- as.numeric(gsub("[^0-9\\.]", "", mat[, j]))
    }
    if (grepl("Braak", col_name)) {
      mat[, j] <- as.integer(utils::as.roman(gsub(".+:\\s", "", mat[, j])))
    }
  }
  mat
}

#3#Function for changing the gene ID to gene name
# Replaces the IDs in the first row of `DATA` by their gene names.
#
# GeneName: matrix with the IDs in row 1 and the matching names in row 2.
# DATA:     matrix whose first row holds the IDs to translate.
#
# IDs are compared as whole strings with match(). Using each ID as a regular
# expression would also hit longer IDs that merely contain it ("1_at" inside
# "11_at") and would treat characters such as "." as wildcards. Surrounding
# whitespace is ignored on both sides because converting a table with a
# numeric ID column to a character matrix can pad the IDs. When an ID occurs
# more than once in `GeneName`, its first occurrence is used.
#
# Returns `DATA` with every recognised ID in row 1 replaced; IDs that are not
# listed in `GeneName` are left unchanged.
cgeneID <- function(GeneName, DATA) {
  known_ids <- trimws(as.character(GeneName[1, ]))
  data_ids <- trimws(as.character(DATA[1, ]))
  hit <- match(data_ids, known_ids)
  # A missing ID must never be "translated" through a missing key.
  found <- !is.na(hit) & !is.na(data_ids)
  DATA[1, found] <- GeneName[2, hit[found]]
  DATA
}

#4#Function
# for adjusting the gene names
# Picks one gene name per column out of "///"-separated column names.
#
# DiData: matrix or data frame whose column names may list several gene
#         names separated by "///".
# usecol: position of the name to keep (default 1). Columns that list fewer
#         than `usecol` names fall back to their first name.
#
# Whitespace around the kept name is removed, so a column called "A /// B"
# yields "A" rather than "A " (presumably the usual layout of the annotation
# files - verify against the GPL file in use).
#
# Returns a character vector with one name per column of `DiData`.
gcnames <- function(DiData, usecol = 1) {
  n_col <- dim(DiData)[2]
  if (n_col == 0) {
    return(character(0))
  }
  # Split every column name once instead of up to three times per column.
  pieces <- strsplit(colnames(DiData), "///")
  chosen <- vapply(
    pieces,
    function(p) if (length(p) >= usecol) p[usecol] else p[1],
    character(1),
    USE.NAMES = FALSE
  )
  trimws(chosen)
}

#The Rest of this code will be used every time you want to change a data set

#Getting the series matrix file
print("Choose the series matrix file that you want to Analyze")
alz <- file.choose()

#Getting the GPL file
print("Choose the GPL file that correlates with the above series matrix file")
genena <- file.choose()

#Set working directory based on the directory of the series matrix file
##strsplit(alz,"[\\]") %>%
##  .[[1]] %>%
##  .[-length(.)] %>%
##  paste(.,collapse="/") %>%
##  setwd()

#Working with the wordy part of the document
# Keep only the "!Sample" rows, without the "!Sample_contact" ones.
alzword <- alz %>%
  read_delim(delim = "\t", comment = "!Series", col_names = FALSE) %>%
  filter(grepl("!Sample", X1)) %>%
  filter(!grepl("!Sample_contact", X1))

##Changing row names and column names:
ALZWORD <- t(alzword)
rownames(ALZWORD) <- NULL
# Give every column a placeholder name ("col1", "col2", ...) so that the
# columns chngrownm() does not rename can be dropped below.
colnames(ALZWORD) <- colnames(ALZWORD, do.NULL = FALSE)
# Row 1 holds the field labels and is removed once the columns are named;
# drop = FALSE keeps a matrix even when only one sample row is left.
ALZWORD <- chngrownm(ALZWORD)[-1, , drop = FALSE]
ALZWORD <- ALZWORD %>%
  as.data.frame() %>%
  dplyr::select(-starts_with("col"))

##Reorganizing information within the columns
ALZWORDF <- cinfo(ALZWORD)

#Working with Actual Data part of file
alzdat <- alz %>%
  read_delim(delim = "\t", col_names = TRUE, comment = "!", skip = 1)
# Transposed without the first column, so each former row becomes a column.
ALZDAT <- t(alzdat[, -1])
rownames(ALZDAT) <- NULL

##Gene ID to Gene Name
geneIDNam <- genena %>%
  read_delim(delim = "\t", comment = "#") %>%
  dplyr::select(., ID, grep("Symbol|ORF", colnames(.)))

##Changing the ID to a Name
ALZDAT1 <- cgeneID(t(geneIDNam), t(alzdat))
colnames(ALZDAT) <- ALZDAT1[1, ]

##Adjusting the column names aka the gene names
colnames(ALZDAT) <- gcnames(ALZDAT)

#Full Data
# Sample information first, followed by the data columns.
Fullalzdw <- ALZDAT %>%
  as.data.frame() %>%
  cbind(ALZWORDF, .)
##since the order in which the packages are added matters I moved this package to the top
##library(MASS)

# Both output files are named after the digits found in the name of the chosen
# series matrix file: "GSE<digits>after.txt" and "GSE<digits>aftexcel.txt".
# basename() isolates the file name for "/" as well as "\" separated paths;
# splitting on "\" alone keeps the whole path on systems that use "/", so
# digits from directory names would end up in the output name.
gse_digits <- gsub("\\D", "", basename(alz))

nfna <- paste0("GSE", gse_digits, "after.txt")
MASS::write.matrix(Fullalzdw, file = nfna, sep = "\t")

#Perfect for excel viewing
# Same table, transposed before writing.
nfnaex <- paste0("GSE", gse_digits, "aftexcel.txt")
write.table(t(Fullalzdw), file = nfnaex, sep = "\t")