Commit e340baf086d0c414acf14dfc085805879c94e966

Authored by Efrain Gonzalez
Exists in master

Merge branch 'master' of smlg.fiu.edu:efraingonzalez0/cleaning-and-fixing-data-with-r

... ... @@ -28,29 +28,29 @@ chngrownm <- function(mat){
28 28 if("!Sample_source_name_ch1"==mat[1,e]){
29 29 colnames(mat)[e] <- "Brain_Region"
30 30 }
31   - if("!Sample_title" == mat[1,e]){
  31 + else if("!Sample_title" == mat[1,e]){
32 32 colnames(mat)[e] <- "Title"
33 33 }
34   - if("!Sample_geo_accession" == mat[1,e]){
  34 + else if("!Sample_geo_accession" == mat[1,e]){
35 35 colnames(mat)[e] <- "ID_REF"
36 36 } else{
37 37 if(grepl("Sex|gender|Gender|sex",mat[2,e])==TRUE){
38 38 colnames(mat)[e] <- paste0("Sex",r)
39 39 r = r + 1
40 40 }
41   - if(grepl("postmorteminterval|PMI|pmi",mat[2,e])==TRUE){
  41 + else if(grepl("postmorteminterval|PMI|pmi",mat[2,e])==TRUE){
42 42 colnames(mat)[e] <- paste0("PMI",a)
43 43 a = a + 1
44 44 }
45   - if(grepl("age|Age|AGE",mat[2,e])==TRUE){
  45 + else if(grepl("age|Age|AGE",mat[2,e])==TRUE){
46 46 colnames(mat)[e] <- paste0("Age",h)
47 47 h = h + 1
48 48 }
49   - if(grepl("braak|b&b",mat[2,e])==TRUE){
  49 + else if(grepl("braak|b&b",mat[2,e])==TRUE){
50 50 colnames(mat)[e] <- paste0("Braak",g)
51 51 g = g + 1
52 52 }
53   - if(grepl("group|disease|control|AD|normal|diagnosis|Alzheimer|Control|Normal",mat[2,e])==TRUE){
  53 + else if(grepl("group|disease|control|AD|normal|diagnosis|Alzheimer|Control|Normal",mat[2,e])==TRUE){
54 54 colnames(mat)[e] <- paste0("Group",o)
55 55 o = o + 1
56 56 }
... ... @@ -69,18 +69,18 @@ cinfo <- function(mat){
69 69 if(grepl("Group",colnames(mat)[j]) == TRUE){
70 70 mat[,j] <- gsub(".+:\\s|\\s.+;.+","",mat[,j])
71 71 }
72   - if(grepl("Age",colnames(mat)[j])==TRUE){
  72 + else if(grepl("Age",colnames(mat)[j])==TRUE){
73 73 mat[,j] <- gsub("\\D","",mat[,j])%>%
74 74 as.integer()
75 75 }
76   - if(grepl("Sex",colnames(mat)[j])==TRUE){
  76 + else if(grepl("Sex",colnames(mat)[j])==TRUE){
77 77 mat[,j] <- gsub(".+:\\s","",mat[,j])
78 78 }
79   - if(grepl("PMI",colnames(mat)[j])==TRUE){
  79 + else if(grepl("PMI",colnames(mat)[j])==TRUE){
80 80 mat[,j] <- gsub("[^0-9\\.]","",mat[,j])%>%
81 81 as.numeric()
82 82 }
83   - if(grepl("Braak",colnames(mat)[j])==TRUE){
  83 + else if(grepl("Braak",colnames(mat)[j])==TRUE){
84 84 mat[,j]<-gsub(".+:\\s","",mat[,j])%>%
85 85 as.roman()%>%
86 86 as.integer()
... ... @@ -235,7 +235,7 @@ THEFT <- function(){
235 235 read_delim(delim="\t",col_names = c("ID","Symbol"), comment = "!")
236 236  
237 237 }
238   - if(clfileex == 0){
  238 + else if(clfileex == 0){
239 239 ##Lets Create a clean version
240 240  
241 241 ##Gene ID to Gene Name
... ... @@ -259,7 +259,7 @@ THEFT <- function(){
259 259 read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idlocgpl) %>%
260 260 dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
261 261 }
262   - if(IDF == 0){
  262 + else if(IDF == 0){
263 263 #No information on this particular GPL file
264 264 idLOCGPL <- genena %>%
265 265 read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>%
... ... @@ -273,7 +273,7 @@ THEFT <- function(){
273 273 dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
274 274 }
275 275 }
276   - if(fileex == 0){
  276 + else if(fileex == 0){
277 277 #We must create a file that we can access for later use
278 278 idLOCGPL <- genena %>%
279 279 read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>%
... ... @@ -288,7 +288,7 @@ THEFT <- function(){
288 288 dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
289 289 }
290 290 }
291   - if(soft == FALSE){
  291 + else if(soft == FALSE){
292 292 geneIDNam <- genena %>%
293 293 read_delim(delim="\t",comment = "#")%>%
294 294 dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
... ... @@ -391,7 +391,7 @@ THEFT <- function(){
391 391 NuRDATN[,j] <- RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])]
392 392 }
393 393 ##Averaging duplicates and putting them in their new homes
394   - if(tabRDATID[j] > 1){
  394 + else if(tabRDATID[j] > 1){
395 395 NuRDATN[,j] <- rowMeans(RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])],na.rm = TRUE)
396 396 }
397 397 j <- j + 1
... ... @@ -461,7 +461,7 @@ THEFT <- function(){
461 461 }
462 462  
463 463 #CHOOSE A DATA FILE TO CLEAN OR SEVERAL DATA FILES TO CLEAN
464   - if(numDAT == 2){
  464 + else if(numDAT == 2){
465 465 #All the files you want to analyze
466 466 ANDIS <- select.list(choices = list.files()[GSEfileloc],multiple = TRUE, title = "Choose the file/files you want to analyze:")
467 467 if(length(ANDIS) == 0){
... ... @@ -524,7 +524,7 @@ THEFT <- function(){
524 524 read_delim(delim="\t",col_names = c("ID","Symbol"), comment = "!")
525 525  
526 526 }
527   - if(clfileex == 0){
  527 + else if(clfileex == 0){
528 528 ##Lets Create a clean version
529 529  
530 530 ##Gene ID to Gene Name
... ... @@ -548,7 +548,7 @@ THEFT <- function(){
548 548 read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idlocgpl) %>%
549 549 dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
550 550 }
551   - if(IDF == 0){
  551 + else if(IDF == 0){
552 552 #No information on this particular GPL file
553 553 idLOCGPL <- genena %>%
554 554 read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>%
... ... @@ -562,7 +562,7 @@ THEFT <- function(){
562 562 dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
563 563 }
564 564 }
565   - if(fileex == 0){
  565 + else if(fileex == 0){
566 566 #We must create a file that we can access for later use
567 567 idLOCGPL <- genena %>%
568 568 read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>%
... ... @@ -577,7 +577,7 @@ THEFT <- function(){
577 577 dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
578 578 }
579 579 }
580   - if(soft == FALSE){
  580 + else if(soft == FALSE){
581 581 geneIDNam <- genena %>%
582 582 read_delim(delim="\t",comment = "#")%>%
583 583 dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
... ... @@ -680,7 +680,7 @@ THEFT <- function(){
680 680 NuRDATN[,j] <- RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])]
681 681 }
682 682 ##Averaging duplicates and putting them in their new homes
683   - if(tabRDATID[j] > 1){
  683 + else if(tabRDATID[j] > 1){
684 684 NuRDATN[,j] <- rowMeans(RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])],na.rm = TRUE)
685 685 }
686 686 j <- j + 1