Commit e340baf086d0c414acf14dfc085805879c94e966
Exists in
master
Merge branch 'master' of smlg.fiu.edu:efraingonzalez0/cleaning-and-fixing-data-with-r
Showing
1 changed file
Show diff stats
RAutoClDs.R
... | ... | @@ -28,29 +28,29 @@ chngrownm <- function(mat){ |
28 | 28 | if("!Sample_source_name_ch1"==mat[1,e]){ |
29 | 29 | colnames(mat)[e] <- "Brain_Region" |
30 | 30 | } |
31 | - if("!Sample_title" == mat[1,e]){ | |
31 | + else if("!Sample_title" == mat[1,e]){ | |
32 | 32 | colnames(mat)[e] <- "Title" |
33 | 33 | } |
34 | - if("!Sample_geo_accession" == mat[1,e]){ | |
34 | + else if("!Sample_geo_accession" == mat[1,e]){ | |
35 | 35 | colnames(mat)[e] <- "ID_REF" |
36 | 36 | } else{ |
37 | 37 | if(grepl("Sex|gender|Gender|sex",mat[2,e])==TRUE){ |
38 | 38 | colnames(mat)[e] <- paste0("Sex",r) |
39 | 39 | r = r + 1 |
40 | 40 | } |
41 | - if(grepl("postmorteminterval|PMI|pmi",mat[2,e])==TRUE){ | |
41 | + else if(grepl("postmorteminterval|PMI|pmi",mat[2,e])==TRUE){ | |
42 | 42 | colnames(mat)[e] <- paste0("PMI",a) |
43 | 43 | a = a + 1 |
44 | 44 | } |
45 | - if(grepl("age|Age|AGE",mat[2,e])==TRUE){ | |
45 | + else if(grepl("age|Age|AGE",mat[2,e])==TRUE){ | |
46 | 46 | colnames(mat)[e] <- paste0("Age",h) |
47 | 47 | h = h + 1 |
48 | 48 | } |
49 | - if(grepl("braak|b&b",mat[2,e])==TRUE){ | |
49 | + else if(grepl("braak|b&b",mat[2,e])==TRUE){ | |
50 | 50 | colnames(mat)[e] <- paste0("Braak",g) |
51 | 51 | g = g + 1 |
52 | 52 | } |
53 | - if(grepl("group|disease|control|AD|normal|diagnosis|Alzheimer|Control|Normal",mat[2,e])==TRUE){ | |
53 | + else if(grepl("group|disease|control|AD|normal|diagnosis|Alzheimer|Control|Normal",mat[2,e])==TRUE){ | |
54 | 54 | colnames(mat)[e] <- paste0("Group",o) |
55 | 55 | o = o + 1 |
56 | 56 | } |
... | ... | @@ -69,18 +69,18 @@ cinfo <- function(mat){ |
69 | 69 | if(grepl("Group",colnames(mat)[j]) == TRUE){ |
70 | 70 | mat[,j] <- gsub(".+:\\s|\\s.+;.+","",mat[,j]) |
71 | 71 | } |
72 | - if(grepl("Age",colnames(mat)[j])==TRUE){ | |
72 | + else if(grepl("Age",colnames(mat)[j])==TRUE){ | |
73 | 73 | mat[,j] <- gsub("\\D","",mat[,j])%>% |
74 | 74 | as.integer() |
75 | 75 | } |
76 | - if(grepl("Sex",colnames(mat)[j])==TRUE){ | |
76 | + else if(grepl("Sex",colnames(mat)[j])==TRUE){ | |
77 | 77 | mat[,j] <- gsub(".+:\\s","",mat[,j]) |
78 | 78 | } |
79 | - if(grepl("PMI",colnames(mat)[j])==TRUE){ | |
79 | + else if(grepl("PMI",colnames(mat)[j])==TRUE){ | |
80 | 80 | mat[,j] <- gsub("[^0-9\\.]","",mat[,j])%>% |
81 | 81 | as.numeric() |
82 | 82 | } |
83 | - if(grepl("Braak",colnames(mat)[j])==TRUE){ | |
83 | + else if(grepl("Braak",colnames(mat)[j])==TRUE){ | |
84 | 84 | mat[,j]<-gsub(".+:\\s","",mat[,j])%>% |
85 | 85 | as.roman()%>% |
86 | 86 | as.integer() |
... | ... | @@ -235,7 +235,7 @@ THEFT <- function(){ |
235 | 235 | read_delim(delim="\t",col_names = c("ID","Symbol"), comment = "!") |
236 | 236 | |
237 | 237 | } |
238 | - if(clfileex == 0){ | |
238 | + else if(clfileex == 0){ | |
239 | 239 | ##Lets Create a clean version |
240 | 240 | |
241 | 241 | ##Gene ID to Gene Name |
... | ... | @@ -259,7 +259,7 @@ THEFT <- function(){ |
259 | 259 | read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idlocgpl) %>% |
260 | 260 | dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.))) |
261 | 261 | } |
262 | - if(IDF == 0){ | |
262 | + else if(IDF == 0){ | |
263 | 263 | #No information on this particular GPL file |
264 | 264 | idLOCGPL <- genena %>% |
265 | 265 | read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>% |
... | ... | @@ -273,7 +273,7 @@ THEFT <- function(){ |
273 | 273 | dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.))) |
274 | 274 | } |
275 | 275 | } |
276 | - if(fileex == 0){ | |
276 | + else if(fileex == 0){ | |
277 | 277 | #We must create a file that we can access for later use |
278 | 278 | idLOCGPL <- genena %>% |
279 | 279 | read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>% |
... | ... | @@ -288,7 +288,7 @@ THEFT <- function(){ |
288 | 288 | dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.))) |
289 | 289 | } |
290 | 290 | } |
291 | - if(soft == FALSE){ | |
291 | + else if(soft == FALSE){ | |
292 | 292 | geneIDNam <- genena %>% |
293 | 293 | read_delim(delim="\t",comment = "#")%>% |
294 | 294 | dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.))) |
... | ... | @@ -391,7 +391,7 @@ THEFT <- function(){ |
391 | 391 | NuRDATN[,j] <- RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])] |
392 | 392 | } |
393 | 393 | ##Averaging duplicates and putting them in their new homes |
394 | - if(tabRDATID[j] > 1){ | |
394 | + else if(tabRDATID[j] > 1){ | |
395 | 395 | NuRDATN[,j] <- rowMeans(RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])],na.rm = TRUE) |
396 | 396 | } |
397 | 397 | j <- j + 1 |
... | ... | @@ -461,7 +461,7 @@ THEFT <- function(){ |
461 | 461 | } |
462 | 462 | |
463 | 463 | #CHOOSE A DATA FILE TO CLEAN OR SEVERAL DATA FILES TO CLEAN |
464 | - if(numDAT == 2){ | |
464 | + else if(numDAT == 2){ | |
465 | 465 | #All the files you want to analyze |
466 | 466 | ANDIS <- select.list(choices = list.files()[GSEfileloc],multiple = TRUE, title = "Choose the file/files you want to analyze:") |
467 | 467 | if(length(ANDIS) == 0){ |
... | ... | @@ -524,7 +524,7 @@ THEFT <- function(){ |
524 | 524 | read_delim(delim="\t",col_names = c("ID","Symbol"), comment = "!") |
525 | 525 | |
526 | 526 | } |
527 | - if(clfileex == 0){ | |
527 | + else if(clfileex == 0){ | |
528 | 528 | ##Lets Create a clean version |
529 | 529 | |
530 | 530 | ##Gene ID to Gene Name |
... | ... | @@ -548,7 +548,7 @@ THEFT <- function(){ |
548 | 548 | read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idlocgpl) %>% |
549 | 549 | dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.))) |
550 | 550 | } |
551 | - if(IDF == 0){ | |
551 | + else if(IDF == 0){ | |
552 | 552 | #No information on this particular GPL file |
553 | 553 | idLOCGPL <- genena %>% |
554 | 554 | read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>% |
... | ... | @@ -562,7 +562,7 @@ THEFT <- function(){ |
562 | 562 | dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.))) |
563 | 563 | } |
564 | 564 | } |
565 | - if(fileex == 0){ | |
565 | + else if(fileex == 0){ | |
566 | 566 | #We must create a file that we can access for later use |
567 | 567 | idLOCGPL <- genena %>% |
568 | 568 | read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>% |
... | ... | @@ -577,7 +577,7 @@ THEFT <- function(){ |
577 | 577 | dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.))) |
578 | 578 | } |
579 | 579 | } |
580 | - if(soft == FALSE){ | |
580 | + else if(soft == FALSE){ | |
581 | 581 | geneIDNam <- genena %>% |
582 | 582 | read_delim(delim="\t",comment = "#")%>% |
583 | 583 | dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.))) |
... | ... | @@ -680,7 +680,7 @@ THEFT <- function(){ |
680 | 680 | NuRDATN[,j] <- RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])] |
681 | 681 | } |
682 | 682 | ##Averaging duplicates and putting them in their new homes |
683 | - if(tabRDATID[j] > 1){ | |
683 | + else if(tabRDATID[j] > 1){ | |
684 | 684 | NuRDATN[,j] <- rowMeans(RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])],na.rm = TRUE) |
685 | 685 | } |
686 | 686 | j <- j + 1 |