Commit b3a97716377820e2cd3097e6e96e20b7a2622cad
1 parent
22a75a38eb
Exists in
master
Update to the R code (UNTESTED)
Showing
1 changed file
with
9 additions
and
8 deletions
Show diff stats
RClean4.R
| ... | ... | @@ -241,20 +241,20 @@ if(clfileex == 0){ |
| 241 | 241 | .[IDLOCAL] |
| 242 | 242 | geneIDNam <- genena %>% |
| 243 | 243 | read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idlocgpl) %>% |
| 244 | - dplyr::select(.,ID,grep("Symbol|ORF",colnames(.))) | |
| 244 | + dplyr::select(.,ID,grep("Symbol|^ORF\\s*$",colnames(.))) | |
| 245 | 245 | } |
| 246 | 246 | if(IDF == 0){ |
| 247 | 247 | #No information on this particular GPL file |
| 248 | 248 | idLOCGPL <- genena %>% |
| 249 | 249 | read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>% |
| 250 | 250 | t(.) %>% |
| 251 | - grep("^\\D",.) %>% | |
| 252 | - length()-1 | |
| 251 | + grep("^ID\\s*$",.) %>% | |
| 252 | + -1 | |
| 253 | 253 | cbind(as.integer(gplnum),as.integer(idLOCGPL)) %>% |
| 254 | 254 | cat(file="GPL_ID_LOC.txt",sep = "\t", fill = TRUE, append = TRUE) |
| 255 | 255 | geneIDNam <- genena %>% |
| 256 | 256 | read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idLOCGPL) %>% |
| 257 | - dplyr::select(.,ID,grep("Symbol|ORF",colnames(.))) | |
| 257 | + dplyr::select(.,ID,grep("Symbol|^ORF\\s*$",colnames(.))) | |
| 258 | 258 | } |
| 259 | 259 | } |
| 260 | 260 | if(fileex == 0){ |
| ... | ... | @@ -262,20 +262,20 @@ if(clfileex == 0){ |
| 262 | 262 | idLOCGPL <- genena %>% |
| 263 | 263 | read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>% |
| 264 | 264 | t(.) %>% |
| 265 | - grep("^\\D",.) %>% | |
| 266 | - length()-1 | |
| 265 | + grep("^ID\\s*$",.) %>% | |
| 266 | + -1 | |
| 267 | 267 | Firstval <- cbind(as.integer(gplnum),as.integer(idLOCGPL)) |
| 268 | 268 | colnames(Firstval) <- c("GPL_FILE_NUM","LOC_ID") |
| 269 | 269 | write.table(Firstval,file = "GPL_ID_LOC.txt", sep = "\t",row.names = FALSE, col.names = TRUE) |
| 270 | 270 | geneIDNam <- genena %>% |
| 271 | 271 | read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idLOCGPL) %>% |
| 272 | - dplyr::select(.,ID,grep("Symbol|ORF",colnames(.))) | |
| 272 | + dplyr::select(.,ID,grep("Symbol|^ORF\\s*$",colnames(.))) | |
| 273 | 273 | } |
| 274 | 274 | } |
| 275 | 275 | if(soft == FALSE){ |
| 276 | 276 | geneIDNam <- genena %>% |
| 277 | 277 | read_delim(delim="\t",comment = "#")%>% |
| 278 | - dplyr::select(.,ID,grep("Symbol|ORF",colnames(.))) | |
| 278 | + dplyr::select(.,ID,grep("Symbol|^ORF\\s*$",colnames(.))) | |
| 279 | 279 | } |
| 280 | 280 | |
| 281 | 281 | ##Labeling the gene IDs without names |
| ... | ... | @@ -322,3 +322,4 @@ nfnaex <- strsplit(alz,"[\\]") %>% |
| 322 | 322 | paste(collapse = "") |
| 323 | 323 | write.table(t(Fullalzdw), file = nfnaex, sep = "\t") |
| 324 | 324 | |
| 325 | + |