Commit b3a97716377820e2cd3097e6e96e20b7a2622cad
1 parent
22a75a38eb
Exists in
master
Update to the R code (UNTESTED)
Showing
1 changed file
with
9 additions
and
8 deletions
Show diff stats
RClean4.R
... | ... | @@ -241,20 +241,20 @@ if(clfileex == 0){ |
241 | 241 | .[IDLOCAL] |
242 | 242 | geneIDNam <- genena %>% |
243 | 243 | read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idlocgpl) %>% |
244 | - dplyr::select(.,ID,grep("Symbol|ORF",colnames(.))) | |
244 | + dplyr::select(.,ID,grep("Symbol|^ORF\\s*$",colnames(.))) | |
245 | 245 | } |
246 | 246 | if(IDF == 0){ |
247 | 247 | #No information on this particular GPL file |
248 | 248 | idLOCGPL <- genena %>% |
249 | 249 | read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>% |
250 | 250 | t(.) %>% |
251 | - grep("^\\D",.) %>% | |
252 | - length()-1 | |
251 | + grep("^ID\\s*$",.) %>% | |
252 | + -1 | |
253 | 253 | cbind(as.integer(gplnum),as.integer(idLOCGPL)) %>% |
254 | 254 | cat(file="GPL_ID_LOC.txt",sep = "\t", fill = TRUE, append = TRUE) |
255 | 255 | geneIDNam <- genena %>% |
256 | 256 | read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idLOCGPL) %>% |
257 | - dplyr::select(.,ID,grep("Symbol|ORF",colnames(.))) | |
257 | + dplyr::select(.,ID,grep("Symbol|^ORF\\s*$",colnames(.))) | |
258 | 258 | } |
259 | 259 | } |
260 | 260 | if(fileex == 0){ |
... | ... | @@ -262,20 +262,20 @@ if(clfileex == 0){ |
262 | 262 | idLOCGPL <- genena %>% |
263 | 263 | read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>% |
264 | 264 | t(.) %>% |
265 | - grep("^\\D",.) %>% | |
266 | - length()-1 | |
265 | + grep("^ID\\s*$",.) %>% | |
266 | + -1 | |
267 | 267 | Firstval <- cbind(as.integer(gplnum),as.integer(idLOCGPL)) |
268 | 268 | colnames(Firstval) <- c("GPL_FILE_NUM","LOC_ID") |
269 | 269 | write.table(Firstval,file = "GPL_ID_LOC.txt", sep = "\t",row.names = FALSE, col.names = TRUE) |
270 | 270 | geneIDNam <- genena %>% |
271 | 271 | read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idLOCGPL) %>% |
272 | - dplyr::select(.,ID,grep("Symbol|ORF",colnames(.))) | |
272 | + dplyr::select(.,ID,grep("Symbol|^ORF\\s*$",colnames(.))) | |
273 | 273 | } |
274 | 274 | } |
275 | 275 | if(soft == FALSE){ |
276 | 276 | geneIDNam <- genena %>% |
277 | 277 | read_delim(delim="\t",comment = "#")%>% |
278 | - dplyr::select(.,ID,grep("Symbol|ORF",colnames(.))) | |
278 | + dplyr::select(.,ID,grep("Symbol|^ORF\\s*$",colnames(.))) | |
279 | 279 | } |
280 | 280 | |
281 | 281 | ##Labeling the gene IDs without names |
... | ... | @@ -322,3 +322,4 @@ nfnaex <- strsplit(alz,"[\\]") %>% |
322 | 322 | paste(collapse = "") |
323 | 323 | write.table(t(Fullalzdw), file = nfnaex, sep = "\t") |
324 | 324 | |
325 | + |