RMatchGenes.R
1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# Find the genes (ID_REF values) shared by all of the selected data sets
#Required libraries
library(MASS)
library(readr)
library(dplyr)
#' Keep only the IDs shared by every selected data set
#'
#' Interactively asks which tab-delimited files in the working directory to
#' analyse and how many leading rows of clinical data each one holds. The rows
#' after the clinical block are matched across files on the `ID_REF` column,
#' and for every input a file named "GSE<digits of the input name>matched.txt"
#' is written to the working directory. Each output holds that input's
#' clinical rows followed by its rows whose `ID_REF` occurs in all inputs.
#'
#' @return A one-column tibble (`ID_REF`) with the shared IDs, ordered as they
#'   first appear in the first selected file.
Check2Match <- function() {
  ANDIS <- select.list(
    choices = list.files(),
    multiple = TRUE,
    title = "Choose the file/files you want to analyze:"
  )
  if (length(ANDIS) == 0) {
    stop("No files were selected.", call. = FALSE)
  }

  numrows <- "How many rows of clinical data are their in each data set (separate each number by a comma no spaces)?: " %>%
    readline(prompt = .) %>%
    strsplit(., split = ",") %>%
    .[[1]] %>%
    as.integer(.)

  # One row count per file is required; anything else would pair a file with
  # the wrong (or a missing) count.
  if (length(numrows) != length(ANDIS)) {
    stop(
      "Expected ", length(ANDIS), " row count(s), one per selected file, ",
      "but got ", length(numrows), ".",
      call. = FALSE
    )
  }
  if (anyNA(numrows) || any(numrows < 0L)) {
    stop("Row counts must be non-negative whole numbers.", call. = FALSE)
  }

  # Read every file exactly once and reuse the parsed tables below.
  tables <- lapply(ANDIS, function(path) read_delim(path, delim = "\t"))

  has_id <- vapply(
    tables,
    function(tbl) "ID_REF" %in% names(tbl),
    logical(1)
  )
  if (!all(has_id)) {
    stop(
      "No ID_REF column found in: ",
      paste(ANDIS[!has_id], collapse = ", "),
      call. = FALSE
    )
  }

  # Split a table into its leading clinical rows and the remaining rows.
  # A logical mask is used instead of `1:n` / `-(1:n)` so that a count of 0
  # means "no clinical rows" rather than silently selecting/dropping row 1.
  split_rows <- function(tbl, n) {
    is_clinical <- seq_len(nrow(tbl)) <= n
    list(clinical = tbl[is_clinical, ], expression = tbl[!is_clinical, ])
  }
  parts <- Map(split_rows, tables, numrows)

  # Unique IDs of one data set, selected by name rather than by position.
  unique_ids <- function(part) distinct(part$expression["ID_REF"])

  # Intersect the IDs file by file. De-duplicating first keeps repeated IDs
  # from multiplying rows at every join; with a single file the result is
  # simply that file's IDs.
  meds <- Reduce(
    function(ids, part) inner_join(ids, unique_ids(part), by = "ID_REF"),
    parts[-1],
    unique_ids(parts[[1]])
  )

  for (j in seq_along(ANDIS)) {
    # Joining from `meds` puts the kept rows in the shared ID order.
    matched <- inner_join(meds, parts[[j]]$expression, by = "ID_REF")
    Finedm <- rbind(parts[[j]]$clinical, matched)

    # Output name: "GSE" + every digit of the input's base name + suffix.
    digits <- strsplit(ANDIS[j], "[\\|/]") %>%
      .[[1]] %>%
      .[length(.)] %>%
      gsub("\\D", "", .)
    nam_fil_ed <- paste0("GSE", digits, "matched.txt")

    write.table(Finedm, file = nam_fil_ed, sep = "\t", row.names = FALSE)
  }

  meds
}
# Run the interactive matching workflow as soon as this script is executed.
Check2Match()