Commit cb3deb0578045972ebb5540d8827f7516f0b3c4d
1 parent: fc0d5afa8a
Exists in: master
Update that allows for more data set inputs at once
Showing 1 changed file with 55 additions and 48 deletions
Show diff stats
RMatchGenes.R
1 | #Checking for similar genes in both data sets | 1 | #Checking for similar genes in both data sets |
2 | 2 | ||
3 | #Required libraries | 3 | #Required libraries |
4 | library(MASS) | 4 | library(MASS) |
5 | library(readr) | 5 | library(readr) |
6 | library(dplyr) | 6 | library(dplyr) |
7 | 7 | ||
8 | Check2Match <- function(){ | 8 | Check2Match <- function(){ |
9 | #Bring in the two files | 9 | numrows <- "How many rows of clinical data are their in the each data set?: " %>% |
10 | ##Number of rows with clinical data for first file | ||
11 | numrow1 <- "How many rows of clinical data are their in the first data set?: " %>% | ||
12 | readline(prompt = .) %>% | 10 | readline(prompt = .) %>% |
11 | strsplit(.,split = ",") %>% | ||
12 | .[[1]] %>% | ||
13 | as.integer(.) | 13 | as.integer(.) |
14 | i <- 1 | ||
15 | ANDIS <- select.list(choices = list.files(),multiple = TRUE, title = "Choose the file/files you want to analyze:") | ||
14 | 16 | ||
15 | ##Number of rows with clinical data for second file | 17 | for(i in 1:length(numrows)){ |
16 | numrow2 <- "How many rows of clinical data are their in the second data set?: " %>% | 18 | if( i == 1){ |
17 | readline(prompt = .) %>% | 19 | edfile <- ANDIS[i] |
18 | as.integer(.) | 20 | ed <- edfile %>% |
19 | 21 | read_delim(.,delim = "\t") | |
20 | edfile <- file.choose() | 22 | ednocd <- ed[-(1:numrows[1]),] |
21 | ed <- edfile %>% | 23 | #Second file brought in |
22 | read_delim(.,delim = "\t") | 24 | eddfile <- ANDIS[i + 1] |
23 | 25 | edd <- eddfile %>% | |
24 | ednocd <- ed[-(1:numrow1),] | 26 | read_delim(.,delim = "\t") |
25 | eddfile <- file.choose() | 27 | eddnocd <- edd[-(1:numrows[2]),] |
26 | edd <- eddfile %>% | 28 | |
27 | read_delim(.,delim = "\t") | 29 | ##Fully matched both data sets |
30 | eddy <- inner_join(ednocd,eddnocd) | ||
31 | #Matches | ||
32 | meds <- eddy[,1] | ||
33 | |||
34 | } | ||
35 | if(i > 1 && i + 1 < length(numrows)){ | ||
36 | eddfile <- ANDIS[i + 1] | ||
37 | edd <- eddfile %>% | ||
38 | read_delim(.,delim = "\t") | ||
39 | eddnocd <- edd[-(1:numrows[i + 1]),] | ||
40 | ##Fully matched both data sets | ||
41 | eddy <- inner_join(meds,eddnocd) | ||
42 | meds <- eddy[,1] | ||
28 | 43 | ||
29 | eddnocd <- edd[-(1:numrow2),] | 44 | |
30 | 45 | } | |
31 | ##Number of columns that belong to the first data file | 46 | i <- i + 1 |
32 | numbcol1 <- dim(ednocd)[2] | 47 | } |
33 | ##Number of columns that belong to the second data file | 48 | meds |
34 | numbcol2 <- dim(eddnocd)[2] | 49 | for(j in 1:length(numrows)){ |
35 | 50 | edfile <- ANDIS[j] | |
36 | ##Fully matched both data sets | 51 | ed <- edfile %>% |
37 | eddy <- inner_join(ednocd,eddnocd) | 52 | read_delim(.,delim = "\t") |
38 | #Matched ed | 53 | ednocd <- ed[-(1:numrows[j]),] |
39 | eddy[,1:numbcol1] | 54 | #use meds to match |
40 | Finedm <- rbind(ed[1:numrow1,],eddy[,1:numbcol1]) | 55 | eddy <- inner_join(meds,ednocd) |
41 | nam_fil_ed <- strsplit(edfile,"[\\|/]") %>% | 56 | Finedm <- rbind(ed[1:numrows[j],],eddy) |
42 | .[[1]] %>% | 57 | nam_fil_ed <- strsplit(edfile,"[\\|/]") %>% |
43 | .[length(.)] %>% | 58 | .[[1]] %>% |
44 | gsub("\\D","",.) %>% | 59 | .[length(.)] %>% |
45 | c("GSE",.,"matched.txt") %>% | 60 | gsub("\\D","",.) %>% |
46 | paste(collapse = "") | 61 | c("GSE",.,"matched.txt") %>% |
47 | write.table(Finedm,file = nam_fil_ed,sep = "\t",row.names = FALSE) | 62 | paste(collapse = "") |
48 | #Matched edd | 63 | write.table(Finedm,file = nam_fil_ed,sep = "\t",row.names = FALSE) |
49 | eddy[,(numbcol1 + 1):dim(eddy)[2]] | 64 | j <- j + 1 |
50 | fineddm <- cbind(eddy[,1],eddy[,(numbcol1 + 1):dim(eddy)[2]]) | 65 | } |
51 | Fineddm <- rbind(edd[1:numrow2,],fineddm) | 66 | meds |
52 | nam_fil_edd <- strsplit(eddfile,"[\\|/]") %>% |