Commit cb3deb0578045972ebb5540d8827f7516f0b3c4d

Authored by Efrain Gonzalez
1 parent fc0d5afa8a
Exists in master

Update to allow more than two data sets to be supplied and matched at once

Showing 1 changed file with 55 additions and 48 deletions   Show diff stats
1 #Checking for similar genes in both data sets 1 #Checking for similar genes in both data sets
2 2
3 #Required libraries 3 #Required libraries
4 library(MASS) 4 library(MASS)
5 library(readr) 5 library(readr)
6 library(dplyr) 6 library(dplyr)
7 7
8 Check2Match <- function(){ 8 Check2Match <- function(){
9 #Bring in the two files 9 numrows <- "How many rows of clinical data are their in the each data set?: " %>%
10 ##Number of rows with clinical data for first file
11 numrow1 <- "How many rows of clinical data are their in the first data set?: " %>%
12 readline(prompt = .) %>% 10 readline(prompt = .) %>%
11 strsplit(.,split = ",") %>%
12 .[[1]] %>%
13 as.integer(.) 13 as.integer(.)
14 i <- 1
15 ANDIS <- select.list(choices = list.files(),multiple = TRUE, title = "Choose the file/files you want to analyze:")
14 16
15 ##Number of rows with clinical data for second file 17 for(i in 1:length(numrows)){
16 numrow2 <- "How many rows of clinical data are their in the second data set?: " %>% 18 if( i == 1){
17 readline(prompt = .) %>% 19 edfile <- ANDIS[i]
18 as.integer(.) 20 ed <- edfile %>%
19 21 read_delim(.,delim = "\t")
20 edfile <- file.choose() 22 ednocd <- ed[-(1:numrows[1]),]
21 ed <- edfile %>% 23 #Second file brought in
22 read_delim(.,delim = "\t") 24 eddfile <- ANDIS[i + 1]
23 25 edd <- eddfile %>%
24 ednocd <- ed[-(1:numrow1),] 26 read_delim(.,delim = "\t")
25 eddfile <- file.choose() 27 eddnocd <- edd[-(1:numrows[2]),]
26 edd <- eddfile %>% 28
27 read_delim(.,delim = "\t") 29 ##Fully matched both data sets
30 eddy <- inner_join(ednocd,eddnocd)
31 #Matches
32 meds <- eddy[,1]
33
34 }
35 if(i > 1 && i + 1 < length(numrows)){
36 eddfile <- ANDIS[i + 1]
37 edd <- eddfile %>%
38 read_delim(.,delim = "\t")
39 eddnocd <- edd[-(1:numrows[i + 1]),]
40 ##Fully matched both data sets
41 eddy <- inner_join(meds,eddnocd)
42 meds <- eddy[,1]
28 43
29 eddnocd <- edd[-(1:numrow2),] 44
30 45 }
31 ##Number of columns that belong to the first data file 46 i <- i + 1
32 numbcol1 <- dim(ednocd)[2] 47 }
33 ##Number of columns that belong to the second data file 48 meds
34 numbcol2 <- dim(eddnocd)[2] 49 for(j in 1:length(numrows)){
35 50 edfile <- ANDIS[j]
36 ##Fully matched both data sets 51 ed <- edfile %>%
37 eddy <- inner_join(ednocd,eddnocd) 52 read_delim(.,delim = "\t")
38 #Matched ed 53 ednocd <- ed[-(1:numrows[j]),]
39 eddy[,1:numbcol1] 54 #use meds to match
40 Finedm <- rbind(ed[1:numrow1,],eddy[,1:numbcol1]) 55 eddy <- inner_join(meds,ednocd)
41 nam_fil_ed <- strsplit(edfile,"[\\|/]") %>% 56 Finedm <- rbind(ed[1:numrows[j],],eddy)
42 .[[1]] %>% 57 nam_fil_ed <- strsplit(edfile,"[\\|/]") %>%
43 .[length(.)] %>% 58 .[[1]] %>%
44 gsub("\\D","",.) %>% 59 .[length(.)] %>%
45 c("GSE",.,"matched.txt") %>% 60 gsub("\\D","",.) %>%
46 paste(collapse = "") 61 c("GSE",.,"matched.txt") %>%
47 write.table(Finedm,file = nam_fil_ed,sep = "\t",row.names = FALSE) 62 paste(collapse = "")
48 #Matched edd 63 write.table(Finedm,file = nam_fil_ed,sep = "\t",row.names = FALSE)
49 eddy[,(numbcol1 + 1):dim(eddy)[2]] 64 j <- j + 1
50 fineddm <- cbind(eddy[,1],eddy[,(numbcol1 + 1):dim(eddy)[2]]) 65 }
51 Fineddm <- rbind(edd[1:numrow2,],fineddm) 66 meds
52 nam_fil_edd <- strsplit(eddfile,"[\\|/]") %>%