# Checking for similar genes in both data sets
#
# Interactive script. It asks how many leading rows of each tab-delimited
# file hold clinical data, lets the user pick the two files, keeps only the
# remaining rows whose first-column value occurs in both files, and writes one
# "GSE<digits>matched.txt" file per input into the working directory.
# NOTE(review): the first column is presumed to be the gene identifier;
# confirm against the input files.

# Required libraries
# NOTE(review): nothing below references MASS; kept so the set of attached
# packages is unchanged.
library(MASS)
library(readr)
library(dplyr)

#' Ask the user for a row count and validate the answer.
#'
#' @param prompt Text shown by `readline()`.
#' @return A single non-negative integer.
ask_row_count <- function(prompt) {
  answer <- readline(prompt = prompt)
  # as.integer() warns and yields NA for text it cannot convert; the NA is
  # reported through the explicit error below instead of the warning.
  count <- suppressWarnings(as.integer(answer))
  if (is.na(count) || count < 0L) {
    stop(
      "Expected a non-negative whole number of rows, got '", answer, "'.",
      call. = FALSE
    )
  }
  count
}

#' Split a data set into its leading clinical rows and the remaining rows.
#'
#' @param data A data frame / tibble.
#' @param n_clinical Number of leading rows that hold clinical data.
#' @return A list with elements `clinical` and `rest`.
split_clinical_rows <- function(data, n_clinical) {
  if (n_clinical > nrow(data)) {
    stop(
      "The data set has ", nrow(data), " rows but ", n_clinical,
      " clinical rows were requested.",
      call. = FALSE
    )
  }
  # A logical mask is used instead of `1:n` / `-(1:n)`: with n = 0 those
  # forms evaluate to c(1, 0) and would select / drop the first row.
  is_clinical <- seq_len(nrow(data)) <= n_clinical
  list(clinical = data[is_clinical, ], rest = data[!is_clinical, ])
}

#' Build the output file name for a chosen input file.
#'
#' Takes the last path component, keeps only its digits and wraps them as
#' "GSE<digits>matched.txt".
#'
#' @param path Path of the input file.
#' @return A single file name (no directory part).
matched_file_name <- function(path) {
  parts <- strsplit(path, "[\\|/]")[[1]]
  digits <- gsub("\\D", "", parts[length(parts)])
  paste0("GSE", digits, "matched.txt")
}

#' Match two data sets on their first column and write the matched files.
#'
#' Prompts for the number of clinical rows in each file, lets the user choose
#' both files, inner-joins the non-clinical rows on the first column, and
#' writes each data set (clinical rows followed by its matched rows) to a
#' tab-separated file named by `matched_file_name()`.
#'
#' @return `NULL`, invisibly. Called for its side effect of writing two files.
Check2Match <- function() {
  # Bring in the two files
  ## Number of rows with clinical data for each file
  n_clinical_1 <- ask_row_count(
    "How many rows of clinical data are their in the first data set?: "
  )
  n_clinical_2 <- ask_row_count(
    "How many rows of clinical data are their in the second data set?: "
  )

  first_path <- file.choose()
  first <- split_clinical_rows(
    read_delim(first_path, delim = "\t"),
    n_clinical_1
  )

  second_path <- file.choose()
  second <- split_clinical_rows(
    read_delim(second_path, delim = "\t"),
    n_clinical_2
  )

  ## Number of columns that belong to each data file
  n_cols_1 <- ncol(first$rest)
  n_cols_2 <- ncol(second$rest)
  if (n_cols_1 < 1L || n_cols_2 < 1L) {
    stop("Both data sets need at least one column to match on.", call. = FALSE)
  }

  ## Fully matched both data sets
  # Join on the first column of each file only. Without an explicit `by`,
  # every shared column name becomes a key, which breaks the positional
  # slicing below as soon as the files share any other column name.
  key_1 <- names(first$rest)[1]
  key_2 <- names(second$rest)[1]
  joined <- inner_join(
    first$rest,
    second$rest,
    by = stats::setNames(key_2, key_1)
  )

  # The join result holds the first file's columns in order, followed by the
  # second file's non-key columns. Names are restored because inner_join()
  # suffixes non-key names that occur in both inputs.
  matched_1 <- joined[, seq_len(n_cols_1)]
  names(matched_1) <- names(first$rest)

  # seq_len() keeps this correct when the second file has only the key column.
  matched_2 <- joined[, c(1L, n_cols_1 + seq_len(n_cols_2 - 1L))]
  names(matched_2) <- names(second$rest)

  out_1 <- rbind(first$clinical, matched_1)
  out_2 <- rbind(second$clinical, matched_2)

  name_1 <- matched_file_name(first_path)
  name_2 <- matched_file_name(second_path)
  if (identical(name_1, name_2)) {
    # Writing both would silently overwrite the first result with the second.
    stop(
      "Both input files map to the same output name '", name_1,
      "'; rename one of the inputs.",
      call. = FALSE
    )
  }

  write.table(out_1, file = name_1, sep = "\t", row.names = FALSE)
  write.table(out_2, file = name_2, sep = "\t", row.names = FALSE)

  invisible(NULL)
}

Check2Match()