# RMatchGenes.R (1.62 KB)
#Checking for similar genes in both data sets

#Required libraries
library(MASS)
library(readr)
library(dplyr)

# Ask the user a question and return the reply as one non-negative integer.
# Stops with a readable message when the reply is not a usable number instead
# of letting an NA reach the row indexing further down.
.ask_row_count <- function(question) {
  reply <- suppressWarnings(as.integer(readline(prompt = question)))
  if (is.na(reply) || reply < 0L) {
    stop("Please enter a non-negative whole number of rows.", call. = FALSE)
  }
  reply
}

# Split a table into its first `n_clinical` rows and all rows after them.
# Returns a list with elements `clinical` and `genes`.
.split_clinical_rows <- function(tbl, n_clinical) {
  if (n_clinical > nrow(tbl)) {
    stop(
      "Asked for ", n_clinical, " clinical rows but the file only has ",
      nrow(tbl), " rows.",
      call. = FALSE
    )
  }
  # A logical mask stays correct for n_clinical == 0, where `1:n` would count
  # down (c(1, 0)) and an empty negative index would select no rows at all.
  is_clinical <- seq_len(nrow(tbl)) <= n_clinical
  list(
    clinical = tbl[is_clinical, , drop = FALSE],
    genes = tbl[!is_clinical, , drop = FALSE]
  )
}

# Build the output file name "GSE<digits>matched.txt" from the digits found in
# the last component of `path`.
.matched_file_name <- function(path) {
  # `\\\\` puts a literal backslash into the character class so Windows-style
  # paths are split too; only the file name may contribute digits.
  pieces <- strsplit(path, "[\\\\|/]")[[1]]
  paste0("GSE", gsub("\\D", "", pieces[length(pieces)]), "matched.txt")
}

# Interactively match the rows of two tab-delimited data sets.
#
# Prompts for the number of leading clinical-data rows in each file, lets the
# user pick both files, and inner-joins the remaining rows on every column
# name the two files share. For each input it then writes a tab-separated file
# named "GSE<digits>matched.txt" into the working directory, holding that
# file's clinical rows followed by its columns of the matched rows. Both
# outputs list the matched rows in the same order.
#
# Takes no arguments and returns NULL invisibly; it is called for its side
# effects (prompts, file dialogs, two files written).
Check2Match <- function() {
  # Number of rows with clinical data at the top of each file
  numrow1 <- .ask_row_count(
    "How many rows of clinical data are their in the first data set?: "
  )
  numrow2 <- .ask_row_count(
    "How many rows of clinical data are their in the second data set?: "
  )

  # Bring in the two files
  edfile <- file.choose()
  ed <- read_delim(edfile, delim = "\t")
  eddfile <- file.choose()
  edd <- read_delim(eddfile, delim = "\t")

  ed_parts <- .split_clinical_rows(ed, numrow1)
  edd_parts <- .split_clinical_rows(edd, numrow2)

  # Join on every shared column name (what inner_join() does by default), but
  # spell the keys out so the choice is visible and fails with a clear message.
  key_cols <- intersect(names(ed_parts$genes), names(edd_parts$genes))
  if (length(key_cols) == 0L) {
    stop("The two data sets have no column name in common.", call. = FALSE)
  }
  matched <- inner_join(ed_parts$genes, edd_parts$genes, by = key_cols)

  # Every shared name is a join key, so no ".x"/".y" suffixes are created and
  # each file's columns can be recovered by name, wherever the keys sit.
  ed_out <- rbind(ed_parts$clinical, matched[, names(ed), drop = FALSE])
  edd_out <- rbind(edd_parts$clinical, matched[, names(edd), drop = FALSE])

  ed_out_file <- .matched_file_name(edfile)
  edd_out_file <- .matched_file_name(eddfile)
  if (identical(ed_out_file, edd_out_file)) {
    warning(
      "Both inputs map to '", ed_out_file,
      "'; the second result overwrites the first.",
      call. = FALSE
    )
  }

  write.table(ed_out, file = ed_out_file, sep = "\t", row.names = FALSE)
  write.table(edd_out, file = edd_out_file, sep = "\t", row.names = FALSE)

  invisible(NULL)
}

# Run the matching workflow whenever this script is sourced or executed.
Check2Match()