# Find the genes (ID_REF values) shared by all selected data sets

#Required libraries
library(MASS)
library(readr)
library(dplyr)

# Restrict several tab-delimited data sets to the ID_REF values they share.
#
# Every file is read with readr::read_delim(delim = "\t"). The first
# numrows[k] rows of file k are treated as clinical rows and are carried over
# untouched; the remaining rows are matched across files on the "ID_REF"
# column. For each input, a file named "GSE<digits of the input name>matched.txt"
# is written to the working directory, holding the clinical rows followed by
# the shared rows (ordered as they appear in the first file).
#
# Args:
#   files:   Character vector of input paths. When NULL (the default) the user
#            picks them interactively from the working directory.
#   numrows: Number of leading clinical rows in each file, one value per file
#            (0 is allowed). When NULL (the default) the user is prompted for
#            a comma-separated list.
#
# Returns:
#   A one-column tibble (column "ID_REF") with the distinct IDs present in
#   every file.
Check2Match <- function(files = NULL, numrows = NULL) {
  id_col <- "ID_REF"

  # Output name: "GSE" + every digit found in the file's base name +
  # "matched.txt" (e.g. "data/GSE123.txt" -> "GSE123matched.txt").
  matched_name <- function(path) {
    pieces <- strsplit(path, "[\\|/]")[[1]]
    digits <- gsub("\\D", "", pieces[length(pieces)])
    paste0("GSE", digits, "matched.txt")
  }

  # Fall back to the interactive prompts only for inputs that were not given.
  if (is.null(files)) {
    files <- select.list(
      choices = list.files(),
      multiple = TRUE,
      title = "Choose the file/files you want to analyze:"
    )
  }
  if (is.null(numrows)) {
    reply <- readline(prompt = "How many rows of clinical data are their in each data set (separate each number by a comma no spaces)?: ")
    numrows <- strsplit(reply, split = ",")[[1]]
  }
  # Coercion failures are reported by the explicit NA check below.
  numrows <- suppressWarnings(as.integer(numrows))

  if (length(files) == 0L) {
    stop("No files were selected.", call. = FALSE)
  }
  if (length(numrows) != length(files)) {
    stop(
      "Expected ", length(files), " clinical row count(s), one per file, but got ",
      length(numrows), ".",
      call. = FALSE
    )
  }
  if (anyNA(numrows) || any(numrows < 0L)) {
    stop("Clinical row counts must be non-negative whole numbers.", call. = FALSE)
  }

  # Refuse to continue if two inputs would be written to the same output file,
  # because the later one would silently overwrite the earlier one.
  out_names <- vapply(files, matched_name, character(1), USE.NAMES = FALSE)
  if (anyDuplicated(out_names) > 0L) {
    stop(
      "Several inputs map to the same output file: ",
      paste(unique(out_names[duplicated(out_names)]), collapse = ", "),
      call. = FALSE
    )
  }

  # Read every file exactly once and split it into clinical / data rows.
  # A logical mask is used instead of 1:n so that n == 0 selects no clinical
  # rows (1:0 would wrongly address row 1).
  clinical <- vector("list", length(files))
  measured <- vector("list", length(files))
  for (k in seq_along(files)) {
    tbl <- readr::read_delim(files[k], delim = "\t")
    if (!id_col %in% names(tbl)) {
      stop("File '", files[k], "' has no '", id_col, "' column.", call. = FALSE)
    }
    is_clinical <- seq_len(nrow(tbl)) <= numrows[k]
    clinical[[k]] <- tbl[is_clinical, ]
    measured[[k]] <- tbl[!is_clinical, ]
  }

  # Shared IDs: start from the first file's distinct IDs (keeping their order)
  # and keep only those that also occur in each remaining file. semi_join
  # filters without duplicating rows, so repeated IDs cannot fan out.
  shared <- dplyr::distinct(measured[[1]][id_col])
  for (tbl in measured[-1]) {
    shared <- dplyr::semi_join(shared, tbl, by = id_col)
  }

  # Write one matched file per input: clinical rows first, then the shared
  # rows in the common order given by `shared`.
  for (k in seq_along(files)) {
    matched <- dplyr::inner_join(shared, measured[[k]], by = id_col)
    combined <- rbind(clinical[[k]], matched)
    write.table(combined, file = out_names[k], sep = "\t", row.names = FALSE)
  }

  shared
}

# Run the matching workflow as soon as this script is executed; called with no
# arguments, Check2Match() prompts for the input files and the number of
# clinical rows in each one.
Check2Match()