diff --git a/RCleanDscret.R b/RCleanDscret.R
new file mode 100644
index 0000000..2b9666c
--- /dev/null
+++ b/RCleanDscret.R
@@ -0,0 +1,445 @@
+#Libraries required to run the code
+library(pryr)
+library(MASS)
+library(dplyr)
+library(tidyr)
+library(readr)
+library(stringr)
+
+
+#Necessary Functions
+#1#Function for handling the changing of row names and column names
+chngrownm <- function(mat){
+	row <- dim(mat)[1]
+	col <- dim(mat)[2]
+	j <- 1
+	x <- 1
+	p <- 1	
+	a <- 1 
+	b <- 1
+	g <- 1
+	for(j in 1:col){
+		if("!Sample_source_name_ch1"==mat[1,j]){
+			colnames(mat)[j] <- "Brain_Region"	
+		} 
+		if("!Sample_title" == mat[1,j]){
+			colnames(mat)[j] <- "Title"
+		} 
+		if("!Sample_geo_accession" == mat[1,j]){
+			colnames(mat)[j] <- "ID_REF"
+		} else{
+			if(grepl("Sex|gender|Gender|sex",mat[2,j])==TRUE){
+				colnames(mat)[j] <- paste0("Sex",x)
+				x = x + 1
+			}
+			if(grepl("postmorteminterval|PMI|pmi",mat[2,j])==TRUE){
+				colnames(mat)[j] <- paste0("PMI",p)
+				p = p + 1
+			}
+			if(grepl("age|Age|AGE",mat[2,j])==TRUE){
+				colnames(mat)[j] <- paste0("Age",a)
+				a = a + 1
+			 }
+			if(grepl("braak|b&b",mat[2,j])==TRUE){
+				colnames(mat)[j] <- paste0("Braak",b)
+				b = b + 1
+			}
+			if(grepl("group|disease|control|AD|normal|diagnosis|Alzheimer|Control",mat[2,j])==TRUE){
+				colnames(mat)[j] <- paste0("Group",g)
+				g = g + 1
+			}
+			
+		}
+		j = j + 1
+	}
+	mat
+}			
+
+#2#Function for reorganizing information within the columns
+cinfo <- function(mat){
+	col <- dim(mat)[2]
+	j <-2
+	for(j in 2:col){
+		if(grepl("Group",colnames(mat)[j]) == TRUE){
+			mat[,j] <- gsub(".+:\\s|\\s.+;.+","",mat[,j])
+		}
+		if(grepl("Age",colnames(mat)[j])==TRUE){
+			mat[,j] <- gsub("\\D","",mat[,j])%>%
+				as.integer()
+		}
+		if(grepl("Sex",colnames(mat)[j])==TRUE){
+			mat[,j] <- gsub(".+:\\s","",mat[,j])
+		}
+		if(grepl("PMI",colnames(mat)[j])==TRUE){
+			mat[,j] <- gsub("[^0-9\\.]","",mat[,j])%>%
+				as.numeric() 
+		}
+		if(grepl("Braak",colnames(mat)[j])==TRUE){
+			mat[,j]<-gsub(".+:\\s","",mat[,j])%>%
+				as.roman()%>%
+				as.integer()
+		}
+	j=j+1
+	}
+	mat
+}
+
+#3#Function for labeling the gene IDs without names
+NAFIXING <- function(GIDNAM){
+	row <- dim(GIDNAM)[1]
+	i <- 1
+	for(i in 1:row){
+		if(grepl("^NA\\s*$",GIDNAM[i,2])==TRUE||is.na(GIDNAM[i,2])==TRUE){
+			GIDNAM[i,2] <- GIDNAM[i,1]
+		}
+		i <- i + 1
+	}
+	GIDNAM
+}	
+
+#4#Function for changing the gene ID to gene name
+cgeneID <- function(GeneName,DATA){
+    colGene <- dim(GeneName)[2]
+     j <- 1
+     for(j in 1:colGene){
+	chngsreq <- grep(paste0("^",GeneName[1,j],"$"),DATA[1,])
+	if(is.na(sum(chngsreq))==FALSE){
+		if(sum(chngsreq) > 0){
+			DATA[1,chngsreq] <- gsub(paste0("^",GeneName[1,j]),GeneName[2,j],DATA[1,chngsreq])
+		}
+	}
+		#if(sum(chngsreq) > 0){
+		##DATA[1,chngsreq] <- gsub(GeneName[1,j],GeneName[2,j],DATA[1,chngsreq])
+		#DATA[1,chngsreq] <- gsub(paste0("^",GeneName[1,j]),GeneName[2,j],DATA[1,chngsreq])
+		#}
+	j = j+1
+	}
+	DATA
+}
+
+#5#Function for adjusting the gene names
+gcnames <- function(DiData,usecol=1){
+	nuruns <- dim(DiData)[2]
+	i = 1
+	nwnam <- rep("0",length.out=nuruns)
+	for(i in 1:nuruns){
+		if(length(strsplit(colnames(DiData)[i],"///")[[1]]) >= usecol){
+			nwnam[i]=str_trim(strsplit(colnames(DiData)[i],"///")[[1]][usecol])
+		} else{
+			nwnam[i]=str_trim(strsplit(colnames(DiData)[i],"///")[[1]][1])
+		}
+		
+	}
+	nwnam
+
+}
+
+#6# Function for discretizing the data 
+dndat <- function(NDATA){
+	rownd <- dim(NDATA)[1]
+	colnd <- dim(NDATA)[2]
+	DDATA <- matrix(0,nrow=rownd,ncol=colnd)
+	colnames(DDATA) <- colnames(NDATA)
+	i <- 1
+	for(i in 1:rownd){
+		j <- 1
+		for(j in 1:colnd){
+			if(is.na(NDATA[i,j])==FALSE){
+			
+				if(NDATA[i,j] < -1){
+					DDATA[i,j]=0L
+				}
+				if(NDATA[i,j] > 1){
+					DDATA[i,j]=2L
+				}
+				if(-1 <= NDATA[i,j] && NDATA[i,j] < 1){
+					DDATA[i,j]=1L
+				}
+			} else{
+				DDATA[i,j] = NDATA[i,j]
+			}
+			j = j + 1			
+		}
+		i = i + 1
+	}
+	DDATA
+}
+
+
+#The Rest of this code will be used every time you want to change a data set
+
+#Getting the series matrix file
+print("Choose the series matrix file that you want to Analyze")
+alz <- file.choose()
+
+#Getting the GPL file
+print("Choose the GPL file that correlates with the above series matrix file")
+genena <- file.choose()
+
+
+#Find out if it is a soft GPL file or not
+soft <- strsplit(genena,"[\\|/]") %>%
+	.[[1]] %>%
+	.[length(.)] %>%
+	grepl("soft|annot",.)
+
+#Working with the wordy part of the document
+alzword <- alz %>%
+	read_delim(delim ="\t",comment = "!Series",col_names = FALSE)%>%
+	filter(grepl("!Sample",X1))%>%
+	filter(!grepl("!Sample_contact",X1))
+
+##Changing row names and column names:
+ALZWORD <- t(alzword)
+rownames(ALZWORD)=NULL
+colnames(ALZWORD) <- colnames(ALZWORD,do.NULL=FALSE)
+ALZWORD <- chngrownm(ALZWORD)[-1,]
+ALZWORD <- ALZWORD%>%
+	as.data.frame()%>%
+	dplyr::select(-starts_with("col"))
+
+##Reorganizing information within the columns
+ALZWORDF <- cinfo(ALZWORD)
+
+
+#Working with Actual Data part of file
+alzdat <- alz %>% 
+	read_delim(delim="\t",col_names=TRUE,comment = "!",skip=1)
+ALZDAT <- t(alzdat[,-1])
+rownames(ALZDAT)=NULL
+
+##Is there a clean version of the GPL file available?
+gplnum <- strsplit(genena,"[\\|/]") %>%
+	.[[1]] %>%
+	.[length(.)] %>%
+	gsub("\\D","",.)
+clfileex <- sum(grepl(paste0("Clean_GPL",gplnum),list.files()))
+if(clfileex >= 1){
+#use the clean version
+geneIDNam <-  paste0("Clean_GPL",gplnum,".txt") %>%
+	read_delim(delim="\t",col_names = c("ID","Symbol"), comment = "!")
+
+} 
+if(clfileex == 0){
+##Lets Create a clean version
+ 
+##Gene ID to Gene Name
+	if(soft == TRUE){
+		#Check to see if there is already a file containing information on soft files
+		fileex <- sum(grepl("GPL_ID_LOC.txt",list.files()))
+		if(fileex == 1){
+			#Check to see if this GPL soft file has been used before
+			IDF <- read_delim("GPL_ID_LOC.txt",delim = "\t",col_names = TRUE) %>%
+				.$GPL_FILE_NUM%>%
+				grepl(gplnum,.) %>%
+				sum()
+			if(IDF == 1){
+				IDLOCAL <- read_delim("GPL_ID_LOC.txt",delim = "\t",col_names = TRUE) %>%
+					.$GPL_FILE_NUM%>%
+					grep(gplnum,.)
+				idlocgpl <- read_delim("GPL_ID_LOC.txt",delim = "\t",col_names = TRUE) %>%
+					.$LOC_ID %>%
+					.[IDLOCAL]
+				geneIDNam <- genena %>%
+					read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idlocgpl) %>%
+					dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
+			}
+			if(IDF == 0){
+				#No information on this particular GPL file
+				idLOCGPL <- genena %>%
+					read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>%
+					t(.) %>%
+					grep("^ID\\s*$",.) %>%
+					-1
+				cbind(as.integer(gplnum),as.integer(idLOCGPL)) %>%
+					cat(file="GPL_ID_LOC.txt",sep = "\t", fill = TRUE, append = TRUE)
+				geneIDNam <- genena %>%
+					read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idLOCGPL) %>%
+					dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
+			}
+		}
+		if(fileex == 0){
+			#We must create a file that we can access for later use
+			idLOCGPL <- genena %>%
+				read_delim(delim="\t",col_names = FALSE, comment = "!", n_max = 1000) %>%
+				t(.) %>%
+				grep("^ID\\s*$",.) %>%
+				-1
+			Firstval <- cbind(as.integer(gplnum),as.integer(idLOCGPL))
+			colnames(Firstval) <- c("GPL_FILE_NUM","LOC_ID")
+			write.table(Firstval,file = "GPL_ID_LOC.txt", sep = "\t",row.names = FALSE, col.names = TRUE)
+			geneIDNam <- genena %>%
+				read_delim(delim="\t",col_names = TRUE, comment = "!", skip = idLOCGPL) %>%
+				dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
+		}
+	 }
+	if(soft == FALSE){
+		geneIDNam <- genena %>%
+		read_delim(delim="\t",comment = "#")%>%
+		dplyr::select(.,ID,grep("Symbol|^ORF\\s*$|^gene_assignment\\s*$",colnames(.)))
+	}
+
+	##Labeling the gene IDs without names
+	geneIDNam <- NAFIXING(geneIDNam)	
+
+	##remove the whitespace
+	geneIDNam <- t(rbind(str_trim(t(geneIDNam)[1,]),str_trim(t(geneIDNam)[2,])))
+	
+	##Here is the clean version
+	write.table(geneIDNam,file = paste0("Clean_GPL",gplnum,".txt"),sep = "\t",row.names = FALSE, col.names = FALSE)
+}
+
+
+
+##Changing the gene ID to gene name
+ALZDAT1 <- cgeneID(t(geneIDNam),t(alzdat))
+colnames(ALZDAT) = ALZDAT1[1,]
+
+
+##Adjusting the column names aka the gene names
+colnames(ALZDAT) <- gcnames(ALZDAT)
+
+
+#Full RAW Data
+Fullalzdwr <- ALZDAT %>%
+	as.data.frame() %>%
+	cbind(ALZWORDF,.)
+
+
+#Raw file is output
+nfnaex <- strsplit(alz,"[\\]") %>%
+	.[[1]] %>%
+	.[length(.)] %>%
+	gsub("\\D","",.) %>%
+	c("GSE",.,"aftexcel.txt") %>%
+	paste(collapse = "")
+write.table(t(Fullalzdwr), file = nfnaex, sep = "\t")
+
+
+#Now for the discretization part
+##get the wordy part again
+rawword <- t(ALZWORDF)
+
+##where is ID_REF located
+hereim <- grep("ID_REF",rawword[,1])
+
+##Subject Names GSM...
+subjnam <- rawword[hereim,]
+
+##Getting the names for the rows
+namedarows <- rownames(rawword)[-hereim] %>%
+	as.data.frame()
+RAWWORD <- rawword[-hereim,] %>%
+	as.data.frame() %>%
+	bind_cols(namedarows,.)
+z <- 1
+naroww <- as.data.frame(rep(0,dim(RAWWORD)[1]),stringsAsFactors = FALSE)
+for(z in 1:dim(RAWWORD)[1]){
+	naroww[z,1] <- as.integer(sum(is.na(RAWWORD[z,])))
+	z <- z + 1
+}
+
+colnames(naroww) <- "ROW_NAs"
+RAWWORD <- bind_cols(RAWWORD,naroww)
+
+
+roALZna <- t(ALZDAT) %>%
+	rownames(.) %>%
+	as.data.frame(.)
+colnames(roALZna) <- "ID_REF"
+
+RAWDAT <- t(ALZDAT) %>%
+	as.data.frame(.)
+colnames(RAWDAT) <- NULL
+rownames(RAWDAT) <- NULL
+
+RAWDAT2 <- RAWDAT %>%
+	cbind(roALZna,.) %>%
+	dplyr::arrange(.,ID_REF)
+
+##Editing the file for R processing
+RAWDATID <- RAWDAT2[,1] %>%
+	as.matrix(.)
+
+RAWDATNUM <- RAWDAT2[,-1] %>%
+	mapply(.,FUN = as.numeric) %>%
+	t(.)
+
+##Consolidating genes with the same name
+###create empty matrix of size equal  to tabRDATID
+tabRDATID <- table(RAWDATID)
+NuRDATN <- matrix(0, nrow = dim(RAWDATNUM)[1], ncol = length(tabRDATID))
+j <- 1
+for(j in 1:length(tabRDATID)){
+
+	##Putting the ones without duplicates in their new homes
+	if(tabRDATID[j] == 1){
+		NuRDATN[,j] <- RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])]
+	}
+	##Averaging duplicates and putting them in their new homes
+	if(tabRDATID[j] > 1){
+		NuRDATN[,j] <- rowMeans(RAWDATNUM[,which(RAWDATID==rownames(tabRDATID)[j])],na.rm = TRUE)
+	}
+	j <- j + 1
+}
+
+##Scaling the Data
+scrawdat <- NuRDATN%>%
+	scale()
+attr(scrawdat,"scaled:center") <- NULL
+attr(scrawdat,"scaled:scale") <- NULL
+colnames(scrawdat) <- rownames(tabRDATID)
+
+##Discretized the Data
+dialzdat <- scrawdat %>%
+	dndat(.) %>%
+	t()%>%
+	as.data.frame(.)
+colnames(dialzdat) <- rownames(RAWDATNUM)
+
+##setting "ID_REF" as a new variable
+geneNAM <- as.data.frame(as.matrix(rownames(dialzdat),ncol=1))
+colnames(geneNAM) <- "ID_REF"
+rownames(dialzdat) <- NULL
+dialzdat <-bind_cols(geneNAM,dialzdat)
+
+##NAs in a column
+x <- 2
+nacol <- as.data.frame(t(rep(0,dim(dialzdat)[2])),stringsAsFactors = FALSE)
+nacol[1,1] = "COL_NAs"
+for(x in 2:dim(dialzdat)[2]){
+	nacol[1,x] <- as.integer(sum(is.na(dialzdat[,x])))
+	x <- x + 1
+}
+colnames(nacol) <- colnames(dialzdat)
+dialzdat<-bind_rows(dialzdat,nacol)
+
+##NAs in a row
+y <- 1
+narowd <- as.data.frame(rep(0,dim(dialzdat)[1]),stringsAsFactors = FALSE)
+for(y in 1:dim(dialzdat)[1]){
+	narowd[y,1] <- as.integer(sum(is.na(dialzdat[y,])))
+	y <- y + 1
+}
+colnames(narowd) <- "ROW_NAs"
+dialzdat <- bind_cols(dialzdat,narowd)
+colnames(dialzdat)[2:(dim(dialzdat)[2]-1)] <- subjnam
+colnames(RAWWORD) <- colnames(dialzdat)
+##converting to character so that the clinical can be brought together with discrete data
+k <- 2
+for(k in 2:dim(dialzdat)[2]-1){
+	dialzdat[,k] <- as.character(dialzdat[,k])
+	k <- k + 1
+}
+#The End the full data
+Dscrtalzdw <- bind_rows(RAWWORD,dialzdat)
+
+#Produces Discrete file
+nfnaex <- strsplit(rawdat,"[\\|/]") %>%
+	.[[1]] %>%
+	.[length(.)] %>%
+	gsub("\\D","",.) %>%
+	c("GSE",.,"dscrt.txt") %>%
+	paste(collapse = "")
+write.table(Dscrtalzdw, file = nfnaex, sep = "\t",col.names = TRUE,row.names = FALSE)
+