Blame view
RPostClean.R
3.64 KB
788834dd7 This code takes t... |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
#For Reading Raw Data from the created file

#Required Libraries
library(MASS)
library(dplyr)
library(tidyr)
library(readr)
library(stringr)

#Necessary Functions

#1# Function for discretizing the data
# Maps every entry of NDATA onto one of three levels using fixed cut points
# at -1 and 1:
#   value <  -1       -> 0
#   -1 <= value <  1  -> 1
#   value >=  1       -> 2
# Missing entries (NA / NaN) are carried over unchanged.
#
# NDATA: numeric matrix (or all-numeric data frame).
# Returns a numeric matrix with the same dimensions and column names as NDATA.
dndat <- function(NDATA) {
  values <- as.matrix(NDATA)
  # findInterval() uses left-closed intervals, so a value of exactly 1 is
  # placed in the top level instead of silently staying at the initial 0.
  DDATA <- matrix(
    as.numeric(findInterval(values, c(-1, 1))),
    nrow = nrow(values),
    ncol = ncol(values)
  )
  colnames(DDATA) <- colnames(NDATA)
  # Copy missing entries verbatim so that NaN is not rewritten as NA.
  missing <- which(is.na(values))
  DDATA[missing] <- values[missing]
  DDATA
}

#Bringing in the file
rawdat <- file.choose()

# Read the tab-delimited file once (first line skipped, no header row) and
# split its rows by whether the first column matches a clinical label.
clinical_pattern <- "Group|Age|Region|PMI|Title|Sex|Braak"
rawall <- read_delim(rawdat, delim = "\t", col_names = FALSE, skip = 1)

RAWDAT <- filter(rawall, !grepl(clinical_pattern, X1))
# The first remaining row supplies the column names (ID_REF is expected to be
# the first of them, see arrange() below).
attributes(RAWDAT)$names <- RAWDAT[1, ]

#Just the clinical data
RAWWORD <- filter(rawall, grepl(clinical_pattern, X1))
attributes(RAWWORD)$names <- RAWDAT[1, ]

#Add col of NAs to clinical data
naroww <- data.frame(ROW_NAs = as.numeric(rowSums(is.na(RAWWORD))))
RAWWORD <- bind_cols(RAWWORD, naroww)

##Getting back to the data
# Drop the row that was used for the column names and sort by identifier.
RAWDAT2 <- RAWDAT[-1, ] %>%
  dplyr::arrange(ID_REF)

##Editing the file for R processing
RAWDATID <- as.matrix(RAWDAT2[, 1])
# One numeric column per original column, then transposed so that the
# original columns become rows. cbind() keeps the matrix shape even when
# RAWDAT2 has a single row.
RAWDATNUM <- t(do.call(cbind, lapply(RAWDAT2[, -1], as.numeric)))

##Consolidating genes with the same name
tabRDATID <- table(RAWDATID)
gene_ids <- rownames(tabRDATID)
# Positions of every identifier, grouped in the same order as tabRDATID;
# a single pass instead of rescanning all identifiers for each gene.
id_values <- as.vector(RAWDATID)
id_groups <- split(seq_along(id_values), factor(id_values, levels = gene_ids))

NuRDATN <- matrix(0, nrow = nrow(RAWDATNUM), ncol = length(tabRDATID))
for (g in seq_along(id_groups)) {
  cols <- id_groups[[g]]
  if (length(cols) == 1L) {
    ##Putting the ones without duplicates in their new homes
    NuRDATN[, g] <- RAWDATNUM[, cols]
  } else {
    ##Averaging duplicates and putting them in their new homes
    # drop = FALSE keeps a matrix for rowMeans() when there is only one row.
    NuRDATN[, g] <- rowMeans(RAWDATNUM[, cols, drop = FALSE], na.rm = TRUE)
  }
}

#Scaling the Data
scrawdat <- scale(NuRDATN)
attr(scrawdat, "scaled:center") <- NULL
attr(scrawdat, "scaled:scale") <- NULL
colnames(scrawdat) <- gene_ids

#Discretized the Data
dialzdat <- scrawdat %>%
  dndat() %>%
  t() %>%
  as.data.frame()
# Label the columns of the discretized table with the row names of RAWDATNUM.
colnames(dialzdat) <- rownames(RAWDATNUM)

#gene names
genena <- as.data.frame(as.matrix(rownames(dialzdat), ncol = 1))
#setting "ID_REF" as a new variable
colnames(genena) <- "ID_REF"
rownames(dialzdat) <- NULL
dialzdat <- bind_cols(genena, dialzdat)

#NAs in a column
# One extra row holding the NA count of every column; its first cell carries
# the label "COL_NAs" instead of a count.
nacol <- as.data.frame(t(colSums(is.na(dialzdat))), stringsAsFactors = FALSE)
nacol[1, 1] <- "COL_NAs"
colnames(nacol) <- colnames(dialzdat)
dialzdat <- bind_rows(dialzdat, nacol)

#NAs in a row
# Counted after the COL_NAs row was appended, so that row gets a count too.
narowd <- data.frame(ROW_NAs = as.numeric(rowSums(is.na(dialzdat))))
dialzdat <- bind_cols(dialzdat, narowd)

#converting to character so that the clinical can be brought together with discrete data
# Every column except the last one (ROW_NAs) is converted. The original range
# `2:dim(dialzdat)[2]-1` parses as `(2:n) - 1`, i.e. columns 1 to n - 1, and
# that effective range is kept here.
char_cols <- seq_len(ncol(dialzdat) - 1L)
dialzdat[char_cols] <- lapply(dialzdat[char_cols], as.character)
8bfefd7af Update |
145 |
#The end: the full data
788834dd7 This code takes t... |
146 147 148 149 150 151 152 153 154 155 |
# Stack the clinical rows (RAWWORD) on top of the discretized rows (dialzdat).
Fullalzdw <- bind_rows(RAWWORD, dialzdat)

#Create the file
# The output name is "GSE", followed by the digits found in the last piece of
# the chosen path, followed by "dscrt.txt". It carries no directory part.
path_parts <- strsplit(rawdat, "[\\|/]")[[1]]
last_part <- path_parts[length(path_parts)]
file_digits <- gsub("\\D", "", last_part)
nfnaex <- paste(c("GSE", file_digits, "dscrt.txt"), collapse = "")

write.table(
  Fullalzdw,
  file = nfnaex,
  sep = "\t",
  col.names = TRUE,
  row.names = FALSE
)