# Load the prostate cancer microarray data from Singh et al. 2002 (2135 genes
# and 102 samples), standardise the covariates, and split the samples into a
# training set and a testing set.
#
# The input file is read in two passes: the first n.genes lines hold the
# expression values and the line right after them holds the responses.

# Number of gene rows at the top of the file. It is used both as the row count
# of the first read and as the number of lines skipped by the second read, so
# it is defined once to keep the two in sync.
n.genes <- 2135
# Number of samples drawn for the training set; the rest become the test set.
n.train <- 70

# These are the microarray expression values (rows are covariates, columns are
# samples); the first field of each line is used as the row name
prostate.temp.cov <- read.table(
  file = "prostate_preprocessed.txt", row.names = 1, sep = "", nrows = n.genes
)
# This is the vector of responses, cancer vs. noncancer: the single line that
# follows the n.genes expression rows
prostate.response <- read.table(
  file = "prostate_preprocessed.txt", row.names = 1, sep = "",
  skip = n.genes, nrows = 1
)
# Transpose so the responses run down the rows, one entry per sample
prostate.response <- t(prostate.response)

# Microarray data comes in the format where rows are covariates and columns
# are samples; all of our methods need the data in the transpose
prostate.unscaled.cov <- t(prostate.temp.cov)

# Scale the covariates (scale() centres every column and divides it by its
# standard deviation by default).
# NOTE(review): the scaling uses all samples, including the ones held out for
# testing below -- confirm that this is intended.
prostate.cov <- scale(prostate.unscaled.cov)

# Make training and testing sets using (pseudo-)random numbers
n.samp <- nrow(prostate.cov)
# Set the seed for a pseudorandom number (this means that you can generate
# random numbers, but these will be the same sequence for everyone)
set.seed(314159)
ind.train <- sample(seq_len(n.samp), n.train)
# Every sample that was not drawn for training is used for testing
ind.test <- setdiff(seq_len(n.samp), ind.train)

# drop = FALSE keeps the subsets as matrices whatever the number of rows
cov.test <- prostate.cov[ind.test, , drop = FALSE]
cov.train <- prostate.cov[ind.train, , drop = FALSE]
resp.train <- prostate.response[ind.train]
resp.test <- prostate.response[ind.test]