Skip to content

Commit

Permalink
Update Zeiden_HW2.r
Browse files Browse the repository at this point in the history
  • Loading branch information
mfzeidan committed Oct 16, 2017
1 parent f4211ee commit 0c8e316
Showing 1 changed file with 17 additions and 13 deletions.
30 changes: 17 additions & 13 deletions Zeiden_HW2.r
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ library("pls")
# (B)
#

## drop the near-zero-variance (low-frequency) columns; the column count is
## printed before and after so the effect of the filter is visible
ncol(fingerprints)

nzv_cols <- nearZeroVar(fingerprints)
if (length(nzv_cols) > 0) {
  ## negative indices remove the flagged columns
  fingerprints <- fingerprints[, -nzv_cols]
}
ncol(fingerprints) ## 388 predictors remain from the original 1107 predictors (719 removed)
Expand All @@ -17,30 +17,37 @@ ncol(fingerprints) ## 388 predictors remain from the original 1107 predictors (7
#


## split the data into a test and training data set
## preprocess the data and tune a PLS model

## the predictors and the response are split with one shared row index, so
## they must have the same number of rows
stopifnot(nrow(fingerprints) == nrow(permeability))

## 75% of the sample size
## this is to split the training and test sets up from the data set
smp_size <- floor(0.75 * nrow(fingerprints))

## set the seed to make your partition reproducible
set.seed(123)

## breaking up the data into training and test data sets for the fingerprint and
## permeability data set

## draw the training rows ONCE. The previous version called sample() a second
## time for permeability, which produced a different set of rows, so train_fp
## and train_perm no longer referred to the same observations.
## NOTE(review): any metrics recorded elsewhere in this script were obtained
## with the old, mismatched split and should be re-run.
train_ind <- sample(seq_len(nrow(fingerprints)), size = smp_size)
##permeability <- data.frame(permeability)
## kept as an alias of train_ind in case later code still refers to this name
train_ind_perm <- train_ind

## permeability test and training data sets
train_perm <- permeability[train_ind_perm, ]
test_perm <- permeability[-train_ind_perm, ]

## fingerprint test and training data set
train_fp <- fingerprints[train_ind, ]
test_fp <- fingerprints[-train_ind, ]


## PLS model

## resampling scheme: 10-fold cross validation
ctrl <- trainControl(number = 10, method = "cv")

## the plsr() call below receives its predictors through `data`,
## so the training fingerprints are converted to a data frame first
df_train_fp <- data.frame(train_fp)

#####

## fit permeability on all fingerprint columns, with 20 components
plsFit <- plsr(
  train_perm ~ .,
  ncomp = 20,
  data = df_train_fp
)

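## NOTE: originally labelled "try on test data (316)", but the predict() call below is given df_train_fp, i.e. the training predictors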
Expand All @@ -49,7 +56,7 @@ plsPred = predict(plsFit, df_train_fp, ncomp = 20)
## pair the observed training response with the first slice (third dimension)
## of the plsPred array, in the obs/pred layout passed to defaultSummary()
plsValue <- data.frame(
  obs = train_perm,
  pred = plsPred[, , 1]
)

defaultSummary(plsValue)

## RMSE 9.5 Rsquared 0.614 MAE 6.60

plsModel <- train(x=train_fp, y = train_perm,
method="pls",
Expand Down Expand Up @@ -80,6 +87,8 @@ testResultsPLS <- data.frame(obs = train_perm, pred = predict(plsTune, df_train_

defaultSummary(testResultsPLS)

## RMSE of 14.54 and Rsquared of 0.09 MAE 11.09

## PCR model: tune ncomp over 1 to 35 using the `ctrl` resampling settings;
## the seed is reset so the resampling is repeatable
set.seed(123)
pcrTune <- train(
  x = df_train_fp,
  y = train_perm,
  method = "pcr",
  trControl = ctrl,
  tuneGrid = expand.grid(ncomp = 1:35)
)
pcrTune
Expand Down Expand Up @@ -112,8 +121,3 @@ plot(pcrImp, top = 25, scales = list(y = list(cex = .95)))


# exercise 7.4





0 comments on commit 0c8e316

Please sign in to comment.