forked from h2oai/h2o-2
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move pub853 test to algo directory, remove NOPASS
- Loading branch information
Showing
3 changed files
with
115 additions
and
113 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
## | ||
# Testing glm modeling performance with sparse Gisette dataset with strong rules on (4500 columns vs. all 5000 columns). | ||
# Test for JIRA PUB-853 | ||
# 'Early termination in glm resulting in underfitting' | ||
## | ||
|
||
|
||
# Switch the working directory to the directory of the path passed as the "f" command-line argument | ||
# (presumably this script's own file - TODO confirm) so the relative source() path below resolves. | ||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
# Load the shared test utilities; locate(), doTest() and testEnd() used below are not defined in this | ||
# file and presumably come from here - TODO confirm. | ||
source('../../findNSourceUtils.R') | ||
|
||
|
||
# PUB-853 regression test on the Gisette data. | ||
# | ||
# Uploads the training and validation frames, fits two gaussian GLMs with lambda search | ||
# (one on columns 1:4500, one on columns 1:5000), times both fits, then scores both models | ||
# on the validation frame. | ||
# | ||
# conn: H2O connection handle, passed straight through to h2o.uploadFile(). | ||
# Returns whatever testEnd() returns (it is the last expression evaluated). | ||
test <- function(conn) { | ||
  print("Reading in Gisette data for gaussian modeling.") | ||
  gisette.train <- h2o.uploadFile(conn, locate("smalldata/gisette/Gisette_train_data.csv.gzip"), key="gisette.train", header=FALSE) | ||
  gisette.train.label <- h2o.uploadFile(conn, locate("smalldata/gisette/Gisette_train_labels.csv.gzip"), key="gisette.train.label", header=FALSE) | ||
  # NOTE(review): the training labels are used as uploaded, while the validation labels below are | ||
  # recoded with ifelse(... == 1, 1, 0) - confirm this asymmetry is intended. | ||
  gisette.train.full <- h2o.assign(data=(cbind(gisette.train,gisette.train.label)),key="gisette.train.full") | ||
  print("Head of Gisette data: ") | ||
  # Values are not auto-printed inside a function body, so head() must be printed explicitly. | ||
  print(head(gisette.train.full)) | ||
  print("Dimension of Gisette data: ") | ||
  print(dim(gisette.train.full)) | ||
|
||
  print("Reading in Gisette validation data.") | ||
  gisette.valid <- h2o.uploadFile(conn, locate("smalldata/gisette/Gisette_valid_data.csv.gzip"), key="gisette.valid", header=FALSE) | ||
  gisette.label <- h2o.uploadFile(conn, locate("smalldata/gisette/Gisette_valid_labels.csv.gzip"), key="gisette.label", header=FALSE) | ||
  # Recode the validation labels: 1 stays 1, every other value becomes 0. | ||
  gisette.valid.label <- h2o.assign(data=ifelse(gisette.label==1,1,0), key="gisette.valid.label") | ||
  gisette.valid.full <- h2o.assign(data=(cbind(gisette.valid,gisette.valid.label)),key="gisette.valid.full") | ||
  print("Head of gisette validation data: ") | ||
  print(head(gisette.valid.full)) | ||
  print("Dimension of gisette validation data: ") | ||
  print(dim(gisette.valid.full)) | ||
|
||
  print("Run model on 4500 columns of Gisette with strong rules on.") | ||
  # system.time() wraps the assignment so the fit is both timed and kept for scoring below. | ||
  time.SR.4500 <- system.time(model.SR.4500 <- h2o.glm(x=c(1:4500), y="gisette.train.label", data=gisette.train.full, family="gaussian", lambda_search=TRUE, alpha=1, use_all_factor_levels=1, nfolds=0, higher_accuracy=TRUE)) | ||
  print(time.SR.4500) | ||
|
||
  print("Run model on all 5000 columns of Gisette with strong rules on.") | ||
  time.SR.5000 <- system.time(model.SR.5000 <- h2o.glm(x=c(1:5000), y="gisette.train.label", data=gisette.train.full, family="gaussian", lambda_search=TRUE, alpha=1, use_all_factor_levels=1, nfolds=0, higher_accuracy=TRUE)) | ||
  print(time.SR.5000) | ||
|
||
  print("Test models on validation set.") | ||
  predict.SR.4500 <- h2o.predict(model.SR.4500, gisette.valid.full) | ||
  predict.SR.5000 <- h2o.predict(model.SR.5000, gisette.valid.full) | ||
|
||
  print("Check performance of predictions.") | ||
  # NOTE(review): the performance objects are computed but never asserted on; the test only fails | ||
  # if one of the calls above raises an error. | ||
  perf.SR.4500 <- h2o.performance(predict.SR.4500$"predict", gisette.valid.full$"gisette.valid.label") | ||
  perf.SR.5000 <- h2o.performance(predict.SR.5000$"predict", gisette.valid.full$"gisette.valid.label") | ||
|
||
  testEnd() | ||
} | ||
|
||
# Hand the test function and its description to doTest() (defined outside this file). | ||
# The description names what this script actually runs: Gisette only, strong rules on. | ||
doTest("Testing glm modeling performance with sparse Gisette dataset with strong rules on", test) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
## | ||
# Testing glm modeling performance with wide Arcene dataset with strong rules off (3000 columns vs. 3250 columns). | ||
# Test for JIRA PUB-853 | ||
# 'Early termination in glm resulting in underfitting' | ||
## | ||
|
||
|
||
# Switch the working directory to the directory of the path passed as the "f" command-line argument | ||
# (presumably this script's own file - TODO confirm) so the relative source() path below resolves. | ||
setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) | ||
# Load the shared test utilities; locate(), doTest() and testEnd() used below are not defined in this | ||
# file and presumably come from here - TODO confirm. | ||
source('../../findNSourceUtils.R') | ||
|
||
|
||
# PUB-853 regression test on the Arcene data. | ||
# | ||
# Uploads the training and validation frames, fits two binomial GLMs without lambda search | ||
# (one on columns 1:3000, one on columns 1:3250), asserts that the wider fit does not take less | ||
# elapsed time than the narrower one, and asserts that both validation AUCs exceed 0.5. | ||
# | ||
# conn: H2O connection handle, passed straight through to h2o.uploadFile(). | ||
# Returns whatever testEnd() returns (it is the last expression evaluated). | ||
# Stops with an error (via stopifnot) if the timing or AUC checks fail. | ||
test <- function(conn) { | ||
  print("Reading in Arcene training data for binomial modeling.") | ||
  arcene.train <- h2o.uploadFile(conn, locate("smalldata/arcene/arcene_train.data"), key="arcene.train", header=FALSE) | ||
  arcene.label <- h2o.uploadFile(conn, locate("smalldata/arcene/arcene_train_labels.labels"), key="arcene.label", header=FALSE) | ||
  # Recode the labels: 1 stays 1, every other value becomes 0. | ||
  arcene.train.label <- h2o.assign(data=ifelse(arcene.label==1,1,0), key="arcene.train.label") | ||
  arcene.train.full <- h2o.assign(data=(cbind(arcene.train,arcene.train.label)),key="arcene.train.full") | ||
  print("Head of arcene training data: ") | ||
  # Values are not auto-printed inside a function body, so head()/dim() must be printed explicitly. | ||
  print(head(arcene.train.full)) | ||
  print("Dimension of arcene training data: ") | ||
  print(dim(arcene.train.full)) | ||
|
||
  print("Reading in Arcene validation data.") | ||
  arcene.valid <- h2o.uploadFile(conn, locate("smalldata/arcene/arcene_valid.data"), key="arcene.valid", header=FALSE) | ||
  # NOTE(review): this reuses both the R name and the key "arcene.label" from the training labels | ||
  # above - confirm that overwriting it at this point is intended. | ||
  arcene.label <- h2o.uploadFile(conn, locate("smalldata/arcene/arcene_valid_labels.labels"), key="arcene.label", header=FALSE) | ||
  arcene.valid.label <- h2o.assign(data=ifelse(arcene.label==1,1,0), key="arcene.valid.label") | ||
  arcene.valid.full <- h2o.assign(data=(cbind(arcene.valid,arcene.valid.label)),key="arcene.valid.full") | ||
  print("Head of arcene validation data: ") | ||
  print(head(arcene.valid.full)) | ||
  print("Dimension of arcene validation data: ") | ||
  print(dim(arcene.valid.full)) | ||
|
||
  print("Run model on 3000 columns of Arcene with strong rules off.") | ||
  # system.time() wraps the assignment so the fit is both timed and kept for scoring below. | ||
  time.noSR.3000 <- system.time(model.noSR.3000 <- h2o.glm(x=c(1:3000), y="arcene.train.label", data=arcene.train.full, family="binomial", lambda_search=FALSE, alpha=1, nfolds=0, use_all_factor_levels=1, higher_accuracy=TRUE)) | ||
  print(time.noSR.3000) | ||
|
||
  print("Run model on 3250 columns of Arcene with strong rules off.") | ||
  time.noSR.3250 <- system.time(model.noSR.3250 <- h2o.glm(x=c(1:3250), y="arcene.train.label", data=arcene.train.full, family="binomial", lambda_search=FALSE, alpha=1, nfolds=0, use_all_factor_levels=1, higher_accuracy=TRUE)) | ||
  print(time.noSR.3250) | ||
|
||
  print("Check that modeling with additional columns takes more time to compute without strong rules, ie doesn't quit too early.") | ||
  # looks at total elapsed time (third element of the system.time() result) | ||
  stopifnot(time.noSR.3000[3] <= time.noSR.3250[3]) | ||
|
||
  print("Test models on validation set.") | ||
  predict.noSR.3000 <- h2o.predict(model.noSR.3000, arcene.valid.full) | ||
  predict.noSR.3250 <- h2o.predict(model.noSR.3250, arcene.valid.full) | ||
|
||
  print("Check performance of predictions.") | ||
  # Score the prediction column named "1" against the recoded validation labels. | ||
  perf.noSR.3000 <- h2o.performance(predict.noSR.3000$"1", arcene.valid.full$"arcene.valid.label") | ||
  auc.noSR.3000 <- perf.noSR.3000@model$auc | ||
  print(auc.noSR.3000) | ||
  perf.noSR.3250 <- h2o.performance(predict.noSR.3250$"1", arcene.valid.full$"arcene.valid.label") | ||
  auc.noSR.3250 <- perf.noSR.3250@model$auc | ||
  print(auc.noSR.3250) | ||
|
||
  print("Check that prediction AUC better than guessing (0.5).") | ||
  stopifnot(auc.noSR.3000 > 0.5) | ||
  stopifnot(auc.noSR.3250 > 0.5) | ||
|
||
  testEnd() | ||
} | ||
|
||
# Hand the test function and its description to doTest() (defined outside this file). | ||
# The description names what this script actually runs: Arcene only, strong rules off. | ||
doTest("Testing glm modeling performance with wide Arcene dataset with strong rules off", test) |
113 changes: 0 additions & 113 deletions
113
R/tests/testdir_jira/runit_NOPASS_pub_853_glm_performance.R
This file was deleted.
Oops, something went wrong.