SequentialFeatureSelection Early Stopping Criterion #886

Open
wants to merge 6 commits into base: master
Changes from 1 commit
test and refactoring
aldder committed Feb 2, 2022
commit fff216316bafd1682a30928c8352610b44d3dd4e
10 changes: 5 additions & 5 deletions mlxtend/feature_selection/sequential_feature_selector.py
@@ -115,12 +115,12 @@ class SequentialFeatureSelector(_BaseXComposition, MetaEstimatorMixin):
        The number of CPUs to use for evaluating different feature subsets
        in parallel. -1 means 'all CPUs'.
    early_stop : bool (default: False)
        Determines whether to prematurely stop execution if the score does not
        improve after a number of iterations set by the `early_stop_rounds`
        parameter.
    early_stop_rounds : int (default 3)
-       Used when early_stop is True, it determines the number of iterations
+       Used when `early_stop` is True, it determines the number of iterations
        after which, if no performance boost has been seen, execution is
        stopped.
    pre_dispatch : int, or string (default: '2*n_jobs')
        Controls the number of jobs that get dispatched
@@ -186,7 +186,7 @@ def __init__(self, estimator, k_features=1,
                 forward=True, floating=False,
                 verbose=0, scoring=None,
                 cv=5, n_jobs=1,
                 early_stop=False,
                 early_stop_rounds=3,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True,
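For context, the new options are passed like any other constructor argument. A minimal usage sketch, assuming the PR branch is installed; the dataset, estimator, and k_features value are illustrative choices, and k_feature_idx_ / k_score_ are the selector's standard result attributes:

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

X, y = load_iris(return_X_y=True)

# Stop the forward search once the CV score has not improved for 3 rounds.
sfs = SFS(KNeighborsClassifier(),
          k_features=4,
          forward=True,
          floating=False,
          cv=5,
          early_stop=True,        # new flag introduced by this PR
          early_stop_rounds=3)    # new parameter introduced by this PR
sfs = sfs.fit(X, y)
print(sfs.k_feature_idx_, sfs.k_score_)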
@@ -983,3 +983,51 @@ def test_custom_feature_names():
    assert sfs1.k_feature_names_ == ('sepal width', 'petal width')
    assert sfs1.subsets_[2]['feature_names'] == ('sepal width',
                                                 'petal width')


def test_run_forward_earlystop():
    np.random.seed(0)
    iris = load_iris()
    X_iris = iris.data
    y_iris = iris.target
    # Append uninformative noise features to the iris data.
    X_iris_with_noise = np.concatenate(
        (X_iris,
         np.random.randn(X_iris.shape[0], X_iris.shape[1])),
        axis=1)
    knn = KNeighborsClassifier()
    esr = 2
    sfs = SFS(estimator=knn,
              k_features=X_iris_with_noise.shape[1],
              forward=True,
              floating=False,
              early_stop=True,
              early_stop_rounds=esr,
              verbose=0)
    sfs.fit(X_iris_with_noise, y_iris)
    # The search should stop before all features are selected.
    assert len(sfs.subsets_) < X_iris_with_noise.shape[1]
    # The subset reached `esr` rounds before the stop has the best avg_score.
    assert all([sfs.subsets_[list(sfs.subsets_)[-esr-1]]['avg_score']
                >= sfs.subsets_[i]['avg_score'] for i in sfs.subsets_.keys()])


def test_run_backward_earlystop():
    np.random.seed(0)
    iris = load_iris()
    X_iris = iris.data
    y_iris = iris.target
    # Append uninformative noise features to the iris data.
    X_iris_with_noise = np.concatenate(
        (X_iris,
         np.random.randn(X_iris.shape[0], X_iris.shape[1])),
        axis=1)
    knn = KNeighborsClassifier()
    esr = 2
    sfs = SFS(estimator=knn,
              k_features=1,
              forward=False,
              floating=False,
              early_stop=True,
              early_stop_rounds=esr,
              verbose=0)
    sfs.fit(X_iris_with_noise, y_iris)
    # The search should stop before shrinking to a single feature.
    assert len(sfs.subsets_) > 1
    # The subset reached `esr` rounds before the stop has the best avg_score.
    assert all([sfs.subsets_[list(sfs.subsets_)[-esr-1]]['avg_score']
                >= sfs.subsets_[i]['avg_score'] for i in sfs.subsets_.keys()])
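The tests above only check observable behaviour. The criterion the updated docstring describes amounts to tracking the best average CV score and aborting once it has not improved for `early_stop_rounds` consecutive iterations. A rough standalone sketch of that idea, illustrative only and not the PR's actual implementation (`should_stop` and the score history are hypothetical names):

def should_stop(avg_scores, early_stop_rounds):
    # Index of the best score seen so far (first occurrence wins on ties).
    best_idx = max(range(len(avg_scores)), key=avg_scores.__getitem__)
    # Stop once `early_stop_rounds` iterations have passed without improvement.
    return len(avg_scores) - 1 - best_idx >= early_stop_rounds

# Example: the score stalls after the third iteration.
history = [0.90, 0.95, 0.96, 0.96, 0.955]
print(should_stop(history, early_stop_rounds=2))  # True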