SequentialFeatureSelection Early Stopping Criterion #886

Open
wants to merge 6 commits into base: master
Changes from 1 commit
test and refactoring
aldder committed Feb 2, 2022
commit fff216316bafd1682a30928c8352610b44d3dd4e
10 changes: 5 additions & 5 deletions mlxtend/feature_selection/sequential_feature_selector.py
@@ -115,12 +115,12 @@ class SequentialFeatureSelector(_BaseXComposition, MetaEstimatorMixin):
        The number of CPUs to use for evaluating different feature subsets
        in parallel. -1 means 'all CPUs'.
    early_stop : bool (default: False)
        Determines whether to prematurely stop execution if the score does not
        improve after a number of iterations set by the `early_stop_rounds`
        parameter.
    early_stop_rounds : int (default 3)
-       Used when early_stop is True, it determines the number of iterations
+       Used when `early_stop` is True, it determines the number of iterations
        after which, if no performance boost has been seen, execution is
        stopped.
    pre_dispatch : int, or string (default: '2*n_jobs')
        Controls the number of jobs that get dispatched
@@ -186,7 +186,7 @@ def __init__(self, estimator, k_features=1,
                 forward=True, floating=False,
                 verbose=0, scoring=None,
                 cv=5, n_jobs=1,
                 early_stop=False,
                 early_stop_rounds=3,
                 pre_dispatch='2*n_jobs',
                 clone_estimator=True,
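For context, the new options are passed like any other constructor argument. A minimal usage sketch, assuming the PR branch is installed; the dataset, estimator, and k_features value are illustrative choices, and k_feature_idx_ / k_score_ are the selector's standard result attributes:

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.feature_selection import SequentialFeatureSelector as SFS

X, y = load_iris(return_X_y=True)

# Stop the forward search once the CV score has not improved for 3 rounds.
sfs = SFS(KNeighborsClassifier(),
          k_features=4,
          forward=True,
          floating=False,
          cv=5,
          early_stop=True,        # new flag introduced by this PR
          early_stop_rounds=3)    # new parameter introduced by this PR
sfs = sfs.fit(X, y)
print(sfs.k_feature_idx_, sfs.k_score_)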
@@ -983,3 +983,51 @@ def test_custom_feature_names():
    assert sfs1.k_feature_names_ == ('sepal width', 'petal width')
    assert sfs1.subsets_[2]['feature_names'] == ('sepal width',
                                                 'petal width')


def test_run_forward_earlystop():
    np.random.seed(0)
    iris = load_iris()
    X_iris = iris.data
    y_iris = iris.target
    # Append uninformative noise features to the iris data.
    X_iris_with_noise = np.concatenate(
        (X_iris,
         np.random.randn(X_iris.shape[0], X_iris.shape[1])),
        axis=1)
    knn = KNeighborsClassifier()
    esr = 2
    sfs = SFS(estimator=knn,
              k_features=X_iris_with_noise.shape[1],
              forward=True,
              floating=False,
              early_stop=True,
              early_stop_rounds=esr,
              verbose=0)
    sfs.fit(X_iris_with_noise, y_iris)
    # The search should stop before all features are selected.
    assert len(sfs.subsets_) < X_iris_with_noise.shape[1]
    # The subset reached `esr` rounds before the stop has the best avg_score.
    assert all([sfs.subsets_[list(sfs.subsets_)[-esr-1]]['avg_score']
                >= sfs.subsets_[i]['avg_score'] for i in sfs.subsets_.keys()])


def test_run_backward_earlystop():
    np.random.seed(0)
    iris = load_iris()
    X_iris = iris.data
    y_iris = iris.target
    # Append uninformative noise features to the iris data.
    X_iris_with_noise = np.concatenate(
        (X_iris,
         np.random.randn(X_iris.shape[0], X_iris.shape[1])),
        axis=1)
    knn = KNeighborsClassifier()
    esr = 2
    sfs = SFS(estimator=knn,
              k_features=1,
              forward=False,
              floating=False,
              early_stop=True,
              early_stop_rounds=esr,
              verbose=0)
    sfs.fit(X_iris_with_noise, y_iris)
    # The search should stop before shrinking to a single feature.
    assert len(sfs.subsets_) > 1
    # The subset reached `esr` rounds before the stop has the best avg_score.
    assert all([sfs.subsets_[list(sfs.subsets_)[-esr-1]]['avg_score']
                >= sfs.subsets_[i]['avg_score'] for i in sfs.subsets_.keys()])
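The tests above only check observable behaviour. The criterion the updated docstring describes amounts to tracking the best average CV score and aborting once it has not improved for `early_stop_rounds` consecutive iterations. A rough standalone sketch of that idea, illustrative only and not the PR's actual implementation (`should_stop` and the score history are hypothetical names):

def should_stop(avg_scores, early_stop_rounds):
    # Index of the best score seen so far (first occurrence wins on ties).
    best_idx = max(range(len(avg_scores)), key=avg_scores.__getitem__)
    # Stop once `early_stop_rounds` iterations have passed without improvement.
    return len(avg_scores) - 1 - best_idx >= early_stop_rounds

# Example: the score stalls after the third iteration.
history = [0.90, 0.95, 0.96, 0.96, 0.955]
print(should_stop(history, early_stop_rounds=2))  # True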