Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SequentialFeatureSelection Early Stopping Criterion #886

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions mlxtend/feature_selection/sequential_feature_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,11 @@ class SequentialFeatureSelector(_BaseXComposition, MetaEstimatorMixin):
n_jobs : int (default: 1)
The number of CPUs to use for evaluating different feature subsets
in parallel. -1 means 'all CPUs'.
early_stop_rounds : int (default 0)
Enable early stopping criterion when > 0, this value determines the
number of iterations after which, if no performance boost has been
seen, execution is stopped.
Used only when `k_features == 'best'` or `k_features == 'parsimonious'`.
pre_dispatch : int, or string (default: '2*n_jobs')
Controls the number of jobs that get dispatched
during parallel execution if `n_jobs > 1` or `n_jobs=-1`.
Expand Down Expand Up @@ -174,10 +179,12 @@ class SequentialFeatureSelector(_BaseXComposition, MetaEstimatorMixin):
http://rasbt.github.io/mlxtend/user_guide/feature_selection/SequentialFeatureSelector/

"""

def __init__(self, estimator, k_features=1,
forward=True, floating=False,
verbose=0, scoring=None,
cv=5, n_jobs=1,
early_stop_rounds=0,
pre_dispatch='2*n_jobs',
clone_estimator=True,
fixed_features=None):
Expand All @@ -201,6 +208,13 @@ def __init__(self, estimator, k_features=1,
self.verbose = verbose
self.clone_estimator = clone_estimator

if not isinstance(early_stop_rounds, int) or early_stop_rounds < 0:
raise ValueError('Number of early stopping rounds should be '
'an integer value greater than or equal to 0. '
'Got %s' % early_stop_rounds)

self.early_stop_rounds = early_stop_rounds

if fixed_features is not None:
if isinstance(self.k_features, int) and \
self.k_features <= len(fixed_features):
Expand Down Expand Up @@ -385,6 +399,12 @@ def fit(self, X, y, custom_feature_names=None, groups=None, **fit_params):
select_in_range = False
k_to_select = self.k_features

if self.early_stop_rounds and isinstance(self.k_features, str) and\
not self.k_features in {'best', 'parsimonious'}:
raise ValueError('Early stopping is allowed only when `k_features`'
' is "best" or "parsimonious". Got'
' `k_features=%s`' % self.k_features)

orig_set = set(range(X_.shape[1]))
n_features = X_.shape[1]

Expand Down Expand Up @@ -424,6 +444,8 @@ def fit(self, X, y, custom_feature_names=None, groups=None, **fit_params):
}
best_subset = None
k_score = 0
best_score = -np.inf
early_stop_count = self.early_stop_rounds

try:
while k != k_to_select:
Expand Down Expand Up @@ -550,6 +572,18 @@ def fit(self, X, y, custom_feature_names=None, groups=None, **fit_params):
X)
raise KeyboardInterrupt

# early stop
if self.early_stop_rounds and k != k_to_select:
if k_score <= best_score:
early_stop_count -= 1
if early_stop_count == 0:
print('Performance has not improved for %d rounds. '
'Stopping now!' % self.early_stop_rounds)
break
else:
early_stop_count = self.early_stop_rounds
best_score = k_score

except KeyboardInterrupt:
self.interrupted_ = True
sys.stderr.write('\nSTOPPING EARLY DUE TO KEYBOARD INTERRUPT...')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -978,8 +978,54 @@ def test_custom_feature_names():
n_jobs=1)

sfs1 = sfs1.fit(X, y, custom_feature_names=(
'sepal length', 'sepal width', 'petal length', 'petal width'))
'sepal length', 'sepal width', 'petal length', 'petal width'))
assert sfs1.k_feature_idx_ == (1, 3)
assert sfs1.k_feature_names_ == ('sepal width', 'petal width')
assert sfs1.subsets_[2]['feature_names'] == ('sepal width',
'petal width')


def test_run_forward_earlystop():
    """Forward selection with early stopping halts before visiting all sizes.

    Pure-noise columns are appended to the iris features so the CV score
    stops improving partway through; with ``early_stop_rounds=2`` the
    selector should terminate before reaching the full feature count.
    """
    np.random.seed(0)
    iris = load_iris()
    X_iris = iris.data
    y_iris = iris.target
    # Noise block with the same shape as the real features.
    noise = np.random.randn(X_iris.shape[0], X_iris.shape[1])
    X_iris_with_noise = np.concatenate((X_iris, noise), axis=1)
    selector = SFS(estimator=KNeighborsClassifier(),
                   k_features='best',
                   forward=True,
                   floating=False,
                   early_stop_rounds=2,
                   verbose=0)
    selector.fit(X_iris_with_noise, y_iris)
    # Early stop: fewer visited subset sizes than total feature count.
    assert len(selector.subsets_) < X_iris_with_noise.shape[1]
    # The reported best score dominates every visited subset's score.
    assert all(selector.k_score_ >= selector.subsets_[k]['avg_score']
               for k in selector.subsets_)


def test_run_backward_earlystop():
    """Backward elimination with early stopping terminates after > 1 step.

    Same noisy-iris setup as the forward test, but eliminating features
    from the full set; early stopping should still leave more than one
    recorded subset while keeping the best score maximal.
    """
    np.random.seed(0)
    iris = load_iris()
    X_iris = iris.data
    y_iris = iris.target
    # Noise block with the same shape as the real features.
    noise = np.random.randn(X_iris.shape[0], X_iris.shape[1])
    X_iris_with_noise = np.concatenate((X_iris, noise), axis=1)
    selector = SFS(estimator=KNeighborsClassifier(),
                   k_features='best',
                   forward=False,
                   floating=False,
                   early_stop_rounds=2,
                   verbose=0)
    selector.fit(X_iris_with_noise, y_iris)
    # More than one subset size was evaluated before stopping.
    assert len(selector.subsets_) > 1
    # The reported best score dominates every visited subset's score.
    assert all(selector.k_score_ >= selector.subsets_[k]['avg_score']
               for k in selector.subsets_)