Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove old dask-glm based logistic regression #6028

Merged
3 changes: 0 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ dependencies:
- numpydoc
- nvcc_linux-64=11.8
- packaging
- pip
- pydata-sphinx-theme!=0.14.2
- pylibraft==24.10.*,>=0.0.0a0
- pynndescent
Expand Down Expand Up @@ -78,6 +77,4 @@ dependencies:
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-118_arch-x86_64
3 changes: 0 additions & 3 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ dependencies:
- numpy>=1.23,<3.0a0
- numpydoc
- packaging
- pip
- pydata-sphinx-theme!=0.14.2
- pylibraft==24.10.*,>=0.0.0a0
- pynndescent
Expand Down Expand Up @@ -74,6 +73,4 @@ dependencies:
- sysroot_linux-64==2.17
- treelite==4.3.0
- umap-learn==0.5.6
- pip:
- dask-glm==0.3.0
name: all_cuda-125_arch-x86_64
8 changes: 0 additions & 8 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -518,14 +518,6 @@ dependencies:
- umap-learn==0.5.6
- pynndescent
- setuptools # Needed on Python 3.12 for dask-glm, which requires pkg_resources but Python 3.12 doesn't have setuptools by default
- output_types: conda
packages:
- pip
- pip:
- dask-glm==0.3.0
- output_types: pyproject
packages:
- dask-glm==0.3.0
test_notebooks:
common:
- output_types: [conda, requirements]
Expand Down
Empty file.
27 changes: 0 additions & 27 deletions python/cuml/cuml/dask/extended/linear_model/__init__.py

This file was deleted.

219 changes: 0 additions & 219 deletions python/cuml/cuml/dask/extended/linear_model/logistic_regression.py

This file was deleted.

86 changes: 0 additions & 86 deletions python/cuml/cuml/tests/dask/test_dask_logistic_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,92 +103,6 @@ def make_classification_dataset(
return X, y


def select_sk_solver(cuml_solver):
    """Translate a cuML solver name into the scikit-learn solver to compare against.

    ``"newton"`` maps to ``"newton-cg"``; ``"admm"`` and ``"lbfgs"`` both map
    to ``"lbfgs"``. For any other solver name the calling test is marked as
    an expected failure through ``pytest.xfail``.
    """
    if cuml_solver == "newton":
        return "newton-cg"
    if cuml_solver in ("admm", "lbfgs"):
        return "lbfgs"
    # Nothing on the scikit-learn side corresponds to this solver.
    pytest.xfail("No matched sklearn solver")


@pytest.mark.mg
@pytest.mark.parametrize("nrows", [1e5])
@pytest.mark.parametrize("ncols", [20])
@pytest.mark.parametrize("n_parts", [2, 6])
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
@pytest.mark.parametrize("gpu_array_input", [False, True])
@pytest.mark.parametrize(
    "solver", ["admm", "gradient_descent", "newton", "lbfgs", "proximal_grad"]
)
def test_lr_fit_predict_score(
    nrows,
    ncols,
    n_parts,
    fit_intercept,
    datatype,
    gpu_array_input,
    solver,
    client,
):
    """Compare the dask-extended LogisticRegression against scikit-learn.

    Fits both models on the same generated classification dataset and checks
    that the cuML accuracy (from ``predict`` and from ``score``) is at least
    the scikit-learn accuracy or within 1e-3 of it, and that the predicted
    probabilities differ by at most 0.05.
    """
    # Resolved first so solvers without a scikit-learn match xfail early.
    reference_solver = select_sk_solver(cuml_solver=solver)

    def load_serializers():
        import cuml.comm.serialize  # NOQA

    client.run(load_serializers)

    from cuml.dask.extended.linear_model import (
        LogisticRegression as cumlLR_dask,
    )

    def accuracy_is_comparable(cuml_acc, sk_acc):
        # Passes when cuML is at least as accurate, or within 1e-3.
        return (cuml_acc >= sk_acc) | (np.abs(cuml_acc - sk_acc) < 1e-3)

    n_info = 5
    X, y = make_classification_dataset(
        datatype, int(nrows), int(ncols), n_info
    )

    gX, gy = _prep_training_data(client, X, y, n_parts)

    if gpu_array_input:
        # Switch to the underlying arrays and give them cupy metadata.
        gX = gX.values
        gX._meta = cp.asarray(gX._meta)
        gy = gy.values
        gy._meta = cp.asarray(gy._meta)

    cuml_model = cumlLR_dask(
        fit_intercept=fit_intercept, solver=solver, max_iter=10
    )

    # fit + predict on the cuML side
    cuml_model.fit(gX, gy)
    cuml_labels = cuml_model.predict(gX)
    accuracy_cuml = accuracy_score(y, cuml_labels.compute().get())

    # fit + predict on the scikit-learn side
    sk_model = skLR(
        fit_intercept=fit_intercept, solver=reference_solver, max_iter=10
    )
    sk_model.fit(X, y)
    sk_labels = sk_model.predict(X)
    accuracy_sk = accuracy_score(y, sk_labels)

    assert accuracy_is_comparable(accuracy_cuml, accuracy_sk)

    # same comparison through each model's score()
    accuracy_cuml = cuml_model.score(gX, gy).compute().item()
    accuracy_sk = sk_model.score(X, y)

    assert accuracy_is_comparable(accuracy_cuml, accuracy_sk)

    # predicted probabilities should differ by <= 5%
    # even with different solvers (arbitrary)
    probs_cuml = cuml_model.predict_proba(gX).compute()
    probs_sk = sk_model.predict_proba(X)[:, 1]
    largest_gap = np.abs(probs_sk - probs_cuml.get()).max()
    assert largest_gap <= 0.05


@pytest.mark.mg
@pytest.mark.parametrize("n_parts", [2])
@pytest.mark.parametrize("datatype", [np.float32, np.float64])
Expand Down
1 change: 0 additions & 1 deletion python/cuml/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ classifiers = [
[project.optional-dependencies]
test = [
"cython>=3.0.0",
"dask-glm==0.3.0",
"dask-ml",
"hdbscan>=0.8.38,<0.8.39",
"hypothesis>=6.0,<7",
Expand Down
Loading