Enable pandas pytests for cudf.pandas (rapidsai#15147)

This PR enables the pandas pytest suite for `cudf.pandas`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Jake Awe (https://github.com/AyodeAwe)
  - Lawrence Mitchell (https://github.com/wence-)

URL: rapidsai#15147
PointKernel · Mar 12, 2024 · d48b904 · d48b904
1 parent 241825a
commit d48b904
Show file tree

Hide file tree

Showing 6 changed files with 46 additions and 32 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -28,7 +28,7 @@ jobs:
       - wheel-tests-dask-cudf
       - devcontainer
       - unit-tests-cudf-pandas
-      # - pandas-tests
+      - pandas-tests
       #- pandas-tests-diff
       #- pandas-tests-diff-comment
     secrets: inherit
@@ -156,21 +156,20 @@ jobs:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
     with:
-      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
       matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
       build_type: pull-request
       script: ci/cudf_pandas_scripts/run_tests.sh
-  # pandas-tests:
-  #   # run the Pandas unit tests using PR branch
-  #   needs: wheel-build-cudf
-  #   secrets: inherit
-  #   uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
-  #   with:
-  #     matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
-  #     build_type: pull-request
-  #     script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
-  #     # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
-  #     test_summary_show: "none"
+  pandas-tests:
+    # run the Pandas unit tests using PR branch
+    needs: wheel-build-cudf
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    with:
+      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
+      build_type: pull-request
+      script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
+      # Hide test failures because they exceed the GITHUB_STEP_SUMMARY output limit.
+      test_summary_show: "none"
   #pandas-tests-diff:
   #  # diff the results of running the Pandas unit tests and publish a job summary
   #  needs: [pandas-tests-main, pandas-tests-pr]

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -115,15 +115,15 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/cudf_pandas_scripts/run_tests.sh
-  # pandas-tests:
-  #   # run the Pandas unit tests
-  #   secrets: inherit
-  #   uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
-  #   with:
-  #     matrix_filter: map(select(.ARCH == "amd64")) | max_by(.CUDA_VER) | [.]
-  #     build_type: nightly
-  #     branch: ${{ inputs.branch }}
-  #     date: ${{ inputs.date }}
-  #     sha: ${{ inputs.sha }}
-  #     # pr mode uses the HEAD of the branch, which is also correct for nightlies
-  #     script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
+  pandas-tests:
+    # run the Pandas unit tests
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04
+    with:
+      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
+      build_type: nightly
+      branch: ${{ inputs.branch }}
+      date: ${{ inputs.date }}
+      sha: ${{ inputs.sha }}
+      # pr mode uses the HEAD of the branch, which is also correct for nightlies
+      script: ci/cudf_pandas_scripts/pandas-tests/run.sh pr
diff --git a/ci/cudf_pandas_scripts/pandas-tests/run.sh b/ci/cudf_pandas_scripts/pandas-tests/run.sh
@@ -33,10 +33,11 @@ mkdir -p "${RAPIDS_TESTS_DIR}"
 
 bash python/cudf/cudf/pandas/scripts/run-pandas-tests.sh \
   -n 10 \
-  --tb=line \
+  --tb=no \
   -m "not slow" \
   --max-worker-restart=3 \
   --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-pandas.xml" \
+  --dist worksteal \
   --report-log=${PANDAS_TESTS_BRANCH}.json 2>&1
 
 # summarize the results and save them to artifacts:

diff --git a/dependencies.yaml b/dependencies.yaml
@@ -717,8 +717,11 @@ dependencies:
         packages:
           # dependencies to run pandas tests
           # https://github.com/pandas-dev/pandas/blob/main/environment.yml
-          # pandas[all] includes all of the required dependencies
-          - pandas[all]
+          # pandas[...] includes all of the required dependencies.
+          # Intentionally excluding `postgresql` because of
+          # installation issues with `psycopg2`.
+          - pandas[test, pyarrow, performance, computation, fss, excel, parquet, feather, hdf5, spss, html, xml, plot, output-formatting, clipboard, compression]
+          - pytest-reportlog
   test_python_cudf_pandas:
     common:
       - output_types: pyproject

diff --git a/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh b/python/cudf/cudf/pandas/scripts/run-pandas-tests.sh
@@ -22,7 +22,18 @@ set -euo pipefail
 # of Pandas installed.
 PANDAS_VERSION=$(python -c "import pandas; print(pandas.__version__)")
 
-PYTEST_IGNORES="--ignore=tests/io/test_user_agent.py --ignore=tests/interchange/test_impl.py --ignore=tests/window/test_dtypes.py --ignore=tests/strings/test_api.py --ignore=tests/window/test_numba.py"
+PYTEST_IGNORES="--ignore=tests/io/test_user_agent.py \
+--ignore=tests/interchange/test_impl.py \
+--ignore=tests/window/test_dtypes.py \
+--ignore=tests/strings/test_api.py \
+--ignore=tests/window/test_numba.py \
+--ignore=tests/window \
+--ignore=tests/io/pytables \
+--ignore=tests/plotting \
+--ignore=tests/scalar \
+--ignore=tests/series/test_arithmetic.py \
+--ignore=tests/tslibs/test_parsing.py \
+--ignore=tests/io/parser/common/test_read_errors.py"
 
 mkdir -p pandas-testing
 cd pandas-testing
@@ -185,7 +196,6 @@ and not test_numpy_ufuncs_basic[nullable_float-rad2deg]"
 PANDAS_CI="1" python -m pytest -p cudf.pandas \
     -v -m "not single_cpu and not db" \
     -k "not test_overwrite_warns and not test_complex_series_frame_alignment and not test_to_parquet_gcs_new_file and not test_qcut_nat and not test_add and not test_ismethods and $TEST_NUMPY_UFUNCS_BASIC_FLAKY" \
-    --durations=50 \
     --import-mode=importlib \
     -o xfail_strict=True \
     ${PYTEST_IGNORES} \

diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
 [build-system]
 build-backend = "scikit_build_core.build"
@@ -69,7 +69,8 @@ test = [
     "tzdata",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 pandas-tests = [
-    "pandas[all]",
+    "pandas[test, pyarrow, performance, computation, fss, excel, parquet, feather, hdf5, spss, html, xml, plot, output-formatting, clipboard, compression]",
+    "pytest-reportlog",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 cudf-pandas-tests = [
     "ipython",