diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index 356955cf..727e5180 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -10,13 +10,21 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
+  pr-builder:
+    needs:
+      - build
+      - test-conda-nightly-env
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
   build:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10
     with:
       build_type: pull-request
-  pr-builder:
-    needs:
-      - build
+  test-conda-nightly-env:
     secrets: inherit
-    uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10
+    # We use a build workflow so that we get CPU jobs and high matrix coverage
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10
+    with:
+      build_type: pull-request
+      script: "ci/test_conda_nightly_env.sh"
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
new file mode 100644
index 00000000..bb57d711
--- /dev/null
+++ b/.github/workflows/test.yaml
@@ -0,0 +1,23 @@
+name: test
+
+on:
+  workflow_dispatch:
+    inputs:
+      branch:
+        required: true
+        type: string
+      date:
+        required: true
+        type: string
+      sha:
+        required: true
+        type: string
+
+jobs:
+  test-conda-nightly-env:
+    secrets: inherit
+    # We use a build workflow so that we get CPU jobs and high matrix coverage
+    uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-24.10
+    with:
+      build_type: pull-request
+      script: "ci/test_conda_nightly_env.sh"
diff --git a/ci/check_conda_nightly_env.py b/ci/check_conda_nightly_env.py
new file mode 100644
index 00000000..32bf4fae
--- /dev/null
+++ b/ci/check_conda_nightly_env.py
@@ -0,0 +1,167 @@
+"""Check that a RAPIDS nightly conda environment uses recent packages.
+
+Reads the JSON emitted by ``conda create --dry-run --json`` and fails when a
+package from the ``rapidsai-nightly`` channel is too old or carries no date in
+its build string.
+"""
+
+import json
+import re
+import subprocess
+import sys
+from datetime import datetime, timedelta
+
+
+OLD_PACKAGE_THRESHOLD_DAYS = 3
+
+EXCLUDED_PACKAGES = {
+    # These packages are not built every night:
+    "rapids",
+    "rapids-xgboost",
+    # These packages do not have date strings:
+    "cubinlinker",
+    "pynvjitlink",
+    "rapids-dask-dependency",
+    "libxgboost",
+    "py-xgboost",
+    "xgboost",
+    # TODO: Do we want ucx-proc on rapidsai or from conda-forge?
+    "ucx-proc",
+}
+
+# ANSI color codes used to highlight lines
+FAIL = "\033[31m"
+WARNING = "\033[33m"
+OKGREEN = "\033[32m"
+ENDC = "\033[0m"
+
+# Matches 6 digits starting with "2", which should be YYMMDD
+DATE_RE = re.compile(r"_(2\d{5})_")
+
+
+def is_rapids_nightly_package(package_info):
+    """Return True if the package comes from the rapidsai-nightly channel."""
+    return package_info["channel"] == "rapidsai-nightly"
+
+
+def get_package_date(package):
+    """Return the build date encoded in a package's build string.
+
+    Returns None for packages listed in EXCLUDED_PACKAGES, and for packages
+    whose build string has no valid ``_YYMMDD_`` component (a warning is
+    printed in that case).
+    """
+    if package["name"] in EXCLUDED_PACKAGES:
+        return None
+
+    match = DATE_RE.search(package["build_string"])
+    if match:
+        try:
+            return datetime.strptime(match.group(1), "%y%m%d")
+        except ValueError:
+            # Six digits that are not a calendar date (e.g. "_299999_"):
+            # report the package as undated instead of crashing.
+            pass
+
+    print(
+        f"{WARNING}Date string not found for {package['name']} "
+        f"in the build string '{package['build_string']}'.{ENDC}"
+    )
+    return None
+
+
+def check_env(json_path):
+    """Validate rapids conda environments.
+
+    Parses JSON output of `conda create` and checks the dates on the RAPIDS
+    packages to ensure nightlies are relatively new.
+
+    Returns an exit code value: 0 when every dated rapidsai-nightly package
+    is recent enough, 1 otherwise (including unreadable or error output).
+    """
+
+    exit_code = 0
+
+    with open(json_path) as f:
+        try:
+            json_data = json.load(f)
+        except ValueError as e:
+            print("Error: JSON data file from conda failed to load:")
+            print(e)
+            return 1
+
+    if "error" in json_data:
+        print("Error: conda failed:")
+        print()
+        print(json_data["error"])
+        return 1
+
+    try:
+        package_data = json_data["actions"]["LINK"]
+    except (KeyError, TypeError):
+        print("Error: conda JSON output has no 'actions' -> 'LINK' list.")
+        return 1
+
+    rapids_package_data = list(filter(is_rapids_nightly_package, package_data))
+
+    # Dictionary to store the packages and their dates
+    rapids_package_dates = {
+        package["name"]: get_package_date(package) for package in rapids_package_data
+    }
+
+    # If there are old packages, show an error
+    today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
+    old_threshold = today - timedelta(days=OLD_PACKAGE_THRESHOLD_DAYS)
+    old_packages = {
+        package: date
+        for package, date in rapids_package_dates.items()
+        if date is not None and date < old_threshold
+    }
+    if old_packages:
+        exit_code = 1
+        print()
+        print(
+            f"{FAIL}Error: The following packages are more than "
+            f"{OLD_PACKAGE_THRESHOLD_DAYS} days old:{ENDC}"
+        )
+        for package, date in sorted(old_packages.items()):
+            date_string = date.strftime("%Y-%m-%d")
+            print(f"{FAIL} - {(package + ':'):<24}\t{date_string}{ENDC}")
+
+    # If there are undated packages, show an error
+    undated_packages = {
+        package: date
+        for package, date in rapids_package_dates.items()
+        if package not in EXCLUDED_PACKAGES and date is None
+    }
+    if undated_packages:
+        exit_code = 1
+        print()
+        print(
+            f"{FAIL}Error: The following packages are missing dates in their build strings:{ENDC}"
+        )
+        for package in sorted(undated_packages):
+            print(f"{FAIL} - {package}{ENDC}")
+
+    print()
+    print(
+        f"The following packages are less than {OLD_PACKAGE_THRESHOLD_DAYS} days old:"
+    )
+    for package, date in sorted(rapids_package_dates.items()):
+        # Old packages were already reported above; listing them here too
+        # would contradict the heading.
+        if date is None or date < old_threshold:
+            continue
+        date_string = date.strftime("%Y-%m-%d")
+        status = WARNING if date < today else OKGREEN
+        print(f"{status} - {(package + ':'):<24}\t{date_string}{ENDC}")
+
+    return exit_code
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        print("Provide only one argument, the filepath to a JSON output from conda.")
+        sys.exit(1)
+
+    sys.exit(check_env(sys.argv[1]))
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 7892aec0..fd5ba876 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -31,6 +31,7 @@ function sed_runner() {
 }
 
 sed_runner "/RAPIDS_VER=/ s/[0-9][0-9].[0-9][0-9]/${NEXT_SHORT_TAG}/" ci/conda-pack.sh
+sed_runner "/RAPIDS_VERSION=/ s/[0-9][0-9].[0-9][0-9]/${NEXT_SHORT_TAG}/" ci/test_conda_nightly_env.sh
 
 for FILE in .github/workflows/*.yaml; do
   sed_runner "/shared-workflows/ s/@.*/@branch-${NEXT_SHORT_TAG}/g" "${FILE}"
diff --git a/ci/test_conda_nightly_env.sh b/ci/test_conda_nightly_env.sh
new file mode 100755
index 00000000..1cbd9e83
--- /dev/null
+++ b/ci/test_conda_nightly_env.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+set -euo pipefail
+
+RAPIDS_VERSION="24.10"
+CUDA_VERSION=${RAPIDS_CUDA_VERSION%.*}
+
+JSON_FILENAME="rapids_cuda${CUDA_VERSION}_py${RAPIDS_PY_VERSION}.json"
+
+rapids-logger "Creating conda environment with rapids=${RAPIDS_VERSION}, python=${RAPIDS_PY_VERSION}, cuda-version=${CUDA_VERSION}"
+
+rapids-conda-retry \
+    create \
+    -n "rapids-${RAPIDS_VERSION}" \
+    -c rapidsai-nightly \
+    -c conda-forge \
+    -c nvidia \
+    "rapids=${RAPIDS_VERSION}" \
+    "python=${RAPIDS_PY_VERSION}" \
+    "cuda-version=${CUDA_VERSION}" \
+    --dry-run \
+    --json \
+    | tee "${JSON_FILENAME}"
+
+rapids-logger "Parsing results from conda dry-run with rapids=${RAPIDS_VERSION}, python=${RAPIDS_PY_VERSION}, cuda-version=${CUDA_VERSION}"
+
+python ci/check_conda_nightly_env.py "${JSON_FILENAME}"