Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent internal usage of expensive APIs #10263

Merged
merged 8 commits into from
Feb 16, 2022
8 changes: 4 additions & 4 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
##############################################
# cuDF GPU build and test script for CI #
##############################################
Expand Down Expand Up @@ -249,15 +249,15 @@ fi

cd "$WORKSPACE/python/cudf"
gpuci_logger "Python py.test for cuDF"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config=.coveragerc --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope cudf

cd "$WORKSPACE/python/dask_cudf"
gpuci_logger "Python py.test for dask-cudf"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term dask_cudf

cd "$WORKSPACE/python/custreamz"
gpuci_logger "Python py.test for cuStreamz"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz
vyasr marked this conversation as resolved.
Show resolved Hide resolved

gpuci_logger "Test notebooks"
"$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log
Expand Down
31 changes: 31 additions & 0 deletions python/cudf/cudf/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,41 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION.

import os
import pathlib

import pytest

import rmm # noqa: F401

_CURRENT_DIRECTORY = str(pathlib.Path(__file__).resolve().parent)


@pytest.fixture(scope="session")
def datadir():
return pathlib.Path(__file__).parent / "data"


# To set and remove the NO_EXTERNAL_ONLY_APIS environment variable we must use
# the sessionstart and sessionfinish hooks rather than a simple autouse,
# session-scope fixture because we need to set these variable before collection
# occurs because the environment variable will be checked as soon as cudf is
# imported anywhere.
def pytest_sessionstart(session):
"""
Called after the Session object has been created and
before performing collection and entering the run test loop.
"""
os.environ["NO_EXTERNAL_ONLY_APIS"] = "1"
os.environ["_CUDF_TEST_ROOT"] = _CURRENT_DIRECTORY


def pytest_sessionfinish(session, exitstatus):
"""
Called after whole test run finished, right before
returning the exit status to the system.
"""
try:
del os.environ["NO_EXTERNAL_ONLY_APIS"]
del os.environ["_CUDF_TEST_ROOT"]
except KeyError:
pass
56 changes: 56 additions & 0 deletions python/cudf/cudf/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import decimal
import functools
import os
import traceback
from collections.abc import Sequence
from typing import FrozenSet, Set, Union

Expand Down Expand Up @@ -37,6 +39,60 @@
}


# The test root is set by pytest to support situations where tests are run from
# a source tree on a built version of cudf.
NO_EXTERNAL_ONLY_APIS = os.getenv("NO_EXTERNAL_ONLY_APIS")

_cudf_root = os.path.dirname(cudf.__file__)
# If the environment variable for the test root is not set, we default to
# using the path relative to the cudf root directory.
_tests_root = os.getenv("_CUDF_TEST_ROOT") or os.path.join(_cudf_root, "tests")


def _external_only_api(func, alternative=""):
"""Decorator to indicate that a function should not be used internally.

cudf contains many APIs that exist for pandas compatibility but are
intrinsically inefficient. For some of these cudf has internal
equivalents that are much faster. Usage of the slow public APIs inside
our implementation can lead to unnecessary performance bottlenecks.
Applying this decorator to such functions and setting the environment
variable NO_EXTERNAL_ONLY_APIS will cause such functions to raise
exceptions if they are called from anywhere inside cudf, making it easy
to identify and excise such usage.

The `alternative` should be a complete phrase or sentence since it will
be used verbatim in error messages.
"""

# If the first arg is a string then an alternative function to use in
# place of this API was provided, so we pass that to a subsequent call.
# It would be cleaner to implement this pattern by using a class
# decorator with a factory method, but there is no way to generically
# wrap docstrings on a class (we would need the docstring to be on the
# class itself, not instances, because that's what `help` looks at) and
# there is also no way to make mypy happy with that approach.
if isinstance(func, str):
return lambda actual_func: _external_only_api(actual_func, func)

if not NO_EXTERNAL_ONLY_APIS:
return func

@functools.wraps(func)
def wrapper(*args, **kwargs):
# Check the immediately preceding frame to see if it's in cudf.
frame, lineno = next(traceback.walk_stack(None))
fn = frame.f_code.co_filename
if _cudf_root in fn and _tests_root not in fn:
raise RuntimeError(
f"External-only API called in {fn} at line {lineno}. "
f"{alternative}"
)
return func(*args, **kwargs)

return wrapper


def scalar_broadcast_to(scalar, size, dtype=None):

if isinstance(size, (tuple, list)):
Expand Down