From e38e7595653bca8bd550996c2bf78d3936e364e9 Mon Sep 17 00:00:00 2001 From: Stu Hood Date: Mon, 13 Apr 2020 23:59:30 -0700 Subject: [PATCH] Port from cffi to the cpython crate. [ci skip-jvm-tests] --- 3rdparty/python/requirements.txt | 1 - build-support/bin/native/bootstrap_cffi.sh | 27 - src/python/pants/backend/python/rules/pex.py | 2 +- src/python/pants/engine/BUILD | 1 - src/python/pants/engine/native.py | 948 ++-------- src/python/pants/engine/scheduler.py | 200 +- src/rust/engine/Cargo.lock | 68 +- src/rust/engine/Cargo.toml | 1 + src/rust/engine/engine_cffi/Cargo.toml | 7 +- src/rust/engine/engine_cffi/build.rs | 151 +- src/rust/engine/engine_cffi/cbindgen.toml | 41 - .../engine/engine_cffi/src/cffi_externs.rs | 25 - src/rust/engine/engine_cffi/src/lib.rs | 1674 +++++++++-------- src/rust/engine/logging/src/logger.rs | 13 +- src/rust/engine/src/context.rs | 3 - src/rust/engine/src/core.rs | 102 +- src/rust/engine/src/externs.rs | 818 +++----- src/rust/engine/src/handles.rs | 94 - src/rust/engine/src/interning.rs | 82 +- src/rust/engine/src/intrinsics.rs | 25 +- src/rust/engine/src/lib.rs | 4 +- src/rust/engine/src/nodes.rs | 61 +- src/rust/engine/src/types.rs | 8 +- .../base/test_exception_sink_integration.py | 2 +- 24 files changed, 1501 insertions(+), 2857 deletions(-) delete mode 100755 build-support/bin/native/bootstrap_cffi.sh delete mode 100644 src/rust/engine/engine_cffi/cbindgen.toml delete mode 100644 src/rust/engine/engine_cffi/src/cffi_externs.rs delete mode 100644 src/rust/engine/src/handles.rs diff --git a/3rdparty/python/requirements.txt b/3rdparty/python/requirements.txt index bcb123003f0..bfec87271d7 100644 --- a/3rdparty/python/requirements.txt +++ b/3rdparty/python/requirements.txt @@ -1,6 +1,5 @@ ansicolors==1.1.8 beautifulsoup4>=4.6.0,<4.7 -cffi==1.13.2 contextlib2==0.5.5 coverage>=4.5,<4.6 dataclasses==0.6 diff --git a/build-support/bin/native/bootstrap_cffi.sh b/build-support/bin/native/bootstrap_cffi.sh deleted file mode 100755 index 4984f8bfe02..00000000000 --- a/build-support/bin/native/bootstrap_cffi.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -set -e - -REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../../.. && pwd -P)" - -# shellcheck source=build-support/pants_venv -source "${REPO_ROOT}/build-support/pants_venv" - -if (( $# != 2 )); then - cat << USAGE -Usage: $0 -USAGE - exit 1 -fi -readonly output_dir="$1" -readonly scheduler_bindings_path="$2" - -activate_pants_venv 1>&2 - -PYTHONPATH="${REPO_ROOT}/src/python:${PYTHONPATH}" exec python << BOOTSTRAP_C_SOURCE -from pants.engine.native import bootstrap_c_source - -bootstrap_c_source("${scheduler_bindings_path}", "${output_dir}") -BOOTSTRAP_C_SOURCE diff --git a/src/python/pants/backend/python/rules/pex.py b/src/python/pants/backend/python/rules/pex.py index e1a23c41fcb..99d63e9bf25 100644 --- a/src/python/pants/backend/python/rules/pex.py +++ b/src/python/pants/backend/python/rules/pex.py @@ -345,7 +345,7 @@ async def create_pex( argv.extend(pex_debug.iter_pex_args()) if python_setup.resolver_jobs: - argv.extend(["--jobs", python_setup.resolver_jobs]) + argv.extend(["--jobs", str(python_setup.resolver_jobs)]) if python_setup.manylinux: argv.extend(["--manylinux", python_setup.manylinux]) diff --git a/src/python/pants/engine/BUILD b/src/python/pants/engine/BUILD index d883049b33f..43357dbea66 100644 --- a/src/python/pants/engine/BUILD +++ b/src/python/pants/engine/BUILD @@ -262,7 +262,6 @@ python_library( ':interactive_runner', ':isolated_process', ':selectors', - '3rdparty/python:cffi', '3rdparty/python:setuptools', 'src/python/pants/binaries', 'src/python/pants/util:dirutil', diff --git a/src/python/pants/engine/native.py b/src/python/pants/engine/native.py index 2a44a379a25..dda738ca1e0 100644 --- a/src/python/pants/engine/native.py +++ b/src/python/pants/engine/native.py @@ -1,18 +1,14 @@ -# Copyright 2016 Pants project contributors (see CONTRIBUTORS.md). +# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). # Licensed under the Apache License, Version 2.0 (see LICENSE). import importlib import logging import os -import re import sys -import sysconfig -import traceback from contextlib import closing from types import CoroutineType -from typing import Any, Iterable, List, NamedTuple, Tuple, Type, cast +from typing import Iterable, List, cast -import cffi import pkg_resources from pants.base.project_tree import Dir, File, Link @@ -37,471 +33,64 @@ from pants.engine.platform import Platform from pants.engine.selectors import Get from pants.engine.unions import union -from pants.util.contextutil import temporary_dir -from pants.util.dirutil import read_file, safe_mkdir, safe_mkdtemp -from pants.util.memo import memoized_classproperty, memoized_property +from pants.util.dirutil import safe_mkdtemp +from pants.util.memo import memoized_property from pants.util.meta import SingletonMetaclass -from pants.util.ordered_set import FrozenOrderedSet logger = logging.getLogger(__name__) NATIVE_ENGINE_MODULE = "native_engine" -# NB: This is a "patch" applied to CFFI's generated sources to remove the ifdefs that would -# usually cause only one of the two module definition functions to be defined. Instead, we define -# both. Since `patch` is not available in all relevant environments (notably, many docker images), -# this is accomplished using string replacement. To (re)-generate this patch, fiddle with the -# unmodified output of `ffibuilder.emit_c_code`. -CFFI_C_PATCH_BEFORE = """ -# ifdef _MSC_VER - PyMODINIT_FUNC -# if PY_MAJOR_VERSION >= 3 - PyInit_native_engine(void) { return NULL; } -# else - initnative_engine(void) { } -# endif -# endif -#elif PY_MAJOR_VERSION >= 3 -PyMODINIT_FUNC -PyInit_native_engine(void) -{ - return _cffi_init("native_engine", 0x2601, &_cffi_type_context); -} -#else -PyMODINIT_FUNC -initnative_engine(void) -{ - _cffi_init("native_engine", 0x2601, &_cffi_type_context); -} -#endif -""" -CFFI_C_PATCH_AFTER = """ -#endif - -PyObject* // PyMODINIT_FUNC for PY3 -wrapped_PyInit_native_engine(void) -{ - return _cffi_init("native_engine", 0x2601, &_cffi_type_context); -} - -void // PyMODINIT_FUNC for PY2 -wrapped_initnative_engine(void) -{ - _cffi_init("native_engine", 0x2601, &_cffi_type_context); -} -""" - - -def get_build_cflags(): - """Synthesize a CFLAGS env var from the current python env for building of C modules.""" - return "{} {} -I{}".format( - sysconfig.get_config_var("BASECFLAGS"), - sysconfig.get_config_var("OPT"), - sysconfig.get_path("include"), - ) - - -_preprocessor_directive_replacement_stub = "HACKY_CDEF_PREPROCESSOR_DIRECTIVE" - - -def _hackily_rewrite_scheduler_bindings(bindings): - # We need #include lines and header guards in the generated C source file, but this won't parse in - # the .cdef call (it can't handle any preprocessor directives), so we put them behind a comment - # line for now. - preprocessor_directives_removed = re.sub( - r"^(#.*)$", - r"// {}: \1".format(_preprocessor_directive_replacement_stub), - bindings, - flags=re.MULTILINE, - ) - # This is an opaque struct member, which is not exposed to the FFI (and errors if this is - # removed). - hidden_vec_pyresult = re.sub( - r"^.*Vec_PyResult nodes;.*$", - "// Additional fields removed", - preprocessor_directives_removed, - flags=re.MULTILINE, - ) - # The C bindings generated for tuple structs by default use _0, _1, etc for members. The cffi - # library doesn't allow leading underscores on members like that, so we produce e.g. tup_0 - # instead. This works because the header file produced by cbindgen is reliably formatted. - positional_fields_prefixed = re.sub( - r"(_[0-9]+;)$", r"tup\1", hidden_vec_pyresult, flags=re.MULTILINE - ) - # Avoid clashing with common python symbols (we again assume the generated bindings are reliably - # formatted). - special_python_symbols_mangled = re.sub(r"\bid\b", "id_", positional_fields_prefixed) - return special_python_symbols_mangled - - -def _hackily_recreate_includes_for_bindings(bindings): - # Undo the mangling we did for preprocessor directives such as #include lines previously so that - # the generated C source file will have access to the necessary includes for the types produced by - # cbindgen. - return re.sub( - r"^// {}: (.*)$".format(_preprocessor_directive_replacement_stub), - r"\1", - bindings, - flags=re.MULTILINE, - ) - - -def bootstrap_c_source(scheduler_bindings_path, output_dir, module_name=NATIVE_ENGINE_MODULE): - """Bootstrap an external CFFI C source file.""" - - safe_mkdir(output_dir) - - with temporary_dir() as tempdir: - temp_output_prefix = os.path.join(tempdir, module_name) - real_output_prefix = os.path.join(output_dir, module_name) - temp_c_file = "{}.c".format(temp_output_prefix) - c_file = "{}.c".format(real_output_prefix) - env_script = "{}.cflags".format(real_output_prefix) - - # Preprocessor directives won't parse in the .cdef calls, so we have to hide them for now. - scheduler_bindings_content = read_file(scheduler_bindings_path) - scheduler_bindings = _hackily_rewrite_scheduler_bindings(scheduler_bindings_content) - - ffibuilder = cffi.FFI() - ffibuilder.cdef(scheduler_bindings) - ffibuilder.cdef(_FFISpecification.format_cffi_externs()) - ffibuilder.set_source(module_name, scheduler_bindings) - ffibuilder.emit_c_code(temp_c_file) - - # Work around https://github.com/rust-lang/rust/issues/36342 by renaming initnative_engine to - # wrapped_initnative_engine so that the rust code can define the symbol initnative_engine. - # - # If we dont do this, we end up at the mercy of the implementation details of rust's stripping - # and LTO. In the past we have found ways to trick it into not stripping symbols which was handy - # (it kept the binary working) but inconvenient (it was relying on unspecified behavior, it meant - # our binaries couldn't be stripped which inflated them by 2~3x, and it reduced the amount of LTO - # we could use, which led to unmeasured performance hits). - # - # We additionally remove the ifdefs that apply conditional `init` logic for Py2 vs Py3, in order - # to define a module that is loadable by either 2 or 3. - # TODO: Because PyPy uses the same `init` function name regardless of the python version, this - # trick does not work there: we leave its conditional in place. - file_content = read_file(temp_c_file) - if CFFI_C_PATCH_BEFORE not in file_content: - raise Exception("The patch for the CFFI generated code will not apply cleanly.") - file_content = file_content.replace(CFFI_C_PATCH_BEFORE, CFFI_C_PATCH_AFTER) - - # Extract the preprocessor directives we had to hide to get the .cdef call to parse. - file_content = _hackily_recreate_includes_for_bindings(file_content) - - _replace_file(c_file, file_content) - - # Write a shell script to be sourced at build time that contains inherited CFLAGS. - _replace_file(env_script, get_build_cflags()) - - -def _replace_file(path, content): - """Writes a file if it doesn't already exist with the same content. - - This is useful because cargo uses timestamps to decide whether to compile things. - """ - if os.path.exists(path): - with open(path, "r") as f: - if content == f.read(): - print("Not overwriting {} because it is unchanged".format(path), file=sys.stderr) - return - - with open(path, "w") as f: - f.write(content) - - -class _ExternSignature(NamedTuple): - """A type signature for a python-defined FFI function.""" - return_type: str - method_name: str - arg_types: Tuple[str, ...] +class Externs: + """Methods exposed from Python to Rust. - def pretty_print(self): - return " {ret}\t{name}({args});".format( - ret=self.return_type, name=self.method_name, args=", ".join(self.arg_types) - ) - - -def _extern_decl(return_type, arg_types): - """A decorator for methods corresponding to extern functions. All types should be strings. - - The _FFISpecification class is able to automatically convert these into method declarations for - cffi. + TODO: These could be implemented in Rust in `externs.rs` via the cpython API. """ - def wrapper(func): - signature = _ExternSignature( - return_type=str(return_type), method_name=str(func.__name__), arg_types=tuple(arg_types) - ) - func.extern_signature = signature - return func - - return wrapper - - -class _FFISpecification(object): - def __init__(self, ffi, lib): - self._ffi = ffi - self._lib = lib - - @memoized_classproperty - def _extern_fields(cls): - return { - field_name: f - for field_name, f in cls.__dict__.items() - if hasattr(f, "extern_signature") - } - - @classmethod - def format_cffi_externs(cls): - """Generate stubs for the cffi bindings from @_extern_decl methods.""" - extern_decls = [f.extern_signature.pretty_print() for _, f in cls._extern_fields.items()] - return 'extern "Python" {\n' + "\n".join(extern_decls) + "\n}\n" - - def register_cffi_externs(self, native): - """Registers the @_extern_decl methods with our ffi instance. - - Also establishes an `onerror` handler for each extern method which stores any exception in the - `native` object so that it can be retrieved later. See - https://cffi.readthedocs.io/en/latest/using.html#extern-python-reference for more info. - """ - native.reset_cffi_extern_method_runtime_exceptions() - - def exc_handler(exc_type, exc_value, traceback): - error_info = native.CFFIExternMethodRuntimeErrorInfo(exc_type, exc_value, traceback) - native.add_cffi_extern_method_runtime_exception(error_info) + def __init__(self, lib): + self.lib = lib - for field_name, _ in self._extern_fields.items(): - bound_method = getattr(self, field_name) - self._ffi.def_extern(onerror=exc_handler)(bound_method) - - def to_py_str(self, msg_ptr, msg_len): - return bytes(self._ffi.buffer(msg_ptr, msg_len)).decode() - - @classmethod - def call(cls, c, func, args): - try: - val = func(*args) - is_throw = False - except Exception as e: - val = e - is_throw = True - e._formatted_exc = traceback.format_exc() - - return PyResult(is_throw, c.to_value(val)) - - @_extern_decl("TypeId", ["ExternContext*", "Handle*"]) - def extern_get_type_for(self, context_handle, val): - """Return a representation of the object's type.""" - c = self._ffi.from_handle(context_handle) - obj = self._ffi.from_handle(val[0]) - type_id = c.to_id(type(obj)) - return TypeId(type_id) - - @_extern_decl("Handle", ["ExternContext*", "TypeId"]) - def extern_get_handle_from_type_id(self, context_handle, ty): - c = self._ffi.from_handle(context_handle) - obj = c.from_id(ty.tup_0) - return c.to_value(obj) - - @_extern_decl("bool", ["ExternContext*", "TypeId"]) - def extern_is_union(self, context_handle, type_id): - """Return whether or not a type is a member of a union.""" - c = self._ffi.from_handle(context_handle) - input_type = c.from_id(type_id.tup_0) - return union.is_instance(input_type) - - _do_raise_keyboardinterrupt_on_identify = bool( - os.environ.get("_RAISE_KEYBOARDINTERRUPT_IN_CFFI_IDENTIFY", False) + _do_raise_keyboardinterrupt_on_union = bool( + os.environ.get("_RAISE_KEYBOARDINTERRUPT_IN_CFFI_UNION", False) ) - @_extern_decl("Ident", ["ExternContext*", "Handle*"]) - def extern_identify(self, context_handle, val): - """Return a representation of the object's identity, including a hash and TypeId. - - `extern_get_type_for()` also returns a TypeId, but doesn't hash the object -- this allows - that method to be used on unhashable objects. `extern_identify()` returns a TypeId as well - to avoid having to make two separate Python calls when interning a Python object in - interning.rs, which requires both the hash and type. - """ + def is_union(self, input_type): + """Return whether or not a type is a member of a union.""" # NB: This check is exposed for testing error handling in CFFI methods. This code path should # never be active in normal pants usage. - if self._do_raise_keyboardinterrupt_on_identify: + if self._do_raise_keyboardinterrupt_on_union: raise KeyboardInterrupt("ctrl-c interrupted execution of a cffi method!") - c = self._ffi.from_handle(context_handle) - obj = self._ffi.from_handle(val[0]) - return c.identify(obj) - - @_extern_decl("_Bool", ["ExternContext*", "Handle*", "Handle*"]) - def extern_equals(self, context_handle, val1, val2): - """Return true if the given Handles are __eq__.""" - return self._ffi.from_handle(val1[0]) == self._ffi.from_handle(val2[0]) - - @_extern_decl("Handle", ["ExternContext*", "Handle*"]) - def extern_clone_val(self, context_handle, val): - """Clone the given Handle.""" - c = self._ffi.from_handle(context_handle) - return c.to_value(self._ffi.from_handle(val[0])) - - @_extern_decl("void", ["ExternContext*", "DroppingHandle*", "uint64_t"]) - def extern_drop_handles(self, context_handle, handles_ptr, handles_len): - """Drop the given Handles.""" - c = self._ffi.from_handle(context_handle) - handles = self._ffi.unpack(handles_ptr, handles_len) - c.drop_handles(handles) - - @_extern_decl("Buffer", ["ExternContext*", "TypeId"]) - def extern_type_to_str(self, context_handle, type_id): - """Given a TypeId, write type.__name__ and return it.""" - c = self._ffi.from_handle(context_handle) - return c.utf8_buf(str(c.from_id(type_id.tup_0).__name__)) - - # If we try to pass a None to the CFFI layer, it will silently fail - # in a weird way. So instead we use the empty string/bytestring as - # a de-facto null value, in both `extern_val_to_str` and - # `extern_val_to_bytes`. - @_extern_decl("Buffer", ["ExternContext*", "Handle*"]) - def extern_val_to_bytes(self, context_handle, val): - """Given a Handle for `obj`, write bytes(obj) and return it.""" - c = self._ffi.from_handle(context_handle) - v = c.from_value(val[0]) - v_bytes = b"" if v is None else bytes(v) - return c.buf(v_bytes) - - @_extern_decl("Buffer", ["ExternContext*", "Handle*"]) - def extern_val_to_str(self, context_handle, val): - """Given a Handle for `obj`, write str(obj) and return it.""" - c = self._ffi.from_handle(context_handle) - v = c.from_value(val[0]) - v_str = "" if v is None else str(v) - return c.utf8_buf(v_str) - - @_extern_decl("_Bool", ["ExternContext*", "Handle*"]) - def extern_val_to_bool(self, context_handle, val): - """Given a Handle for `obj`, write bool(obj) and return it.""" - c = self._ffi.from_handle(context_handle) - v = c.from_value(val[0]) - return bool(v) - - @_extern_decl("Handle", ["ExternContext*", "Handle**", "uint64_t"]) - def extern_store_tuple(self, context_handle, vals_ptr, vals_len): - """Given storage and an array of Handles, return a new Handle to represent the list.""" - c = self._ffi.from_handle(context_handle) - return c.to_value( - tuple(c.from_value(val[0]) for val in self._ffi.unpack(vals_ptr, vals_len)) - ) - - @_extern_decl("Handle", ["ExternContext*", "Handle**", "uint64_t"]) - def extern_store_set(self, context_handle, vals_ptr, vals_len): - """Given storage and an array of Handles, return a new Handle to represent the set.""" - c = self._ffi.from_handle(context_handle) - return c.to_value( - FrozenOrderedSet(c.from_value(val[0]) for val in self._ffi.unpack(vals_ptr, vals_len)) - ) + return union.is_instance(input_type) - @_extern_decl("Handle", ["ExternContext*", "Handle**", "uint64_t"]) - def extern_store_dict(self, context_handle, vals_ptr, vals_len): - """Given storage and an array of Handles, return a new Handle to represent the dict. - - Array of handles alternates keys and values (i.e. key0, value0, key1, value1, ...). - - It is assumed that an even number of values were passed. - """ - c = self._ffi.from_handle(context_handle) - tup = tuple(c.from_value(val[0]) for val in self._ffi.unpack(vals_ptr, vals_len)) - d = dict() - for i in range(0, len(tup), 2): - d[tup[i]] = tup[i + 1] - return c.to_value(d) - - @_extern_decl("Handle", ["ExternContext*", "uint8_t*", "uint64_t"]) - def extern_store_bytes(self, context_handle, bytes_ptr, bytes_len): - """Given a context and raw bytes, return a new Handle to represent the content.""" - c = self._ffi.from_handle(context_handle) - return c.to_value(bytes(self._ffi.buffer(bytes_ptr, bytes_len))) - - @_extern_decl("Handle", ["ExternContext*", "uint8_t*", "uint64_t"]) - def extern_store_utf8(self, context_handle, utf8_ptr, utf8_len): - """Given a context and UTF8 bytes, return a new Handle to represent the content.""" - c = self._ffi.from_handle(context_handle) - return c.to_value(self._ffi.string(utf8_ptr, utf8_len).decode()) - - @_extern_decl("Handle", ["ExternContext*", "uint64_t"]) - def extern_store_u64(self, context_handle, u64): - """Given a context and uint64_t, return a new Handle to represent the uint64_t.""" - c = self._ffi.from_handle(context_handle) - return c.to_value(u64) - - @_extern_decl("Handle", ["ExternContext*", "int64_t"]) - def extern_store_i64(self, context_handle, i64): - """Given a context and int64_t, return a new Handle to represent the int64_t.""" - c = self._ffi.from_handle(context_handle) - return c.to_value(i64) - - @_extern_decl("Handle", ["ExternContext*", "double"]) - def extern_store_f64(self, context_handle, f64): - """Given a context and double, return a new Handle to represent the double.""" - c = self._ffi.from_handle(context_handle) - return c.to_value(f64) - - @_extern_decl("Handle", ["ExternContext*", "_Bool"]) - def extern_store_bool(self, context_handle, b): - """Given a context and _Bool, return a new Handle to represent the _Bool.""" - c = self._ffi.from_handle(context_handle) - return c.to_value(b) - - @_extern_decl("Handle", ["ExternContext*", "Handle*", "uint8_t*", "uint64_t"]) - def extern_project_ignoring_type(self, context_handle, val, field_str_ptr, field_str_len): - """Given a Handle for `obj`, and a field name, project the field as a new Handle.""" - c = self._ffi.from_handle(context_handle) - obj = c.from_value(val[0]) - field_name = self.to_py_str(field_str_ptr, field_str_len) - projected = getattr(obj, field_name) - - return c.to_value(projected) - - @_extern_decl("HandleBuffer", ["ExternContext*", "Handle*", "uint8_t*", "uint64_t"]) - def extern_project_multi(self, context_handle, val, field_str_ptr, field_str_len): - """Given a Key for `obj`, and a field name, project the field as a list of Keys.""" - c = self._ffi.from_handle(context_handle) - obj = c.from_value(val[0]) - field_name = self.to_py_str(field_str_ptr, field_str_len) - - return c.vals_buf(tuple(c.to_value(p) for p in getattr(obj, field_name))) - - @_extern_decl("Handle", ["ExternContext*", "uint8_t*", "uint64_t"]) - def extern_create_exception(self, context_handle, msg_ptr, msg_len): + def create_exception(self, msg): """Given a utf8 message string, create an Exception object.""" - c = self._ffi.from_handle(context_handle) - msg = self.to_py_str(msg_ptr, msg_len) - return c.to_value(Exception(msg)) + return Exception(msg) + + def val_to_str(self, val): + """Given a `obj`, return str(obj).""" + return "" if val is None else str(val) - @_extern_decl("PyGeneratorResponse", ["ExternContext*", "Handle*", "Handle*"]) - def extern_generator_send(self, context_handle, func, arg): + def generator_send(self, func, arg): """Given a generator, send it the given value and return a response.""" - c = self._ffi.from_handle(context_handle) - response = self._ffi.new("PyGeneratorResponse*") try: - res = c.from_value(func[0]).send(c.from_value(arg[0])) + res = func.send(arg) if isinstance(res, Get): # Get. - response.tag = self._lib.Get - response.get = ( - TypeId(c.to_id(res.product)), - c.to_value(res.subject), - c.identify(res.subject), - TypeId(c.to_id(res.subject_declared_type)), + return self.lib.PyGeneratorResponseGet( + res.product, res.subject_declared_type, res.subject, ) elif type(res) in (tuple, list): # GetMulti. - response.tag = self._lib.GetMulti - response.get_multi = ( - c.type_ids_buf([TypeId(c.to_id(g.product)) for g in res]), - c.vals_buf([c.to_value(g.subject) for g in res]), - c.identities_buf([c.identify(g.subject) for g in res]), + return self.lib.PyGeneratorResponseGetMulti( + tuple( + self.lib.PyGeneratorResponseGet( + get.product, get.subject_declared_type, get.subject, + ) + for get in res + ) ) else: raise ValueError(f"internal engine error: unrecognized coroutine result {res}") @@ -510,246 +99,21 @@ def extern_generator_send(self, context_handle, func, arg): raise # This was a `return` from a coroutine, as opposed to a `StopIteration` raised # by calling `next()` on an empty iterator. - response.tag = self._lib.Broke - response.broke = (c.to_value(e.value),) - except Exception as e: - # Throw. - response.tag = self._lib.Throw - val = e - val._formatted_exc = traceback.format_exc() - response.throw = (c.to_value(val),) - - return response[0] - - @_extern_decl("PyResult", ["ExternContext*", "Handle*", "Handle**", "uint64_t"]) - def extern_call(self, context_handle, func, args_ptr, args_len): - """Given a callable, call it.""" - c = self._ffi.from_handle(context_handle) - runnable = c.from_value(func[0]) - args = tuple(c.from_value(arg[0]) for arg in self._ffi.unpack(args_ptr, args_len)) - return self.call(c, runnable, args) - - -class TypeId(NamedTuple): - """Corresponds to the native object of the same name.""" - - tup_0: Any - - -class Key(NamedTuple): - """Corresponds to the native object of the same name.""" - - tup_0: Any - type_id: TypeId - - -class Function(NamedTuple): - """Corresponds to the native object of the same name.""" - - key: Key - - -class EngineTypes(NamedTuple): - """Python types that need to be passed to the engine. - - N.B. EngineTypes needs to correspond field-by-field to the Types struct defined in - `src/rust/engine/src/types.rs` in order to avoid breakage (field definition order matters, not - just the names of fields!). - """ - - construct_directory_digest: Function - directory_digest: TypeId - construct_snapshot: Function - snapshot: TypeId - construct_file_content: Function - construct_files_content: Function - files_content: TypeId - construct_process_result: Function - construct_materialize_directories_results: Function - construct_materialize_directory_result: Function - address: TypeId - path_globs: TypeId - directories_to_merge: TypeId - directory_with_prefix_to_strip: TypeId - directory_with_prefix_to_add: TypeId - input_files_content: TypeId - dir: TypeId - file: TypeId - link: TypeId - platform: TypeId - multi_platform_process: TypeId - process_result: TypeId - coroutine: TypeId - url_to_fetch: TypeId - string: TypeId - bytes: TypeId - construct_interactive_process_result: Function - interactive_process: TypeId - interactive_process_result: TypeId - snapshot_subset: TypeId - construct_platform: Function - - -class PyResult(NamedTuple): - """Corresponds to the native object of the same name.""" - - is_throw: bool - handle: Any - - -class RawResult(NamedTuple): - """Corresponds to the native object of the same name.""" - - is_throw: bool - handle: Any - raw_pointer: Any - - -class ExternContext: - """A wrapper around python objects used in static extern functions in this module. - - See comments in `src/rust/engine/src/interning.rs` for more information on the relationship - between `Key`s and `Handle`s. - """ - - def __init__(self, ffi, lib): - """ - :param CompiledCFFI ffi: The CFFI handle to the compiled native engine lib. - """ - self._ffi = ffi - self._lib = lib - - # A handle to this object to ensure that the native wrapper survives at least as - # long as this object. - self._handle = self._ffi.new_handle(self) - - # A lookup table for `id(type) -> types`. - self._types = {} - - # Outstanding FFI object handles. - self._handles = set() - - def buf(self, bytestring): - buf = self._ffi.new("uint8_t[]", bytestring) - return (buf, len(bytestring), self.to_value(buf)) - - def utf8_buf(self, string): - return self.buf(string.encode()) - - def utf8_buf_buf(self, strings): - bufs = [self.utf8_buf(string) for string in strings] - buf_buf = self._ffi.new("Buffer[]", bufs) - return (buf_buf, len(bufs), self.to_value(buf_buf)) - - def utf8_dict(self, d): - """Stores the dict as a list of interleaved keys and values, as utf8 strings.""" - bufs = [self.utf8_buf(item) for keyvalue in d.items() for item in keyvalue] - buf_buf = self._ffi.new("Buffer[]", bufs) - return (buf_buf, len(bufs), self.to_value(buf_buf)) - - def vals_buf(self, vals): - buf = self._ffi.new("Handle[]", vals) - return (buf, len(vals), self.to_value(buf)) - - def identities_buf(self, idents): - buf = self._ffi.new("Ident[]", idents) - return (buf, len(idents), self.to_value(buf)) - - def type_ids_buf(self, types): - buf = self._ffi.new("TypeId[]", types) - return (buf, len(types), self.to_value(buf)) - - def to_value(self, obj): - handle = self._ffi.new_handle(obj) - self._handles.add(handle) - return handle - - def from_value(self, val): - return self._ffi.from_handle(val) - - def raise_or_return(self, pyresult): - """Consumes the given PyResult to raise/return the exception/value it represents.""" - value = self.from_value(pyresult.handle) - self._handles.remove(pyresult.handle) - if pyresult.is_throw: - raise value - else: - return value - - def drop_handles(self, handles): - self._handles -= set(handles) - - def identify(self, obj): - """Return an Ident-shaped tuple for the given object.""" - try: - hash_ = hash(obj) - except TypeError as e: - raise TypeError(f"failed to hash object {obj}: {e}") from e - type_id = self.to_id(type(obj)) - return (hash_, TypeId(type_id)) - - def to_id(self, typ): - type_id = id(typ) - self._types[type_id] = typ - return type_id - - def from_id(self, type_id): - return self._types[type_id] - - def to_key(self, obj): - cdata = self._lib.key_for(self.to_value(obj)) - return Key(cdata.id_, TypeId(cdata.type_id.tup_0)) - - def from_key(self, key): - return self._lib.val_for(key) + return self.lib.PyGeneratorResponseBreak(e.value) class Native(metaclass=SingletonMetaclass): """Encapsulates fetching a platform specific version of the native portion of the engine.""" - _errors_during_execution = None - - class CFFIExternMethodRuntimeErrorInfo(NamedTuple): - """Encapsulates an exception raised when a CFFI extern is called so that it can be - displayed. - - When an exception is raised in the body of a CFFI extern, the `onerror` handler is used to - capture it, storing the exception info as an instance of `CFFIExternMethodRuntimeErrorInfo` with - `.add_cffi_extern_method_runtime_exception()`. The scheduler will then check whether any - exceptions were stored by calling `.consume_cffi_extern_method_runtime_exceptions()` after - specific calls to the native library which may raise. - - Note that `.consume_cffi_extern_method_runtime_exceptions()` will also clear out all stored - exceptions, so exceptions should be stored separately after consumption. - - Some ways that exceptions in CFFI extern methods can be handled are described in - https://cffi.readthedocs.io/en/latest/using.html#extern-python-reference. - """ - - exc_type: Type - exc_value: BaseException - traceback: Any - - def reset_cffi_extern_method_runtime_exceptions(self): - self._errors_during_execution = [] - - def _peek_cffi_extern_method_runtime_exceptions(self): - return self._errors_during_execution - - def consume_cffi_extern_method_runtime_exceptions(self): - res = self._peek_cffi_extern_method_runtime_exceptions() - self.reset_cffi_extern_method_runtime_exceptions() - return res - - def add_cffi_extern_method_runtime_exception(self, error_info): - assert isinstance(error_info, self.CFFIExternMethodRuntimeErrorInfo) - self._errors_during_execution.append(error_info) + def __init__(self): + self.externs = Externs(self.lib) + self.lib.externs_set(self.externs) class BinaryLocationError(Exception): pass @memoized_property - def binary(self): + def _binary(self): """Load and return the path to the native engine binary.""" lib_name = "{}.so".format(NATIVE_ENGINE_MODULE) lib_path = os.path.join(safe_mkdtemp(), lib_name) @@ -770,136 +134,50 @@ def binary(self): @memoized_property def lib(self): - """Load and return the native engine module.""" - lib = self.ffi.dlopen(self.binary) - _FFISpecification(self.ffi, lib).register_cffi_externs(self) - return lib - - @memoized_property - def ffi(self): - """A CompiledCFFI handle as imported from the native engine python module.""" - return getattr(self._ffi_module, "ffi") - - @memoized_property - def ffi_lib(self): - """A CFFI Library handle as imported from the native engine python module.""" - return getattr(self._ffi_module, "lib") - - @memoized_property - def _ffi_module(self): - """Load the native engine as a python module and register CFFI externs.""" - native_bin_dir = os.path.dirname(self.binary) + """Load the native engine as a python module.""" + native_bin_dir = os.path.dirname(self._binary) logger.debug("loading native engine python module from: %s", native_bin_dir) sys.path.insert(0, native_bin_dir) return importlib.import_module(NATIVE_ENGINE_MODULE) - @memoized_property - def context(self): - # We statically initialize a ExternContext to correspond to the queue of dropped - # Handles that the native code maintains. - def init_externs(): - context = ExternContext(self.ffi, self.lib) - none = self.ffi.from_handle(context._handle).to_value(None) - self.lib.externs_set( - context._handle, - logger.getEffectiveLevel(), - none, - self.ffi_lib.extern_call, - self.ffi_lib.extern_generator_send, - self.ffi_lib.extern_get_type_for, - self.ffi_lib.extern_get_handle_from_type_id, - self.ffi_lib.extern_is_union, - self.ffi_lib.extern_identify, - self.ffi_lib.extern_equals, - self.ffi_lib.extern_clone_val, - self.ffi_lib.extern_drop_handles, - self.ffi_lib.extern_type_to_str, - self.ffi_lib.extern_val_to_bytes, - self.ffi_lib.extern_val_to_str, - self.ffi_lib.extern_store_tuple, - self.ffi_lib.extern_store_set, - self.ffi_lib.extern_store_dict, - self.ffi_lib.extern_store_bytes, - self.ffi_lib.extern_store_utf8, - self.ffi_lib.extern_store_u64, - self.ffi_lib.extern_store_i64, - self.ffi_lib.extern_store_f64, - self.ffi_lib.extern_store_bool, - self.ffi_lib.extern_project_ignoring_type, - self.ffi_lib.extern_project_multi, - self.ffi_lib.extern_val_to_bool, - self.ffi_lib.extern_create_exception, - ) - return context - - return self.ffi.init_once(init_externs, "ExternContext singleton") - - def new(self, cdecl, init): - return self.ffi.new(cdecl, init) - - def gc(self, cdata, destructor): - """Register a method to be called when `cdata` is garbage collected. - - Returns a new reference that should be used in place of `cdata`. - """ - return self.ffi.gc(cdata, destructor) - - def unpack(self, cdata_ptr, count): - """Given a pointer representing an array, and its count of entries, return a list.""" - return self.ffi.unpack(cdata_ptr, count) - - def buffer(self, cdata): - return self.ffi.buffer(cdata) - - def to_ids_buf(self, types): - return self.context.type_ids_buf([TypeId(self.context.to_id(t)) for t in types]) - def decompress_tarball(self, tarfile_path, dest_dir): - result = self.lib.decompress_tarball(tarfile_path, dest_dir) - return self.context.raise_or_return(result) + return self.lib.decompress_tarball(tarfile_path, dest_dir) def init_rust_logging(self, level, log_show_rust_3rdparty): return self.lib.init_logging(level, log_show_rust_3rdparty) def setup_pantsd_logger(self, log_file_path, level): - log_file_path = log_file_path.encode() - result = self.lib.setup_pantsd_logger(log_file_path, level) - return self.context.raise_or_return(result) + return self.lib.setup_pantsd_logger(log_file_path, level) def setup_stderr_logger(self, level): return self.lib.setup_stderr_logger(level) def write_log(self, msg, level, target): - msg = msg.encode() - target = target.encode() return self.lib.write_log(msg, level, target) def write_stdout(self, session, msg: str): - return self.lib.write_stdout(session, msg.encode()) + return self.lib.write_stdout(session, msg) def write_stderr(self, session, msg: str): - return self.lib.write_stdout(session, msg.encode()) + return self.lib.write_stdout(session, msg) def flush_log(self): return self.lib.flush_log() def override_thread_logging_destination_to_just_pantsd(self): - self.lib.override_thread_logging_destination(self.lib.Pantsd) + self.lib.override_thread_logging_destination("pantsd") def override_thread_logging_destination_to_just_stderr(self): - self.lib.override_thread_logging_destination(self.lib.Stderr) + self.lib.override_thread_logging_destination("stderr") def match_path_globs(self, path_globs: PathGlobs, paths: Iterable[str]) -> bool: - path_globs = self.context.to_value(path_globs) - paths_buf = self.context.utf8_buf_buf(tuple(paths)) - result = self.lib.match_path_globs(path_globs, paths_buf) - return cast(bool, self.context.raise_or_return(result)) + return cast(bool, self.lib.match_path_globs(path_globs, tuple(paths))) def new_tasks(self): - return self.gc(self.lib.tasks_create(), self.lib.tasks_destroy) + return self.lib.PyTasks() def new_execution_request(self): - return self.gc(self.lib.execution_request_create(), self.lib.execution_request_destroy) + return self.lib.PyExecutionRequest() def new_session( self, @@ -909,15 +187,12 @@ def new_session( build_id, should_report_workunits: bool, ): - return self.gc( - self.lib.session_create( - scheduler, - should_record_zipkin_spans, - should_render_ui, - self.context.utf8_buf(build_id), - should_report_workunits, - ), - self.lib.session_destroy, + return self.lib.PySession( + scheduler, + should_record_zipkin_spans, + should_render_ui, + build_id, + should_report_workunits, ) def new_scheduler( @@ -930,89 +205,80 @@ def new_scheduler( use_gitignore: bool, execution_options, ): - """Create and return an ExternContext and native Scheduler.""" - - def func(fn): - return Function(self.context.to_key(fn)) - - def ti(type_obj): - return TypeId(self.context.to_id(type_obj)) - - engine_types = EngineTypes( - construct_directory_digest=func(Digest), - directory_digest=ti(Digest), - construct_snapshot=func(Snapshot), - snapshot=ti(Snapshot), - construct_file_content=func(FileContent), - construct_files_content=func(FilesContent), - files_content=ti(FilesContent), - construct_process_result=func(FallibleProcessResultWithPlatform), - construct_materialize_directories_results=func(MaterializeDirectoriesResult), - construct_materialize_directory_result=func(MaterializeDirectoryResult), - address=ti(Address), - path_globs=ti(PathGlobs), - directories_to_merge=ti(DirectoriesToMerge), - directory_with_prefix_to_strip=ti(DirectoryWithPrefixToStrip), - directory_with_prefix_to_add=ti(DirectoryWithPrefixToAdd), - input_files_content=ti(InputFilesContent), - dir=ti(Dir), - file=ti(File), - link=ti(Link), - platform=ti(Platform), - multi_platform_process=ti(MultiPlatformProcess), - process_result=ti(FallibleProcessResultWithPlatform), - coroutine=ti(CoroutineType), - url_to_fetch=ti(UrlToFetch), - string=ti(str), - bytes=ti(bytes), - construct_interactive_process_result=func(InteractiveProcessResult), - interactive_process=ti(InteractiveProcessRequest), - interactive_process_result=ti(InteractiveProcessResult), - snapshot_subset=ti(SnapshotSubset), - construct_platform=func(Platform), + """Create and return a native Scheduler.""" + + # TODO: There is no longer a need to differentiate constructors from types, as types are + # callable as well with the cpython crate. + engine_types = self.lib.PyTypes( + construct_directory_digest=Digest, + directory_digest=Digest, + construct_snapshot=Snapshot, + snapshot=Snapshot, + construct_file_content=FileContent, + construct_files_content=FilesContent, + files_content=FilesContent, + construct_process_result=FallibleProcessResultWithPlatform, + construct_materialize_directories_results=MaterializeDirectoriesResult, + construct_materialize_directory_result=MaterializeDirectoryResult, + address=Address, + path_globs=PathGlobs, + directories_to_merge=DirectoriesToMerge, + directory_with_prefix_to_strip=DirectoryWithPrefixToStrip, + directory_with_prefix_to_add=DirectoryWithPrefixToAdd, + input_files_content=InputFilesContent, + dir=Dir, + file=File, + link=Link, + platform=Platform, + multi_platform_process=MultiPlatformProcess, + process_result=FallibleProcessResultWithPlatform, + coroutine=CoroutineType, + url_to_fetch=UrlToFetch, + string=str, + bytes=bytes, + construct_interactive_process_result=InteractiveProcessResult, + interactive_process_request=InteractiveProcessRequest, + interactive_process_result=InteractiveProcessResult, + snapshot_subset=SnapshotSubset, + construct_platform=Platform, ) - scheduler_result = self.lib.scheduler_create( + return self.lib.scheduler_create( tasks, engine_types, # Project tree. - self.context.utf8_buf(build_root), - self.context.utf8_buf(local_store_dir), - self.context.utf8_buf_buf(ignore_patterns), + build_root, + local_store_dir, + ignore_patterns, use_gitignore, - self.to_ids_buf(root_subject_types), + root_subject_types, # Remote execution config. execution_options.remote_execution, - self.context.utf8_buf_buf(execution_options.remote_store_server), - # We can't currently pass Options to the rust side, so we pass empty strings for None. - self.context.utf8_buf(execution_options.remote_execution_server or ""), - self.context.utf8_buf(execution_options.remote_execution_process_cache_namespace or ""), - self.context.utf8_buf(execution_options.remote_instance_name or ""), - self.context.utf8_buf(execution_options.remote_ca_certs_path or ""), - self.context.utf8_buf(execution_options.remote_oauth_bearer_token_path or ""), + execution_options.remote_store_server, + execution_options.remote_execution_server, + execution_options.remote_execution_process_cache_namespace, + execution_options.remote_instance_name, + execution_options.remote_ca_certs_path, + execution_options.remote_oauth_bearer_token_path, execution_options.remote_store_thread_count, execution_options.remote_store_chunk_bytes, execution_options.remote_store_connection_limit, execution_options.remote_store_chunk_upload_timeout_seconds, execution_options.remote_store_rpc_retries, - self.context.utf8_buf_buf(execution_options.remote_execution_extra_platform_properties), + tuple( + tuple(pair.split("=", 1)) + for pair in execution_options.remote_execution_extra_platform_properties + ), execution_options.process_execution_local_parallelism, execution_options.process_execution_remote_parallelism, execution_options.process_execution_cleanup_local_dirs, execution_options.process_execution_speculation_delay, - self.context.utf8_buf(execution_options.process_execution_speculation_strategy), + execution_options.process_execution_speculation_strategy, execution_options.process_execution_use_local_cache, - self.context.utf8_dict(execution_options.remote_execution_headers), + tuple((k, v) for (k, v) in execution_options.remote_execution_headers.items()), execution_options.process_execution_local_enable_nailgun, execution_options.experimental_fs_watcher, ) - if scheduler_result.is_throw: - value = self.context.from_value(scheduler_result.throw_handle) - self.context.drop_handles([scheduler_result.throw_handle]) - raise value - else: - scheduler = scheduler_result.raw_pointer - return self.gc(scheduler, self.lib.scheduler_destroy) def set_panic_handler(self): if os.getenv("RUST_BACKTRACE", "0") == "0": diff --git a/src/python/pants/engine/scheduler.py b/src/python/pants/engine/scheduler.py index 3cfbea406ab..c1b3fd733fe 100644 --- a/src/python/pants/engine/scheduler.py +++ b/src/python/pants/engine/scheduler.py @@ -3,11 +3,8 @@ import logging import os -import sys import time -import traceback from dataclasses import dataclass -from textwrap import dedent from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, cast from typing_extensions import TypedDict @@ -22,7 +19,6 @@ MaterializeDirectoryResult, PathGlobsAndRoot, ) -from pants.engine.native import Function, TypeId from pants.engine.nodes import Return, Throw from pants.engine.rules import Rule, RuleIndex, TaskRule from pants.engine.selectors import Params @@ -132,45 +128,13 @@ def __init__( def graph_trace(self, session, execution_request): with temporary_file_path() as path: - self._native.lib.graph_trace(self._scheduler, session, execution_request, path.encode()) + self._native.lib.graph_trace(self._scheduler, session, execution_request, path) with open(path, "r") as fd: for line in fd.readlines(): yield line.rstrip() def _assert_ruleset_valid(self): - self._raise_or_return(self._native.lib.validator_run(self._scheduler)) - - def _to_vals_buf(self, objs): - return self._native.context.vals_buf( - tuple(self._native.context.to_value(obj) for obj in objs) - ) - - def _to_value(self, obj): - return self._native.context.to_value(obj) - - def _from_value(self, val): - return self._native.context.from_value(val) - - def _raise_or_return(self, pyresult): - return self._native.context.raise_or_return(pyresult) - - def _to_id(self, typ): - return self._native.context.to_id(typ) - - def _to_key(self, obj): - return self._native.context.to_key(obj) - - def _from_key(self, cdata): - return self._native.context.from_key(cdata) - - def _to_type(self, type_obj): - return TypeId(self._to_id(type_obj)) - - def _to_ids_buf(self, types): - return self._native.to_ids_buf(types) - - def _to_utf8_buf(self, string): - return self._native.context.utf8_buf(string) + self._native.lib.validator_run(self._scheduler) def _to_params_list(self, subject_or_params): if isinstance(subject_or_params, Params): @@ -194,19 +158,18 @@ def _register_task( self, tasks, output_type, rule: TaskRule, union_rules: Dict[Type, "OrderedSet[Type]"] ) -> None: """Register the given TaskRule with the native scheduler.""" - func = Function(self._to_key(rule.func)) - self._native.lib.tasks_task_begin(tasks, func, self._to_type(output_type), rule.cacheable) + self._native.lib.tasks_task_begin(tasks, rule.func, output_type, rule.cacheable) for selector in rule.input_selectors: - self._native.lib.tasks_add_select(tasks, self._to_type(selector)) + self._native.lib.tasks_add_select(tasks, selector) anno = rule.annotations if anno.canonical_name: name = anno.canonical_name desc = anno.desc if anno.desc else "" - self._native.lib.tasks_add_display_info(tasks, name.encode(), desc.encode()) + self._native.lib.tasks_add_display_info(tasks, name, desc) def add_get_edge(product, subject): - self._native.lib.tasks_add_get(tasks, self._to_type(product), self._to_type(subject)) + self._native.lib.tasks_add_get(tasks, product, subject) for the_get in rule.input_gets: if union.is_instance(the_get.subject_declared_type): @@ -220,20 +183,15 @@ def add_get_edge(product, subject): self._native.lib.tasks_task_end(tasks) def visualize_graph_to_file(self, session, filename): - res = self._native.lib.graph_visualize(self._scheduler, session, filename.encode()) - self._raise_or_return(res) + self._native.lib.graph_visualize(self._scheduler, session, filename) def visualize_rule_graph_to_file(self, filename): - res = self._native.lib.rule_graph_visualize(self._scheduler, filename.encode()) - self._raise_or_return(res) + self._native.lib.rule_graph_visualize(self._scheduler, filename) def visualize_rule_subgraph_to_file(self, filename, root_subject_types, product_type): - root_type_ids = self._to_ids_buf(root_subject_types) - product_type_id = TypeId(self._to_id(product_type)) - res = self._native.lib.rule_subgraph_visualize( - self._scheduler, root_type_ids, product_type_id, filename.encode() + self._native.lib.rule_subgraph_visualize( + self._scheduler, root_subject_types, product_type, filename ) - self._raise_or_return(res) def rule_graph_visualization(self): with temporary_file_path() as path: @@ -268,63 +226,34 @@ def graph_len(self): def add_root_selection(self, execution_request, subject_or_params, product): params = self._to_params_list(subject_or_params) - res = self._native.lib.execution_add_root_select( - self._scheduler, execution_request, self._to_vals_buf(params), self._to_type(product) + self._native.lib.execution_add_root_select( + self._scheduler, execution_request, params, product ) - self._raise_or_return(res) @property def visualize_to_dir(self): return self._visualize_to_dir def _metrics(self, session): - return self._from_value(self._native.lib.scheduler_metrics(self._scheduler, session)) + return self._native.lib.scheduler_metrics(self._scheduler, session) def poll_workunits(self, session) -> PolledWorkunits: - result: Tuple[Tuple[Workunit], Tuple[Workunit]] = self._from_value( - self._native.lib.poll_session_workunits(self._scheduler, session) + result: Tuple[Tuple[Workunit], Tuple[Workunit]] = self._native.lib.poll_session_workunits( + self._scheduler, session ) return {"started": result[0], "completed": result[1]} def _run_and_return_roots(self, session, execution_request): raw_roots = self._native.lib.scheduler_execute(self._scheduler, session, execution_request) - if raw_roots == self._native.ffi.NULL: - raise KeyboardInterrupt - remaining_runtime_exceptions_to_capture = list( - self._native.consume_cffi_extern_method_runtime_exceptions() - ) - try: - roots = [] - for raw_root in self._native.unpack(raw_roots.nodes_ptr, raw_roots.nodes_len): - # Check if there were any uncaught exceptions within rules that were executed. - remaining_runtime_exceptions_to_capture.extend( - self._native.consume_cffi_extern_method_runtime_exceptions() - ) - - if raw_root.is_throw: - state = Throw(self._from_value(raw_root.handle)) - elif raw_root.handle == self._native.ffi.NULL: - # NB: We expect all NULL handles to correspond to uncaught exceptions which are collected - # in `self._native._peek_cffi_extern_method_runtime_exceptions()`! - if not remaining_runtime_exceptions_to_capture: - raise ExecutionError( - "Internal logic error in scheduler: expected more elements in " - "`self._native._peek_cffi_extern_method_runtime_exceptions()`." - ) - matching_runtime_exception = remaining_runtime_exceptions_to_capture.pop(0) - state = Throw(matching_runtime_exception) - else: - state = Return(self._from_value(raw_root.handle)) - roots.append(state) - finally: - self._native.lib.nodes_destroy(raw_roots) + roots = [] + for raw_root in raw_roots: + if raw_root.is_throw(): + state = Throw(raw_root.handle()) + else: + state = Return(raw_root.handle()) + roots.append(state) - if remaining_runtime_exceptions_to_capture: - raise ExecutionError( - "Internal logic error in scheduler: expected elements in " - "`self._native._peek_cffi_extern_method_runtime_exceptions()`." - ) return roots def lease_files_in_graph(self, session): @@ -528,67 +457,7 @@ def product_request(self, product, subjects): :returns: A list of the requested products, with length match len(subjects). """ request = None - raised_exception = None - try: - request = self.execution_request([product], subjects) - except: # noqa: T803 - # If there are any exceptions during CFFI extern method calls, we want to return an error with - # them and whatever failure results from it. This typically results from unhashable types. - if self._scheduler._native._peek_cffi_extern_method_runtime_exceptions(): - raised_exception = sys.exc_info()[0:3] - else: - # Otherwise, this is likely an exception coming from somewhere else, and we don't want to - # swallow that, so re-raise. - raise - - # We still want to raise whenever there are any exceptions in any CFFI extern methods, even if - # that didn't lead to an exception in generating the execution request for some reason, so we - # check the extern exceptions list again. - internal_errors = self._scheduler._native.consume_cffi_extern_method_runtime_exceptions() - if internal_errors: - error_tracebacks = [ - "".join( - traceback.format_exception( - etype=error_info.exc_type, - value=error_info.exc_value, - tb=error_info.traceback, - ) - ) - for error_info in internal_errors - ] - - raised_exception_message = None - if raised_exception: - exc_type, exc_value, tb = raised_exception - raised_exception_message = dedent( - """\ - The engine execution request raised this error, which is probably due to the errors in the - CFFI extern methods listed above, as CFFI externs return None upon error: - {} - """ - ).format( - "".join(traceback.format_exception(etype=exc_type, value=exc_value, tb=tb)) - ) - - raise ExecutionError( - dedent( - """\ - {error_description} raised in CFFI extern methods: - {joined_tracebacks}{raised_exception_message} - """ - ).format( - error_description=pluralize(len(internal_errors), "Exception"), - joined_tracebacks="\n+++++++++\n".join( - formatted_tb for formatted_tb in error_tracebacks - ), - raised_exception_message=( - "\n\n{}".format(raised_exception_message) - if raised_exception_message - else "" - ), - ) - ) - + request = self.execution_request([product], subjects) returns, throws = self.execute(request) # Throw handling. @@ -610,12 +479,11 @@ def capture_snapshots(self, path_globs_and_roots): directory relative to which each should be captured. :returns: A tuple of Snapshots. """ - result = self._scheduler._native.lib.capture_snapshots( + return self._scheduler._native.lib.capture_snapshots( self._scheduler._scheduler, self._session, - self._scheduler._to_value(_PathGlobsAndRootCollection(path_globs_and_roots)), + _PathGlobsAndRootCollection(path_globs_and_roots), ) - return self._scheduler._raise_or_return(result) def merge_directories(self, directory_digests): """Merges any number of directories. @@ -623,23 +491,18 @@ def merge_directories(self, directory_digests): :param directory_digests: Tuple of DirectoryDigests. :return: A Digest. """ - result = self._scheduler._native.lib.merge_directories( - self._scheduler._scheduler, - self._session, - self._scheduler._to_value(_DirectoryDigests(directory_digests)), + return self._scheduler._native.lib.merge_directories( + self._scheduler._scheduler, self._session, _DirectoryDigests(directory_digests) ) - return self._scheduler._raise_or_return(result) def run_local_interactive_process( self, request: "InteractiveProcessRequest" ) -> "InteractiveProcessResult": sched_pointer = self._scheduler._scheduler session_pointer = self._session - - wrapped_result = self._scheduler._native.lib.run_local_interactive_process( - sched_pointer, session_pointer, self._scheduler._to_value(request) + result: "InteractiveProcessResult" = self._scheduler._native.lib.run_local_interactive_process( + sched_pointer, session_pointer, request ) - result: "InteractiveProcessResult" = self._scheduler._raise_or_return(wrapped_result) return result def materialize_directory( @@ -661,12 +524,11 @@ def materialize_directories( dir_list = [dtm.path_prefix for dtm in directories_to_materialize] check_no_overlapping_paths(dir_list) - wrapped_result = self._scheduler._native.lib.materialize_directories( + result: MaterializeDirectoriesResult = self._scheduler._native.lib.materialize_directories( self._scheduler._scheduler, self._session, - self._scheduler._to_value(_DirectoriesToMaterialize(directories_to_materialize)), + _DirectoriesToMaterialize(directories_to_materialize), ) - result: MaterializeDirectoriesResult = self._scheduler._raise_or_return(wrapped_result) return result def lease_files_in_graph(self): diff --git a/src/rust/engine/Cargo.lock b/src/rust/engine/Cargo.lock index c86a6816fcd..7f9699c039b 100644 --- a/src/rust/engine/Cargo.lock +++ b/src/rust/engine/Cargo.lock @@ -326,23 +326,6 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "cbindgen" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", - "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", - "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", - "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", - "serde 1.0.105 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_derive 1.0.105 (registry+https://github.com/rust-lang/crates.io-index)", - "serde_json 1.0.48 (registry+https://github.com/rust-lang/crates.io-index)", - "syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)", - "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "toml 0.4.10 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "cc" version = "1.0.50" @@ -494,6 +477,17 @@ name = "core-foundation-sys" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "cpython" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "paste 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "python3-sys 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "crates-io" version = "0.22.0" @@ -692,6 +686,7 @@ dependencies = [ "boxfuture 0.0.1", "bytes 0.4.12 (registry+https://github.com/rust-lang/crates.io-index)", "concrete_time 0.0.1", + "cpython 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.5.13 (registry+https://github.com/rust-lang/crates.io-index)", "fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -732,9 +727,7 @@ dependencies = [ name = "engine_cffi" version = "0.0.1" dependencies = [ - "build_utils 0.0.1", - "cbindgen 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)", - "cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)", + "cpython 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "engine 0.0.1", "futures 0.1.29 (registry+https://github.com/rust-lang/crates.io-index)", "futures 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -745,7 +738,6 @@ dependencies = [ "store 0.1.0", "tar_api 0.0.1", "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 2.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "workunit_store 0.0.1", ] @@ -2033,6 +2025,26 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "paste" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "paste-impl 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro-hack 0.5.12 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "paste-impl" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro-hack 0.5.12 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 1.0.9 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "peeking_take_while" version = "0.1.2" @@ -2278,6 +2290,15 @@ dependencies = [ "protoc 2.10.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "python3-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.3.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -3701,7 +3722,6 @@ dependencies = [ "checksum bytes 0.5.4 (registry+https://github.com/rust-lang/crates.io-index)" = "130aac562c0dd69c56b3b1cc8ffd2e17be31d0b6c25b61c96b76231aa23e39e1" "checksum bytesize 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "716960a18f978640f25101b5cbf1c6f6b0d3192fab36a2d98ca96f0ecbe41010" "checksum cargo 0.34.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f7e90b5f23ae79af3ec0e4dc670349167fd47d6c1134f139cf0627817a4792bf" -"checksum cbindgen 0.8.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1f861ef68cabbb271d373a7795014052bff37edce22c620d95e395e8719d7dc5" "checksum cc 1.0.50 (registry+https://github.com/rust-lang/crates.io-index)" = "95e28fa049fda1c330bcf9d723be7663a899c4679724b34c81e9f5a326aab8cd" "checksum cexpr 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27" "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" @@ -3720,6 +3740,7 @@ dependencies = [ "checksum core-foundation 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "57d24c7a13c43e870e37c1556b74555437870a04514f7685f5b354e090567171" "checksum core-foundation-sys 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e7ca8a5221364ef15ce201e8ed2f609fc312682a8f4e0e3d4aa5879764e0fa3b" "checksum core-foundation-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b3a71ab494c0b5b860bdc8407ae08978052417070c2ced38573a9157ad75b8ac" +"checksum cpython 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c2efcf01fcd3a7322d82970f45bc02cc089282fe9dea6f6efb45b173f10eacec" "checksum crates-io 0.22.0 (registry+https://github.com/rust-lang/crates.io-index)" = "091018c3f5e8109d82d94b648555f0d4a308d15626da2fb22c76f32117e24569" "checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" "checksum crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c8ec7fcd21571dc78f96cc96243cab8d8f035247c3efd16c687be154c3fa9efa" @@ -3865,6 +3886,8 @@ dependencies = [ "checksum parking_lot 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f0802bff09003b291ba756dc7e79313e51cc31667e94afbe847def490424cde5" "checksum parking_lot_core 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "4db1a8ccf734a7bce794cc19b3df06ed87ab2f3907036b693c68f56b4d4537fa" "checksum parking_lot_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad7f7e6ebdc79edff6fdcb87a55b620174f7a989e3eb31b65231f4af57f00b8c" +"checksum paste 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "ab4fb1930692d1b6a9cfabdde3d06ea0a7d186518e2f4d67660d8970e2fa647a" +"checksum paste-impl 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "a62486e111e571b1e93b710b61e8f493c0013be39629b714cb166bdb06aa5a8a" "checksum peeking_take_while 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" "checksum percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "31010dd2e1ac33d5b46a5b413495239882813e0369f8ed8a5e266f173602f831" "checksum percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" @@ -3888,6 +3911,7 @@ dependencies = [ "checksum protobuf-codegen 2.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c12a571137dc99703cb46fa21f185834fc5578a65836573fcff127f7b53f41e1" "checksum protoc 2.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8f89b56360a99c36ff8dcf7cc969b291fbae680b027e768bfd27110606f45271" "checksum protoc-grpcio 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b0292d93a536174ff6bafe8b5e8534aeeb2b039146bae59770c07f4d2c2458c9" +"checksum python3-sys 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "968ddca15e0fa74da3207aeb7b9fbbe94864dd13a17eaa95f75b5b836abf3007" "checksum quick-error 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" "checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" "checksum quote 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2bdc6c187c65bca4260c9011c9e3132efe4909da44726bad24cf7572ae338d7f" diff --git a/src/rust/engine/Cargo.toml b/src/rust/engine/Cargo.toml index 4b32107f994..279ffd4fc2b 100644 --- a/src/rust/engine/Cargo.toml +++ b/src/rust/engine/Cargo.toml @@ -86,6 +86,7 @@ async-trait = "0.1" boxfuture = { path = "boxfuture" } bytes = "0.4.5" concrete_time = { path = "concrete_time" } +cpython = "0.5" crossbeam-channel = "0.3" fnv = "1.0.5" fs = { path = "fs" } diff --git a/src/rust/engine/engine_cffi/Cargo.toml b/src/rust/engine/engine_cffi/Cargo.toml index d8978c47ff5..2a0539ac642 100644 --- a/src/rust/engine/engine_cffi/Cargo.toml +++ b/src/rust/engine/engine_cffi/Cargo.toml @@ -9,6 +9,7 @@ publish = false crate-type = ["cdylib"] [dependencies] +cpython = { version = "0.5", features = ["extension-module"] } engine = { path = ".." } futures01 = { package = "futures", version = "0.1" } futures = { version = "0.3", features = ["compat"] } @@ -20,9 +21,3 @@ store = { path = "../fs/store" } tar_api = { path = "../tar_api" } tempfile = "3" workunit_store = { path = "../workunit_store" } - -[build-dependencies] -build_utils = { path = "../build_utils" } -cbindgen = "0.8.6" -cc = "1.0" -walkdir = "2" diff --git a/src/rust/engine/engine_cffi/build.rs b/src/rust/engine/engine_cffi/build.rs index 2394f033cde..89fe5639da4 100644 --- a/src/rust/engine/engine_cffi/build.rs +++ b/src/rust/engine/engine_cffi/build.rs @@ -25,60 +25,7 @@ // Arc can be more clear than needing to grok Orderings: #![allow(clippy::mutex_atomic)] -use cbindgen; -use cc; - -/* -We use the `gcc` crate to compile the CFFI C sources (`native_engine.c`) -generated by `bootstrap.sh` into a (private) static lib (`libnative_engine_ffi.a`), -which then gets linked into the final `cargo build` product (the native engine binary). -This process mixes the Python module initialization function and other symbols into the -native engine binary, allowing us to address it both as an importable python module -(`from _native_engine import X`) as well as a C library (`ffi.dlopen(native_engine.so)`). - -*/ - -use std::env; -use std::fs; -use std::io::{self, Read}; -use std::path::Path; -use std::process::{exit, Command}; - -use build_utils::BuildRoot; - -#[derive(Debug)] -enum CffiBuildError { - Io(io::Error), - Env(env::VarError), - Cbindgen(cbindgen::Error), -} - -impl From for CffiBuildError { - fn from(err: env::VarError) -> Self { - CffiBuildError::Env(err) - } -} - -impl From for CffiBuildError { - fn from(err: io::Error) -> Self { - CffiBuildError::Io(err) - } -} - -impl From for CffiBuildError { - fn from(err: cbindgen::Error) -> Self { - CffiBuildError::Cbindgen(err) - } -} - -// A message is printed to stderr, and the script fails, if main() results in a CffiBuildError. -fn main() -> Result<(), CffiBuildError> { - // NB: When built with Python 3, `native_engine.so` only works with a Python 3 interpreter. - // When built with Python 2, it works with both Python 2 and Python 3. - // So, we check to see if the under-the-hood interpreter has changed and rebuild the native engine - // when needed. - println!("cargo:rerun-if-env-changed=PY"); - +fn main() { if cfg!(target_os = "linux") { // We depend on grpcio, which uses C++. // On Linux, with g++, some part of that compilation depends on @@ -87,108 +34,14 @@ fn main() -> Result<(), CffiBuildError> { println!("cargo:rustc-cdylib-link-arg=-lstdc++"); } - // Generate the scheduler.h bindings from the rust code in this crate. - let bindings_config_path = Path::new("cbindgen.toml"); - mark_for_change_detection(&bindings_config_path); - mark_for_change_detection(Path::new("src")); - // Explicitly re-run if engine or logging is modified because they're hard-coded deps in cbindgen.toml. - // Ideally we could just point at the engine crate here, but it's the workspace that contains all - // code, and we don't want to re-run on _any_ code changes (even though we'll probably re-run for - // them anyway), so we explicitly mark ../src and ../Cargo.toml for change-detection. - // - // This list should be kept in sync with the equivalent list of included crates in cbindgen.toml - mark_for_change_detection(Path::new("../src")); - mark_for_change_detection(Path::new("../Cargo.toml")); - mark_for_change_detection(Path::new("../logging")); - - let scheduler_file_path = Path::new("src/cffi/scheduler.h"); - let crate_dir = env::var("CARGO_MANIFEST_DIR")?; - cbindgen::generate(crate_dir)?.write_to_file(scheduler_file_path); - - // Generate the cffi c sources. - let build_root = BuildRoot::find()?; - let cffi_bootstrapper = build_root.join("build-support/bin/native/bootstrap_cffi.sh"); - mark_for_change_detection(&cffi_bootstrapper); - - // TODO: bootstrap_c_source() is used to generate C source code from @_extern_decl methods in - // native.py. It would be very useful to be able to detect when those /declarations/ haven't - // changed and avoid rebuilding the engine crate if we are just iterating on the implementations. - mark_for_change_detection(&build_root.join("src/python/pants/engine/native.py")); - - let cffi_dir = Path::new("src/cffi"); - - let result = Command::new(&cffi_bootstrapper) - .arg(cffi_dir) - .arg(scheduler_file_path) - .status()?; - if !result.success() { - let exit_code = result.code(); - eprintln!( - "Execution of {:?} failed with exit code {:?}", - cffi_bootstrapper, exit_code - ); - exit(exit_code.unwrap_or(1)); - } - - // N.B. The filename of this source code - at generation time - must line up 1:1 with the - // python import name, as python keys the initialization function name off of the import name. - let c_path = cffi_dir.join("native_engine.c"); - mark_for_change_detection(&c_path); - let env_script_path = cffi_dir.join("native_engine.cflags"); - mark_for_change_detection(&env_script_path); - - // Now compile the cffi c sources. - let mut config = cc::Build::new(); - - let cfg_path = c_path.to_str().unwrap(); - config.file(cfg_path); - for flag in make_flags(&env_script_path)? { - config.flag(flag.as_str()); - } - - // cffi generates missing field initializers :( - config.flag("-Wno-missing-field-initializers"); - - config.compile("libnative_engine_ffi.a"); - if cfg!(target_os = "macos") { // N.B. On OSX, we force weak linking by passing the param `-undefined dynamic_lookup` to // the underlying linker. This avoids "missing symbol" errors for Python symbols - // (e.g. `_PyImport_ImportModule`) at build time when bundling the CFFI C sources. + // (e.g. `_PyImport_ImportModule`) at build time when bundling the cpython sources. // The missing symbols will instead by dynamically resolved in the address space of the parent // binary (e.g. `python`) at runtime. We do this to avoid needing to link to libpython // (which would constrain us to specific versions of Python). println!("cargo:rustc-cdylib-link-arg=-undefined"); println!("cargo:rustc-cdylib-link-arg=dynamic_lookup"); } - - Ok(()) -} - -fn mark_for_change_detection(path: &Path) { - // Restrict re-compilation check to just our input files. - // See: http://doc.crates.io/build-script.html#outputs-of-the-build-script - if !path.exists() { - panic!( - "Cannot mark non-existing path for change detection: {}", - path.display() - ); - } - for file in walkdir::WalkDir::new(path) { - println!("cargo:rerun-if-changed={}", file.unwrap().path().display()); - } -} - -fn make_flags(env_script_path: &Path) -> Result, io::Error> { - let mut contents = String::new(); - fs::File::open(env_script_path)?.read_to_string(&mut contents)?; - // It would be a shame if someone were to include a space in an actual quoted value. - // If they did that, I guess we'd need to implement shell tokenization or something. - Ok( - contents - .trim() - .split_whitespace() - .map(str::to_owned) - .collect(), - ) } diff --git a/src/rust/engine/engine_cffi/cbindgen.toml b/src/rust/engine/engine_cffi/cbindgen.toml deleted file mode 100644 index 6bfa005e13c..00000000000 --- a/src/rust/engine/engine_cffi/cbindgen.toml +++ /dev/null @@ -1,41 +0,0 @@ -header = ''' -#ifndef __PANTS_SCHEDULER_CBINDGEN_H__ -#define __PANTS_SCHEDULER_CBINDGEN_H__ -/* - * Copyright 2018 Pants project contributors (see CONTRIBUTORS.md). - * Licensed under the Apache License, Version 2.0 (see LICENSE). - */ - -// Handle is declared as a typedef rather than a wrapper struct because it avoids needing to wrap -// the inner handle/`void*` in a tuple or datatype at the ffi boundary. For most types that -// overhead would not be worth worrying about, but Handle is used often enough that it gives a 6% -// speedup to avoid the wrapping. - -typedef void* Handle; -''' -trailer = ''' -#endif // __PANTS_SCHEDULER_CBINDGEN_H__ -''' -include_version = true -line_length = 100 -language = "C" - -[parse] -parse_deps = true -# If more deps are added here, make sure to also add them to be rerun-triggers in src/cffi_build.rs. -# We specify this explicitly because otherwise all transitive deps are parsed, and that takes a -# very long time. -# -# Feel free to add more crates here if you need; basically any type that gets exposed over a C -# interface needs to be listed here. Please make sure to update build.rs to mark any crates which -# are added here for change detection. -# -# We need to parse the engine crate to codegen PyResult (because externs are both something we use -# from Python in engine, and something we expose to python in engine_cffi). -# -# We need to parse the logging crate to codegen PythonLogLevel and Destination. -include = ["engine", "logging"] - -[export] -# TODO: wrapped_PyInit_native_engine is declared twice if not excluded here, figure out why. -exclude = ["Handle", "wrapped_PyInit_native_engine"] diff --git a/src/rust/engine/engine_cffi/src/cffi_externs.rs b/src/rust/engine/engine_cffi/src/cffi_externs.rs deleted file mode 100644 index 28203e7e772..00000000000 --- a/src/rust/engine/engine_cffi/src/cffi_externs.rs +++ /dev/null @@ -1,25 +0,0 @@ -// This file creates the unmangled symbols initnative_engine and PyInit_native_engine, which cffi -// will use as the entry point to this shared library in python 2 and python 3, respectively. -// -// It calls the extern'd wrapped_initnative_engine and wrapped_PyInit_native_engine (generated in -// src/rust/engine/src/cffi/native_engine.c by build-support/native-engine/bootstrap_cffi.py). -// This is a bit awkward and fiddly, but necessary because rust doesn't currently have a way to -// re-export symbols from C libraries, other than this. -// See https://github.com/rust-lang/rust/issues/36342 - -use std::os::raw; - -extern "C" { - pub fn wrapped_initnative_engine(); - pub fn wrapped_PyInit_native_engine() -> *mut raw::c_void; -} - -#[no_mangle] -pub extern "C" fn initnative_engine() { - unsafe { wrapped_initnative_engine() } -} - -#[no_mangle] -pub extern "C" fn PyInit_native_engine() -> *mut raw::c_void { - unsafe { wrapped_PyInit_native_engine() } -} diff --git a/src/rust/engine/engine_cffi/src/lib.rs b/src/rust/engine/engine_cffi/src/lib.rs index 7c520e3455b..c825bf50e96 100644 --- a/src/rust/engine/engine_cffi/src/lib.rs +++ b/src/rust/engine/engine_cffi/src/lib.rs @@ -1,4 +1,4 @@ -// Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). +// Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). // Licensed under the Apache License, Version 2.0 (see LICENSE). #![deny(warnings)] @@ -24,24 +24,30 @@ #![allow(clippy::new_without_default, clippy::new_ret_no_self)] // Arc can be more clear than needing to grok Orderings: #![allow(clippy::mutex_atomic)] -// We only use unsafe pointer derefrences in our no_mangle exposed API, but it is nicer to list -// just the one minor call as unsafe, than to mark the whole function as unsafe which may hide -// other unsafeness. -#![allow(clippy::not_unsafe_ptr_arg_deref)] -// This crate is a wrapper around the engine crate which exposes a C interface which we can access -// from Python using cffi. -// -// The engine crate contains some C interop which we use, notably externs which are functions and -// types from Python which we can read from our Rust. This particular wrapper crate is just for how -// we expose ourselves back to Python. +// File-specific allowances to silence internal warnings of `py_class!`. +#![allow( + clippy::used_underscore_binding, + clippy::transmute_ptr_to_ptr, + clippy::zero_ptr +)] +// There are some large async-await types generated here. Nothing to worry about? #![type_length_limit = "1744838"] -mod cffi_externs; +/// +/// This crate is a wrapper around the engine crate which exposes a python module via cpython. +/// +/// The engine crate contains some cpython interop which we use, notably externs which are functions +/// and types from Python which we can read from our Rust. This particular wrapper crate is just for +/// how we expose ourselves back to Python. +/// +use cpython::{ + exc, py_class, py_fn, py_module_initializer, NoArgs, PyClone, PyErr, PyObject, + PyResult as CPyResult, PyTuple, PyType, Python, PythonObject, +}; -use engine::externs::*; use engine::{ - externs, nodes, Core, ExecutionRequest, ExecutionTermination, Function, Handle, Intrinsics, Key, - Params, RootResult, Rule, Scheduler, Session, Tasks, TypeId, Types, Value, + externs, nodes, Core, ExecutionRequest, ExecutionTermination, Failure, Function, Intrinsics, + Params, Rule, Scheduler, Session, Tasks, Types, Value, }; use futures::future::{self as future03, TryFutureExt}; use futures01::{future, Future}; @@ -50,13 +56,12 @@ use log::{error, warn, Log}; use logging::logger::LOGGER; use logging::{Destination, Logger}; use rule_graph::RuleGraph; + use std::any::Any; -use std::borrow::Borrow; -use std::ffi::CStr; +use std::cell::RefCell; +use std::convert::TryInto; use std::fs::File; use std::io; -use std::mem; -use std::os::raw; use std::panic; use std::path::{Path, PathBuf}; use std::time::Duration; @@ -66,110 +71,417 @@ use workunit_store::{StartedWorkUnit, WorkUnit}; #[cfg(test)] mod tests; -// TODO: Consider renaming and making generic for collections of PyResults. -#[repr(C)] -pub struct RawNodes { - nodes_ptr: *const PyResult, - nodes_len: u64, - nodes: Vec, -} - -impl RawNodes { - fn create(node_states: Vec) -> Box { - let nodes = node_states.into_iter().map(PyResult::from).collect(); - let mut raw_nodes = Box::new(RawNodes { - nodes_ptr: Vec::new().as_ptr(), - nodes_len: 0, - nodes: nodes, - }); - // Creates a pointer into the struct itself, which is not possible to do in safe rust. - raw_nodes.nodes_ptr = raw_nodes.nodes.as_ptr(); - raw_nodes.nodes_len = raw_nodes.nodes.len() as u64; - raw_nodes +py_module_initializer!(native_engine, |py, m| { + m.add( + py, + "init_logging", + py_fn!(py, init_logging(a: u64, b: bool)), + )?; + m.add( + py, + "setup_pantsd_logger", + py_fn!(py, setup_pantsd_logger(a: String, b: u64)), + )?; + m.add( + py, + "setup_stderr_logger", + py_fn!(py, setup_stderr_logger(a: u64)), + )?; + m.add(py, "flush_log", py_fn!(py, flush_log()))?; + m.add( + py, + "override_thread_logging_destination", + py_fn!(py, override_thread_logging_destination(a: String)), + )?; + m.add( + py, + "write_log", + py_fn!(py, write_log(a: String, b: u64, c: String)), + )?; + m.add( + py, + "write_stdout", + py_fn!(py, write_stdout(a: PySession, b: String)), + )?; + m.add( + py, + "write_stderr", + py_fn!(py, write_stderr(a: PySession, b: String)), + )?; + + m.add(py, "set_panic_handler", py_fn!(py, set_panic_handler()))?; + + m.add(py, "externs_set", py_fn!(py, externs_set(a: PyObject)))?; + + m.add( + py, + "match_path_globs", + py_fn!(py, match_path_globs(a: PyObject, b: Vec)), + )?; + m.add( + py, + "materialize_directories", + py_fn!( + py, + materialize_directories(a: PyScheduler, b: PySession, c: PyObject) + ), + )?; + m.add( + py, + "merge_directories", + py_fn!( + py, + merge_directories(a: PyScheduler, b: PySession, c: PyObject) + ), + )?; + m.add( + py, + "capture_snapshots", + py_fn!( + py, + capture_snapshots(a: PyScheduler, b: PySession, c: PyObject) + ), + )?; + m.add( + py, + "run_local_interactive_process", + py_fn!( + py, + run_local_interactive_process(a: PyScheduler, b: PySession, c: PyObject) + ), + )?; + m.add( + py, + "decompress_tarball", + py_fn!(py, decompress_tarball(a: String, b: String)), + )?; + + m.add( + py, + "graph_invalidate", + py_fn!(py, graph_invalidate(a: PyScheduler, b: Vec)), + )?; + m.add( + py, + "graph_invalidate_all_paths", + py_fn!(py, graph_invalidate_all_paths(a: PyScheduler)), + )?; + m.add(py, "graph_len", py_fn!(py, graph_len(a: PyScheduler)))?; + m.add( + py, + "graph_visualize", + py_fn!(py, graph_visualize(a: PyScheduler, b: PySession, d: String)), + )?; + m.add( + py, + "graph_trace", + py_fn!( + py, + graph_trace( + a: PyScheduler, + b: PySession, + c: PyExecutionRequest, + d: String + ) + ), + )?; + + m.add( + py, + "garbage_collect_store", + py_fn!(py, garbage_collect_store(a: PyScheduler)), + )?; + m.add( + py, + "lease_files_in_graph", + py_fn!(py, lease_files_in_graph(a: PyScheduler, b: PySession)), + )?; + m.add( + py, + "check_invalidation_watcher_liveness", + py_fn!(py, check_invalidation_watcher_liveness(a: PyScheduler)), + )?; + + m.add( + py, + "validator_run", + py_fn!(py, validator_run(a: PyScheduler)), + )?; + m.add( + py, + "rule_graph_visualize", + py_fn!(py, rule_graph_visualize(a: PyScheduler, d: String)), + )?; + m.add( + py, + "rule_subgraph_visualize", + py_fn!( + py, + rule_subgraph_visualize(a: PyScheduler, b: Vec, c: PyType, d: String) + ), + )?; + + m.add( + py, + "execution_add_root_select", + py_fn!( + py, + execution_add_root_select( + a: PyScheduler, + b: PyExecutionRequest, + c: Vec, + d: PyType + ) + ), + )?; + + m.add( + py, + "poll_session_workunits", + py_fn!(py, poll_session_workunits(a: PyScheduler, b: PySession)), + )?; + + m.add( + py, + "tasks_task_begin", + py_fn!( + py, + tasks_task_begin(a: PyTasks, b: PyObject, c: PyType, d: bool) + ), + )?; + m.add( + py, + "tasks_add_get", + py_fn!(py, tasks_add_get(a: PyTasks, b: PyType, c: PyType)), + )?; + m.add( + py, + "tasks_add_select", + py_fn!(py, tasks_add_select(a: PyTasks, b: PyType)), + )?; + m.add( + py, + "tasks_add_display_info", + py_fn!(py, tasks_add_display_info(a: PyTasks, b: String, c: String)), + )?; + m.add(py, "tasks_task_end", py_fn!(py, tasks_task_end(a: PyTasks)))?; + + m.add( + py, + "scheduler_execute", + py_fn!( + py, + scheduler_execute(a: PyScheduler, b: PySession, c: PyExecutionRequest) + ), + )?; + m.add( + py, + "scheduler_metrics", + py_fn!(py, scheduler_metrics(a: PyScheduler, b: PySession)), + )?; + m.add( + py, + "scheduler_create", + py_fn!( + py, + scheduler_create( + tasks_ptr: PyTasks, + types_ptr: PyTypes, + build_root_buf: String, + local_store_dir_buf: String, + ignore_patterns: Vec, + use_gitignore: bool, + root_type_ids: Vec, + remote_execution: bool, + remote_store_servers: Vec, + remote_execution_server: Option, + remote_execution_process_cache_namespace: Option, + remote_instance_name: Option, + remote_root_ca_certs_path: Option, + remote_oauth_bearer_token_path: Option, + remote_store_thread_count: u64, + remote_store_chunk_bytes: u64, + remote_store_connection_limit: u64, + remote_store_chunk_upload_timeout_seconds: u64, + remote_store_rpc_retries: u64, + remote_execution_extra_platform_properties: Vec<(String, String)>, + process_execution_local_parallelism: u64, + process_execution_remote_parallelism: u64, + process_execution_cleanup_local_dirs: bool, + process_execution_speculation_delay: f64, + process_execution_speculation_strategy_buf: String, + process_execution_use_local_cache: bool, + remote_execution_headers: Vec<(String, String)>, + process_execution_local_enable_nailgun: bool, + experimental_fs_watcher: bool + ) + ), + )?; + + m.add_class::(py)?; + m.add_class::(py)?; + m.add_class::(py)?; + m.add_class::(py)?; + m.add_class::(py)?; + m.add_class::(py)?; + + m.add_class::(py)?; + m.add_class::(py)?; + m.add_class::(py)?; + + Ok(()) +}); + +py_class!(class PyTasks |py| { + data tasks: RefCell; + def __new__(_cls) -> CPyResult { + Self::create_instance(py, RefCell::new(Tasks::new())) + } +}); + +py_class!(class PyTypes |py| { + data types: RefCell>; + + def __new__( + _cls, + construct_directory_digest: PyObject, + directory_digest: PyType, + construct_snapshot: PyObject, + snapshot: PyType, + construct_file_content: PyObject, + construct_files_content: PyObject, + files_content: PyType, + construct_process_result: PyObject, + construct_materialize_directories_results: PyObject, + construct_materialize_directory_result: PyObject, + address: PyType, + path_globs: PyType, + directories_to_merge: PyType, + directory_with_prefix_to_strip: PyType, + directory_with_prefix_to_add: PyType, + input_files_content: PyType, + dir: PyType, + file: PyType, + link: PyType, + platform: PyType, + multi_platform_process: PyType, + process_result: PyType, + coroutine: PyType, + url_to_fetch: PyType, + string: PyType, + bytes: PyType, + construct_interactive_process_result: PyObject, + interactive_process_request: PyType, + interactive_process_result: PyType, + snapshot_subset: PyType, + construct_platform: PyObject + ) -> CPyResult { + Self::create_instance( + py, + RefCell::new(Some(Types { + construct_directory_digest: Function(externs::key_for(py, construct_directory_digest.into())?), + directory_digest: externs::type_for(py, directory_digest), + construct_snapshot: Function(externs::key_for(py, construct_snapshot.into())?), + snapshot: externs::type_for(py, snapshot), + construct_file_content: Function(externs::key_for(py, construct_file_content.into())?), + construct_files_content: Function(externs::key_for(py, construct_files_content.into())?), + files_content: externs::type_for(py, files_content), + construct_process_result: Function(externs::key_for(py, construct_process_result.into())?), + construct_materialize_directories_results: Function(externs::key_for(py, construct_materialize_directories_results.into())?), + construct_materialize_directory_result: Function(externs::key_for(py, construct_materialize_directory_result.into())?), + address: externs::type_for(py, address), + path_globs: externs::type_for(py, path_globs), + directories_to_merge: externs::type_for(py, directories_to_merge), + directory_with_prefix_to_strip: externs::type_for(py, directory_with_prefix_to_strip), + directory_with_prefix_to_add: externs::type_for(py, directory_with_prefix_to_add), + input_files_content: externs::type_for(py, input_files_content), + dir: externs::type_for(py, dir), + file: externs::type_for(py, file), + link: externs::type_for(py, link), + platform: externs::type_for(py, platform), + multi_platform_process: externs::type_for(py, multi_platform_process), + process_result: externs::type_for(py, process_result), + coroutine: externs::type_for(py, coroutine), + url_to_fetch: externs::type_for(py, url_to_fetch), + string: externs::type_for(py, string), + bytes: externs::type_for(py, bytes), + construct_interactive_process_result: Function(externs::key_for(py, construct_interactive_process_result.into())?), + interactive_process_request: externs::type_for(py, interactive_process_request), + interactive_process_result: externs::type_for(py, interactive_process_result), + snapshot_subset: externs::type_for(py, snapshot_subset), + construct_platform: Function(externs::key_for(py, construct_platform.into())?), + })), + ) } -} +}); + +py_class!(class PyScheduler |py| { + data scheduler: Scheduler; +}); + +py_class!(class PySession |py| { + data session: Session; + def __new__(_cls, + scheduler_ptr: PyScheduler, + should_record_zipkin_spans: bool, + should_render_ui: bool, + build_id: String, + should_report_workunits: bool + ) -> CPyResult { + Self::create_instance(py, Session::new( + scheduler_ptr.scheduler(py), + should_record_zipkin_spans, + should_render_ui, + build_id, + should_report_workunits, + ) + ) + } +}); -#[no_mangle] -pub extern "C" fn externs_set( - context: *const ExternContext, - log_level: u8, - none: Handle, - call: CallExtern, - generator_send: GeneratorSendExtern, - get_type_for: GetTypeForExtern, - get_handle_from_type_id: GetHandleFromTypeIdExtern, - is_union: IsUnionExtern, - identify: IdentifyExtern, - equals: EqualsExtern, - clone_val: CloneValExtern, - drop_handles: DropHandlesExtern, - type_to_str: TypeToStrExtern, - val_to_bytes: ValToBytesExtern, - val_to_str: ValToStrExtern, - store_tuple: StoreTupleExtern, - store_set: StoreTupleExtern, - store_dict: StoreTupleExtern, - store_bytes: StoreBytesExtern, - store_utf8: StoreUtf8Extern, - store_u64: StoreU64Extern, - store_i64: StoreI64Extern, - store_f64: StoreF64Extern, - store_bool: StoreBoolExtern, - project_ignoring_type: ProjectIgnoringTypeExtern, - project_multi: ProjectMultiExtern, - val_to_bool: ValToBoolExtern, - create_exception: CreateExceptionExtern, -) { - externs::set_externs(Externs { - context, - log_level, - none, - call, - generator_send, - get_type_for, - get_handle_from_type_id, - is_union, - identify, - equals, - clone_val, - drop_handles, - type_to_str, - val_to_bytes, - val_to_str, - store_tuple, - store_set, - store_dict, - store_bytes, - store_utf8, - store_u64, - store_i64, - store_f64, - store_bool, - project_ignoring_type, - project_multi, - val_to_bool, - create_exception, - }); -} +py_class!(class PyExecutionRequest |py| { + data execution_request: RefCell; + def __new__(_cls) -> CPyResult { + Self::create_instance(py, RefCell::new(ExecutionRequest::new())) + } +}); -#[no_mangle] -pub extern "C" fn key_for(value: Handle) -> Key { - externs::key_for(value.into()) -} +py_class!(class PyResult |py| { + data _is_throw: bool; + data _handle: PyObject; + + def __new__(_cls, is_throw: bool, handle: PyObject) -> CPyResult { + Self::create_instance(py, is_throw, handle) + } + + def is_throw(&self) -> CPyResult { + Ok(*self._is_throw(py)) + } -#[no_mangle] -pub extern "C" fn val_for(key: Key) -> Handle { - externs::val_for(&key).into() + def handle(&self) -> CPyResult { + Ok(self._handle(py).clone_ref(py)) + } +}); + +fn py_result_from_root(py: Python, result: Result) -> PyResult { + match result { + Ok(val) => PyResult::create_instance(py, false, val.into()).unwrap(), + Err(f) => { + let val = match f { + f @ Failure::Invalidated => externs::create_exception(&format!("{}", f)), + Failure::Throw(exc, _) => exc, + }; + PyResult::create_instance(py, true, val.into()).unwrap() + } + } } -// Like PyResult, but for values that aren't Python values. -// throw_handle will be set iff is_throw, otherwise accessing it will likely segfault. -// raw_pointer will be set iff !is_throw, otherwise accessing it will likely segfault. -#[repr(C)] -pub struct RawResult { - is_throw: bool, - throw_handle: Handle, - raw_pointer: *const raw::c_void, +// TODO: It's not clear how to return "nothing" (None) in a CPyResult, so this is a placeholder. +type PyUnitResult = CPyResult>; + +fn externs_set(_: Python, externs: PyObject) -> PyUnitResult { + externs::set_externs(externs); + Ok(None) } /// @@ -178,228 +490,93 @@ pub struct RawResult { /// The given Tasks struct will be cloned, so no additional mutation of the reference will /// affect the created Scheduler. /// -#[no_mangle] -pub extern "C" fn scheduler_create( - tasks_ptr: *mut Tasks, - types: Types, - build_root_buf: Buffer, - local_store_dir_buf: Buffer, - ignore_patterns_buf: BufferBuffer, +fn scheduler_create( + py: Python, + tasks_ptr: PyTasks, + types_ptr: PyTypes, + build_root_buf: String, + local_store_dir_buf: String, + ignore_patterns: Vec, use_gitignore: bool, - root_type_ids: TypeIdBuffer, + root_type_ids: Vec, remote_execution: bool, - remote_store_servers_buf: BufferBuffer, - remote_execution_server: Buffer, - remote_execution_process_cache_namespace: Buffer, - remote_instance_name: Buffer, - remote_root_ca_certs_path_buffer: Buffer, - remote_oauth_bearer_token_path_buffer: Buffer, + remote_store_servers: Vec, + remote_execution_server: Option, + remote_execution_process_cache_namespace: Option, + remote_instance_name: Option, + remote_root_ca_certs_path: Option, + remote_oauth_bearer_token_path: Option, remote_store_thread_count: u64, remote_store_chunk_bytes: u64, remote_store_connection_limit: u64, remote_store_chunk_upload_timeout_seconds: u64, remote_store_rpc_retries: u64, - remote_execution_extra_platform_properties_buf: BufferBuffer, + remote_execution_extra_platform_properties: Vec<(String, String)>, process_execution_local_parallelism: u64, process_execution_remote_parallelism: u64, process_execution_cleanup_local_dirs: bool, process_execution_speculation_delay: f64, - process_execution_speculation_strategy_buf: Buffer, + process_execution_speculation_strategy: String, process_execution_use_local_cache: bool, - remote_execution_headers_buf: BufferBuffer, + remote_execution_headers: Vec<(String, String)>, process_execution_local_enable_nailgun: bool, experimental_fs_watcher: bool, -) -> RawResult { - match make_core( - tasks_ptr, - types, - build_root_buf, - local_store_dir_buf, - ignore_patterns_buf, - use_gitignore, - root_type_ids, - remote_execution, - remote_store_servers_buf, - remote_execution_server, - remote_execution_process_cache_namespace, - remote_instance_name, - remote_root_ca_certs_path_buffer, - remote_oauth_bearer_token_path_buffer, - remote_store_thread_count, - remote_store_chunk_bytes, - remote_store_connection_limit, - remote_store_chunk_upload_timeout_seconds, - remote_store_rpc_retries, - remote_execution_extra_platform_properties_buf, - process_execution_local_parallelism, - process_execution_remote_parallelism, - process_execution_cleanup_local_dirs, - process_execution_speculation_delay, - process_execution_speculation_strategy_buf, - process_execution_use_local_cache, - remote_execution_headers_buf, - process_execution_local_enable_nailgun, - experimental_fs_watcher, - ) { - Ok(core) => RawResult { - is_throw: false, - raw_pointer: Box::into_raw(Box::new(Scheduler::new(core))) as *const raw::c_void, - throw_handle: Handle(std::ptr::null()), - }, - Err(err) => RawResult { - is_throw: true, - throw_handle: externs::create_exception(&err).into(), - raw_pointer: std::ptr::null(), - }, - } -} - -fn make_core( - tasks_ptr: *mut Tasks, - types: Types, - build_root_buf: Buffer, - local_store_dir_buf: Buffer, - ignore_patterns_buf: BufferBuffer, - use_gitignore: bool, - root_type_ids: TypeIdBuffer, - remote_execution: bool, - remote_store_servers_buf: BufferBuffer, - remote_execution_server: Buffer, - remote_execution_process_cache_namespace: Buffer, - remote_instance_name: Buffer, - remote_root_ca_certs_path_buffer: Buffer, - remote_oauth_bearer_token_path_buffer: Buffer, - remote_store_thread_count: u64, - remote_store_chunk_bytes: u64, - remote_store_connection_limit: u64, - remote_store_chunk_upload_timeout_seconds: u64, - remote_store_rpc_retries: u64, - remote_execution_extra_platform_properties_buf: BufferBuffer, - process_execution_local_parallelism: u64, - process_execution_remote_parallelism: u64, - process_execution_cleanup_local_dirs: bool, - process_execution_speculation_delay: f64, - process_execution_speculation_strategy_buf: Buffer, - process_execution_use_local_cache: bool, - remote_execution_headers_buf: BufferBuffer, - process_execution_local_enable_nailgun: bool, - experimental_fs_watcher: bool, -) -> Result { - let root_type_ids = root_type_ids.to_vec(); - let ignore_patterns = ignore_patterns_buf - .to_strings() - .map_err(|err| format!("Failed to decode ignore patterns as UTF8: {:?}", err))?; - let intrinsics = Intrinsics::new(&types); - #[allow(clippy::redundant_closure)] // I couldn't find an easy way to remove this closure. - let mut tasks = with_tasks(tasks_ptr, |tasks| tasks.clone()); - tasks.intrinsics_set(&intrinsics); - // Allocate on the heap via `Box` and return a raw pointer to the boxed value. - let remote_store_servers_vec = remote_store_servers_buf - .to_strings() - .map_err(|err| format!("Failed to decode remote_store_servers: {}", err))?; - let remote_execution_server_string = remote_execution_server - .to_string() - .map_err(|err| format!("remote_execution_server was not valid UTF8: {}", err))?; - let remote_execution_process_cache_namespace_string = remote_execution_process_cache_namespace - .to_string() - .map_err(|err| { - format!( - "remote_execution_process_cache_namespace was not valid UTF8: {}", - err - ) - })?; - let remote_instance_name_string = remote_instance_name - .to_string() - .map_err(|err| format!("remote_instance_name was not valid UTF8: {}", err))?; - let remote_execution_extra_platform_properties_list = remote_execution_extra_platform_properties_buf - .to_strings() - .map_err(|err| format!("Failed to decode remote_execution_extra_platform_properties: {}", err))? - .into_iter() - .map(|s| { - let mut parts: Vec<_> = s.splitn(2, '=').collect(); - if parts.len() != 2 { - return Err(format!("Got invalid remote_execution_extra_platform_properties - must be of format key=value but got {}", s)); - } - let (value, key) = (parts.pop().unwrap().to_owned(), parts.pop().unwrap().to_owned()); - Ok((key, value)) - }).collect::, _>>()?; - let remote_root_ca_certs_path = { - let path = remote_root_ca_certs_path_buffer.to_os_string(); - if path.is_empty() { - None - } else { - Some(PathBuf::from(path)) - } - }; - - let remote_oauth_bearer_token_path = { - let path = remote_oauth_bearer_token_path_buffer.to_os_string(); - if path.is_empty() { - None - } else { - Some(PathBuf::from(path)) - } - }; - - let process_execution_speculation_strategy = process_execution_speculation_strategy_buf - .to_string() - .map_err(|err| { - format!( - "process_execution_speculation_strategy was not valid UTF8: {}", - err - ) - })?; - - let remote_execution_headers = remote_execution_headers_buf.to_map("remote-execution-headers")?; - Core::new( - root_type_ids, - tasks, - types, - intrinsics, - PathBuf::from(build_root_buf.to_os_string()), - &ignore_patterns, - use_gitignore, - PathBuf::from(local_store_dir_buf.to_os_string()), - remote_execution, - remote_store_servers_vec, - if remote_execution_server_string.is_empty() { - None - } else { - Some(remote_execution_server_string) - }, - if remote_execution_process_cache_namespace_string.is_empty() { - None - } else { - Some(remote_execution_process_cache_namespace_string) - }, - if remote_instance_name_string.is_empty() { - None - } else { - Some(remote_instance_name_string) - }, - remote_root_ca_certs_path, - remote_oauth_bearer_token_path, - remote_store_thread_count as usize, - remote_store_chunk_bytes as usize, - Duration::from_secs(remote_store_chunk_upload_timeout_seconds), - remote_store_rpc_retries as usize, - remote_store_connection_limit as usize, - remote_execution_extra_platform_properties_list, - process_execution_local_parallelism as usize, - process_execution_remote_parallelism as usize, - process_execution_cleanup_local_dirs, - // convert delay from float to millisecond resolution. use from_secs_f64 when it is - // off nightly. https://github.com/rust-lang/rust/issues/54361 - Duration::from_millis((process_execution_speculation_delay * 1000.0).round() as u64), - process_execution_speculation_strategy, - process_execution_use_local_cache, - remote_execution_headers, - process_execution_local_enable_nailgun, - experimental_fs_watcher, +) -> CPyResult { + let core: Result = Ok(()).and_then(move |()| { + let types = types_ptr + .types(py) + .borrow_mut() + .take() + .ok_or_else(|| "An instance of PyTypes may only be used once.".to_owned())?; + let intrinsics = Intrinsics::new(&types); + let mut tasks = tasks_ptr.tasks(py).replace(Tasks::new()); + tasks.intrinsics_set(&intrinsics); + + Core::new( + root_type_ids + .into_iter() + .map(|pt| externs::type_for(py, pt)) + .collect(), + tasks, + types, + intrinsics, + PathBuf::from(build_root_buf), + &ignore_patterns, + use_gitignore, + PathBuf::from(local_store_dir_buf), + remote_execution, + remote_store_servers, + remote_execution_server, + remote_execution_process_cache_namespace, + remote_instance_name, + remote_root_ca_certs_path.map(PathBuf::from), + remote_oauth_bearer_token_path.map(PathBuf::from), + remote_store_thread_count as usize, + remote_store_chunk_bytes as usize, + Duration::from_secs(remote_store_chunk_upload_timeout_seconds), + remote_store_rpc_retries as usize, + remote_store_connection_limit as usize, + remote_execution_extra_platform_properties, + process_execution_local_parallelism as usize, + process_execution_remote_parallelism as usize, + process_execution_cleanup_local_dirs, + // convert delay from float to millisecond resolution. use from_secs_f64 when it is + // off nightly. https://github.com/rust-lang/rust/issues/54361 + Duration::from_millis((process_execution_speculation_delay * 1000.0).round() as u64), + process_execution_speculation_strategy, + process_execution_use_local_cache, + remote_execution_headers.into_iter().collect(), + process_execution_local_enable_nailgun, + experimental_fs_watcher, + ) + }); + PyScheduler::create_instance( + py, + Scheduler::new(core.map_err(|e| PyErr::new::(py, (e,)))?), ) } -fn started_workunit_to_py_value(started_workunit: &StartedWorkUnit) -> Value { +fn started_workunit_to_py_value(started_workunit: &StartedWorkUnit) -> CPyResult { use std::time::UNIX_EPOCH; let duration = started_workunit .start_time @@ -438,10 +615,10 @@ fn started_workunit_to_py_value(started_workunit: &StartedWorkUnit) -> Value { )); } - externs::store_dict(&dict_entries.as_slice()) + externs::store_dict(dict_entries) } -fn workunit_to_py_value(workunit: &WorkUnit) -> Value { +fn workunit_to_py_value(workunit: &WorkUnit) -> CPyResult { let mut dict_entries = vec![ ( externs::store_utf8("name"), @@ -482,59 +659,56 @@ fn workunit_to_py_value(workunit: &WorkUnit) -> Value { )); } - externs::store_dict(&dict_entries.as_slice()) + externs::store_dict(dict_entries) } -fn workunits_to_py_tuple_value<'a>(workunits: impl Iterator) -> Value { +fn workunits_to_py_tuple_value<'a>( + workunits: impl Iterator, +) -> CPyResult { let workunit_values = workunits .map(|workunit: &WorkUnit| workunit_to_py_value(workunit)) - .collect::>(); - externs::store_tuple(&workunit_values) + .collect::, _>>()?; + Ok(externs::store_tuple(workunit_values)) } fn started_workunits_to_py_tuple_value<'a>( workunits: impl Iterator, -) -> Value { +) -> CPyResult { let workunit_values = workunits .map(|started_workunit: &StartedWorkUnit| started_workunit_to_py_value(started_workunit)) - .collect::>(); - externs::store_tuple(&workunit_values) + .collect::, _>>()?; + Ok(externs::store_tuple(workunit_values)) } -#[no_mangle] -pub extern "C" fn poll_session_workunits( - scheduler_ptr: *mut Scheduler, - session_ptr: *mut Session, -) -> Handle { - with_scheduler(scheduler_ptr, |_scheduler| { - with_session(session_ptr, |session| { - let value = session +fn poll_session_workunits( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, +) -> CPyResult { + with_scheduler(py, scheduler_ptr, |_scheduler| { + with_session(py, session_ptr, |session| { + session .workunit_store() .with_latest_workunits(|started, completed| { let mut started_iter = started.iter(); - let started = started_workunits_to_py_tuple_value(&mut started_iter); + let started = started_workunits_to_py_tuple_value(&mut started_iter)?; let mut completed_iter = completed.iter(); - let completed = workunits_to_py_tuple_value(&mut completed_iter); + let completed = workunits_to_py_tuple_value(&mut completed_iter)?; - externs::store_tuple(&[started, completed]) - }); - value.into() + Ok(externs::store_tuple(vec![started, completed]).into()) + }) }) }) } -/// -/// Returns a Handle representing a dictionary where key is metric name string and value is -/// metric value int. -/// -#[no_mangle] -pub extern "C" fn scheduler_metrics( - scheduler_ptr: *mut Scheduler, - session_ptr: *mut Session, -) -> Handle { - with_scheduler(scheduler_ptr, |scheduler| { - with_session(session_ptr, |session| { +fn scheduler_metrics( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, +) -> CPyResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + with_session(py, session_ptr, |session| { let mut values = scheduler .metrics(session) .into_iter() @@ -543,301 +717,253 @@ pub extern "C" fn scheduler_metrics( if session.should_record_zipkin_spans() { let workunits = session.workunit_store().get_workunits(); let mut iter = workunits.iter(); - let value = workunits_to_py_tuple_value(&mut iter); + let value = workunits_to_py_tuple_value(&mut iter)?; values.push((externs::store_utf8("engine_workunits"), value)); }; - externs::store_dict(values.as_slice()).into() + externs::store_dict(values).map(|d| d.consume_into_py_object(py)) }) }) } -#[no_mangle] -pub extern "C" fn scheduler_execute( - scheduler_ptr: *mut Scheduler, - session_ptr: *mut Session, - execution_request_ptr: *mut ExecutionRequest, -) -> *const RawNodes { - with_scheduler(scheduler_ptr, |scheduler| { - with_execution_request(execution_request_ptr, |execution_request| { - with_session(session_ptr, |session| { +fn scheduler_execute( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, + execution_request_ptr: PyExecutionRequest, +) -> CPyResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + with_execution_request(py, execution_request_ptr, |execution_request| { + with_session(py, session_ptr, |session| { // TODO: A parent_id should be an explicit argument. session.workunit_store().init_thread_state(None); - match scheduler.execute(execution_request, session) { - Ok(raw_results) => Box::into_raw(RawNodes::create(raw_results)), - //TODO: Passing a raw null pointer to Python is a less-than-ideal way - //of noting an error condition. When we have a better way to send complicated - //error-signaling values over the FFI boundary, we should revisit this. - Err(ExecutionTermination::KeyboardInterrupt) => std::ptr::null(), - } + py.allow_threads(|| scheduler.execute(execution_request, session)) + .map(|root_results| { + let py_results = root_results + .into_iter() + .map(|rr| py_result_from_root(py, rr).into_object()) + .collect::>(); + PyTuple::new(py, &py_results) + }) + .map_err(|ExecutionTermination::KeyboardInterrupt| { + PyErr::new::(py, NoArgs) + }) }) }) }) } -#[no_mangle] -pub extern "C" fn scheduler_destroy(scheduler_ptr: *mut Scheduler) { - // convert the raw pointer back to a Box (without `forget`ing it) in order to cause it - // to be destroyed at the end of this function. - let _ = unsafe { Box::from_raw(scheduler_ptr) }; -} - -#[no_mangle] -pub extern "C" fn execution_add_root_select( - scheduler_ptr: *mut Scheduler, - execution_request_ptr: *mut ExecutionRequest, - param_vals: HandleBuffer, - product: TypeId, -) -> PyResult { - with_scheduler(scheduler_ptr, |scheduler| { - with_execution_request(execution_request_ptr, |execution_request| { - Params::new(param_vals.to_vec().into_iter().map(externs::key_for)) +fn execution_add_root_select( + py: Python, + scheduler_ptr: PyScheduler, + execution_request_ptr: PyExecutionRequest, + param_vals: Vec, + product: PyType, +) -> PyUnitResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + with_execution_request(py, execution_request_ptr, |execution_request| { + let product = externs::type_for(py, product); + let keys = param_vals + .into_iter() + .map(|p| externs::key_for(py, p.into())) + .collect::, _>>()?; + Params::new(keys) .and_then(|params| scheduler.add_root_select(execution_request, params, product)) - .into() + .map_err(|e| PyErr::new::(py, (e,))) + .map(|()| None) }) }) } -#[no_mangle] -pub extern "C" fn tasks_create() -> *const Tasks { - // Allocate on the heap via `Box` and return a raw pointer to the boxed value. - Box::into_raw(Box::new(Tasks::new())) -} - -#[no_mangle] -pub extern "C" fn tasks_task_begin( - tasks_ptr: *mut Tasks, - func: Function, - output_type: TypeId, +fn tasks_task_begin( + py: Python, + tasks_ptr: PyTasks, + func: PyObject, + output_type: PyType, cacheable: bool, -) { - with_tasks(tasks_ptr, |tasks| { +) -> PyUnitResult { + with_tasks(py, tasks_ptr, |tasks| { + let func = Function(externs::key_for(py, func.into())?); + let output_type = externs::type_for(py, output_type); tasks.task_begin(func, output_type, cacheable); + Ok(None) }) } -#[no_mangle] -pub extern "C" fn tasks_add_get(tasks_ptr: *mut Tasks, product: TypeId, subject: TypeId) { - with_tasks(tasks_ptr, |tasks| { +fn tasks_add_get(py: Python, tasks_ptr: PyTasks, product: PyType, subject: PyType) -> PyUnitResult { + with_tasks(py, tasks_ptr, |tasks| { + let product = externs::type_for(py, product); + let subject = externs::type_for(py, subject); tasks.add_get(product, subject); + Ok(None) }) } -#[no_mangle] -pub extern "C" fn tasks_add_select(tasks_ptr: *mut Tasks, product: TypeId) { - with_tasks(tasks_ptr, |tasks| { +fn tasks_add_select(py: Python, tasks_ptr: PyTasks, product: PyType) -> PyUnitResult { + with_tasks(py, tasks_ptr, |tasks| { + let product = externs::type_for(py, product); tasks.add_select(product); + Ok(None) }) } -#[no_mangle] -pub extern "C" fn tasks_add_display_info( - tasks_ptr: *mut Tasks, - name_ptr: *const raw::c_char, - desc_ptr: *const raw::c_char, -) { - let name = unsafe { str_ptr_to_string(name_ptr) }; - let desc = unsafe { str_ptr_to_string(desc_ptr) }; - with_tasks(tasks_ptr, |tasks| { +fn tasks_add_display_info( + py: Python, + tasks_ptr: PyTasks, + name: String, + desc: String, +) -> PyUnitResult { + with_tasks(py, tasks_ptr, |tasks| { tasks.add_display_info(name, desc); + Ok(None) }) } -#[no_mangle] -pub extern "C" fn tasks_task_end(tasks_ptr: *mut Tasks) { - with_tasks(tasks_ptr, |tasks| { +fn tasks_task_end(py: Python, tasks_ptr: PyTasks) -> PyUnitResult { + with_tasks(py, tasks_ptr, |tasks| { tasks.task_end(); + Ok(None) }) } -#[no_mangle] -pub extern "C" fn tasks_destroy(tasks_ptr: *mut Tasks) { - let _ = unsafe { Box::from_raw(tasks_ptr) }; -} - -#[no_mangle] -pub extern "C" fn graph_invalidate(scheduler_ptr: *mut Scheduler, paths_buf: BufferBuffer) -> u64 { - with_scheduler(scheduler_ptr, |scheduler| { - let paths = paths_buf - .to_os_strings() - .into_iter() - .map(PathBuf::from) - .collect(); - scheduler.invalidate(&paths) as u64 +fn graph_invalidate(py: Python, scheduler_ptr: PyScheduler, paths: Vec) -> CPyResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + let paths = paths.into_iter().map(PathBuf::from).collect(); + py.allow_threads(|| Ok(scheduler.invalidate(&paths) as u64)) }) } -#[no_mangle] -pub extern "C" fn graph_invalidate_all_paths(scheduler_ptr: *mut Scheduler) -> u64 { - with_scheduler(scheduler_ptr, |scheduler| { - scheduler.invalidate_all_paths() as u64 +fn graph_invalidate_all_paths(py: Python, scheduler_ptr: PyScheduler) -> CPyResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + py.allow_threads(|| Ok(scheduler.invalidate_all_paths() as u64)) }) } -#[no_mangle] -pub extern "C" fn check_invalidation_watcher_liveness(scheduler_ptr: *mut Scheduler) -> bool { - with_scheduler(scheduler_ptr, |scheduler| scheduler.core.watcher.is_alive()) +fn check_invalidation_watcher_liveness(py: Python, scheduler_ptr: PyScheduler) -> CPyResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + Ok(scheduler.core.watcher.is_alive()) + }) } -#[no_mangle] -pub extern "C" fn graph_len(scheduler_ptr: *mut Scheduler) -> u64 { - with_scheduler(scheduler_ptr, |scheduler| scheduler.core.graph.len() as u64) -} +fn decompress_tarball(py: Python, tar_path: String, output_dir: String) -> PyUnitResult { + let tar_path = PathBuf::from(tar_path); + let output_dir = PathBuf::from(output_dir); -#[no_mangle] -pub extern "C" fn decompress_tarball( - tar_path: *const raw::c_char, - output_dir: *const raw::c_char, -) -> PyResult { - let tar_path_str = PathBuf::from( - unsafe { CStr::from_ptr(tar_path) } - .to_string_lossy() - .into_owned(), - ); - let output_dir_str = PathBuf::from( - unsafe { CStr::from_ptr(output_dir) } - .to_string_lossy() - .into_owned(), - ); - - tar_api::decompress_tgz(tar_path_str.as_path(), output_dir_str.as_path()) + // TODO: Need to do a whole lot more releasing of the GIL. + py.allow_threads(|| tar_api::decompress_tgz(tar_path.as_path(), output_dir.as_path())) .map_err(|e| { - format!( - "Failed to untar {:?} to {:?}:\n{:?}", - tar_path_str.as_path(), - output_dir_str.as_path(), + let e = format!( + "Failed to untar {} to {}: {:?}", + tar_path.display(), + output_dir.display(), e - ) + ); + let gil = Python::acquire_gil(); + PyErr::new::(gil.python(), (e,)) }) - .into() + .map(|()| None) +} + +fn graph_len(py: Python, scheduler_ptr: PyScheduler) -> CPyResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + Ok(scheduler.core.graph.len() as u64) + }) } -#[no_mangle] -pub extern "C" fn graph_visualize( - scheduler_ptr: *mut Scheduler, - session_ptr: *mut Session, - path_ptr: *const raw::c_char, -) -> PyResult { - with_scheduler(scheduler_ptr, |scheduler| { - with_session(session_ptr, |session| { - let path_str = unsafe { CStr::from_ptr(path_ptr).to_string_lossy().into_owned() }; - let path = PathBuf::from(path_str); +fn graph_visualize( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, + path: String, +) -> PyUnitResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + with_session(py, session_ptr, |session| { + let path = PathBuf::from(path); scheduler .visualize(session, path.as_path()) - .map_err(|e| format!("Failed to visualize to {}: {:?}", path.display(), e)) - .into() + .map_err(|e| { + let e = format!("Failed to visualize to {}: {:?}", path.display(), e); + PyErr::new::(py, (e,)) + }) + .map(|()| None) }) }) } -#[no_mangle] -pub extern "C" fn graph_trace( - scheduler_ptr: *mut Scheduler, - session_ptr: *mut Session, - execution_request_ptr: *mut ExecutionRequest, - path_ptr: *const raw::c_char, -) { - let path_str = unsafe { CStr::from_ptr(path_ptr).to_string_lossy().into_owned() }; - let path = PathBuf::from(path_str); - with_scheduler(scheduler_ptr, |scheduler| { - with_session(session_ptr, |session| { - with_execution_request(execution_request_ptr, |execution_request| { - scheduler - .trace(session, execution_request, path.as_path()) - .unwrap_or_else(|e| { - println!("Failed to write trace to {}: {:?}", path.display(), e); - }); - }); - }); - }); -} - -#[no_mangle] -pub extern "C" fn nodes_destroy(raw_nodes_ptr: *mut RawNodes) { - let _ = unsafe { Box::from_raw(raw_nodes_ptr) }; -} - -#[no_mangle] -pub extern "C" fn session_create( - scheduler_ptr: *mut Scheduler, - should_record_zipkin_spans: bool, - should_render_ui: bool, - build_id: Buffer, - should_report_workunits: bool, -) -> *const Session { - let build_id = build_id - .to_string() - .expect("build_id was not a valid UTF-8 string"); - with_scheduler(scheduler_ptr, |scheduler| { - Box::into_raw(Box::new(Session::new( - scheduler, - should_record_zipkin_spans, - should_render_ui, - build_id, - should_report_workunits, - ))) +fn graph_trace( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, + execution_request_ptr: PyExecutionRequest, + path: String, +) -> PyUnitResult { + let path = PathBuf::from(path); + with_scheduler(py, scheduler_ptr, |scheduler| { + with_session(py, session_ptr, |session| { + with_execution_request(py, execution_request_ptr, |execution_request| { + py.allow_threads(|| scheduler.trace(session, execution_request, path.as_path())) + .map_err(|e| { + let e = format!("Failed to write trace to {}: {:?}", path.display(), e); + PyErr::new::(py, (e,)) + }) + .map(|()| None) + }) + }) }) } -#[no_mangle] -pub extern "C" fn session_destroy(ptr: *mut Session) { - let _ = unsafe { Box::from_raw(ptr) }; -} - -#[no_mangle] -pub extern "C" fn execution_request_create() -> *const ExecutionRequest { - Box::into_raw(Box::new(ExecutionRequest::new())) -} - -#[no_mangle] -pub extern "C" fn execution_request_destroy(ptr: *mut ExecutionRequest) { - let _ = unsafe { Box::from_raw(ptr) }; -} - -#[no_mangle] -pub extern "C" fn validator_run(scheduler_ptr: *mut Scheduler) -> PyResult { - with_scheduler(scheduler_ptr, |scheduler| { - scheduler.core.rule_graph.validate().into() +fn validator_run(py: Python, scheduler_ptr: PyScheduler) -> PyUnitResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + scheduler + .core + .rule_graph + .validate() + .map_err(|e| PyErr::new::(py, (e,))) + .map(|()| None) }) } -#[no_mangle] -pub extern "C" fn rule_graph_visualize( - scheduler_ptr: *mut Scheduler, - path_ptr: *const raw::c_char, -) -> PyResult { - with_scheduler(scheduler_ptr, |scheduler| { - let path_str = unsafe { CStr::from_ptr(path_ptr).to_string_lossy().into_owned() }; - let path = PathBuf::from(path_str); +fn rule_graph_visualize(py: Python, scheduler_ptr: PyScheduler, path: String) -> PyUnitResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + let path = PathBuf::from(path); // TODO(#7117): we want to represent union types in the graph visualizer somehow!!! write_to_file(path.as_path(), &scheduler.core.rule_graph) - .map_err(|e| format!("Failed to visualize to {}: {:?}", path.display(), e)) - .into() + .map_err(|e| { + let e = format!("Failed to visualize to {}: {:?}", path.display(), e); + PyErr::new::(py, (e,)) + }) + .map(|()| None) }) } -#[no_mangle] -pub extern "C" fn rule_subgraph_visualize( - scheduler_ptr: *mut Scheduler, - param_types: TypeIdBuffer, - product_type: TypeId, - path_ptr: *const raw::c_char, -) -> PyResult { - with_scheduler(scheduler_ptr, |scheduler| { - let path_str = unsafe { CStr::from_ptr(path_ptr).to_string_lossy().into_owned() }; - let path = PathBuf::from(path_str); +fn rule_subgraph_visualize( + py: Python, + scheduler_ptr: PyScheduler, + param_types: Vec, + product_type: PyType, + path: String, +) -> PyUnitResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + let param_types = param_types + .into_iter() + .map(|pt| externs::type_for(py, pt)) + .collect::>(); + let product_type = externs::type_for(py, product_type); + let path = PathBuf::from(path); // TODO(#7117): we want to represent union types in the graph visualizer somehow!!! - match scheduler + let subgraph = scheduler .core .rule_graph - .subgraph(param_types.to_vec(), product_type) - { - Ok(subgraph) => write_to_file(path.as_path(), &subgraph) - .map_err(|e| format!("Failed to visualize to {}: {:?}", path.display(), e)) - .into(), - e @ Err(_) => e.map(|_| ()).into(), - } + .subgraph(param_types, product_type) + .map_err(|e| PyErr::new::(py, (e,)))?; + + write_to_file(path.as_path(), &subgraph) + .map_err(|e| { + let e = format!("Failed to visualize to {}: {:?}", path.display(), e); + PyErr::new::(py, (e,)) + }) + .map(|()| None) }) } @@ -852,8 +978,7 @@ fn generate_panic_string(payload: &(dyn Any + Send)) -> String { } } -#[no_mangle] -pub extern "C" fn set_panic_handler() { +fn set_panic_handler(_: Python) -> PyUnitResult { panic::set_hook(Box::new(|panic_info| { let payload = panic_info.payload(); let mut panic_str = generate_panic_string(payload); @@ -868,60 +993,55 @@ pub extern "C" fn set_panic_handler() { let panic_file_bug_str = "Please set RUST_BACKTRACE=1, re-run, and then file a bug at https://github.com/pantsbuild/pants/issues."; error!("{}", panic_file_bug_str); })); + Ok(None) } -#[no_mangle] -pub extern "C" fn garbage_collect_store(scheduler_ptr: *mut Scheduler) { - with_scheduler(scheduler_ptr, |scheduler| { - match scheduler.core.store().garbage_collect( - store::DEFAULT_LOCAL_STORE_GC_TARGET_BYTES, - store::ShrinkBehavior::Fast, - ) { - Ok(_) => {} - Err(err) => error!("{}", err), - } - }); +fn garbage_collect_store(py: Python, scheduler_ptr: PyScheduler) -> PyUnitResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + py.allow_threads(|| { + scheduler.core.store().garbage_collect( + store::DEFAULT_LOCAL_STORE_GC_TARGET_BYTES, + store::ShrinkBehavior::Fast, + ) + }) + .map_err(|e| PyErr::new::(py, (e,))) + .map(|()| None) + }) } -#[no_mangle] -pub extern "C" fn lease_files_in_graph(scheduler_ptr: *mut Scheduler, session_ptr: *mut Session) { - with_scheduler(scheduler_ptr, |scheduler| { - with_session(session_ptr, |session| { +fn lease_files_in_graph( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, +) -> PyUnitResult { + with_scheduler(py, scheduler_ptr, |scheduler| { + with_session(py, session_ptr, |session| { let digests = scheduler.all_digests(session); - match scheduler.core.store().lease_all(digests.iter()) { - Ok(_) => {} - Err(err) => error!("{}", &err), - } + py.allow_threads(|| scheduler.core.store().lease_all(digests.iter())) + .map_err(|e| PyErr::new::(py, (e,))) + .map(|()| None) }) - }); + }) } -#[no_mangle] -pub extern "C" fn match_path_globs(path_globs: Handle, paths_buf: BufferBuffer) -> PyResult { - let path_globs = match nodes::Snapshot::lift_path_globs(&path_globs.into()) { - Ok(path_globs) => path_globs, - Err(msg) => { - let e: Result<(), _> = Err(msg); - return e.into(); - } - }; - - let paths = paths_buf - .to_os_strings() - .into_iter() - .map(PathBuf::from) - .collect::>(); - externs::store_bool(path_globs.matches(&paths)).into() +fn match_path_globs(py: Python, path_globs: PyObject, paths: Vec) -> CPyResult { + let path_globs = nodes::Snapshot::lift_path_globs(&path_globs.into()) + .map_err(|e| PyErr::new::(py, (e,)))?; + + py.allow_threads(|| { + let paths = paths.into_iter().map(PathBuf::from).collect::>(); + Ok(path_globs.matches(&paths)) + }) } -#[no_mangle] -pub extern "C" fn capture_snapshots( - scheduler_ptr: *mut Scheduler, - session_ptr: *mut Session, - path_globs_and_root_tuple_wrapper: Handle, -) -> PyResult { +fn capture_snapshots( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, + path_globs_and_root_tuple_wrapper: PyObject, +) -> CPyResult { let values = externs::project_multi(&path_globs_and_root_tuple_wrapper.into(), "dependencies"); - let path_globs_and_roots_result = values + let path_globs_and_roots = values .iter() .map(|value| { let root = PathBuf::from(externs::project_str(&value, "root")); @@ -937,18 +1057,11 @@ pub extern "C" fn capture_snapshots( }; path_globs.map(|path_globs| (path_globs, root, digest_hint)) }) - .collect::, _>>(); + .collect::, _>>() + .map_err(|e| PyErr::new::(py, (e,)))?; - let path_globs_and_roots = match path_globs_and_roots_result { - Ok(v) => v, - Err(err) => { - let e: Result = Err(err); - return e.into(); - } - }; - - with_scheduler(scheduler_ptr, |scheduler| { - with_session(session_ptr, |session| { + with_scheduler(py, scheduler_ptr, |scheduler| { + with_session(py, session_ptr, |session| { // TODO: A parent_id should be an explicit argument. session.workunit_store().init_thread_state(None); let core = scheduler.core.clone(); @@ -965,65 +1078,63 @@ pub extern "C" fn capture_snapshots( digest_hint, ) .await?; - let res: Result<_, String> = Ok(nodes::Snapshot::store_snapshot(&core, &snapshot)); - res + nodes::Snapshot::store_snapshot(&core, &snapshot) } }) .collect::>(); - core.executor.block_on( - future03::try_join_all(snapshot_futures).map_ok(|values| externs::store_tuple(&values)), - ) + py.allow_threads(|| { + core.executor.block_on( + future03::try_join_all(snapshot_futures) + .map_ok(|values| externs::store_tuple(values).into()), + ) + }) + .map_err(|e| PyErr::new::(py, (e,))) }) }) - .into() } -#[no_mangle] -pub extern "C" fn merge_directories( - scheduler_ptr: *mut Scheduler, - session_ptr: *mut Session, - directories_value: Handle, -) -> PyResult { - let digests_result: Result, String> = - externs::project_multi(&directories_value.into(), "dependencies") - .iter() - .map(|v| nodes::lift_digest(v)) - .collect(); - let digests = match digests_result { - Ok(d) => d, - Err(err) => { - let e: Result = Err(err); - return e.into(); - } - }; +fn merge_directories( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, + directories_value: PyObject, +) -> CPyResult { + let digests = externs::project_multi(&directories_value.into(), "dependencies") + .iter() + .map(|v| nodes::lift_digest(v)) + .collect::, _>>() + .map_err(|e| PyErr::new::(py, (e,)))?; - with_scheduler(scheduler_ptr, |scheduler| { - with_session(session_ptr, |session| { + with_scheduler(py, scheduler_ptr, |scheduler| { + with_session(py, session_ptr, |session| { // TODO: A parent_id should be an explicit argument. session.workunit_store().init_thread_state(None); - scheduler - .core - .executor - .block_on(store::Snapshot::merge_directories( - scheduler.core.store(), - digests, - )) - .map(|dir| nodes::Snapshot::store_directory(&scheduler.core, &dir)) + py.allow_threads(|| { + scheduler + .core + .executor + .block_on(store::Snapshot::merge_directories( + scheduler.core.store(), + digests, + )) + .map(|dir| nodes::Snapshot::store_directory(&scheduler.core, &dir).into()) + }) + .map_err(|e| PyErr::new::(py, (e,))) }) }) - .into() } -#[no_mangle] -pub extern "C" fn run_local_interactive_process( - scheduler_ptr: *mut Scheduler, - session_ptr: *mut Session, - request: Handle, -) -> PyResult { +fn run_local_interactive_process( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, + request: PyObject, +) -> CPyResult { use std::process; - with_scheduler(scheduler_ptr, |scheduler| { - with_session(session_ptr, |session| { + with_scheduler(py, scheduler_ptr, |scheduler| { + with_session(py, session_ptr, |session| { + py.allow_threads(|| session.with_console_ui_disabled(|| { let types = &scheduler.core.types; let construct_interactive_process_result = types.construct_interactive_process_result; @@ -1089,70 +1200,65 @@ pub extern "C" fn run_local_interactive_process( let exit_status = subprocess.wait().map_err(|e| e.to_string())?; let code = exit_status.code().unwrap_or(-1); - let output: Result = Ok(externs::unsafe_call( + Ok(externs::unsafe_call( &construct_interactive_process_result, &[externs::store_i64(i64::from(code))], - )); - output + ).into()) }) + ) }) }) - .into() + .map_err(|e| PyErr::new::(py, (e,))) } -#[no_mangle] -pub extern "C" fn materialize_directories( - scheduler_ptr: *mut Scheduler, - session_ptr: *mut Session, - directories_digests_and_path_prefixes_value: Handle, -) -> PyResult { - let values = externs::project_multi( +fn materialize_directories( + py: Python, + scheduler_ptr: PyScheduler, + session_ptr: PySession, + directories_digests_and_path_prefixes_value: PyObject, +) -> CPyResult { + let digests_and_path_prefixes = externs::project_multi( &directories_digests_and_path_prefixes_value.into(), "dependencies", - ); - let directories_digests_and_path_prefixes_results: Result, String> = - values - .iter() - .map(|value| { - let dir_digest = - nodes::lift_digest(&externs::project_ignoring_type(&value, "directory_digest")); - let path_prefix = PathBuf::from(externs::project_str(&value, "path_prefix")); - dir_digest.map(|dir_digest| (dir_digest, path_prefix)) - }) - .collect(); + ) + .into_iter() + .map(|value| { + let dir_digest = + nodes::lift_digest(&externs::project_ignoring_type(&value, "directory_digest")); + let path_prefix = PathBuf::from(externs::project_str(&value, "path_prefix")); + dir_digest.map(|dir_digest| (dir_digest, path_prefix)) + }) + .collect::, _>>() + .map_err(|e| PyErr::new::(py, (e,)))?; - let digests_and_path_prefixes = match directories_digests_and_path_prefixes_results { - Ok(d) => d, - Err(err) => { - let e: Result = Err(err); - return e.into(); - } - }; - with_scheduler(scheduler_ptr, |scheduler| { - with_session(session_ptr, |session| { + with_scheduler(py, scheduler_ptr, |scheduler| { + with_session(py, session_ptr, |session| { // TODO: A parent_id should be an explicit argument. session.workunit_store().init_thread_state(None); let types = &scheduler.core.types; let construct_materialize_directories_results = types.construct_materialize_directories_results; let construct_materialize_directory_result = types.construct_materialize_directory_result; - future::join_all( - digests_and_path_prefixes - .into_iter() - .map(|(digest, path_prefix)| { - // NB: all DirectoryToMaterialize paths are validated in Python to be relative paths. - // Here, we join them with the build root. - let mut destination = PathBuf::new(); - destination.push(scheduler.core.build_root.clone()); - destination.push(path_prefix); - let metadata = scheduler - .core - .store() - .materialize_directory(destination.clone(), digest); - metadata.map(|m| (destination, m)) - }) - .collect::>(), - ) + + py.allow_threads(|| { + future::join_all( + digests_and_path_prefixes + .into_iter() + .map(|(digest, path_prefix)| { + // NB: all DirectoryToMaterialize paths are validated in Python to be relative paths. + // Here, we join them with the build root. + let mut destination = PathBuf::new(); + destination.push(scheduler.core.build_root.clone()); + destination.push(path_prefix); + let metadata = scheduler + .core + .store() + .materialize_directory(destination.clone(), digest); + metadata.map(|m| (destination, m)) + }) + .collect::>(), + ) + }) .map(move |metadata_list| { let entries: Vec = metadata_list .iter() @@ -1171,86 +1277,88 @@ pub extern "C" fn materialize_directories( externs::unsafe_call( &construct_materialize_directory_result, - &[externs::store_tuple(&path_values)], + &[externs::store_tuple(path_values)], ) }, ) .collect(); - let output: Value = externs::unsafe_call( + externs::unsafe_call( &construct_materialize_directories_results, - &[externs::store_tuple(&entries)], - ); - output + &[externs::store_tuple(entries)], + ) + .into() }) + .map_err(|e| PyErr::new::(py, (e,))) .wait() }) }) - .into() } -// This is called before externs are set up, so we cannot return a PyResult -#[no_mangle] -pub extern "C" fn init_logging(level: u64, show_rust_3rdparty_logs: bool) { +fn init_logging(_: Python, level: u64, show_rust_3rdparty_logs: bool) -> PyUnitResult { Logger::init(level, show_rust_3rdparty_logs); + Ok(None) } -#[no_mangle] -pub extern "C" fn setup_pantsd_logger(log_file_ptr: *const raw::c_char, level: u64) -> PyResult { +fn setup_pantsd_logger(py: Python, log_file: String, level: u64) -> CPyResult { logging::set_thread_destination(Destination::Pantsd); - let path_str = unsafe { CStr::from_ptr(log_file_ptr).to_string_lossy().into_owned() }; - let path = PathBuf::from(path_str); + let path = PathBuf::from(log_file); LOGGER .set_pantsd_logger(path, level) .map(i64::from) - .map(externs::store_i64) - .into() + .map_err(|e| PyErr::new::(py, (e,))) } -// Might be called before externs are set, therefore can't return a PyResult -#[no_mangle] -pub extern "C" fn setup_stderr_logger(level: u64) { +fn setup_stderr_logger(_: Python, level: u64) -> PyUnitResult { logging::set_thread_destination(Destination::Stderr); LOGGER .set_stderr_logger(level) .expect("Error setting up STDERR logger"); + Ok(None) } -// Might be called before externs are set, therefore can't return a PyResult -#[no_mangle] -pub extern "C" fn write_log(msg: *const raw::c_char, level: u64, target: *const raw::c_char) { - let message_str = unsafe { CStr::from_ptr(msg).to_string_lossy() }; - let target_str = unsafe { CStr::from_ptr(target).to_string_lossy() }; - LOGGER - .log_from_python(message_str.borrow(), level, target_str.borrow()) - .expect("Error logging message"); +fn write_log(py: Python, msg: String, level: u64, path: String) -> PyUnitResult { + py.allow_threads(|| { + LOGGER + .log_from_python(&msg, level, &path) + .expect("Error logging message"); + Ok(None) + }) } -#[no_mangle] -pub extern "C" fn write_stdout(session_ptr: *mut Session, msg: *const raw::c_char) { - with_session(session_ptr, |session| { - let message_str = unsafe { CStr::from_ptr(msg).to_string_lossy() }; - session.write_stdout(&message_str); - }); +fn write_stdout(py: Python, session_ptr: PySession, msg: String) -> PyUnitResult { + with_session(py, session_ptr, |session| { + py.allow_threads(|| { + session.write_stdout(&msg); + Ok(None) + }) + }) } -#[no_mangle] -pub extern "C" fn write_stderr(session_ptr: *mut Session, msg: *const raw::c_char) { - with_session(session_ptr, |session| { - let message_str = unsafe { CStr::from_ptr(msg).to_string_lossy() }; - session.write_stderr(&message_str); - }); +fn write_stderr(py: Python, session_ptr: PySession, msg: String) -> PyUnitResult { + with_session(py, session_ptr, |session| { + py.allow_threads(|| { + session.write_stderr(&msg); + Ok(None) + }) + }) } -#[no_mangle] -pub extern "C" fn flush_log() { - LOGGER.flush(); +fn flush_log(py: Python) -> PyUnitResult { + py.allow_threads(|| { + LOGGER.flush(); + Ok(None) + }) } -#[no_mangle] -pub extern "C" fn override_thread_logging_destination(destination: Destination) { +fn override_thread_logging_destination(py: Python, destination: String) -> PyUnitResult { + let destination = destination + .as_str() + .try_into() + .map_err(|e| PyErr::new::(py, (e,)))?; logging::set_thread_destination(destination); + Ok(None) } fn write_to_file(path: &Path, graph: &RuleGraph) -> io::Result<()> { @@ -1259,60 +1367,48 @@ fn write_to_file(path: &Path, graph: &RuleGraph) -> io::Result<()> { graph.visualize(&mut f) } -unsafe fn str_ptr_to_string(ptr: *const raw::c_char) -> String { - CStr::from_ptr(ptr).to_string_lossy().into_owned() -} - /// /// Scheduler and Session are intended to be shared between threads, and so their context /// methods provide immutable references. The remaining types are not intended to be shared /// between threads, so mutable access is provided. /// -fn with_scheduler(scheduler_ptr: *mut Scheduler, f: F) -> T +fn with_scheduler(py: Python, scheduler_ptr: PyScheduler, f: F) -> T where F: FnOnce(&Scheduler) -> T, { - let scheduler = unsafe { Box::from_raw(scheduler_ptr) }; - let t = scheduler.core.runtime.enter(|| f(&scheduler)); - mem::forget(scheduler); - t + let scheduler = scheduler_ptr.scheduler(py); + scheduler.core.runtime.enter(|| f(scheduler)) } /// /// See `with_scheduler`. /// -fn with_session(session_ptr: *mut Session, f: F) -> T +fn with_session(py: Python, session_ptr: PySession, f: F) -> T where F: FnOnce(&Session) -> T, { - let session = unsafe { Box::from_raw(session_ptr) }; - let t = f(&session); - mem::forget(session); - t + let session = session_ptr.session(py); + f(&session) } /// /// See `with_scheduler`. /// -fn with_execution_request(execution_request_ptr: *mut ExecutionRequest, f: F) -> T +fn with_execution_request(py: Python, execution_request_ptr: PyExecutionRequest, f: F) -> T where F: FnOnce(&mut ExecutionRequest) -> T, { - let mut execution_request = unsafe { Box::from_raw(execution_request_ptr) }; - let t = f(&mut execution_request); - mem::forget(execution_request); - t + let mut execution_request = execution_request_ptr.execution_request(py).borrow_mut(); + f(&mut execution_request) } /// /// See `with_scheduler`. /// -fn with_tasks(tasks_ptr: *mut Tasks, f: F) -> T +fn with_tasks(py: Python, tasks_ptr: PyTasks, f: F) -> T where F: FnOnce(&mut Tasks) -> T, { - let mut tasks = unsafe { Box::from_raw(tasks_ptr) }; - let t = f(&mut tasks); - mem::forget(tasks); - t + let mut tasks = tasks_ptr.tasks(py).borrow_mut(); + f(&mut tasks) } diff --git a/src/rust/engine/logging/src/logger.rs b/src/rust/engine/logging/src/logger.rs index d01dbfc5c0c..0b07f7c8705 100644 --- a/src/rust/engine/logging/src/logger.rs +++ b/src/rust/engine/logging/src/logger.rs @@ -5,7 +5,7 @@ use crate::PythonLogLevel; use std::cell::RefCell; use std::collections::HashMap; -use std::convert::TryInto; +use std::convert::{TryFrom, TryInto}; use std::fs::File; use std::fs::OpenOptions; use std::future::Future; @@ -271,6 +271,17 @@ pub enum Destination { Stderr, } +impl TryFrom<&str> for Destination { + type Error = String; + fn try_from(dest: &str) -> Result { + match dest { + "pantsd" => Ok(Destination::Pantsd), + "stderr" => Ok(Destination::Stderr), + other => Err(format!("Unknown log destination: {:?}", other)), + } + } +} + thread_local! { static THREAD_DESTINATION: RefCell = RefCell::new(Destination::Stderr) } diff --git a/src/rust/engine/src/context.rs b/src/rust/engine/src/context.rs index c198f7c120a..62f4bb55c45 100644 --- a/src/rust/engine/src/context.rs +++ b/src/rust/engine/src/context.rs @@ -12,7 +12,6 @@ use futures::compat::Future01CompatExt; use futures01::Future; use crate::core::{Failure, TypeId}; -use crate::handles::maybe_drop_handles; use crate::intrinsics::Intrinsics; use crate::nodes::{NodeKey, WrappedNode}; use crate::scheduler::Session; @@ -315,8 +314,6 @@ impl Context { /// Get the future value for the given Node implementation. /// pub fn get(&self, node: N) -> BoxFuture { - // TODO: Odd place for this... could do it periodically in the background? - maybe_drop_handles(); let result = if let Some(entry_id) = self.entry_id { self.core.graph.get(entry_id, self, node.into()).to_boxed() } else { diff --git a/src/rust/engine/src/core.rs b/src/rust/engine/src/core.rs index e51e3683ed0..bf66834078c 100644 --- a/src/rust/engine/src/core.rs +++ b/src/rust/engine/src/core.rs @@ -8,8 +8,8 @@ use std::sync::Arc; use std::{fmt, hash}; use crate::externs; -use crate::handles::Handle; +use cpython::{FromPyObject, PyClone, PyDict, PyErr, PyObject, PyResult, Python}; use rule_graph; use smallvec::{smallvec, SmallVec}; @@ -123,11 +123,7 @@ pub struct TypeId(pub Id); impl TypeId { fn pretty_print(self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if self == ANY_TYPE { - write!(f, "Any") - } else { - write!(f, "{}", externs::type_to_str(self)) - } + write!(f, "{}", externs::type_to_str(self)) } } @@ -155,9 +151,6 @@ impl fmt::Display for TypeId { } } -// On the python side, the 0th type id is used as an anonymous id -pub const ANY_TYPE: TypeId = TypeId(0); - // An identifier for a python function. #[repr(C)] #[derive(Clone, Copy, Eq, Hash, PartialEq)] @@ -166,11 +159,12 @@ pub struct Function(pub Key); impl Function { pub fn name(&self) -> String { let Function(key) = self; - let module = externs::project_str(&externs::val_for(&key), "__module__"); - let name = externs::project_str(&externs::val_for(&key), "__name__"); + let val = externs::val_for(&key); + let module = externs::project_str(&val, "__module__"); + let name = externs::project_str(&val, "__name__"); // NB: this is a custom dunder method that Python code should populate before sending the // function (e.g. an `@rule`) through FFI. - let line_number = externs::project_str(&externs::val_for(&key), "__line_number__"); + let line_number = externs::project_u64(&val, "__line_number__"); format!("{}:{}:{}", module, line_number, name) } } @@ -188,7 +182,7 @@ impl fmt::Debug for Function { } /// -/// Wraps a type id for use as a key in HashMaps and sets. +/// An interned key for a Value for use as a key in HashMaps and sets. /// #[repr(C)] #[derive(Clone, Copy)] @@ -238,21 +232,38 @@ impl Key { } /// -/// A wrapper around a Arc +/// We wrap PyObject (which cannot be cloned without acquiring the GIL) in an Arc in order to avoid +/// accessing the Gil in many cases. /// -#[derive(Clone, Eq, PartialEq)] -pub struct Value(Arc); +#[derive(Clone)] +pub struct Value(Arc); impl Value { - pub fn new(handle: Handle) -> Value { + pub fn new(handle: PyObject) -> Value { Value(Arc::new(handle)) } + + // NB: Longer name because overloaded in a few places. + pub fn consume_into_py_object(self, py: Python) -> PyObject { + match Arc::try_unwrap(self.0) { + Ok(handle) => handle, + Err(arc_handle) => arc_handle.clone_ref(py), + } + } } +impl PartialEq for Value { + fn eq(&self, other: &Value) -> bool { + externs::equals(&self.0, &other.0) + } +} + +impl Eq for Value {} + impl Deref for Value { - type Target = Handle; + type Target = PyObject; - fn deref(&self) -> &Handle { + fn deref(&self) -> &PyObject { &self.0 } } @@ -263,21 +274,26 @@ impl fmt::Debug for Value { } } -/// -/// Creates a Handle (which represents exclusive access) from a Value (which might be shared), -/// cloning if necessary. -/// -impl From for Handle { +impl FromPyObject<'_> for Value { + fn extract(py: Python, obj: &PyObject) -> PyResult { + Ok(obj.clone_ref(py).into()) + } +} + +impl From for PyObject { fn from(value: Value) -> Self { match Arc::try_unwrap(value.0) { Ok(handle) => handle, - Err(arc_handle) => externs::clone_val(&arc_handle), + Err(arc_handle) => { + let gil = Python::acquire_gil(); + arc_handle.clone_ref(gil.python()) + } } } } -impl From for Value { - fn from(handle: Handle) -> Self { +impl From for Value { + fn from(handle: PyObject) -> Self { Value::new(handle) } } @@ -291,6 +307,33 @@ pub enum Failure { Throw(Value, String), } +impl Failure { + pub fn from_py_err(py: Python, mut py_err: PyErr) -> Failure { + let instance = Value::from(py_err.instance(py)); + let traceback = if let Some(tb) = py_err.ptraceback.as_ref() { + let locals = PyDict::new(py); + locals + .set_item(py, "traceback", py.import("traceback").unwrap()) + .unwrap(); + locals.set_item(py, "tb", tb).unwrap(); + py.eval("''.join(traceback.format_tb(tb))", None, Some(&locals)) + .unwrap() + .extract::(py) + .unwrap() + } else { + Self::native_traceback(&externs::val_to_str(&instance)) + }; + Failure::Throw(instance, traceback) + } + + pub fn native_traceback(msg: &str) -> String { + format!( + "Traceback (no traceback):\n \nException: {}", + msg + ) + } +} + impl fmt::Display for Failure { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -303,9 +346,6 @@ impl fmt::Display for Failure { pub fn throw(msg: &str) -> Failure { Failure::Throw( externs::create_exception(msg), - format!( - "Traceback (no traceback):\n \nException: {}", - msg - ), + Failure::native_traceback(msg), ) } diff --git a/src/rust/engine/src/externs.rs b/src/rust/engine/src/externs.rs index 2072aa1221a..80f39b932a6 100644 --- a/src/rust/engine/src/externs.rs +++ b/src/rust/engine/src/externs.rs @@ -1,178 +1,187 @@ -// Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). +// Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). // Licensed under the Apache License, Version 2.0 (see LICENSE). -use std::ffi::OsStr; -use std::ffi::OsString; +// File-specific allowances to silence internal warnings of `py_class!`. +#![allow( + clippy::used_underscore_binding, + clippy::transmute_ptr_to_ptr, + clippy::zero_ptr +)] + +use std::cell::RefCell; +use std::collections::BTreeMap; use std::fmt; -use std::mem; -use std::os::raw; -use std::os::unix::ffi::{OsStrExt, OsStringExt}; -use std::string::FromUtf8Error; use crate::core::{Failure, Function, Key, TypeId, Value}; -use crate::handles::{DroppingHandle, Handle}; use crate::interning::Interns; + +use cpython::{ + py_class, CompareOp, FromPyObject, GILProtected, ObjectProtocol, PyBool, PyBytes, PyClone, + PyDict, PyErr, PyObject, PyResult as CPyResult, PyString, PyTuple, PyType, Python, PythonObject, + ToPyObject, +}; use itertools::Itertools; use lazy_static::lazy_static; -use parking_lot::RwLock; -use std::collections::BTreeMap; /// Return the Python value None. -pub fn none() -> Handle { - with_externs(|e| (e.clone_val)(e.context, &e.none)) +pub fn none() -> PyObject { + let gil = Python::acquire_gil(); + gil.python().None() } pub fn get_value_from_type_id(ty: TypeId) -> Value { - with_externs(|e| { - let handle = (e.get_handle_from_type_id)(e.context, ty); - Value::new(handle) - }) + let gil = Python::acquire_gil(); + let py = gil.python(); + let interns = INTERNS.get(py).borrow(); + Value::from(interns.type_get(&ty).clone_ref(py).into_object()) } pub fn get_type_for(val: &Value) -> TypeId { - with_externs(|e| (e.get_type_for)(e.context, val as &Handle)) + let gil = Python::acquire_gil(); + let py = gil.python(); + let py_type = val.get_type(py); + let mut interns = INTERNS.get(py).borrow_mut(); + interns.type_insert(py, py_type) } pub fn is_union(ty: TypeId) -> bool { - with_externs(|e| (e.is_union)(e.context, ty)) -} - -pub fn identify(val: &Value) -> Ident { - with_externs(|e| (e.identify)(e.context, val as &Handle)) + with_externs(|py, e| { + let interns = INTERNS.get(py).borrow(); + let py_type = interns.type_get(&ty); + e.call_method(py, "is_union", (py_type,), None) + .unwrap() + .cast_as::(py) + .unwrap() + .is_true() + }) } -pub fn equals(h1: &Handle, h2: &Handle) -> bool { - with_externs(|e| (e.equals)(e.context, h1, h2)) +pub fn equals(h1: &PyObject, h2: &PyObject) -> bool { + let gil = Python::acquire_gil(); + h1.rich_compare(gil.python(), h2, CompareOp::Eq) + .unwrap() + .cast_as::(gil.python()) + .unwrap() + .is_true() } -pub fn key_for(val: Value) -> Key { - let mut interns = INTERNS.write(); - interns.insert(val) +pub fn type_for(py: Python, py_type: PyType) -> TypeId { + let mut interns = INTERNS.get(py).borrow_mut(); + interns.type_insert(py, py_type) } -pub fn val_for(key: &Key) -> Value { - let interns = INTERNS.read(); - interns.get(key).clone() +pub fn acquire_key_for(val: Value) -> Result { + let gil = Python::acquire_gil(); + let py = gil.python(); + key_for(py, val).map_err(|e| Failure::from_py_err(py, e)) } -pub fn clone_val(handle: &Handle) -> Handle { - with_externs(|e| (e.clone_val)(e.context, handle)) +pub fn key_for(py: Python, val: Value) -> Result { + let mut interns = INTERNS.get(py).borrow_mut(); + interns.key_insert(py, val) } -pub fn drop_handles(handles: &[DroppingHandle]) { - with_externs(|e| (e.drop_handles)(e.context, handles.as_ptr(), handles.len() as u64)) +pub fn val_for(key: &Key) -> Value { + let gil = Python::acquire_gil(); + let interns = INTERNS.get(gil.python()).borrow(); + interns.key_get(key).clone() } -pub fn store_tuple(values: &[Value]) -> Value { - let handles: Vec<_> = values - .iter() - .map(|v| v as &Handle as *const Handle) +pub fn store_tuple(values: Vec) -> Value { + let gil = Python::acquire_gil(); + let arg_handles: Vec<_> = values + .into_iter() + .map(|v| v.consume_into_py_object(gil.python())) .collect(); - with_externs(|e| (e.store_tuple)(e.context, handles.as_ptr(), handles.len() as u64).into()) -} - -#[allow(dead_code)] -pub fn store_set>(values: I) -> Value { - let handles: Vec<_> = values.map(|v| &v as &Handle as *const Handle).collect(); - with_externs(|e| (e.store_set)(e.context, handles.as_ptr(), handles.len() as u64).into()) + Value::from(PyTuple::new(gil.python(), &arg_handles).into_object()) } /// Store a slice containing 2-tuples of (key, value) as a Python dictionary. -pub fn store_dict(keys_and_values: &[(Value, Value)]) -> Value { - let handles: Vec<_> = keys_and_values - .iter() - .flat_map(|(k, v)| vec![k, v].into_iter()) - .map(|v| v as &Handle as *const Handle) - .collect(); - with_externs(|e| (e.store_dict)(e.context, handles.as_ptr(), handles.len() as u64).into()) +pub fn store_dict(keys_and_values: Vec<(Value, Value)>) -> Result { + let gil = Python::acquire_gil(); + let py = gil.python(); + let dict = PyDict::new(py); + for (k, v) in keys_and_values { + dict.set_item( + gil.python(), + k.consume_into_py_object(py), + v.consume_into_py_object(py), + )?; + } + Ok(Value::from(dict.into_object())) } /// /// Store an opqaue buffer of bytes to pass to Python. This will end up as a Python `bytes`. /// pub fn store_bytes(bytes: &[u8]) -> Value { - with_externs(|e| (e.store_bytes)(e.context, bytes.as_ptr(), bytes.len() as u64).into()) + let gil = Python::acquire_gil(); + Value::from(PyBytes::new(gil.python(), bytes).into_object()) } /// /// Store an buffer of utf8 bytes to pass to Python. This will end up as a Python `unicode`. /// pub fn store_utf8(utf8: &str) -> Value { - with_externs(|e| (e.store_utf8)(e.context, utf8.as_ptr(), utf8.len() as u64).into()) -} - -/// -/// Store a buffer of utf8 bytes to pass to Python. This will end up as a Python `unicode`. -/// -#[cfg(unix)] -pub fn store_utf8_osstr(utf8: &OsStr) -> Value { - let bytes = utf8.as_bytes(); - with_externs(|e| (e.store_utf8)(e.context, bytes.as_ptr(), bytes.len() as u64).into()) + let gil = Python::acquire_gil(); + Value::from(utf8.to_py_object(gil.python()).into_object()) } pub fn store_u64(val: u64) -> Value { - with_externs(|e| (e.store_u64)(e.context, val).into()) + let gil = Python::acquire_gil(); + Value::from(val.to_py_object(gil.python()).into_object()) } pub fn store_i64(val: i64) -> Value { - with_externs(|e| (e.store_i64)(e.context, val).into()) + let gil = Python::acquire_gil(); + Value::from(val.to_py_object(gil.python()).into_object()) } -#[allow(dead_code)] -pub fn store_f64(val: f64) -> Value { - with_externs(|e| (e.store_f64)(e.context, val).into()) +pub fn store_bool(val: bool) -> Value { + let gil = Python::acquire_gil(); + Value::from(val.to_py_object(gil.python()).into_object()) } -#[allow(dead_code)] -pub fn store_bool(val: bool) -> Value { - with_externs(|e| (e.store_bool)(e.context, val).into()) +/// +/// Gets an attribute of the given value as the given type. +/// +pub fn getattr(value: &Value, field: &str) -> Result +where + for<'a> T: FromPyObject<'a>, +{ + let gil = Python::acquire_gil(); + let py = gil.python(); + value + .getattr(py, field) + .map_err(|e| format!("Could not get field `{}`: {:?}", field, e))? + .extract::(py) + .map_err(|e| { + format!( + "Field `{}` was not convertible to type {}: {:?}", + field, + core::any::type_name::(), + e + ) + }) } /// /// Pulls out the value specified by the field name from a given Value /// pub fn project_ignoring_type(value: &Value, field: &str) -> Value { - with_externs(|e| { - (e.project_ignoring_type)( - e.context, - value as &Handle, - field.as_ptr(), - field.len() as u64, - ) - .into() - }) + getattr(value, field).unwrap() } pub fn project_multi(value: &Value, field: &str) -> Vec { - with_externs(|e| { - (e.project_multi)( - e.context, - value as &Handle, - field.as_ptr(), - field.len() as u64, - ) - .to_vec() - }) + getattr(value, field).unwrap() } pub fn project_bool(value: &Value, field: &str) -> bool { - with_externs(|e| { - let named_val: Value = (e.project_ignoring_type)( - e.context, - value as &Handle, - field.as_ptr(), - field.len() as u64, - ) - .into(); - (e.val_to_bool)(e.context, &named_val as &Handle) - }) + getattr(value, field).unwrap() } -pub fn project_multi_strs(item: &Value, field: &str) -> Vec { - project_multi(item, field) - .iter() - .map(|v| val_to_str(v)) - .collect() +pub fn project_multi_strs(value: &Value, field: &str) -> Vec { + getattr(value, field).unwrap() } // This is intended for projecting environment variable maps - i.e. Python Dict[str, str] that are @@ -190,29 +199,25 @@ pub fn project_tuple_encoded_map( } pub fn project_str(value: &Value, field: &str) -> String { - let name_val = with_externs(|e| { - (e.project_ignoring_type)( - e.context, - value as &Handle, - field.as_ptr(), - field.len() as u64, - ) - .into() - }); - val_to_str(&name_val) + // TODO: It's possible to view a python string as a `Cow`, so we could avoid actually + // cloning in some cases. + // TODO: We can't directly extract as a string here, because val_to_str defaults to empty string + // for None. + val_to_str(&getattr(value, field).unwrap()) +} + +pub fn project_u64(value: &Value, field: &str) -> u64 { + getattr(value, field).unwrap() +} + +pub fn project_f64(value: &Value, field: &str) -> f64 { + getattr(value, field).unwrap() } pub fn project_bytes(value: &Value, field: &str) -> Vec { - let name_val = with_externs(|e| { - (e.project_ignoring_type)( - e.context, - value as &Handle, - field.as_ptr(), - field.len() as u64, - ) - .into() - }); - val_to_bytes(&name_val) + // TODO: It's possible to view a python bytes as a `&[u8]`, so we could avoid actually + // cloning in some cases. + getattr(value, field).unwrap() } pub fn key_to_str(key: &Key) -> String { @@ -220,79 +225,81 @@ pub fn key_to_str(key: &Key) -> String { } pub fn type_to_str(type_id: TypeId) -> String { - with_externs(|e| { - (e.type_to_str)(e.context, type_id) - .to_string() - .unwrap_or_else(|e| format!("", type_id, e)) - }) -} - -pub fn val_to_bytes(val: &Value) -> Vec { - with_externs(|e| (e.val_to_bytes)(e.context, val as &Handle).to_bytes()) + project_str(&get_value_from_type_id(type_id), "__name__") } pub fn val_to_str(val: &Value) -> String { - with_externs(|e| { - (e.val_to_str)(e.context, val as &Handle) - .to_string() - .unwrap_or_else(|e| format!("", val, e)) + // TODO: to_string(py) returns a Cow, so we could avoid actually cloning in some cases. + with_externs(|py, e| { + e.call_method(py, "val_to_str", (val as &PyObject,), None) + .unwrap() + .cast_as::(py) + .unwrap() + .to_string(py) + .map(|cow| cow.into_owned()) + .unwrap() }) } pub fn create_exception(msg: &str) -> Value { - with_externs(|e| (e.create_exception)(e.context, msg.as_ptr(), msg.len() as u64).into()) + Value::from(with_externs(|py, e| e.call_method(py, "create_exception", (msg,), None)).unwrap()) } pub fn call_method(value: &Value, method: &str, args: &[Value]) -> Result { - call(&project_ignoring_type(&value, method), args) + let arg_handles: Vec = args.iter().map(|v| v.clone().into()).collect(); + let gil = Python::acquire_gil(); + let args_tuple = PyTuple::new(gil.python(), &arg_handles); + value + .call_method(gil.python(), method, args_tuple, None) + .map(Value::from) + .map_err(|py_err| Failure::from_py_err(gil.python(), py_err)) } pub fn call(func: &Value, args: &[Value]) -> Result { - let arg_handles: Vec<_> = args.iter().map(|v| v as &Handle as *const Handle).collect(); - with_externs(|e| { - (e.call)( - e.context, - func as &Handle, - arg_handles.as_ptr(), - args.len() as u64, - ) - }) - .into() + let arg_handles: Vec = args.iter().map(|v| v.clone().into()).collect(); + let gil = Python::acquire_gil(); + let args_tuple = PyTuple::new(gil.python(), &arg_handles); + func + .call(gil.python(), args_tuple, None) + .map(Value::from) + .map_err(|py_err| Failure::from_py_err(gil.python(), py_err)) } pub fn generator_send(generator: &Value, arg: &Value) -> Result { - let response = - with_externs(|e| (e.generator_send)(e.context, generator as &Handle, arg as &Handle)); - match response { - PyGeneratorResponse::Broke(h) => Ok(GeneratorResponse::Break(Value::new(h))), - PyGeneratorResponse::Throw(h) => Err(PyResult::failure_from(Value::new(h))), - PyGeneratorResponse::Get(product, handle, ident, declared_subject) => { - let mut interns = INTERNS.write(); - let g = Get { - product, - subject: interns.insert_with(Value::new(handle), ident), - declared_subject: Some(declared_subject), - }; - Ok(GeneratorResponse::Get(g)) - } - PyGeneratorResponse::GetMulti(products, handles, identities) => { - let mut interns = INTERNS.write(); - let products = products.to_vec(); - let identities = identities.to_vec(); - let values = handles.to_vec(); - assert_eq!(products.len(), values.len()); - let gets: Vec = products - .into_iter() - .zip(values.into_iter()) - .zip(identities.into_iter()) - .map(|((p, v), i)| Get { - product: p, - subject: interns.insert_with(v, i), - declared_subject: None, - }) - .collect(); - Ok(GeneratorResponse::GetMulti(gets)) - } + let response = with_externs(|py, e| { + e.call_method( + py, + "generator_send", + (generator as &PyObject, arg as &PyObject), + None, + ) + .map_err(|py_err| Failure::from_py_err(py, py_err)) + })?; + + let gil = Python::acquire_gil(); + let py = gil.python(); + if let Ok(b) = response.cast_as::(py) { + Ok(GeneratorResponse::Break(Value::new( + b.val(py).clone_ref(py), + ))) + } else if let Ok(get) = response.cast_as::(py) { + let mut interns = INTERNS.get(py).borrow_mut(); + Ok(GeneratorResponse::Get(Get::new(py, &mut interns, get)?)) + } else if let Ok(get_multi) = response.cast_as::(py) { + let mut interns = INTERNS.get(py).borrow_mut(); + let gets = get_multi + .gets(py) + .iter(py) + .map(|g| { + let get = g + .cast_as::(py) + .map_err(|e| Failure::from_py_err(py, e.into()))?; + Ok(Get::new(py, &mut interns, get)?) + }) + .collect::, _>>()?; + Ok(GeneratorResponse::GetMulti(gets)) + } else { + panic!("generator_send returned unrecognized type: {:?}", response); } } @@ -301,200 +308,64 @@ pub fn generator_send(generator: &Value, arg: &Value) -> Result Value { - let interns = INTERNS.read(); - let func_val = interns.get(&func.0); - call(func_val, args).unwrap_or_else(|e| { - panic!("Core function `{}` failed: {:?}", val_to_str(func_val), e); + let func_val = { + let gil = Python::acquire_gil(); + let interns = INTERNS.get(gil.python()).borrow(); + interns.key_get(&func.0).clone() + }; + call(&func_val, args).unwrap_or_else(|e| { + panic!("Core function `{}` failed: {:?}", val_to_str(&func_val), e); }) } -///////////////////////////////////////////////////////////////////////////////////////// -// The remainder of this file deals with the static initialization of the Externs. -///////////////////////////////////////////////////////////////////////////////////////// - lazy_static! { - // NB: Unfortunately, it's not currently possible to merge these locks, because mutating - // the `Interns` requires calls to extern functions, which would be re-entrant. - static ref EXTERNS: RwLock> = RwLock::new(None); - static ref INTERNS: RwLock = RwLock::new(Interns::new()); + static ref EXTERNS: GILProtected> = GILProtected::new(RefCell::new({ + // See set_externs. + none() + })); + static ref INTERNS: GILProtected> = GILProtected::new(RefCell::new(Interns::new())); } /// /// Set the static Externs for this process. All other methods of this module will fail /// until this has been called. /// -pub fn set_externs(externs: Externs) { - let mut externs_ref = EXTERNS.write(); - *externs_ref = Some(externs); +pub fn set_externs(externs: PyObject) { + let gil = Python::acquire_gil(); + EXTERNS.get(gil.python()).replace(externs); } fn with_externs(f: F) -> T where - F: FnOnce(&Externs) -> T, + F: FnOnce(Python, &PyObject) -> T, { - let externs_opt = EXTERNS.read(); - let externs = externs_opt - .as_ref() - .unwrap_or_else(|| panic!("externs used before static initialization.")); - f(externs) -} - -// An opaque pointer to a context used by the extern functions. -pub type ExternContext = raw::c_void; - -pub struct Externs { - pub context: *const ExternContext, - pub log_level: u8, - pub none: Handle, - pub call: CallExtern, - pub generator_send: GeneratorSendExtern, - pub get_type_for: GetTypeForExtern, - pub get_handle_from_type_id: GetHandleFromTypeIdExtern, - pub is_union: IsUnionExtern, - pub identify: IdentifyExtern, - pub equals: EqualsExtern, - pub clone_val: CloneValExtern, - pub drop_handles: DropHandlesExtern, - pub store_tuple: StoreTupleExtern, - pub store_set: StoreTupleExtern, - pub store_dict: StoreTupleExtern, - pub store_bytes: StoreBytesExtern, - pub store_utf8: StoreUtf8Extern, - pub store_u64: StoreU64Extern, - pub store_i64: StoreI64Extern, - pub store_f64: StoreF64Extern, - pub store_bool: StoreBoolExtern, - pub project_ignoring_type: ProjectIgnoringTypeExtern, - pub project_multi: ProjectMultiExtern, - pub type_to_str: TypeToStrExtern, - pub val_to_bytes: ValToBytesExtern, - pub val_to_str: ValToStrExtern, - pub val_to_bool: ValToBoolExtern, - pub create_exception: CreateExceptionExtern, -} - -// The pointer to the context is safe for sharing between threads. -unsafe impl Sync for Externs {} -unsafe impl Send for Externs {} - -pub type GetTypeForExtern = extern "C" fn(*const ExternContext, *const Handle) -> TypeId; - -pub type GetHandleFromTypeIdExtern = extern "C" fn(*const ExternContext, TypeId) -> Handle; - -pub type IsUnionExtern = extern "C" fn(*const ExternContext, TypeId) -> bool; - -pub type IdentifyExtern = extern "C" fn(*const ExternContext, *const Handle) -> Ident; - -pub type EqualsExtern = extern "C" fn(*const ExternContext, *const Handle, *const Handle) -> bool; - -pub type CloneValExtern = extern "C" fn(*const ExternContext, *const Handle) -> Handle; - -pub type DropHandlesExtern = extern "C" fn(*const ExternContext, *const DroppingHandle, u64); - -pub type StoreTupleExtern = - extern "C" fn(*const ExternContext, *const *const Handle, u64) -> Handle; - -pub type StoreBytesExtern = extern "C" fn(*const ExternContext, *const u8, u64) -> Handle; - -pub type StoreUtf8Extern = extern "C" fn(*const ExternContext, *const u8, u64) -> Handle; - -pub type StoreU64Extern = extern "C" fn(*const ExternContext, u64) -> Handle; - -pub type StoreI64Extern = extern "C" fn(*const ExternContext, i64) -> Handle; - -pub type StoreF64Extern = extern "C" fn(*const ExternContext, f64) -> Handle; - -pub type StoreBoolExtern = extern "C" fn(*const ExternContext, bool) -> Handle; - -/// -/// NB: When a PyResult is handed from Python to Rust, the Rust side destroys the handle. But when -/// it is passed from Rust to Python, Python must destroy the handle. -/// -#[repr(C)] -pub struct PyResult { - is_throw: bool, - handle: Handle, + let gil = Python::acquire_gil(); + let externs = EXTERNS.get(gil.python()).borrow(); + f(gil.python(), &externs) } -impl PyResult { - fn failure_from(v: Value) -> Failure { - let traceback = project_str(&v, "_formatted_exc"); - Failure::Throw(v, traceback) - } -} - -impl From for PyResult { - fn from(val: Value) -> Self { - PyResult { - is_throw: false, - handle: val.into(), +py_class!(pub class PyGeneratorResponseBreak |py| { + data val: PyObject; + def __new__(_cls, val: PyObject) -> CPyResult { + Self::create_instance(py, val) } - } -} - -impl From> for PyResult { - fn from(result: Result) -> Self { - match result { - Ok(val) => val.into(), - Err(f) => { - let val = match f { - f @ Failure::Invalidated => create_exception(&format!("{}", f)), - Failure::Throw(exc, _) => exc, - }; - PyResult { - is_throw: true, - handle: val.into(), - } - } +}); + +py_class!(pub class PyGeneratorResponseGet |py| { + data product: PyType; + data declared_subject: PyType; + data subject: PyObject; + def __new__(_cls, product: PyType, declared_subject: PyType, subject: PyObject) -> CPyResult { + Self::create_instance(py, product, declared_subject, subject) } - } -} - -impl From for Result { - fn from(result: PyResult) -> Self { - let value = result.handle.into(); - if result.is_throw { - Err(PyResult::failure_from(value)) - } else { - Ok(value) - } - } -} +}); -impl From> for PyResult { - fn from(res: Result) -> Self { - match res { - Ok(v) => PyResult { - is_throw: false, - handle: v.into(), - }, - Err(msg) => PyResult { - is_throw: true, - handle: create_exception(&msg).into(), - }, +py_class!(pub class PyGeneratorResponseGetMulti |py| { + data gets: PyTuple; + def __new__(_cls, gets: PyTuple) -> CPyResult { + Self::create_instance(py, gets) } - } -} - -impl From> for PyResult { - fn from(res: Result<(), String>) -> Self { - PyResult::from(res.map(|()| Value::from(none()))) - } -} - -/// -/// The response from a call to extern_generator_send. Gets include Idents for their Handles -/// in order to avoid roundtripping to intern them, and to eagerly trigger errors for unhashable -/// types on the python side where possible. -/// -#[repr(C)] -pub enum PyGeneratorResponse { - Get(TypeId, Handle, Ident, TypeId), - GetMulti(TypeIdBuffer, HandleBuffer, IdentBuffer), - // NB: Broke not Break because C keyword. - Broke(Handle), - Throw(Handle), -} +}); #[derive(Debug)] pub struct Get { @@ -503,6 +374,18 @@ pub struct Get { pub declared_subject: Option, } +impl Get { + fn new(py: Python, interns: &mut Interns, get: &PyGeneratorResponseGet) -> Result { + Ok(Get { + product: interns.type_insert(py, get.product(py).clone_ref(py)), + subject: interns + .key_insert(py, get.subject(py).clone_ref(py).into()) + .map_err(|e| Failure::from_py_err(py, e))?, + declared_subject: Some(interns.type_insert(py, get.declared_subject(py).clone_ref(py))), + }) + } +} + impl fmt::Display for Get { fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { write!( @@ -519,230 +402,3 @@ pub enum GeneratorResponse { Get(Get), GetMulti(Vec), } - -/// -/// The result of an `identify` call, including the __hash__ of a Handle and a TypeId representing -/// the object's type. -/// -#[repr(C)] -#[derive(Clone, Copy)] -pub struct Ident { - pub hash: i64, - pub type_id: TypeId, -} - -pub trait RawBuffer { - fn ptr(&self) -> *mut Raw; - fn len(&self) -> u64; - - /// - /// A buffer-specific shallow clone operation (possibly just implemented via clone). - /// - fn lift(t: &Raw) -> Output; - - /// - /// Returns a Vec copy of the buffer contents. - /// - fn to_vec(&self) -> Vec { - with_vec(self.ptr(), self.len() as usize, |vec| { - vec.iter().map(Self::lift).collect() - }) - } - - /// - /// Asserts that the buffer contains one item, and returns a copy of it. - /// - fn unwrap_one(&self) -> Output { - assert!( - self.len() == 1, - "Expected exactly 1 item in Buffer, but had: {}", - self.len() - ); - with_vec(self.ptr(), self.len() as usize, |vec| Self::lift(&vec[0])) - } -} - -/// -/// Points to an array containing a series of values allocated by Python. -/// -/// TODO: An interesting optimization might be possible where we avoid actually -/// allocating the values array for values_len == 1, and instead store the Handle in -/// the `handle_` field. -/// -#[repr(C)] -pub struct HandleBuffer { - handles_ptr: *mut Handle, - handles_len: u64, - // A Handle to hold the underlying buffer alive. - handle_: Handle, -} - -impl RawBuffer for HandleBuffer { - fn ptr(&self) -> *mut Handle { - self.handles_ptr - } - - fn len(&self) -> u64 { - self.handles_len - } - - fn lift(t: &Handle) -> Value { - Value::new(unsafe { t.clone_shallow() }) - } -} - -#[repr(C)] -pub struct IdentBuffer { - idents_ptr: *mut Ident, - idents_len: u64, - // A Handle to hold the underlying array alive. - handle_: Handle, -} - -impl RawBuffer for IdentBuffer { - fn ptr(&self) -> *mut Ident { - self.idents_ptr - } - - fn len(&self) -> u64 { - self.idents_len - } - - fn lift(t: &Ident) -> Ident { - *t - } -} - -#[repr(C)] -pub struct TypeIdBuffer { - ids_ptr: *mut TypeId, - ids_len: u64, - // A Handle to hold the underlying array alive. - handle_: Handle, -} - -impl RawBuffer for TypeIdBuffer { - fn ptr(&self) -> *mut TypeId { - self.ids_ptr - } - - fn len(&self) -> u64 { - self.ids_len - } - - fn lift(t: &TypeId) -> TypeId { - *t - } -} - -pub type ProjectIgnoringTypeExtern = extern "C" fn( - *const ExternContext, - *const Handle, - field_name_ptr: *const u8, - field_name_len: u64, -) -> Handle; - -pub type ProjectMultiExtern = extern "C" fn( - *const ExternContext, - *const Handle, - field_name_ptr: *const u8, - field_name_len: u64, -) -> HandleBuffer; - -#[repr(C)] -pub struct Buffer { - bytes_ptr: *mut u8, - bytes_len: u64, - // A Handle to hold the underlying array alive. - handle_: Handle, -} - -impl Buffer { - pub fn to_bytes(&self) -> Vec { - with_vec(self.bytes_ptr, self.bytes_len as usize, Vec::clone) - } - - pub fn to_os_string(&self) -> OsString { - OsString::from_vec(self.to_bytes()) - } - - pub fn to_string(&self) -> Result { - String::from_utf8(self.to_bytes()) - } -} - -/// -/// Points to an array of (byte) Buffers. -/// -/// TODO: Because this is only ever passed from Python to Rust, it could just use -/// `project_multi_strs`. -/// -#[repr(C)] -pub struct BufferBuffer { - bufs_ptr: *mut Buffer, - bufs_len: u64, - // A Handle to hold the underlying array alive. - handle_: Handle, -} - -impl BufferBuffer { - pub fn to_bytes_vecs(&self) -> Vec> { - with_vec(self.bufs_ptr, self.bufs_len as usize, |vec| { - vec.iter().map(Buffer::to_bytes).collect() - }) - } - - pub fn to_os_strings(&self) -> Vec { - self - .to_bytes_vecs() - .into_iter() - .map(OsString::from_vec) - .collect() - } - - pub fn to_strings(&self) -> Result, FromUtf8Error> { - self - .to_bytes_vecs() - .into_iter() - .map(String::from_utf8) - .collect() - } - - pub fn to_map(&self, name: &'static str) -> Result, String> { - let strings = self - .to_strings() - .map_err(|err| format!("Error decoding UTF8 from {}: {}", name, err))?; - - if strings.len() % 2 != 0 { - return Err(format!("Map for {} had an odd number of elements", name)); - } - - Ok(strings.into_iter().tuples::<(_, _)>().collect()) - } -} - -pub type TypeToStrExtern = extern "C" fn(*const ExternContext, TypeId) -> Buffer; - -pub type ValToStrExtern = extern "C" fn(*const ExternContext, *const Handle) -> Buffer; -pub type ValToBytesExtern = extern "C" fn(*const ExternContext, *const Handle) -> Buffer; - -pub type ValToBoolExtern = extern "C" fn(*const ExternContext, *const Handle) -> bool; - -pub type CreateExceptionExtern = - extern "C" fn(*const ExternContext, str_ptr: *const u8, str_len: u64) -> Handle; - -pub type CallExtern = - extern "C" fn(*const ExternContext, *const Handle, *const *const Handle, u64) -> PyResult; - -pub type GeneratorSendExtern = - extern "C" fn(*const ExternContext, *const Handle, *const Handle) -> PyGeneratorResponse; - -pub fn with_vec(c_ptr: *mut C, c_len: usize, f: F) -> T -where - F: FnOnce(&Vec) -> T, -{ - let cs = unsafe { Vec::from_raw_parts(c_ptr, c_len, c_len) }; - let output = f(&cs); - mem::forget(cs); - output -} diff --git a/src/rust/engine/src/handles.rs b/src/rust/engine/src/handles.rs deleted file mode 100644 index 7690290a471..00000000000 --- a/src/rust/engine/src/handles.rs +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2017 Pants project contributors (see CONTRIBUTORS.md). -// Licensed under the Apache License, Version 2.0 (see LICENSE). - -use std::os::raw; - -use crate::externs; -use lazy_static::lazy_static; -use parking_lot::Mutex; - -pub type RawHandle = *const raw::c_void; - -/// -/// Represents a handle to a python object, explicitly without equality or hashing. Whenever -/// the equality/identity of a Value matters, a Key should be computed for it and used instead. -/// -/// Handle implements Clone by calling out to a python extern `clone_val` which clones the -/// underlying CFFI handle. -/// -#[repr(C)] -pub struct Handle(pub RawHandle); - -impl Handle { - /// - /// An escape hatch to allow for cloning a Handle without cloning the value it points to. - /// - /// # Safety - /// - /// You should not call this method unless you are certain the input Handle has been - /// mem::forgotten (otherwise it will be `Drop`ed twice). - /// - pub unsafe fn clone_shallow(&self) -> Handle { - Handle(self.0) - } -} - -impl PartialEq for Handle { - fn eq(&self, other: &Handle) -> bool { - externs::equals(self, other) - } -} - -impl Eq for Handle {} - -impl Drop for Handle { - fn drop(&mut self) { - DROPPING_HANDLES.lock().push(DroppingHandle(self.0)); - } -} - -// By default, a Handle would not be marked Send because of the raw pointer it holds. -// Because Python objects are threadsafe, we can safely implement Send. -unsafe impl Send for Handle {} -unsafe impl Sync for Handle {} - -const MIN_DRAIN_HANDLES: usize = 256; - -/// -/// A Handle that is currently being dropped. This wrapper exists to mark the pointer Send. -/// -#[repr(C)] -pub struct DroppingHandle(RawHandle); - -unsafe impl Send for DroppingHandle {} - -lazy_static! { - /// - /// A static queue of Handles which used to be owned by `Value`s. When a Value is dropped, its - /// Handle is added to this queue. Some thread with access to the ExternContext should periodically - /// consume this queue to drop the relevant handles on the python side. - /// - /// This queue avoids giving every `Value` a reference to the ExternContext, which would allow them - /// to drop themselves directly, but would increase their size. - /// - /// TODO: This queue should likely move to `core` to allow `enqueue` to be private. - /// - static ref DROPPING_HANDLES: Mutex> = Mutex::new(Vec::new()); -} - -/// -/// If an appreciable number of Handles have been queued, drop them. -/// -pub fn maybe_drop_handles() { - let handles: Option> = { - let mut q = DROPPING_HANDLES.lock(); - if q.len() > MIN_DRAIN_HANDLES { - Some(q.drain(..).collect::>()) - } else { - None - } - }; - if let Some(handles) = handles { - externs::drop_handles(&handles); - } -} diff --git a/src/rust/engine/src/interning.rs b/src/rust/engine/src/interning.rs index 868ac4d00ba..70806cc29d2 100644 --- a/src/rust/engine/src/interning.rs +++ b/src/rust/engine/src/interning.rs @@ -4,8 +4,10 @@ use std::collections::HashMap; use std::hash; -use crate::core::{Key, Value, FNV}; -use crate::externs::{self, Ident}; +use cpython::{ObjectProtocol, PyClone, PyErr, PyType, Python, PythonObject, ToPyObject}; + +use crate::core::{Key, TypeId, Value, FNV}; +use crate::externs; /// /// A struct that encapsulates interning of python `Value`s as comparable `Key`s. @@ -33,8 +35,10 @@ use crate::externs::{self, Ident}; /// #[derive(Default)] pub struct Interns { - forward: HashMap, - reverse: HashMap, + forward_keys: HashMap, + reverse_keys: HashMap, + forward_types: HashMap, + reverse_types: HashMap, id_generator: u64, } @@ -43,37 +47,53 @@ impl Interns { Interns::default() } - pub fn insert(&mut self, v: Value) -> Key { - let ident = externs::identify(&v); - self.insert_with(v, ident) + pub fn key_insert(&mut self, py: Python, v: Value) -> Result { + let intern_key = InternKey(v.hash(py)?, v.to_py_object(py).into()); + + let key = if let Some(key) = self.forward_keys.get(&intern_key) { + *key + } else { + let id = self.id_generator; + self.id_generator += 1; + let key = Key::new(id, self.type_insert(py, v.get_type(py))); + self.forward_keys.insert(intern_key, key.clone()); + self.reverse_keys.insert(key.clone(), v); + key + }; + Ok(key) + } + + pub fn key_get(&self, k: &Key) -> &Value { + self + .reverse_keys + .get(&k) + .unwrap_or_else(|| panic!("Previously memoized object disappeared for {:?}", k)) } - pub fn insert_with(&mut self, v: Value, ident: Ident) -> Key { - let mut inserted = false; - let id_generator = self.id_generator; - let key = *self - .forward - .entry(InternKey(ident.hash, v.clone())) - .or_insert_with(|| { - inserted = true; - Key::new(id_generator, ident.type_id) - }); - if inserted { - self.reverse.insert(key, v); + pub fn type_insert(&mut self, py: Python, v: PyType) -> TypeId { + let intern_type = InternType(v.as_object().hash(py).unwrap(), v.clone_ref(py)); + + if let Some(type_id) = self.forward_types.get(&intern_type) { + *type_id + } else { + let id = self.id_generator; self.id_generator += 1; + let type_id = TypeId(id); + self.forward_types.insert(intern_type, type_id.clone()); + self.reverse_types.insert(type_id.clone(), v); + type_id } - key } - pub fn get(&self, k: &Key) -> &Value { + pub fn type_get(&self, k: &TypeId) -> &PyType { self - .reverse + .reverse_types .get(&k) .unwrap_or_else(|| panic!("Previously memoized object disappeared for {:?}", k)) } } -struct InternKey(i64, Value); +struct InternKey(isize, Value); impl Eq for InternKey {} @@ -88,3 +108,19 @@ impl hash::Hash for InternKey { self.0.hash(state); } } + +struct InternType(isize, PyType); + +impl Eq for InternType {} + +impl PartialEq for InternType { + fn eq(&self, other: &InternType) -> bool { + self.1 == other.1 + } +} + +impl hash::Hash for InternType { + fn hash(&self, state: &mut H) { + self.0.hash(state); + } +} diff --git a/src/rust/engine/src/intrinsics.rs b/src/rust/engine/src/intrinsics.rs index 6a74e16be17..0e90d403e0a 100644 --- a/src/rust/engine/src/intrinsics.rs +++ b/src/rust/engine/src/intrinsics.rs @@ -6,7 +6,7 @@ use crate::nodes::{lift_digest, DownloadedFile, NodeFuture, Snapshot}; use crate::tasks::Intrinsic; use crate::types::Types; -use boxfuture::Boxable; +use boxfuture::{try_future, Boxable}; use bytes; use futures::future::{self as future03, TryFutureExt}; use futures01::{future, Future}; @@ -83,7 +83,7 @@ impl Intrinsics { intrinsics.insert( Intrinsic { product: types.process_result, - inputs: vec![types.multi_platform_process_request, types.platform], + inputs: vec![types.multi_platform_process, types.platform], }, Box::new(multi_platform_process_request_to_process_result), ); @@ -153,8 +153,8 @@ fn directory_digest_to_files_content(context: Context, args: Vec) -> Node .core .store() .contents_for_directory(digest) - .map_err(|str| throw(&str)) - .map(move |files_content| Snapshot::store_files_content(&context, &files_content)) + .and_then(move |files_content| Snapshot::store_files_content(&context, &files_content)) + .map_err(|s| throw(&s)) }) .to_boxed() } @@ -206,8 +206,7 @@ fn digest_to_snapshot(context: Context, args: Vec) -> NodeFuture { Box::pin(async move { let digest = lift_digest(&args[0])?; let snapshot = store::Snapshot::from_digest(store, digest).await?; - let res: Result<_, String> = Ok(Snapshot::store_snapshot(&core, &snapshot)); - res + Snapshot::store_snapshot(&core, &snapshot) }) .compat() .map_err(|e: String| throw(&e)) @@ -234,16 +233,20 @@ fn directories_to_merge_to_digest(context: Context, args: Vec) -> NodeFut fn url_to_fetch_to_snapshot(context: Context, mut args: Vec) -> NodeFuture { let core = context.core.clone(); context - .get(DownloadedFile(externs::key_for(args.pop().unwrap()))) - .map(move |snapshot| Snapshot::store_snapshot(&core, &snapshot)) + .get(DownloadedFile(try_future!(externs::acquire_key_for( + args.pop().unwrap() + )))) + .and_then(move |snapshot| Snapshot::store_snapshot(&core, &snapshot).map_err(|err| throw(&err))) .to_boxed() } fn path_globs_to_snapshot(context: Context, mut args: Vec) -> NodeFuture { let core = context.core.clone(); context - .get(Snapshot(externs::key_for(args.pop().unwrap()))) - .map(move |snapshot| Snapshot::store_snapshot(&core, &snapshot)) + .get(Snapshot(try_future!(externs::acquire_key_for( + args.pop().unwrap() + )))) + .and_then(move |snapshot| Snapshot::store_snapshot(&core, &snapshot).map_err(|err| throw(&err))) .to_boxed() } @@ -293,7 +296,7 @@ fn snapshot_subset_to_snapshot(context: Context, args: Vec) -> NodeFuture let snapshot = store::Snapshot::get_snapshot_subset(store, original_digest, path_globs).await?; - Ok(Snapshot::store_snapshot(&context.core, &snapshot)) + Ok(Snapshot::store_snapshot(&context.core, &snapshot)?) }) .compat() .map_err(|err: String| throw(&err)) diff --git a/src/rust/engine/src/lib.rs b/src/rust/engine/src/lib.rs index 55e5ce62e09..229954397a6 100644 --- a/src/rust/engine/src/lib.rs +++ b/src/rust/engine/src/lib.rs @@ -32,7 +32,6 @@ mod context; mod core; pub mod externs; -mod handles; mod interning; mod intrinsics; pub mod nodes; @@ -43,8 +42,7 @@ mod types; mod watch; pub use crate::context::Core; -pub use crate::core::{Function, Key, Params, TypeId, Value}; -pub use crate::handles::Handle; +pub use crate::core::{Failure, Function, Key, Params, TypeId, Value}; pub use crate::intrinsics::Intrinsics; pub use crate::scheduler::{ ExecutionRequest, ExecutionTermination, RootResult, Scheduler, Session, diff --git a/src/rust/engine/src/nodes.rs b/src/rust/engine/src/nodes.rs index 55d3c11b809..c4e505adb33 100644 --- a/src/rust/engine/src/nodes.rs +++ b/src/rust/engine/src/nodes.rs @@ -61,10 +61,7 @@ impl VFS for Context { } fn mk_error(msg: &str) -> Failure { - Failure::Throw( - externs::create_exception(msg), - "".to_string(), - ) + throw(msg) } } @@ -209,13 +206,10 @@ impl From