Skip to content

Commit

Permalink
[internal] Switch from Rust-Cpython to PyO3 (#13526)
Browse files Browse the repository at this point in the history
See #12110 for the original motivation.

## Why migrate

Beyond the APIs requiring much less boilerplate and being more ergonomic (e.g. the `.call0()` and `call1()` variants of `.call()`), I think the biggest benefit is clearer modeling for when the GIL is held.

With PyO3, the two [core types](https://pyo3.rs/v0.15.0/types.html) are `&PyAny` and `PyObject` (aka `Py<PyAny`). `&PyAny` is always a reference with the same lifetime as the `Python` token, which represents when the GIL is used. Whenever you want to do Python operations in Rust, like `.getattr()`, you use `&PyAny`. `PyObject` and any `Py<T>` are GIL-independent.

In contrast, rust-cpython only had `PyObject` and `&PyObject`. It passed around `Python` as an argument to any Python functions like `.getattr()`.

--

An additional benefit is that using proc macros instead of declarative macros means PyCharm no longer hangs for me when working in the `engine/` crate! PyCharm does much better w/ proc macros, and things feel zippy now like autocomplete working.

## Migration strategy

I tried originally doing an incremental strategy, most recently with #12451, which proved technically infeasible.

This PR completely swaps Rust-Cpython with PyO3, but tries to minimize the diff. It only makes changes when it was too difficult to get working with PyO3. For example, `interface.rs` is far too big, but this keeps the same organization as before.

For now, we still have the `native_engine_pyo3` extension along with `native_engine`. That will be consolidated in a followup.

## Benchmark

Before:
```
❯ hyperfine -w 1 -r 10 './pants --no-pantsd list ::'
Benchmark #1: ./pants --no-pantsd list ::
  Time (mean ± σ):      2.170 s ±  0.025 s    [User: 1.786 s, System: 0.303 s]
  Range (min … max):    2.142 s …  2.227 s    10 runs
```

After:

```
❯ hyperfine -w 1 -r 10 './pants --no-pantsd list ::'
Benchmark #1: ./pants --no-pantsd list ::
  Time (mean ± σ):      2.193 s ±  0.060 s    [User: 1.798 s, System: 0.292 s]
  Range (min … max):    2.144 s …  2.348 s    10 runs
```
  • Loading branch information
Eric-Arellano authored Nov 14, 2021
1 parent 815b3d8 commit 021c3c0
Show file tree
Hide file tree
Showing 19 changed files with 1,457 additions and 2,048 deletions.
1 change: 0 additions & 1 deletion src/python/pants/bin/daemon_pants_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ def __call__(
command: str,
args: Tuple[str, ...],
env: Dict[str, str],
working_directory: bytes,
cancellation_latch: PySessionCancellationLatch,
stdin_fileno: int,
stdout_fileno: int,
Expand Down
83 changes: 32 additions & 51 deletions src/python/pants/engine/fs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@
Workspace,
)
from pants.engine.goal import Goal, GoalSubsystem
from pants.engine.internals.native_engine_pyo3 import PyDigest as DigestPyO3
from pants.engine.internals.native_engine_pyo3 import PySnapshot as SnapshotPyO3
from pants.engine.internals.scheduler import ExecutionError
from pants.engine.rules import Get, goal_rule, rule
from pants.testutil.rule_runner import QueryRule, RuleRunner
Expand Down Expand Up @@ -1093,60 +1091,50 @@ def is_changed_snapshot() -> bool:
# -----------------------------------------------------------------------------------------------


@pytest.mark.parametrize("cls", [Digest, DigestPyO3])
def test_digest_properties(cls) -> None:
digest = cls("a" * 64, 1000)
def test_digest_properties() -> None:
digest = Digest("a" * 64, 1000)
assert digest.fingerprint == "a" * 64
assert digest.serialized_bytes_length == 1000


@pytest.mark.parametrize("cls", [Digest, DigestPyO3])
def test_digest_repr(cls) -> None:
assert str(cls("a" * 64, 1)) == f"Digest({repr('a' * 64)}, 1)"
def test_digest_repr() -> None:
assert str(Digest("a" * 64, 1)) == f"Digest({repr('a' * 64)}, 1)"


@pytest.mark.parametrize("cls", [Digest, DigestPyO3])
def test_digest_hash(cls) -> None:
assert hash(cls("a" * 64, 1)) == -6148914691236517206
assert hash(cls("b" * 64, 1)) == -4919131752989213765
def test_digest_hash() -> None:
assert hash(Digest("a" * 64, 1)) == -6148914691236517206
assert hash(Digest("b" * 64, 1)) == -4919131752989213765
# Note that the size bytes is not considered in the hash.
assert hash(cls("a" * 64, 1000)) == -6148914691236517206
assert hash(Digest("a" * 64, 1000)) == -6148914691236517206


@pytest.mark.parametrize("cls", [Digest, DigestPyO3])
def test_digest_equality(cls) -> None:
digest = cls("a" * 64, 1)
assert digest == cls("a" * 64, 1)
assert digest != cls("a" * 64, 1000)
assert digest != cls("0" * 64, 1)
def test_digest_equality() -> None:
digest = Digest("a" * 64, 1)
assert digest == Digest("a" * 64, 1)
assert digest != Digest("a" * 64, 1000)
assert digest != Digest("0" * 64, 1)
with pytest.raises(TypeError):
digest < digest
digest < digest # type: ignore[operator]


@pytest.mark.parametrize(
"snapshot_cls,digest_cls", [(Snapshot, Digest), (SnapshotPyO3, DigestPyO3)]
)
def test_snapshot_properties(snapshot_cls, digest_cls) -> None:
digest = digest_cls("a" * 64, 1000)
snapshot = snapshot_cls._create_for_testing(digest, ["f.ext", "dir/f.ext"], ["dir"])
def test_snapshot_properties() -> None:
digest = Digest("a" * 64, 1000)
snapshot = Snapshot._create_for_testing(digest, ["f.ext", "dir/f.ext"], ["dir"])
assert snapshot.digest == digest
assert snapshot.files == ("f.ext", "dir/f.ext")
assert snapshot.dirs == ("dir",)


@pytest.mark.parametrize(
"snapshot_cls,digest_cls", [(Snapshot, Digest), (SnapshotPyO3, DigestPyO3)]
)
def test_snapshot_hash(snapshot_cls, digest_cls) -> None:
def test_snapshot_hash() -> None:
def assert_hash(
expected: int,
*,
digest_char: str = "a",
files: Optional[List[str]] = None,
dirs: Optional[List[str]] = None,
) -> None:
digest = digest_cls(digest_char * 64, 1000)
snapshot = snapshot_cls._create_for_testing(
digest = Digest(digest_char * 64, 1000)
snapshot = Snapshot._create_for_testing(
digest, files or ["f.ext", "dir/f.ext"], dirs or ["dir"]
)
assert hash(snapshot) == expected
Expand All @@ -1159,28 +1147,21 @@ def assert_hash(
assert_hash(-4919131752989213765, digest_char="b")


@pytest.mark.parametrize(
"snapshot_cls,digest_cls", [(Snapshot, Digest), (SnapshotPyO3, DigestPyO3)]
)
def test_snapshot_equality(snapshot_cls, digest_cls) -> None:
def test_snapshot_equality() -> None:
# Only the digest is used for equality.
snapshot = snapshot_cls._create_for_testing(
digest_cls("a" * 64, 1000), ["f.ext", "dir/f.ext"], ["dir"]
)
assert snapshot == snapshot_cls._create_for_testing(
digest_cls("a" * 64, 1000), ["f.ext", "dir/f.ext"], ["dir"]
)
assert snapshot == snapshot_cls._create_for_testing(
digest_cls("a" * 64, 1000), ["f.ext", "dir/f.ext"], ["foo"]
snapshot = Snapshot._create_for_testing(Digest("a" * 64, 1000), ["f.ext", "dir/f.ext"], ["dir"])
assert snapshot == Snapshot._create_for_testing(
Digest("a" * 64, 1000), ["f.ext", "dir/f.ext"], ["dir"]
)
assert snapshot == snapshot_cls._create_for_testing(
digest_cls("a" * 64, 1000), ["f.ext"], ["dir"]
assert snapshot == Snapshot._create_for_testing(
Digest("a" * 64, 1000), ["f.ext", "dir/f.ext"], ["foo"]
)
assert snapshot != snapshot_cls._create_for_testing(
digest_cls("a" * 64, 0), ["f.ext", "dir/f.ext"], ["dir"]
assert snapshot == Snapshot._create_for_testing(Digest("a" * 64, 1000), ["f.ext"], ["dir"])
assert snapshot != Snapshot._create_for_testing(
Digest("a" * 64, 0), ["f.ext", "dir/f.ext"], ["dir"]
)
assert snapshot != snapshot_cls._create_for_testing(
digest_cls("b" * 64, 1000), ["f.ext", "dir/f.ext"], ["dir"]
assert snapshot != Snapshot._create_for_testing(
Digest("b" * 64, 1000), ["f.ext", "dir/f.ext"], ["dir"]
)
with pytest.raises(TypeError):
snapshot < snapshot
snapshot < snapshot # type: ignore[operator]
9 changes: 7 additions & 2 deletions src/python/pants/engine/internals/native_engine.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ class RawFdRunner(Protocol):
command: str,
args: tuple[str, ...],
env: dict[str, str],
working_directory: bytes,
cancellation_latch: PySessionCancellationLatch,
stdin_fileno: int,
stdout_fileno: int,
Expand Down Expand Up @@ -149,6 +148,9 @@ class PyDigest:
def fingerprint(self) -> str: ...
@property
def serialized_bytes_length(self) -> int: ...
def __eq__(self, other: PyDigest | Any) -> bool: ...
def __hash__(self) -> int: ...
def __repr__(self) -> str: ...

class PySnapshot:
def __init__(self) -> None: ...
Expand All @@ -162,6 +164,9 @@ class PySnapshot:
def dirs(self) -> tuple[str, ...]: ...
@property
def files(self) -> tuple[str, ...]: ...
def __eq__(self, other: PySnapshot | Any) -> bool: ...
def __hash__(self) -> int: ...
def __repr__(self) -> str: ...

class PyExecutionRequest:
def __init__(
Expand All @@ -184,7 +189,7 @@ class PyGeneratorResponseGetMulti:
def __init__(self, gets: tuple[PyGeneratorResponseGet, ...]) -> None: ...

class PyNailgunServer:
pass
def port(self) -> int: ...

class PyRemotingOptions:
def __init__(self, **kwargs: Any) -> None: ...
Expand Down
26 changes: 0 additions & 26 deletions src/python/pants/engine/internals/native_engine_pyo3.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@

from __future__ import annotations

from typing import Any, Sequence

from pants.engine.fs import PathGlobs

# TODO: black and flake8 disagree about the content of this file:
Expand All @@ -28,30 +26,6 @@ def default_cache_path() -> str: ...
# cast to `tuple()` when not necessary.
def match_path_globs(path_globs: PathGlobs, paths: tuple[str, ...]) -> str: ...

class PyDigest:
def __init__(self, fingerprint: str, serialized_bytes_length: int) -> None: ...
@property
def fingerprint(self) -> str: ...
@property
def serialized_bytes_length(self) -> int: ...
def __eq__(self, other: PyDigest | Any) -> bool: ...
def __hash__(self) -> int: ...

class PySnapshot:
def __init__(self) -> None: ...
@classmethod
def _create_for_testing(
cls, digest: PyDigest, files: Sequence[str], dirs: Sequence[str]
) -> PySnapshot: ...
@property
def digest(self) -> PyDigest: ...
@property
def dirs(self) -> tuple[str, ...]: ...
@property
def files(self) -> tuple[str, ...]: ...
def __eq__(self, other: PySnapshot | Any) -> bool: ...
def __hash__(self) -> int: ...

# ------------------------------------------------------------------------------
# Workunits
# ------------------------------------------------------------------------------
Expand Down
10 changes: 5 additions & 5 deletions src/python/pants/engine/internals/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,12 +469,12 @@ def execute(

states = [
Throw(
raw_root.result(),
python_traceback=raw_root.python_traceback(),
engine_traceback=raw_root.engine_traceback(),
raw_root.result,
python_traceback=raw_root.python_traceback,
engine_traceback=raw_root.engine_traceback,
)
if raw_root.is_throw()
else Return(raw_root.result())
if raw_root.is_throw
else Return(raw_root.result)
for raw_root in raw_roots
]

Expand Down
58 changes: 17 additions & 41 deletions src/rust/engine/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 021c3c0

Please sign in to comment.