Skip to content

Commit

Permalink
Always capture and cache Digests in coursier and ivy (#7835)
Browse files Browse the repository at this point in the history
### Problem

Capturing digests for `coursier` and `ivy` artifacts is currently optional, presumably due to the performance impact of not being cached run over run.

### Solution

#7241 added support for stashing `Digest`s next to digested files, and support for providing a `Digest` hint that will skip snapshotting if the `Digest` is already stored. We use that support here to always-snapshot 3rdparty inputs (and to skip re-snapshotting if a `Digest` was stashed).

### Result

One fewer option, and slightly better performance when re-running.
  • Loading branch information
stuhood authored Jun 2, 2019
1 parent 306820c commit 944b42c
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 41 deletions.
22 changes: 15 additions & 7 deletions src/python/pants/backend/jvm/tasks/resolve_shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from builtins import next

from pants.base.build_environment import get_buildroot
from pants.engine.fs import PathGlobs, PathGlobsAndRoot
from pants.engine.fs import Digest, PathGlobs, PathGlobsAndRoot
from pants.java.jar.jar_dependency_utils import ResolvedJar
from pants.task.task import TaskBase
from pants.util.dirutil import fast_relpath
Expand All @@ -22,11 +22,11 @@ def register_options(cls, register):
This class is intended to be extended by Jvm resolvers (coursier and ivy), and the option name should reflect that.
"""
super(JvmResolverBase, cls).register_options(register)
# TODO This flag should be defaulted to True when we are doing hermetic execution,
# and should probably go away as we move forward into that direction.
register('--capture-snapshots', type=bool, default=False,
help='Enable capturing snapshots to add directory digests to dependency jars.'
'Note that this is necessary when hermetic execution is enabled.')
removal_version='1.19.0.dev2',
removal_hint='Enabled by default.',
help='Enable capturing snapshots to add directory digests to dependency jars.'
'Note that this is necessary when hermetic execution is enabled.')

def add_directory_digests_for_jars(self, targets_and_jars):
"""For each target, get DirectoryDigests for its jars and return them zipped with the jars.
Expand All @@ -37,18 +37,26 @@ def add_directory_digests_for_jars(self, targets_and_jars):

targets_and_jars=list(targets_and_jars)

if not targets_and_jars or not self.get_options().capture_snapshots:
if not targets_and_jars:
return targets_and_jars

jar_paths = []
for target, jars_to_snapshot in targets_and_jars:
for jar in jars_to_snapshot:
jar_paths.append(fast_relpath(jar.pants_path, get_buildroot()))

# Capture Snapshots for jars, using an optional adjacent digest. Create the digest afterward
# if it does not exist.
snapshots = self.context._scheduler.capture_snapshots(
tuple(
PathGlobsAndRoot(PathGlobs([jar]), get_buildroot()) for jar in jar_paths
PathGlobsAndRoot(
PathGlobs([jar]),
get_buildroot(),
Digest.load(jar),
) for jar in jar_paths
))
for snapshot, jar_path in zip(snapshots, jar_paths):
snapshot.directory_digest.dump(jar_path)

# We want to map back the list[Snapshot] to targets_and_jars
# We assume that (1) jars_to_snapshot has the same number of ResolveJars as snapshots does Snapshots,
Expand Down
22 changes: 11 additions & 11 deletions src/python/pants/engine/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,31 +78,31 @@ class Digest(datatype([('fingerprint', text_type), ('serialized_bytes_length', i
"""

@classmethod
def _path(cls, directory):
return '{}.digest'.format(directory.rstrip(os.sep))
def _path(cls, digested_path):
return '{}.digest'.format(digested_path.rstrip(os.sep))

@classmethod
def clear(cls, directory):
"""Clear any existing Digest file adjacent to the given directory."""
safe_delete(cls._path(directory))
def clear(cls, digested_path):
"""Clear any existing Digest file adjacent to the given digested_path."""
safe_delete(cls._path(digested_path))

@classmethod
def load(cls, directory):
"""Load a Digest from a `.digest` file adjacent to the given directory.
def load(cls, digested_path):
"""Load a Digest from a `.digest` file adjacent to the given digested_path.
:return: A Digest, or None if the Digest did not exist.
"""
read_file = maybe_read_file(cls._path(directory))
read_file = maybe_read_file(cls._path(digested_path))
if read_file:
fingerprint, length = read_file.split(':')
return Digest(fingerprint, int(length))
else:
return None

def dump(self, directory):
"""Dump this Digest object adjacent to the given directory."""
def dump(self, digested_path):
"""Dump this Digest object adjacent to the given digested_path."""
payload = '{}:{}'.format(self.fingerprint, self.serialized_bytes_length)
safe_file_dump(self._path(directory), payload=payload)
safe_file_dump(self._path(digested_path), payload=payload)

def __repr__(self):
return '''Digest(fingerprint={}, serialized_bytes_length={})'''.format(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -431,29 +431,6 @@ def main(args: Array[String]) {
path = os.path.join(compile_dir, path_suffix)
self.assertTrue(os.path.exists(path), "Want path {} to exist".format(path))

def test_hermetic_binary_with_capturing_off(self):
capture_snapshots = False
config = {
'resolve.ivy': {'capture_snapshots': capture_snapshots},
'resolve.coursier': {'capture_snapshots': capture_snapshots},
'compile.zinc': {
'execution_strategy': 'hermetic',
'use_classpath_jars': False,
'incremental': False,
},
}
with self.temporary_workdir() as workdir:
with self.temporary_file_content("readme.txt", b"yo"):
pants_run = self.run_pants_with_workdir(
[
'run',
'testprojects/src/java/org/pantsbuild/testproject/cwdexample',
],
workdir,
config,
)
self.assert_failure(pants_run)

def test_hermetic_binary_with_3rdparty_dependencies_ivy(self):
config = {
'resolve.ivy': {'capture_snapshots': True},
Expand Down

0 comments on commit 944b42c

Please sign in to comment.