Skip to content

Commit

Permalink
use task fingerprint for build invalidation to avoid results_dir cl…
Browse files Browse the repository at this point in the history
…ashes (#5170)

### Problem

In #4966, it was mentioned how tasks in different option scopes can end up with the same `results_dir`, which results in an error if tasks with different scopes but the same `stable_name` both try to use the cache in the same pants run (e.g. `lint.scalastyle` and `compile.scalastyle`). 

### Solution

- Use `Task#fingerprint` to compute the build invalidation key (in `TaskBase#_build_invalidator()`) and to generate the name of the cache subdirectory where results from the task run are stored (in `CacheFactory.make_task_cache_dirname( )`
- Incorporate the `stable_name()` and `implementation_version()` into the computation of the task fingerprint.
- Incorporate the `options_scope` of the `Task` subclass and the scopes of its `subsystem_dependencies` into the computation of the task fingerprint. Traverse all registered option scopes and incorporate the values of relevant options with `OptionsFingerprinter.combined_options_fingerprint_for_scope()`.

### Result

- The task fingerprint is now rigorously described and tested.
- The task fingerprint is now used for the invalidation key and for the name of the results directory. A corollary of this is that e.g. `lint.scalastyle` and `compile.scalastyle` no longer use the same `results_dir`, which resolves the issue described in #4966.
- The directory path containing the task results after execution can no longer be guessed without instantiating the task, because the task fingerprint requires fingerprinting the option values relevant to the task. In `test_cache_cleanup_integration.py`, we run the pants subprocess twice in a few tests to get around this.
  • Loading branch information
cosmicexplorer authored and Stu Hood committed Dec 20, 2017
1 parent f5345bf commit 0bbe913
Show file tree
Hide file tree
Showing 24 changed files with 755 additions and 284 deletions.
1 change: 1 addition & 0 deletions src/python/pants/backend/jvm/argfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pants.util.contextutil import temporary_file
from pants.util.dirutil import safe_open


logger = logging.getLogger(__name__)


Expand Down
32 changes: 22 additions & 10 deletions src/python/pants/cache/cache_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from pants.cache.resolver import NoopResolver, Resolver, RESTfulResolver
from pants.cache.restful_artifact_cache import RESTfulArtifactCache
from pants.subsystem.subsystem import Subsystem
from pants.util.memo import memoized_property


class EmptyCacheSpecError(ArtifactCacheError): pass
Expand Down Expand Up @@ -87,26 +88,26 @@ def register_options(cls, register):
help='Permissions to use when writing artifacts to a local cache, in octal.')

@classmethod
def create_cache_factory_for_task(cls, task, pinger=None, resolver=None):
return CacheFactory(cls.scoped_instance(task).get_options(),
task.context.log, task.stable_name(), pinger=pinger, resolver=resolver)
def create_cache_factory_for_task(cls, task, **kwargs):
scoped_options = cls.scoped_instance(task).get_options()
return CacheFactory(scoped_options, task.context.log, task, **kwargs)


class CacheFactory(object):

def __init__(self, options, log, stable_name, pinger=None, resolver=None):
def __init__(self, options, log, task, pinger=None, resolver=None):
"""Create a cache factory from settings.
:param options: Task's scoped options.
:param log: Task's context log.
:param stable_name: Task's stable name.
:param task: Task to cache results for.
:param pinger: Pinger to choose the best remote artifact cache URL.
:param resolver: Resolver to look up remote artifact cache URLs.
:return: cache factory.
"""
self._options = options
self._log = log
self._stable_name = stable_name
self._task = task

# Created on-demand.
self._read_cache = None
Expand All @@ -130,6 +131,16 @@ def __init__(self, options, log, stable_name, pinger=None, resolver=None):
else:
self._resolver = NoopResolver()

@staticmethod
def make_task_cache_dirname(task):
"""Use the task fingerprint as the name of the cache subdirectory to store
results from the task."""
return task.fingerprint

@memoized_property
def _cache_dirname(self):
return self.make_task_cache_dirname(self._task)

@property
def ignore(self):
return self._options.ignore
Expand Down Expand Up @@ -261,9 +272,9 @@ def _do_create_artifact_cache(self, spec, action):
artifact_root = self._options.pants_workdir

def create_local_cache(parent_path):
path = os.path.join(parent_path, self._stable_name)
path = os.path.join(parent_path, self._cache_dirname)
self._log.debug('{0} {1} local artifact cache at {2}'
.format(self._stable_name, action, path))
.format(self._task.stable_name(), action, path))
return LocalArtifactCache(artifact_root, path, compression,
self._options.max_entries_per_target,
permissions=self._options.write_permissions,
Expand All @@ -273,8 +284,9 @@ def create_remote_cache(remote_spec, local_cache):
urls = self.get_available_urls(remote_spec.split('|'))

if len(urls) > 0:
best_url_selector = BestUrlSelector(['{}/{}'.format(url.rstrip('/'), self._stable_name)
for url in urls])
best_url_selector = BestUrlSelector(
['{}/{}'.format(url.rstrip('/'), self._cache_dirname) for url in urls]
)
local_cache = local_cache or TempLocalArtifactCache(artifact_root, compression)
return RESTfulArtifactCache(artifact_root, best_url_selector, local_cache)

Expand Down
1 change: 0 additions & 1 deletion src/python/pants/invalidation/build_invalidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)


import errno
import hashlib
import os
Expand Down
39 changes: 18 additions & 21 deletions src/python/pants/option/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pants.option.global_options import GlobalOptionsRegistrar
from pants.option.option_util import is_list_option
from pants.option.option_value_container import OptionValueContainer
from pants.option.parser_hierarchy import ParserHierarchy, enclosing_scope
from pants.option.parser_hierarchy import ParserHierarchy, all_enclosing_scopes, enclosing_scope
from pants.option.scope import ScopeInfo


Expand Down Expand Up @@ -86,11 +86,9 @@ def complete_scopes(cls, scope_infos):
# components) we can replace this line with `for si in scope_infos:`, because it will
# not be possible for a deprecated_scope to introduce any new intermediate scopes.
for si in copy.copy(ret):
scope = si.scope
while scope != '':
for scope in all_enclosing_scopes(si.scope, allow_global=False):
if scope not in original_scopes:
ret.add(ScopeInfo(scope, ScopeInfo.INTERMEDIATE))
scope = enclosing_scope(scope)
return ret

@classmethod
Expand Down Expand Up @@ -312,55 +310,54 @@ def for_scope(self, scope, inherit_from_enclosing_scope=True):

return values

def get_fingerprintable_for_scope(self, scope, include_passthru=False, fingerprint_key=None,
invert=False):
def get_fingerprintable_for_scope(self, bottom_scope, include_passthru=False,
fingerprint_key=None, invert=False):
"""Returns a list of fingerprintable (option type, option value) pairs for the given scope.
Fingerprintable options are options registered via a "fingerprint=True" kwarg. This flag
can be parameterized with `fingerprint_key` for special cases.
:param str scope: The scope to gather fingerprintable options for.
:param bool include_passthru: Whether to include passthru args captured by `scope` in the
This method also searches enclosing options scopes of `bottom_scope` to determine the set of
fingerprintable pairs.
:param str bottom_scope: The scope to gather fingerprintable options for.
:param bool include_passthru: Whether to include passthru args captured by `bottom_scope` in the
fingerprintable options.
:param string fingerprint_key: The option kwarg to match against (defaults to 'fingerprint').
:param bool invert: Whether or not to invert the boolean check for the fingerprint_key value.
:API: public
"""
fingerprint_key = fingerprint_key or 'fingerprint'
fingerprint_default = False if invert else None
pairs = []

if include_passthru:
# Passthru args can only be sent to outermost scopes so we gather them once here up-front.
passthru_args = self.passthru_args_for_scope(scope)
passthru_args = self.passthru_args_for_scope(bottom_scope)
# NB: We can't sort passthru args, the underlying consumer may be order-sensitive.
pairs.extend((str, passthru_arg) for passthru_arg in passthru_args)
pairs.extend((str, pass_arg) for pass_arg in passthru_args)

# Note that we iterate over options registered at `scope` and at all enclosing scopes, since
# option-using code can read those values indirectly via its own OptionValueContainer, so
# they can affect that code's output.
registration_scope = scope
while registration_scope is not None:
# Note that we iterate over options registered at `bottom_scope` and at all
# enclosing scopes, since option-using code can read those values indirectly
# via its own OptionValueContainer, so they can affect that code's output.
for registration_scope in all_enclosing_scopes(bottom_scope):
parser = self._parser_hierarchy.get_parser_by_scope(registration_scope)
# Sort the arguments, so that the fingerprint is consistent.
for (_, kwargs) in sorted(parser.option_registrations_iter()):
if kwargs.get('recursive') and not kwargs.get('recursive_root'):
if kwargs.get('recursive', False) and not kwargs.get('recursive_root', False):
continue # We only need to fprint recursive options once.
if kwargs.get(fingerprint_key, fingerprint_default) is not (False if invert else True):
if bool(invert) == bool(kwargs.get(fingerprint_key, False)):
continue
# Note that we read the value from scope, even if the registration was on an enclosing
# scope, to get the right value for recursive options (and because this mirrors what
# option-using code does).
val = self.for_scope(scope)[kwargs['dest']]
val = self.for_scope(bottom_scope)[kwargs['dest']]
# If we have a list then we delegate to the fingerprinting implementation of the members.
if is_list_option(kwargs):
val_type = kwargs.get('member_type', str)
else:
val_type = kwargs.get('type', str)
pairs.append((val_type, val))
registration_scope = (None if registration_scope == ''
else enclosing_scope(registration_scope))
return pairs

def __getitem__(self, scope):
Expand Down
17 changes: 8 additions & 9 deletions src/python/pants/option/options_fingerprinter.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,24 +37,23 @@ class OptionsFingerprinter(object):
"""

@classmethod
def combined_options_fingerprint_for_scope(cls, scope, options, fingerprint_key=None,
invert=None, build_graph=None):
def combined_options_fingerprint_for_scope(cls, scope, options,
build_graph=None, **kwargs):
"""Given options and a scope, compute a combined fingerprint for the scope.
:param string scope: The scope to fingerprint.
:param Options options: The `Options` object to fingerprint.
:param string fingerprint_key: The options kwarg to filter against.
:param bool invert: Whether or not to invert the boolean check for the fingerprint_key value.
:param BuildGraph build_graph: A `BuildGraph` instance, only needed if fingerprinting
target options.
:param dict **kwargs: Keyword parameters passed on to
`Options#get_fingerprintable_for_scope`.
:return: Hexadecimal string representing the fingerprint for all `options`
values in `scope`.
"""
fingerprinter = cls(build_graph)
hasher = sha1()
for (option_type, option_value) in options.get_fingerprintable_for_scope(
scope,
fingerprint_key=fingerprint_key,
invert=invert
):
pairs = options.get_fingerprintable_for_scope(scope, **kwargs)
for (option_type, option_value) in pairs:
hasher.update(
# N.B. `OptionsFingerprinter.fingerprint()` can return `None`,
# so we always cast to bytes here.
Expand Down
35 changes: 35 additions & 0 deletions src/python/pants/option/parser_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,51 @@
from __future__ import (absolute_import, division, generators, nested_scopes, print_function,
unicode_literals, with_statement)

import re

from pants.option.arg_splitter import GLOBAL_SCOPE
from pants.option.config import Config
from pants.option.parser import Parser


class InvalidScopeError(Exception):
pass

_empty_scope_component_re = re.compile(r'\.\.')


def _validate_full_scope(scope):
if _empty_scope_component_re.search(scope):
raise InvalidScopeError(
"full scope '{}' has at least one empty component".format(scope))


def enclosing_scope(scope):
"""Utility function to return the scope immediately enclosing a given scope."""
_validate_full_scope(scope)
return scope.rpartition('.')[0]


def all_enclosing_scopes(scope, allow_global=True):
"""Utility function to return all scopes up to the global scope enclosing a
given scope."""

_validate_full_scope(scope)

# TODO(cosmicexplorer): validate scopes here and/or in `enclosing_scope()`
# instead of assuming correctness.
def scope_within_range(tentative_scope):
if tentative_scope is None:
return False
if not allow_global and tentative_scope == GLOBAL_SCOPE:
return False
return True

while scope_within_range(scope):
yield scope
scope = (None if scope == GLOBAL_SCOPE else enclosing_scope(scope))


class ParserHierarchy(object):
"""A hierarchy of scoped Parser instances.
Expand Down
37 changes: 18 additions & 19 deletions src/python/pants/task/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,11 @@ def __init__(self, *args, **kwargs):
self._task_name = type(self).__name__
self._cache_key_errors = set()
self._cache_factory = CacheSetup.create_cache_factory_for_task(self)
self._options_fingerprinter = OptionsFingerprinter(self.context.build_graph)
self._force_invalidated = False

@memoized_method
def _build_invalidator(self, root=False):
build_task = None if root else self.stable_name()
build_task = None if root else self.fingerprint
return BuildInvalidator.Factory.create(build_task=build_task)

def get_options(self):
Expand Down Expand Up @@ -216,16 +215,16 @@ def workdir(self):
return self._workdir

def _options_fingerprint(self, scope):
pairs = self.context.options.get_fingerprintable_for_scope(
options_hasher = sha1()
options_hasher.update(scope)
options_fp = OptionsFingerprinter.combined_options_fingerprint_for_scope(
scope,
include_passthru=self.supports_passthru_args()
self.context.options,
build_graph=self.context.build_graph,
include_passthru=self.supports_passthru_args(),
)
hasher = sha1()
for (option_type, option_val) in pairs:
fp = self._options_fingerprinter.fingerprint(option_type, option_val)
if fp is not None:
hasher.update(fp)
return hasher.hexdigest()
options_hasher.update(options_fp)
return options_hasher.hexdigest()

@memoized_property
def fingerprint(self):
Expand All @@ -238,6 +237,7 @@ def fingerprint(self):
A task's fingerprint is only valid afer the task has been fully initialized.
"""
hasher = sha1()
hasher.update(self.stable_name())
hasher.update(self._options_fingerprint(self.options_scope))
hasher.update(self.implementation_version_str())
# TODO: this is not recursive, but should be: see #2739
Expand Down Expand Up @@ -364,10 +364,10 @@ def invalidated(self,
targets.extend(vt.targets)

if len(targets):
msg_elements = ['Invalidated ',
items_to_report_element([t.address.reference() for t in targets], 'target'),
'.']
self.context.log.info(*msg_elements)
msg, detail = items_to_report_element(
[t.address.reference() for t in targets],
'target')
self.context.log.info('Invalidated {}: {}.'.format(msg, detail))

self._update_invalidation_report(invalidation_check, 'pre-check')

Expand Down Expand Up @@ -582,11 +582,10 @@ def _get_update_artifact_cache_work(self, vts_artifactfiles_pairs):

def _report_targets(self, prefix, targets, suffix, logger=None):
logger = logger or self.context.log.info
logger(
prefix,
items_to_report_element([t.address.reference() for t in targets], 'target'),
suffix,
)
msg, detail = items_to_report_element(
[t.address.reference() for t in targets],
'target')
logger(prefix + '{}: {}'.format(msg, detail) + suffix)

def require_single_root_target(self):
"""If a single target was specified on the cmd line, returns that target.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from pants.backend.codegen.jaxb.jaxb_library import JaxbLibrary
from pants.backend.codegen.jaxb.register import build_file_aliases as register_codegen
from pants.build_graph.register import build_file_aliases as register_core

from pants_test.jvm.nailgun_task_test_base import NailgunTaskTestBase


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,3 @@ def test_coverage_forced(self):

no_force_cobertura._nothing_to_instrument = False
self.assertEquals(no_force_cobertura.should_report(exception), False, 'Don\'t report after a failure if coverage isn\'t forced.')



Loading

0 comments on commit 0bbe913

Please sign in to comment.