Skip to content

Commit

Permalink
Batch execution of address Specs and remove SelectTransitive (#5605)
Browse files Browse the repository at this point in the history
While chasing performance in one area for #4349 we added `SelectTransitive`. This decreased performance in another significant area: executions involving multiple transitive roots can't structure-share at all when expanding dependencies, leading to many redundant walks (#4533).

Additionally, usability regressed in a few ways: the product `Graph` could not implement cycle detection for dependencies expanded via `SelectTransitive` (#4358), and in the case of a missing or broken dependency, the referring context was lost (#4515).

* Remove usage of `SelectTransitive` for `TransitiveHydratedTargets` to reintroduce structure sharing and improve usability (fixes #4358 and fixes #4515).
* Replace `@rule`s that operate on single-`Spec`s with batch forms that operate on a `Specs` collection (fixes #4533).

Cycles should be detected much earlier with:
```
Exception message: Build graph construction failed: ExecutionError Received unexpected Throw state(s):
Computing Select(Collection(dependencies=(SingleAddress(directory=u'testprojects/src/java/org/pantsbuild/testproject/cycle1', name=u'cycle1'),)), =TransitiveHydratedTargets)
  Computing Task(transitive_hydrated_targets, Collection(dependencies=(SingleAddress(directory=u'testprojects/src/java/org/pantsbuild/testproject/cycle1', name=u'cycle1'),)), =TransitiveHydratedTargets)
    Computing Task(transitive_hydrated_target, testprojects/src/java/org/pantsbuild/testproject/cycle1:cycle1, =TransitiveHydratedTarget)
      Computing Task(transitive_hydrated_target, testprojects/src/java/org/pantsbuild/testproject/cycle2:cycle2, =TransitiveHydratedTarget)
        Throw(No source of required dependency: Dep graph contained a cycle.)
          Traceback (no traceback):
            <pants native internals>
          Exception: No source of required dependency: Dep graph contained a cycle.
```
_(more polish needed here: re-opened #3695 to clean up `trace`)_

And time taken is proportional to the total number of matched targets, rather than to the sum of the closure sizes of the roots:
```
$ time ./pants --target-spec-file=targets.txt list | wc -l
    1500

real	0m15.297s
user	0m14.603s
sys	0m1.625s

$ time ./pants --target-spec-file=targets.txt list | wc -l
    1500

real	0m5.989s
user	0m5.261s
sys	0m1.310s
```

Runtimes for things like `./pants list ::` are unaffected, although memory usage increases by about 5%, likely due to the branchier resulting `Graph`.
  • Loading branch information
Stu Hood authored and stuhood committed Mar 20, 2018
1 parent df0e9a6 commit 1267b39
Show file tree
Hide file tree
Showing 25 changed files with 232 additions and 481 deletions.
4 changes: 2 additions & 2 deletions src/python/pants/bin/engine_initializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pants.base.build_environment import get_buildroot, get_scm
from pants.base.file_system_project_tree import FileSystemProjectTree
from pants.base.target_roots import ChangedTargetRoots, LiteralTargetRoots
from pants.engine.build_files import BuildFileAddresses, create_graph_rules
from pants.engine.build_files import BuildFileAddresses, Specs, create_graph_rules
from pants.engine.fs import create_fs_rules
from pants.engine.isolated_process import create_process_rules
from pants.engine.legacy.address_mapper import LegacyAddressMapper
Expand Down Expand Up @@ -84,7 +84,7 @@ def warm_product_graph(self, target_roots):
if type(target_roots) is ChangedTargetRoots:
subjects = [BuildFileAddresses(target_roots.addresses)]
elif type(target_roots) is LiteralTargetRoots:
subjects = target_roots.specs
subjects = [Specs(tuple(target_roots.specs))]
else:
raise ValueError('Unexpected TargetRoots type: `{}`.'.format(target_roots))
request = self.scheduler.execution_request([TransitiveHydratedTargets], subjects)
Expand Down
3 changes: 2 additions & 1 deletion src/python/pants/engine/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,15 @@ python_library(
':struct',
'src/python/pants/base:project_tree',
'src/python/pants/build_graph',
'src/python/pants/util:dirutil',
'src/python/pants/util:objects',
]
)

python_library(
name='mapper',
sources=['mapper.py'],
dependencies=[
'3rdparty/python:pathspec',
':objects',
':parser',
'src/python/pants/build_graph',
Expand Down
158 changes: 91 additions & 67 deletions src/python/pants/engine/build_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,25 +12,21 @@

from pants.base.project_tree import Dir
from pants.base.specs import (AscendantAddresses, DescendantAddresses, SiblingAddresses,
SingleAddress)
SingleAddress, Spec)
from pants.build_graph.address import Address, BuildFileAddress
from pants.build_graph.address_lookup_error import AddressLookupError
from pants.engine.addressable import (AddressableDescriptor, BuildFileAddresses, Collection,
Exactly, TypeConstraintError)
TypeConstraintError)
from pants.engine.fs import FilesContent, PathGlobs, Snapshot
from pants.engine.mapper import AddressFamily, AddressMap, AddressMapper, ResolveError
from pants.engine.objects import Locatable, SerializableFactory, Validatable
from pants.engine.rules import RootRule, SingletonRule, TaskRule, rule
from pants.engine.selectors import Select, SelectDependencies, SelectProjection
from pants.engine.struct import Struct
from pants.util.dirutil import fast_relpath_optional
from pants.util.objects import datatype


_SPECS_CONSTRAINT = Exactly(SingleAddress,
SiblingAddresses,
DescendantAddresses,
AscendantAddresses)


class ResolvedTypeMismatchError(ResolveError):
"""Indicates a resolved object was not of the expected type."""

Expand All @@ -52,6 +48,10 @@ class BuildFileGlobs(datatype('BuildFilesGlobs', ['path_globs'])):
"""A wrapper around PathGlobs that are known to match a build file pattern."""


class Specs(Collection.of(Spec)):
"""A collection of Spec subclasses."""


@rule(BuildFiles,
[SelectProjection(FilesContent, PathGlobs, 'path_globs', BuildFileGlobs)])
def build_files(files_content):
Expand All @@ -60,8 +60,10 @@ def build_files(files_content):

@rule(BuildFileGlobs, [Select(AddressMapper), Select(Dir)])
def buildfile_path_globs_for_dir(address_mapper, directory):
patterns = address_mapper.build_patterns
return BuildFileGlobs(PathGlobs.create(directory.path, include=patterns, exclude=()))
patterns = tuple(join(directory.path, p) for p in address_mapper.build_patterns)
return BuildFileGlobs(PathGlobs.create('',
include=patterns,
exclude=address_mapper.build_ignore_patterns))


@rule(AddressFamily, [Select(AddressMapper), Select(Dir), Select(BuildFiles)])
Expand All @@ -74,11 +76,7 @@ def parse_address_family(address_mapper, path, build_files):
if not files_content:
raise ResolveError('Directory "{}" does not contain build files.'.format(path))
address_maps = []
paths = (f.path for f in files_content)
ignored_paths = set(address_mapper.build_ignore_patterns.match_files(paths))
for filecontent_product in files_content:
if filecontent_product.path in ignored_paths:
continue
address_maps.append(AddressMap.parse(filecontent_product.path,
filecontent_product.content,
address_mapper.parser))
Expand Down Expand Up @@ -232,43 +230,74 @@ def _hydrate(item_type, spec_path, **kwargs):
@rule(BuildFileAddresses,
[Select(AddressMapper),
SelectDependencies(AddressFamily, BuildDirs, field_types=(Dir,)),
Select(_SPECS_CONSTRAINT)])
def addresses_from_address_families(address_mapper, address_families, spec):
"""Given a list of AddressFamilies and a Spec, return matching Addresses.
Select(Specs)])
def addresses_from_address_families(address_mapper, address_families, specs):
"""Given a list of AddressFamilies matching a list of Specs, return matching Addresses.
Raises a ResolveError if:
Raises a AddressLookupError if:
- there were no matching AddressFamilies, or
- the Spec matches no addresses for SingleAddresses.
"""

def raise_if_empty_address_families():
if not address_families:
raise ResolveError('Path "{}" contains no BUILD files.'.format(spec.directory))
# NB: `@memoized` does not work on local functions.
def by_directory():
if by_directory.cached is None:
by_directory.cached = {af.namespace: af for af in address_families}
return by_directory.cached
by_directory.cached = None

def raise_empty_address_family(spec):
raise ResolveError('Path "{}" contains no BUILD files.'.format(spec.directory))

def exclude_address(address):
if address_mapper.exclude_patterns:
address_str = address.spec
return any(p.search(address_str) is not None for p in address_mapper.exclude_patterns)
return False

def all_included_addresses():
return (a
for af in address_families
for a in af.addressables.keys()
if not exclude_address(a))

if type(spec) in (DescendantAddresses, SiblingAddresses):
raise_if_empty_address_families()
addresses = tuple(all_included_addresses())
elif type(spec) is SingleAddress:
raise_if_empty_address_families()
addresses = tuple(a for a in all_included_addresses() if a.target_name == spec.name)
if not addresses and len(address_families) == 1:
_raise_did_you_mean(address_families[0], spec.name)
elif type(spec) is AscendantAddresses:
addresses = tuple(all_included_addresses())
else:
raise ValueError('Unrecognized Spec type: {}'.format(spec))
addresses = []
included = set()
def include(address_families, predicate=None):
matched = False
for af in address_families:
for a in af.addressables.keys():
if a in included:
continue
if not exclude_address(a) and (predicate is None or predicate(a)):
matched = True
addresses.append(a)
included.add(a)
return matched

for spec in specs.dependencies:
if type(spec) is DescendantAddresses:
matched = include(
af
for af in address_families
if fast_relpath_optional(af.namespace, spec.directory) is not None
)
if not matched:
raise AddressLookupError(
'Spec {} does not match any targets.'.format(spec))
elif type(spec) is SiblingAddresses:
address_family = by_directory().get(spec.directory)
if not address_family:
raise_empty_address_family(spec)
include([address_family])
elif type(spec) is SingleAddress:
address_family = by_directory().get(spec.directory)
if not address_family:
raise_empty_address_family(spec)
if not include([address_family], predicate=lambda a: a.target_name == spec.name):
_raise_did_you_mean(address_family, spec.name)
elif type(spec) is AscendantAddresses:
include(
af
for af in address_families
if fast_relpath_optional(spec.directory, af.namespace) is not None
)
else:
raise ValueError('Unrecognized Spec type: {}'.format(spec))

return BuildFileAddresses(addresses)

Expand All @@ -277,30 +306,28 @@ def all_included_addresses():
def filter_build_dirs(address_mapper, snapshot):
"""Given a Snapshot matching a build pattern, return parent directories as BuildDirs."""
dirnames = set(dirname(f.stat.path) for f in snapshot.files)
ignored_dirnames = address_mapper.build_ignore_patterns.match_files('{}/'.format(dirname) for dirname in dirnames)
ignored_dirnames = set(d.rstrip('/') for d in ignored_dirnames)
return BuildDirs(tuple(Dir(d) for d in dirnames if d not in ignored_dirnames))


@rule(PathGlobs, [Select(AddressMapper), Select(_SPECS_CONSTRAINT)])
def spec_to_globs(address_mapper, spec):
"""Given a Spec object, return a PathGlobs object for the build files that it matches."""
if type(spec) is DescendantAddresses:
directory = spec.directory
patterns = [join('**', pattern) for pattern in address_mapper.build_patterns]
elif type(spec) in (SiblingAddresses, SingleAddress):
directory = spec.directory
patterns = address_mapper.build_patterns
elif type(spec) is AscendantAddresses:
directory = ''
patterns = [
join(f, pattern)
for pattern in address_mapper.build_patterns
for f in _recursive_dirname(spec.directory)
]
else:
raise ValueError('Unrecognized Spec type: {}'.format(spec))
return PathGlobs.create(directory, include=patterns, exclude=[])
return BuildDirs(tuple(Dir(d) for d in dirnames))


@rule(PathGlobs, [Select(AddressMapper), Select(Specs)])
def spec_to_globs(address_mapper, specs):
"""Given a Spec object, return a PathGlobs object for the build files that it matches.
"""
patterns = set()
for spec in specs.dependencies:
if type(spec) is DescendantAddresses:
patterns.update(join(spec.directory, '**', pattern)
for pattern in address_mapper.build_patterns)
elif type(spec) in (SiblingAddresses, SingleAddress):
patterns.update(join(spec.directory, pattern)
for pattern in address_mapper.build_patterns)
elif type(spec) is AscendantAddresses:
patterns.update(join(f, pattern)
for pattern in address_mapper.build_patterns
for f in _recursive_dirname(spec.directory))
else:
raise ValueError('Unrecognized Spec type: {}'.format(spec))
return PathGlobs.create('', include=patterns, exclude=address_mapper.build_ignore_patterns)


def _recursive_dirname(f):
Expand Down Expand Up @@ -370,8 +397,5 @@ def create_graph_rules(address_mapper, symbol_table):
RootRule(Address),
RootRule(BuildFileAddress),
RootRule(BuildFileAddresses),
RootRule(AscendantAddresses),
RootRule(DescendantAddresses),
RootRule(SiblingAddresses),
RootRule(SingleAddress),
RootRule(Specs),
]
57 changes: 29 additions & 28 deletions src/python/pants/engine/legacy/address_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@

from pants.base.build_file import BuildFile
from pants.base.specs import DescendantAddresses, SiblingAddresses
from pants.build_graph.address_lookup_error import AddressLookupError
from pants.build_graph.address_mapper import AddressMapper
from pants.engine.addressable import BuildFileAddresses
from pants.engine.build_files import BuildFilesCollection
from pants.engine.build_files import BuildFilesCollection, Specs
from pants.engine.mapper import ResolveError
from pants.engine.nodes import Throw
from pants.util.dirutil import fast_relpath
Expand All @@ -32,16 +33,12 @@ def __init__(self, scheduler, build_root):
self._build_root = build_root

def scan_build_files(self, base_path):
request = self._scheduler.execution_request([BuildFilesCollection], [(DescendantAddresses(base_path))])

result = self._scheduler.execute(request)
if result.error:
raise result.error
specs = (DescendantAddresses(base_path),)
build_files_collection, = self._scheduler.product_request(BuildFilesCollection, [Specs(specs)])

build_files_set = set()
for _, state in result.root_products:
for build_files in state.value.dependencies:
build_files_set.update(f.path for f in build_files.files_content.dependencies)
for build_files in build_files_collection.dependencies:
build_files_set.update(f.path for f in build_files.files_content.dependencies)

return build_files_set

Expand All @@ -68,30 +65,34 @@ def addresses_in_spec_path(self, spec_path):
def scan_specs(self, specs, fail_fast=True):
return self._internal_scan_specs(specs, fail_fast=fail_fast, missing_is_fatal=True)

def _specs_string(self, specs):
return ', '.join(s.to_spec_string() for s in specs)

def _internal_scan_specs(self, specs, fail_fast=True, missing_is_fatal=True):
request = self._scheduler.execution_request([BuildFileAddresses], specs)
# TODO: This should really use `product_request`, but on the other hand, we need to
# deprecate the entire `AddressMapper` interface anyway. See #4769.
request = self._scheduler.execution_request([BuildFileAddresses], [Specs(tuple(specs))])
result = self._scheduler.execute(request)
if result.error:
raise self.BuildFileScanError(str(result.error))

addresses = set()
for (spec, _), state in result.root_products:
if isinstance(state, Throw):
if isinstance(state.exc, ResolveError):
if missing_is_fatal:
raise self.BuildFileScanError(
'Spec `{}` does not match any targets.\n{}'.format(spec.to_spec_string(), str(state.exc)))
else:
# NB: ignore Throws containing ResolveErrors because they are due to missing targets / files
continue
(_, state), = result.root_products

if isinstance(state, Throw):
if isinstance(state.exc, (AddressLookupError, ResolveError)):
if missing_is_fatal:
raise self.BuildFileScanError(
'Spec `{}` does not match any targets.\n{}'.format(
self._specs_string(specs), str(state.exc)))
else:
raise self.BuildFileScanError(str(state.exc))
elif missing_is_fatal and not state.value.dependencies:
raise self.BuildFileScanError(
'Spec `{}` does not match any targets.'.format(spec.to_spec_string()))

addresses.update(state.value.dependencies)
return addresses
# NB: ignore Throws containing ResolveErrors because they are due to missing targets / files
return set()
else:
raise self.BuildFileScanError(str(state.exc))
elif missing_is_fatal and not state.value.dependencies:
raise self.BuildFileScanError(
'Spec `{}` does not match any targets.'.format(self._specs_string(specs)))

return set(state.value.dependencies)

def scan_addresses(self, root=None):
if root:
Expand Down
Loading

0 comments on commit 1267b39

Please sign in to comment.