Skip to content

Commit

Permalink
[internal] Improve Go performance by running fewer processes to analy…
Browse files Browse the repository at this point in the history
…ze external packages (#13128)

We were running `go list -json` much more frequently than necessary. We run it once per module to generate `_go_external_package` targets, but then we would rerun it on each package within the module. Those each often took 5-9 seconds, so this was atrocious for performance. Instead, we can simply run it once per module and leverage the engine's memoization to avoid re-running.

This refactors our external module code so that it no longer interacts at all with the Target API, which reduces coupling. We rename things like the file to `external_pkg.py` so that only the `ExternalModuleInfo` and `ExternalPkgInfo` types are exposed. Notably, we no longer use `ResolvedGoPackage` for external code, which allows us to ignore lots of irrelevant fields like `test_imports`.

### `dependencies` benchmark

This requires changing this line to use `Targets` so that generated targets have their dependencies resolved too:

https://github.com/pantsbuild/pants/blob/a744978d2badd06509cc6e8091c14220bed3aff6/src/python/pants/backend/project_info/dependencies.py#L94

Before (I aborted after 1 run because it was so slow): 

```
❯ hyperfine -r 5 './pants_from_sources --no-pantsd --no-process-execution-local-cache dependencies ::'
Current estimate: 344.640 s
```

After:

```
❯ hyperfine -r 5 './pants_from_sources --no-pantsd --no-process-execution-local-cache dependencies ::'
  Time (mean ± σ):     26.501 s ±  1.537 s    [User: 29.554 s, System: 24.115 s]
  Range (min … max):   24.928 s … 28.763 s    5 runs
```

### `package` benchmark

Compilation is faster now too, as we no longer need to run `go list` to determine the `.go` and `.s` files. It's already eagerly computed beforehand.

Before:

```
❯ hyperfine -r 5 './pants_from_sources --no-pantsd --no-process-execution-local-cache package ::'
Benchmark #1: ./pants_from_sources --no-pantsd --no-process-execution-local-cache package ::
  Time (mean ± σ):     54.981 s ±  1.441 s    [User: 70.577 s, System: 81.823 s]
  Range (min … max):   53.426 s … 57.188 s    5 runs
```

After:

```
❯ hyperfine -r 5 './pants_from_sources --no-pantsd --no-process-execution-local-cache package ::'
  Time (mean ± σ):     33.777 s ±  0.248 s    [User: 39.221 s, System: 39.389 s]
  Range (min … max):   33.517 s … 34.062 s    5 runs
```

[ci skip-rust]
[ci skip-build-wheels]
  • Loading branch information
Eric-Arellano authored Oct 6, 2021
1 parent 45a144c commit 5d4ba99
Show file tree
Hide file tree
Showing 11 changed files with 327 additions and 307 deletions.
4 changes: 2 additions & 2 deletions src/python/pants/backend/experimental/go/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
assembly,
build_go_pkg,
compile,
external_module,
external_pkg,
go_mod,
go_pkg,
import_analysis,
Expand All @@ -36,7 +36,7 @@ def rules():
*assembly.rules(),
*build_go_pkg.rules(),
*compile.rules(),
*external_module.rules(),
*external_pkg.rules(),
*golang.rules(),
*import_analysis.rules(),
*go_mod.rules(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
assembly,
build_go_pkg,
compile,
external_module,
external_pkg,
go_mod,
go_pkg,
import_analysis,
Expand Down Expand Up @@ -46,7 +46,7 @@ def rule_runner() -> RuleRunner:
*go_mod.rules(),
*link.rules(),
*target_type_rules.rules(),
*external_module.rules(),
*external_pkg.rules(),
*sdk.rules(),
QueryRule(BuiltPackage, (GoBinaryFieldSet,)),
],
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/backend/go/goals/tailor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
)
from pants.backend.go.goals.tailor import rules as go_tailor_rules
from pants.backend.go.target_types import GoModTarget, GoPackage
from pants.backend.go.util_rules import external_module, go_mod, sdk
from pants.backend.go.util_rules import external_pkg, go_mod, sdk
from pants.core.goals.tailor import (
AllOwnedSources,
PutativeTarget,
Expand All @@ -31,7 +31,7 @@ def rule_runner() -> RuleRunner:
*go_tailor_rules(),
*external_tool.rules(),
*source_files.rules(),
*external_module.rules(),
*external_pkg.rules(),
*go_mod.rules(),
*sdk.rules(),
*target_type_rules.rules(),
Expand Down
66 changes: 29 additions & 37 deletions src/python/pants/backend/go/target_type_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@
GoPackageSources,
)
from pants.backend.go.util_rules import go_pkg, import_analysis
from pants.backend.go.util_rules.external_module import (
ExternalModulePkgImportPaths,
ExternalModulePkgImportPathsRequest,
ResolveExternalGoPackageRequest,
from pants.backend.go.util_rules.external_pkg import (
ExternalModuleInfo,
ExternalModuleInfoRequest,
ExternalPkgInfo,
ExternalPkgInfoRequest,
)
from pants.backend.go.util_rules.go_mod import (
GoModInfo,
GoModInfoRequest,
ModuleDescriptor,
OwningGoMod,
OwningGoModRequest,
)
Expand Down Expand Up @@ -189,30 +189,30 @@ class InjectGoExternalPackageDependenciesRequest(InjectDependenciesRequest):
inject_for = GoExternalPackageDependencies


# TODO(#12761): This duplicates first-party dependency inference but that other rule cannot operate
# on _go_external_package targets since there is no sources field in a _go_external_package.
# Consider how to merge the inference/injection rules into one. Maybe use a private Sources field?
@rule
async def inject_go_external_package_dependencies(
request: InjectGoExternalPackageDependenciesRequest,
std_lib_imports: GoStdLibImports,
package_mapping: GoImportPathToPackageMapping,
) -> InjectedDependencies:
wrapped_target = await Get(WrappedTarget, Address, request.dependencies_field.address)
addr = request.dependencies_field.address
wrapped_target = await Get(WrappedTarget, Address, addr)
tgt = wrapped_target.target
assert isinstance(tgt, GoExternalPackageTarget)

owning_go_mod = await Get(OwningGoMod, OwningGoModRequest(tgt.address))
owning_go_mod = await Get(OwningGoMod, OwningGoModRequest(addr))
go_mod_info = await Get(GoModInfo, GoModInfoRequest(owning_go_mod.address))

this_go_package = await Get(
ResolvedGoPackage, ResolveExternalGoPackageRequest(tgt, go_mod_info.stripped_digest)
pkg_info = await Get(
ExternalPkgInfo,
ExternalPkgInfoRequest(
module_path=tgt[GoExternalModulePathField].value,
version=tgt[GoExternalModuleVersionField].value,
import_path=tgt[GoExternalPackageImportPathField].value,
go_mod_stripped_digest=go_mod_info.stripped_digest,
),
)

# Loop through all of the imports of this package and add dependencies on other packages and
# external modules.
inferred_dependencies = []
for import_path in this_go_package.imports + this_go_package.test_imports:
for import_path in pkg_info.imports:
if import_path in std_lib_imports:
continue

Expand All @@ -229,7 +229,7 @@ async def inject_go_external_package_dependencies(
else:
logger.debug(
f"Unable to infer dependency for import path '{import_path}' "
f"in go_external_package at address '{this_go_package.address}'."
f"in go_external_package at address '{addr}'."
)

return InjectedDependencies(inferred_dependencies)
Expand All @@ -250,10 +250,10 @@ async def generate_go_external_package_targets(
) -> GeneratedTargets:
generator_addr = request.generator.address
go_mod_info = await Get(GoModInfo, GoModInfoRequest(generator_addr))
all_pkg_import_paths = await MultiGet(
all_module_info = await MultiGet(
Get(
ExternalModulePkgImportPaths,
ExternalModulePkgImportPathsRequest(
ExternalModuleInfo,
ExternalModuleInfoRequest(
module_path=module_descriptor.path,
version=module_descriptor.version,
go_mod_stripped_digest=go_mod_info.stripped_digest,
Expand All @@ -262,31 +262,23 @@ async def generate_go_external_package_targets(
for module_descriptor in go_mod_info.modules
)

def create_tgt(
module_descriptor: ModuleDescriptor, pkg_import_path: str
) -> GoExternalPackageTarget:
def create_tgt(pkg_info: ExternalPkgInfo) -> GoExternalPackageTarget:
return GoExternalPackageTarget(
{
GoExternalModulePathField.alias: module_descriptor.path,
GoExternalModuleVersionField.alias: module_descriptor.version,
GoExternalPackageImportPathField.alias: pkg_import_path,
GoExternalModulePathField.alias: pkg_info.module_path,
GoExternalModuleVersionField.alias: pkg_info.version,
GoExternalPackageImportPathField.alias: pkg_info.import_path,
},
# E.g. `src/go:mod#github.com/google/uuid`.
Address(
generator_addr.spec_path,
target_name=generator_addr.target_name,
generated_name=pkg_import_path,
),
generator_addr.create_generated(pkg_info.import_path),
)

return GeneratedTargets(
request.generator,
(
create_tgt(module_descriptor, pkg_import_path)
for module_descriptor, pkg_import_paths in zip(
go_mod_info.modules, all_pkg_import_paths
)
for pkg_import_path in pkg_import_paths
create_tgt(pkg_info)
for module_info in all_module_info
for pkg_info in module_info.values()
),
)

Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/backend/go/target_type_rules_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
GoPackage,
GoPackageSources,
)
from pants.backend.go.util_rules import external_module, go_mod, go_pkg, sdk
from pants.backend.go.util_rules import external_pkg, go_mod, go_pkg, sdk
from pants.base.exceptions import ResolveError
from pants.build_graph.address import Address
from pants.core.target_types import GenericTarget
Expand All @@ -53,7 +53,7 @@ def rule_runner() -> RuleRunner:
rules=[
*go_mod.rules(),
*go_pkg.rules(),
*external_module.rules(),
*external_pkg.rules(),
*sdk.rules(),
*target_type_rules.rules(),
QueryRule(Addresses, [DependenciesRequest]),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
assembly,
build_go_pkg,
compile,
external_module,
external_pkg,
go_mod,
go_pkg,
import_analysis,
Expand Down Expand Up @@ -46,7 +46,7 @@ def rule_runner() -> RuleRunner:
*go_mod.rules(),
*link.rules(),
*target_type_rules.rules(),
*external_module.rules(),
*external_pkg.rules(),
*sdk.rules(),
QueryRule(BuiltPackage, (GoBinaryFieldSet,)),
],
Expand Down
61 changes: 30 additions & 31 deletions src/python/pants/backend/go/util_rules/build_go_pkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pants.backend.go.target_types import (
GoExternalModulePathField,
GoExternalModuleVersionField,
GoExternalPackageTarget,
GoExternalPackageImportPathField,
GoPackageSources,
)
from pants.backend.go.util_rules.assembly import (
Expand All @@ -19,11 +19,7 @@
AssemblyPreCompilationRequest,
)
from pants.backend.go.util_rules.compile import CompiledGoSources, CompileGoSourcesRequest
from pants.backend.go.util_rules.external_module import (
DownloadedModule,
DownloadedModuleRequest,
ResolveExternalGoPackageRequest,
)
from pants.backend.go.util_rules.external_pkg import ExternalPkgInfo, ExternalPkgInfoRequest
from pants.backend.go.util_rules.go_mod import (
GoModInfo,
GoModInfoRequest,
Expand Down Expand Up @@ -75,40 +71,45 @@ async def build_go_package(request: BuildGoPackageRequest) -> BuiltGoPackage:
target = wrapped_target.target

if is_first_party_package_target(target):
source_files, resolved_package = await MultiGet(
_source_files, _resolved_package = await MultiGet(
Get(
SourceFiles,
SourceFilesRequest((target[GoPackageSources],)),
),
Get(ResolvedGoPackage, ResolveGoPackageRequest(address=target.address)),
)
source_files_digest = source_files.snapshot.digest
source_files_digest = _source_files.snapshot.digest
source_files_subpath = target.address.spec_path

original_import_path = _resolved_package.import_path
go_files = _resolved_package.go_files
s_files = _resolved_package.s_files

elif is_third_party_package_target(target):
assert isinstance(target, GoExternalPackageTarget)
original_import_path = target[GoExternalPackageImportPathField].value
_module_path = target[GoExternalModulePathField].value
source_files_subpath = original_import_path[len(_module_path) :]

_owning_go_mod = await Get(OwningGoMod, OwningGoModRequest(target.address))
_go_mod_info = await Get(GoModInfo, GoModInfoRequest(_owning_go_mod.address))
module_path = target[GoExternalModulePathField].value
_downloaded_module, resolved_package = await MultiGet(
Get(
DownloadedModule,
DownloadedModuleRequest(
module_path,
target[GoExternalModuleVersionField].value,
_go_mod_info.stripped_digest,
),
),
Get(
ResolvedGoPackage,
ResolveExternalGoPackageRequest(target, _go_mod_info.stripped_digest),
_pkg_info = await Get(
ExternalPkgInfo,
ExternalPkgInfoRequest(
import_path=original_import_path,
module_path=_module_path,
version=target[GoExternalModuleVersionField].value,
go_mod_stripped_digest=_go_mod_info.stripped_digest,
),
)
source_files_digest = _downloaded_module.digest
source_files_subpath = resolved_package.import_path[len(module_path) :]

source_files_digest = _pkg_info.digest
go_files = _pkg_info.go_files
s_files = _pkg_info.s_files

else:
raise AssertionError(f"Unknown how to build target at address {request.address} with Go.")

import_path = "main" if request.is_main else resolved_package.import_path
import_path = "main" if request.is_main else original_import_path

# TODO: If you use `Targets` here, then we replace the direct dep on the `go_mod` with all
# of its generated targets...Figure this out.
Expand Down Expand Up @@ -138,12 +139,10 @@ async def build_go_package(request: BuildGoPackageRequest) -> BuiltGoPackage:

assembly_digests = None
symabis_path = None
if resolved_package.s_files:
if s_files:
assembly_setup = await Get(
AssemblyPreCompilation,
AssemblyPreCompilationRequest(
input_digest, resolved_package.s_files, source_files_subpath
),
AssemblyPreCompilationRequest(input_digest, s_files, source_files_subpath),
)
input_digest = assembly_setup.merged_compilation_input_digest
assembly_digests = assembly_setup.assembly_digests
Expand All @@ -153,7 +152,7 @@ async def build_go_package(request: BuildGoPackageRequest) -> BuiltGoPackage:
CompiledGoSources,
CompileGoSourcesRequest(
digest=input_digest,
sources=tuple(f"./{source_files_subpath}/{name}" for name in resolved_package.go_files),
sources=tuple(f"./{source_files_subpath}/{name}" for name in go_files),
import_path=import_path,
description=f"Compile Go package: {import_path}",
import_config_path=import_config.CONFIG_PATH,
Expand All @@ -167,7 +166,7 @@ async def build_go_package(request: BuildGoPackageRequest) -> BuiltGoPackage:
AssemblyPostCompilationRequest(
compilation_digest,
assembly_digests,
resolved_package.s_files,
s_files,
source_files_subpath,
),
)
Expand Down
4 changes: 2 additions & 2 deletions src/python/pants/backend/go/util_rules/build_go_pkg_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
assembly,
build_go_pkg,
compile,
external_module,
external_pkg,
go_mod,
go_pkg,
import_analysis,
Expand All @@ -41,7 +41,7 @@ def rule_runner() -> RuleRunner:
*import_analysis.rules(),
*go_mod.rules(),
*go_pkg.rules(),
*external_module.rules(),
*external_pkg.rules(),
*target_type_rules.rules(),
QueryRule(BuiltGoPackage, [BuildGoPackageRequest]),
],
Expand Down
Loading

0 comments on commit 5d4ba99

Please sign in to comment.