Skip to content

Commit

Permalink
[jvm] Avoid filename collisions for coursier's fetched files (#13607)
Browse files Browse the repository at this point in the history
Make filename collisions less likely by including the group in the name. Additionally, dedupe file copies during post-processing, since `coursier` will occasionally report duplicates with the same source and dest.

[ci skip-build-wheels]
[ci skip-rust]
  • Loading branch information
stuhood authored Nov 13, 2021
1 parent 77e8a6e commit 66673f0
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 34 deletions.
15 changes: 5 additions & 10 deletions src/python/pants/jvm/goals/coursier_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,7 @@
from pants.jvm.target_types import JvmArtifact, JvmDependencyLockfile
from pants.jvm.testutil import maybe_skip_jdk_test
from pants.jvm.util_rules import rules as util_rules
from pants.testutil.rule_runner import PYTHON_BOOTSTRAP_ENV, RuleRunner, logging
from pants.util.logging import LogLevel
from pants.testutil.rule_runner import PYTHON_BOOTSTRAP_ENV, RuleRunner

HAMCREST_COORD = Coordinate(
group="org.hamcrest",
Expand Down Expand Up @@ -68,7 +67,6 @@ def rule_runner() -> RuleRunner:
return rule_runner


@logging(level=LogLevel.DEBUG)
@maybe_skip_jdk_test
def test_coursier_resolve_creates_missing_lockfile(rule_runner: RuleRunner) -> None:
rule_runner.write_files(
Expand Down Expand Up @@ -99,7 +97,7 @@ def test_coursier_resolve_creates_missing_lockfile(rule_runner: RuleRunner) -> N
entries=(
CoursierLockfileEntry(
coord=HAMCREST_COORD,
file_name="hamcrest-core-1.3.jar",
file_name="org.hamcrest_hamcrest-core_1.3.jar",
direct_dependencies=Coordinates([]),
dependencies=Coordinates([]),
file_digest=FileDigest(
Expand All @@ -115,14 +113,13 @@ def test_coursier_resolve_creates_missing_lockfile(rule_runner: RuleRunner) -> N
)


@logging(level=LogLevel.DEBUG)
@maybe_skip_jdk_test
def test_coursier_resolve_noop_does_not_touch_lockfile(rule_runner: RuleRunner) -> None:
expected_lockfile = CoursierResolvedLockfile(
entries=(
CoursierLockfileEntry(
coord=HAMCREST_COORD,
file_name="hamcrest-core-1.3.jar",
file_name="org.hamcrest_hamcrest-core_1.3.jar",
direct_dependencies=Coordinates([]),
dependencies=Coordinates([]),
file_digest=FileDigest(
Expand Down Expand Up @@ -163,7 +160,6 @@ def test_coursier_resolve_noop_does_not_touch_lockfile(rule_runner: RuleRunner)
assert result.stderr == ""


@logging(level=LogLevel.DEBUG)
@maybe_skip_jdk_test
def test_coursier_resolve_updates_lockfile(rule_runner: RuleRunner) -> None:
rule_runner.write_files(
Expand Down Expand Up @@ -199,7 +195,7 @@ def test_coursier_resolve_updates_lockfile(rule_runner: RuleRunner) -> None:
entries=(
CoursierLockfileEntry(
coord=HAMCREST_COORD,
file_name="hamcrest-core-1.3.jar",
file_name="org.hamcrest_hamcrest-core_1.3.jar",
direct_dependencies=Coordinates([]),
dependencies=Coordinates([]),
file_digest=FileDigest(
Expand All @@ -215,7 +211,6 @@ def test_coursier_resolve_updates_lockfile(rule_runner: RuleRunner) -> None:
)


@logging(level=LogLevel.DEBUG)
@maybe_skip_jdk_test
def test_coursier_resolve_updates_bogus_lockfile(rule_runner: RuleRunner) -> None:
rule_runner.write_files(
Expand Down Expand Up @@ -251,7 +246,7 @@ def test_coursier_resolve_updates_bogus_lockfile(rule_runner: RuleRunner) -> Non
entries=(
CoursierLockfileEntry(
coord=HAMCREST_COORD,
file_name="hamcrest-core-1.3.jar",
file_name="org.hamcrest_hamcrest-core_1.3.jar",
direct_dependencies=Coordinates([]),
dependencies=Coordinates([]),
file_digest=FileDigest(
Expand Down
2 changes: 1 addition & 1 deletion src/python/pants/jvm/jdk_rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ async def setup_jdk(coursier: Coursier, javac: JavacSubsystem, bash: BashBinary)
ClasspathEntry,
CoursierLockfileEntry(
coord=Coordinate.from_coord_str("com.martiansoftware:nailgun-server:0.9.1"),
file_name="nailgun-server-0.9.1.jar",
file_name="com.martiansoftware_nailgun-server_0.9.1.jar",
direct_dependencies=Coordinates(),
dependencies=Coordinates(),
file_digest=FileDigest(
Expand Down
23 changes: 17 additions & 6 deletions src/python/pants/jvm/resolve/coursier_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import os
from dataclasses import dataclass
from functools import reduce
from pathlib import PurePath
from typing import Any, Iterable, Iterator

from pants.base.glob_match_error_behavior import GlobMatchErrorBehavior
Expand Down Expand Up @@ -283,6 +282,16 @@ def to_json(self) -> bytes:
)


def classpath_dest_filename(coord: str, src_filename: str) -> str:
"""Calculates the destination filename on the classpath for the given source filename and coord.
TODO: This is duplicated in `COURSIER_POST_PROCESSING_SCRIPT`.
"""
dest_name = coord.replace(":", "_")
_, ext = os.path.splitext(src_filename)
return f"{dest_name}{ext}"


@rule(level=LogLevel.DEBUG)
async def coursier_resolve_lockfile(
bash: BashBinary,
Expand Down Expand Up @@ -354,7 +363,9 @@ async def coursier_resolve_lockfile(
report_contents = await Get(DigestContents, Digest, report_digest)
report = json.loads(report_contents[0].content)

artifact_file_names = tuple(PurePath(dep["file"]).name for dep in report["dependencies"])
artifact_file_names = tuple(
classpath_dest_filename(dep["coord"], dep["file"]) for dep in report["dependencies"]
)
artifact_output_paths = tuple(f"classpath/{file_name}" for file_name in artifact_file_names)
artifact_digests = await MultiGet(
Get(Digest, DigestSubset(process_result.output_digest, PathGlobs([output_path])))
Expand Down Expand Up @@ -483,22 +494,22 @@ async def coursier_fetch_one_coord(
f'Coursier resolved coord "{resolved_coord.to_coord_str()}" does not match requested coord "{request.coord.to_coord_str()}".'
)

file_path = PurePath(dep["file"])
classpath_dest = f"classpath/{file_path.name}"
classpath_dest_name = classpath_dest_filename(dep["coord"], dep["file"])
classpath_dest = f"classpath/{classpath_dest_name}"

resolved_file_digest = await Get(
Digest, DigestSubset(process_result.output_digest, PathGlobs([classpath_dest]))
)
stripped_digest = await Get(Digest, RemovePrefix(resolved_file_digest, "classpath"))
file_digest = await Get(
FileDigest,
ExtractFileDigest(stripped_digest, file_path.name),
ExtractFileDigest(stripped_digest, classpath_dest_name),
)
if file_digest != request.file_digest:
raise CoursierError(
f"Coursier fetch for '{resolved_coord}' succeeded, but fetched artifact {file_digest} did not match the expected artifact: {request.file_digest}."
)
return ClasspathEntry(digest=stripped_digest, filenames=(file_path.name,))
return ClasspathEntry(digest=stripped_digest, filenames=(classpath_dest_name,))


@rule(level=LogLevel.DEBUG)
Expand Down
21 changes: 11 additions & 10 deletions src/python/pants/jvm/resolve/coursier_fetch_integration_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def test_resolve_with_no_deps(rule_runner: RuleRunner) -> None:
entries=(
CoursierLockfileEntry(
coord=HAMCREST_COORD,
file_name="hamcrest-core-1.3.jar",
file_name="org.hamcrest_hamcrest-core_1.3.jar",
direct_dependencies=Coordinates([]),
dependencies=Coordinates([]),
file_digest=FileDigest(
Expand All @@ -101,7 +101,7 @@ def test_resolve_with_transitive_deps(rule_runner: RuleRunner) -> None:
entries=(
CoursierLockfileEntry(
coord=junit_coord,
file_name="junit-4.13.2.jar",
file_name="junit_junit_4.13.2.jar",
direct_dependencies=Coordinates([HAMCREST_COORD]),
dependencies=Coordinates([HAMCREST_COORD]),
file_digest=FileDigest(
Expand All @@ -111,7 +111,7 @@ def test_resolve_with_transitive_deps(rule_runner: RuleRunner) -> None:
),
CoursierLockfileEntry(
coord=HAMCREST_COORD,
file_name="hamcrest-core-1.3.jar",
file_name="org.hamcrest_hamcrest-core_1.3.jar",
direct_dependencies=Coordinates([]),
dependencies=Coordinates([]),
file_digest=FileDigest(
Expand Down Expand Up @@ -140,7 +140,7 @@ def test_resolve_with_inexact_coord(rule_runner: RuleRunner) -> None:
entries=(
CoursierLockfileEntry(
coord=Coordinate(group="junit", artifact="junit", version="4.8.2"),
file_name="junit-4.8.2.jar",
file_name="junit_junit_4.8.2.jar",
direct_dependencies=Coordinates([]),
dependencies=Coordinates([]),
file_digest=FileDigest(
Expand Down Expand Up @@ -178,7 +178,7 @@ def test_fetch_one_coord_with_no_deps(rule_runner: RuleRunner) -> None:
[
CoursierLockfileEntry(
coord=HAMCREST_COORD,
file_name="hamcrest-core-1.3.jar",
file_name="org.hamcrest_hamcrest-core_1.3.jar",
direct_dependencies=Coordinates([]),
dependencies=Coordinates([]),
file_digest=FileDigest(
Expand All @@ -188,9 +188,10 @@ def test_fetch_one_coord_with_no_deps(rule_runner: RuleRunner) -> None:
)
],
)
assert classpath_entry.filenames == ("hamcrest-core-1.3.jar",)
assert classpath_entry.filenames == ("org.hamcrest_hamcrest-core_1.3.jar",)
file_digest = rule_runner.request(
FileDigest, [ExtractFileDigest(classpath_entry.digest, "hamcrest-core-1.3.jar")]
FileDigest,
[ExtractFileDigest(classpath_entry.digest, "org.hamcrest_hamcrest-core_1.3.jar")],
)
assert file_digest == FileDigest(
fingerprint="66fdef91e9739348df7a096aa384a5685f4e875584cce89386a7a47251c4d8e9",
Expand All @@ -206,7 +207,7 @@ def test_fetch_one_coord_with_transitive_deps(rule_runner: RuleRunner) -> None:
[
CoursierLockfileEntry(
coord=junit_coord,
file_name="junit-4.13.2.jar",
file_name="junit_junit_4.13.2.jar",
direct_dependencies=Coordinates([HAMCREST_COORD]),
dependencies=Coordinates([HAMCREST_COORD]),
file_digest=FileDigest(
Expand All @@ -216,9 +217,9 @@ def test_fetch_one_coord_with_transitive_deps(rule_runner: RuleRunner) -> None:
)
],
)
assert classpath_entry.filenames == ("junit-4.13.2.jar",)
assert classpath_entry.filenames == ("junit_junit_4.13.2.jar",)
file_digest = rule_runner.request(
FileDigest, [ExtractFileDigest(classpath_entry.digest, "junit-4.13.2.jar")]
FileDigest, [ExtractFileDigest(classpath_entry.digest, "junit_junit_4.13.2.jar")]
)
assert file_digest == FileDigest(
fingerprint="8e495b634469d64fb8acfa3495a065cbacc8a0fff55ce1e31007be4c16dc57d3",
Expand Down
28 changes: 21 additions & 7 deletions src/python/pants/jvm/resolve/coursier_setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,33 @@
"""\
import json
import sys
import os
from pathlib import PurePath
from shutil import copyfile
report = json.load(open(sys.argv[1]))
classpath = set()
# Mapping from dest path to source path. It is ok to capture the same output filename multiple
# times if the source is the same as well.
classpath = dict()
for dep in report['dependencies']:
file_path = PurePath(dep['file'])
classpath_dest = f"classpath/{file_path.name}"
if classpath_dest in classpath:
raise Exception(f"Found duplicate jar name {file_path.name}, which isn't currently supported")
classpath.add(classpath_dest)
copyfile(file_path, classpath_dest)
source = PurePath(dep['file'])
dest_name = dep['coord'].replace(":", "_")
_, ext = os.path.splitext(source)
classpath_dest = f"classpath/{dest_name}{ext}"
existing_source = classpath.get(classpath_dest)
if existing_source:
if existing_source == source:
# We've already captured this file.
continue
raise Exception(
f"Duplicate jar name {classpath_dest} with incompatible source:\\n"
f" {source}\\n"
f" {existing_source}\\n"
)
classpath[classpath_dest] = source
copyfile(source, classpath_dest)
"""
)

Expand Down

0 comments on commit 66673f0

Please sign in to comment.