Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

docker: include file dependencies in docker build context properly. #12758

Merged
merged 12 commits into from
Sep 8, 2021
26 changes: 7 additions & 19 deletions src/python/pants/backend/docker/docker_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,7 @@

from pants.backend.docker.docker_binary import DockerBinary, DockerBinaryRequest
from pants.backend.docker.docker_build_context import DockerBuildContext, DockerBuildContextRequest
from pants.backend.docker.target_types import (
DockerContextRoot,
DockerImageSources,
DockerImageVersion,
)
from pants.backend.docker.target_types import DockerImageSources, DockerImageVersion
from pants.core.goals.package import BuiltPackage, BuiltPackageArtifact, PackageFieldSet
from pants.engine.process import Process, ProcessResult
from pants.engine.rules import Get, MultiGet, collect_rules, rule
Expand All @@ -24,7 +20,6 @@
class DockerFieldSet(PackageFieldSet):
required_fields = (DockerImageSources,)

context_root_field: DockerContextRoot
image_version: DockerImageVersion
sources: DockerImageSources

Expand All @@ -38,17 +33,6 @@ def dockerfile_relpath(self) -> str:
def dockerfile_path(self) -> str:
return os.path.join(self.address.spec_path, self.dockerfile_relpath)

@property
def context_root(self) -> str:
"""Translates context_root from a users perspective in the BUILD file, to Dockers
perspective of the file system."""
path = self.context_root_field.value or "."
if os.path.isabs(path):
path = os.path.relpath(path, "/")
else:
path = os.path.join(self.address.spec_path, path)
return path

@property
def image_tag(self) -> str:
return ":".join(s for s in [self.address.target_name, self.image_version.value] if s)
Expand All @@ -61,7 +45,11 @@ async def build_docker_image(
docker, context = await MultiGet(
Get(DockerBinary, DockerBinaryRequest()),
Get(
DockerBuildContext, DockerBuildContextRequest(field_set.address, field_set.context_root)
DockerBuildContext,
DockerBuildContextRequest(
address=field_set.address,
build_upstream_images=True,
),
),
)

Expand All @@ -71,7 +59,7 @@ async def build_docker_image(
docker.build_image(
field_set.image_tag,
context.digest,
field_set.context_root,
".",
Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this arg to build_image() at all now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, we could leave that as a default arg, and drop it here to make it cleaner. Will do.

Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean can we just get rid of it entirely in the signature of build_image, since we only set it once, to a hard-coded value. We can just hard-code it in the process call?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure :)

field_set.dockerfile_path,
),
)
Expand Down
57 changes: 35 additions & 22 deletions src/python/pants/backend/docker/docker_build_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,22 @@

import logging
from dataclasses import dataclass
from itertools import chain

from pants.backend.docker.target_types import DockerImageSources
from pants.core.goals.package import BuiltPackage, PackageFieldSet
from pants.core.target_types import FilesSources, ResourcesSources
from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
from pants.engine.addresses import Address
from pants.engine.fs import AddPrefix, Digest, MergeDigests
from pants.engine.fs import Digest, MergeDigests
from pants.engine.rules import Get, MultiGet, collect_rules, rule
from pants.engine.target import (
Dependencies,
DependenciesRequest,
FieldSetsPerTarget,
FieldSetsPerTargetRequest,
Sources,
Targets,
TransitiveTargets,
TransitiveTargetsRequest,
)
Expand All @@ -30,19 +34,34 @@ class DockerBuildContext:
@dataclass(frozen=True)
class DockerBuildContextRequest:
address: Address
context_root: str
build_upstream_images: bool = False
benjyw marked this conversation as resolved.
Show resolved Hide resolved


@rule
async def create_docker_build_context(request: DockerBuildContextRequest) -> DockerBuildContext:
# Get all targets to include in context.
transitive_targets = await Get(TransitiveTargets, TransitiveTargetsRequest([request.address]))

# Get all sources, going into the build context.
# Get all dependencies from those root targets.
all_dependencies = await MultiGet(
Get(Targets, DependenciesRequest(target.get(Dependencies), include_special_cased_deps=True))
Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why include_special_cased_deps=True? Was this on purpose, or copy-pasta? I don't actually think we want that. Special-cased deps are a weird thing, that possibly shouldn't exist, but right now they do things like make sure that if you depend on relocated_files you don't also depend on the original pre-relocation files.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Otherwise I didn't get files targets referenced from my dependencies.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, well, guess it was something else, cause my tests works fine without. Will remove, then.

for target in transitive_targets.roots
)

# Get Dockerfiles from all roots.
Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that there will be exactly one root, because transitive_targets was created from a single address.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Adjust comment?

Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, probably good to clarify for the casual reader.

dockerfiles_request = Get(
SourceFiles,
SourceFilesRequest(
sources_fields=[t.get(Sources) for t in transitive_targets.roots],
for_sources_types=(DockerImageSources,),
),
)
# Get all sources from all dependencies (i.e. files and resources).
sources_request = Get(
SourceFiles,
SourceFilesRequest(
sources_fields=[t.get(Sources) for t in transitive_targets.closure],
for_sources_types=(DockerImageSources, FilesSources, ResourcesSources),
sources_fields=[t.get(Sources) for t in chain(*all_dependencies)],
for_sources_types=(FilesSources, ResourcesSources),
Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this will now make sure that other Dockerfiles won't get pulled in via dependencies? Good!

Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we might as well go ahead and stop handling ResourcesSources here. As we discussed, it doesn't make much sense to do so.

Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, and we will still pull in the files() targets that we depend on via other docker_image targets, but as previously discussed this is a general problem that we don't need to solve here right now.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check on dropping resources here.

No, I have tests covering that they don't pull in files from other docker_images.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about archives?

Copy link
Sponsor Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think those should just work - they are packaged resources. Are you seeing otherwise?

I'm surprised that files from other images (that this one depends on) aren't being pulled in. What prevents it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't tried anything with archives yet. Great if it just works. :)

all_dependencies are not really all dependencies for all transient targets, just all dependencies for the roots, i.e. our docker_file.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, right. packaged resources, now I get it. Awesome. Yeah, that should work.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I rephrased the comments and renamed a few variables to make things hopefully clearer.

),
)

Expand All @@ -51,37 +70,31 @@ async def create_docker_build_context(request: DockerBuildContextRequest) -> Doc
FieldSetsPerTargetRequest(PackageFieldSet, transitive_targets.dependencies),
)

sources, embedded_pkgs_per_target = await MultiGet(
sources_request, embedded_pkgs_per_target_request
dockerfiles, sources, embedded_pkgs_per_target = await MultiGet(
dockerfiles_request,
sources_request,
embedded_pkgs_per_target_request,
)

# Package binary dependencies for build context.
embedded_pkgs = await MultiGet(
Get(BuiltPackage, PackageFieldSet, field_set)
for field_set in embedded_pkgs_per_target.field_sets
# Exclude docker images, unless build_upstream_images is true.
if request.build_upstream_images
or not isinstance(getattr(field_set, "sources", None), DockerImageSources)
)

packages_str = ", ".join(a.relpath for p in embedded_pkgs for a in p.artifacts if a.relpath)
logger.debug(f"Packages for Docker image: {packages_str}")

embedded_pkgs_digests = tuple(built_package.digest for built_package in embedded_pkgs)
if request.context_root != ".":
# Copy packages to context root tree, unless the context root is at the project root.
embedded_pkgs_digests = await MultiGet(
Get(Digest, AddPrefix(digest, request.context_root)) for digest in embedded_pkgs_digests
)
embedded_pkgs_digest = [built_package.digest for built_package in embedded_pkgs]
all_digests = (dockerfiles.snapshot.digest, sources.snapshot.digest, *embedded_pkgs_digest)

# Merge build context.
# Merge all digests to get the final docker build context.
context = await Get(
Digest,
MergeDigests(
d
for d in (
sources.snapshot.digest,
*embedded_pkgs_digests,
)
if d
),
MergeDigests(d for d in all_digests if d),
)

return DockerBuildContext(context)
Expand Down
203 changes: 135 additions & 68 deletions src/python/pants/backend/docker/docker_build_context_test.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,146 @@
# Copyright 2021 Pants project contributors (see CONTRIBUTORS.md).
# Licensed under the Apache License, Version 2.0 (see LICENSE).

from pants.backend.docker.docker_build_context import (
DockerBuildContextRequest,
create_docker_build_context,
)
from __future__ import annotations

from textwrap import dedent

import pytest

from pants.backend.docker.docker_build_context import DockerBuildContext, DockerBuildContextRequest
from pants.backend.docker.rules import rules
from pants.backend.docker.target_types import DockerImage
from pants.backend.python.goals.package_pex_binary import PexBinaryFieldSet
from pants.backend.python.target_types import PexBinary
from pants.core.goals.package import BuiltPackage, PackageFieldSet
from pants.core.util_rules.source_files import SourceFiles, SourceFilesRequest
from pants.core.target_types import Files
from pants.core.util_rules.source_files import rules as source_files_rules
from pants.engine.addresses import Address
from pants.engine.fs import EMPTY_DIGEST, EMPTY_SNAPSHOT, AddPrefix, Digest, MergeDigests
from pants.engine.target import (
FieldSetsPerTarget,
FieldSetsPerTargetRequest,
TransitiveTargets,
TransitiveTargetsRequest,
)
from pants.engine.unions import UnionMembership
from pants.testutil.rule_runner import MockGet, run_rule_with_mocks


def test_create_docker_build_context():
benjyw marked this conversation as resolved.
Show resolved Hide resolved
img = DockerImage(address=Address("src/test", target_name="image"), unhydrated_values={})
pex = PexBinary(
address=Address("src/test", target_name="bin"),
unhydrated_values={"entry_point": "src.test.main:main"},
from pants.engine.fs import Snapshot
from pants.testutil.rule_runner import QueryRule, RuleRunner


@pytest.fixture
def rule_runner() -> RuleRunner:
return RuleRunner(
rules=[
*rules(),
*source_files_rules(),
QueryRule(DockerBuildContext, (DockerBuildContextRequest,)),
],
target_types=[DockerImage, Files],
)


def assert_build_context(
rule_runner: RuleRunner,
address: Address,
expected_files: list[str],
) -> None:
context = rule_runner.request(
DockerBuildContext,
[
DockerBuildContextRequest(
address=address,
build_upstream_images=False,
)
],
)

snapshot = rule_runner.request(Snapshot, [context.digest])
assert sorted(expected_files) == sorted(snapshot.files)


def test_file_dependencies(rule_runner: RuleRunner) -> None:
# img_A -> files_A
# img_A -> img_B -> files_B
rule_runner.add_to_build_file(
"src/a",
dedent(
"""\
docker_image(name="img_A", dependencies=[":files_A", "src/b:img_B"])
files(name="files_A", sources=["files/**"])
"""
),
)
rule_runner.add_to_build_file(
"src/b",
dedent(
"""\
docker_image(name="img_B", dependencies=[":files_B"])
files(name="files_B", sources=["files/**"])
"""
),
)
rule_runner.create_files("src/a", ["Dockerfile"])
rule_runner.create_files("src/a/files", ["a01", "a02"])
rule_runner.create_files("src/b", ["Dockerfile"])
rule_runner.create_files("src/b/files", ["b01", "b02"])

# We want files_B in build context for img_B
assert_build_context(
rule_runner,
Address("src/b", target_name="img_B"),
expected_files=["src/b/Dockerfile", "src/b/files/b01", "src/b/files/b02"],
)
request = DockerBuildContextRequest(
address=img.address,
context_root=".",

# We want files_A in build context for img_A, but not files_B
assert_build_context(
rule_runner,
Address("src/a", target_name="img_A"),
expected_files=["src/a/Dockerfile", "src/a/files/a01", "src/a/files/a02"],
)

# Mixed.
rule_runner.add_to_build_file(
"src/c",
dedent(
"""\
docker_image(name="img_C", dependencies=["src/a:files_A", "src/b:files_B"])
"""
),
)
rule_runner.create_files("src/c", ["Dockerfile"])

result = run_rule_with_mocks(
create_docker_build_context,
rule_args=[request],
mock_gets=[
MockGet(
output_type=TransitiveTargets,
input_type=TransitiveTargetsRequest,
mock=lambda _: TransitiveTargets([img], [pex]),
),
MockGet(
output_type=SourceFiles,
input_type=SourceFilesRequest,
mock=lambda _: SourceFiles(
snapshot=EMPTY_SNAPSHOT,
unrooted_files=tuple(),
),
),
MockGet(
output_type=FieldSetsPerTarget,
input_type=FieldSetsPerTargetRequest,
mock=lambda request: FieldSetsPerTarget([[PexBinaryFieldSet.create(pex)]]),
),
MockGet(
output_type=BuiltPackage,
input_type=PackageFieldSet,
mock=lambda _: BuiltPackage(EMPTY_DIGEST, []),
),
MockGet(
output_type=Digest,
input_type=AddPrefix,
mock=lambda _: EMPTY_DIGEST,
),
MockGet(
output_type=Digest,
input_type=MergeDigests,
mock=lambda _: EMPTY_DIGEST,
),
assert_build_context(
rule_runner,
Address("src/c", target_name="img_C"),
expected_files=[
"src/c/Dockerfile",
"src/a/files/a01",
"src/a/files/a02",
"src/b/files/b01",
"src/b/files/b02",
],
# need AddPrefix here, since UnionMembership.is_member() throws when called with non
# registered types
union_membership=UnionMembership({PackageFieldSet: [PexBinaryFieldSet], AddPrefix: []}),
)

assert result.digest == EMPTY_DIGEST

def test_files_out_of_tree(rule_runner: RuleRunner) -> None:
# src/a:img_A -> res/static:files
rule_runner.add_to_build_file(
"src/a",
dedent(
"""\
docker_image(name="img_A", dependencies=["res/static:files"])
"""
),
)
rule_runner.add_to_build_file(
"res/static",
dedent(
"""\
files(name="files", sources=["!BUILD", "**/*"])
"""
),
)
rule_runner.create_files("src/a", ["Dockerfile"])
rule_runner.create_files("res/static", ["s01", "s02"])
rule_runner.create_files("res/static/sub", ["s03"])

assert_build_context(
rule_runner,
Address("src/a", target_name="img_A"),
expected_files=[
"src/a/Dockerfile",
"res/static/s01",
"res/static/s02",
"res/static/sub/s03",
],
)
Loading