Skip to content

Commit

Permalink
[rfc] Allow repository rules to lazily declare environment variable deps
Browse files Browse the repository at this point in the history
This commit adds a new repository context method, `getenv`, which allows Starlark repository rule implementations to lazily declare environment variable dependencies.  This is intended to allow repository rules to establish dependencies on environment variables whose names aren't known in advance.

This is work towards #19511.

Notes
=====
- I don't speak Java, so expect funny smells and copypasta.
- `rctx.getenv` behaves similarly to Python's `os.getenv`.
- `rctx.getenv` is to environment variables what `rctx.path(Label)` is to files.

Future work
===========
- Implement bzlmod support.

Closes #20787.

PiperOrigin-RevId: 599568358
Change-Id: I2c1948cd23643d28bf1d41e9baaf98a902112cc7
  • Loading branch information
rbeasley authored and copybara-github committed Jan 18, 2024
1 parent 1536e62 commit c230e39
Show file tree
Hide file tree
Showing 7 changed files with 187 additions and 10 deletions.
11 changes: 5 additions & 6 deletions site/en/extending/repo.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,11 @@ following things changes:

* The attributes passed to the repo rule invocation.
* The Starlark code comprising the implementation of the repo rule.
* The value of any environment variable declared with the `environ`
attribute of the [`repository_rule`](/rules/lib/globals/bzl#repository_rule).
The values of these environment variables can be hard-wired on the command
line with the
[`--action_env`](/reference/command-line-reference#flag--action_env)
flag (but this flag will invalidate every action of the build).
* The value of any environment variable passed to `repository_ctx`'s
`getenv()` method or declared with the `environ` attribute of the
[`repository_rule`](/rules/lib/globals/bzl#repository_rule). The values
of these environment variables can be hard-wired on the command line with the
[`--repo_env`](/reference/command-line-reference#flag--repo_env) flag.
* The content of any file passed to the `read()`, `execute()` and similar
methods of `repository_ctx` which is referred to by a label (for example,
`//mypkg:label.txt` but not `mypkg/label.txt`)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/rules:repository/workspace_attribute_mapper",
"//src/main/java/com/google/devtools/build/lib/rules:repository/workspace_file_helper",
"//src/main/java/com/google/devtools/build/lib/shell",
"//src/main/java/com/google/devtools/build/lib/skyframe:action_environment_function",
"//src/main/java/com/google/devtools/build/lib/skyframe:ignored_package_prefixes_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value",
"//src/main/java/com/google/devtools/build/lib/starlarkbuildapi/repository",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Maps;
import com.google.common.util.concurrent.Futures;
Expand Down Expand Up @@ -51,6 +52,7 @@
import com.google.devtools.build.lib.runtime.ProcessWrapper;
import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor;
import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor.ExecutionResult;
import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction;
import com.google.devtools.build.lib.util.OsUtils;
import com.google.devtools.build.lib.util.io.OutErr;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
Expand All @@ -77,6 +79,7 @@
import java.util.Arrays;
import java.util.Base64;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
Expand All @@ -89,6 +92,7 @@
import net.starlark.java.annot.StarlarkMethod;
import net.starlark.java.eval.Dict;
import net.starlark.java.eval.EvalException;
import net.starlark.java.eval.NoneType;
import net.starlark.java.eval.Printer;
import net.starlark.java.eval.Sequence;
import net.starlark.java.eval.Starlark;
Expand Down Expand Up @@ -137,6 +141,7 @@ private interface AsyncTask {
@Nullable private final ProcessWrapper processWrapper;
protected final StarlarkSemantics starlarkSemantics;
private final HashMap<Label, String> accumulatedFileDigests = new HashMap<>();
private final HashSet<String> accumulatedEnvKeys = new HashSet<>();
private final RepositoryRemoteExecutor remoteExecutor;
private final List<AsyncTask> asyncTasks;

Expand Down Expand Up @@ -193,6 +198,11 @@ public ImmutableMap<Label, String> getAccumulatedFileDigests() {
return ImmutableMap.copyOf(accumulatedFileDigests);
}

/** Returns set of environment variable keys encountered so far. */
public ImmutableSet<String> getAccumulatedEnvKeys() {
return ImmutableSet.copyOf(accumulatedEnvKeys);
}

protected void checkInOutputDirectory(String operation, StarlarkPath path)
throws RepositoryFunctionException {
if (!path.getPath().getPathString().startsWith(workingDirectory.getPathString())) {
Expand Down Expand Up @@ -1054,6 +1064,47 @@ public void createFile(
}
}

// Move to a common location like net.starlark.java.eval.Starlark?
@Nullable
private static <T> T nullIfNone(Object object, Class<T> type) {
return object != Starlark.NONE ? type.cast(object) : null;
}

@StarlarkMethod(
name = "getenv",
doc =
"Returns the value of an environment variable <code>name</code> as a string if exists, "
+ "or <code>default</code> if it doesn't."
+ "<p>When building incrementally, any change to the value of the variable named by "
+ "<code>name</code> will cause this repository to be re-fetched.",
parameters = {
@Param(
name = "name",
doc = "name of desired environment variable",
allowedTypes = {@ParamType(type = String.class)}),
@Param(
name = "default",
doc = "Default value to return if `name` is not found",
allowedTypes = {@ParamType(type = String.class), @ParamType(type = NoneType.class)},
defaultValue = "None")
},
allowReturnNones = true)
@Nullable
public String getEnvironmentValue(String name, Object defaultValue)
throws InterruptedException, NeedsSkyframeRestartException {
// Must look up via AEF, rather than solely copy from `this.envVariables`, in order to
// establish a SkyKey dependency relationship.
if (env.getValue(ActionEnvironmentFunction.key(name)) == null) {
throw new NeedsSkyframeRestartException();
}

// However, to account for --repo_env we take the value from `this.envVariables`.
// See https://github.com/bazelbuild/bazel/pull/20787#discussion_r1445571248 .
String envVarValue = envVariables.get(name);
accumulatedEnvKeys.add(name);
return envVarValue != null ? envVarValue : nullIfNone(defaultValue, String.class);
}

@StarlarkMethod(
name = "path",
doc =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,16 @@ public boolean isImmutable() {
return true; // immutable and Starlark-hashable
}

@StarlarkMethod(name = "environ", structField = true, doc = "The list of environment variables.")
@StarlarkMethod(
name = "environ",
structField = true,
doc =
"The dictionary of environment variables."
+ "<p><b>NOTE</b>: Retrieving an environment variable from this dictionary does not "
+ "establish a dependency from a repository rule or module extension to the "
+ "environment variable. To establish a dependency when looking up an "
+ "environment variable, use either <code>repository_ctx.getenv</code> or "
+ "<code>module_ctx.getenv</code> instead.")
public ImmutableMap<String, String> getEnvironmentVariables() {
return environ;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@
import com.google.devtools.build.skyframe.SkyKey;
import java.io.IOException;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -323,6 +325,11 @@ private RepositoryDirectoryValue.Builder fetchInternal(
markerData.put("FILE:" + entry.getKey(), entry.getValue());
}

// Ditto for environment variables accessed via `getenv`.
for (String envKey : starlarkRepositoryContext.getAccumulatedEnvKeys()) {
markerData.put("ENV:" + envKey, clientEnvironment.get(envKey));
}

env.getListener().post(resolved);
} catch (NeedsSkyframeRestartException e) {
// A dependency is missing, cleanup and returns null
Expand Down Expand Up @@ -373,6 +380,47 @@ private static ImmutableSet<String> getEnviron(Rule rule) {
return ImmutableSet.copyOf((Iterable<String>) rule.getAttr("$environ"));
}

/**
* Verify marker data previously saved by {@link #declareEnvironmentDependencies(Map, Environment,
* Set)} and/or {@link #fetchInternal(Rule, Path, BlazeDirectories, Environment, Map, SkyKey)} (on
* behalf of {@link StarlarkBaseExternalContext#getEnvironmentValue(String, Object)}).
*/
@Override
protected boolean verifyEnvironMarkerData(
Map<String, String> markerData, Environment env, Set<String> keys)
throws InterruptedException {
/*
* We can ignore `keys` and instead only verify what's recorded in the marker file, because
* any change to `keys` between builds would be caused by a change to a .bzl file, and that's
* covered by RepositoryDelegatorFunction.DigestWriter#areRepositoryAndMarkerFileConsistent.
*/

ImmutableSet<String> markerKeys =
markerData.keySet().stream()
.filter(s -> s.startsWith("ENV:"))
.collect(ImmutableSet.toImmutableSet());

ImmutableMap<String, String> environ =
getEnvVarValues(
env,
markerKeys.stream()
.map(s -> s.substring(4)) // ENV:FOO -> FOO
.collect(ImmutableSet.toImmutableSet()));
if (environ == null) {
return false;
}

for (String key : markerKeys) {
String markerValue = markerData.get(key);
String envKey = key.substring(4); // ENV:FOO -> FOO
if (!Objects.equals(markerValue, environ.get(envKey))) {
return false;
}
}

return true;
}

@Override
protected boolean isLocal(Rule rule) {
return (Boolean) rule.getAttr("$local");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,10 @@ public interface RepositoryModuleApi {
},
defaultValue = "[]",
doc =
"Provides a list of environment variable that this repository rule depends on. If "
+ "an environment variable in that list change, the repository will be "
"<b>Deprecated</b>. This parameter has been deprecated. Migrate to "
+ "<code>repository_ctx.getenv</code> instead.<br/>"
+ "Provides a list of environment variable that this repository rule depends "
+ "on. If an environment variable in that list change, the repository will be "
+ "refetched.",
named = true,
positional = false),
Expand Down
69 changes: 68 additions & 1 deletion src/test/shell/bazel/external_integration_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1943,7 +1943,7 @@ EOF

function test_cache_hit_reported() {
# Verify that information about a cache hit is reported
# if an error happend in that repository. This information
# if an error happened in that repository. This information
# is useful as users sometimes change the URL but do not
# update the hash.
WRKDIR=$(mktemp -d "${TEST_TMPDIR}/testXXXXXX")
Expand Down Expand Up @@ -3000,4 +3000,71 @@ EOF
test -h "$execroot/external/ext" || fail "Expected symlink to external repo."
}

function test_environ_incrementally() {
# Set up workspace with a repository rule to examine env vars. Assert that undeclared
# env vars don't trigger reevaluations.
cat > repo.bzl <<EOF
def _impl(rctx):
rctx.symlink(rctx.attr.build_file, 'BUILD')
print('UNDECLARED_KEY=%s' % rctx.os.environ.get('UNDECLARED_KEY'))
print('PREDECLARED_KEY=%s' % rctx.os.environ.get('PREDECLARED_KEY'))
print('LAZYEVAL_KEY=%s' % rctx.getenv('LAZYEVAL_KEY'))
dummy_repository = repository_rule(
implementation = _impl,
attrs = {'build_file': attr.label()},
environ = ['PREDECLARED_KEY'], # sic
)
EOF
cat > BUILD.dummy <<EOF
filegroup(name='dummy', srcs=['BUILD'])
EOF
touch BUILD
cat > WORKSPACE <<EOF
load('//:repo.bzl', 'dummy_repository')
dummy_repository(name = 'foo', build_file = '@@//:BUILD.dummy')
EOF

# Baseline: DEBUG: UNDECLARED_KEY is logged to stderr.
UNDECLARED_KEY=val1 bazel query @foo//:BUILD 2>$TEST_log || fail 'Expected no-op build to succeed'
expect_log "UNDECLARED_KEY=val1"

# UNDECLARED_KEY is, well, undeclared. This will be a no-op.
UNDECLARED_KEY=val2 bazel query @foo//:BUILD 2>$TEST_log || fail 'Expected no-op build to succeed'
expect_not_log "UNDECLARED_KEY"

#---

# Predeclared key.
PREDECLARED_KEY=wal1 bazel query @foo//:BUILD 2>$TEST_log || fail 'Expected no-op build to succeed'
expect_log "PREDECLARED_KEY=wal1"

# Predeclared key, no-op build.
PREDECLARED_KEY=wal1 bazel query @foo//:BUILD 2>$TEST_log || fail 'Expected no-op build to succeed'
expect_not_log "PREDECLARED_KEY"

# Predeclared key, new value -> refetch.
PREDECLARED_KEY=wal2 bazel query @foo//:BUILD 2>$TEST_log || fail 'Expected no-op build to succeed'
expect_log "PREDECLARED_KEY=wal2"

#---

# Side-effect key.
LAZYEVAL_KEY=xal1 bazel query @foo//:BUILD 2>$TEST_log || fail 'Expected no-op build to succeed'
expect_log "PREDECLARED_KEY=None"
expect_log "LAZYEVAL_KEY=xal1"

# Side-effect key, no-op build.
LAZYEVAL_KEY=xal1 bazel query @foo//:BUILD 2>$TEST_log || fail 'Expected no-op build to succeed'
expect_not_log "LAZYEVAL_KEY"

# Side-effect key, new value -> refetch.
LAZYEVAL_KEY=xal2 bazel query @foo//:BUILD 2>$TEST_log || fail 'Expected no-op build to succeed'
expect_log "LAZYEVAL_KEY=xal2"

# Ditto, but with --repo_env overriding environment.
LAZYEVAL_KEY=xal2 bazel query --repo_env=LAZYEVAL_KEY=xal3 @foo//:BUILD 2>$TEST_log || fail 'Expected no-op build to succeed'
expect_log "LAZYEVAL_KEY=xal3"
}

run_suite "external tests"

0 comments on commit c230e39

Please sign in to comment.