Skip to content

Commit

Permalink
Make lockfile's RepoSpec attributes more readable
Browse files Browse the repository at this point in the history
String-valued tag and repo rule attributes in the `MODULE.bazel.lock` file have to be serialized in a way that distinguishes them from label-valued attributes. This commit uses the fact that the string representation of labels always starts with `@@` to escape strings only if they start with `@@` (or happen to be of the form of an escaped string). Since string-valued attributes rarely start with `@@`, in particular when specified by humans and not macros, this greatly reduces the need for escaping in the lockfile.

This commit raises the lockfile version to 3.

Along the way also disables HTML escaping for attributes as it isn't needed and results in less readable representations of certain characters.

Closes bazelbuild#19684.

PiperOrigin-RevId: 571037360
Change-Id: I04bb60d371cd09326780004122e729d78c99732a
  • Loading branch information
fmeum authored and copybara-github committed Oct 5, 2023
1 parent 5b4e986 commit 5bdb208
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 25 deletions.
5 changes: 3 additions & 2 deletions scripts/bootstrap/compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,9 @@ EOF
link_file "${PWD}/src/MODULE.tools" "${BAZEL_TOOLS_REPO}/MODULE.bazel"
new_hash=$(shasum -a 256 "${BAZEL_TOOLS_REPO}/MODULE.bazel" | awk '{print $1}')
sed -i.bak "/\"bazel_tools\":/s/\"[a-f0-9]*\"/\"$new_hash\"/" MODULE.bazel.lock
# TODO: Temporary hack for lockfile version mismatch, remove these two lines after updating to 6.4.0
sed -i.bak 's/"lockFileVersion": 1/"lockFileVersion": 2/' MODULE.bazel.lock
# TODO: Temporary hack for lockfile version mismatch, remove these lines after updating to 6.4.0
sed -i.bak 's/"lockFileVersion": 1/"lockFileVersion": 3/' MODULE.bazel.lock
sed -i.bak 's/"--/"/g' MODULE.bazel.lock
sed -i.bak 's/"moduleExtensions":/"moduleExtensions-old":/' MODULE.bazel.lock
rm MODULE.bazel.lock.bak

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@

package com.google.devtools.build.lib.bazel.bzlmod;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.devtools.build.lib.cmdline.Label;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonNull;
Expand All @@ -40,7 +43,7 @@
/** Helps serialize/deserialize {@link AttributeValues}, which contains Starlark values. */
public class AttributeValuesAdapter extends TypeAdapter<AttributeValues> {

private final Gson gson = new Gson();
private final Gson gson = new GsonBuilder().disableHtmlEscaping().create();

@Override
public void write(JsonWriter out, AttributeValues attributeValues) throws IOException {
Expand Down Expand Up @@ -129,32 +132,51 @@ private Object deserializeObject(JsonElement json) {
}
}

@VisibleForTesting static final String STRING_ESCAPE_SEQUENCE = "'";

/**
* Serializes an object (Label or String) to String A label is converted to a String as it is. A
* String is being modified with a delimiter to be easily differentiated from the label when
* deserializing.
* Serializes an object (Label or String) to String. A label is converted to a String as it is. A
* String that looks like a label is escaped so that it can be differentiated from a label when
* deserializing, otherwise it is emitted as is.
*
* @param obj String or Label
* @return serialized object
*/
private String serializeObjToString(Object obj) {
if (obj instanceof Label) {
return ((Label) obj).getUnambiguousCanonicalForm();
String labelString = ((Label) obj).getUnambiguousCanonicalForm();
Preconditions.checkState(labelString.startsWith("@@"));
return labelString;
}
String string = (String) obj;
// Strings that start with "@@" need to be escaped to avoid being interpreted as a label. We
// escape by wrapping the string in the escape sequence and strip one layer of this sequence
// during deserialization, so strings that happen to already start and end with the escape
// sequence also have to be escaped.
if (string.startsWith("@@")
|| (string.startsWith(STRING_ESCAPE_SEQUENCE) && string.endsWith(STRING_ESCAPE_SEQUENCE))) {
return STRING_ESCAPE_SEQUENCE + string + STRING_ESCAPE_SEQUENCE;
}
return "--" + obj;
return string;
}

/**
* Deserializes a string to either a label or a String depending on the prefix. A string will have
* a delimiter at the start, else it is converted to a label.
* Deserializes a string to either a label or a String depending on the prefix and presence of the
* escape sequence.
*
* @param value String to be deserialized
* @return Object of type String of Label
*/
private Object deserializeStringToObject(String value) {
if (value.startsWith("--")) {
return value.substring(2);
// A string represents a label if and only if it starts with "@@".
if (value.startsWith("@@")) {
return Label.parseCanonicalUnchecked(value);
}
// Strings that start and end with the escape sequence always require one layer to be stripped.
if (value.startsWith(STRING_ESCAPE_SEQUENCE) && value.endsWith(STRING_ESCAPE_SEQUENCE)) {
return value.substring(
STRING_ESCAPE_SEQUENCE.length(), value.length() - STRING_ESCAPE_SEQUENCE.length());
}
return Label.parseCanonicalUnchecked(value);
return value;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
@GenerateTypeAdapter
public abstract class BazelLockFileValue implements SkyValue, Postable {

public static final int LOCK_FILE_VERSION = 2;
public static final int LOCK_FILE_VERSION = 3;

@SerializationConstant public static final SkyKey KEY = () -> SkyFunctions.BAZEL_LOCK_FILE;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
package com.google.devtools.build.lib.bazel.bzlmod;

import static com.google.common.truth.Truth.assertThat;
import static com.google.devtools.build.lib.bazel.bzlmod.AttributeValuesAdapter.STRING_ESCAPE_SEQUENCE;

import com.google.devtools.build.lib.cmdline.Label;
import com.google.devtools.build.lib.testutil.FoundationTestCase;
Expand Down Expand Up @@ -43,7 +44,23 @@ public void testAttributeValuesAdapter() throws IOException {
Label l2 = Label.parseCanonicalUnchecked("@//foo:tar");
dict.put("Integer", StarlarkInt.of(56));
dict.put("Boolean", false);
dict.put("String", "Hello");
dict.put("String", "Hello String");
dict.put("StringWithAngleBrackets", "<Hello>");
dict.put(
"LabelLikeString",
StarlarkList.of(Mutability.IMMUTABLE, "@//foo:bar", ":baz", "@@//baz:quz"));
dict.put(
"StringsWithEscapeSequence",
StarlarkList.of(
Mutability.IMMUTABLE,
"@@//foo:bar" + STRING_ESCAPE_SEQUENCE,
STRING_ESCAPE_SEQUENCE + "@@//foo:bar",
STRING_ESCAPE_SEQUENCE + "@@//foo:bar" + STRING_ESCAPE_SEQUENCE,
STRING_ESCAPE_SEQUENCE
+ STRING_ESCAPE_SEQUENCE
+ "@@//foo:bar"
+ STRING_ESCAPE_SEQUENCE
+ STRING_ESCAPE_SEQUENCE));
dict.put("Label", l1);
dict.put(
"ListOfInts", StarlarkList.of(Mutability.IMMUTABLE, StarlarkInt.of(1), StarlarkInt.of(2)));
Expand All @@ -66,6 +83,10 @@ public void testAttributeValuesAdapter() throws IOException {
attributeValues = attrAdapter.read(new JsonReader(stringReader));
}

// Verify that the JSON string does not contain any escaped angle brackets.
assertThat(jsonString).doesNotContain("\\u003c");
// Verify that the String "Hello String" is preserved as is, without any additional escaping.
assertThat(jsonString).contains(":\"Hello String\"");
assertThat((Map<?, ?>) attributeValues.attributes()).containsExactlyEntriesIn(builtDict);
}
}
3 changes: 2 additions & 1 deletion src/test/shell/bazel/bazel_determinism_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ function test_determinism() {
new_hash=$(shasum -a 256 "src/MODULE.tools" | awk '{print $1}')
sed -i.bak "/\"bazel_tools\":/s/\"[a-f0-9]*\"/\"$new_hash\"/" MODULE.bazel.lock
# TODO: Temporary hack for lockfile version mismatch, remove these two lines after updating to 6.4.0
sed -i.bak 's/"lockFileVersion": 1/"lockFileVersion": 2/' MODULE.bazel.lock
sed -i.bak 's/"lockFileVersion": 1/"lockFileVersion": 3/' MODULE.bazel.lock
sed -i.bak 's/"--/"/g' MODULE.bazel.lock
sed -i.bak 's/"moduleExtensions":/"moduleExtensions-old":/' MODULE.bazel.lock
rm MODULE.bazel.lock.bak

Expand Down
21 changes: 12 additions & 9 deletions src/tools/bzlmod/utils.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ def extract_url(attributes):
The url extracted from the given attributes.
"""
if "urls" in attributes:
return attributes["urls"][0][2:]
return attributes["urls"][0].removeprefix("--")
elif "url" in attributes:
return attributes["url"][2:]
return attributes["url"].removeprefix("--")
else:
fail("Could not find url in attributes %s" % attributes)

Expand All @@ -49,7 +49,10 @@ def parse_http_artifacts(ctx, lockfile_path, required_repos):
A list of http artifacts in the form of
[{"integrity": <integrity value>, "url": <url>}, {"sha256": <sha256 value>, "url": <url>}, ...]
All lockfile string values are prefixed with `--`, hence the `[2:]` is needed to remove the prefix.
All lockfile string values in version 2, but not version 3, are prefixed with `--`, hence the
`.removeprefix("--")` is needed to remove the prefix if it exists. This is a heuristic as
version 3 strings could start with `--`, but that is unlikely.
TODO: Remove this hack after the release of Bazel 6.4.0.
"""
lockfile = json.decode(ctx.read(lockfile_path))
http_artifacts = []
Expand All @@ -58,21 +61,21 @@ def parse_http_artifacts(ctx, lockfile_path, required_repos):
if "repoSpec" in module and module["repoSpec"]["ruleClassName"] == "http_archive":
repo_spec = module["repoSpec"]
attributes = repo_spec["attributes"]
repo_name = attributes["name"][2:]
repo_name = attributes["name"].removeprefix("--")

if repo_name not in required_repos:
continue
found_repos.append(repo_name)

http_artifacts.append({
"integrity": attributes["integrity"][2:],
"integrity": attributes["integrity"].removeprefix("--"),
"url": extract_url(attributes),
})
if "remote_patches" in attributes:
for patch, integrity in attributes["remote_patches"].items():
http_artifacts.append({
"integrity": integrity[2:],
"url": patch[2:],
"integrity": integrity.removeprefix("--"),
"url": patch.removeprefix("--"),
})

for _, extension in lockfile["moduleExtensions"].items():
Expand All @@ -81,14 +84,14 @@ def parse_http_artifacts(ctx, lockfile_path, required_repos):
rule_class = repo_spec["ruleClassName"]
if rule_class == "http_archive" or rule_class == "http_file" or rule_class == "http_jar":
attributes = repo_spec["attributes"]
repo_name = attributes["name"][2:]
repo_name = attributes["name"].removeprefix("--")

if repo_name not in required_repos:
continue
found_repos.append(repo_name)

http_artifacts.append({
"sha256": attributes["sha256"][2:],
"sha256": attributes["sha256"].removeprefix("--"),
"url": extract_url(attributes),
})

Expand Down

0 comments on commit 5bdb208

Please sign in to comment.