From 1c6c015ff12deffdbb65a1e3a2ad065c05fa821a Mon Sep 17 00:00:00 2001 From: Googler Date: Wed, 14 Feb 2024 15:36:39 -0800 Subject: [PATCH] [7.1.0] Collect directory contents in parallel in CompactSpawnLogContext. Significantly reduces time spent computing a log entry for large tree artifacts (in a particular execution with ~250k files, the time is reduced from ~72s to ~6s). This is the same idea as https://github.com/bazelbuild/bazel/commit/368bf117513731909337b6e2bd5082c046399506, later improved by https://github.com/bazelbuild/bazel/commit/df07d275ce17f14224af0f1a5862755a7db44397. In theory, the optimization could also be applied to ExpandedSpawnLogContext. However, I don't want to invest resources into improving the old log formats at this time. Also sort the directory contents alphabetically, fixing an oversight in the previous implementation. PiperOrigin-RevId: 607126723 Change-Id: I0ebcdbd5e897e25b5b0c28a642e50acae2fd6730 --- .../build/lib/bazel/SpawnLogModule.java | 5 + .../com/google/devtools/build/lib/exec/BUILD | 1 + .../lib/exec/CompactSpawnLogContext.java | 152 +++++++++++++----- .../lib/exec/SpawnLogContextTestBase.java | 15 +- 4 files changed, 130 insertions(+), 43 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/bazel/SpawnLogModule.java b/src/main/java/com/google/devtools/build/lib/bazel/SpawnLogModule.java index 62bbbd7c0b384b..bcd8429dd378d6 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/SpawnLogModule.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/SpawnLogModule.java @@ -88,6 +88,7 @@ private void initOutputs(CommandEnvironment env) throws IOException { Path outputBase = env.getOutputBase(); if (executionOptions.executionLogCompactFile != null) { + try { spawnLogContext = new CompactSpawnLogContext( workingDirectory.getRelative(executionOptions.executionLogCompactFile), @@ -95,6 +96,10 @@ private void initOutputs(CommandEnvironment env) throws IOException { env.getOptions().getOptions(RemoteOptions.class), env.getRuntime().getFileSystem().getDigestFunction(), env.getXattrProvider()); + } catch (InterruptedException e) { + env.getReporter() + .handle(Event.error("Error while setting up the execution log: " + e.getMessage())); + } } else { Path outputPath = null; Encoding encoding = null; diff --git a/src/main/java/com/google/devtools/build/lib/exec/BUILD b/src/main/java/com/google/devtools/build/lib/exec/BUILD index ae365f3899a59d..37870bdb3ba589 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/BUILD +++ b/src/main/java/com/google/devtools/build/lib/exec/BUILD @@ -270,6 +270,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/actions:runfiles_supplier", "//src/main/java/com/google/devtools/build/lib/analysis/platform:platform_utils", "//src/main/java/com/google/devtools/build/lib/collect/nestedset", + "//src/main/java/com/google/devtools/build/lib/concurrent", "//src/main/java/com/google/devtools/build/lib/profiler", "//src/main/java/com/google/devtools/build/lib/remote/options", "//src/main/java/com/google/devtools/build/lib/util/io", diff --git a/src/main/java/com/google/devtools/build/lib/exec/CompactSpawnLogContext.java b/src/main/java/com/google/devtools/build/lib/exec/CompactSpawnLogContext.java index 15b691469c124d..40749d736f925d 100644 --- a/src/main/java/com/google/devtools/build/lib/exec/CompactSpawnLogContext.java +++ b/src/main/java/com/google/devtools/build/lib/exec/CompactSpawnLogContext.java @@ -13,6 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.exec; +import static com.google.common.base.Preconditions.checkNotNull; import static com.google.common.base.Preconditions.checkState; import com.github.luben.zstd.ZstdOutputStream; @@ -27,6 +28,9 @@ import com.google.devtools.build.lib.actions.Spawns; import com.google.devtools.build.lib.actions.cache.VirtualActionInput; import com.google.devtools.build.lib.collect.nestedset.NestedSet; +import com.google.devtools.build.lib.concurrent.AbstractQueueVisitor; +import com.google.devtools.build.lib.concurrent.ErrorClassifier; +import com.google.devtools.build.lib.concurrent.NamedForkJoinPool; import com.google.devtools.build.lib.exec.Protos.Digest; import com.google.devtools.build.lib.exec.Protos.ExecLogEntry; import com.google.devtools.build.lib.exec.Protos.Platform; @@ -37,6 +41,7 @@ import com.google.devtools.build.lib.util.io.MessageOutputStream; import com.google.devtools.build.lib.vfs.DigestHashFunction; import com.google.devtools.build.lib.vfs.FileSystem; +import com.google.devtools.build.lib.vfs.IORuntimeException; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; import com.google.devtools.build.lib.vfs.XattrProvider; @@ -45,18 +50,84 @@ import java.io.BufferedOutputStream; import java.io.IOException; import java.time.Duration; -import java.util.ArrayDeque; +import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; import java.util.Map; import java.util.SortedMap; +import java.util.concurrent.ForkJoinPool; import javax.annotation.Nullable; import javax.annotation.concurrent.GuardedBy; /** A {@link SpawnLogContext} implementation that produces a log in compact format. */ public class CompactSpawnLogContext extends SpawnLogContext { + private static final Comparator EXEC_LOG_ENTRY_FILE_COMPARATOR = + Comparator.comparing(ExecLogEntry.File::getPath); + + private static final ForkJoinPool VISITOR_POOL = + NamedForkJoinPool.newNamedPool( + "execlog-directory-visitor", Runtime.getRuntime().availableProcessors()); + + /** Visitor for use in {@link #visitDirectory}. */ + protected interface DirectoryChildVisitor { + void visit(Path path) throws IOException; + } + + private static class DirectoryVisitor extends AbstractQueueVisitor { + private final Path rootDir; + private final DirectoryChildVisitor childVisitor; + + private DirectoryVisitor(Path rootDir, DirectoryChildVisitor childVisitor) { + super( + VISITOR_POOL, + ExecutorOwnership.SHARED, + ExceptionHandlingMode.FAIL_FAST, + ErrorClassifier.DEFAULT); + this.rootDir = checkNotNull(rootDir); + this.childVisitor = checkNotNull(childVisitor); + } + + private void run() throws IOException, InterruptedException { + execute(() -> visitSubdirectory(rootDir)); + try { + awaitQuiescence(true); + } catch (IORuntimeException e) { + throw e.getCauseIOException(); + } + } + + private void visitSubdirectory(Path dir) { + try { + for (Path child : dir.getDirectoryEntries()) { + if (child.isDirectory()) { + execute(() -> visitSubdirectory(child)); + continue; + } + childVisitor.visit(child); + } + } catch (IOException e) { + throw new IORuntimeException(e); + } + } + } + + /** + * Visits a directory hierarchy in parallel. + * + *

Calls {@code childVisitor} for every descendant path of {@code rootDir} that isn't itself a + * directory, following symlinks. The visitor may be concurrently called by multiple threads, and + * must synchronize accesses to shared data. + */ + private void visitDirectory(Path rootDir, DirectoryChildVisitor childVisitor) + throws IOException, InterruptedException { + new DirectoryVisitor(rootDir, childVisitor).run(); + } + private interface ExecLogEntrySupplier { - ExecLogEntry.Builder get() throws IOException; + ExecLogEntry.Builder get() throws IOException, InterruptedException; } private final PathFragment execRoot; @@ -84,7 +155,7 @@ public CompactSpawnLogContext( @Nullable RemoteOptions remoteOptions, DigestHashFunction digestHashFunction, XattrProvider xattrProvider) - throws IOException { + throws IOException, InterruptedException { this.execRoot = execRoot; this.remoteOptions = remoteOptions; this.digestHashFunction = digestHashFunction; @@ -101,7 +172,7 @@ private static MessageOutputStream getOutputStream(Path path) thro path.toString(), new ZstdOutputStream(new BufferedOutputStream(path.getOutputStream()))); } - private void logInvocation() throws IOException { + private void logInvocation() throws IOException, InterruptedException { logEntry( null, () -> @@ -119,7 +190,7 @@ public void logSpawn( FileSystem fileSystem, Duration timeout, SpawnResult result) - throws IOException, ExecException { + throws IOException, InterruptedException, ExecException { try (SilentCloseable c = Profiler.instance().profile("logSpawn")) { ExecLogEntry.Spawn.Builder builder = ExecLogEntry.Spawn.newBuilder(); @@ -187,7 +258,7 @@ public void logSpawn( */ private int logInputs( Spawn spawn, InputMetadataProvider inputMetadataProvider, FileSystem fileSystem) - throws IOException { + throws IOException, InterruptedException { // Add runfiles and filesets as additional direct members of the top-level nested set of inputs. // This prevents it from being shared, but experimentally, the top-level input nested set for a @@ -229,7 +300,7 @@ private int logInputs( */ private int logTools( Spawn spawn, InputMetadataProvider inputMetadataProvider, FileSystem fileSystem) - throws IOException { + throws IOException, InterruptedException { return logNestedSet( spawn.getToolFiles(), ImmutableList.of(), @@ -254,7 +325,7 @@ private int logNestedSet( InputMetadataProvider inputMetadataProvider, FileSystem fileSystem, boolean shared) - throws IOException { + throws IOException, InterruptedException { if (set.isEmpty() && additionalDirectoryIds.isEmpty()) { return 0; } @@ -308,7 +379,7 @@ private int logNestedSet( * @return the entry ID of the {@link ExecLogEntry.File} describing the file. */ private int logFile(ActionInput input, Path path, InputMetadataProvider inputMetadataProvider) - throws IOException { + throws IOException, InterruptedException { checkState(!(input instanceof VirtualActionInput.EmptyActionInput)); return logEntry( @@ -347,7 +418,7 @@ private int logFile(ActionInput input, Path path, InputMetadataProvider inputMet */ private int logDirectory( ActionInput input, Path root, InputMetadataProvider inputMetadataProvider) - throws IOException { + throws IOException, InterruptedException { return logEntry( input.getExecPathString(), () -> @@ -375,7 +446,7 @@ private int logRunfilesDirectory( Map mapping, InputMetadataProvider inputMetadataProvider, FileSystem fileSystem) - throws IOException { + throws IOException, InterruptedException { return logEntry( root.getPathString(), () -> { @@ -423,38 +494,36 @@ private int logRunfilesDirectory( * @param pathPrefix a prefix to prepend to each child path * @return the list of files transitively contained in the directory */ - private ImmutableList expandDirectory( + private List expandDirectory( Path root, @Nullable String pathPrefix, InputMetadataProvider inputMetadataProvider) - throws IOException { - ImmutableList.Builder builder = ImmutableList.builder(); - - ArrayDeque dirs = new ArrayDeque<>(); - dirs.add(root); - - while (!dirs.isEmpty()) { - Path dir = dirs.removeFirst(); - for (Path child : dir.getDirectoryEntries()) { - if (child.isDirectory()) { - dirs.addLast(child); - continue; - } + throws IOException, InterruptedException { + ArrayList files = new ArrayList<>(); + visitDirectory( + root, + (child) -> { + String childPath = pathPrefix != null ? pathPrefix + "/" : ""; + childPath += child.relativeTo(root).getPathString(); + + Digest digest = + computeDigest( + /* input= */ null, + child, + inputMetadataProvider, + xattrProvider, + digestHashFunction, + /* includeHashFunctionName= */ false); + + ExecLogEntry.File file = + ExecLogEntry.File.newBuilder().setPath(childPath).setDigest(digest).build(); - String childPath = pathPrefix != null ? pathPrefix + "/" : ""; - childPath += child.relativeTo(root).getPathString(); + synchronized (files) { + files.add(file); + } + }); - Digest digest = - computeDigest( - /* input= */ null, - child, - inputMetadataProvider, - xattrProvider, - digestHashFunction, - /* includeHashFunctionName= */ false); + Collections.sort(files, EXEC_LOG_ENTRY_FILE_COMPARATOR); - builder.add(ExecLogEntry.File.newBuilder().setPath(childPath).setDigest(digest).build()); - } - } - return builder.build(); + return files; } /** @@ -466,7 +535,8 @@ private ImmutableList expandDirectory( * @return the entry ID of the {@link ExecLogEntry.UnresolvedSymlink} describing the unresolved * symlink. */ - private int logUnresolvedSymlink(ActionInput input, Path path) throws IOException { + private int logUnresolvedSymlink(ActionInput input, Path path) + throws IOException, InterruptedException { return logEntry( input.getExecPathString(), () -> @@ -489,7 +559,7 @@ private int logUnresolvedSymlink(ActionInput input, Path path) throws IOExceptio */ @CanIgnoreReturnValue private synchronized int logEntry(@Nullable Object key, ExecLogEntrySupplier supplier) - throws IOException { + throws IOException, InterruptedException { try (SilentCloseable c = Profiler.instance().profile("logEntry/synchronized")) { if (key == null) { // No need to check for a previously added entry. diff --git a/src/test/java/com/google/devtools/build/lib/exec/SpawnLogContextTestBase.java b/src/test/java/com/google/devtools/build/lib/exec/SpawnLogContextTestBase.java index 2f578a2e452262..02b183de0abbab 100644 --- a/src/test/java/com/google/devtools/build/lib/exec/SpawnLogContextTestBase.java +++ b/src/test/java/com/google/devtools/build/lib/exec/SpawnLogContextTestBase.java @@ -507,7 +507,14 @@ public void testTreeOutput( actualPath.createDirectoryAndParents(); if (!dirContents.isEmpty()) { - writeFile(actualPath.getChild("child"), "abc"); + Path firstChildPath = actualPath.getRelative("dir1/file1"); + Path secondChildPath = actualPath.getRelative("dir2/file2"); + firstChildPath.getParentDirectory().createDirectoryAndParents(); + secondChildPath.getParentDirectory().createDirectoryAndParents(); + writeFile(firstChildPath, "abc"); + writeFile(secondChildPath, "def"); + Path emptySubdirPath = actualPath.getRelative("dir3"); + emptySubdirPath.createDirectoryAndParents(); } Spawn spawn = defaultSpawnBuilder().withOutputs(treeOutput).build(); @@ -531,8 +538,12 @@ public void testTreeOutput( ? ImmutableList.of() : ImmutableList.of( File.newBuilder() - .setPath("out/tree/child") + .setPath("out/tree/dir1/file1") .setDigest(getDigest("abc")) + .build(), + File.newBuilder() + .setPath("out/tree/dir2/file2") + .setDigest(getDigest("def")) .build())) .build()); }