Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RemoteCacheMetrics] Fix percentage bug and improve stats #124

Merged
merged 2 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,30 @@ public class BazelProfileConstants {
public static final String COMPLETE_SUBPROCESS_RUN = "subprocess.run";
public static final String COMPLETE_EXECUTE_REMOTELY = "execute remotely";

// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteRepositoryRemoteExecutor.java#L160
/**
* For complete events of category {@link #CAT_REMOTE_EXECUTION_UPLOAD_TIME}, this name indicates
* that missing inputs were uploaded to a remote execution service.
*/
public static final String COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_MISSING_INPUTS =
"upload missing inputs";

// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java#L1417
/**
* For complete events of category {@link #CAT_REMOTE_EXECUTION_UPLOAD_TIME}, this name indicates
* that outputs were uploaded to a remote cache. This includes local "uploads" when using
* `disk_cache`.
*/
public static final String COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS =
"upload outputs";

// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java#L359
/**
* For complete events of category {@link #CAT_REMOTE_EXECUTION_UPLOAD_TIME}, this name indicates
* that outputs were uploaded to a remote cache while using a remote execution service.
*/
public static final String COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD = "upload";

// InstantEvent names
public static final String INSTANT_FINISHING = "Finishing";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_GENERAL_INFORMATION;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_LOCAL_ACTION_EXECUTION;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_ACTION_CACHE_CHECK;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_SUBPROCESS_RUN;

import com.engflow.bazel.invocation.analyzer.core.Datum;
Expand Down Expand Up @@ -105,12 +106,15 @@ public static class LocalAction implements Comparable<LocalAction> {

private final CompleteEvent action;
private final ImmutableList<CompleteEvent> relatedEvents;

private final boolean checksRemoteCache;
private final boolean executedLocally;

@VisibleForTesting
public LocalAction(CompleteEvent action, List<CompleteEvent> relatedEvents) {
this.action = action;
this.relatedEvents = ImmutableList.copyOf(relatedEvents);
this.checksRemoteCache = relatedEvents.stream().anyMatch(e -> checksRemoteCache(e));
this.executedLocally = relatedEvents.stream().anyMatch(e -> indicatesLocalExecution(e));
}

Expand All @@ -122,6 +126,10 @@ public List<CompleteEvent> getRelatedEvents() {
return relatedEvents;
}

public boolean hasRemoteCacheCheck() {
return checksRemoteCache;
}

public boolean isExecutedLocally() {
return executedLocally;
}
Expand Down Expand Up @@ -167,6 +175,10 @@ public int compareTo(LocalAction o) {
return action.start.compareTo(o.action.start);
}

public static boolean checksRemoteCache(CompleteEvent event) {
return CAT_REMOTE_ACTION_CACHE_CHECK.equals(event.category);
}

public static boolean indicatesLocalExecution(CompleteEvent event) {
return CAT_LOCAL_ACTION_EXECUTION.equals(event.category)
|| CAT_GENERAL_INFORMATION.equals(event.category)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,62 +4,63 @@
import com.engflow.bazel.invocation.analyzer.time.DurationUtil;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import java.time.Duration;

/**
* Metrics on remote caching, namely how much time was spent on cache checks, downloading outputs,
* uploading outputs, and what percentage of actions were cached remotely.
*
* <p>Note that Bazel does not differentiate between a remote cache configured via `--disk_cache` or
* `--remote_cache`.
*/
public class RemoteCacheMetrics implements Datum {

private final Duration totalCacheCheck;
private final Duration totalDownloadOutputs;
private final Duration totalUploadOutputs;
private final int cacheChecks;
private final int cacheMisses;
private final Duration cacheCheckDuration;
private final Duration downloadOutputsDuration;
private final Duration uploadOutputsDuration;

private final float percentCachedRemotely;

RemoteCacheMetrics() {
this(Duration.ZERO, Duration.ZERO, Duration.ZERO, 0f);
this(0, 0, Duration.ZERO, Duration.ZERO, Duration.ZERO);
}

RemoteCacheMetrics(
Duration totalCacheCheck,
Duration totalDownloadOutputs,
Duration totalUploadOutputs,
float percentCachedRemotely) {
this.totalCacheCheck = Preconditions.checkNotNull(totalCacheCheck);
this.totalDownloadOutputs = Preconditions.checkNotNull(totalDownloadOutputs);
this.totalUploadOutputs = Preconditions.checkNotNull(totalUploadOutputs);
this.percentCachedRemotely = percentCachedRemotely;
int cacheChecks,
int cacheMisses,
Duration totalCacheCheckDuration,
Duration downloadOutputsDuration,
Duration uploadOutputsDuration) {
this.cacheChecks = cacheChecks;
this.cacheMisses = cacheMisses;
this.percentCachedRemotely = 100f * (cacheChecks - cacheMisses) / cacheChecks;
this.cacheCheckDuration = Preconditions.checkNotNull(totalCacheCheckDuration);
this.downloadOutputsDuration = Preconditions.checkNotNull(downloadOutputsDuration);
this.uploadOutputsDuration = Preconditions.checkNotNull(uploadOutputsDuration);
}

@Override
public boolean isEmpty() {
return totalCacheCheck.isZero() && totalDownloadOutputs.isZero() && totalUploadOutputs.isZero();
return cacheChecks == 0;
}

@Override
public String getEmptyReason() {
return isEmpty() ? "No remote cache operations available." : null;
return isEmpty() ? "Could not find any remote cache checks." : null;
}

@Override
public String getDescription() {
return "Collection of remote cache related metrics.";
return "Metrics on the remote caching used. This includes both the use of `--remote_cache` and"
+ " `--disk_cache`.";
}

@Override
public String toString() {
return "RemoteCacheMetrics{"
+ "totalCacheCheck="
+ totalCacheCheck
+ ", totalDownloadOutputs="
+ totalDownloadOutputs
+ ", totalUploadOutputs="
+ totalUploadOutputs
+ ", percentCached="
+ percentCachedRemotely
+ '}';
return getSummary();
}

@Override
Expand All @@ -71,28 +72,41 @@ public boolean equals(Object o) {
return false;
}
RemoteCacheMetrics that = (RemoteCacheMetrics) o;
return Float.compare(that.percentCachedRemotely, percentCachedRemotely) == 0
&& Objects.equal(totalCacheCheck, that.totalCacheCheck)
&& Objects.equal(totalDownloadOutputs, that.totalDownloadOutputs)
&& Objects.equal(totalUploadOutputs, that.totalUploadOutputs);
return that.cacheChecks == cacheChecks
&& that.cacheMisses == cacheMisses
&& Float.compare(that.percentCachedRemotely, percentCachedRemotely) == 0
&& Objects.equal(cacheCheckDuration, that.cacheCheckDuration)
&& Objects.equal(downloadOutputsDuration, that.downloadOutputsDuration)
&& Objects.equal(uploadOutputsDuration, that.uploadOutputsDuration);
}

@Override
public int hashCode() {
return Objects.hashCode(
totalCacheCheck, totalDownloadOutputs, totalUploadOutputs, percentCachedRemotely);
cacheChecks,
cacheMisses,
percentCachedRemotely,
cacheCheckDuration,
downloadOutputsDuration,
uploadOutputsDuration);
}

@Override
public String getSummary() {
String formattedPercentage = String.format("%,.2f%%", percentCachedRemotely);
var width = Math.max(formattedPercentage.length(), String.valueOf(cacheChecks).length());
return String.format(
"Total Remote Cache Check Duration: %s\n"
+ "Total Remote Download Outputs: %s\n"
+ "Total Remote Upload Outputs: %s\n"
+ "Percent cached remotely: %,.2f%%",
DurationUtil.formatDuration(totalCacheCheck),
DurationUtil.formatDuration(totalDownloadOutputs),
DurationUtil.formatDuration(totalUploadOutputs),
percentCachedRemotely);
"Number of cache checks: %s\n"
+ "Number of cache misses: %s\n"
+ "Cache hit percentage: %s\n"
+ "Time spend checking for cache hits: %s\n"
+ "Time spend downloading outputs: %s\n"
+ "Time spend uploading outputs: %s",
Strings.padStart(String.valueOf(cacheChecks), width, ' '),
Strings.padStart(String.valueOf(cacheMisses), width, ' '),
Strings.padStart(formattedPercentage, width, ' '),
DurationUtil.formatDuration(cacheCheckDuration),
DurationUtil.formatDuration(downloadOutputsDuration),
DurationUtil.formatDuration(uploadOutputsDuration));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_ACTION_CACHE_CHECK;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_EXECUTION_UPLOAD_TIME;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_OUTPUT_DOWNLOAD;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS;

import com.engflow.bazel.invocation.analyzer.core.DataProvider;
import com.engflow.bazel.invocation.analyzer.core.DatumSupplier;
Expand All @@ -17,7 +19,10 @@
import java.util.List;
import java.util.stream.Collectors;

/** A {@link DataProvider} that supplies data on remote caching. */
/**
* A {@link DataProvider} that supplies data on remote caching. Note that `disk_cache` is also a
* remote cache, even though it interacts with the local disk.
*/
public class RemoteCacheMetricsDataProvider extends DataProvider {

@Override
Expand All @@ -31,15 +36,14 @@ public List<DatumSupplierSpecification<?>> getSuppliers() {
RemoteCacheMetrics derive()
throws InvalidProfileException, MissingInputException, NullDatumException {
var metrics =
getDataManager().getDatum(LocalActions.class).parallelStream()
getDataManager().getDatum(LocalActions.class).stream()
.filter(action -> action.hasRemoteCacheCheck())
.parallel()
.map(this::coalesce)
.collect(Collectors.toList());
var summary = metrics.stream().reduce(RemoteCacheData.EMPTY, RemoteCacheData::plus);
return new RemoteCacheMetrics(
summary.check,
summary.download,
summary.upload,
((float) summary.uncached / metrics.size()) * 100f);
metrics.size(), summary.cacheMisses, summary.check, summary.download, summary.upload);
}

RemoteCacheData coalesce(LocalAction action) {
Expand All @@ -60,21 +64,21 @@ RemoteCacheData plus(RemoteCacheData that) {
public final Duration check;
public final Duration download;
private final Duration upload;
private final int uncached;
private final int cacheMisses;

RemoteCacheData(Duration check, Duration download, Duration upload, int uncached) {
RemoteCacheData(Duration check, Duration download, Duration upload, int cacheMisses) {
this.check = check;
this.download = download;
this.upload = upload;
this.uncached = uncached;
this.cacheMisses = cacheMisses;
}

RemoteCacheData plus(RemoteCacheData other) {
return new RemoteCacheData(
check.plus(other.check),
download.plus(other.download),
upload.plus(other.upload),
uncached + other.uncached);
cacheMisses + other.cacheMisses);
}

RemoteCacheData calculateCacheState() {
Expand All @@ -87,14 +91,23 @@ RemoteCacheData calculateCacheState() {
}

RemoteCacheData plus(CompleteEvent event) {
// The event documents a remote cache check.
if (CAT_REMOTE_ACTION_CACHE_CHECK.equals(event.category)) {
return new RemoteCacheData(check.plus(event.duration), download, upload, uncached);
return new RemoteCacheData(check.plus(event.duration), download, upload, cacheMisses);
}
// The event documents downloading outputs from a remote cache.
if (CAT_REMOTE_OUTPUT_DOWNLOAD.equals(event.category)) {
return new RemoteCacheData(check, download.plus(event.duration), upload, uncached);
return new RemoteCacheData(check, download.plus(event.duration), upload, cacheMisses);
}
if (CAT_REMOTE_EXECUTION_UPLOAD_TIME.equals(event.category)) {
return new RemoteCacheData(check, download, upload.plus(event.duration), uncached);
// The event documents uploading outputs to a remote cache.
// See
// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java#L1417
// and
// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java#L359
if (CAT_REMOTE_EXECUTION_UPLOAD_TIME.equals(event.category)
&& (COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS.equals(event.name)
|| COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD.equals(event.name))) {
return new RemoteCacheData(check, download, upload.plus(event.duration), cacheMisses);
}
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ public CompleteEvent related(int duration, String category) {
return related(nextRelatedStart, duration, category);
}

public CompleteEvent related(int duration, String category, String name) {
return related(nextRelatedStart, duration, category, name);
}

public TraceEvent asEvent() {
return thread(
id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_ACTION_CACHE_CHECK;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_EXECUTION_UPLOAD_TIME;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_OUTPUT_DOWNLOAD;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_MISSING_INPUTS;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS;
import static com.google.common.truth.Truth.assertThat;
import static org.mockito.Mockito.when;

Expand Down Expand Up @@ -62,25 +65,36 @@ public void summarizeCacheData()
thread.related(1, CAT_REMOTE_ACTION_CACHE_CHECK),
thread.related(2, CAT_REMOTE_OUTPUT_DOWNLOAD))),
new LocalActions.LocalAction(
thread.actionProcessingAction("More Cached Work", "WorkC", 5),
thread.actionProcessingAction("Cache Miss Work", "WorkC", 5),
List.of(
thread.related(4, CAT_REMOTE_ACTION_CACHE_CHECK),
thread.related(8, CAT_REMOTE_OUTPUT_DOWNLOAD))),
thread.related(
3,
CAT_REMOTE_EXECUTION_UPLOAD_TIME,
COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS))),
new LocalActions.LocalAction(
thread.actionProcessingAction("Cache Miss Work", "WorkC", 5),
thread.actionProcessingAction("Cache Miss Work RE", "WorkC", 5),
List.of(
thread.related(16, CAT_REMOTE_ACTION_CACHE_CHECK),
thread.related(32, CAT_REMOTE_EXECUTION_UPLOAD_TIME))),
thread.related(8, CAT_REMOTE_ACTION_CACHE_CHECK),
thread.related(
5,
CAT_REMOTE_EXECUTION_UPLOAD_TIME,
COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_MISSING_INPUTS),
thread.related(
7,
CAT_REMOTE_EXECUTION_UPLOAD_TIME,
COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD))),
new LocalActions.LocalAction(
thread.actionProcessingAction("UnCached Work", "LocalWorkC", 5),
thread.actionProcessingAction("Work without cache check", "LocalWorkC", 5),
List.of()))));

Truth.assertThat(provider.derive())
.isEqualTo(
new RemoteCacheMetrics(
Duration.ofSeconds(1 + 4 + 16),
Duration.ofSeconds(2 + 8),
Duration.ofSeconds(32),
25.0f));
3,
2,
Duration.ofSeconds(1 + 4 + 8),
Duration.ofSeconds(2),
Duration.ofSeconds(3 + 7)));
}
}