Skip to content

Commit

Permalink
[RemoteCacheMetrics] Fix percentage bug and improve stats (#124)
Browse files Browse the repository at this point in the history
This change

* fixes the percentage shown, it was showing the inverse value (cache
miss % instead of cache hit %)
* fixes the "remote upload outputs" time, which mistakenly also included
upload times of inputs (for RE).
* filters out local actions that don't do remote cache checks, as these
are not relevant and falsify the %
* adds absolute numbers for how many cache checks were performed and how
many were misses
* adds some documentation on `disk_cache` also being a remote cache

Contributes to #90

---------

Signed-off-by: Sara Adams <sara.e.adams@gmail.com>
  • Loading branch information
saraadams authored Nov 27, 2023
1 parent 13f1e27 commit dcc2907
Show file tree
Hide file tree
Showing 6 changed files with 143 additions and 62 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,30 @@ public class BazelProfileConstants {
public static final String COMPLETE_SUBPROCESS_RUN = "subprocess.run";
public static final String COMPLETE_EXECUTE_REMOTELY = "execute remotely";

// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteRepositoryRemoteExecutor.java#L160
/**
* For complete events of category {@link #CAT_REMOTE_EXECUTION_UPLOAD_TIME}, this name indicates
* that missing inputs were uploaded to a remote execution service.
*/
public static final String COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_MISSING_INPUTS =
"upload missing inputs";

// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java#L1417
/**
* For complete events of category {@link #CAT_REMOTE_EXECUTION_UPLOAD_TIME}, this name indicates
* that outputs were uploaded to a remote cache. This includes local "uploads" when using
* `disk_cache`.
*/
public static final String COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS =
"upload outputs";

// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java#L359
/**
* For complete events of category {@link #CAT_REMOTE_EXECUTION_UPLOAD_TIME}, this name indicates
* that outputs were uploaded to a remote cache while using a remote execution service.
*/
public static final String COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD = "upload";

// InstantEvent names
public static final String INSTANT_FINISHING = "Finishing";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_GENERAL_INFORMATION;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_LOCAL_ACTION_EXECUTION;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_ACTION_CACHE_CHECK;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_SUBPROCESS_RUN;

import com.engflow.bazel.invocation.analyzer.core.Datum;
Expand Down Expand Up @@ -105,12 +106,15 @@ public static class LocalAction implements Comparable<LocalAction> {

private final CompleteEvent action;
private final ImmutableList<CompleteEvent> relatedEvents;

private final boolean checksRemoteCache;
private final boolean executedLocally;

@VisibleForTesting
public LocalAction(CompleteEvent action, List<CompleteEvent> relatedEvents) {
this.action = action;
this.relatedEvents = ImmutableList.copyOf(relatedEvents);
this.checksRemoteCache = relatedEvents.stream().anyMatch(e -> checksRemoteCache(e));
this.executedLocally = relatedEvents.stream().anyMatch(e -> indicatesLocalExecution(e));
}

Expand All @@ -122,6 +126,10 @@ public List<CompleteEvent> getRelatedEvents() {
return relatedEvents;
}

public boolean hasRemoteCacheCheck() {
return checksRemoteCache;
}

public boolean isExecutedLocally() {
return executedLocally;
}
Expand Down Expand Up @@ -167,6 +175,10 @@ public int compareTo(LocalAction o) {
return action.start.compareTo(o.action.start);
}

public static boolean checksRemoteCache(CompleteEvent event) {
return CAT_REMOTE_ACTION_CACHE_CHECK.equals(event.category);
}

public static boolean indicatesLocalExecution(CompleteEvent event) {
return CAT_LOCAL_ACTION_EXECUTION.equals(event.category)
|| CAT_GENERAL_INFORMATION.equals(event.category)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,62 +4,63 @@
import com.engflow.bazel.invocation.analyzer.time.DurationUtil;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import java.time.Duration;

/**
* Metrics on remote caching, namely how much time was spent on cache checks, downloading outputs,
* uploading outputs, and what percentage of actions were cached remotely.
*
* <p>Note that Bazel does not differentiate between a remote cache configured via `--disk_cache` or
* `--remote_cache`.
*/
public class RemoteCacheMetrics implements Datum {

private final Duration totalCacheCheck;
private final Duration totalDownloadOutputs;
private final Duration totalUploadOutputs;
private final int cacheChecks;
private final int cacheMisses;
private final Duration cacheCheckDuration;
private final Duration downloadOutputsDuration;
private final Duration uploadOutputsDuration;

private final float percentCachedRemotely;

RemoteCacheMetrics() {
this(Duration.ZERO, Duration.ZERO, Duration.ZERO, 0f);
this(0, 0, Duration.ZERO, Duration.ZERO, Duration.ZERO);
}

RemoteCacheMetrics(
Duration totalCacheCheck,
Duration totalDownloadOutputs,
Duration totalUploadOutputs,
float percentCachedRemotely) {
this.totalCacheCheck = Preconditions.checkNotNull(totalCacheCheck);
this.totalDownloadOutputs = Preconditions.checkNotNull(totalDownloadOutputs);
this.totalUploadOutputs = Preconditions.checkNotNull(totalUploadOutputs);
this.percentCachedRemotely = percentCachedRemotely;
int cacheChecks,
int cacheMisses,
Duration totalCacheCheckDuration,
Duration downloadOutputsDuration,
Duration uploadOutputsDuration) {
this.cacheChecks = cacheChecks;
this.cacheMisses = cacheMisses;
this.percentCachedRemotely = 100f * (cacheChecks - cacheMisses) / cacheChecks;
this.cacheCheckDuration = Preconditions.checkNotNull(totalCacheCheckDuration);
this.downloadOutputsDuration = Preconditions.checkNotNull(downloadOutputsDuration);
this.uploadOutputsDuration = Preconditions.checkNotNull(uploadOutputsDuration);
}

@Override
public boolean isEmpty() {
return totalCacheCheck.isZero() && totalDownloadOutputs.isZero() && totalUploadOutputs.isZero();
return cacheChecks == 0;
}

@Override
public String getEmptyReason() {
return isEmpty() ? "No remote cache operations available." : null;
return isEmpty() ? "Could not find any remote cache checks." : null;
}

@Override
public String getDescription() {
return "Collection of remote cache related metrics.";
return "Metrics on the remote caching used. This includes both the use of `--remote_cache` and"
+ " `--disk_cache`.";
}

@Override
public String toString() {
return "RemoteCacheMetrics{"
+ "totalCacheCheck="
+ totalCacheCheck
+ ", totalDownloadOutputs="
+ totalDownloadOutputs
+ ", totalUploadOutputs="
+ totalUploadOutputs
+ ", percentCached="
+ percentCachedRemotely
+ '}';
return getSummary();
}

@Override
Expand All @@ -71,28 +72,41 @@ public boolean equals(Object o) {
return false;
}
RemoteCacheMetrics that = (RemoteCacheMetrics) o;
return Float.compare(that.percentCachedRemotely, percentCachedRemotely) == 0
&& Objects.equal(totalCacheCheck, that.totalCacheCheck)
&& Objects.equal(totalDownloadOutputs, that.totalDownloadOutputs)
&& Objects.equal(totalUploadOutputs, that.totalUploadOutputs);
return that.cacheChecks == cacheChecks
&& that.cacheMisses == cacheMisses
&& Float.compare(that.percentCachedRemotely, percentCachedRemotely) == 0
&& Objects.equal(cacheCheckDuration, that.cacheCheckDuration)
&& Objects.equal(downloadOutputsDuration, that.downloadOutputsDuration)
&& Objects.equal(uploadOutputsDuration, that.uploadOutputsDuration);
}

@Override
public int hashCode() {
return Objects.hashCode(
totalCacheCheck, totalDownloadOutputs, totalUploadOutputs, percentCachedRemotely);
cacheChecks,
cacheMisses,
percentCachedRemotely,
cacheCheckDuration,
downloadOutputsDuration,
uploadOutputsDuration);
}

@Override
public String getSummary() {
String formattedPercentage = String.format("%,.2f%%", percentCachedRemotely);
var width = Math.max(formattedPercentage.length(), String.valueOf(cacheChecks).length());
return String.format(
"Total Remote Cache Check Duration: %s\n"
+ "Total Remote Download Outputs: %s\n"
+ "Total Remote Upload Outputs: %s\n"
+ "Percent cached remotely: %,.2f%%",
DurationUtil.formatDuration(totalCacheCheck),
DurationUtil.formatDuration(totalDownloadOutputs),
DurationUtil.formatDuration(totalUploadOutputs),
percentCachedRemotely);
"Number of cache checks: %s\n"
+ "Number of cache misses: %s\n"
+ "Cache hit percentage: %s\n"
+ "Time spend checking for cache hits: %s\n"
+ "Time spend downloading outputs: %s\n"
+ "Time spend uploading outputs: %s",
Strings.padStart(String.valueOf(cacheChecks), width, ' '),
Strings.padStart(String.valueOf(cacheMisses), width, ' '),
Strings.padStart(formattedPercentage, width, ' '),
DurationUtil.formatDuration(cacheCheckDuration),
DurationUtil.formatDuration(downloadOutputsDuration),
DurationUtil.formatDuration(uploadOutputsDuration));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_ACTION_CACHE_CHECK;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_EXECUTION_UPLOAD_TIME;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_OUTPUT_DOWNLOAD;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS;

import com.engflow.bazel.invocation.analyzer.core.DataProvider;
import com.engflow.bazel.invocation.analyzer.core.DatumSupplier;
Expand All @@ -17,7 +19,10 @@
import java.util.List;
import java.util.stream.Collectors;

/** A {@link DataProvider} that supplies data on remote caching. */
/**
* A {@link DataProvider} that supplies data on remote caching. Note that `disk_cache` is also a
* remote cache, even though it interacts with the local disk.
*/
public class RemoteCacheMetricsDataProvider extends DataProvider {

@Override
Expand All @@ -31,15 +36,14 @@ public List<DatumSupplierSpecification<?>> getSuppliers() {
RemoteCacheMetrics derive()
throws InvalidProfileException, MissingInputException, NullDatumException {
var metrics =
getDataManager().getDatum(LocalActions.class).parallelStream()
getDataManager().getDatum(LocalActions.class).stream()
.filter(action -> action.hasRemoteCacheCheck())
.parallel()
.map(this::coalesce)
.collect(Collectors.toList());
var summary = metrics.stream().reduce(RemoteCacheData.EMPTY, RemoteCacheData::plus);
return new RemoteCacheMetrics(
summary.check,
summary.download,
summary.upload,
((float) summary.uncached / metrics.size()) * 100f);
metrics.size(), summary.cacheMisses, summary.check, summary.download, summary.upload);
}

RemoteCacheData coalesce(LocalAction action) {
Expand All @@ -60,21 +64,21 @@ RemoteCacheData plus(RemoteCacheData that) {
public final Duration check;
public final Duration download;
private final Duration upload;
private final int uncached;
private final int cacheMisses;

RemoteCacheData(Duration check, Duration download, Duration upload, int uncached) {
RemoteCacheData(Duration check, Duration download, Duration upload, int cacheMisses) {
this.check = check;
this.download = download;
this.upload = upload;
this.uncached = uncached;
this.cacheMisses = cacheMisses;
}

RemoteCacheData plus(RemoteCacheData other) {
return new RemoteCacheData(
check.plus(other.check),
download.plus(other.download),
upload.plus(other.upload),
uncached + other.uncached);
cacheMisses + other.cacheMisses);
}

RemoteCacheData calculateCacheState() {
Expand All @@ -87,14 +91,23 @@ RemoteCacheData calculateCacheState() {
}

RemoteCacheData plus(CompleteEvent event) {
// The event documents a remote cache check.
if (CAT_REMOTE_ACTION_CACHE_CHECK.equals(event.category)) {
return new RemoteCacheData(check.plus(event.duration), download, upload, uncached);
return new RemoteCacheData(check.plus(event.duration), download, upload, cacheMisses);
}
// The event documents downloading outputs from a remote cache.
if (CAT_REMOTE_OUTPUT_DOWNLOAD.equals(event.category)) {
return new RemoteCacheData(check, download.plus(event.duration), upload, uncached);
return new RemoteCacheData(check, download.plus(event.duration), upload, cacheMisses);
}
if (CAT_REMOTE_EXECUTION_UPLOAD_TIME.equals(event.category)) {
return new RemoteCacheData(check, download, upload.plus(event.duration), uncached);
// The event documents uploading outputs to a remote cache.
// See
// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteExecutionService.java#L1417
// and
// https://github.com/bazelbuild/bazel/blob/7d10999fc0357596824f2b6022bbbd895f245a3c/src/main/java/com/google/devtools/build/lib/remote/RemoteSpawnRunner.java#L359
if (CAT_REMOTE_EXECUTION_UPLOAD_TIME.equals(event.category)
&& (COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS.equals(event.name)
|| COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD.equals(event.name))) {
return new RemoteCacheData(check, download, upload.plus(event.duration), cacheMisses);
}
return this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ public CompleteEvent related(int duration, String category) {
return related(nextRelatedStart, duration, category);
}

public CompleteEvent related(int duration, String category, String name) {
return related(nextRelatedStart, duration, category, name);
}

public TraceEvent asEvent() {
return thread(
id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_ACTION_CACHE_CHECK;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_EXECUTION_UPLOAD_TIME;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.CAT_REMOTE_OUTPUT_DOWNLOAD;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_MISSING_INPUTS;
import static com.engflow.bazel.invocation.analyzer.bazelprofile.BazelProfileConstants.COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS;
import static com.google.common.truth.Truth.assertThat;
import static org.mockito.Mockito.when;

Expand Down Expand Up @@ -62,25 +65,36 @@ public void summarizeCacheData()
thread.related(1, CAT_REMOTE_ACTION_CACHE_CHECK),
thread.related(2, CAT_REMOTE_OUTPUT_DOWNLOAD))),
new LocalActions.LocalAction(
thread.actionProcessingAction("More Cached Work", "WorkC", 5),
thread.actionProcessingAction("Cache Miss Work", "WorkC", 5),
List.of(
thread.related(4, CAT_REMOTE_ACTION_CACHE_CHECK),
thread.related(8, CAT_REMOTE_OUTPUT_DOWNLOAD))),
thread.related(
3,
CAT_REMOTE_EXECUTION_UPLOAD_TIME,
COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_OUTPUTS))),
new LocalActions.LocalAction(
thread.actionProcessingAction("Cache Miss Work", "WorkC", 5),
thread.actionProcessingAction("Cache Miss Work RE", "WorkC", 5),
List.of(
thread.related(16, CAT_REMOTE_ACTION_CACHE_CHECK),
thread.related(32, CAT_REMOTE_EXECUTION_UPLOAD_TIME))),
thread.related(8, CAT_REMOTE_ACTION_CACHE_CHECK),
thread.related(
5,
CAT_REMOTE_EXECUTION_UPLOAD_TIME,
COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD_MISSING_INPUTS),
thread.related(
7,
CAT_REMOTE_EXECUTION_UPLOAD_TIME,
COMPLETE_REMOTE_EXECUTION_UPLOAD_TIME_UPLOAD))),
new LocalActions.LocalAction(
thread.actionProcessingAction("UnCached Work", "LocalWorkC", 5),
thread.actionProcessingAction("Work without cache check", "LocalWorkC", 5),
List.of()))));

Truth.assertThat(provider.derive())
.isEqualTo(
new RemoteCacheMetrics(
Duration.ofSeconds(1 + 4 + 16),
Duration.ofSeconds(2 + 8),
Duration.ofSeconds(32),
25.0f));
3,
2,
Duration.ofSeconds(1 + 4 + 8),
Duration.ofSeconds(2),
Duration.ofSeconds(3 + 7)));
}
}

0 comments on commit dcc2907

Please sign in to comment.