Skip to content

Commit

Permalink
Add a Checksum class for plumbing HTTP downloader content checksums.
Browse files Browse the repository at this point in the history
This is extracted from #7208 ("Support subresource integrity format for `download()` checksums.")

I'm trying to revive that PR and rebase to master, but the replacement of `String sha256` through the HTTP downloader code is causing a lot of Git conflicts. I expect that portion of the change to require less discussion than the proposed content integrity changes, so I'm moving it to a separate PR to simplify review.

@dslomov or @aehlig, could one of you take a look? Once this is merged, I will rebase PR 7208 onto it.

Closes #8714.

PiperOrigin-RevId: 256188026
  • Loading branch information
jmillikin authored and copybara-github committed Jul 2, 2019
1 parent 2260b97 commit 62fa89d
Show file tree
Hide file tree
Showing 11 changed files with 233 additions and 134 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright 2019 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.bazel.repository.downloader;

import com.google.common.hash.HashCode;
import com.google.devtools.build.lib.bazel.repository.cache.RepositoryCache.KeyType;

/** The content checksum for an HTTP download, which knows its own type. */
public class Checksum {
private final KeyType keyType;
private final HashCode hashCode;

private Checksum(KeyType keyType, HashCode hashCode) {
this.keyType = keyType;
this.hashCode = hashCode;
}

/** Constructs a new Checksum for a given key type and hash, in hex format. */
public static Checksum fromString(KeyType keyType, String hash) {
if (!keyType.isValid(hash)) {
throw new IllegalArgumentException("Invalid " + keyType + " checksum '" + hash + "'");
}
return new Checksum(keyType, HashCode.fromString(hash));
}

@Override
public String toString() {
return hashCode.toString();
}

public HashCode getHashCode() {
return hashCode;
}

public KeyType getKeyType() {
return keyType;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
package com.google.devtools.build.lib.bazel.repository.downloader;

import com.google.common.hash.HashCode;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hasher;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadCompatible;
import java.io.IOException;
Expand All @@ -41,11 +40,10 @@ final class HashInputStream extends InputStream {
private final HashCode code;
@Nullable private volatile HashCode actual;

HashInputStream(
@WillCloseWhenClosed InputStream delegate, HashFunction function, HashCode code) {
HashInputStream(@WillCloseWhenClosed InputStream delegate, Checksum checksum) {
this.delegate = delegate;
this.hasher = function.newHasher();
this.code = code;
this.hasher = checksum.getKeyType().newHasher();
this.code = checksum.getHashCode();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

import static com.google.common.collect.ImmutableSortedSet.toImmutableSortedSet;

import com.google.common.base.Preconditions;
import com.google.common.base.Optional;
import com.google.common.base.Predicates;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Ordering;
Expand Down Expand Up @@ -95,8 +95,8 @@ final class HttpConnectorMultiplexer {
this.sleeper = sleeper;
}

public HttpStream connect(List<URL> urls, String sha256) throws IOException {
return connect(urls, sha256, ImmutableMap.<URI, Map<String, String>>of());
public HttpStream connect(List<URL> urls, Optional<Checksum> checksum) throws IOException {
return connect(urls, checksum, ImmutableMap.<URI, Map<String, String>>of());
}

/**
Expand All @@ -116,22 +116,22 @@ public HttpStream connect(List<URL> urls, String sha256) throws IOException {
* and block until the connection can be renegotiated transparently right where it left off.
*
* @param urls mirrors by preference; each URL can be: file, http, or https
* @param sha256 hex checksum lazily checked on entire payload, or empty to disable
* @param checksum checksum lazily checked on entire payload, or empty to disable
* @return an {@link InputStream} of response payload
* @throws IOException if all mirrors are down and contains suppressed exception of each attempt
* @throws InterruptedIOException if current thread is being cast into oblivion
* @throws IllegalArgumentException if {@code urls} is empty or has an unsupported protocol
*/
public HttpStream connect(
List<URL> urls, String sha256, Map<URI, Map<String, String>> authHeaders) throws IOException {
Preconditions.checkNotNull(sha256);
List<URL> urls, Optional<Checksum> checksum, Map<URI, Map<String, String>> authHeaders)
throws IOException {
HttpUtils.checkUrlsArgument(urls);
if (Thread.interrupted()) {
throw new InterruptedIOException();
}
// If there's only one URL then there's no need for us to run all our fancy thread stuff.
if (urls.size() == 1) {
return establishConnection(urls.get(0), sha256, authHeaders);
return establishConnection(urls.get(0), checksum, authHeaders);
}
MutexConditionSharedMemory context = new MutexConditionSharedMemory();
// The parent thread always holds the lock except when released by wait().
Expand All @@ -140,7 +140,7 @@ public HttpStream connect(
long now = clock.currentTimeMillis();
long startAtTime = now;
for (URL url : urls) {
context.jobs.add(new WorkItem(url, sha256, startAtTime, authHeaders));
context.jobs.add(new WorkItem(url, checksum, startAtTime, authHeaders));
startAtTime += FAILOVER_DELAY_MS;
}
// Create the worker thread pool.
Expand Down Expand Up @@ -210,13 +210,17 @@ private static class MutexConditionSharedMemory {

private static class WorkItem {
final URL url;
final String sha256;
final Optional<Checksum> checksum;
final long startAtTime;
final Map<URI, Map<String, String>> authHeaders;

WorkItem(URL url, String sha256, long startAtTime, Map<URI, Map<String, String>> authHeaders) {
WorkItem(
URL url,
Optional<Checksum> checksum,
long startAtTime,
Map<URI, Map<String, String>> authHeaders) {
this.url = url;
this.sha256 = sha256;
this.checksum = checksum;
this.startAtTime = startAtTime;
this.authHeaders = authHeaders;
}
Expand Down Expand Up @@ -263,7 +267,7 @@ public void run() {
// Now we're actually going to attempt to connect to the remote server.
HttpStream result;
try {
result = establishConnection(work.url, work.sha256, work.authHeaders);
result = establishConnection(work.url, work.checksum, work.authHeaders);
} catch (InterruptedIOException e) {
// The parent thread got its result from another thread and killed this one.
synchronized (context) {
Expand Down Expand Up @@ -307,7 +311,7 @@ private void tellParentThreadWeAreDone() {
}

private HttpStream establishConnection(
final URL url, String sha256, Map<URI, Map<String, String>> additionalHeaders)
final URL url, Optional<Checksum> checksum, Map<URI, Map<String, String>> additionalHeaders)
throws IOException {
ImmutableMap<String, String> headers = REQUEST_HEADERS;
try {
Expand All @@ -327,7 +331,7 @@ private HttpStream establishConnection(
return httpStreamFactory.create(
connection,
url,
sha256,
checksum,
new Reconnector() {
@Override
public URLConnection connect(Throwable cause, ImmutableMap<String, String> extraHeaders)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,12 @@ public void setTimeoutScaling(float timeoutScaling) {
/**
* Downloads file to disk and returns path.
*
* <p>If the SHA256 checksum and path to the repository cache is specified, attempt to load the
* file from the {@link RepositoryCache}. If it doesn't exist, proceed to download the file and
* load it into the cache prior to returning the value.
* <p>If the checksum and path to the repository cache is specified, attempt to load the file from
* the {@link RepositoryCache}. If it doesn't exist, proceed to download the file and load it into
* the cache prior to returning the value.
*
* @param urls list of mirror URLs with identical content
* @param sha256 valid SHA256 hex checksum string which is checked, or empty to disable
* @param checksum valid checksum which is checked, or empty to disable
* @param type extension, e.g. "tar.gz" to force on downloaded filename, or empty to not do this
* @param output destination filename if {@code type} is <i>absent</i>, otherwise output directory
* @param eventHandler CLI progress reporter
Expand All @@ -91,7 +91,7 @@ public void setTimeoutScaling(float timeoutScaling) {
public Path download(
List<URL> urls,
Map<URI, Map<String, String>> authHeaders,
String sha256,
Optional<Checksum> checksum,
String canonicalId,
Optional<String> type,
Path output,
Expand All @@ -116,14 +116,15 @@ public Path download(
}
Path destination = getDownloadDestination(mainUrl, type, output);

// Is set to true if the value should be cached by the sha256 value provided
boolean isCachingByProvidedSha256 = false;
// Is set to true if the value should be cached by the checksum value provided
boolean isCachingByProvidedChecksum = false;

if (!sha256.isEmpty()) {
if (checksum.isPresent()) {
String cacheKey = checksum.get().toString();
KeyType cacheKeyType = checksum.get().getKeyType();
try {
String currentSha256 =
RepositoryCache.getChecksum(KeyType.SHA256, destination);
if (currentSha256.equals(sha256)) {
String currentChecksum = RepositoryCache.getChecksum(cacheKeyType, destination);
if (currentChecksum.equals(cacheKey)) {
// No need to download.
return destination;
}
Expand All @@ -132,14 +133,14 @@ public Path download(
}

if (repositoryCache.isEnabled()) {
isCachingByProvidedSha256 = true;
isCachingByProvidedChecksum = true;

try {
Path cachedDestination =
repositoryCache.get(sha256, destination, KeyType.SHA256, canonicalId);
repositoryCache.get(cacheKey, destination, cacheKeyType, canonicalId);
if (cachedDestination != null) {
// Cache hit!
eventHandler.post(new RepositoryCacheHitEvent(repo, sha256, mainUrl));
eventHandler.post(new RepositoryCacheHitEvent(repo, cacheKey, mainUrl));
return cachedDestination;
}
} catch (IOException e) {
Expand All @@ -163,16 +164,16 @@ public Path download(
boolean match = false;
Path candidate = dir.getRelative(destination.getBaseName());
try {
match = RepositoryCache.getChecksum(KeyType.SHA256, candidate).equals(sha256);
match = RepositoryCache.getChecksum(cacheKeyType, candidate).equals(cacheKey);
} catch (IOException e) {
// Not finding anything in a distdir is a normal case, so handle it absolutely
// quietly. In fact, it is not uncommon to specify a whole list of dist dirs,
// with the asumption that only one will contain an entry.
}
if (match) {
if (isCachingByProvidedSha256) {
if (isCachingByProvidedChecksum) {
try {
repositoryCache.put(sha256, candidate, KeyType.SHA256, canonicalId);
repositoryCache.put(cacheKey, candidate, cacheKeyType, canonicalId);
} catch (IOException e) {
eventHandler.handle(
Event.warn("Failed to copy " + candidate + " to repository cache: " + e));
Expand Down Expand Up @@ -201,7 +202,7 @@ public Path download(
// Connect to the best mirror and download the file, while reporting progress to the CLI.
semaphore.acquire();
boolean success = false;
try (HttpStream payload = multiplexer.connect(urls, sha256, authHeaders);
try (HttpStream payload = multiplexer.connect(urls, checksum, authHeaders);
OutputStream out = destination.getOutputStream()) {
ByteStreams.copy(payload, out);
success = true;
Expand All @@ -215,8 +216,9 @@ public Path download(
eventHandler.post(new FetchEvent(urls.get(0).toString(), success));
}

if (isCachingByProvidedSha256) {
repositoryCache.put(sha256, destination, KeyType.SHA256, canonicalId);
if (isCachingByProvidedChecksum) {
repositoryCache.put(
checksum.get().toString(), destination, checksum.get().getKeyType(), canonicalId);
} else if (repositoryCache.isEnabled()) {
String newSha256 = repositoryCache.put(destination, KeyType.SHA256, canonicalId);
eventHandler.handle(Event.info("SHA256 (" + urls.get(0) + ") = " + newSha256));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,11 @@

package com.google.devtools.build.lib.bazel.repository.downloader;

import com.google.common.base.Optional;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
import com.google.common.io.ByteStreams;
import com.google.devtools.build.lib.bazel.repository.downloader.RetryingInputStream.Reconnector;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadCompatible;
Expand Down Expand Up @@ -62,9 +61,9 @@ static class Factory {
HttpStream create(
@WillCloseWhenClosed URLConnection connection,
URL originalUrl,
String sha256,
Optional<Checksum> checksum,
Reconnector reconnector)
throws IOException {
throws IOException {
InputStream stream = new InterruptibleInputStream(connection.getInputStream());
try {
// If server supports range requests, we can retry on read errors. See RFC7233 § 2.3.
Expand All @@ -89,8 +88,8 @@ HttpStream create(
stream = new GZIPInputStream(stream, GZIP_BUFFER_BYTES);
}

if (!sha256.isEmpty()) {
stream = new HashInputStream(stream, Hashing.sha256(), HashCode.fromString(sha256));
if (checksum.isPresent()) {
stream = new HashInputStream(stream, checksum.get());
if (retrier != null) {
retrier.disabled = true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import com.google.devtools.build.lib.bazel.repository.DecompressorValue;
import com.google.devtools.build.lib.bazel.repository.cache.RepositoryCache;
import com.google.devtools.build.lib.bazel.repository.cache.RepositoryCache.KeyType;
import com.google.devtools.build.lib.bazel.repository.downloader.Checksum;
import com.google.devtools.build.lib.bazel.repository.downloader.HttpDownloader;
import com.google.devtools.build.lib.bazel.repository.downloader.HttpUtils;
import com.google.devtools.build.lib.cmdline.Label;
Expand Down Expand Up @@ -474,6 +475,12 @@ public StructImpl download(
warnAboutSha256Error(urls, sha256);
sha256 = "";
}
Optional<Checksum> checksum;
if (sha256.isEmpty()) {
checksum = Optional.absent();
} else {
checksum = Optional.of(Checksum.fromString(KeyType.SHA256, sha256));
}
SkylarkPath outputPath = getPath("download()", output);
WorkspaceRuleEvent w =
WorkspaceRuleEvent.newDownloadEvent(
Expand All @@ -487,7 +494,7 @@ public StructImpl download(
httpDownloader.download(
urls,
authHeaders,
sha256,
checksum,
canonicalId,
Optional.<String>absent(),
outputPath.getPath(),
Expand Down Expand Up @@ -579,6 +586,12 @@ public StructImpl downloadAndExtract(
warnAboutSha256Error(urls, sha256);
sha256 = "";
}
Optional<Checksum> checksum;
if (sha256.isEmpty()) {
checksum = Optional.absent();
} else {
checksum = Optional.of(Checksum.fromString(KeyType.SHA256, sha256));
}

WorkspaceRuleEvent w =
WorkspaceRuleEvent.newDownloadAndExtractEvent(
Expand All @@ -601,7 +614,7 @@ public StructImpl downloadAndExtract(
httpDownloader.download(
urls,
authHeaders,
sha256,
checksum,
canonicalId,
Optional.of(type),
outputPath.getPath(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ java_test(
deps = [
"//src/main/java/com/google/devtools/build/lib:events",
"//src/main/java/com/google/devtools/build/lib:util",
"//src/main/java/com/google/devtools/build/lib/bazel/repository/cache",
"//src/main/java/com/google/devtools/build/lib/bazel/repository/downloader",
"//src/test/java/com/google/devtools/build/lib:foundations_testutil",
"//src/test/java/com/google/devtools/build/lib:test_runner",
Expand Down
Loading

0 comments on commit 62fa89d

Please sign in to comment.