Skip to content

Commit

Permalink
Implement read-only local cache (#2048)
Browse files Browse the repository at this point in the history
  • Loading branch information
uellenberg authored Jan 26, 2024
1 parent d2a89d7 commit 742d126
Show file tree
Hide file tree
Showing 10 changed files with 350 additions and 65 deletions.
1 change: 1 addition & 0 deletions docs/Configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ configuration variables
* `SCCACHE_DIR` local on disk artifact cache directory
* `SCCACHE_CACHE_SIZE` maximum size of the local on-disk cache, e.g. `2G` - default is 10G
* `SCCACHE_DIRECT` enable/disable preprocessor caching (see [the local doc](Local.md))
* `SCCACHE_LOCAL_RW_MODE` the mode that the cache will operate in (`READ_ONLY` or `READ_WRITE`)

#### s3 compatible

Expand Down
8 changes: 8 additions & 0 deletions docs/Local.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,11 @@ Configuration options and their default values:
See where to write the config in [the configuration doc](Configuration.md).

*Note that preprocessor caching is currently only implemented for GCC and Clang and when using local storage.*

## Read-only cache mode

By default, the local cache operates in read/write mode. The `SCCACHE_LOCAL_RW_MODE` environment variable can be set to `READ_ONLY` (or `READ_WRITE`) to modify this behavior.

You can use read-only mode to prevent sccache from writing new cache items to the disk. This can be useful, for example, if you want to use items that have already been cached, but not add new ones to the cache.

Note that this feature is only effective if you already have items in your cache. Using this option on an empty cache will cause sccache to do nothing except add overhead.
80 changes: 71 additions & 9 deletions src/cache/cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
use crate::cache::azure::AzureBlobCache;
use crate::cache::disk::DiskCache;
#[cfg(feature = "gcs")]
use crate::cache::gcs::{GCSCache, RWMode};
use crate::cache::gcs::GCSCache;
#[cfg(feature = "gha")]
use crate::cache::gha::GHACache;
#[cfg(feature = "memcached")]
Expand Down Expand Up @@ -115,8 +115,8 @@ impl fmt::Debug for Cache {
}
}

/// CacheMode is used to repreent which mode we are using.
#[derive(Debug)]
/// CacheMode is used to represent which mode we are using.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum CacheMode {
/// Only read cache from storage.
ReadOnly,
Expand Down Expand Up @@ -578,17 +578,12 @@ pub fn storage_from_config(
}) => {
debug!("Init gcs cache with bucket {bucket}, key_prefix {key_prefix}");

let gcs_read_write_mode = match rw_mode {
config::GCSCacheRWMode::ReadOnly => RWMode::ReadOnly,
config::GCSCacheRWMode::ReadWrite => RWMode::ReadWrite,
};

let storage = GCSCache::build(
bucket,
key_prefix,
cred_path.as_deref(),
service_account.as_deref(),
gcs_read_write_mode,
(*rw_mode).into(),
credential_url.as_deref(),
)
.map_err(|err| anyhow!("create gcs cache failed: {err:?}"))?;
Expand Down Expand Up @@ -680,18 +675,21 @@ pub fn storage_from_config(

let (dir, size) = (&config.fallback_cache.dir, config.fallback_cache.size);
let preprocessor_cache_mode_config = config.fallback_cache.preprocessor_cache_mode;
let rw_mode = config.fallback_cache.rw_mode.into();
debug!("Init disk cache with dir {:?}, size {}", dir, size);
Ok(Arc::new(DiskCache::new(
dir,
size,
pool,
preprocessor_cache_mode_config,
rw_mode,
)))
}

#[cfg(test)]
mod test {
use super::*;
use crate::config::CacheModeConfig;

#[test]
fn test_normalize_key() {
Expand All @@ -700,4 +698,68 @@ mod test {
"0/1/2/0123456789abcdef0123456789abcdef"
);
}

#[test]
fn test_read_write_mode_local() {
    // A current-thread runtime is sufficient here: every cache operation is
    // awaited serially through `block_on`. (The previous `.worker_threads(1)`
    // call was removed — that setting only applies to the multi-thread
    // runtime and had no effect on `new_current_thread()`.)
    let runtime = tokio::runtime::Builder::new_current_thread()
        .enable_all()
        .build()
        .unwrap();

    // Use the disk (fallback) cache by leaving the primary cache unset.
    let mut config = Config {
        cache: None,
        ..Default::default()
    };

    // Point the disk cache at a fresh temporary directory so the test is
    // hermetic and leaves nothing behind.
    let tempdir = tempfile::Builder::new()
        .prefix("sccache_test_rust_cargo")
        .tempdir()
        .context("Failed to create tempdir")
        .unwrap();
    let cache_dir = tempdir.path().join("cache");
    fs::create_dir(&cache_dir).unwrap();

    config.fallback_cache.dir = cache_dir;

    // Read/write mode: both kinds of cache writes must succeed.
    config.fallback_cache.rw_mode = CacheModeConfig::ReadWrite;

    {
        let cache = storage_from_config(&config, runtime.handle()).unwrap();

        runtime.block_on(async move {
            cache.put("test1", CacheWrite::default()).await.unwrap();
            cache
                .put_preprocessor_cache_entry("test1", PreprocessorCacheEntry::default())
                .unwrap();
        });
    }

    // Read-only mode: the same writes must now fail with the read-only error.
    config.fallback_cache.rw_mode = CacheModeConfig::ReadOnly;

    {
        let cache = storage_from_config(&config, runtime.handle()).unwrap();

        runtime.block_on(async move {
            assert_eq!(
                cache
                    .put("test1", CacheWrite::default())
                    .await
                    .unwrap_err()
                    .to_string(),
                "Cannot write to a read-only cache"
            );
            assert_eq!(
                cache
                    .put_preprocessor_cache_entry("test1", PreprocessorCacheEntry::default())
                    .unwrap_err()
                    .to_string(),
                "Cannot write to a read-only cache"
            );
        });
    }
}
}
18 changes: 17 additions & 1 deletion src/cache/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::cache::{Cache, CacheRead, CacheWrite, Storage};
use crate::cache::{Cache, CacheMode, CacheRead, CacheWrite, Storage};
use crate::compiler::PreprocessorCacheEntry;
use crate::lru_disk_cache::LruDiskCache;
use crate::lru_disk_cache::{Error as LruError, ReadSeek};
Expand Down Expand Up @@ -72,6 +72,7 @@ pub struct DiskCache {
pool: tokio::runtime::Handle,
preprocessor_cache_mode_config: PreprocessorCacheModeConfig,
preprocessor_cache: Arc<Mutex<LazyDiskCache>>,
rw_mode: CacheMode,
}

impl DiskCache {
Expand All @@ -81,6 +82,7 @@ impl DiskCache {
max_size: u64,
pool: &tokio::runtime::Handle,
preprocessor_cache_mode_config: PreprocessorCacheModeConfig,
rw_mode: CacheMode,
) -> DiskCache {
DiskCache {
lru: Arc::new(Mutex::new(LazyDiskCache::Uninit {
Expand All @@ -95,6 +97,7 @@ impl DiskCache {
.into_os_string(),
max_size,
})),
rw_mode,
}
}
}
Expand Down Expand Up @@ -137,6 +140,11 @@ impl Storage for DiskCache {
// We should probably do this on a background thread if we're going to buffer
// everything in memory...
trace!("DiskCache::finish_put({})", key);

if self.rw_mode == CacheMode::ReadOnly {
return Err(anyhow!("Cannot write to a read-only cache"));
}

let lru = self.lru.clone();
let key = make_key_path(key);

Expand All @@ -150,6 +158,10 @@ impl Storage for DiskCache {
.await?
}

/// Report the operational mode of this cache.
///
/// Returns the `CacheMode` (`ReadOnly` or `ReadWrite`) that this
/// `DiskCache` was constructed with; never fails.
async fn check(&self) -> Result<CacheMode> {
    Ok(self.rw_mode)
}

/// Human-readable description of where this cache stores its data,
/// i.e. the on-disk path of the underlying LRU cache.
fn location(&self) -> String {
    format!("Local disk: {:?}", self.lru.lock().unwrap().path())
}
Expand Down Expand Up @@ -178,6 +190,10 @@ impl Storage for DiskCache {
key: &str,
preprocessor_cache_entry: PreprocessorCacheEntry,
) -> Result<()> {
if self.rw_mode == CacheMode::ReadOnly {
return Err(anyhow!("Cannot write to a read-only cache"));
}

let key = normalize_key(key);
let mut buf = vec![];
preprocessor_cache_entry.serialize_to(&mut buf)?;
Expand Down
23 changes: 8 additions & 15 deletions src/cache/gcs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use crate::cache::CacheMode;
use crate::errors::*;
use opendal::Operator;
use opendal::{layers::LoggingLayer, services::Gcs};
Expand All @@ -21,18 +22,10 @@ use reqwest::Client;
use serde::Deserialize;
use url::Url;

#[derive(Copy, Clone)]
pub enum RWMode {
ReadOnly,
ReadWrite,
}

impl RWMode {
fn to_scope(self) -> &'static str {
match self {
RWMode::ReadOnly => "https://www.googleapis.com/auth/devstorage.read_only",
RWMode::ReadWrite => "https://www.googleapis.com/auth/devstorage.read_write",
}
/// Map a [`CacheMode`] to the Google Cloud Storage OAuth scope URL that
/// grants the corresponding level of access to devstorage.
fn rw_to_scope(mode: CacheMode) -> &'static str {
    if matches!(mode, CacheMode::ReadOnly) {
        "https://www.googleapis.com/auth/devstorage.read_only"
    } else {
        "https://www.googleapis.com/auth/devstorage.read_write"
    }
}

Expand All @@ -46,13 +39,13 @@ impl GCSCache {
key_prefix: &str,
cred_path: Option<&str>,
service_account: Option<&str>,
rw_mode: RWMode,
rw_mode: CacheMode,
credential_url: Option<&str>,
) -> Result<Operator> {
let mut builder = Gcs::default();
builder.bucket(bucket);
builder.root(key_prefix);
builder.scope(rw_mode.to_scope());
builder.scope(rw_to_scope(rw_mode));

if let Some(service_account) = service_account {
builder.service_account(service_account);
Expand All @@ -67,7 +60,7 @@ impl GCSCache {
.map_err(|err| anyhow!("gcs credential url is invalid: {err:?}"))?;

builder.customed_token_loader(Box::new(TaskClusterTokenLoader {
scope: rw_mode.to_scope().to_string(),
scope: rw_to_scope(rw_mode).to_string(),
url: cred_url.to_string(),
}));
}
Expand Down
71 changes: 42 additions & 29 deletions src/compiler/c.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,42 +340,51 @@ where
let mut updated = false;
let hit = preprocessor_cache_entry
.lookup_result_digest(preprocessor_cache_mode_config, &mut updated);

let mut update_failed = false;
if updated {
// Time macros have been found, we need to update
// the preprocessor cache entry. See [`PreprocessorCacheEntry::result_matches`].
debug!(
"Preprocessor cache updated because of time macros: {preprocessor_key}"
);
storage.put_preprocessor_cache_entry(

if let Err(e) = storage.put_preprocessor_cache_entry(
preprocessor_key,
preprocessor_cache_entry,
)?;
) {
debug!("Failed to update preprocessor cache: {}", e);
update_failed = true;
}
}
if let Some(key) = hit {
debug!("Preprocessor cache hit: {preprocessor_key}");
// A compiler binary may be a symlink to another and
// so has the same digest, but that means
// the toolchain will not contain the correct path
// to invoke the compiler! Add the compiler
// executable path to try and prevent this
let weak_toolchain_key =
format!("{}-{}", executable.to_string_lossy(), executable_digest);
return Ok(HashResult {
key,
compilation: Box::new(CCompilation {
parsed_args: parsed_args.to_owned(),
#[cfg(feature = "dist-client")]
// TODO or is it never relevant since dist?
preprocessed_input: vec![],
executable: executable.to_owned(),
compiler: compiler.to_owned(),
cwd: cwd.to_owned(),
env_vars: env_vars.to_owned(),
}),
weak_toolchain_key,
});
} else {
debug!("Preprocessor cache miss: {preprocessor_key}");

if !update_failed {
if let Some(key) = hit {
debug!("Preprocessor cache hit: {preprocessor_key}");
// A compiler binary may be a symlink to another and
// so has the same digest, but that means
// the toolchain will not contain the correct path
// to invoke the compiler! Add the compiler
// executable path to try and prevent this
let weak_toolchain_key =
format!("{}-{}", executable.to_string_lossy(), executable_digest);
return Ok(HashResult {
key,
compilation: Box::new(CCompilation {
parsed_args: parsed_args.to_owned(),
#[cfg(feature = "dist-client")]
// TODO or is it never relevant since dist?
preprocessed_input: vec![],
executable: executable.to_owned(),
compiler: compiler.to_owned(),
cwd: cwd.to_owned(),
env_vars: env_vars.to_owned(),
}),
weak_toolchain_key,
});
} else {
debug!("Preprocessor cache miss: {preprocessor_key}");
}
}
}
}
Expand Down Expand Up @@ -491,8 +500,12 @@ where
.collect();
files.sort_unstable_by(|a, b| a.1.cmp(&b.1));
preprocessor_cache_entry.add_result(start_of_compilation, &key, files);
storage
.put_preprocessor_cache_entry(&preprocessor_key, preprocessor_cache_entry)?;

if let Err(e) = storage
.put_preprocessor_cache_entry(&preprocessor_key, preprocessor_cache_entry)
{
debug!("Failed to update preprocessor cache: {}", e);
}
}
}

Expand Down
Loading

0 comments on commit 742d126

Please sign in to comment.