Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

locking behaviour improvements #146

Merged
merged 4 commits into from
Aug 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ tempfile = "3.5.0"
ureq = { version = "2.7.1", features = ["http-interop"] }
reqwest = { version = "0.11.18", features = ["blocking", "gzip"] }
serial_test = "2.0.0"
parking_lot = "0.12.1"

[package.metadata.docs.rs]
targets = ["x86_64-unknown-linux-gnu"]
Expand Down
1 change: 1 addition & 0 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub enum GixError {
#[error("The '{}' file is missing at the root of the tree of the crates index", path.display())]
PathMissing { path: std::path::PathBuf },
#[error(transparent)]
#[deprecated(note = "This variant can't happen anymore as locks aren't used when opening the index")]
LockAcquire(#[from] gix::lock::acquire::Error),
#[error(transparent)]
ParseRefSpec(#[from] gix::refspec::parse::Error),
Expand Down
129 changes: 85 additions & 44 deletions src/git/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@
#[doc(hidden)]
#[deprecated(note = "use new_cargo_default()")]
pub fn new<P: Into<PathBuf>>(path: P) -> Self {
Self::from_path_and_url(path.into(), URL.into()).unwrap()
Self::from_path_and_url(path.into(), URL.into(), Mode::ReadOnly)
.unwrap()
.expect("repo present after possibly cloning index")
}

/// Creates an index for the default crates.io registry, using the same
Expand All @@ -69,29 +71,66 @@
/// *Note that this clones a new index if none is present yet.
///
/// Note this function takes the `CARGO_HOME` environment variable into account
#[inline]
///
/// ### Concurrency
///
/// Concurrent invocations may fail if the index needs to be cloned. To prevent that,
/// use synchronization mechanisms like mutexes or file locks as needed by the application.
pub fn new_cargo_default() -> Result<Self, Error> {
let url = config::get_crates_io_replacement(None, None)?;
Self::from_url(url.as_deref().unwrap_or(URL))
}

/// Like [`Self::new_cargo_default()`], but read-only without auto-cloning the cargo default git index.
pub fn try_new_cargo_default() -> Result<Option<Self>, Error> {
let url = config::get_crates_io_replacement(None, None)?;
Self::try_from_url(url.as_deref().unwrap_or(URL))
}

/// Creates a bare index from a provided URL, opening the same location on
/// disk that Cargo uses for that registry index.
///
/// *Note that this clones a new index if none is present yet.
///
/// It can be used to access custom registries.
///
/// ### Concurrency
///
/// Concurrent invocations may fail if the index needs to be cloned. To prevent that,
/// use synchronization mechanisms like mutexes or file locks as needed by the application.
pub fn from_url(url: &str) -> Result<Self, Error> {
let (path, canonical_url) = local_path_and_canonical_url(url, None)?;
Self::from_path_and_url(path, canonical_url)
Ok(
Self::from_path_and_url(path, canonical_url, Mode::CloneUrlToPathIfRepoMissing)?
.expect("repo present after possibly cloning it"),
)
}

/// Like [`Self::from_url()`], but read-only without auto-cloning the index at `url`.
pub fn try_from_url(url: &str) -> Result<Option<Self>, Error> {
let (path, canonical_url) = local_path_and_canonical_url(url, None)?;
Self::from_path_and_url(path, canonical_url, Mode::ReadOnly)
}

/// Creates a bare index at the provided `path` with the specified repository `URL`.
///
/// *Note that this clones a new index to `path` if none is present there yet.
#[inline]
///
/// ### Concurrency
///
/// Concurrent invocations may fail if the index needs to be cloned. To prevent that,
/// use synchronization mechanisms like mutexes or file locks as needed by the application.
pub fn with_path<P: Into<PathBuf>, S: Into<String>>(path: P, url: S) -> Result<Self, Error> {
Self::from_path_and_url(path.into(), url.into())
Ok(
Self::from_path_and_url(path.into(), url.into(), Mode::CloneUrlToPathIfRepoMissing)?
.expect("repo present after possibly cloning it"),
)
}

/// Like [`Self::with_path()`], but read-only without auto-cloning the index at `url` if it's not already
/// present at `path`.
pub fn try_with_path<P: Into<PathBuf>, S: Into<String>>(path: P, url: S) -> Result<Option<Self>, Error> {
Self::from_path_and_url(path.into(), url.into(), Mode::ReadOnly)
}

/// Get the index directory.
Expand Down Expand Up @@ -123,8 +162,7 @@
Ok(changes::Changes::new(self)?)
}

fn from_path_and_url(path: PathBuf, url: String) -> Result<Self, Error> {
let mut mapping = gix::sec::trust::Mapping::default();
fn from_path_and_url(path: PathBuf, url: String, mode: Mode) -> Result<Option<Self>, Error> {
let open_with_complete_config = gix::open::Options::default().permissions(gix::open::Permissions {
config: gix::open::permissions::Config {
// Be sure to get all configuration, some of which is only known by the git binary.
Expand All @@ -134,47 +172,45 @@
},
..Default::default()
});
mapping.reduced = open_with_complete_config.clone();
mapping.full = open_with_complete_config.clone();

let _lock = gix::lock::Marker::acquire_to_hold_resource(
path.with_extension("crates-index"),
gix::lock::acquire::Fail::AfterDurationWithBackoff(std::time::Duration::from_secs(60 * 10)),
Some(PathBuf::from_iter(Some(std::path::Component::RootDir))),
)
.map_err(GixError::from)?;
let repo = gix::ThreadSafeRepository::discover_opts(
&path,
gix::discover::upwards::Options::default().apply_environment(),
mapping,
)
.ok()
.map(|repo| repo.to_thread_local())
.filter(|repo| {
// The `cargo` standard registry clone has no configured origin (when created with `git2`).
repo.find_remote("origin").map_or(true, |remote| {
remote
.url(gix::remote::Direction::Fetch)
.map_or(false, |remote_url| remote_url.to_bstring() == url)
})
});

let repo = match repo {
Some(repo) => repo,
None => match gix::open_opts(&path, open_with_complete_config).ok() {
None => clone_url(&url, &path)?,
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
let repo = gix::open_opts(&path, open_with_complete_config.clone())
.ok()
.filter(|repo| {
// The `cargo` standard registry clone has no configured origin (when created with `git2`).
repo.find_remote("origin").map_or(true, |remote| {
remote
.url(gix::remote::Direction::Fetch)
.map_or(false, |remote_url| remote_url.to_bstring() == url)
})
});

let repo = match mode {
Mode::ReadOnly => repo,
Mode::CloneUrlToPathIfRepoMissing => Some(match repo {
Some(repo) => repo,
},
None => match gix::open_opts(&path, open_with_complete_config).ok() {
None => clone_url(&url, &path)?,
Some(repo) => repo,
},
}),
};

let head_commit = Self::find_repo_head(&repo, &path)?;
Ok(Self {
path,
url,
repo,
head_commit_hex: head_commit.to_hex().to_string(),
head_commit,
})
match repo {
None => Ok(None),
Some(repo) => {
let head_commit = Self::find_repo_head(&repo, &path)?;
Ok(Some(Self {
path,
url,
repo,
head_commit_hex: head_commit.to_hex().to_string(),
head_commit,
}))
}
}
}

fn tree(&self) -> Result<gix::Tree<'_>, GixError> {
Expand Down Expand Up @@ -492,7 +528,7 @@

enum MaybeOwned<'a, T> {
Owned(T),
Borrowed(&'a mut T),

Check warning on line 531 in src/git/mod.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

variant `Borrowed` is never constructed

Check warning on line 531 in src/git/mod.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

variant `Borrowed` is never constructed

Check warning on line 531 in src/git/mod.rs

View workflow job for this annotation

GitHub Actions / Test (ubuntu-latest, stable)

variant `Borrowed` is never constructed

Check warning on line 531 in src/git/mod.rs

View workflow job for this annotation

GitHub Actions / Test (windows-latest, stable)

variant `Borrowed` is never constructed
}

/// Iterator over all crates in the index. Skips crates that failed to parse.
Expand All @@ -518,6 +554,11 @@
}
}

enum Mode {
ReadOnly,
CloneUrlToPathIfRepoMissing,
}

#[cfg(test)]
#[cfg(feature = "git-https")]
mod test;
3 changes: 3 additions & 0 deletions src/git/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ fn parse_all_blobs() {
}

fn shared_index() -> GitIndex {
static LOCK: parking_lot::Mutex<()> = parking_lot::Mutex::new(());
let _guard = LOCK.lock();

let index_path = "tests/fixtures/git-registry";
if is_ci::cached() {
GitIndex::new_cargo_default().expect("CI has just cloned this index and its ours and valid")
Expand Down
23 changes: 15 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,11 @@
//! clone the default crates index (or the given one) if it no git
//! repository is present at the destination path.
//!
//! In order to protect from parallel operations of this kind, a
//! file-based lock is used. When interrupting the program with `Ctrl + C`,
//! by default the program will be aborted which won't run destructors.
//! This will cause the file lock to be stranded, causing all future operations
//! to fail.
//! This operation is racy and opening the index concurrently can lead to errors
//! as multiple threads may try to clone the index at the same time if it wasn't there yet.
//!
//! To prevent this issue, the application must integrate with the
//! [`gix-tempfile` signal handler](https://docs.rs/gix-tempfile/latest/gix_tempfile/#initial-setup),
//! which allows locks to be deleted when typical signals are received.
//! To prevent that, consider using synchronization primitives on application level that
//! synchronize methods like [`GitIndex::new_cargo_default()`] and its siblings.
//!
//! ## Git Repository Performance
//!
Expand Down Expand Up @@ -114,6 +110,17 @@ use std::path::{Path, PathBuf};
///
/// Uses a "bare" git index that fetches files directly from the repo instead of local checkout.
/// Uses Cargo's cache.
///
/// ### Instantiation
///
/// When creating an instance of this type, the crates-index will be cloned automatically should it not
/// be present. If a repository is present at the location but the remote doesn't match the desired index URL,
/// a new remote will be added and fetched from.
///
/// Please note that concurrent calls to [`GitIndex::new_cargo_default()`] (and related) will automatically block
/// and wait for each other, so only one instance will try to clone the index while the others will wait for completion.
///
/// This, however, only protects from itself and `cargo` cloning the index at the same time might interfere.
#[cfg(feature = "git")]
pub struct GitIndex {
path: std::path::PathBuf,
Expand Down
10 changes: 10 additions & 0 deletions tests/git/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@ pub(crate) mod with_https {
}
assert!(found_first_crate);
assert!(found_second_crate);

assert!(
GitIndex::try_with_path(repo.path(), repo.url())
.expect("no error opening")
.is_some(),
"index present as we worked with it"
);
}

#[test]
Expand Down Expand Up @@ -161,6 +168,9 @@ pub(crate) mod with_https {
}

pub(crate) fn shared_index() -> GitIndex {
static LOCK: parking_lot::Mutex<()> = parking_lot::Mutex::new(());
let _guard = LOCK.lock();

let index_path = "tests/fixtures/git-registry";
if is_ci::cached() {
GitIndex::new_cargo_default().expect("CI has just cloned this index and its ours and valid")
Expand Down
Loading