Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Linker copies files as a fallback when ref-linking fails #1773

Merged
merged 5 commits into from
Feb 22, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Address suggestions - share Attempt type with `hardlink_wheel_files…
…`, properly recurse in copy fallback attempts / create directories as needed, implement fallbacks when overwriting via reflinks fails.
  • Loading branch information
snowsignal committed Feb 21, 2024
commit 41533253fa1ae0909ba42d971c35e686f65665fd
80 changes: 46 additions & 34 deletions crates/install-wheel-rs/src/linker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::str::FromStr;
use configparser::ini::Ini;
use fs_err as fs;
use fs_err::{DirEntry, File};
use reflink_copy as reflink;
use tempfile::tempdir_in;
use tracing::{debug, instrument};

Expand Down Expand Up @@ -280,7 +281,7 @@ fn clone_wheel_files(
wheel: impl AsRef<Path>,
) -> Result<usize, Error> {
let mut count = 0usize;
let mut attempt = CloneAttempt::default();
let mut attempt = Attempt::default();

// On macOS, directly can be recursively copied with a single `clonefile` call.
// So we only need to iterate over the top-level of the directory, and copy each file or
Expand All @@ -299,10 +300,14 @@ fn clone_wheel_files(
Ok(count)
}

#[derive(Debug, Default, Clone, Copy)]
enum CloneAttempt {
// Hard linking / reflinking might not be supported but we (afaik) can't detect this ahead of time,
// so we'll try hard linking / reflinking the first file - if this succeeds we'll know later
// errors are not due to lack of os/fs support. If it fails, we'll switch to copying for the rest of the
// install.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
enum Attempt {
#[default]
First,
Initial,
Subsequent,
UseCopyFallback,
}
Expand All @@ -312,7 +317,7 @@ fn clone_recursive(
site_packages: &Path,
wheel: &Path,
entry: &DirEntry,
attempt: &mut CloneAttempt,
attempt: &mut Attempt,
) -> Result<(), Error> {
// Determine the existing and destination paths.
let from = entry.path();
Expand All @@ -321,11 +326,10 @@ fn clone_recursive(
debug!("Cloning {} to {}", from.display(), to.display());

match attempt {
CloneAttempt::First => {
*attempt = CloneAttempt::Subsequent;
if let Err(err) = reflink_copy::reflink(&from, &to) {
if err.kind() == std::io::ErrorKind::AlreadyExists {
// If copying fails and the directory exists already, it must be merged recursively.
Attempt::Initial => {
if let Err(err) = reflink::reflink(&from, &to) {
if matches!(err.kind(), std::io::ErrorKind::AlreadyExists) {
// If cloning/copying fails and the directory exists already, it must be merged recursively.
if entry.file_type()?.is_dir() {
for entry in fs::read_dir(from)? {
clone_recursive(site_packages, wheel, &entry?, attempt)?;
Expand All @@ -334,20 +338,30 @@ fn clone_recursive(
// If file already exists, overwrite it.
let tempdir = tempdir_in(site_packages)?;
let tempfile = tempdir.path().join(from.file_name().unwrap());
reflink_copy::reflink(from, &tempfile)?;
fs::rename(&tempfile, to)?;
if reflink::reflink(&from, &tempfile).is_ok() {
fs::rename(&tempfile, to)?;
} else {
debug!("Failed to clone {} to temporary location {} - attempting to copy files as a fallback", from.display(), tempfile.display());
*attempt = Attempt::UseCopyFallback;
fs::copy(&from, &to)?;
}
}
} else {
debug!(
"Failed to clone {} to {} - attempting to copy files as a fallback",
from.display(),
to.display()
);
// switch to copy fallback
*attempt = CloneAttempt::UseCopyFallback;
return clone_recursive(site_packages, wheel, entry, attempt);
*attempt = Attempt::UseCopyFallback;
clone_recursive(site_packages, wheel, entry, attempt)?;
}
}
}
CloneAttempt::Subsequent => {
if let Err(err) = reflink_copy::reflink(&from, &to) {
if err.kind() == std::io::ErrorKind::AlreadyExists {
// If copying fails and the directory exists already, it must be merged recursively.
Attempt::Subsequent => {
if let Err(err) = reflink::reflink(&from, &to) {
if matches!(err.kind(), std::io::ErrorKind::AlreadyExists) {
// If cloning/copying fails and the directory exists already, it must be merged recursively.
if entry.file_type()?.is_dir() {
for entry in fs::read_dir(from)? {
clone_recursive(site_packages, wheel, &entry?, attempt)?;
Expand All @@ -356,19 +370,29 @@ fn clone_recursive(
// If file already exists, overwrite it.
let tempdir = tempdir_in(site_packages)?;
let tempfile = tempdir.path().join(from.file_name().unwrap());
reflink_copy::reflink(from, &tempfile)?;
reflink::reflink(&from, &tempfile)?;
fs::rename(&tempfile, to)?;
}
} else {
return Err(Error::Reflink { to, from, err });
return Err(Error::Reflink { from, to, err });
}
}
}
CloneAttempt::UseCopyFallback => {
fs::copy(&from, &to)?;
Attempt::UseCopyFallback => {
if entry.file_type()?.is_dir() {
fs::create_dir_all(&to)?;
for entry in fs::read_dir(from)? {
clone_recursive(site_packages, wheel, &entry?, attempt)?;
}
} else {
fs::copy(&from, &to)?;
}
}
}

if *attempt == Attempt::Initial {
*attempt = Attempt::Subsequent;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this different than including it as an else in the Attempt::Initial arm?

E.g.:

diff --git a/crates/install-wheel-rs/src/linker.rs b/crates/install-wheel-rs/src/linker.rs
index 6275994f..69eff653 100644
--- a/crates/install-wheel-rs/src/linker.rs
+++ b/crates/install-wheel-rs/src/linker.rs
@@ -356,6 +356,9 @@ fn clone_recursive(
                     *attempt = Attempt::UseCopyFallback;
                     clone_recursive(site_packages, wheel, entry, attempt)?;
                 }
+            } else {
+                // If the first reflink succeeded, we'll use reflink for the rest of the install.
+                *attempt = Attempt::Subsequent;
             }
         }
         Attempt::Subsequent => {
@@ -390,9 +393,6 @@ fn clone_recursive(
         }
     }

-    if *attempt == Attempt::Initial {
-        *attempt = Attempt::Subsequent;
-    }
     Ok(())
 }

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be, because we also want to update the attempt value in the case of an AlreadyExists error that we handle. For example, if we have to overwrite a file, but don't need to fall-back to copying, this else branch would not be hit.

Ok(())
}

Expand Down Expand Up @@ -406,18 +430,6 @@ fn hardlink_wheel_files(
site_packages: impl AsRef<Path>,
wheel: impl AsRef<Path>,
) -> Result<usize, Error> {
// Hard linking might not be supported but we (afaik) can't detect this ahead of time, so we'll
// try hard linking the first file, if this succeeds we'll know later hard linking errors are
// not due to lack of os/fs support, if it fails we'll switch to copying for the rest of the
// install
#[derive(Debug, Default, Clone, Copy)]
enum Attempt {
#[default]
Initial,
Subsequent,
UseCopyFallback,
}

let mut attempt = Attempt::default();
let mut count = 0usize;

Expand Down