Skip to content

Commit

Permalink
accounts-db: unpack_archive: unpack accounts straight into their fina…
Browse files Browse the repository at this point in the history
…l destination (solana-labs#289)

* accounts-db: unpack_archive: avoid extra iteration on each path

We used to do a iterator.clone().any(...) followed by
iterator.collect(). Merge the two and avoid an extra iteration and
re-parsing of the path.

* accounts-db: unpack_archive: unpack accounts straight into their final destination

We used to unpack accounts into account_path/accounts/<account> then
rename to account_path/<account>. We now unpack them into their final
destination directly and avoid the rename syscall.
  • Loading branch information
alessandrod authored and willhickey committed Mar 20, 2024
1 parent 7ad99f3 commit dcdce7c
Showing 1 changed file with 50 additions and 26 deletions.
76 changes: 50 additions & 26 deletions accounts-db/src/hardened_unpack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,27 +112,26 @@ where
// first by ourselves when there are odd paths like including `..` or /
// for our clearer pattern matching reasoning:
// https://docs.rs/tar/0.4.26/src/tar/entry.rs.html#371
let parts = path.components().map(|p| match p {
CurDir => Some("."),
Normal(c) => c.to_str(),
_ => None, // Prefix (for Windows) and RootDir are forbidden
});
let parts = path
.components()
.map(|p| match p {
CurDir => Ok("."),
Normal(c) => c.to_str().ok_or(()),
_ => Err(()), // Prefix (for Windows) and RootDir are forbidden
})
.collect::<std::result::Result<Vec<_>, _>>();

// Reject old-style BSD directory entries that aren't explicitly tagged as directories
let legacy_dir_entry =
entry.header().as_ustar().is_none() && entry.path_bytes().ends_with(b"/");
let kind = entry.header().entry_type();
let reject_legacy_dir_entry = legacy_dir_entry && (kind != Directory);

if parts.clone().any(|p| p.is_none()) || reject_legacy_dir_entry {
let (Ok(parts), false) = (parts, reject_legacy_dir_entry) else {
return Err(UnpackError::Archive(format!(
"invalid path found: {path_str:?}"
)));
}
};

let parts: Vec<_> = parts.map(|p| p.unwrap()).collect();
let account_filename =
(parts.len() == 2 && parts[0] == "accounts").then(|| PathBuf::from(parts[1]));
let unpack_dir = match entry_checker(parts.as_slice(), kind) {
UnpackPath::Invalid => {
return Err(UnpackError::Archive(format!(
Expand All @@ -159,30 +158,32 @@ where
)?;
total_count = checked_total_count_increment(total_count, limit_count)?;

let target = sanitize_path(&entry.path()?, unpack_dir)?; // ? handles file system errors
if target.is_none() {
let account_filename = match parts.as_slice() {
["accounts", account_filename] => Some(PathBuf::from(account_filename)),
_ => None,
};
let entry_path = if let Some(account) = account_filename {
// Special case account files. We're unpacking an account entry inside one of the
// account_paths returned by `entry_checker`. We want to unpack into
// account_path/<account> instead of account_path/accounts/<account> so we strip the
// accounts/ prefix.
sanitize_path(&account, unpack_dir)
} else {
sanitize_path(&path, unpack_dir)
}?; // ? handles file system errors
let Some(entry_path) = entry_path else {
continue; // skip it
}
let target = target.unwrap();
};

let unpack = entry.unpack(target);
let unpack = entry.unpack(&entry_path);
check_unpack_result(unpack.map(|_unpack| true)?, path_str)?;

// Sanitize permissions.
let mode = match entry.header().entry_type() {
GNUSparse | Regular => 0o644,
_ => 0o755,
};
let entry_path_buf = unpack_dir.join(entry.path()?);
set_perms(&entry_path_buf, mode)?;

let entry_path = if let Some(account_filename) = account_filename {
let stripped_path = unpack_dir.join(account_filename); // strip away "accounts"
fs::rename(&entry_path_buf, &stripped_path)?;
stripped_path
} else {
entry_path_buf
};
set_perms(&entry_path, mode)?;

// Process entry after setting permissions
entry_processor(entry_path);
Expand Down Expand Up @@ -1029,4 +1030,27 @@ mod tests {
if message == "too many files in snapshot: 1000000000000"
);
}

#[test]
fn test_archive_unpack_account_path() {
let mut header = Header::new_gnu();
header.set_path("accounts/123.456").unwrap();
header.set_size(4);
header.set_cksum();
let data: &[u8] = &[1, 2, 3, 4];

let mut archive = Builder::new(Vec::new());
archive.append(&header, data).unwrap();
let result = with_finalize_and_unpack(archive, |ar, tmp| {
unpack_snapshot_with_processors(
ar,
tmp,
&[tmp.join("accounts_dest")],
None,
|_, _| {},
|path| assert_eq!(path, tmp.join("accounts_dest/123.456")),
)
});
assert_matches!(result, Ok(()));
}
}

0 comments on commit dcdce7c

Please sign in to comment.