diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d28c642..b2061b60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,22 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## Unreleased +### Added +* A new installation mode has been defined which symlinks identical cargo + artifacts against previously generated ones. This allows for linear space + usage in the Nix store across many chained derivations (as opposed to using a + zstd compressed tarball which uses quadratic space across many chained + derivations). + +### Changed +* **Breaking**: all cargo-based derivations will now default to symlinking + their installed artifacts together instead of using zstd compressed tarballs. + To get the old behavior back, set `installCargoArtifactsMode = "use-zstd";` in + the derivation. + - Note that `buildPackage` will continue to use zstd compressed tarballs while + building dependencies (unless either of `cargoArtifacts` or + `installCargoArtifactsMode` is defined, in which case they will be honored) + ## [0.9.0] - 2022-10-29 ### Changed diff --git a/ci/run-tests.sh b/ci/run-tests.sh index 64ac2968..c6adcfe6 100755 --- a/ci/run-tests.sh +++ b/ci/run-tests.sh @@ -11,7 +11,10 @@ main() { runLocked="1" while [ $# -gt 0 ]; do - case "$1" in + local arg="$1"; + shift + + case "${arg}" in "--locked") runLocked="1" runStable="" @@ -21,7 +24,7 @@ main() { runStable="1" ;; *) - echo "unrecognized option $1" + echo "unrecognized option ${arg}" exit 1 ;; esac @@ -58,4 +61,4 @@ runtests() { done } -main +main "$@" diff --git a/docs/API.md b/docs/API.md index d1cab071..78fc5e86 100644 --- a/docs/API.md +++ b/docs/API.md @@ -168,7 +168,8 @@ subsequently install from that log. `target` directory, which will be reused at the start of the derivation. Useful for caching incremental cargo builds. 
- Default value: the result of `buildDepsOnly` after applying the arguments - set (with the respective default values) + set (with the respective default values). `installCargoArtifactsMode` will + be set to `"use-zstd"` if not specified. * `cargoBuildCommand`: A cargo invocation to run during the derivation's build phase - Default value: `"cargo build --profile release"` @@ -1095,7 +1096,14 @@ directory using a previous derivation. It takes two positional arguments: * If not specified, the value of `$cargoArtifacts` will be used * If `cargoArtifacts` is not specified, an error will be raised * If the specified path is a directory which contains a file called - `target.tar.zst`, then that file will be used during unpacking + `target.tar.zst`, then that file will be used as specified below + * If the specified path is a file (and not a directory) it is assumed that it + contains a zstd compressed tarball and will be decompressed and unpacked + into the specified cargo artifacts directory + * If the specified path is a directory which contains another directory + called `target`, then that directory will be used as specified below + * If the specified path is a directory, its contents will be copied into the + specified cargo artifacts directory * The previously prepared artifacts are expected to be a zstd compressed tarball 1. the path to cargo's artifact directory, where the previously prepared @@ -1112,18 +1120,56 @@ post patch hook. ### `lib.installCargoArtifactsHook` -Defines `prepareAndInstallCargoArtifactsDir()` which handles installing cargo's -artifact directory to the derivation's output. It takes two positional -arguments: +Defines `compressAndInstallCargoArtifactsDir()` which handles installing +cargo's artifact directory to the derivation's output as a zstd compressed +tarball. It takes two positional arguments: 1. the installation directory for the output. 
- * If not specified, the value of `$out` will be used + * An error will be raised if not specified * Cargo's artifact directory will be compressed as a reproducible tarball with zstd compression. It will be written to this directory and named `target.tar.zstd` +1. the path to cargo's artifact directory + * An error will be raised if not specified + +Defines `dedupAndInstallCargoArtifactsDir()` which handles installing +cargo's artifact directory to the derivation's output after deduplicating +identical files against a directory of previously prepared cargo artifacts. +It takes three positional arguments: +1. the installation directory for the output. + * An error will be raised if not specified + * If the specified path is a directory which exists then the current cargo + artifacts will be compared with the contents of said directory. Any files + whose contents and paths match will be symbolically linked together to + reduce the size of the data stored in the Nix store. +1. the path to cargo's artifact directory + * An error will be raised if not specified +1. a path to the previously prepared cargo artifacts + * An error will be raised if not specified + * `/dev/null` can be specified here if there is no previous directory to + deduplicate against + +Defines `prepareAndInstallCargoArtifactsDir()` which handles installing cargo's +artifact directory to the derivation's output. It takes three positional +arguments: +1. the installation directory for the output. + * If not specified, the value of `$out` will be used + * Cargo's artifact directory will be installed based on the installation mode + selected below 1. the path to cargo's artifact directory * If not specified, the value of `$CARGO_TARGET_DIR` will be used * If `CARGO_TARGET_DIR` is not set, cargo's default target location (i.e. `./target`) will be used. +1. the installation mode to apply + * If not specified, the value of `$installCargoArtifactsMode` will be used. 
+ If `$installCargoArtifactsMode` is not specified, a default value of + `"use-symlink"` will be used + * If set to "use-symlink" then `dedupAndInstallCargoArtifactsDir()` will be + used. + - If `$cargoArtifacts` is defined and `$cargoArtifacts/target` is a valid + directory, it will be used during file deduplication + * If set to "use-zstd" then `compressAndInstallCargoArtifactsDir()` will be + used. + * Otherwise an error will be raised if the mode is not recognized **Automatic behavior:** if `doInstallCargoArtifacts` is set to `1`, then `prepareAndInstallCargoArtifactsDir "$out" "$CARGO_TARGET_DIR"` will be run as a diff --git a/lib/buildPackage.nix b/lib/buildPackage.nix index 643c0a44..fc1b07c0 100644 --- a/lib/buildPackage.nix +++ b/lib/buildPackage.nix @@ -35,7 +35,9 @@ mkCargoDerivation (cleanedArgs // memoizedArgs // { doCheck = args.doCheck or true; doInstallCargoArtifacts = args.doInstallCargoArtifacts or false; - cargoArtifacts = args.cargoArtifacts or (buildDepsOnly args // memoizedArgs); + cargoArtifacts = args.cargoArtifacts or (buildDepsOnly (args // memoizedArgs // { + installCargoArtifactsMode = args.installCargoArtifactsMode or "use-zstd"; + })); buildPhaseCargoCommand = args.buildPhaseCargoCommand or '' cargoBuildLog=$(mktemp cargoBuildLogXXXX.json) diff --git a/lib/setupHooks/inheritCargoArtifactsHook.sh b/lib/setupHooks/inheritCargoArtifactsHook.sh index 8d46aa79..8f00b13d 100644 --- a/lib/setupHooks/inheritCargoArtifactsHook.sh +++ b/lib/setupHooks/inheritCargoArtifactsHook.sh @@ -5,15 +5,41 @@ inheritCargoArtifacts() { local cargoTargetDir="${2:-${CARGO_TARGET_DIR:-target}}" if [ -d "${preparedArtifacts}" ]; then - local preparedArtifacts="${preparedArtifacts}/target.tar.zst" + local candidateTarZst="${preparedArtifacts}/target.tar.zst" + local candidateTargetDir="${preparedArtifacts}/target" + + if [ -f "${candidateTarZst}" ]; then + local preparedArtifacts="${candidateTarZst}" + elif [ -d "${candidateTargetDir}" ]; then + local 
preparedArtifacts="${candidateTargetDir}" + fi fi + mkdir -p "${cargoTargetDir}" if [ -f "${preparedArtifacts}" ]; then - mkdir -p "${cargoTargetDir}" - echo "copying cargo artifacts from ${preparedArtifacts} to ${cargoTargetDir}" - + echo "decompressing cargo artifacts from ${preparedArtifacts} to ${cargoTargetDir}" + zstd -d "${preparedArtifacts}" --stdout | \ tar -x -C "${cargoTargetDir}" --strip-components=1 + elif [ -d "${preparedArtifacts}" ]; then + echo "copying cargo artifacts from ${preparedArtifacts} to ${cargoTargetDir}" + + # NB: rustc doesn't like it when artifacts are either symlinks or hardlinks to the store + # (it tries to truncate files instead of unlinking and recreating them) + # so we're forced to do a full copy here :( + # + # Notes: + # - --no-target-directory to avoid nesting (i.e. `./target/target`) + # - preserve timestamps to avoid rebuilding + # - no-preserve mode to ensure copies are writable + cp -r "${preparedArtifacts}" \ + --no-target-directory "${cargoTargetDir}" \ + --preserve=timestamps \ + --no-preserve=mode + + # NB: cargo also doesn't like it if `.cargo-lock` files remain with a + # timestamp in the distant past so we need to delete them here + find "${cargoTargetDir}" -name '.cargo-lock' -delete else echo unable to copy cargo artifacts, \"${preparedArtifacts}\" looks invalid false diff --git a/lib/setupHooks/installCargoArtifactsHook.sh b/lib/setupHooks/installCargoArtifactsHook.sh index d238b532..89b5f770 100644 --- a/lib/setupHooks/installCargoArtifactsHook.sh +++ b/lib/setupHooks/installCargoArtifactsHook.sh @@ -1,23 +1,79 @@ +compressAndInstallCargoArtifactsDir() { + local dir="${1:?destination directory not defined}" + local cargoTargetDir="${2:?cargoTargetDir not defined}" + + mkdir -p "${dir}" + + local dest="${dir}/target.tar.zst" + echo "compressing ${cargoTargetDir} to ${dest}" + ( + export SOURCE_DATE_EPOCH=1 + tar --sort=name \ + --mtime="@${SOURCE_DATE_EPOCH}" \ + --owner=0 \ + --group=0 \ + --numeric-owner \ + 
--pax-option=exthdr.name=%d/PaxHeaders/%f,delete=atime,delete=ctime \ + -c "${cargoTargetDir}" | zstd -o "${dest}" + ) +} + +dedupAndInstallCargoArtifactsDir() { + local dest="${1:?destination directory not defined}" + local cargoTargetDir="${2:?cargoTargetDir not defined}" + local prevCargoTargetDir="${3:?prevCargoTargetDir not defined}" + + mkdir -p "${dest}" + + if [ -d "${prevCargoTargetDir}" ]; then + echo "symlinking duplicates in ${cargoTargetDir} to ${prevCargoTargetDir}" + + while read -r fullTargetFile; do + # Strip the common prefix of the current target directory + local targetFile="${fullTargetFile#"${cargoTargetDir}"}" + # Join the path and ensure we don't have a duplicate `/` separator + local candidateOrigFile="${prevCargoTargetDir}/${targetFile#/}" + + if cmp --silent "${candidateOrigFile}" "${fullTargetFile}"; then + ln --symbolic --force --logical "${candidateOrigFile}" "${fullTargetFile}" + fi + done < <(find "${cargoTargetDir}" -type f) + fi + + echo installing "${cargoTargetDir}" to "${dest}" + mv "${cargoTargetDir}" --target-directory="${dest}" +} + prepareAndInstallCargoArtifactsDir() { # Allow for calling with customized parameters # or fall back to defaults if none are provided local dir="${1:-${out}}" local cargoTargetDir="${2:-${CARGO_TARGET_DIR:-target}}" - local dest="${dir}/target.tar.zst" - - echo "copying ${cargoTargetDir} to ${dest}" + local mode="${3:-${installCargoArtifactsMode:-use-symlink}}" - export SOURCE_DATE_EPOCH=1 mkdir -p "${dir}" - # See: https://reproducible-builds.org/docs/archives/ - tar --sort=name \ - --mtime="@${SOURCE_DATE_EPOCH}" \ - --owner=0 \ - --group=0 \ - --numeric-owner \ - --pax-option=exthdr.name=%d/PaxHeaders/%f,delete=atime,delete=ctime \ - -c "${cargoTargetDir}" | zstd -o "${dest}" + case "${mode}" in + "use-zstd") + compressAndInstallCargoArtifactsDir "${dir}" "${cargoTargetDir}" + ;; + + "use-symlink") + # Placeholder if previous artifacts aren't present + local prevCargoTargetDir="/dev/null" + + 
if [ -n "${cargoArtifacts}" ] && [ -d "${cargoArtifacts}/target" ]; then + local prevCargoTargetDir="${cargoArtifacts}/target" + fi + + dedupAndInstallCargoArtifactsDir "${dir}" "${cargoTargetDir}" "${prevCargoTargetDir}" + ;; + + *) + echo "unknown mode: \"${mode}\"" + false + ;; + esac } if [ "1" = "${doInstallCargoArtifacts-}" ]; then