From f41e5c78fd53461c378fd53a40149a42f3007452 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Tue, 21 May 2024 12:45:14 +0700 Subject: [PATCH 01/26] Bash script to copy projects from staging or prod Often fails to copy assets because `kubectl cp` or `kubectl exec tar` get cut off partway through, but that should go away once Kubernetes version 1.30 is released. --- backup.sh | 92 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100755 backup.sh diff --git a/backup.sh b/backup.sh new file mode 100755 index 0000000000..81ba92f665 --- /dev/null +++ b/backup.sh @@ -0,0 +1,92 @@ +#!/bin/bash + +# ===== EDIT THIS if your Kubernetes context names are different ===== + +staging_context="dallas-rke" +prod_context="aws-rke" + +# Uncomment one of the two blocks below to choose between staging and prod + +echo "Using staging context..." >&2 +context="${staging_context}" + +# echo "Using prod context..." >&2 +# context="${prod_context}" + +# ===== END of "EDIT THIS" block ===== + +echo "Checking for necessary tools..." >&2 +which jq >/dev/null +if [ $? -ne 0 ]; then + echo "jq (JSON Query) not found. This script needs it to run." >&2 + echo "Try \"apt install jq\" on Linux, \"brew install jq\" on Mac, or \"choco install jq\" or \"winget install jqlang.jq\" on Windows." >&2 + exit 1 +fi + +proj=$1 + +# Create a temp dir reliably on both Linux and OS X +workdir=$(mktemp -d 2>/dev/null || mktemp -d -t 'sfbackup') + +function cleanup { + echo "Cleaning up temporary directory ${workdir}..." >&2 + # Commented out for now since we want to be able to examine the results + # [ -n "${workdir}" ] && [ -d "${workdir}" ] && rm -rf "${workdir}" +} + +[ -n "${workdir}" ] && [ -d "${workdir}" ] && trap cleanup EXIT + +echo "Looking up Mongo ID of local admin user..." >&2 +admin_id=$(docker exec lf-db mongosh -u admin -p pass --authenticationDatabase admin scriptureforge --eval "db.users.findOne({username: 'admin'}, {_id: 1})" | cut -d"'" -f 2) + +if [ -z "${admin_id}" ]; then + echo "Could not find local admin ID. Please try running 'docker exec -it lf-db mongosh' and see what happens." >&2 + exit 1 +fi + +echo "Verifying admin ID..." >&2 +docker exec lf-db mongosh -u admin -p pass --authenticationDatabase admin scriptureforge --eval "db.users.findOne({_id: ObjectId('${admin_id}')}, {name: 1, username: 1, email: 1})" +echo "If that looks wrong, hit Ctrl+C NOW" >&2 +sleep 1 + +echo "Backing up project with ID ${proj}..." >&2 +echo "Getting project code..." >&2 + +projCode=$(kubectl --context="${context}" exec deploy/db -- mongosh --quiet scriptureforge --eval 'db.projects.findOne({_id: ObjectId('"'${proj}'"')}, {projectCode: 1})' | grep projectCode | cut -d"'" -f 2) +echo "Project code: $projCode" >&2 + +echo "If that looks wrong, hit Ctrl+C NOW" >&2 +sleep 1 + +echo "Getting project record..." >&2 + +kubectl --context="${context}" exec deploy/db -- mongosh --quiet scriptureforge --eval 'db.projects.findOne({_id: ObjectId('"'${proj}'"')})' --json=canonical > "${workdir}/project.json" + +echo "Removing users and replacing project manager with admin..." >&2 +jq "setpath([\"users\"]; {\"${admin_id}\": {\"role\": \"project_manager\"}}) | setpath([\"ownerRef\"]; {\"\$oid\": \"${admin_id}\"} )" < "${workdir}/project.json" > "${workdir}/project-modified.json" + +echo "Getting project database..." 
>&2 +dbname="sf_${projCode}" + +kubectl --context="${context}" exec deploy/db -- mongodump -d "${dbname}" --archive > "${workdir}/db.archive" +# Once we require auth, this will become: +# kubectl --context="${context}" exec deploy/db -- mongodump -u admin -p pass --authenticationDatabase admin -d "${dbname}" --archive > "${workdir}/db.archive" +docker exec -i lf-db mongorestore -u admin -p pass --authenticationDatabase admin -d "${dbname}" --drop --archive < "${workdir}"/db.archive + +echo "Loaded project database ${dbname} successfully. Probably. You should check mongosh to be sure." >&2 + +echo "Importing project record into local projects collection..." >&2 +docker exec -i lf-db mongoimport -u admin -p pass --authenticationDatabase admin -d scriptureforge -c projects --mode=upsert < "${workdir}/project-modified.json" + +echo "Okay, ${projCode} should be available in your Language Forge installation now." >&2 + +echo "Fetching assets (might fail or only partially transfer)..." >&2 +echo "NOTE: This may take a long time without feedback, and might fail without warning if the kubectl connection gets dropped partway through..." >&2 +mkdir -p "${workdir}/assets/${dbname}" +kubectl --context="${context}" exec deploy/app -- tar chf - -C "/var/www/html/assets/lexicon/${dbname}" . | tar xf - -i -C "${workdir}/assets/${dbname}" + +echo "Verifying assets (if you see tar errors above, then it might only be a partial transfer)..." >&2 +ls -lR "${workdir}/assets" + +# The /. at the end of the src tells Docker "just copy the *contents* of the directory, don't copy the directory itself" +docker cp "${assetSrc}/." "lf-app:/var/www/html/assets/lexicon/${dbname}" From 67930fbaff6ce899556eb34661dd27b59889a2e0 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Wed, 22 May 2024 12:18:03 +0700 Subject: [PATCH 02/26] Fetch project assets with rsync and retry failures This should ensure that the project assets eventually get copied over to the local Docker setup even under conditions where `kubectl exec` is flaky and fails every couple of minutes. --- backup.sh | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/backup.sh b/backup.sh index 81ba92f665..c5675945ce 100755 --- a/backup.sh +++ b/backup.sh @@ -80,13 +80,25 @@ docker exec -i lf-db mongoimport -u admin -p pass --authenticationDatabase admin echo "Okay, ${projCode} should be available in your Language Forge installation now." >&2 -echo "Fetching assets (might fail or only partially transfer)..." >&2 -echo "NOTE: This may take a long time without feedback, and might fail without warning if the kubectl connection gets dropped partway through..." >&2 +echo "Setting up rsync on target container..." >&2 +kubectl exec --context="${context}" deploy/app -- bash -c "which rsync || (apt update && apt install rsync -y)" + +echo "Fetching assets via rsync (and retrying until success)..." >&2 +echo >&2 +echo "===== IMPORTANT NOTE =====" >&2 +echo "If this stalls at exactly 50% done, then it's really 100% done and hasn't realized it. Just hit Ctrl+C and it will succeed on the retry" >&2 +# TODO: Figure out why rsync is misidentifying the size. Is it related to the -L option (follow symlinks)? +echo "===== IMPORTANT NOTE =====" >&2 +echo >&2 mkdir -p "${workdir}/assets/${dbname}" -kubectl --context="${context}" exec deploy/app -- tar chf - -C "/var/www/html/assets/lexicon/${dbname}" . 
| tar xf - -i -C "${workdir}/assets/${dbname}" +until rsync -rLt --partial --info=progress2 --blocking-io --rsync-path="/var/www/html/assets/lexicon/${dbname}" --rsh="kubectl --context=${context} exec -i deploy/app -- " "rsync:/var/www/html/assets/lexicon/${dbname}/" "${workdir}/assets/${dbname}/" +do + RSYNC_EXIT_CODE=$? + echo "Rsync's exit code was $RSYNC_EXIT_CODE. Retrying..." >&2 +done echo "Verifying assets (if you see tar errors above, then it might only be a partial transfer)..." >&2 ls -lR "${workdir}/assets" # The /. at the end of the src tells Docker "just copy the *contents* of the directory, don't copy the directory itself" -docker cp "${assetSrc}/." "lf-app:/var/www/html/assets/lexicon/${dbname}" +docker cp "${workdir}/assets/${dbname}/." "lf-app:/var/www/html/assets/lexicon/${dbname}" From 93ba2d5771663e2106e1b8b79ae5bb6e0eff4626 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Wed, 22 May 2024 12:23:40 +0700 Subject: [PATCH 03/26] Clean up temp dir and remove debugging code Now that this is working, I can get rid of the `ls -lR` step (which is effectively redundant anyway as `docker cp` is chatty about what files it's copying), and enable the final cleanup of the temporary directory. --- backup.sh | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/backup.sh b/backup.sh index c5675945ce..0e91617444 100755 --- a/backup.sh +++ b/backup.sh @@ -30,8 +30,7 @@ workdir=$(mktemp -d 2>/dev/null || mktemp -d -t 'sfbackup') function cleanup { echo "Cleaning up temporary directory ${workdir}..." >&2 - # Commented out for now since we want to be able to examine the results - # [ -n "${workdir}" ] && [ -d "${workdir}" ] && rm -rf "${workdir}" + [ -n "${workdir}" ] && [ -d "${workdir}" ] && rm -rf "${workdir}" } [ -n "${workdir}" ] && [ -d "${workdir}" ] && trap cleanup EXIT @@ -97,8 +96,6 @@ do echo "Rsync's exit code was $RSYNC_EXIT_CODE. Retrying..." >&2 done -echo "Verifying assets (if you see tar errors above, then it might only be a partial transfer)..." >&2 -ls -lR "${workdir}/assets" - +echo "Copying assets into local Docker container..." >&2 # The /. at the end of the src tells Docker "just copy the *contents* of the directory, don't copy the directory itself" docker cp "${workdir}/assets/${dbname}/." "lf-app:/var/www/html/assets/lexicon/${dbname}" From 4bdf4f619bc55ab88fc2cdb695c3ac13cd290780 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Wed, 22 May 2024 12:27:48 +0700 Subject: [PATCH 04/26] Also check for rsync and warn if not present --- backup.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backup.sh b/backup.sh index 0e91617444..f410c2adaf 100755 --- a/backup.sh +++ b/backup.sh @@ -23,6 +23,13 @@ if [ $? -ne 0 ]; then exit 1 fi +which rsync >/dev/null +if [ $? -ne 0 ]; then + echo "rsync not found. This script needs it in order to copy asset files." >&2 + echo "Try \"apt install rsync\" on Linux, ??? on Mac, or ??? on Windows." >&2 + echo "Continuing anyway, but you may get failures on the asset-copying step..." 
>&2 +fi + proj=$1 # Create a temp dir reliably on both Linux and OS X From 6578b4de3f79d0ec3ea43c3dd394a3d8a058fed3 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Wed, 22 May 2024 12:33:07 +0700 Subject: [PATCH 05/26] Remove one unnecessary output line --- backup.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/backup.sh b/backup.sh index f410c2adaf..931465e570 100755 --- a/backup.sh +++ b/backup.sh @@ -79,8 +79,6 @@ kubectl --context="${context}" exec deploy/db -- mongodump -d "${dbname}" --arch # kubectl --context="${context}" exec deploy/db -- mongodump -u admin -p pass --authenticationDatabase admin -d "${dbname}" --archive > "${workdir}/db.archive" docker exec -i lf-db mongorestore -u admin -p pass --authenticationDatabase admin -d "${dbname}" --drop --archive < "${workdir}"/db.archive -echo "Loaded project database ${dbname} successfully. Probably. You should check mongosh to be sure." >&2 - echo "Importing project record into local projects collection..." >&2 docker exec -i lf-db mongoimport -u admin -p pass --authenticationDatabase admin -d scriptureforge -c projects --mode=upsert < "${workdir}/project-modified.json" From 43d29562c9881207b5dbe15425609bfdff2f9f6c Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Wed, 22 May 2024 12:57:33 +0700 Subject: [PATCH 06/26] Fix permission issues on copied files The docker cp command was preserving the UID/GID of the copied files even though I didn't pass it the `-a` parameter (whose purpose is to preserve the UID/GID of the copied files). To work around this issue, we set the file ownership to 33/33 before copying the files into Docker. --- backup.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/backup.sh b/backup.sh index 931465e570..03cc514ecd 100755 --- a/backup.sh +++ b/backup.sh @@ -95,12 +95,20 @@ echo "If this stalls at exactly 50% done, then it's really 100% done and hasn't echo "===== IMPORTANT NOTE =====" >&2 echo >&2 mkdir -p "${workdir}/assets/${dbname}" -until rsync -rLt --partial --info=progress2 --blocking-io --rsync-path="/var/www/html/assets/lexicon/${dbname}" --rsh="kubectl --context=${context} exec -i deploy/app -- " "rsync:/var/www/html/assets/lexicon/${dbname}/" "${workdir}/assets/${dbname}/" -do +until rsync -rLt --partial --info=progress2 --blocking-io --rsync-path="/var/www/html/assets/lexicon/${dbname}" --rsh="kubectl --context=${context} exec -i deploy/app -- " "rsync:/var/www/html/assets/lexicon/${dbname}/" "${workdir}/assets/${dbname}/"; do RSYNC_EXIT_CODE=$? echo "Rsync's exit code was $RSYNC_EXIT_CODE. Retrying..." >&2 done +echo "Conserving file permissions (you may be prompted for a sudo password)..." >&2 + +sudo chown -R 33:33 "${workdir}/assets" + echo "Copying assets into local Docker container..." >&2 -# The /. at the end of the src tells Docker "just copy the *contents* of the directory, don't copy the directory itself" +# The /. at the end of the src tells Docker "just copy the *contents* of the directory, not the directory itself" docker cp "${workdir}/assets/${dbname}/." "lf-app:/var/www/html/assets/lexicon/${dbname}" + +echo "Resetting file permissions of assets so cleanup step will work..." 
>&2 +CUR_UID=$(id -u) +CUR_GID=$(id -g) +sudo chown -R $CUR_UID:$CUR_GID "${workdir}/assets" From c3d244700e359d156535026bebcdd0106b1d490f Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Wed, 22 May 2024 13:11:44 +0700 Subject: [PATCH 07/26] Better solution for file ownership of copied files The previous solution was too Linux-y; this one doesn't rely on `sudo` or `id` working in a Git Bash environment on Windows. --- backup.sh | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/backup.sh b/backup.sh index 03cc514ecd..b65ed103de 100755 --- a/backup.sh +++ b/backup.sh @@ -100,15 +100,8 @@ until rsync -rLt --partial --info=progress2 --blocking-io --rsync-path="/var/www echo "Rsync's exit code was $RSYNC_EXIT_CODE. Retrying..." >&2 done -echo "Conserving file permissions (you may be prompted for a sudo password)..." >&2 - -sudo chown -R 33:33 "${workdir}/assets" - echo "Copying assets into local Docker container..." >&2 # The /. at the end of the src tells Docker "just copy the *contents* of the directory, not the directory itself" docker cp "${workdir}/assets/${dbname}/." "lf-app:/var/www/html/assets/lexicon/${dbname}" - -echo "Resetting file permissions of assets so cleanup step will work..." >&2 -CUR_UID=$(id -u) -CUR_GID=$(id -g) -sudo chown -R $CUR_UID:$CUR_GID "${workdir}/assets" +# The files produced by docker cp will end up being owned by your UID on the host, so we need to set their ownership after the docker cp step +docker exec lf-app chown -R www-data:www-data "/var/www/html/assets/lexicon/${dbname}" From e13eaa6c4af03aab462c45a30172eb55f2abfd48 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Wed, 22 May 2024 18:12:09 +0700 Subject: [PATCH 08/26] WIP converting script to Node.JS Not done yet, so don't try to run this yet. 
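The gist of the conversion: instead of scraping mongosh output through
`cut` the way backup.sh does, the script talks to Mongo through the
official driver. A minimal sketch of that pattern (connection string
assumed here; the real script derives the local port from
`docker compose port db 27017` at runtime):

    import { MongoClient } from "mongodb";

    // Assumed credentials/port for the local lf-db container
    const conn = await MongoClient.connect("mongodb://admin:pass@localhost:27017/?authSource=admin");
    // Replaces: docker exec lf-db mongosh ... --eval "db.users.findOne(...)" | cut -d"'" -f 2
    const admin = await conn.db("scriptureforge").collection("users").findOne({ username: "admin" });
    console.log(admin._id.toString());
    await conn.close();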
--- jsbackup.mjs | 163 ++++++++++++++++++++++++++++++++++++++++++++++ package-lock.json | 139 +++++++++++++++++++++++++++++++++++++++ package.json | 1 + 3 files changed, 303 insertions(+) create mode 100644 jsbackup.mjs diff --git a/jsbackup.mjs b/jsbackup.mjs new file mode 100644 index 0000000000..548c53c778 --- /dev/null +++ b/jsbackup.mjs @@ -0,0 +1,163 @@ +// TODO: Rename to backup.mjs before committing + +import { exec, execSync, spawn } from "child_process"; +import { existsSync, mkdirSync, mkdtempSync, rmSync } from "fs"; +import { MongoClient, ObjectId } from "mongodb"; +import os from "os"; +import path from "path"; + +// ===== EDIT THIS ===== + +const stagingContext = "dallas-rke"; +const prodContext = "aws-rke"; + +// Choose one, comment out the other +const context = stagingContext; +// const context = prodContext + +// ===== END of EDIT THIS ===== + +// Create a temp dir reliably +const tempdir = mkdtempSync(path.join(os.tmpdir(), "lfbackup-")); + +const cleanup = () => { + console.error(`Cleaning up temporary directory ${tempdir}...`); + if (existsSync(tempdir)) { + rmSync(tempdir, { recursive: true, force: true }); + } +}; + +process.on("exit", cleanup); + +function run(cmd) { + return execSync(cmd).toString().trimEnd(); +} + +function getContexts() { + var stdout = run("kubectl config get-contexts -o name"); + return stdout.split("\n"); +} + +function localSpawn(cmd, opts = {}) { + return spawn(`docker exec -i lf-db ${cmd}`, opts); +} + +function localExec(cmd, opts = {}) { + return execSync(`docker exec -i lf-db ${cmd}`, opts); +} + +function remoteSpawn(cmd, opts = {}) { + return spawn(`kubectl --context="${context}" exec -i deploy/db -- ${cmd}`, opts); +} +function remoteExec(cmd, opts = {}) { + console.log("Running: ", `kubectl --context="${context}" exec -i deploy/db -- ${cmd}`); + return execSync(`kubectl --context="${context}" exec -i deploy/db -- ${cmd}`, opts); +} +// Sanity check + +var contexts = getContexts(); +if (!contexts.includes(stagingContext)) { + console.log("Staging context not found. Tried", stagingContext, "but did not find it in", contexts); + console.log("Might need to edit the top level of this file and try again"); + process.exit(1); +} +if (!contexts.includes(prodContext)) { + console.log("Prod context not found. 
Tried", prodContext, "but did not find it in", contexts); + console.log("Might need to edit the top level of this file and try again"); + process.exit(1); +} + +// Start running + +// TODO: Improve by finding a local port that's not in use, rather than hardcoding this +let portForwardingReady; +const portForwardingPromise = new Promise((resolve) => { + portForwardingReady = resolve; +}); +const portForwardProcess = spawn("kubectl", [`--context=${context}`, "port-forward", "svc/db", "27018:27017"], { + stdio: "pipe", +}); +portForwardProcess.stdout.on("data", (data) => { + portForwardingReady(); +}); +portForwardProcess.stderr.on("data", (data) => { + console.log("Port forwarding failed:"); + console.log(data.toString()); + console.log("Exiting"); + process.exit(1); +}); + +const localMongoPort = run("docker compose port db 27017").split(":")[1]; +const localConnStr = `mongodb://admin:pass@localhost:${localMongoPort}/?authSource=admin`; +const localConn = await MongoClient.connect(localConnStr); + +const localAdmin = await localConn.db("scriptureforge").collection("users").findOne({ username: "admin" }); +const adminId = localAdmin._id.toString(); +console.log("Local admin ID:", adminId); + +// await portForwardingPromise +const remoteConnStr = `mongodb://localhost:27018`; +const remoteConn = await MongoClient.connect(remoteConnStr); + +const remoteAdmin = await remoteConn.db("scriptureforge").collection("users").findOne({ username: "admin" }); +console.log("Remote admin ID:", remoteAdmin._id.toString()); + +// Get project record + +const projId = "5dbf805650b51914727e06c4"; // TODO: Get from argv +const project = await remoteConn + .db("scriptureforge") + .collection("projects") + .findOne({ _id: new ObjectId(projId) }); +console.log("Project code:", project.projectCode); + +const dbname = `sf_${project.projectCode}`; +project.users = { [adminId]: { role: "project_manager" } }; +project.ownerRef = new ObjectId(adminId); +console.log(project.users); +delete project._id; // Otherwise Mongo complains that we're trying to alter it, which is dumb + +console.log("Copying project record..."); +await localConn + .db("scriptureforge") + .collection("projects") + .findOneAndReplace({ _id: projId }, project, { upsert: true }); + +// Mongo removed the .copyDatabase method in version 4.2, whose release notes said to just use mongodump/mongorestore if you want to do that + +console.log(`Fetching ${dbname} database...`); +remoteExec(`mongodump --archive -d "${dbname}" > ${tempdir}/dump`); +localExec(`mongorestore --archive --drop -d "${dbname}" ${localConnStr} < ${tempdir}/dump`); + +console.log("Setting up rsync on target container..."); +execSync( + `kubectl exec --context="${context}" deploy/app -- bash -c "which rsync || (apt update && apt install rsync -y)"`, +); + +console.log("Fetching assets via rsync (and retrying until success)..."); +console.log("\n===== IMPORTANT NOTE ====="); +console.log( + "If this stalls at exactly 50% done, then it's really 100% done and hasn't realized it. 
Just hit Ctrl+C and it will succeed on the retry", +); +console.log("===== IMPORTANT NOTE =====\n"); + +// NOTE: Hitting Ctrl+C worked in the bash script, but here it kills the Node process rather than being passed through to rsync +// TODO: Find a way to handle the "kill rsync and retry" thing gracefully, or else find a different solution than rsync + +mkdirSync(`${tempdir}/assets/${dbname}`, { recursive: true }); +let done = false; +while (!done) { + try { + execSync( + `rsync -rLt --partial --info=progress2 --blocking-io --rsync-path="/var/www/html/assets/lexicon/${dbname}" --rsh="kubectl --context=${context} exec -i deploy/app -- " "rsync:/var/www/html/assets/lexicon/${dbname}/" "${tempdir}/assets/${dbname}/"`, + { stdio: "inherit" }, + ); + done = true; + } catch (err) { + console.log(`Rsync failed with error: ${err}. Retrying...`); + } +} + +await localConn.close(); +await remoteConn.close(); +await portForwardProcess.kill(); diff --git a/package-lock.json b/package-lock.json index b78bb5f704..fb9390f3bf 100644 --- a/package-lock.json +++ b/package-lock.json @@ -61,6 +61,7 @@ "jasmine-spec-reporter": "^4.1.1", "lint-staged": "^13.0.3", "mini-css-extract-plugin": "^1.3.9", + "mongodb": "^6.6.2", "ng-annotate-loader": "^0.7.0", "ngtemplate-loader": "^2.1.0", "npm-run-all": "^4.1.5", @@ -1923,6 +1924,15 @@ "integrity": "sha512-Vo+PSpZG2/fmgmiNzYK9qWRh8h/CHrwD0mo1h1DzL4yzHNSfWYujGTYsWGreD000gcgmZ7K4Ys6Tx9TxtsKdDw==", "dev": true }, + "node_modules/@mongodb-js/saslprep": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/@mongodb-js/saslprep/-/saslprep-1.1.7.tgz", + "integrity": "sha512-dCHW/oEX0KJ4NjDULBo3JiOaK5+6axtpBbS+ao2ZInoAL9/YRQLhXzSNAFz7hP4nzLkIqsfYAK/PDE3+XHny0Q==", + "dev": true, + "dependencies": { + "sparse-bitfield": "^3.0.3" + } + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -2441,12 +2451,27 @@ "integrity": "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==", "dev": true }, + "node_modules/@types/webidl-conversions": { + "version": "7.0.3", + "resolved": "https://registry.npmjs.org/@types/webidl-conversions/-/webidl-conversions-7.0.3.tgz", + "integrity": "sha512-CiJJvcRtIgzadHCYXw7dqEnMNRjhGZlYK05Mj9OyktqV8uVT8fD2BFOB7S1uwBE3Kj2Z+4UyPmFw/Ixgw/LAlA==", + "dev": true + }, "node_modules/@types/webpack-env": { "version": "1.18.4", "resolved": "https://registry.npmjs.org/@types/webpack-env/-/webpack-env-1.18.4.tgz", "integrity": "sha512-I6e+9+HtWADAWeeJWDFQtdk4EVSAbj6Rtz4q8fJ7mSr1M0jzlFcs8/HZ+Xb5SHzVm1dxH7aUiI+A8kA8Gcrm0A==", "dev": true }, + "node_modules/@types/whatwg-url": { + "version": "11.0.5", + "resolved": "https://registry.npmjs.org/@types/whatwg-url/-/whatwg-url-11.0.5.tgz", + "integrity": "sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==", + "dev": true, + "dependencies": { + "@types/webidl-conversions": "*" + } + }, "node_modules/@types/ws": { "version": "8.5.10", "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.5.10.tgz", @@ -3169,6 +3194,15 @@ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, + "node_modules/bson": { + "version": "6.7.0", + "resolved": "https://registry.npmjs.org/bson/-/bson-6.7.0.tgz", + "integrity": "sha512-w2IquM5mYzYZv6rs3uN2DZTOBe2a0zXLj53TGDqwF4l6Sz/XsISrisXOJihArF9+BZ6Cq/GjVht7Sjfmri7ytQ==", + "dev": true, + "engines": { + "node": ">=16.20.1" + } + }, "node_modules/buffer-from": { "version": "1.1.2", 
"resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", @@ -6216,6 +6250,12 @@ "node": ">= 4.0.0" } }, + "node_modules/memory-pager": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/memory-pager/-/memory-pager-1.5.0.tgz", + "integrity": "sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==", + "dev": true + }, "node_modules/memorystream": { "version": "0.3.1", "resolved": "https://registry.npmjs.org/memorystream/-/memorystream-0.3.1.tgz", @@ -6399,6 +6439,96 @@ "node": ">=10" } }, + "node_modules/mongodb": { + "version": "6.6.2", + "resolved": "https://registry.npmjs.org/mongodb/-/mongodb-6.6.2.tgz", + "integrity": "sha512-ZF9Ugo2JCG/GfR7DEb4ypfyJJyiKbg5qBYKRintebj8+DNS33CyGMkWbrS9lara+u+h+yEOGSRiLhFO/g1s1aw==", + "dev": true, + "dependencies": { + "@mongodb-js/saslprep": "^1.1.5", + "bson": "^6.7.0", + "mongodb-connection-string-url": "^3.0.0" + }, + "engines": { + "node": ">=16.20.1" + }, + "peerDependencies": { + "@aws-sdk/credential-providers": "^3.188.0", + "@mongodb-js/zstd": "^1.1.0", + "gcp-metadata": "^5.2.0", + "kerberos": "^2.0.1", + "mongodb-client-encryption": ">=6.0.0 <7", + "snappy": "^7.2.2", + "socks": "^2.7.1" + }, + "peerDependenciesMeta": { + "@aws-sdk/credential-providers": { + "optional": true + }, + "@mongodb-js/zstd": { + "optional": true + }, + "gcp-metadata": { + "optional": true + }, + "kerberos": { + "optional": true + }, + "mongodb-client-encryption": { + "optional": true + }, + "snappy": { + "optional": true + }, + "socks": { + "optional": true + } + } + }, + "node_modules/mongodb-connection-string-url": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mongodb-connection-string-url/-/mongodb-connection-string-url-3.0.1.tgz", + "integrity": "sha512-XqMGwRX0Lgn05TDB4PyG2h2kKO/FfWJyCzYQbIhXUxz7ETt0I/FqHjUeqj37irJ+Dl1ZtU82uYyj14u2XsZKfg==", + "dev": true, + "dependencies": { + "@types/whatwg-url": "^11.0.2", + "whatwg-url": "^13.0.0" + } + }, + "node_modules/mongodb-connection-string-url/node_modules/tr46": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-4.1.1.tgz", + "integrity": "sha512-2lv/66T7e5yNyhAAC4NaKe5nVavzuGJQVVtRYLyQ2OI8tsJ61PMLlelehb0wi2Hx6+hT/OJUWZcw8MjlSRnxvw==", + "dev": true, + "dependencies": { + "punycode": "^2.3.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/mongodb-connection-string-url/node_modules/webidl-conversions": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-7.0.0.tgz", + "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", + "dev": true, + "engines": { + "node": ">=12" + } + }, + "node_modules/mongodb-connection-string-url/node_modules/whatwg-url": { + "version": "13.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-13.0.0.tgz", + "integrity": "sha512-9WWbymnqj57+XEuqADHrCJ2eSXzn8WXIW/YSGaZtb2WKAInQ6CHfaUUcTyyver0p8BDg5StLQq8h1vtZuwmOig==", + "dev": true, + "dependencies": { + "tr46": "^4.1.1", + "webidl-conversions": "^7.0.0" + }, + "engines": { + "node": ">=16" + } + }, "node_modules/mrmime": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/mrmime/-/mrmime-2.0.0.tgz", @@ -8408,6 +8538,15 @@ "deprecated": "Please use @jridgewell/sourcemap-codec instead", "dev": true }, + "node_modules/sparse-bitfield": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/sparse-bitfield/-/sparse-bitfield-3.0.3.tgz", + "integrity": 
"sha512-kvzhi7vqKTfkh0PZU+2D2PIllw2ymqJKujUcyPMd9Y75Nv4nPbGJZXNhxsgdQab2BmlDct1YnfQCguEvHr7VsQ==", + "dev": true, + "dependencies": { + "memory-pager": "^1.0.2" + } + }, "node_modules/spdx-correct": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.2.0.tgz", diff --git a/package.json b/package.json index ca0f87d2a6..d7393e9744 100644 --- a/package.json +++ b/package.json @@ -77,6 +77,7 @@ "jasmine-spec-reporter": "^4.1.1", "lint-staged": "^13.0.3", "mini-css-extract-plugin": "^1.3.9", + "mongodb": "^6.6.2", "ng-annotate-loader": "^0.7.0", "ngtemplate-loader": "^2.1.0", "npm-run-all": "^4.1.5", From 0ddfe00798fdeeb6b1db3117c2376b8e07e90ee2 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Thu, 23 May 2024 10:21:54 +0700 Subject: [PATCH 09/26] Finish converting script to Node.JS Now takes project ID or URL as parameter --- jsbackup.mjs | 231 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 164 insertions(+), 67 deletions(-) diff --git a/jsbackup.mjs b/jsbackup.mjs index 548c53c778..dffe863856 100644 --- a/jsbackup.mjs +++ b/jsbackup.mjs @@ -1,10 +1,11 @@ // TODO: Rename to backup.mjs before committing -import { exec, execSync, spawn } from "child_process"; -import { existsSync, mkdirSync, mkdtempSync, rmSync } from "fs"; +import { execSync, spawn } from "child_process"; +import { existsSync, mkdtempSync, rmSync, statSync } from "fs"; import { MongoClient, ObjectId } from "mongodb"; import os from "os"; import path from "path"; +import net from "net"; // ===== EDIT THIS ===== @@ -19,15 +20,33 @@ const context = stagingContext; // Create a temp dir reliably const tempdir = mkdtempSync(path.join(os.tmpdir(), "lfbackup-")); +let portForwardProcess; +let localConn; +let remoteConn; -const cleanup = () => { - console.error(`Cleaning up temporary directory ${tempdir}...`); +async function cleanup() { if (existsSync(tempdir)) { + console.warn(`Cleaning up temporary directory ${tempdir}...`); rmSync(tempdir, { recursive: true, force: true }); } -}; + if (localConn) await localConn.close(); + if (remoteConn) await remoteConn.close(); + if (portForwardProcess) await portForwardProcess.kill(); +} + +async function randomFreePort() { + return new Promise((resolve) => { + const server = net.createServer(); + server.listen(0, () => { + // Asking for port 0 makes Node automatically find a free port + const port = server.address().port; + server.close((_) => resolve(port)); + }); + }); +} process.on("exit", cleanup); +process.on("uncaughtExceptionMonitor", cleanup); function run(cmd) { return execSync(cmd).toString().trimEnd(); @@ -38,73 +57,86 @@ function getContexts() { return stdout.split("\n"); } -function localSpawn(cmd, opts = {}) { - return spawn(`docker exec -i lf-db ${cmd}`, opts); -} - -function localExec(cmd, opts = {}) { - return execSync(`docker exec -i lf-db ${cmd}`, opts); -} - -function remoteSpawn(cmd, opts = {}) { - return spawn(`kubectl --context="${context}" exec -i deploy/db -- ${cmd}`, opts); -} -function remoteExec(cmd, opts = {}) { - console.log("Running: ", `kubectl --context="${context}" exec -i deploy/db -- ${cmd}`); - return execSync(`kubectl --context="${context}" exec -i deploy/db -- ${cmd}`, opts); -} // Sanity check var contexts = getContexts(); if (!contexts.includes(stagingContext)) { - console.log("Staging context not found. Tried", stagingContext, "but did not find it in", contexts); - console.log("Might need to edit the top level of this file and try again"); + console.warn("Staging context not found. 
Tried", stagingContext, "but did not find it in", contexts); + console.warn("Might need to edit the top level of this file and try again"); process.exit(1); } if (!contexts.includes(prodContext)) { - console.log("Prod context not found. Tried", prodContext, "but did not find it in", contexts); - console.log("Might need to edit the top level of this file and try again"); + console.warn("Prod context not found. Tried", prodContext, "but did not find it in", contexts); + console.warn("Might need to edit the top level of this file and try again"); process.exit(1); } +// Process args + +if (process.argv.length < 3) { + console.warn("Please pass project ID or URL as argument, e.g. node backup.mjs 5dbf805650b51914727e06c4"); + process.exit(2); +} + +let projId; +const arg = process.argv[2]; +if (URL.canParse(arg)) { + const url = new URL(arg); + if (url.pathname.startsWith("/app/lexicon/")) { + projId = url.pathname.substring("/app/lexicon/".length); + } else { + projId = url.pathname; // Will probably fail, but worth a try + } +} else { + projId = arg; +} + +projId = projId.trim(); +console.log("Project ID:", projId); + // Start running +console.warn("Setting up kubectl port forwarding for remote Mongo..."); +const remoteMongoPort = await randomFreePort(); // TODO: Improve by finding a local port that's not in use, rather than hardcoding this let portForwardingReady; const portForwardingPromise = new Promise((resolve) => { portForwardingReady = resolve; }); -const portForwardProcess = spawn("kubectl", [`--context=${context}`, "port-forward", "svc/db", "27018:27017"], { +portForwardProcess = spawn("kubectl", [`--context=${context}`, "port-forward", "svc/db", `${remoteMongoPort}:27017`], { stdio: "pipe", }); portForwardProcess.stdout.on("data", (data) => { portForwardingReady(); }); portForwardProcess.stderr.on("data", (data) => { - console.log("Port forwarding failed:"); - console.log(data.toString()); - console.log("Exiting"); + console.warn("Port forwarding failed:"); + console.warn(data.toString()); + console.warn("Exiting"); process.exit(1); }); +console.warn("Setting up local Mongo connection..."); + const localMongoPort = run("docker compose port db 27017").split(":")[1]; const localConnStr = `mongodb://admin:pass@localhost:${localMongoPort}/?authSource=admin`; -const localConn = await MongoClient.connect(localConnStr); +localConn = await MongoClient.connect(localConnStr); const localAdmin = await localConn.db("scriptureforge").collection("users").findOne({ username: "admin" }); const adminId = localAdmin._id.toString(); -console.log("Local admin ID:", adminId); +console.log(`Local admin ID: ${adminId}`); +console.warn("If that doesn't look right, hit Ctrl+C NOW"); -// await portForwardingPromise -const remoteConnStr = `mongodb://localhost:27018`; -const remoteConn = await MongoClient.connect(remoteConnStr); +await portForwardingPromise; +console.warn("Port forwarding is ready. Setting up remote Mongo connection..."); + +const remoteConnStr = `mongodb://localhost:${remoteMongoPort}`; +remoteConn = await MongoClient.connect(remoteConnStr); const remoteAdmin = await remoteConn.db("scriptureforge").collection("users").findOne({ username: "admin" }); -console.log("Remote admin ID:", remoteAdmin._id.toString()); +console.warn("Remote Mongo connection established. 
Fetching project record..."); // Get project record - -const projId = "5dbf805650b51914727e06c4"; // TODO: Get from argv const project = await remoteConn .db("scriptureforge") .collection("projects") @@ -114,50 +146,115 @@ console.log("Project code:", project.projectCode); const dbname = `sf_${project.projectCode}`; project.users = { [adminId]: { role: "project_manager" } }; project.ownerRef = new ObjectId(adminId); -console.log(project.users); -delete project._id; // Otherwise Mongo complains that we're trying to alter it, which is dumb +console.warn(project.users); -console.log("Copying project record..."); +// TODO: Move to after database is copied, so there's never a race condition where the project exists but its entry database doesn't +console.warn("Copying project record..."); await localConn .db("scriptureforge") .collection("projects") - .findOneAndReplace({ _id: projId }, project, { upsert: true }); + .findOneAndReplace({ _id: new ObjectId(projId) }, project, { upsert: true }); // Mongo removed the .copyDatabase method in version 4.2, whose release notes said to just use mongodump/mongorestore if you want to do that -console.log(`Fetching ${dbname} database...`); -remoteExec(`mongodump --archive -d "${dbname}" > ${tempdir}/dump`); -localExec(`mongorestore --archive --drop -d "${dbname}" ${localConnStr} < ${tempdir}/dump`); +console.warn(`Copying ${dbname} database...`); +const collections = await remoteConn.db(dbname).collections(); +for (const remoteColl of collections) { + const name = remoteColl.collectionName; + console.log(` Copying ${name} collection...`); + const indexes = await remoteColl.indexes(); + const cursor = remoteColl.find(); + const docs = await cursor.toArray(); + const localColl = await localConn.db(dbname).collection(name); + try { + await localColl.drop(); + } catch (_) {} // Throws if collection doesn't exist, which is fine + try { + await localColl.dropIndexes(); + } catch (_) {} // Throws if collection doesn't exist, which is fine + await localColl.createIndexes(indexes); + await localColl.insertMany(docs); + console.log(` ${docs.length} documents copied`); +} +console.warn(`${dbname} database successfully copied`); -console.log("Setting up rsync on target container..."); +// NOTE: mongodump/mongorestore approach below can be revived once Kubernetes 1.30 is installed on client *and* server, so kubectl exec is finally reliable + +// console.warn(`About to try fetching ${dbname} database from remote, will retry until success`); +// let done = false; +// while (!done) { +// try { +// console.warn(`Fetching ${dbname} database...`); +// execSync( +// `kubectl --context="${context}" exec -i deploy/db -- mongodump --archive -d "${dbname}" > ${tempdir}/dump`, +// ); +// console.warn(`Uploading to local ${dbname} database...`); +// execSync(`docker exec -i lf-db mongorestore --archive --drop -d "${dbname}" ${localConnStr} < ${tempdir}/dump`); +// console.warn(`Successfully uploaded ${dbname} database`); +// done = true; +// } catch (err) { +// console.warn("mongodump failed, retrying..."); +// } +// } + +console.warn("Setting up rsync on target container..."); execSync( - `kubectl exec --context="${context}" deploy/app -- bash -c "which rsync || (apt update && apt install rsync -y)"`, + `kubectl exec --context="${context}" -c app deploy/app -- bash -c "which rsync || (apt update && apt install rsync -y)"`, ); -console.log("Fetching assets via rsync (and retrying until success)..."); -console.log("\n===== IMPORTANT NOTE ====="); -console.log( - "If this stalls at 
exactly 50% done, then it's really 100% done and hasn't realized it. Just hit Ctrl+C and it will succeed on the retry", +console.warn("Creating assets tarball in remote..."); +execSync( + `kubectl --context="${context}" exec -c app deploy/app -- tar chf /tmp/assets-${dbname}.tar --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" .`, ); -console.log("===== IMPORTANT NOTE =====\n"); - -// NOTE: Hitting Ctrl+C worked in the bash script, but here it kills the Node process rather than being passed through to rsync -// TODO: Find a way to handle the "kill rsync and retry" thing gracefully, or else find a different solution than rsync +const sizeStr = run( + `kubectl --context="${context}" exec -c app deploy/app -- sh -c 'ls -l /tmp/assets-${dbname}.tar | cut -d" " -f5'`, +); +const correctSize = +sizeStr; +console.warn(`Asserts tarball size is ${sizeStr}`); -mkdirSync(`${tempdir}/assets/${dbname}`, { recursive: true }); -let done = false; -while (!done) { - try { - execSync( - `rsync -rLt --partial --info=progress2 --blocking-io --rsync-path="/var/www/html/assets/lexicon/${dbname}" --rsh="kubectl --context=${context} exec -i deploy/app -- " "rsync:/var/www/html/assets/lexicon/${dbname}/" "${tempdir}/assets/${dbname}/"`, - { stdio: "inherit" }, - ); - done = true; - } catch (err) { - console.log(`Rsync failed with error: ${err}. Retrying...`); +console.warn("Getting name of remote app pod..."); +const pod = run( + `kubectl --context="${context}" get pod -o jsonpath="{.items[*]['metadata.name']}" -l app=app --field-selector "status.phase=Running"`, +); +console.warn("Trying to fetch assets tarball with kubectl cp..."); +let failed = false; +try { + execSync(`kubectl --context="${context}" cp ${pod}:/tmp/assets-${dbname}.tar ${tempdir}/assets-${dbname}.tar`); +} catch (_) { + console.warn("kubectl cp failed. Will try to continue with rsync..."); + failed = true; +} +if (!failed) { + const localSize = statSync(`${tempdir}/assets-${dbname}.tar`).size; + if (localSize < correctSize) { + console.warn(`Got only ${localSize} bytes instead of ${correctSize}. Will try to continue with rsync...`); + failed = true; } } +if (failed) { + console.warn("\n===== IMPORTANT NOTE ====="); + console.warn( + "This may (probably will) stall at 100%. You'll have to find the rsync process and kill it. Sorry about that.", + ); + console.warn("===== IMPORTANT NOTE =====\n"); + let done = false; + while (!done) { + try { + execSync( + `rsync -v --partial --info=progress2 --rsync-path="/tmp/" --rsh="kubectl --context=${context} exec -i -c app deploy/app -- " "rsync:/tmp/assets-${dbname}.tar" "${tempdir}/"`, + { stdio: "inherit" }, // Allows us to see rsync progress + ); + done = true; + } catch (err) { + console.warn(`Rsync failed with error: ${err}. 
Retrying...`); + } + } +} +console.warn("Uploading assets tarball to local..."); +execSync( + `docker exec lf-app mkdir -p "/var/www/html/assets/lexicon/${dbname}" ; docker exec lf-app chown www-data:www-data "/var/www/html/assets/lexicon/${dbname}" || true`, +); +execSync(`docker cp - lf-app:/var/www/html/assets/lexicon/${dbname}/ < ${tempdir}/assets-${dbname}.tar`); +console.warn("Assets successfully uploaded"); -await localConn.close(); -await remoteConn.close(); -await portForwardProcess.kill(); +process.exit(0); From 0cbe33f5c5b7ec090c42b2a41b6a589fa5e1e208 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Thu, 23 May 2024 15:35:17 +0700 Subject: [PATCH 10/26] Rename backup script now that it works --- jsbackup.mjs => backup.mjs | 0 backup.sh | 107 ------------------------------------- 2 files changed, 107 deletions(-) rename jsbackup.mjs => backup.mjs (100%) delete mode 100755 backup.sh diff --git a/jsbackup.mjs b/backup.mjs similarity index 100% rename from jsbackup.mjs rename to backup.mjs diff --git a/backup.sh b/backup.sh deleted file mode 100755 index b65ed103de..0000000000 --- a/backup.sh +++ /dev/null @@ -1,107 +0,0 @@ -#!/bin/bash - -# ===== EDIT THIS if your Kubernetes context names are different ===== - -staging_context="dallas-rke" -prod_context="aws-rke" - -# Uncomment one of the two blocks below to choose between staging and prod - -echo "Using staging context..." >&2 -context="${staging_context}" - -# echo "Using prod context..." >&2 -# context="${prod_context}" - -# ===== END of "EDIT THIS" block ===== - -echo "Checking for necessary tools..." >&2 -which jq >/dev/null -if [ $? -ne 0 ]; then - echo "jq (JSON Query) not found. This script needs it to run." >&2 - echo "Try \"apt install jq\" on Linux, \"brew install jq\" on Mac, or \"choco install jq\" or \"winget install jqlang.jq\" on Windows." >&2 - exit 1 -fi - -which rsync >/dev/null -if [ $? -ne 0 ]; then - echo "rsync not found. This script needs it in order to copy asset files." >&2 - echo "Try \"apt install rsync\" on Linux, ??? on Mac, or ??? on Windows." >&2 - echo "Continuing anyway, but you may get failures on the asset-copying step..." >&2 -fi - -proj=$1 - -# Create a temp dir reliably on both Linux and OS X -workdir=$(mktemp -d 2>/dev/null || mktemp -d -t 'sfbackup') - -function cleanup { - echo "Cleaning up temporary directory ${workdir}..." >&2 - [ -n "${workdir}" ] && [ -d "${workdir}" ] && rm -rf "${workdir}" -} - -[ -n "${workdir}" ] && [ -d "${workdir}" ] && trap cleanup EXIT - -echo "Looking up Mongo ID of local admin user..." >&2 -admin_id=$(docker exec lf-db mongosh -u admin -p pass --authenticationDatabase admin scriptureforge --eval "db.users.findOne({username: 'admin'}, {_id: 1})" | cut -d"'" -f 2) - -if [ -z "${admin_id}" ]; then - echo "Could not find local admin ID. Please try running 'docker exec -it lf-db mongosh' and see what happens." >&2 - exit 1 -fi - -echo "Verifying admin ID..." >&2 -docker exec lf-db mongosh -u admin -p pass --authenticationDatabase admin scriptureforge --eval "db.users.findOne({_id: ObjectId('${admin_id}')}, {name: 1, username: 1, email: 1})" -echo "If that looks wrong, hit Ctrl+C NOW" >&2 -sleep 1 - -echo "Backing up project with ID ${proj}..." >&2 -echo "Getting project code..." 
>&2 - -projCode=$(kubectl --context="${context}" exec deploy/db -- mongosh --quiet scriptureforge --eval 'db.projects.findOne({_id: ObjectId('"'${proj}'"')}, {projectCode: 1})' | grep projectCode | cut -d"'" -f 2) -echo "Project code: $projCode" >&2 - -echo "If that looks wrong, hit Ctrl+C NOW" >&2 -sleep 1 - -echo "Getting project record..." >&2 - -kubectl --context="${context}" exec deploy/db -- mongosh --quiet scriptureforge --eval 'db.projects.findOne({_id: ObjectId('"'${proj}'"')})' --json=canonical > "${workdir}/project.json" - -echo "Removing users and replacing project manager with admin..." >&2 -jq "setpath([\"users\"]; {\"${admin_id}\": {\"role\": \"project_manager\"}}) | setpath([\"ownerRef\"]; {\"\$oid\": \"${admin_id}\"} )" < "${workdir}/project.json" > "${workdir}/project-modified.json" - -echo "Getting project database..." >&2 -dbname="sf_${projCode}" - -kubectl --context="${context}" exec deploy/db -- mongodump -d "${dbname}" --archive > "${workdir}/db.archive" -# Once we require auth, this will become: -# kubectl --context="${context}" exec deploy/db -- mongodump -u admin -p pass --authenticationDatabase admin -d "${dbname}" --archive > "${workdir}/db.archive" -docker exec -i lf-db mongorestore -u admin -p pass --authenticationDatabase admin -d "${dbname}" --drop --archive < "${workdir}"/db.archive - -echo "Importing project record into local projects collection..." >&2 -docker exec -i lf-db mongoimport -u admin -p pass --authenticationDatabase admin -d scriptureforge -c projects --mode=upsert < "${workdir}/project-modified.json" - -echo "Okay, ${projCode} should be available in your Language Forge installation now." >&2 - -echo "Setting up rsync on target container..." >&2 -kubectl exec --context="${context}" deploy/app -- bash -c "which rsync || (apt update && apt install rsync -y)" - -echo "Fetching assets via rsync (and retrying until success)..." >&2 -echo >&2 -echo "===== IMPORTANT NOTE =====" >&2 -echo "If this stalls at exactly 50% done, then it's really 100% done and hasn't realized it. Just hit Ctrl+C and it will succeed on the retry" >&2 -# TODO: Figure out why rsync is misidentifying the size. Is it related to the -L option (follow symlinks)? -echo "===== IMPORTANT NOTE =====" >&2 -echo >&2 -mkdir -p "${workdir}/assets/${dbname}" -until rsync -rLt --partial --info=progress2 --blocking-io --rsync-path="/var/www/html/assets/lexicon/${dbname}" --rsh="kubectl --context=${context} exec -i deploy/app -- " "rsync:/var/www/html/assets/lexicon/${dbname}/" "${workdir}/assets/${dbname}/"; do - RSYNC_EXIT_CODE=$? - echo "Rsync's exit code was $RSYNC_EXIT_CODE. Retrying..." >&2 -done - -echo "Copying assets into local Docker container..." >&2 -# The /. at the end of the src tells Docker "just copy the *contents* of the directory, not the directory itself" -docker cp "${workdir}/assets/${dbname}/." "lf-app:/var/www/html/assets/lexicon/${dbname}" -# The files produced by docker cp will end up being owned by your UID on the host, so we need to set their ownership after the docker cp step -docker exec lf-app chown -R www-data:www-data "/var/www/html/assets/lexicon/${dbname}" From 6d9bd556148e03815bb5f35456a0d1e5d9cff862 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Thu, 23 May 2024 16:13:32 +0700 Subject: [PATCH 11/26] Remove no-longer-used query Was used during development as a way to test remote connection, no longer needed. 
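For the record, the same smoke test can still be done ad hoc when
needed; a sketch, assuming a `kubectl port-forward` to the remote db is
already running on local port 27018 (an arbitrary choice):

    import { MongoClient } from "mongodb";

    const remote = await MongoClient.connect("mongodb://localhost:27018");
    console.log(await remote.db("scriptureforge").admin().ping()); // { ok: 1 } on success
    await remote.close();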
--- backup.mjs | 1 - 1 file changed, 1 deletion(-) diff --git a/backup.mjs b/backup.mjs index dffe863856..88e3f8be53 100644 --- a/backup.mjs +++ b/backup.mjs @@ -133,7 +133,6 @@ console.warn("Port forwarding is ready. Setting up remote Mongo connection..."); const remoteConnStr = `mongodb://localhost:${remoteMongoPort}`; remoteConn = await MongoClient.connect(remoteConnStr); -const remoteAdmin = await remoteConn.db("scriptureforge").collection("users").findOne({ username: "admin" }); console.warn("Remote Mongo connection established. Fetching project record..."); // Get project record From 6f9a9cf1973c7dc6b7eca59dab0cfc245522ba6f Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Thu, 23 May 2024 16:30:02 +0700 Subject: [PATCH 12/26] Allow selecting qa or prod via 2nd command-line arg Defaults to qa/staging for obvious reasons --- backup.mjs | 47 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/backup.mjs b/backup.mjs index 88e3f8be53..0f82af130f 100644 --- a/backup.mjs +++ b/backup.mjs @@ -13,8 +13,13 @@ const stagingContext = "dallas-rke"; const prodContext = "aws-rke"; // Choose one, comment out the other -const context = stagingContext; -// const context = prodContext +// Alternately, pass a second argument to this script: "qa" or "staging" select staging, "live" or "prod" or "production" select production + +let contextName = "staging"; +let context = stagingContext; + +// let contextName = 'production' +// let context = prodContext // ===== END of EDIT THIS ===== @@ -91,9 +96,45 @@ if (URL.canParse(arg)) { projId = arg; } +if (process.argv.length > 3) { + const env = process.argv[3]; + switch (env) { + case "qa": + context = stagingContext; + contextName = "staging"; + break; + case "staging": + context = stagingContext; + contextName = "staging"; + break; + + case "live": + context = prodContext; + contextName = "production"; + break; + case "prod": + context = prodContext; + contextName = "production"; + break; + case "production": + context = prodContext; + contextName = "production"; + break; + + default: + console.warn(`Unknown environment ${env}`); + console.warn(`Valid values are qa, staging, live, prod, or production`); + process.exit(2); + } +} + projId = projId.trim(); -console.log("Project ID:", projId); +console.warn(`Fetching project with ID ${projId} from ${contextName} context, named "${context}"`); +console.warn("If that looks wrong, hit Ctrl+C right NOW!"); +console.warn(); +console.warn("Pausing for 2 seconds to give you time to hit Ctrl+C..."); +await new Promise((resolve) => setTimeout(resolve, 2000)); // Start running console.warn("Setting up kubectl port forwarding for remote Mongo..."); From 23c9c9dd3f2d162ad5b110b05c6f7943d8615141 Mon Sep 17 00:00:00 2001 From: Robin Munn Date: Thu, 23 May 2024 16:32:33 +0700 Subject: [PATCH 13/26] Fix kubectl port forward on production server Our staging server has a port defined on the db service, but our production server does not. Switching to port forward to `deploy/db`, which will automatically select the Mongo pod (which *does* have a port open to forward to). 
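A quick way to see the difference (a sketch; per the note above,
production's db Service defines no port, so the first command should
come back empty there):

    import { execSync } from "child_process";

    const context = "aws-rke"; // production context name assumed from the top of this script

    // Prints the Service's declared ports; if this is empty,
    // `kubectl port-forward svc/db ...` has no port to resolve.
    console.log(execSync(`kubectl --context=${context} get svc db -o jsonpath="{.spec.ports}"`).toString());

    // Forwarding to the Deployment resolves a running pod directly, so
    // the container port is reachable on both clusters:
    //   kubectl --context=<ctx> port-forward deploy/db <localPort>:27017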
---
 backup.mjs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index 0f82af130f..4142df0ddc 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -144,9 +144,13 @@ let portForwardingReady;
 const portForwardingPromise = new Promise((resolve) => {
   portForwardingReady = resolve;
 });
-portForwardProcess = spawn("kubectl", [`--context=${context}`, "port-forward", "svc/db", `${remoteMongoPort}:27017`], {
-  stdio: "pipe",
-});
+portForwardProcess = spawn(
+  "kubectl",
+  [`--context=${context}`, "port-forward", "deploy/db", `${remoteMongoPort}:27017`],
+  {
+    stdio: "pipe",
+  },
+);
 portForwardProcess.stdout.on("data", (data) => {
   portForwardingReady();
 });

From 07ce62d6906d04ea8a668af1119e752904d42496 Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Thu, 23 May 2024 16:34:20 +0700
Subject: [PATCH 14/26] Only set up rsync if needed

This will save a bit of time when kubectl cp is being reliable.

Also force the languageforge namespace, just in case.

---
 backup.mjs | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index 4142df0ddc..c086e4860f 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -146,7 +146,7 @@ const portForwardingPromise = new Promise((resolve) => {
 });
 portForwardProcess = spawn(
   "kubectl",
-  [`--context=${context}`, "port-forward", "deploy/db", `${remoteMongoPort}:27017`],
+  [`--context=${context}`, "--namespace=languageforge", "port-forward", "deploy/db", `${remoteMongoPort}:27017`],
   {
     stdio: "pipe",
   },
 );
@@ -230,7 +230,7 @@ console.warn(`${dbname} database successfully copied`);
 // try {
 //   console.warn(`Fetching ${dbname} database...`);
 //   execSync(
-//     `kubectl --context="${context}" exec -i deploy/db -- mongodump --archive -d "${dbname}" > ${tempdir}/dump`,
+//     `kubectl --context="${context}" --namespace=languageforge exec -i deploy/db -- mongodump --archive -d "${dbname}" > ${tempdir}/dump`,
 //   );
 //   console.warn(`Uploading to local ${dbname} database...`);
 //   execSync(`docker exec -i lf-db mongorestore --archive --drop -d "${dbname}" ${localConnStr} < ${tempdir}/dump`);
@@ -241,29 +241,26 @@ console.warn(`${dbname} database successfully copied`);
 // }
 // }
-console.warn("Setting up rsync on target container...");
-execSync(
-  `kubectl exec --context="${context}" -c app deploy/app -- bash -c "which rsync || (apt update && apt install rsync -y)"`,
-);
 console.warn("Creating assets tarball in remote...");
 execSync(
-  `kubectl --context="${context}" exec -c app deploy/app -- tar chf /tmp/assets-${dbname}.tar --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" .`,
+  `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- tar chf /tmp/assets-${dbname}.tar --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" .`,
 );
 const sizeStr = run(
-  `kubectl --context="${context}" exec -c app deploy/app -- sh -c 'ls -l /tmp/assets-${dbname}.tar | cut -d" " -f5'`,
+  `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- sh -c 'ls -l /tmp/assets-${dbname}.tar | cut -d" " -f5'`,
 );
 const correctSize = +sizeStr;
 console.warn(`Assets tarball size is ${sizeStr}`);
 console.warn("Getting name of remote app pod...");
 const pod = run(
-  `kubectl --context="${context}" get pod -o jsonpath="{.items[*]['metadata.name']}" -l app=app --field-selector "status.phase=Running"`,
+  `kubectl --context="${context}" --namespace=languageforge get pod -o jsonpath="{.items[*]['metadata.name']}" -l app=app --field-selector "status.phase=Running"`,
 );
 console.warn("Trying to fetch assets tarball with kubectl cp...");
 let failed = false;
 try {
-  execSync(`kubectl --context="${context}" cp ${pod}:/tmp/assets-${dbname}.tar ${tempdir}/assets-${dbname}.tar`);
+  execSync(
+    `kubectl --context="${context}" --namespace=languageforge cp ${pod}:/tmp/assets-${dbname}.tar ${tempdir}/assets-${dbname}.tar`,
+  );
 } catch (_) {
   console.warn("kubectl cp failed. Will try to continue with rsync...");
   failed = true;
@@ -276,16 +273,20 @@ if (!failed) {
   }
 }
 if (failed) {
+  console.warn("Ensuring rsync exists in target container...");
+  execSync(
+    `kubectl exec --context="${context}" -c app deploy/app -- bash -c "which rsync || (apt update && apt install rsync -y)"`,
+  );
   console.warn("\n===== IMPORTANT NOTE =====");
   console.warn(
-    "This may (probably will) stall at 100%. You'll have to find the rsync process and kill it. Sorry about that.",
+    "The rsync transfer may (probably will) stall at 100%. You'll have to find the rsync process and kill it. Sorry about that.",
   );
   console.warn("===== IMPORTANT NOTE =====\n");
   let done = false;
   while (!done) {
     try {
       execSync(
-        `rsync -v --partial --info=progress2 --rsync-path="/tmp/" --rsh="kubectl --context=${context} exec -i -c app deploy/app -- " "rsync:/tmp/assets-${dbname}.tar" "${tempdir}/"`,
+        `rsync -v --partial --info=progress2 --rsync-path="/tmp/" --rsh="kubectl --context=${context} --namespace=languageforge exec -i -c app deploy/app -- " "rsync:/tmp/assets-${dbname}.tar" "${tempdir}/"`,
         { stdio: "inherit" }, // Allows us to see rsync progress
       );
       done = true;

From ff0f165ec480263a546cec3913596bbb2a93f3bf Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Fri, 24 May 2024 08:20:39 +0700
Subject: [PATCH 15/26] Only include assets that are really there

Before including pictures and audio in the tarball, make sure they're
really there, and skip them if they are a broken symlink.

---
 backup.mjs | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/backup.mjs b/backup.mjs
index c086e4860f..5312effee7 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -62,6 +62,17 @@ function getContexts() {
   return stdout.split("\n");
 }
 
+function reallyExists(name) {
+  // Sometimes the audio and/or pictures folders in assets are symlinks, and sometimes they're broken symlinks
+  // This returns true if the name is a real file/directory *or* a symlink with a valid target, or false if it doesn't exist or is broken
+  const result = execSync(
+    `kubectl --context=${context} --namespace=languageforge exec -c app deploy/app -- sh -c 'readlink -eq ${name} >/dev/null && echo -n yes || echo -n no'`,
+  ).toString();
+  if (result === "yes") return true;
+  if (result === "no") return false;
+  throw new Error(`Unexpected result from readlink ${name}: ${result}`);
+}
+
 // Sanity check
 
 var contexts = getContexts();
@@ -252,9 +252,30 @@ console.warn(`${dbname} database successfully copied`);
 // }
 // }
+console.warn("Checking that remote assets really exist...");
+const includeAudio = reallyExists(`/var/www/html/assets/lexicon/${dbname}/audio`);
+const includePictures = reallyExists(`/var/www/html/assets/lexicon/${dbname}/pictures`);
+console.log(`Copy audio? ${includeAudio ? "yes" : "no"}`);
+console.log(`Copy pictures? ${includePictures ? "yes" : "no"}`);
+
+const filesNeeded = [];
+if (includeAudio) {
+  filesNeeded.push("audio");
+}
+if (includePictures) {
+  filesNeeded.push("pictures");
+}
+
+if (filesNeeded.length === 0) {
+  console.warn("Project has no assets. Copy complete.");
+  process.exit(0);
+}
+
+const tarTargets = filesNeeded.join(" ");
+
 console.warn("Creating assets tarball in remote...");
 execSync(
-  `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- tar chf /tmp/assets-${dbname}.tar --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" .`,
+  `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- tar chf /tmp/assets-${dbname}.tar --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" ${tarTargets}`,
 );
 const sizeStr = run(
   `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- sh -c 'ls -l /tmp/assets-${dbname}.tar | cut -d" " -f5'`,
 );

From 801f723075a735cb288b3b7f8a607150ad52631a Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Mon, 10 Jun 2024 15:37:31 +0700
Subject: [PATCH 16/26] Remove now-completed TODO comments

---
 backup.mjs | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index 5312effee7..2e0d9a8aa7 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -1,5 +1,3 @@
-// TODO: Rename to backup.mjs before committing
-
 import { execSync, spawn } from "child_process";
 import { existsSync, mkdtempSync, rmSync, statSync } from "fs";
 import { MongoClient, ObjectId } from "mongodb";
@@ -150,7 +148,6 @@ await new Promise((resolve) => setTimeout(resolve, 2000));
 console.warn("Setting up kubectl port forwarding for remote Mongo...");
 const remoteMongoPort = await randomFreePort();
-// TODO: Improve by finding a local port that's not in use, rather than hardcoding this
 let portForwardingReady;
 const portForwardingPromise = new Promise((resolve) => {
   portForwardingReady = resolve;
 });

From 80d90be2e345979386e5a6c740b1b20cd7d129dc Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Mon, 10 Jun 2024 15:40:34 +0700
Subject: [PATCH 17/26] Stop using `echo -n` as it may cause Windows issues

It's possible that Windows is doing something strange here that's
causing the `echo` to be handled by the Windows shell instead of as
part of the kubectl input passed to `sh`. Switching to plain `echo` and
then stripping the trailing newline from the result should produce the
same value without any cross-platform hiccups.
---
 backup.mjs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index 2e0d9a8aa7..8a350eb513 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -64,8 +64,10 @@ function reallyExists(name) {
   // Sometimes the audio and/or pictures folders in assets are symlinks, and sometimes they're broken symlinks
   // This returns true if the name is a real file/directory *or* a symlink with a valid target, or false if it doesn't exist or is broken
   const result = execSync(
-    `kubectl --context=${context} --namespace=languageforge exec -c app deploy/app -- sh -c 'readlink -eq ${name} >/dev/null && echo -n yes || echo -n no'`,
-  ).toString();
+    `kubectl --context=${context} --namespace=languageforge exec -c app deploy/app -- sh -c 'readlink -eq ${name} >/dev/null && echo yes || echo no'`,
+  )
+    .toString()
+    .trimEnd();
   if (result === "yes") return true;
   if (result === "no") return false;
   throw new Error(`Unexpected result from readlink ${name}: ${result}`);

From a8dbffa903bc049c342a4ad82aaf484ba33fcce5 Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Tue, 11 Jun 2024 14:52:08 +0700
Subject: [PATCH 18/26] Make backup script slightly more cross-platform

Windows has issues with single-quotes for quoting command-line params,
but thankfully Linux handles double-quotes correctly in all the places
I used single-quotes, so we'll just switch to double-quotes everywhere.

---
 backup.mjs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index 8a350eb513..2d1b744fa3 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -64,7 +64,7 @@ function reallyExists(name) {
   // Sometimes the audio and/or pictures folders in assets are symlinks, and sometimes they're broken symlinks
   // This returns true if the name is a real file/directory *or* a symlink with a valid target, or false if it doesn't exist or is broken
   const result = execSync(
-    `kubectl --context=${context} --namespace=languageforge exec -c app deploy/app -- sh -c 'readlink -eq ${name} >/dev/null && echo yes || echo no'`,
+    `kubectl --context=${context} --namespace=languageforge exec -c app deploy/app -- sh -c "readlink -eq ${name} >/dev/null && echo yes || echo no"`,
   )
     .toString()
     .trimEnd();
@@ -277,7 +277,7 @@ execSync(
   `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- tar chf /tmp/assets-${dbname}.tar --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" ${tarTargets}`,
 );
 const sizeStr = run(
-  `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- sh -c 'ls -l /tmp/assets-${dbname}.tar | cut -d" " -f5'`,
+  `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- sh -c "ls -l /tmp/assets-${dbname}.tar | cut -d' ' -f5"`,
 );
 const correctSize = +sizeStr;
 console.warn(`Assets tarball size is ${sizeStr}`);

From 04a3201fa7a8de6907db030375e59096c454fa6b Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Tue, 11 Jun 2024 15:02:06 +0700
Subject: [PATCH 19/26] Mongo doesn't like `.insertMany([])`

Mongo doesn't like it when you call `.insertMany` and pass it an empty
list. You'd think they would handle that case gracefully, but they
don't, and Mongo throws the error "Invalid BulkOperation, Batch cannot
be empty". So we will skip calling `.insertMany` if there are no
records to insert.
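The fix is a simple length guard in front of each bulk call, sketched
here (same shape as the change in the diff below):

    if (indexes?.length) await localColl.createIndexes(indexes);
    if (docs?.length) await localColl.insertMany(docs); // .insertMany([]) would throw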
---
 backup.mjs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index 2d1b744fa3..ca1d153709 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -226,8 +226,8 @@ for (const remoteColl of collections) {
   try {
     await localColl.dropIndexes();
   } catch (_) {} // Throws if collection doesn't exist, which is fine
-  await localColl.createIndexes(indexes);
-  await localColl.insertMany(docs);
+  if (indexes?.length) await localColl.createIndexes(indexes);
+  if (docs?.length) await localColl.insertMany(docs);
   console.log(`  ${docs.length} documents copied`);
 }
 console.warn(`${dbname} database successfully copied`);

From d2ce9476f1bf292fcc0c733a6db6e3510f89e88e Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Tue, 11 Jun 2024 16:30:18 +0700
Subject: [PATCH 20/26] Address one last TODO comment

---
 backup.mjs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index ca1d153709..923c8714d9 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -202,13 +202,6 @@ project.users = { [adminId]: { role: "project_manager" } };
 project.ownerRef = new ObjectId(adminId);
 console.warn(project.users);
 
-// TODO: Move to after database is copied, so there's never a race condition where the project exists but its entry database doesn't
-console.warn("Copying project record...");
-await localConn
-  .db("scriptureforge")
-  .collection("projects")
-  .findOneAndReplace({ _id: new ObjectId(projId) }, project, { upsert: true });
-
 // Mongo removed the .copyDatabase method in version 4.2, whose release notes said to just use mongodump/mongorestore if you want to do that
 
 console.warn(`Copying ${dbname} database...`);
@@ -232,6 +225,13 @@ for (const remoteColl of collections) {
 }
 console.warn(`${dbname} database successfully copied`);
 
+// Copy project record after its database has been copied, so there's never a race condition where the project exists but its entry database doesn't
+console.warn("Copying project record...");
+await localConn
+  .db("scriptureforge")
+  .collection("projects")
+  .findOneAndReplace({ _id: new ObjectId(projId) }, project, { upsert: true });
+
 // NOTE: mongodump/mongorestore approach below can be revived once Kubernetes 1.30 is installed on client *and* server, so kubectl exec is finally reliable
 
 // console.warn(`About to try fetching ${dbname} database from remote, will retry until success`);

From 038451ed3237948d26a9c9452bdaef098363751c Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Tue, 11 Jun 2024 16:32:20 +0700
Subject: [PATCH 21/26] Remove one unnecessary line of output

---
 backup.mjs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backup.mjs b/backup.mjs
index 923c8714d9..bb2a57a630 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -200,7 +200,6 @@ console.log("Project code:", project.projectCode);
 const dbname = `sf_${project.projectCode}`;
 project.users = { [adminId]: { role: "project_manager" } };
 project.ownerRef = new ObjectId(adminId);
-console.warn(project.users);
 
 // Mongo removed the .copyDatabase method in version 4.2, whose release notes said to just use mongodump/mongorestore if you want to do that

From 6bd5c1dbd46d81cb95de8685181df6daa4a95c46 Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Tue, 11 Jun 2024 16:37:56 +0700
Subject: [PATCH 22/26] Better EDIT THIS section, add explanatory comments

---
 backup.mjs | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index bb2a57a630..cad3536d16 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -5,22 +5,20 @@ import os from "os";
 import path from "path";
 import net from "net";
 
+// Expected arguments: first arg is project ID (5dbf805650b51914727e06c4) or URL (http://localhost:8080/app/lexicon/5dbf805650b51914727e06c4)
+// Second arg is "qa" or "staging" to copy from staging, "live" or "prod" or "production" to copy from production
+// NOTE: You must edit the context names below if they don't match the context names you have (see `kubectl config get-contexts` output)
+
 // ===== EDIT THIS =====
 
 const stagingContext = "dallas-rke";
 const prodContext = "aws-rke";
 
-// Choose one, comment out the other
-// Alternately, pass a second argument to this script: "qa" or "staging" select staging, "live" or "prod" or "production" select production
-
-let contextName = "staging";
-let context = stagingContext;
-
-// let contextName = 'production'
-// let context = prodContext
-
 // ===== END of EDIT THIS =====
 
+let defaultContext = stagingContext;
+let defaultContextName = "staging";
+
 // Create a temp dir reliably
 const tempdir = mkdtempSync(path.join(os.tmpdir(), "lfbackup-"));
 let portForwardProcess;
@@ -107,6 +105,9 @@ if (URL.canParse(arg)) {
   projId = arg;
 }
 
+let context = defaultContext;
+let contextName = defaultContextName;
+
 if (process.argv.length > 3) {
   const env = process.argv[3];
   switch (env) {

From d1aae7bc449954cfc72233238d23578eb4635fd6 Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Tue, 11 Jun 2024 16:43:19 +0700
Subject: [PATCH 23/26] Explain how to select prod if CLI arg missing

---
 backup.mjs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/backup.mjs b/backup.mjs
index cad3536d16..83aa8c61b3 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -138,6 +138,9 @@ if (process.argv.length > 3) {
     console.warn(`Valid values are qa, staging, live, prod, or production`);
     process.exit(2);
   }
+} else {
+  console.warn("No environment selected. Defaulting to staging environment.");
+  console.warn('Pass "prod" or "production" as second arg to copy projects from production environment instead.');
 }
 
 projId = projId.trim();

From b22a0dde2bd078ce658e6afdf1479d6d1d25cb13 Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Wed, 12 Jun 2024 16:10:44 +0700
Subject: [PATCH 24/26] Clean up assets tarball when done

Also use pod name instead of deploy/app, since not every user account
has access to deploy objects, at least on production.

---
 backup.mjs | 55 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index 83aa8c61b3..970c5eaaa4 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -24,15 +24,33 @@ const tempdir = mkdtempSync(path.join(os.tmpdir(), "lfbackup-"));
 let portForwardProcess;
 let localConn;
 let remoteConn;
+let remoteTarball = undefined;
+let remotePodname = undefined;
 
 async function cleanup() {
-  if (existsSync(tempdir)) {
-    console.warn(`Cleaning up temporary directory ${tempdir}...`);
-    rmSync(tempdir, { recursive: true, force: true });
-  }
-  if (localConn) await localConn.close();
-  if (remoteConn) await remoteConn.close();
-  if (portForwardProcess) await portForwardProcess.kill();
+  try {
+    if (existsSync(tempdir)) {
+      console.warn(`Cleaning up temporary directory ${tempdir}...`);
+      rmSync(tempdir, { recursive: true, force: true });
+    }
+  } catch (_) {}
+  try {
+    if (remotePodname && remoteTarball) {
+      console.warn(`Cleaning up assets tarball from remote side...`);
+      execSync(
+        `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- rm -f ${remoteTarball}`,
+      );
+    }
+  } catch (_) {}
+  try {
+    if (localConn) await localConn.close();
+  } catch (_) {}
+  try {
+    if (remoteConn) await remoteConn.close();
+  } catch (_) {}
+  try {
+    if (portForwardProcess) await portForwardProcess.kill();
+  } catch (_) {}
 }
 
 async function randomFreePort() {
@@ -62,7 +80,7 @@ function reallyExists(name) {
   // Sometimes the audio and/or pictures folders in assets are symlinks, and sometimes they're broken symlinks
   // This returns true if the name is a real file/directory *or* a symlink with a valid target, or false if it doesn't exist or is broken
   const result = execSync(
-    `kubectl --context=${context} --namespace=languageforge exec -c app deploy/app -- sh -c "readlink -eq ${name} >/dev/null && echo yes || echo no"`,
+    `kubectl --context=${context} --namespace=languageforge exec -c app pod/${remotePodname} -- sh -c "readlink -eq ${name} >/dev/null && echo yes || echo no"`,
   )
     .toString()
     .trimEnd();
@@ -254,6 +272,11 @@ await localConn
 //   }
 // }
 
+console.warn("Getting name of remote app pod...");
+remotePodname = run(
+  `kubectl --context="${context}" --namespace=languageforge get pod -o jsonpath="{.items[*]['metadata.name']}" -l app=app --field-selector "status.phase=Running"`,
+);
+
 console.warn("Checking that remote assets really exist...");
 const includeAudio = reallyExists(`/var/www/html/assets/lexicon/${dbname}/audio`);
 const includePictures = reallyExists(`/var/www/html/assets/lexicon/${dbname}/pictures`);

 console.warn("Creating assets tarball in remote...");
+remoteTarball = `/tmp/assets-${dbname}.tar`;
 execSync(
-  `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- tar chf /tmp/assets-${dbname}.tar --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" ${tarTargets}`,
+  `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- tar chf ${remoteTarball} --owner=www-data --group=www-data -C "/var/www/html/assets/lexicon/${dbname}" ${tarTargets}`,
 );
 const sizeStr = run(
-  `kubectl --context="${context}" --namespace=languageforge exec -c app deploy/app -- sh -c "ls -l /tmp/assets-${dbname}.tar | cut -d' ' -f5"`,
+  `kubectl --context="${context}" --namespace=languageforge exec -c app pod/${remotePodname} -- sh -c "ls -l ${remoteTarball} | cut -d' ' -f5"`,
 );
 const correctSize = +sizeStr;
 console.warn(`Assets tarball size is ${sizeStr}`);
-
-console.warn("Getting name of remote app pod...");
-const pod = run(
-  `kubectl --context="${context}" --namespace=languageforge get pod -o jsonpath="{.items[*]['metadata.name']}" -l app=app --field-selector "status.phase=Running"`,
-);
 console.warn("Trying to fetch assets tarball with kubectl cp...");
 let failed = false;
 try {
   execSync(
-    `kubectl --context="${context}" --namespace=languageforge cp ${pod}:/tmp/assets-${dbname}.tar ${tempdir}/assets-${dbname}.tar`,
+    `kubectl --context="${context}" --namespace=languageforge cp ${remotePodname}:${remoteTarball} ${tempdir}/assets-${dbname}.tar`,
   );
 } catch (_) {
   console.warn("kubectl cp failed. Will try to continue with rsync...");
   failed = true;
@@ -309,7 +328,7 @@ if (!failed) {
   }
 }
 if (failed) {
   console.warn("Ensuring rsync exists in target container...");
   execSync(
-    `kubectl exec --context="${context}" -c app deploy/app -- bash -c "which rsync || (apt update && apt install rsync -y)"`,
+    `kubectl exec --context="${context}" -c app pod/${remotePodname} -- bash -c "which rsync || (apt update && apt install rsync -y)"`,
   );
   console.warn("\n===== IMPORTANT NOTE =====");
   console.warn(
     "The rsync transfer may (probably will) stall at 100%. You'll have to find the rsync process and kill it. Sorry about that.",
   );
   console.warn("===== IMPORTANT NOTE =====\n");
   let done = false;
   while (!done) {
     try {
       execSync(
-        `rsync -v --partial --info=progress2 --rsync-path="/tmp/" --rsh="kubectl --context=${context} --namespace=languageforge exec -i -c app deploy/app -- " "rsync:/tmp/assets-${dbname}.tar" "${tempdir}/"`,
+        `rsync -v --partial --info=progress2 --rsync-path="/tmp/" --rsh="kubectl --context=${context} --namespace=languageforge exec -i -c app pod/${remotePodname} -- " "rsync:/tmp/assets-${dbname}.tar" "${tempdir}/"`,
         { stdio: "inherit" }, // Allows us to see rsync progress
       );
       done = true;

From 45d82949f933077d72ce0bec8e2d61200334fd76 Mon Sep 17 00:00:00 2001
From: Robin Munn
Date: Wed, 12 Jun 2024 16:15:21 +0700
Subject: [PATCH 25/26] Add note re drive letter bug in kubectl cp

---
 backup.mjs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/backup.mjs b/backup.mjs
index 970c5eaaa4..da6e091e9a 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -21,6 +21,8 @@ let defaultContextName = "staging";
 
 // Create a temp dir reliably
 const tempdir = mkdtempSync(path.join(os.tmpdir(), "lfbackup-"));
+// TODO: Work around kubectl bug where drive letters are interpreted as pod names by kubectl cp
+// Might solve by changing C: to \\localhost\C$\
 let portForwardProcess;
 let localConn;
 let remoteConn;

From 1e3ced21865a8614e747599303306c3cd00d2a38 Mon Sep 17 00:00:00 2001
From: Tim Haasdyk
Date: Wed, 12 Jun 2024 11:49:43 +0200
Subject: [PATCH 26/26] Rewrite path to work around kubectl interpreting
 Windows drive letter as pod name

---
 backup.mjs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/backup.mjs b/backup.mjs
index da6e091e9a..b7ee7ea555 100644
--- a/backup.mjs
+++ b/backup.mjs
@@ -20,9 +20,9 @@ let defaultContext = stagingContext;
 let defaultContextName = "staging";
 
 // Create a temp dir reliably
-const tempdir = mkdtempSync(path.join(os.tmpdir(), "lfbackup-"));
-// TODO: Work around kubectl bug where drive letters are interpreted as pod names by kubectl cp
-// Might solve by changing C: to \\localhost\C$\
+const tempdir = mkdtempSync(path.join(os.tmpdir(), "lfbackup-"))
+  // Work around kubectl bug where Windows drive letters are interpreted as pod names by kubectl cp
+  .replace(/^C:\\/, "\\\\localhost\\C$\\");
 let portForwardProcess;
 let localConn;
 let remoteConn;
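For illustration, with a hypothetical Windows temp path, the rewrite maps

    C:\Users\me\AppData\Local\Temp\lfbackup-abc123

to the UNC form of the same directory,

    \\localhost\C$\Users\me\AppData\Local\Temp\lfbackup-abc123

so kubectl cp no longer parses the leading "C:" as a pod name.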