Skip to content

Commit

Permalink
fix: mirrorzim.sh idempotence
Browse files Browse the repository at this point in the history
This enables us to reuse the output of the zimdump dump and fixRedirects
stages if they finished successfully, and to run the other stages more than
once without affecting the output.
  • Loading branch information
lidel committed Feb 16, 2021
1 parent 0739d06 commit 84a70b9
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
15 changes: 11 additions & 4 deletions mirrorzim.sh
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,20 @@ if [ -z ${MAIN_PAGE_VERSION+x} ]; then
MAIN_PAGE_VERSION=""
fi


printf "\nDownload the zim file...\n"
ZIM_FILE_SOURCE_URL="$(./tools/getzim.sh download $WIKI_TYPE $WIKI_TYPE $LANGUAGE_CODE all maxi latest | grep 'URL:' | cut -d' ' -f3)"
ZIM_FILE=$(echo $ZIM_FILE_SOURCE_URL | rev | cut -d'/' -f1 | rev)
TMP_DIRECTORY="./tmp/$(echo $ZIM_FILE | cut -d'.' -f1)"

printf "\nRemove tmp directory $TMP_DIRECTORY before run ..."
rm -rf $TMP_DIRECTORY
# Note: after a successful zimdump dump, this script creates $TMP_DIRECTORY/zimdump_version.
# We use the presence of that file as a hint for whether the tmp directory should be purged.

printf "\nRemove any partial tmp directory $TMP_DIRECTORY before run ..."
test -e $TMP_DIRECTORY/zimdump_version || rm -rf $TMP_DIRECTORY

printf "\nUnpack the zim file into $TMP_DIRECTORY...\n"
zimdump dump ./snapshots/$ZIM_FILE --dir $TMP_DIRECTORY
printf "\nUnpack the zim file into $TMP_DIRECTORY if not there already...\n"
test -e $TMP_DIRECTORY/zimdump_version || (zimdump dump ./snapshots/$ZIM_FILE --dir $TMP_DIRECTORY && zimdump --version > $TMP_DIRECTORY/zimdump_version)

# Find the main page of ZIM
ZIM_FILE_MAIN_PAGE=$(zimdump info ./snapshots/$ZIM_FILE | grep -oP 'main page: A/\K\S+')
Expand All @@ -108,6 +112,9 @@ node ./bin/run $TMP_DIRECTORY \
${HOSTING_IPNS_HASH:+--hostingipnshash=$HOSTING_IPNS_HASH} \
${MAIN_PAGE_VERSION:+--mainpageversion=$MAIN_PAGE_VERSION}

printf "\n-------------------------\n"
printf "\nIPFS_PATH=$IPFS_PATH\n"

printf "\nAdding the processed tmp directory to IPFS\n(this part may take long time on a slow disk):\n"
CID=$(ipfs add -r --cid-version 1 --pin=false --offline -Qp $TMP_DIRECTORY)
MFS_DIR="/${ZIM_FILE}__$(date +%F_%T)"
Expand Down
15 changes: 14 additions & 1 deletion src/site-transforms.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ import {
readdirSync,
readFileSync,
renameSync,
closeSync,
openSync,
unlinkSync,
writeFileSync
} from 'fs'
Expand Down Expand Up @@ -90,16 +92,23 @@ export const fixRedirects = async ({
unpackedZimDir,
wikiFolder
}: Directories) => {
const done = `${unpackedZimDir}/redirects_fixed`
if (existsSync(done)) {
return
}

cli.action.start(' Fixing redirects ')
const fixupLog = `${unpackedZimDir}_redirect-fixups.log`
if (existsSync(fixupLog)) {
unlinkSync(fixupLog)
}
const output = process.env.DEBUG ? `>> ${fixupLog}` : '> /dev/null'
const util = require('util')
const exec = util.promisify(require('child_process').exec)
// redirect files are smaller than 1k so we can skip bigger ones, making the performance acceptable
const findRedirects = String.raw`find ${wikiFolder} -type f -size -800c -exec fgrep -l "0;url=A/" {} + -exec sed -i "s|0;url=A/|0;url=|" {} >> ${fixupLog} +`
const findRedirects = String.raw`find ${wikiFolder} -type f -size -800c -exec fgrep -l "0;url=A/" {} + -exec sed -i "s|0;url=A/|0;url=|" {} + ${output} || true`
const { stdout, stderr } = await exec(findRedirects, {env: {'LC_ALL': 'C'}})
if (!stderr) closeSync(openSync(done, 'w'))
cli.action.stop()
if (stdout) console.log('redirect fix stdout:', stdout)
if (stderr) console.error('redirect fix stderr:', stderr)
Expand Down Expand Up @@ -127,6 +136,10 @@ export const insertIndexRedirect = (options: Options) => {
const indexPath = join(options.unpackedZimDir, 'index.html')
const wikiIndexPath = join(options.unpackedZimDir, 'wiki', 'index.html')

if (existsSync(indexPath)) {
unlinkSync(indexPath)
}

writeFileSync(
indexPath,
template({
Expand Down

0 comments on commit 84a70b9

Please sign in to comment.