diff --git a/mirrorzim.sh b/mirrorzim.sh
index 78a2a1f..a4dae60 100755
--- a/mirrorzim.sh
+++ b/mirrorzim.sh
@@ -82,16 +82,20 @@ if [ -z ${MAIN_PAGE_VERSION+x} ]; then
   MAIN_PAGE_VERSION=""
 fi

+
 printf "\nDownload the zim file...\n"
 ZIM_FILE_SOURCE_URL="$(./tools/getzim.sh download $WIKI_TYPE $WIKI_TYPE $LANGUAGE_CODE all maxi latest | grep 'URL:' | cut -d' ' -f3)"
 ZIM_FILE=$(echo $ZIM_FILE_SOURCE_URL | rev | cut -d'/' -f1 | rev)
 TMP_DIRECTORY="./tmp/$(echo $ZIM_FILE | cut -d'.' -f1)"

-printf "\nRemove tmp directory $TMP_DIRECTORY before run ..."
-rm -rf $TMP_DIRECTORY
+# Note: a successful unpack ends by writing $TMP_DIRECTORY/zimdump_version (output of `zimdump --version`).
+# Its presence marks the directory as complete and reusable; without it the directory is purged and unpacked again.
+
+printf "\nRemove any partial tmp directory %s before run ..." "$TMP_DIRECTORY"
+test -e "$TMP_DIRECTORY/zimdump_version" || rm -rf "$TMP_DIRECTORY"

-printf "\nUnpack the zim file into $TMP_DIRECTORY...\n"
-zimdump dump ./snapshots/$ZIM_FILE --dir $TMP_DIRECTORY
+printf "\nUnpack the zim file into %s if not there already...\n" "$TMP_DIRECTORY"
+test -e "$TMP_DIRECTORY/zimdump_version" || (zimdump dump "./snapshots/$ZIM_FILE" --dir "$TMP_DIRECTORY" && zimdump --version > "$TMP_DIRECTORY/zimdump_version")

 # Find the main page of ZIM
 ZIM_FILE_MAIN_PAGE=$(zimdump info ./snapshots/$ZIM_FILE | grep -oP 'main page: A/\K\S+')
@@ -108,6 +112,9 @@ node ./bin/run $TMP_DIRECTORY \
   ${HOSTING_IPNS_HASH:+--hostingipnshash=$HOSTING_IPNS_HASH} \
   ${MAIN_PAGE_VERSION:+--mainpageversion=$MAIN_PAGE_VERSION}

+printf "\n-------------------------\n"
+printf "\nIPFS_PATH=%s\n" "$IPFS_PATH"
+
 printf "\nAdding the processed tmp directory to IPFS\n(this part may take long time on a slow disk):\n"
 CID=$(ipfs add -r --cid-version 1 --pin=false --offline -Qp $TMP_DIRECTORY)
 MFS_DIR="/${ZIM_FILE}__$(date +%F_%T)"
diff --git a/src/site-transforms.ts b/src/site-transforms.ts
index cd95807..07bbafa 100644
--- a/src/site-transforms.ts
+++ b/src/site-transforms.ts
@@ -9,6 +9,8 @@ import {
   readdirSync,
   readFileSync,
   renameSync,
+  closeSync,
+  openSync,
   unlinkSync,
   writeFileSync
 } from 'fs'
@@ -90,16 +92,26 @@ export const fixRedirects = async ({
   unpackedZimDir,
   wikiFolder
 }: Directories) => {
+  // Marker file: created after a run that printed nothing to stderr; when it exists this pass is skipped.
+  const doneMarker = `${unpackedZimDir}/redirects_fixed`
+  if (existsSync(doneMarker)) {
+    return
+  }
+
   cli.action.start(' Fixing redirects ')
   const fixupLog = `${unpackedZimDir}_redirect-fixups.log`
   if (existsSync(fixupLog)) {
     unlinkSync(fixupLog)
   }
+  // With DEBUG set the command's stdout is appended to the fixup log; otherwise it is discarded.
+  const stdoutTarget = process.env.DEBUG ? `>> ${fixupLog}` : '> /dev/null'
   const util = require('util')
   const exec = util.promisify(require('child_process').exec)
   // redirect files are smaller than 1k so we can skip bigger ones, making the performance acceptable
-  const findRedirects = String.raw`find ${wikiFolder} -type f -size -800c -exec fgrep -l "0;url=A/" {} + -exec sed -i "s|0;url=A/|0;url=|" {} >> ${fixupLog} +`
+  const findRedirects = String.raw`find ${wikiFolder} -type f -size -800c -exec fgrep -l "0;url=A/" {} + -exec sed -i "s|0;url=A/|0;url=|" {} + ${stdoutTarget} || true`
   const { stdout, stderr } = await exec(findRedirects, {env: {'LC_ALL': 'C'}})
+  // `|| true` hides the exit status from exec, so an empty stderr is the only success signal used here.
+  if (!stderr) closeSync(openSync(doneMarker, 'w'))
   cli.action.stop()
   if (stdout) console.log('redirect fix stdout:', stdout)
   if (stderr) console.error('redirect fix stderr:', stderr)
@@ -127,6 +139,11 @@ export const insertIndexRedirect = (options: Options) => {
   const indexPath = join(options.unpackedZimDir, 'index.html')
   const wikiIndexPath = join(options.unpackedZimDir, 'wiki', 'index.html')

+  // Delete any index.html already present before the new one is written below.
+  if (existsSync(indexPath)) {
+    unlinkSync(indexPath)
+  }
+
   writeFileSync(
     indexPath,
     template({