Skip to content

Commit

Permalink
mdn: sitemap might already be decompressed
Browse files Browse the repository at this point in the history
  • Loading branch information
myfreeer committed Apr 9, 2023
1 parent 12f34b0 commit 4be8d54
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions src/mdn/decompress-sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type {DownloadResource} from 'website-scrap-engine/lib/life-cycle/types';
import {ResourceType} from 'website-scrap-engine/lib/resource';
import {promisify} from 'util';
import {gunzip, InputType} from 'zlib';

const gunzipAsync = promisify(gunzip);

/**
Expand All @@ -28,6 +29,27 @@ export const decompressSitemap = async (
body = Buffer.from(res.body.buffer,
res.body.byteOffset, res.body.byteLength);
}
let isGzip = true;
if (body instanceof ArrayBuffer) {
body = Buffer.from(body);
}
if (Buffer.isBuffer(body)) {
isGzip = body.readUint8(0) === 0x1f &&
body.readUint8(1) === 0x8b;
} else if (body instanceof Uint8Array) {
isGzip = body[0] === 0x1f &&
body[1] === 0x8b;
} else if (typeof body === 'string') {
isGzip = body.charCodeAt(0) === 0x1f &&
body.charCodeAt(1) === 0x8b;
}
if (!isGzip) {
// already decompressed
// make it xml
res.savePath =
res.savePath.replace(/.xml.gz$/, '.xml');
return res;
}
const decompressedBody = await gunzipAsync(body);
if (decompressedBody) {
res.body = decompressedBody;
Expand Down

0 comments on commit 4be8d54

Please sign in to comment.