Skip to content

Commit

Permalink
mdn: skip incorrectly parsed url
Browse files: browse the repository at this point in the history
  • Loading branch information
myfreeer committed Dec 5, 2020
1 parent 8657e2b commit b186d7a
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
18 changes: 15 additions & 3 deletions src/mdn/process-url/skip-process.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import {skipExternal as skipExternalLogger} from 'website-scrap-engine/lib/logger/logger';
import {
skipExternal as skipExternalLogger,
error as errorLogger
} from 'website-scrap-engine/lib/logger/logger';
import URI from 'urijs';
import { downloadableHosts } from './consts';
import {Resource} from 'website-scrap-engine/lib/resource';
Expand Down Expand Up @@ -42,9 +45,18 @@ export const skipProcess = (
}
const uri = URI(url), host = uri.host();
if (host && !downloadableHosts[host]) {
skipExternalLogger.debug('skipped external link', host, url, parent && parent.url);
skipExternalLogger.debug('skipped external link', host, url, parent?.url);
return;
}
// Skip urls that were parsed incorrectly: the result is reported as
// absolute but has no host, e.g. `localhost:3000`, which appears on
// https://developer.mozilla.org/zh-CN/docs/Learn/Tools_and_testing/
// Client-side_JavaScript_frameworks/React_getting_started
if (uri.is('absolute') && !host) {
errorLogger.info('skipped incorrectly parsed url', url, parent?.url);
return;
}

const path = uri.path();
if (path.startsWith('/presentations/') ||
// very large file
Expand All @@ -54,7 +66,7 @@ export const skipProcess = (
path.startsWith('/files/5243/IconsCommunications_20130401.psd') ||
path.startsWith('/files/5245/IconsSettings_20130415.psd') ||
path.startsWith('/files/5247/IconsPrimaryAction_20130501.psd')) {
skipExternalLogger.debug('skipped link to large file', url, parent && parent.url);
skipExternalLogger.debug('skipped link to large file', url, parent?.url);
return;
}
return url;
Expand Down
5 changes: 5 additions & 0 deletions test/mdn/process-url/skip-process.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,4 +60,9 @@ describe('skip-process', function () {
expect(skipProcess('http://www.mozilla.org/favicon.ico', null, null))
.toBe('https://developer.mozilla.org/static/img/favicon32.png');
});

test('skip incorrectly-parsed url', () => {
// https://developer.mozilla.org/zh-CN/docs/Learn/Tools_and_testing/Client-side_JavaScript_frameworks/React_getting_started
exceptSkipped('localhost:3000');
});
});

0 comments on commit b186d7a

Please sign in to comment.