From 54d29c424116bafeda5a85de71444b531d817d17 Mon Sep 17 00:00:00 2001 From: Michal Piechowiak Date: Tue, 15 Feb 2022 16:32:07 +0100 Subject: [PATCH] chore(gatsby): upgrade from lmdb-store to lmdb (#34576) * optimistic bump * .clear -> clearSync * tmp: patch lmdb so we can bundle it for engines * adjust cache-resilience to import from lmdb * bump lmdb to latest * use clearKeptObjects() * fix mocking os.platform() in tests causing loading of wrong lmdb binary * ensure single instance of lmdb in a process - for gatsby serve case mostly * add comment about lmdb webpack loader patch --- .../cache-resilience/gatsby-node.js | 2 +- packages/gatsby/package.json | 2 +- .../src/commands/__tests__/build-utils.ts | 11 ++--- packages/gatsby/src/commands/build-utils.ts | 4 +- .../gatsby/src/datastore/common/iterable.ts | 22 ++++++++- .../src/datastore/lmdb/lmdb-datastore.ts | 38 ++++++++++++--- .../src/datastore/lmdb/updates/nodes.ts | 2 +- packages/gatsby/src/datastore/types.ts | 2 +- .../schema/graphql-engine/bundle-webpack.ts | 15 ++++++ .../graphql-engine/lmdb-bundling-patch.ts | 46 +++++++++++++++++++ packages/gatsby/src/utils/cache-lmdb.ts | 2 +- yarn.lock | 39 ++++++++-------- 12 files changed, 143 insertions(+), 42 deletions(-) create mode 100644 packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts diff --git a/integration-tests/cache-resilience/gatsby-node.js b/integration-tests/cache-resilience/gatsby-node.js index 21ab949eb533a..3847cc1170211 100644 --- a/integration-tests/cache-resilience/gatsby-node.js +++ b/integration-tests/cache-resilience/gatsby-node.js @@ -3,7 +3,7 @@ const v8 = require(`v8`) const glob = require(`glob`) const path = require(`path`) const _ = require(`lodash`) -const { open } = require(`lmdb-store`) +const { open } = require(`lmdb`) const { saveState } = require(`gatsby/dist/redux/save-state`) diff --git a/packages/gatsby/package.json b/packages/gatsby/package.json index 916887054e88f..2c8e05a459844 100644 --- a/packages/gatsby/package.json +++ b/packages/gatsby/package.json @@ -103,7 +103,7 @@ "joi": "^17.4.2", "json-loader": "^0.5.7", "latest-version": "5.1.0", - "lmdb-store": "^1.6.11", + "lmdb": "2.2.1", "lodash": "^4.17.21", "md5-file": "^5.0.0", "meant": "^1.0.3", diff --git a/packages/gatsby/src/commands/__tests__/build-utils.ts b/packages/gatsby/src/commands/__tests__/build-utils.ts index 42b825c22640b..a1f55b7694cf2 100644 --- a/packages/gatsby/src/commands/__tests__/build-utils.ts +++ b/packages/gatsby/src/commands/__tests__/build-utils.ts @@ -5,7 +5,6 @@ import { IStaticQueryResultState, } from "../../redux/types" -const platformSpy = jest.spyOn(require(`os`), `platform`) interface IMinimalStateSliceForTest { html: IGatsbyState["html"] pages: IGatsbyState["pages"] @@ -245,16 +244,13 @@ describe(`calcDirtyHtmlFiles`, () => { }) describe(`onCreatePage + deletePage + createPage that change path casing of a page`, () => { - afterAll(() => { - platformSpy.mockRestore() - }) - it(`linux (case sensitive file system)`, () => { let isolatedCalcDirtyHtmlFiles jest.isolateModules(() => { - platformSpy.mockImplementation(() => `linux`) + process.env.TEST_FORCE_CASE_FS = `SENSITIVE` isolatedCalcDirtyHtmlFiles = require(`../build-utils`).calcDirtyHtmlFiles + delete process.env.TEST_FORCE_CASE_FS }) const state = generateStateToTestHelper({ @@ -280,9 +276,10 @@ describe(`calcDirtyHtmlFiles`, () => { it(`windows / mac (case insensitive file system)`, () => { let isolatedCalcDirtyHtmlFiles jest.isolateModules(() => { - platformSpy.mockImplementation(() => `win32`) + process.env.TEST_FORCE_CASE_FS = `INSENSITIVE` isolatedCalcDirtyHtmlFiles = require(`../build-utils`).calcDirtyHtmlFiles + delete process.env.TEST_FORCE_CASE_FS }) const state = generateStateToTestHelper({ diff --git a/packages/gatsby/src/commands/build-utils.ts b/packages/gatsby/src/commands/build-utils.ts index ece72c8746c63..d018a7bb0fcbc 100644 --- a/packages/gatsby/src/commands/build-utils.ts +++ b/packages/gatsby/src/commands/build-utils.ts @@ -65,7 +65,9 @@ export const removePageFiles = async ( }) } -const FSisCaseInsensitive = platform() === `win32` || platform() === `darwin` +const FSisCaseInsensitive = process.env.TEST_FORCE_CASE_FS + ? process.env.TEST_FORCE_CASE_FS === `INSENSITIVE` + : platform() === `win32` || platform() === `darwin` function normalizePagePath(path: string): string { if (path === `/`) { return `/` diff --git a/packages/gatsby/src/datastore/common/iterable.ts b/packages/gatsby/src/datastore/common/iterable.ts index 2948c6378e172..eb43e1ca4be41 100644 --- a/packages/gatsby/src/datastore/common/iterable.ts +++ b/packages/gatsby/src/datastore/common/iterable.ts @@ -1,3 +1,9 @@ +declare module "lmdb" { + // currently lmdb doesn't have typings for this export + export function clearKeptObjects(): void +} + +import { clearKeptObjects } from "lmdb" /** * Wrapper for any iterable providing chainable interface and convenience methods * similar to array. @@ -10,10 +16,22 @@ export class GatsbyIterable { constructor(private source: Iterable | (() => Iterable)) {} - [Symbol.iterator](): Iterator { + *[Symbol.iterator](): Iterator { const source = typeof this.source === `function` ? this.source() : this.source - return source[Symbol.iterator]() + + let i = 0 + for (const val of source) { + yield val + + // clearKeptObjects just make it possible for WeakRefs used in any way during current + // sync execution tick to be garbage collected. It doesn't force GC, just remove + // internal strong references in V8. + // see https://github.com/kriszyp/weak-lru-cache/issues/4 + if (++i % 100 === 0) { + clearKeptObjects() + } + } } concat(other: Iterable): GatsbyIterable { diff --git a/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts b/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts index 6d216352222ec..81d1a2bfe251c 100644 --- a/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts +++ b/packages/gatsby/src/datastore/lmdb/lmdb-datastore.ts @@ -1,4 +1,4 @@ -import { RootDatabase, open, ArrayLikeIterable } from "lmdb-store" +import { RootDatabase, open, ArrayLikeIterable } from "lmdb" // import { performance } from "perf_hooks" import { ActionsUnion, IGatsbyNode } from "../../redux/types" import { updateNodes } from "./updates/nodes" @@ -61,8 +61,31 @@ function getRootDb(): RootDatabase { return rootDb } +/* eslint-disable @typescript-eslint/no-namespace */ +declare global { + namespace NodeJS { + // eslint-disable-next-line @typescript-eslint/naming-convention + interface Global { + __GATSBY_OPEN_LMDBS?: Map + } + } +} + function getDatabases(): ILmdbDatabases { if (!databases) { + // __GATSBY_OPEN_LMDBS tracks if we already opened given db in this process + // In `gatsby serve` case we might try to open it twice - once for engines + // and second to get access to `SitePage` nodes (to power trailing slashes + // redirect middleware). This ensure there is single instance within a process. + // Using more instances seems to cause weird random errors. + if (!globalThis.__GATSBY_OPEN_LMDBS) { + globalThis.__GATSBY_OPEN_LMDBS = new Map() + } + databases = globalThis.__GATSBY_OPEN_LMDBS.get(fullDbPath) + if (databases) { + return databases + } + const rootDb = getRootDb() databases = { nodes: rootDb.openDB({ @@ -86,6 +109,7 @@ function getDatabases(): ILmdbDatabases { // dupSort: true }), } + globalThis.__GATSBY_OPEN_LMDBS.set(fullDbPath, databases) } return databases } @@ -184,10 +208,10 @@ function updateDataStore(action: ActionsUnion): void { const dbs = getDatabases() // Force sync commit dbs.nodes.transactionSync(() => { - dbs.nodes.clear() - dbs.nodesByType.clear() - dbs.metadata.clear() - dbs.indexes.clear() + dbs.nodes.clearSync() + dbs.nodesByType.clearSync() + dbs.metadata.clearSync() + dbs.indexes.clearSync() }) break } @@ -229,8 +253,8 @@ function updateDataStore(action: ActionsUnion): void { function clearIndexes(): void { const dbs = getDatabases() dbs.nodes.transactionSync(() => { - dbs.metadata.clear() - dbs.indexes.clear() + dbs.metadata.clearSync() + dbs.indexes.clearSync() }) } diff --git a/packages/gatsby/src/datastore/lmdb/updates/nodes.ts b/packages/gatsby/src/datastore/lmdb/updates/nodes.ts index d2426e3959ef4..1622548020f27 100644 --- a/packages/gatsby/src/datastore/lmdb/updates/nodes.ts +++ b/packages/gatsby/src/datastore/lmdb/updates/nodes.ts @@ -1,5 +1,5 @@ import { ActionsUnion, IGatsbyNode } from "../../../redux/types" -import { Database } from "lmdb-store" +import type { Database } from "lmdb" type NodeId = string diff --git a/packages/gatsby/src/datastore/types.ts b/packages/gatsby/src/datastore/types.ts index 80b6e8d7bdb24..58a1c0e3653e2 100644 --- a/packages/gatsby/src/datastore/types.ts +++ b/packages/gatsby/src/datastore/types.ts @@ -1,4 +1,4 @@ -import { Database } from "lmdb-store" +import { Database } from "lmdb" import { IGatsbyNode } from "../redux/types" import { GatsbyGraphQLType } from "../../index" import { IInputQuery } from "./common/query" diff --git a/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts b/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts index 8a5ffedb83a1c..b7248a3bbec30 100644 --- a/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts +++ b/packages/gatsby/src/schema/graphql-engine/bundle-webpack.ts @@ -70,6 +70,21 @@ export async function createGraphqlEngineBundle( ], module: { rules: [ + { + test: require.resolve(`lmdb`), + parser: { amd: false }, + use: [ + { + loader: require.resolve(`@vercel/webpack-asset-relocator-loader`), + options: { + outputAssetBase: `assets`, + }, + }, + { + loader: require.resolve(`./lmdb-bundling-patch`), + }, + ], + }, { test: /\.m?js$/, type: `javascript/auto`, diff --git a/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts b/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts new file mode 100644 index 0000000000000..06241aa2b68d0 --- /dev/null +++ b/packages/gatsby/src/schema/graphql-engine/lmdb-bundling-patch.ts @@ -0,0 +1,46 @@ +import { createRequireFromPath } from "gatsby-core-utils" + +// This is hacky webpack loader that does string replacements to +// allow lmdb@2 to be bundled by webpack for engines. +// Currently `@vercel/webpack-asset-relocator-loader doesn't handle +// the way lmdb is loading binaries and dictionary file +// (can't statically analyze it). So we perform few localized changes +// and we replace dynamic values with hardcoded ones to allow +// asset-relocator to pick those assets up and handle them. +// +// Because lmdb code can diverge, we also pin version in gatsby +// dependencies and will have manually bump it (with renovate most likely). +// +// To solve this upstream few things would need to change: +// - https://github.com/DoctorEvidence/lmdb-js/blob/544b3fda402f24a70a0e946921e4c9134c5adf85/node-index.js#L14-L16 +// - https://github.com/DoctorEvidence/lmdb-js/blob/544b3fda402f24a70a0e946921e4c9134c5adf85/open.js#L77 +// Reliance on `import.meta.url` + usage of `.replace` is what seems to cause problems currently. + +export default function (source: string): string { + let lmdbBinaryLocation + try { + const lmdbRequire = createRequireFromPath(require.resolve(`lmdb`)) + const nodeGypBuild = lmdbRequire(`node-gyp-build`) + const path = require(`path`) + + lmdbBinaryLocation = nodeGypBuild.path( + path.dirname(require.resolve(`lmdb`)).replace(`/dist`, ``) + ) + } catch (e) { + return source + } + + return source + .replace( + `require$1('node-gyp-build')(dirName)`, + `require(${JSON.stringify(lmdbBinaryLocation)})` + ) + .replace( + `require$2.resolve('./dict/dict.txt')`, + `require.resolve('../dict/dict.txt')` + ) + .replace( + /fs\.readFileSync\(new URL\('\.\/dict\/dict\.txt',\s*\(typeof\s*document\s*===\s*'undefined'\s*\?\s*new\s*\(require\('u'\s*\+\s*'rl'\)\.URL\)\s*\('file:'\s*\+\s*__filename\).href\s*:\s*\(document\.currentScript\s*&&\s*document\.currentScript\.src\s*\|\|\s*new URL\('index\.cjs',\s*document\.baseURI\)\.href\)\)\.replace\(\/dist\[\\\\\\\/\]index\.cjs\$\/,\s*''\)\)\)/g, + `fs.readFileSync(require.resolve('../dict/dict.txt'))` + ) +} diff --git a/packages/gatsby/src/utils/cache-lmdb.ts b/packages/gatsby/src/utils/cache-lmdb.ts index 08ab4abdd8a34..98e94f45c1dc3 100644 --- a/packages/gatsby/src/utils/cache-lmdb.ts +++ b/packages/gatsby/src/utils/cache-lmdb.ts @@ -1,4 +1,4 @@ -import { open, RootDatabase, Database, DatabaseOptions } from "lmdb-store" +import { open, RootDatabase, Database, DatabaseOptions } from "lmdb" import fs from "fs-extra" import path from "path" diff --git a/yarn.lock b/yarn.lock index 8bb59c4c6df76..5116adc393f22 100644 --- a/yarn.lock +++ b/yarn.lock @@ -14583,17 +14583,16 @@ livereload-js@^2.3.0: version "2.3.0" resolved "https://registry.yarnpkg.com/livereload-js/-/livereload-js-2.3.0.tgz#c3ab22e8aaf5bf3505d80d098cbad67726548c9a" -lmdb-store@^1.6.11: - version "1.6.11" - resolved "https://registry.yarnpkg.com/lmdb-store/-/lmdb-store-1.6.11.tgz#801da597af8c7a01c81f87d5cc7a7497e381236d" - integrity sha512-hIvoGmHGsFhb2VRCmfhodA/837ULtJBwRHSHKIzhMB7WtPH6BRLPsvXp1MwD3avqGzuZfMyZDUp3tccLvr721Q== +lmdb@2.2.1: + version "2.2.1" + resolved "https://registry.yarnpkg.com/lmdb/-/lmdb-2.2.1.tgz#b7fd22ed2268ab74aa71108b793678314a7b94bb" + integrity sha512-tUlIjyJvbd4mqdotI9Xe+3PZt/jqPx70VKFDrKMYu09MtBWOT3y2PbuTajX+bJFDjbgLkQC0cTx2n6dithp/zQ== dependencies: + msgpackr "^1.5.4" nan "^2.14.2" node-gyp-build "^4.2.3" - ordered-binary "^1.0.0" - weak-lru-cache "^1.0.0" - optionalDependencies: - msgpackr "^1.4.7" + ordered-binary "^1.2.4" + weak-lru-cache "^1.2.2" load-bmfont@^1.3.1, load-bmfont@^1.4.0: version "1.4.0" @@ -16222,10 +16221,10 @@ msgpackr-extract@^1.0.14: nan "^2.14.2" node-gyp-build "^4.2.3" -msgpackr@^1.4.7: - version "1.4.7" - resolved "https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.4.7.tgz#d802ade841e7d2e873000b491cdda6574a3d5748" - integrity sha512-bhC8Ed1au3L3oHaR/fe4lk4w7PLGFcWQ5XY/Tk9N6tzDRz8YndjCG68TD8zcvYZoxNtw767eF/7VpaTpU9kf9w== +msgpackr@^1.5.4: + version "1.5.4" + resolved "https://registry.yarnpkg.com/msgpackr/-/msgpackr-1.5.4.tgz#2b6ea6cb7d79c0ad98fc76c68163c48eda50cf0d" + integrity sha512-Z7w5Jg+2Q9z9gJxeM68d7tSuWZZGnFIRhZnyqcZCa/1dKkhOCNvR1TUV3zzJ3+vj78vlwKRzUgVDlW4jiSOeDA== optionalDependencies: msgpackr-extract "^1.0.14" @@ -17121,10 +17120,10 @@ ora@^5.4.1: strip-ansi "^6.0.0" wcwidth "^1.0.1" -ordered-binary@^1.0.0: - version "1.1.3" - resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.1.3.tgz#11dbc0a4cb7f8248183b9845e031b443be82571e" - integrity sha512-tDTls+KllrZKJrqRXUYJtIcWIyoQycP7cVN7kzNNnhHKF2bMKHflcAQK+pF2Eb1iVaQodHxqZQr0yv4HWLGBhQ== +ordered-binary@^1.2.4: + version "1.2.4" + resolved "https://registry.yarnpkg.com/ordered-binary/-/ordered-binary-1.2.4.tgz#51d3a03af078a0bdba6c7bc8f4fedd1f5d45d83e" + integrity sha512-A/csN0d3n+igxBPfUrjbV5GC69LWj2pjZzAAeeHXLukQ4+fytfP4T1Lg0ju7MSPSwq7KtHkGaiwO8URZN5IpLg== ordered-read-streams@^1.0.0: version "1.0.1" @@ -24527,10 +24526,10 @@ wcwidth@^1.0.0, wcwidth@^1.0.1: dependencies: defaults "^1.0.3" -weak-lru-cache@^1.0.0: - version "1.1.2" - resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.1.2.tgz#a909a97372aabdfbfe3eb33580af255b3b198834" - integrity sha512-Bi5ae8Bev3YulgtLTafpmHmvl3vGbanRkv+qqA2AX8c3qj/MUdvSuaHq7ukDYBcMDINIaRPTPEkXSNCqqWivuA== +weak-lru-cache@^1.2.2: + version "1.2.2" + resolved "https://registry.yarnpkg.com/weak-lru-cache/-/weak-lru-cache-1.2.2.tgz#fdbb6741f36bae9540d12f480ce8254060dccd19" + integrity sha512-DEAoo25RfSYMuTGc9vPJzZcZullwIqRDSI9LOy+fkCJPi6hykCnfKaXTuPBDuXAUcqHXyOgFtHNp/kB2FjYHbw== web-namespaces@^1.0.0: version "1.1.2"