gatsbyjs · imjoshin · Feb 16, 2022 · Feb 11, 2022 · Feb 13, 2022 · Feb 14, 2022
diff --git a/benchmarks/memory/.gitignore b/benchmarks/memory/.gitignore
@@ -0,0 +1,2 @@
+output
+.docker.memusage
diff --git a/benchmarks/memory/Dockerfile b/benchmarks/memory/Dockerfile
@@ -1,12 +1,26 @@
 FROM node:14-buster
+ARG jemalloc
 ENV NODE_ENV=production
 ENV CI=1
 ENV GATSBY_CPU_COUNT=4
 RUN apt-get update -y && apt-get upgrade -y && apt-get install git curl npm -y
 RUN npm i -g gatsby-cli gatsby-dev-cli
-WORKDIR /usr/src/app
+
+# raise Node's old-space heap limit to ~16 GB (16368 MB) so it is high enough for every test case
+ENV NODE_OPTIONS="--max-old-space-size=16368"
+
 RUN echo "\n\necho \"Welcome to the Gatsby Memory benchmark container!\\n  - /usr/src/gatsby : Your local gatsby repo\\n  - /usr/src/app : The memory benchmark gatsby site\\n\"" > /root/.bashrc
 
+# when built with --build-arg jemalloc=1, install jemalloc and preload it via /etc/ld.so.preload
+RUN if [ "$jemalloc" = "1" ]; then \
+  echo "Using jemalloc for memory allocation" && \
+  apt-get update && apt-get install -y libjemalloc-dev=5.1.0-3 && \
+  echo "/usr/lib/x86_64-linux-gnu/libjemalloc.so" >> /etc/ld.so.preload && \
+  echo "\n\necho \"This container is using jemalloc.\\n\"" >> /root/.bashrc; \
+fi
+
+WORKDIR /usr/src/app
+
 # set up gatsby-dev
 RUN gatsby-dev --set-path-to-repo /usr/src/gatsby
 

diff --git a/benchmarks/memory/README.md b/benchmarks/memory/README.md
@@ -12,15 +12,62 @@ Within the container, two points to your local filesystem are mounted:
 - /usr/src/gatsby : Your local gatsby repo
 - /usr/src/app : The memory benchmark gatsby site
 
+If you'd like to configure `jemalloc` to run within the container, set the `JEMALLOC=1` env var when building the docker container.
+
 ## Commands
 
+### Tests
+
+#### yarn test --memory X --num-nodes Y --node-size Z
+
+Runs a test build within a docker container with the given memory allotment.
+Within our gatsby-node, we'll create Y nodes, each with a string property of size Z.
+
+Example: running a build with 500 nodes of 1 MB each, in a Docker container with 8 GB of memory.
+
+```
+$ yarn test --memory 8g --num-nodes 500 --node-size 1m
+```
+
+#### yarn test-suite --name some-name --suite [incremental|exhaustive]
+
+Runs through test suites defined in `scripts/test-suite.js` and outputs results to `output/some-name`.
+Output includes a `results.csv` with a summary of all builds, as well as breakdowns for each memory configuration.
+
+##### incremental
+
+Incremental tests run builds with a `node-size` of 1m. For each memory allotment, it will start with 100
+nodes in the build and increment by 100 on each success. The test will stop when all builds in a given
+configuration fail.
+See `incrementalConfig` in `scripts/test-suite.js` to customize test sets.
+
+##### exhaustive
+
+Exhaustive tests are just that, exhaustive. It will measure the time/success of every combination given.
+See `exhaustiveConfig` in `scripts/test-suite.js` to customize test sets.
+
 ### Docker
 
 These commands are used for interfacing with docker and have built-in utilities for managing the docker container.
 
 #### yarn docker:build
 
 Builds the container used for testing.
+If you'd like to configure `jemalloc` to run within the container, set the `JEMALLOC=1` env var.
+
+Example:
+
+```
+$ JEMALLOC=1 yarn docker:build
+```
+
+#### yarn docker:remove
+
+Removes the docker image.
+
+#### yarn docker:rebuild
+
+Shorthand for remove + build.
 
 #### yarn docker:start
 
@@ -81,17 +128,9 @@ When starting working with this benchmark:
 
 - start `yarn watch` (possibly with `--scope`) in monorepo
 - start `gatsby-dev` outside of docker in benchmark directory (just like with regular site)
-- `yarn docker:connect` to get inside docker
-- `npm rebuild` to rebuild binaries inside docker
+- `yarn test --memory 8g --num-nodes 1000 --node-size 1m`
 
 And repeat as many times as you want:
 
 - make changes to `gatsby` source code as you normally would
-- run `yarn gatsby:build` inside docker
-
-## Testing
-
-TODO
-
-- How to configure memory limits
-- Where to look
+- run your `yarn test` command again
diff --git a/benchmarks/memory/gatsby-node.js b/benchmarks/memory/gatsby-node.js
@@ -1,8 +1,19 @@
 const { cpuCoreCount } = require(`gatsby-core-utils`)
 
-const NUM_NODES = parseInt(process.env.NUM_NODES || 300, 10)
+const NUM_KEYS_IN_LARGE_SIZE_OBJ = parseInt(process.env.BUILD_LARGE_OBJECT_COUNT || 1024, 10)
+const NUM_NODES = parseInt(process.env.BUILD_NUM_NODES || 300, 10)
+const LARGE_FIELD_SIZE_RAW = process.env.BUILD_STRING_NODE_SIZE || '1m'
+
+// convert raw size to number
+const regexpSize = /([0-9]+)([kmg])?/;
+const match = LARGE_FIELD_SIZE_RAW.match(regexpSize);
+const suffixSizes = ['k', 'm', 'g'];
+let bytesMultiplier = 1;
+if (match.length > 2 && suffixSizes.indexOf(match[2]) >= 0) {
+  bytesMultiplier = 2 ** ((suffixSizes.indexOf(match[2]) + 1) * 10)
+}
+const LARGE_FIELD_SIZE = parseInt(match[1], 10) * bytesMultiplier;
 
-const NUM_KEYS_IN_LARGE_SIZE_OBJ = 1024
 
 exports.sourceNodes = async ({ actions, reporter }) => {
   const contentDigest = Date.now().toString() // make each sourcing mark everything as dirty
@@ -25,7 +36,7 @@ exports.sourceNodes = async ({ actions, reporter }) => {
       number2: NUM_NODES - i,
       number3: i % 20,
       largeSizeObj,
-      largeSizeString: `x`.repeat(1024 * 1024),
+      largeSizeString: `x`.repeat(LARGE_FIELD_SIZE),
       internal: {
         contentDigest,
         type: `Test`,

diff --git a/benchmarks/memory/package.json b/benchmarks/memory/package.json
@@ -10,12 +10,16 @@
     "gatsby:develop": "NODE_ENV=development yarn gatsby develop -H 0.0.0.0 -p 9000",
     "gatsby:build:debug": "node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby build",
     "gatsby:develop:debug": "NODE_ENV=development node --nolazy --inspect-brk=0.0.0.0:9229 node_modules/.bin/gatsby develop -H 0.0.0.0 -p 9000",
-    "docker:build": "docker build -t gatsby-memory .",
+    "docker:build": "docker build -t gatsby-memory . --build-arg jemalloc=$JEMALLOC",
+    "docker:remove": "docker image rm -f gatsby-memory",
+    "docker:rebuild": "yarn docker:stop; yarn docker:remove && yarn docker:build",
     "docker:start": "./scripts/docker-start",
     "docker:connect": "./scripts/docker-connect",
     "docker:start-and-connect": "./scripts/docker-start && sleep 1 && ./scripts/docker-connect",
     "docker:stop": "./scripts/docker-stop",
-    "docker:stats": "./scripts/docker-stats"
+    "docker:stats": "./scripts/docker-stats",
+    "test": "node scripts/test.js",
+    "test-suite": "node scripts/test-suite.js"
   },
   "repository": {
     "type": "git",

diff --git a/benchmarks/memory/scripts/docker-start b/benchmarks/memory/scripts/docker-start
@@ -5,16 +5,21 @@ if [ -n "$DOCKER_ID" ]; then
   return 1
 fi
 
+MEMORY_LIMIT="${DOCKER_MEMORY_LIMIT:-2g}"  # container memory cap; falls back to 2g when DOCKER_MEMORY_LIMIT is unset or empty
+
 DOCKER_ID=$(\
   docker run -td \
   --mount type=bind,source="$(pwd)/../..",target=/usr/src/gatsby \
   --mount type=bind,source="$(pwd)",target=/usr/src/app \
   --publish 9229:9229 \
   --publish 9000:9000 \
-  --memory="2g" \
-  --memory-swap="2g" \
+  --memory="${MEMORY_LIMIT}" \
+  --memory-swap="${MEMORY_LIMIT}" \
   gatsby-memory \
   | head -c 12 \
 )
 
-echo "\nStarted container id ${DOCKER_ID}! Run \`yarn docker:connect\` to connect to the container.\n"
+sleep 1  # pause before exec'ing into the new container (presumably to let it finish starting)
+docker exec $DOCKER_ID bash -c "/usr/src/app/scripts/docker-write-memory &"  # start the memory-usage writer in the background inside the container
+
+echo "\nStarted container id ${DOCKER_ID} with ${MEMORY_LIMIT} of memory! Run \`yarn docker:connect\` to connect to the container.\n"
diff --git a/benchmarks/memory/scripts/docker-write-memory b/benchmarks/memory/scripts/docker-write-memory
@@ -0,0 +1,13 @@
+while true; do  # poll forever; each pass overwrites the usage file with a fresh total
+  PROCESS="node"
+
+  # collect the RSS column (first ps field) of every process whose ps line contains "node", case-insensitively; the grep helpers themselves are filtered out
+  PROCESS_USAGES=$(ps -eo rss,pid,euser,args:100 --sort %mem | grep -v grep | grep -i "${PROCESS}" | awk '{print $1}')
+
+  # add the RSS values into a single integer (prints 0 when nothing matched)
+  SUM_USAGE=$(echo "$PROCESS_USAGES" | awk '{s+=$1} END {printf "%.0f\n", s}')
+
+  # overwrite the usage file with the current total
+  echo -e "$SUM_USAGE" > /usr/src/app/.docker.memusage
+  sleep .25
+done