Skip to content

Commit

Permalink
Merge pull request #28 from davisjam/NPMClient-PersistentCache
Browse files Browse the repository at this point in the history
Npm client persistent cache
  • Loading branch information
davisjam authored Apr 5, 2018
2 parents 0ca8bab + 97c761a commit 9342146
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 36 deletions.
11 changes: 11 additions & 0 deletions src/cache/client/npm/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# v1

## v1.2

### v1.2.0

*Additions*

- Add a persistent local cache. This is now the default.

Contributors:
- [Jamie Davis](davisjam@vt.edu)

## v1.1

### v1.1.1
Expand Down
4 changes: 4 additions & 0 deletions src/cache/client/npm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ If the server has not seen the regex before, it should have an answer if you que

If you cannot connect to the server or your query is malformed, you'll get the answer "invalid".

## Optimizations

This module maintains a persistent local cache stored in `os.tmpdir()` to reduce the number of HTTP queries.

## Privacy

By using this module you are consenting to send us your regexes.
Expand Down
2 changes: 1 addition & 1 deletion src/cache/client/npm/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "vuln-regex-detector",
"version": "1.1.1",
"version": "1.2.0",
"description": "Detect vulnerable regexes by querying a service hosted at Virginia Tech.",
"main": "vuln-regex-detector-client.js",
"directories": {
Expand Down
224 changes: 189 additions & 35 deletions src/cache/client/npm/vuln-regex-detector-client.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,25 @@
'use strict';

/* Dependencies. */
/**********
* Dependencies.
**********/

/* I/O. */
const https = require('https');
const syncRequest = require('sync-request');

/* Globals. */
/* Persistent cache. */
const path = require('path');
const fs = require('fs');
const crypto = require('crypto');

/* Misc. */
const os = require('os');

/**********
* Globals.
**********/

const REQUEST_LOOKUP_ONLY = 'LOOKUP_ONLY'; // Will only make a lookup, won't be submitting an UPDATE later.

const RESPONSE_VULNERABLE = 'VULNERABLE';
Expand All @@ -17,12 +32,20 @@ const DEFAULT_CONFIG = {
port: 8000
};

/* Logging. */
const LOGGING = false;
const USE_CACHE = true;

/* Map pattern to RESPONSE_VULNERABLE or RESPONSE_SAFE in case of duplicate queries.
* We do not cache RESPONSE_UNKNOWN or RESPONSE_INVALID responses since these might change. */
let patternCache = {};
/* Cache config. */
const CACHE_TYPES = {
none: 'none',
memory: 'memory',
persistent: 'persistent'
};
const CACHE_TYPE = CACHE_TYPES.persistent;

/**********
* Functions.
**********/

/**
* @param regex: RegExp or string (e.g. /re/ or 're')
Expand Down Expand Up @@ -51,13 +74,11 @@ function checkRegex (regex, config) {
function promiseResult (options, data) {
log(`promiseResult: data ${data}`);
return new Promise((resolve, reject) => {
if (USE_CACHE) {
/* Check cache to avoid I/O. */
const cacheHit = checkCache(_pattern);
if (cacheHit !== RESPONSE_UNKNOWN) {
log(`Cache hit: ${cacheHit}`);
return resolve(cacheHit);
}
/* Check cache to avoid I/O. */
const cacheHit = checkCache(_pattern);
if (cacheHit !== RESPONSE_UNKNOWN) {
log(`Cache hit: ${cacheHit}`);
return resolve(cacheHit);
}

const req = https.request(options, (res) => {
Expand All @@ -74,9 +95,7 @@ function checkRegex (regex, config) {

const result = serverResponseToRESPONSE(response);
log(`end: result ${result}`);
if (USE_CACHE) {
updateCache(postObject.pattern, result);
}
updateCache(postObject.pattern, result);

if (result === RESPONSE_INVALID) {
return reject(result);
Expand Down Expand Up @@ -122,13 +141,11 @@ function checkRegexSync (regex, config) {
}
log(`Input OK. _pattern /${_pattern}/ _config ${JSON.stringify(_config)}`);

if (USE_CACHE) {
/* Check cache to avoid I/O. */
const cacheHit = checkCache(_pattern);
if (cacheHit !== RESPONSE_UNKNOWN) {
log(`Cache hit: ${cacheHit}`);
return cacheHit;
}
/* Check cache to avoid I/O. */
const cacheHit = checkCache(_pattern);
if (cacheHit !== RESPONSE_UNKNOWN) {
log(`Cache hit: ${cacheHit}`);
return cacheHit;
}

let postObject = generatePostObject(_pattern);
Expand Down Expand Up @@ -157,9 +174,7 @@ function checkRegexSync (regex, config) {

/* Convert to a RESPONSE_X value. */
const result = serverResponseToRESPONSE(responseBody);
if (USE_CACHE) {
updateCache(postObject.pattern, result);
}
updateCache(postObject.pattern, result);

return result;
} catch (e) {
Expand Down Expand Up @@ -253,32 +268,171 @@ function serverResponseToRESPONSE (response) {

/**********
* Cache.
*
* The cache in use is controlled by CACHE_TYPE.
* If CACHE_TYPE is 'none' then APIs behave appropriately.
* The cache is implemented using a key-value interface.
*
* Cache accesses are synchronous.
* If CACHE_TYPE is 'memory' that's fine.
* If CACHE_TYPE is 'persistent' then there are some performance concerns.
* TODO Address this with sync and async versions of the APIs.
**********/

function useCache () {
return CACHE_TYPE !== CACHE_TYPES.none;
}

function updateCache (pattern, response) {
if (!USE_CACHE) {
if (!useCache()) {
return;
}

return kvPut(pattern, response);
}

/* Returns RESPONSE_{VULNERABLE|SAFE} on hit, else RESPONSE_UNKNOWN on miss or disabled. */
function checkCache (pattern) {
if (!useCache()) {
return RESPONSE_UNKNOWN;
}

return kvGet(pattern);
}

function kvPut (key, value) {
/* Only cache VULNERABLE|SAFE responses. */
if (response !== RESPONSE_VULNERABLE && response !== RESPONSE_SAFE) {
if (value !== RESPONSE_VULNERABLE && value !== RESPONSE_SAFE) {
return;
}

if (!patternCache.hasOwnProperty(pattern)) {
patternCache[pattern] = response;
/* Put in the appropriate cache. */
switch (CACHE_TYPE) {
case CACHE_TYPES.memory:
return kvPutMemory(key, value);
case CACHE_TYPES.persistent:
return kvPutPersistent(key, value);
default:
return RESPONSE_UNKNOWN;
}
}

/* Returns RESPONSE_{VULNERABLE|SAFE} on hit, else RESPONSE_UNKNOWN. */
function checkCache (pattern) {
if (!USE_CACHE) {
function kvGet (key) {
/* Get from the appropriate cache. */
switch (CACHE_TYPE) {
case CACHE_TYPES.memory:
return kvGetMemory(key);
case CACHE_TYPES.persistent:
return kvGetPersistent(key);
default:
return RESPONSE_UNKNOWN;
}
}

/* Persistent KV. */

const PERSISTENT_CACHE_DIR = path.join(os.tmpdir(), 'vuln-regex-detector-client-persistentCache');
log(`PERSISTENT_CACHE_DIR ${PERSISTENT_CACHE_DIR}`);

let kvPersistentInitialized = false;
let kvPersistentCouldNotInitialize = false;

/* Returns true if initialized, false on initialization failure. */
function initializeKVPersistent () {
/* Tried before? */
if (kvPersistentInitialized) {
return true;
}
if (kvPersistentCouldNotInitialize) {
return false;
}

/* First time through. */

/* First try a mkdir. Dir might exist already. */
try {
fs.mkdirSync(PERSISTENT_CACHE_DIR);
} catch (e) {
}

/* If we have a dir now, we're happy. */
try {
const stats = fs.lstatSync(PERSISTENT_CACHE_DIR);
if (stats.isDirectory()) {
kvPersistentInitialized = true;
return true;
} else {
kvPersistentCouldNotInitialize = true;
return false;
}
} catch (e) {
/* Hmm. */
kvPersistentCouldNotInitialize = true;
return false;
}
}

function kvPersistentFname (key) {
/* Need something we can safely use as a file name.
* Keys are patterns and might contain /'s or \'s.
*
* Using a hash might give us false reports on collisions, but this is
* exceedingly unlikely in typical use cases (a few hundred regexes tops). */
const hash = crypto.createHash('md5').update(key).digest('hex');
const fname = path.join(PERSISTENT_CACHE_DIR, `${hash}.json`);
return fname;
}

function kvPutPersistent (key, value) {
if (!initializeKVPersistent()) {
log(`kvPutPersistent: could not initialize`);
return;
}

try {
/* This must be atomic in case of concurrent put and get from different processes.
* Hence the use of a tmp file and rename. */
const fname = kvPersistentFname(key);
const tmpFname = `${fname}-${process.pid}-tmp`;
log(`kvPutPersistent: putting result in ${fname}`);
fs.writeFileSync(tmpFname, JSON.stringify({key: key, value: value}));
fs.renameSync(tmpFname, fname);
} catch (e) {
/* Ignore failures. */
}
}

function kvGetPersistent (key) {
if (!initializeKVPersistent()) {
return RESPONSE_UNKNOWN;
}

const hit = patternCache[pattern];
try {
const fname = kvPersistentFname(key);
log(`kvGetPersistent: getting result from ${fname}`);
const cont = JSON.parse(fs.readFileSync(fname));
return cont.value;
} catch (e) {
return RESPONSE_UNKNOWN;
}
}

/* Memory (volatile) KV. */

/* Map pattern to RESPONSE_VULNERABLE or RESPONSE_SAFE in case of duplicate queries.
* We do not cache RESPONSE_UNKNOWN or RESPONSE_INVALID responses since these might change. */
let memoryPattern2response = {};

function kvPutMemory (key, value) {
if (!memoryPattern2response.hasOwnProperty(key)) {
memoryPattern2response[key] = value;
}
}

function kvGetMemory (key) {
const hit = memoryPattern2response[key];
if (hit) {
log(`checkCache: pattern ${pattern}: hit in patternCache\n ${JSON.stringify(patternCache)}`);
log(`kvGetMemory: hit: ${key} -> ${hit}`);
return hit;
} else {
return RESPONSE_UNKNOWN;
Expand Down

0 comments on commit 9342146

Please sign in to comment.