Skip to content

Commit

Permalink
querystring: fix empty pairs and optimize parse()
Browse files Browse the repository at this point in the history
This commit fixes handling of empty pairs that occur before the end
of the query string so that they are also ignored.

Additionally, some optimizations have been made, including:

* Avoid unnecessary code execution where possible
* Use a lookup table when checking for hex characters
* Avoid forced decoding when '+' characters are encountered and we
are using the default decoder

Fixes: #10454
PR-URL: #11234
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Nicu Micleușanu <micnic90@gmail.com>
  • Loading branch information
mscdex authored and jasnell committed Feb 13, 2017
1 parent d3be0f8 commit ff785fd
Show file tree
Hide file tree
Showing 3 changed files with 166 additions and 129 deletions.
40 changes: 23 additions & 17 deletions benchmark/querystring/querystring-parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,42 +12,48 @@ var inputs = {
multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz',
multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' +
'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz',
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z'
manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z',
manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&',
altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi'
};

var bench = common.createBenchmark(main, {
type: Object.keys(inputs),
n: [1e6],
});

// A deopt followed by a reopt of main() can happen right when the timed loop
// starts, which seems to have a noticeable effect on the benchmark results.
// So we explicitly disable optimization of main() to avoid this potential
// issue.
v8.setFlagsFromString('--allow_natives_syntax');
eval('%NeverOptimizeFunction(main)');

function main(conf) {
var type = conf.type;
var n = conf.n | 0;
var input = inputs[type];
var i;

// Force-optimize querystring.parse() so that the benchmark doesn't get
// disrupted by the optimizer kicking in halfway through.
v8.setFlagsFromString('--allow_natives_syntax');
if (type !== 'multicharsep') {
querystring.parse(input);
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
querystring.parse(input);
} else {
querystring.parse(input, '&&&&&&&&&&');
eval('%OptimizeFunctionOnNextCall(querystring.parse)');
querystring.parse(input, '&&&&&&&&&&');
}
// Note: we do *not* use OptimizeFunctionOnNextCall() here because currently
// it causes a deopt followed by a reopt, which could make its way into the
// timed loop. Instead, just execute the function a "sufficient" number of
// times before the timed loop to ensure the function is optimized just once.
if (type === 'multicharsep') {
for (i = 0; i < n; i += 1)
querystring.parse(input, '&&&&&&&&&&');

var i;
if (type !== 'multicharsep') {
bench.start();
for (i = 0; i < n; i += 1)
querystring.parse(input);
querystring.parse(input, '&&&&&&&&&&');
bench.end(n);
} else {
for (i = 0; i < n; i += 1)
querystring.parse(input);

bench.start();
for (i = 0; i < n; i += 1)
querystring.parse(input, '&&&&&&&&&&');
querystring.parse(input);
bench.end(n);
}
}
246 changes: 134 additions & 112 deletions lib/querystring.js
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,25 @@ function stringify(obj, sep, eq, options) {
return '';
}

const isHexTable = [
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0 - 15
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 16 - 31
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 32 - 47
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, // 48 - 63
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 64 - 79
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 80 - 95
0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 96 - 111
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 112 - 127
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 128 ...
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // ... 256
];

function charCodes(str) {
if (str.length === 0) return [];
if (str.length === 1) return [str.charCodeAt(0)];
Expand Down Expand Up @@ -292,14 +311,14 @@ function parse(qs, sep, eq, options) {
const customDecode = (decode !== qsUnescape);

const keys = [];
var posIdx = 0;
var lastPos = 0;
var sepIdx = 0;
var eqIdx = 0;
var key = '';
var value = '';
var keyEncoded = customDecode;
var valEncoded = customDecode;
const plusChar = (customDecode ? '%20' : ' ');
var encodeCheck = 0;
for (var i = 0; i < qs.length; ++i) {
const code = qs.charCodeAt(i);
Expand All @@ -310,142 +329,145 @@ function parse(qs, sep, eq, options) {
// Key/value pair separator match!
const end = i - sepIdx + 1;
if (eqIdx < eqLen) {
// If we didn't find the key/value separator, treat the substring as
// part of the key instead of the value
if (lastPos < end)
// We didn't find the (entire) key/value separator
if (lastPos < end) {
// Treat the substring as part of the key instead of the value
key += qs.slice(lastPos, end);
} else if (lastPos < end)
value += qs.slice(lastPos, end);
if (keyEncoded)
key = decodeStr(key, decode);
if (valEncoded)
value = decodeStr(value, decode);

if (key || value || lastPos - posIdx > sepLen || i === 0) {
// Use a key array lookup instead of using hasOwnProperty(), which is
// slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
if (keyEncoded)
key = decodeStr(key, decode);
} else {
const curValue = obj[key] || '';
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe
// since we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else if (curValue)
obj[key] = [curValue, value];
// We saw an empty substring between separators
if (--pairs === 0)
return obj;
lastPos = i + 1;
sepIdx = eqIdx = 0;
continue;
}
} else {
if (lastPos < end) {
value += qs.slice(lastPos, end);
if (valEncoded)
value = decodeStr(value, decode);
}
} else if (i === 1) {
// A pair with repeated sep could be added into obj in the first loop
// and it should be deleted
delete obj[key];
if (keyEncoded)
key = decodeStr(key, decode);
}

// Use a key array lookup instead of using hasOwnProperty(), which is
// slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
} else {
const curValue = obj[key];
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe
// since we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}
if (--pairs === 0)
break;
return obj;
keyEncoded = valEncoded = customDecode;
encodeCheck = 0;
key = value = '';
posIdx = lastPos;
encodeCheck = 0;
lastPos = i + 1;
sepIdx = eqIdx = 0;
}
continue;
} else {
sepIdx = 0;
if (!valEncoded) {
// Try to match an (valid) encoded byte (once) to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
} else if (encodeCheck > 0 &&
((code >= 48/*0*/ && code <= 57/*9*/) ||
(code >= 65/*A*/ && code <= 70/*F*/) ||
(code >= 97/*a*/ && code <= 102/*f*/))) {
if (++encodeCheck === 3)
valEncoded = true;
// Try matching key/value separator (e.g. '=') if we haven't already
if (eqIdx < eqLen) {
if (code === eqCodes[eqIdx]) {
if (++eqIdx === eqLen) {
// Key/value separator match!
const end = i - eqIdx + 1;
if (lastPos < end)
key += qs.slice(lastPos, end);
encodeCheck = 0;
lastPos = i + 1;
}
continue;
} else {
encodeCheck = 0;
eqIdx = 0;
if (!keyEncoded) {
// Try to match an (valid) encoded byte once to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
continue;
} else if (encodeCheck > 0) {
// eslint-disable-next-line no-extra-boolean-cast
if (!!isHexTable[code]) {
if (++encodeCheck === 3)
keyEncoded = true;
continue;
} else {
encodeCheck = 0;
}
}
}
}
}
}

// Try matching key/value separator (e.g. '=') if we haven't already
if (eqIdx < eqLen) {
if (code === eqCodes[eqIdx]) {
if (++eqIdx === eqLen) {
// Key/value separator match!
const end = i - eqIdx + 1;
if (lastPos < end)
key += qs.slice(lastPos, end);
encodeCheck = 0;
if (code === 43/*+*/) {
if (lastPos < i)
key += qs.slice(lastPos, i);
key += plusChar;
lastPos = i + 1;
continue;
}
continue;
} else {
eqIdx = 0;
if (!keyEncoded) {
// Try to match an (valid) encoded byte once to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
} else if (encodeCheck > 0 &&
((code >= 48/*0*/ && code <= 57/*9*/) ||
(code >= 65/*A*/ && code <= 70/*F*/) ||
(code >= 97/*a*/ && code <= 102/*f*/))) {
}
if (code === 43/*+*/) {
if (lastPos < i)
value += qs.slice(lastPos, i);
value += plusChar;
lastPos = i + 1;
} else if (!valEncoded) {
// Try to match an (valid) encoded byte (once) to minimize unnecessary
// calls to string decoding functions
if (code === 37/*%*/) {
encodeCheck = 1;
} else if (encodeCheck > 0) {
// eslint-disable-next-line no-extra-boolean-cast
if (!!isHexTable[code]) {
if (++encodeCheck === 3)
keyEncoded = true;
valEncoded = true;
} else {
encodeCheck = 0;
}
}
}
}

if (code === 43/*+*/) {
if (eqIdx < eqLen) {
if (lastPos < i)
key += qs.slice(lastPos, i);
key += '%20';
keyEncoded = true;
} else {
if (lastPos < i)
value += qs.slice(lastPos, i);
value += '%20';
valEncoded = true;
}
lastPos = i + 1;
}
}

// Check if we have leftover key or value data
if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) {
if (lastPos < qs.length) {
if (eqIdx < eqLen)
key += qs.slice(lastPos);
else if (sepIdx < sepLen)
value += qs.slice(lastPos);
}
if (keyEncoded)
key = decodeStr(key, decode);
if (valEncoded)
value = decodeStr(value, decode);
// Use a key array lookup instead of using hasOwnProperty(), which is
// slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
} else {
const curValue = obj[key];
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}
// Deal with any leftover key or value data
if (lastPos < qs.length) {
if (eqIdx < eqLen)
key += qs.slice(lastPos);
else if (sepIdx < sepLen)
value += qs.slice(lastPos);
} else if (eqIdx === 0) {
// We ended on an empty substring
return obj;
}
if (keyEncoded)
key = decodeStr(key, decode);
if (valEncoded)
value = decodeStr(value, decode);
// Use a key array lookup instead of using hasOwnProperty(), which is slower
if (keys.indexOf(key) === -1) {
obj[key] = value;
keys[keys.length] = key;
} else {
const curValue = obj[key];
// A simple Array-specific property check is enough here to
// distinguish from a string value and is faster and still safe since
// we are generating all of the values being assigned.
if (curValue.pop)
curValue[curValue.length] = value;
else
obj[key] = [curValue, value];
}

return obj;
Expand Down
9 changes: 9 additions & 0 deletions test/parallel/test-querystring.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,15 @@ const qsTestCases = [
['&&&&', '', {}],
['&=&', '=', { '': '' }],
['&=&=', '=&=', { '': [ '', '' ]}],
['a&&b', 'a=&b=', { 'a': '', 'b': '' }],
['a=a&&b=b', 'a=a&b=b', { 'a': 'a', 'b': 'b' }],
['&a', 'a=', { 'a': '' }],
['&=', '=', { '': '' }],
['a&a&', 'a=&a=', { a: [ '', '' ] }],
['a&a&a&', 'a=&a=&a=', { a: [ '', '', '' ] }],
['a&a&a&a&', 'a=&a=&a=&a=', { a: [ '', '', '', '' ] }],
['a=&a=value&a=', 'a=&a=value&a=', { a: [ '', 'value', '' ] }],
['foo+bar=baz+quux', 'foo%20bar=baz%20quux', { 'foo bar': 'baz quux' }],
[null, '', {}],
[undefined, '', {}]
];
Expand Down

0 comments on commit ff785fd

Please sign in to comment.