diff --git a/src/librustdoc/html/static/js/externs.js b/src/librustdoc/html/static/js/externs.js index 2338931a18fd2..93709e4e830ad 100644 --- a/src/librustdoc/html/static/js/externs.js +++ b/src/librustdoc/html/static/js/externs.js @@ -14,7 +14,7 @@ function initSearch(searchIndex){} * pathWithoutLast: Array, * pathLast: string, * generics: Array, - * bindings: Map<(string|integer), Array>, + * bindings: Map>, * }} */ let QueryElement; @@ -42,6 +42,7 @@ let ParserState; * totalElems: number, * literalSearch: boolean, * corrections: Array<{from: string, to: integer}>, + * typeFingerprint: Uint32Array, * }} */ let ParsedQuery; diff --git a/src/librustdoc/html/static/js/search.js b/src/librustdoc/html/static/js/search.js index 46e3a883da977..9d150767611e1 100644 --- a/src/librustdoc/html/static/js/search.js +++ b/src/librustdoc/html/static/js/search.js @@ -238,6 +238,10 @@ function initSearch(rawSearchIndex) { * @type {Array} */ let searchIndex; + /** + * @type {Uint32Array} + */ + let functionTypeFingerprint; let currentResults; /** * Map from normalized type names to integers. Used to make type search @@ -1049,6 +1053,8 @@ function initSearch(rawSearchIndex) { correction: null, proposeCorrectionFrom: null, proposeCorrectionTo: null, + // bloom filter build from type ids + typeFingerprint: new Uint32Array(4), }; } @@ -1144,7 +1150,6 @@ function initSearch(rawSearchIndex) { query.error = err; return query; } - if (!query.literalSearch) { // If there is more than one element in the query, we switch to literalSearch in any // case. @@ -1952,8 +1957,7 @@ function initSearch(rawSearchIndex) { * @param {integer} path_dist */ function addIntoResults(results, fullId, id, index, dist, path_dist, maxEditDistance) { - const inBounds = dist <= maxEditDistance || index !== -1; - if (dist === 0 || (!parsedQuery.literalSearch && inBounds)) { + if (dist <= maxEditDistance || index !== -1) { if (results.has(fullId)) { const result = results.get(fullId); if (result.dontValidate || result.dist <= dist) { @@ -2001,17 +2005,37 @@ function initSearch(rawSearchIndex) { const fullId = row.id; const searchWord = searchWords[pos]; - const in_args = row.type && row.type.inputs - && checkIfInList(row.type.inputs, elem, row.type.where_clause); - if (in_args) { - // path_dist is 0 because no parent path information is currently stored - // in the search index - addIntoResults(results_in_args, fullId, pos, -1, 0, 0, maxEditDistance); - } - const returned = row.type && row.type.output - && checkIfInList(row.type.output, elem, row.type.where_clause); - if (returned) { - addIntoResults(results_returned, fullId, pos, -1, 0, 0, maxEditDistance); + // fpDist is a minimum possible type distance, where "type distance" is the number of + // atoms in the function not present in the query + const tfpDist = compareTypeFingerprints( + fullId, + parsedQuery.typeFingerprint + ); + if (tfpDist !== false && + !(results_in_args.size >= MAX_RESULTS && tfpDist > results_in_args.max_dist) + ) { + const in_args = row.type && row.type.inputs + && checkIfInList(row.type.inputs, elem, row.type.where_clause); + if (in_args) { + results_in_args.max_dist = Math.max(results_in_args.max_dist || 0, tfpDist); + const maxDist = results_in_args.size < MAX_RESULTS ? + (tfpDist + 1) : + results_in_args.max_dist; + addIntoResults(results_in_args, fullId, pos, -1, tfpDist, 0, maxDist); + } + } + if (tfpDist !== false && + !(results_returned.size >= MAX_RESULTS && tfpDist > results_returned.max_dist) + ) { + const returned = row.type && row.type.output + && checkIfInList(row.type.output, elem, row.type.where_clause); + if (returned) { + results_returned.max_dist = Math.max(results_returned.max_dist || 0, tfpDist); + const maxDist = results_returned.size < MAX_RESULTS ? + (tfpDist + 1) : + results_returned.max_dist; + addIntoResults(results_returned, fullId, pos, -1, tfpDist, 0, maxDist); + } } if (!typePassesFilter(elem.typeFilter, row.ty)) { @@ -2070,6 +2094,17 @@ function initSearch(rawSearchIndex) { return; } + const tfpDist = compareTypeFingerprints( + row.id, + parsedQuery.typeFingerprint + ); + if (tfpDist === false) { + return; + } + if (results.size >= MAX_RESULTS && tfpDist > results.max_dist) { + return; + } + // If the result is too "bad", we return false and it ends this search. if (!unifyFunctionTypes( row.type.inputs, @@ -2088,11 +2123,12 @@ function initSearch(rawSearchIndex) { return; } - addIntoResults(results, row.id, pos, 0, 0, 0, Number.MAX_VALUE); + results.max_dist = Math.max(results.max_dist || 0, tfpDist); + addIntoResults(results, row.id, pos, 0, tfpDist, 0, Number.MAX_VALUE); } function innerRunQuery() { - let elem, i, nSearchWords, in_returned, row; + let elem, i, nSearchWords; let queryLen = 0; for (const elem of parsedQuery.elems) { @@ -2206,50 +2242,30 @@ function initSearch(rawSearchIndex) { ); } + const fps = new Set(); for (const elem of parsedQuery.elems) { convertNameToId(elem); + buildFunctionTypeFingerprint(elem, parsedQuery.typeFingerprint, fps); } for (const elem of parsedQuery.returned) { convertNameToId(elem); + buildFunctionTypeFingerprint(elem, parsedQuery.typeFingerprint, fps); } - if (parsedQuery.foundElems === 1) { - if (parsedQuery.elems.length === 1) { - elem = parsedQuery.elems[0]; - for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { - // It means we want to check for this element everywhere (in names, args and - // returned). - handleSingleArg( - searchIndex[i], - i, - elem, - results_others, - results_in_args, - results_returned, - maxEditDistance - ); - } - } else if (parsedQuery.returned.length === 1) { - // We received one returned argument to check, so looking into returned values. - elem = parsedQuery.returned[0]; - for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { - row = searchIndex[i]; - in_returned = row.type && unifyFunctionTypes( - row.type.output, - parsedQuery.returned, - row.type.where_clause - ); - if (in_returned) { - addIntoResults( - results_others, - row.id, - i, - -1, - 0, - Number.MAX_VALUE - ); - } - } + if (parsedQuery.foundElems === 1 && parsedQuery.returned.length === 0) { + elem = parsedQuery.elems[0]; + for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { + // It means we want to check for this element everywhere (in names, args and + // returned). + handleSingleArg( + searchIndex[i], + i, + elem, + results_others, + results_in_args, + results_returned, + maxEditDistance + ); } } else if (parsedQuery.foundElems > 0) { for (i = 0, nSearchWords = searchWords.length; i < nSearchWords; ++i) { @@ -2796,6 +2812,95 @@ ${item.displayPath}${name}\ }; } + /** + * Type fingerprints allow fast, approximate matching of types. + * + * This algo creates a compact representation of the type set using a Bloom filter. + * This fingerprint is used three ways: + * + * - It accelerates the matching algorithm by checking the function fingerprint against the + * query fingerprint. If any bits are set in the query but not in the function, it can't + * match. + * + * - The fourth section has the number of distinct items in the set. + * This is the distance function, used for filtering and for sorting. + * + * [^1]: Distance is the relatively naive metric of counting the number of distinct items in + * the function that are not present in the query. + * + * @param {FunctionType|QueryElement} type - a single type + * @param {Uint32Array} output - write the fingerprint to this data structure: uses 128 bits + * @param {Set} fps - Set of distinct items + */ + function buildFunctionTypeFingerprint(type, output, fps) { + + const input = type.id; + // `[]` doesn't go in the bloom filter because it's special-cased to match things + // other than itself. + if (input !== null && input !== typeNameIdOfArrayOrSlice) { + // https://docs.rs/rustc-hash/1.1.0/src/rustc_hash/lib.rs.html#60 + // Rotate is skipped because we're only doing one cycle anyway. + const h0 = Math.imul(input, 0x9e3779b9); + const h1 = Math.imul(479001599 ^ input, 0x9e3779b9); + const h2 = Math.imul(433494437 ^ input, 0x9e3779b9); + output[0] |= 1 << (h0 % 32); + output[1] |= 1 << (h1 % 32); + output[2] |= 1 << (h2 % 32); + fps.add(input); + } + for (const g of type.generics) { + buildFunctionTypeFingerprint(g, output, fps); + } + const fb = { + id: null, + ty: 0, + generics: [], + bindings: new Map(), + }; + for (const [k, v] of type.bindings.entries()) { + fb.id = k; + fb.generics = v; + buildFunctionTypeFingerprint(fb, output, fps); + } + output[3] = fps.size; + } + + /** + * Compare the query fingerprint with the function fingerprint. + * + * @param {{number}} fullId - The function + * @param {{Uint32Array}} queryFingerprint - The query + * @returns {number|false} - False if non-match, number if distance + * This function might return 0! + */ + function compareTypeFingerprints(fullId, queryFingerprint) { + + const fh0 = functionTypeFingerprint[fullId * 4]; + const fh1 = functionTypeFingerprint[(fullId * 4) + 1]; + const fh2 = functionTypeFingerprint[(fullId * 4) + 2]; + const qh0 = queryFingerprint[0]; + const qh1 = queryFingerprint[1]; + const qh2 = queryFingerprint[2]; + // Approximate set intersection with bloom filters. + // This can be larger than reality, not smaller, because hashes have + // the property that if they've got the same value, they hash to the + // same thing. False positives exist, but not false negatives. + const [in0, in1, in2, in3] = [fh0 & qh0, fh1 & qh1, fh2 & qh2]; + // Approximate the set of items in the query but not the function. + // This might be smaller than reality, but cannot be bigger. + // + // | in_ | qh_ | XOR | Meaning | + // | --- | --- | --- | ------------------------------------------------ | + // | 0 | 0 | 0 | Not present | + // | 1 | 0 | 1 | IMPOSSIBLE because `in_` is `fh_ & qh_` | + // | 1 | 1 | 0 | If one or both is false positive, false negative | + // | 0 | 1 | 1 | Since in_ has no false negatives, must be real | + if ((in0 ^ qh0) || (in1 ^ qh1) || (in2 ^ qh2)) { + return false; + } + return functionTypeFingerprint[(fullId * 4) + 3]; + } + function buildIndex(rawSearchIndex) { searchIndex = []; /** @@ -2815,6 +2920,22 @@ ${item.displayPath}${name}\ typeNameIdOfSlice = buildTypeMapIndex("slice"); typeNameIdOfArrayOrSlice = buildTypeMapIndex("[]"); + // Function type fingerprints are 128-bit bloom filters that are used to + // estimate the distance between function and query. + // This loop counts the number of items to allocate a fingerprint for. + for (const crate in rawSearchIndex) { + if (!hasOwnPropertyRustdoc(rawSearchIndex, crate)) { + continue; + } + // Each item gets an entry in the fingerprint array, and the crate + // does, too + id += rawSearchIndex[crate].t.length + 1; + } + functionTypeFingerprint = new Uint32Array((id + 1) * 4); + + // This loop actually generates the search item indexes, including + // normalized names, type signature objects and fingerprints, and aliases. + id = 0; for (const crate in rawSearchIndex) { if (!hasOwnPropertyRustdoc(rawSearchIndex, crate)) { continue; @@ -2964,6 +3085,28 @@ ${item.displayPath}${name}\ } searchWords.push(word); const path = itemPaths.has(i) ? itemPaths.get(i) : lastPath; + let type = null; + if (itemFunctionSearchTypes[i] !== 0) { + type = buildFunctionSearchType( + itemFunctionSearchTypes[i], + lowercasePaths + ); + if (type) { + const fp = functionTypeFingerprint.subarray(id * 4, (id + 1) * 4); + const fps = new Set(); + for (const t of type.inputs) { + buildFunctionTypeFingerprint(t, fp, fps); + } + for (const t of type.output) { + buildFunctionTypeFingerprint(t, fp, fps); + } + for (const w of type.where_clause) { + for (const t of w) { + buildFunctionTypeFingerprint(t, fp, fps); + } + } + } + } const row = { crate: crate, ty: itemTypes.charCodeAt(i) - charA, @@ -2971,10 +3114,7 @@ ${item.displayPath}${name}\ path: path, desc: itemDescs[i], parent: itemParentIdxs[i] > 0 ? paths[itemParentIdxs[i] - 1] : undefined, - type: buildFunctionSearchType( - itemFunctionSearchTypes[i], - lowercasePaths - ), + type, id: id, normalizedName: word.indexOf("_") === -1 ? word : word.replace(/_/g, ""), deprecated: deprecatedItems.has(i), diff --git a/tests/rustdoc-js/assoc-type.js b/tests/rustdoc-js/assoc-type.js index cc3afaa17c07d..9b909bcd36c8e 100644 --- a/tests/rustdoc-js/assoc-type.js +++ b/tests/rustdoc-js/assoc-type.js @@ -7,16 +7,16 @@ const EXPECTED = [ 'query': 'iterator -> u32', 'correction': null, 'others': [ - { 'path': 'assoc_type', 'name': 'my_fn' }, { 'path': 'assoc_type::my', 'name': 'other_fn' }, + { 'path': 'assoc_type', 'name': 'my_fn' }, ], }, { 'query': 'iterator', 'correction': null, 'in_args': [ - { 'path': 'assoc_type', 'name': 'my_fn' }, { 'path': 'assoc_type::my', 'name': 'other_fn' }, + { 'path': 'assoc_type', 'name': 'my_fn' }, ], }, // if I write an explicit binding, only it shows up diff --git a/tests/rustdoc-js/big-result.js b/tests/rustdoc-js/big-result.js new file mode 100644 index 0000000000000..07961d196f47d --- /dev/null +++ b/tests/rustdoc-js/big-result.js @@ -0,0 +1,39 @@ +// exact-check + +const EXPECTED = [ + { + 'query': 'First', + 'in_args': (function() { + // Generate the list of 200 items that should match. + const results = []; + function generate(lx, ly) { + for (const x of lx) { + for (const y of ly) { + results.push({ + 'path': `big_result::${y}`, + 'name': x, + }); + } + } + } + // Fewest parameters that still match go on top. + generate( + ['u', 'v', 'w', 'x', 'y'], + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] + ); + generate( + ['p', 'q', 'r', 's', 't'], + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] + ); + generate( + ['k', 'l', 'm', 'n', 'o'], + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] + ); + generate( + ['f', 'g', 'h', 'i', 'j'], + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'] + ); + return results; + })(), + }, +]; diff --git a/tests/rustdoc-js/big-result.rs b/tests/rustdoc-js/big-result.rs new file mode 100644 index 0000000000000..4dfecd6aaadd5 --- /dev/null +++ b/tests/rustdoc-js/big-result.rs @@ -0,0 +1,61 @@ +#![feature(concat_idents)] +#![allow(nonstandard_style)] +/// Generate 250 items that all match the query, starting with the longest. +/// Those long items should be dropped from the result set, and the short ones +/// should be shown instead. +macro_rules! generate { + ([$($x:ident),+], $y:tt, $z:tt) => { + $( + generate!(@ $x, $y, $z); + )+ + }; + (@ $x:ident , [$($y:ident),+], $z:tt) => { + pub struct $x; + $( + generate!(@@ $x, $y, $z); + )+ + }; + (@@ $x:ident , $y:ident, [$($z:ident: $zt:ident),+]) => { + impl $y { + pub fn $x($($z: $zt,)+) {} + } + } +} + +pub struct First; +pub struct Second; +pub struct Third; +pub struct Fourth; +pub struct Fifth; + +generate!( + [a, b, c, d, e], + [a, b, c, d, e, f, g, h, i, j], + [a: First, b: Second, c: Third, d: Fourth, e: Fifth] +); + +generate!( + [f, g, h, i, j], + [a, b, c, d, e, f, g, h, i, j], + [a: First, b: Second, c: Third, d: Fourth] +); + +generate!( + [k, l, m, n, o], + [a, b, c, d, e, f, g, h, i, j], + [a: First, b: Second, c: Third] +); + +generate!( + // reverse it, just to make sure they're alphabetized + // in the result set when all else is equal + [t, s, r, q, p], + [a, b, c, d, e, f, g, h, i, j], + [a: First, b: Second] +); + +generate!( + [u, v, w, x, y], + [a, b, c, d, e, f, g, h, i, j], + [a: First] +); diff --git a/tests/rustdoc-js/full-path-function.js b/tests/rustdoc-js/full-path-function.js index 48be51b156fde..0464f7922174d 100644 --- a/tests/rustdoc-js/full-path-function.js +++ b/tests/rustdoc-js/full-path-function.js @@ -4,16 +4,16 @@ const EXPECTED = [ { 'query': 'sac -> usize', 'others': [ - { 'path': 'full_path_function::b::Sac', 'name': 'bar' }, { 'path': 'full_path_function::b::Sac', 'name': 'len' }, { 'path': 'full_path_function::sac::Sac', 'name': 'len' }, + { 'path': 'full_path_function::b::Sac', 'name': 'bar' }, ], }, { 'query': 'b::sac -> usize', 'others': [ - { 'path': 'full_path_function::b::Sac', 'name': 'bar' }, { 'path': 'full_path_function::b::Sac', 'name': 'len' }, + { 'path': 'full_path_function::b::Sac', 'name': 'bar' }, ], }, { diff --git a/tests/rustdoc-js/generics.js b/tests/rustdoc-js/generics.js index ebc92ccfc0575..b3ca0af3056a5 100644 --- a/tests/rustdoc-js/generics.js +++ b/tests/rustdoc-js/generics.js @@ -1,4 +1,5 @@ // exact-check +// ignore-order const EXPECTED = [ { diff --git a/tests/rustdoc-js/impl-trait.js b/tests/rustdoc-js/impl-trait.js index 00d67d639bd08..8bb3f2d3e99a5 100644 --- a/tests/rustdoc-js/impl-trait.js +++ b/tests/rustdoc-js/impl-trait.js @@ -39,8 +39,8 @@ const EXPECTED = [ { 'path': 'impl_trait', 'name': 'Aaaaaaa' }, ], 'in_args': [ - { 'path': 'impl_trait::Ccccccc', 'name': 'eeeeeee' }, { 'path': 'impl_trait::Ccccccc', 'name': 'fffffff' }, + { 'path': 'impl_trait::Ccccccc', 'name': 'eeeeeee' }, ], 'returned': [ { 'path': 'impl_trait', 'name': 'bbbbbbb' }, diff --git a/tests/rustdoc-js/type-parameters.js b/tests/rustdoc-js/type-parameters.js index e695f189bb672..e045409e507e5 100644 --- a/tests/rustdoc-js/type-parameters.js +++ b/tests/rustdoc-js/type-parameters.js @@ -1,20 +1,19 @@ // exact-check -// ignore-order const EXPECTED = [ { query: '-> trait:Some', others: [ - { path: 'foo', name: 'alef' }, { path: 'foo', name: 'alpha' }, + { path: 'foo', name: 'alef' }, ], }, { query: '-> generic:T', others: [ + { path: 'foo', name: 'beta' }, { path: 'foo', name: 'bet' }, { path: 'foo', name: 'alef' }, - { path: 'foo', name: 'beta' }, ], }, { @@ -44,38 +43,40 @@ const EXPECTED = [ { query: 'Other, Other', others: [ - { path: 'foo', name: 'other' }, { path: 'foo', name: 'alternate' }, + { path: 'foo', name: 'other' }, ], }, { query: 'generic:T', in_args: [ - { path: 'foo', name: 'bet' }, { path: 'foo', name: 'beta' }, - { path: 'foo', name: 'other' }, + { path: 'foo', name: 'bet' }, { path: 'foo', name: 'alternate' }, + { path: 'foo', name: 'other' }, ], }, { query: 'generic:Other', in_args: [ - { path: 'foo', name: 'bet' }, { path: 'foo', name: 'beta' }, - { path: 'foo', name: 'other' }, + { path: 'foo', name: 'bet' }, { path: 'foo', name: 'alternate' }, + { path: 'foo', name: 'other' }, ], }, { query: 'trait:Other', in_args: [ - { path: 'foo', name: 'other' }, { path: 'foo', name: 'alternate' }, + { path: 'foo', name: 'other' }, ], }, { query: 'Other', in_args: [ + // because function is called "other", it's sorted first + // even though it has higher type distance { path: 'foo', name: 'other' }, { path: 'foo', name: 'alternate' }, ],