Skip to content

Commit

Permalink
optimize search 2x (#13)
Browse files Browse the repository at this point in the history
  • Loading branch information
mlvzk authored Aug 28, 2020
1 parent db5d710 commit 93c2698
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 28 deletions.
6 changes: 4 additions & 2 deletions src/bin/manix.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,12 @@ fn main() -> Result<()> {
);
}

let query_lower = opt.query.to_ascii_lowercase();
let query = manix::Lowercase(query_lower.as_bytes());
let entries = if opt.strict {
aggregate_source.search(&opt.query)
aggregate_source.search(&query)
} else {
aggregate_source.search_liberal(&opt.query)
aggregate_source.search_liberal(&query)
};
let (entries, key_only_entries): (Vec<DocEntry>, Vec<DocEntry>) =
entries.into_iter().partition(|e| {
Expand Down
17 changes: 10 additions & 7 deletions src/comments_docsource.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use crate::{Cache, DocEntry, DocSource, Errors};
use crate::{
contains_insensitive_ascii, starts_with_insensitive_ascii, Cache, DocEntry, DocSource, Errors,
Lowercase,
};
use colored::*;
use lazy_static::lazy_static;
use rayon::prelude::*;
Expand Down Expand Up @@ -148,22 +151,22 @@ impl DocSource for CommentsDatabase {
.map(|def| def.key.as_ref())
.collect()
}
fn search(&self, query: &str) -> Vec<DocEntry> {
let search_key = query.to_lowercase();
fn search(&self, query: &Lowercase) -> Vec<DocEntry> {
self.hash_to_defs
.values()
.flatten()
.filter(|d| d.comments.len() > 0 && d.key.to_lowercase().starts_with(&search_key))
.filter(|d| {
d.comments.len() > 0 && starts_with_insensitive_ascii(d.key.as_bytes(), query)
})
.cloned()
.map(DocEntry::CommentDoc)
.collect()
}
fn search_liberal(&self, query: &str) -> Vec<DocEntry> {
let search_key = query.to_lowercase();
fn search_liberal(&self, query: &Lowercase) -> Vec<DocEntry> {
self.hash_to_defs
.values()
.flatten()
.filter(|d| d.comments.len() > 0 && d.key.to_lowercase().contains(&search_key))
.filter(|d| d.comments.len() > 0 && contains_insensitive_ascii(d.key.as_bytes(), query))
.cloned()
.map(DocEntry::CommentDoc)
.collect()
Expand Down
84 changes: 80 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ impl DocEntry {

pub trait DocSource {
fn all_keys(&self) -> Vec<&str>;
fn search(&self, query: &str) -> Vec<DocEntry>;
fn search_liberal(&self, query: &str) -> Vec<DocEntry>;
fn search(&self, query: &Lowercase) -> Vec<DocEntry>;
fn search_liberal(&self, query: &Lowercase) -> Vec<DocEntry>;

/// Updates the cache, returns true if anything changed
fn update(&mut self) -> Result<bool, Errors>;
Expand All @@ -104,13 +104,13 @@ impl DocSource for AggregateDocSource {
.flat_map(|source| source.all_keys())
.collect()
}
fn search(&self, query: &str) -> Vec<DocEntry> {
fn search(&self, query: &Lowercase) -> Vec<DocEntry> {
self.sources
.par_iter()
.flat_map(|source| source.search(query))
.collect()
}
fn search_liberal(&self, query: &str) -> Vec<DocEntry> {
fn search_liberal(&self, query: &Lowercase) -> Vec<DocEntry> {
self.sources
.par_iter()
.flat_map(|source| source.search_liberal(query))
Expand All @@ -120,3 +120,79 @@ impl DocSource for AggregateDocSource {
unimplemented!();
}
}

/// Borrowed byte slice used as the query argument of the case-insensitive
/// search helpers in this crate.
///
/// The helpers ASCII-lowercase only the haystack bytes and compare them
/// verbatim against the wrapped bytes, so the slice must already be
/// ASCII-lowercase for alphabetic characters to match. The field is public,
/// so this invariant is NOT enforced by the type: callers are responsible for
/// lowercasing the query first (e.g. with `to_ascii_lowercase`).
///
/// The wrapper is only a shared reference, so it is cheap to copy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(transparent)]
pub struct Lowercase<'a>(pub &'a [u8]);

pub(crate) fn starts_with_insensitive_ascii(s: &[u8], prefix: &Lowercase) -> bool {
let prefix = prefix.0;

if s.len() < prefix.len() {
return false;
}

for (i, b) in prefix.into_iter().enumerate() {
// this is safe because of the earlier if check
if unsafe { s.get_unchecked(i) }.to_ascii_lowercase() != *b {
return false;
}
}

true
}

/// Returns `true` when `inner` occurs anywhere in `s`, ignoring ASCII case in
/// `s`.
///
/// Every candidate window of `s` is ASCII-lowercased byte by byte and compared
/// verbatim with `inner`; `inner` itself is never case-folded, so an uppercase
/// ASCII letter inside `inner` can never match. All alignments are examined,
/// including the one that ends exactly at the end of `s`. An empty `inner` is
/// contained in every `s` (mirroring `str::contains`), and an `inner` longer
/// than `s` never matches.
pub(crate) fn contains_insensitive_ascii(s: &[u8], inner: &Lowercase) -> bool {
    let inner = inner.0;

    // `windows` panics on a zero width, and the empty needle matches anything.
    if inner.is_empty() {
        return true;
    }

    // `windows` yields nothing when `s` is shorter than `inner`, and otherwise
    // every alignment up to and including the final one.
    s.windows(inner.len()).any(|window| {
        window
            .iter()
            .zip(inner)
            .all(|(byte, expected)| byte.to_ascii_lowercase() == *expected)
    })
}

/// Exercises `starts_with_insensitive_ascii` with a matching prefix, a
/// non-matching prefix, and a prefix longer than the haystack.
#[test]
fn test_starts_with_insensitive_ascii() {
    let sentence = b"This is a string";

    // The prefix differs from the haystack only in ASCII case.
    assert!(starts_with_insensitive_ascii(sentence, &Lowercase(b"this ")));

    // The first byte already differs.
    assert!(!starts_with_insensitive_ascii(sentence, &Lowercase(b"x")));

    // The haystack is shorter than the prefix.
    assert!(!starts_with_insensitive_ascii(b"thi", &Lowercase(b"this ")));
}

/// Exercises `contains_insensitive_ascii` with needles found in the middle of
/// the haystack, a needle that is absent, and a needle longer than the
/// haystack.
#[test]
fn test_contains_insensitive_ascii() {
    // Single byte in the middle of the haystack.
    assert!(contains_insensitive_ascii(b"abc", &Lowercase(b"b")));

    // Multi-byte needle surrounded by other bytes.
    assert!(contains_insensitive_ascii(b"xabcx", &Lowercase(b"abc")));

    // Needle byte that does not occur at all.
    assert!(!contains_insensitive_ascii(b"abc", &Lowercase(b"x")));

    // Needle longer than the haystack.
    assert!(!contains_insensitive_ascii(b"abc", &Lowercase(b"abcd")));
}
13 changes: 8 additions & 5 deletions src/nixpkgs_tree_docsource.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use crate::{Cache, DocEntry, DocSource, Errors};
use crate::{
contains_insensitive_ascii, starts_with_insensitive_ascii, Cache, DocEntry, DocSource, Errors,
Lowercase,
};
use serde::{Deserialize, Serialize};
use std::{collections::HashMap, process::Command};

Expand Down Expand Up @@ -36,17 +39,17 @@ impl DocSource for NixpkgsTreeDatabase {
fn all_keys(&self) -> Vec<&str> {
self.keys.iter().map(|k| k.as_str()).collect()
}
fn search(&self, query: &str) -> Vec<DocEntry> {
fn search(&self, query: &Lowercase) -> Vec<DocEntry> {
self.keys
.iter()
.filter(|k| k.to_lowercase().starts_with(&query.to_lowercase()))
.filter(|k| starts_with_insensitive_ascii(k.as_bytes(), query))
.map(|k| DocEntry::NixpkgsTreeDoc(k.clone()))
.collect()
}
fn search_liberal(&self, query: &str) -> Vec<DocEntry> {
fn search_liberal(&self, query: &Lowercase) -> Vec<DocEntry> {
self.keys
.iter()
.filter(|k| k.to_lowercase().contains(&query.to_lowercase()))
.filter(|k| contains_insensitive_ascii(k.as_bytes(), query))
.map(|k| DocEntry::NixpkgsTreeDoc(k.clone()))
.collect()
}
Expand Down
13 changes: 8 additions & 5 deletions src/options_docsource.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
use crate::{Cache, DocEntry, DocSource, Errors};
use crate::{
contains_insensitive_ascii, starts_with_insensitive_ascii, Cache, DocEntry, DocSource, Errors,
Lowercase,
};
use colored::*;
use serde::{Deserialize, Serialize};
use std::{
Expand Down Expand Up @@ -68,17 +71,17 @@ impl DocSource for OptionsDatabase {
fn all_keys(&self) -> Vec<&str> {
self.options.keys().map(|x| x.as_ref()).collect()
}
fn search(&self, query: &str) -> Vec<DocEntry> {
fn search(&self, query: &Lowercase) -> Vec<DocEntry> {
self.options
.iter()
.filter(|(key, _)| key.to_lowercase().starts_with(&query.to_lowercase()))
.filter(|(key, _)| starts_with_insensitive_ascii(key.as_bytes(), query))
.map(|(_, d)| DocEntry::OptionDoc(d.clone()))
.collect()
}
fn search_liberal(&self, query: &str) -> Vec<DocEntry> {
fn search_liberal(&self, query: &Lowercase) -> Vec<DocEntry> {
self.options
.iter()
.filter(|(key, _)| key.to_lowercase().contains(&query.to_lowercase()))
.filter(|(key, _)| contains_insensitive_ascii(key.as_bytes(), query))
.map(|(_, d)| DocEntry::OptionDoc(d.clone()))
.collect()
}
Expand Down
13 changes: 8 additions & 5 deletions src/xml_docsource.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use colored::*;
use roxmltree::{self, Document};

use crate::{Cache, DocEntry, DocSource, Errors};
use crate::{
contains_insensitive_ascii, starts_with_insensitive_ascii, Cache, DocEntry, DocSource, Errors,
Lowercase,
};
use serde::{Deserialize, Serialize};
use std::{collections::HashMap, path::PathBuf, process::Command};
use walkdir::WalkDir;
Expand Down Expand Up @@ -122,17 +125,17 @@ impl DocSource for XmlFuncDocDatabase {
fn all_keys(&self) -> Vec<&str> {
self.functions.keys().map(|x| x.as_str()).collect()
}
fn search(&self, query: &str) -> Vec<crate::DocEntry> {
fn search(&self, query: &Lowercase) -> Vec<crate::DocEntry> {
self.functions
.iter()
.filter(|(key, _)| key.to_lowercase().starts_with(&query.to_lowercase()))
.filter(|(key, _)| starts_with_insensitive_ascii(key.as_bytes(), query))
.map(|(_, value)| DocEntry::XmlFuncDoc(value.clone()))
.collect()
}
fn search_liberal(&self, query: &str) -> Vec<DocEntry> {
fn search_liberal(&self, query: &Lowercase) -> Vec<DocEntry> {
self.functions
.iter()
.filter(|(key, _)| key.to_lowercase().contains(&query.to_lowercase()))
.filter(|(key, _)| contains_insensitive_ascii(key.as_bytes(), query))
.map(|(_, value)| DocEntry::XmlFuncDoc(value.clone()))
.collect()
}
Expand Down

0 comments on commit 93c2698

Please sign in to comment.