Skip to content

Commit

Permalink
Merge branch 'master' of ssh://github.com/laysakura/trie-rs
Browse files Browse the repository at this point in the history
  • Loading branch information
laysakura committed Feb 10, 2024
2 parents 35e1eaf + 46e6a2f commit 5825661
Show file tree
Hide file tree
Showing 8 changed files with 358 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [Unreleased]
Add `is_prefix()` and `trie_rs::map::{Trie, TrieBuilder}`.

## [v0.2.0]

Expand Down
5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
[package]
name = "trie-rs"
version = "0.2.0"
authors = ["Sho Nakatani <lay.sakura@gmail.com>"]
description = "Memory efficient trie (prefix tree) library based on LOUDS"
authors = ["Sho Nakatani <lay.sakura@gmail.com>", "Shane Celis <shane.celis@gmail.com>"]
description = "Memory efficient trie (prefix tree) and map library based on LOUDS"
readme = "README.md"
license = "MIT OR Apache-2.0"
repository = "https://github.com/laysakura/trie-rs"
Expand All @@ -12,6 +12,7 @@ categories = ["compression", "data-structures"]
edition = "2018"

[dependencies]
derivative = "2.2.0"
louds-rs = "0.4"

[dev-dependencies]
Expand Down
28 changes: 27 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# trie-rs

Memory efficient trie (prefix tree) library based on LOUDS.
Memory efficient trie (prefix tree) and map library based on LOUDS.

[Master API Docs](https://laysakura.github.io/trie-rs/trie_rs/)
|
Expand Down Expand Up @@ -165,6 +165,32 @@ assert_eq!(
);
```

### Trie Map Usage

To store a value with each word, use `trie_rs::map::{Trie, TrieBuilder}`.

```rust
use std::str;
use trie_rs::map::TrieBuilder;

let mut builder = TrieBuilder::new(); // Inferred `TrieBuilder<u8, i32>` automatically (integer literals default to `i32`)
builder.push("すし", 0);
builder.push("すしや", 1);
builder.push("すしだね", 2);
builder.push("すしづめ", 3);
builder.push("すしめし", 4);
builder.push("すしをにぎる", 5);
builder.push("すし", 6); // A word `push`ed twice is ignored: the first value (0) is kept.
builder.push("🍣", 7);

let trie = builder.build();

// exact_match(): Return the value of the word that exactly matches the query.
assert_eq!(trie.exact_match("すし"), Some(0));
assert_eq!(trie.exact_match("🍣"), Some(7));
assert_eq!(trie.exact_match("🍜"), None);
```

## Features
- **Generic type support**: As the above examples show, trie-rs can be used for searching not only UTF-8 string but also other data types.
- **Based on [louds-rs](https://crates.io/crates/louds-rs)**, which is fast, parallelized, and memory efficient.
Expand Down
32 changes: 30 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
//! Memory efficient trie (prefix tree) library based on LOUDS.
#![forbid(missing_docs)]
//! Memory efficient trie (prefix tree) and map library based on LOUDS.
//!
//! [Master API Docs](https://laysakura.github.io/trie-rs/trie_rs/)
//! |
Expand Down Expand Up @@ -163,6 +164,32 @@
//! );
//! ```
//!
//! ## Trie Map Usage
//!
//! To store a value with each word, use `trie_rs::map::{Trie, TrieBuilder}`.
//!
//! ```rust
//! use std::str;
//! use trie_rs::map::TrieBuilder;
//!
//! let mut builder = TrieBuilder::new(); // Inferred `TrieBuilder<u8, i32>` automatically (integer literals default to `i32`)
//! builder.push("すし", 0);
//! builder.push("すしや", 1);
//! builder.push("すしだね", 2);
//! builder.push("すしづめ", 3);
//! builder.push("すしめし", 4);
//! builder.push("すしをにぎる", 5);
//! builder.push("すし", 6); // A word `push`ed twice is ignored: the first value (0) is kept.
//! builder.push("🍣", 7);
//!
//! let trie = builder.build();
//!
//! // exact_match(): Return the value of the word that exactly matches the query.
//! assert_eq!(trie.exact_match("すし"), Some(0));
//! assert_eq!(trie.exact_match("🍣"), Some(7));
//! assert_eq!(trie.exact_match("🍜"), None);
//! ```
//!
//! # Features
//! - **Generic type support**: As the above examples show, trie-rs can be used for searching not only UTF-8 string but also other data types.
//! - **Based on [louds-rs](https://crates.io/crates/louds-rs)**, which is fast, parallelized, and memory efficient.
Expand All @@ -184,4 +211,5 @@ pub use trie::Trie;
pub use trie::TrieBuilder;

mod internal_data_structure;
pub mod trie;
mod trie;
pub mod map;
217 changes: 217 additions & 0 deletions src/map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
//! A trie map stores a value with each word or key.
use crate::{Trie as OldTrie, TrieBuilder as OldTrieBuilder};
use derivative::Derivative;

/// Label type stored in the inner trie: one key element paired with an
/// optional value.
///
/// `Eq`/`Ord` (and their `Partial*` counterparts) are derived through
/// `derivative` with the value field marked `"ignore"`, so two `KeyValue`s
/// compare by key only. `TrieBuilder::push` stores `Some(value)` on the last
/// element of a key and `None` on every other element; queries are built with
/// `None` throughout.
#[derive(Derivative, Clone, Debug)]
#[derivative(Eq, Ord, PartialEq, PartialOrd)]
struct KeyValue<K,V>(K,
// The value takes no part in equality or ordering.
#[derivative(PartialEq="ignore")]
#[derivative(PartialOrd="ignore")]
#[derivative(Ord="ignore")]
Option<V>);



/// A trie map: a trie in which each key has an associated value.
///
/// Instances are produced by [TrieBuilder::build].
pub struct Trie<K,V> {
// The plain trie, instantiated with `KeyValue` labels that carry the value
// alongside each key element.
inner: OldTrie<KeyValue<K,V>>
}
/// A builder for the trie map [Trie]: collects keys together with their values.
pub struct TrieBuilder<K,V> {
// The plain trie builder, instantiated with `KeyValue` labels.
inner: OldTrieBuilder<KeyValue<K,V>>
}
// The private `KeyValue` type is named in the `where` clause of this public
// impl; the bound is intentional, so the `private_bounds` lint is silenced.
#[allow(private_bounds)]
impl<K: Clone, V: Clone> Trie<K, V>
where
    KeyValue<K, V>: Ord + Clone,
{
    /// Return `Some(value)` if `query` is a key, `None` otherwise.
    pub fn exact_match<Arr: AsRef<[K]>>(&self, query: Arr) -> Option<V> {
        let q = Self::to_labels(query.as_ref());
        self.inner
            .exact_match_node(q)
            .and_then(|n| self.inner.label(n).1)
    }

    /// Return true if `query` is a prefix.
    ///
    /// Note: A prefix may be an exact match or not, and an exact match may be a
    /// prefix or not.
    pub fn is_prefix<Arr: AsRef<[K]>>(&self, query: Arr) -> bool {
        self.inner.is_prefix(Self::to_labels(query.as_ref()))
    }

    /// Return all keys and values that match `query`.
    ///
    /// # Panics
    /// If `query` is empty.
    pub fn predictive_search<Arr: AsRef<[K]>>(&self, query: Arr) -> Vec<(Vec<K>, V)> {
        self.inner
            .predictive_search(Self::to_labels(query.as_ref()))
            .into_iter()
            .map(Self::strip)
            .collect()
    }

    /// Return the common prefixes and their associated values.
    pub fn common_prefix_search<Arr: AsRef<[K]>>(&self, query: Arr) -> Vec<(Vec<K>, V)> {
        self.inner
            .common_prefix_search(Self::to_labels(query.as_ref()))
            .into_iter()
            .map(Self::strip)
            .collect()
    }

    /// Wrap each element of `query` in a value-less `KeyValue` so it can be
    /// handed to the inner trie.
    fn to_labels(query: &[K]) -> Vec<KeyValue<K, V>> {
        query.iter().cloned().map(|k| KeyValue(k, None)).collect()
    }

    /// Given a list of `KeyValue`s take the last value and return only the keys.
    ///
    /// # Panics
    /// If `word` is empty or its last element carries no value.
    fn strip(mut word: Vec<KeyValue<K, V>>) -> (Vec<K>, V) {
        // `word` is owned and about to be consumed, so the value can be moved
        // out of the last element instead of being cloned.
        let value = word
            .last_mut()
            .expect("word should have length > 0")
            .1
            .take()
            .expect("Terminal node should have value");
        (word.into_iter().map(|x| x.0).collect(), value)
    }
}

// The private `KeyValue` type is named in the `where` clause of this public
// impl; the bound is intentional, so the `private_bounds` lint is silenced.
#[allow(private_bounds)]
impl<K: Clone, V: Clone> TrieBuilder<K, V>
where
    KeyValue<K, V>: Ord + Clone,
{
    /// Return a [TrieBuilder].
    pub fn new() -> Self {
        Self {
            inner: OldTrieBuilder::new(),
        }
    }

    /// Add a key and value.
    ///
    /// The value is attached to the last element of `key`.
    ///
    /// # Panics
    /// If `key` is empty.
    pub fn push<Arr: AsRef<[K]>>(&mut self, key: Arr, value: V) {
        let mut labels: Vec<KeyValue<K, V>> = key
            .as_ref()
            .iter()
            .cloned()
            .map(|k| KeyValue(k, None))
            .collect();
        // An empty key has no last element to hold the value.
        labels
            .last_mut()
            .expect("key should have length > 0")
            .1 = Some(value);
        self.inner.push(labels);
    }

    /// Build a [Trie].
    pub fn build(&self) -> Trie<K, V> {
        Trie {
            inner: self.inner.build(),
        }
    }
}

// Same intentional private bound as on the inherent impl above.
#[allow(private_bounds)]
impl<K: Clone, V: Clone> Default for TrieBuilder<K, V>
where
    KeyValue<K, V>: Ord + Clone,
{
    /// Equivalent to [TrieBuilder::new].
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod search_tests {
    use super::{Trie, TrieBuilder};

    /// Fixture shared by every test in this module: six keys (pushed as
    /// `&str`, stored as bytes), each tagged with a distinct `u8` value.
    fn build_trie() -> Trie<u8, u8> {
        let mut fixture = TrieBuilder::new();
        fixture.push("a", 0);
        fixture.push("app", 1);
        fixture.push("apple", 2);
        fixture.push("better", 3);
        fixture.push("application", 4);
        fixture.push("アップル🍎", 5);
        fixture.build()
    }

    /// Smoke test: a predictive search for a full key yields that key and its value.
    #[test]
    fn sanity_check() {
        let expected: Vec<(Vec<u8>, u8)> = vec![("apple".as_bytes().to_vec(), 2)];
        let found = build_trie().predictive_search("apple");
        assert_eq!(found, expected);
    }

    /// `exact_match` returns the stored value for keys and `None` otherwise.
    mod exact_match_tests {
        macro_rules! exact_match_cases {
            ($($case:ident: $input:expr,)*) => {
                $(
                    #[test]
                    fn $case() {
                        let (query, expected) = $input;
                        let found = super::build_trie().exact_match(query);
                        assert_eq!(found, expected);
                    }
                )*
            }
        }

        exact_match_cases! {
            t1: ("a", Some(0)),
            t2: ("app", Some(1)),
            t3: ("apple", Some(2)),
            t4: ("application", Some(4)),
            t5: ("better", Some(3)),
            t6: ("アップル🍎", Some(5)),
            t7: ("appl", None),
            t8: ("appler", None),
        }
    }

    /// `is_prefix` is true only when the query can be extended to a longer key.
    mod is_prefix_tests {
        macro_rules! is_prefix_cases {
            ($($case:ident: $input:expr,)*) => {
                $(
                    #[test]
                    fn $case() {
                        let (query, expected) = $input;
                        let found = super::build_trie().is_prefix(query);
                        assert_eq!(found, expected);
                    }
                )*
            }
        }

        is_prefix_cases! {
            t1: ("a", true),
            t2: ("app", true),
            t3: ("apple", false),
            t4: ("application", false),
            t5: ("better", false),
            t6: ("アップル🍎", false),
            t7: ("appl", true),
            t8: ("appler", false),
            t9: ("アップル", true),
        }
    }

    /// `predictive_search` lists every key starting with the query, with its value.
    mod predictive_search_tests {
        macro_rules! predictive_search_cases {
            ($($case:ident: $input:expr,)*) => {
                $(
                    #[test]
                    fn $case() {
                        let (query, expected_pairs) = $input;
                        // Expected keys are written as `&str`; the trie returns bytes.
                        let expected: Vec<(Vec<u8>, u8)> = expected_pairs
                            .iter()
                            .map(|pair| (pair.0.as_bytes().to_vec(), pair.1))
                            .collect();
                        let found = super::build_trie().predictive_search(query);
                        assert_eq!(found, expected);
                    }
                )*
            }
        }

        predictive_search_cases! {
            t1: ("a", vec![("a", 0), ("app", 1), ("apple", 2), ("application", 4)]),
            t2: ("app", vec![("app", 1), ("apple", 2), ("application", 4)]),
            t3: ("appl", vec![("apple", 2), ("application", 4)]),
            t4: ("apple", vec![("apple", 2)]),
            t5: ("b", vec![("better", 3)]),
            t6: ("c", Vec::<(&str, u8)>::new()),
            t7: ("アップ", vec![("アップル🍎", 5)]),
        }
    }

    /// `common_prefix_search` lists every key that is a prefix of the query, with its value.
    mod common_prefix_search_tests {
        macro_rules! common_prefix_search_cases {
            ($($case:ident: $input:expr,)*) => {
                $(
                    #[test]
                    fn $case() {
                        let (query, expected_pairs) = $input;
                        // Expected keys are written as `&str`; the trie returns bytes.
                        let expected: Vec<(Vec<u8>, u8)> = expected_pairs
                            .iter()
                            .map(|pair| (pair.0.as_bytes().to_vec(), pair.1))
                            .collect();
                        let found = super::build_trie().common_prefix_search(query);
                        assert_eq!(found, expected);
                    }
                )*
            }
        }

        common_prefix_search_cases! {
            t1: ("a", vec![("a", 0)]),
            t2: ("ap", vec![("a", 0)]),
            t3: ("appl", vec![("a", 0), ("app", 1)]),
            t4: ("appler", vec![("a", 0), ("app", 1), ("apple", 2)]),
            t5: ("bette", Vec::<(&str, u8)>::new()),
            t6: ("betterment", vec![("better", 3)]),
            t7: ("c", Vec::<(&str, u8)>::new()),
            t8: ("アップル🍎🍏", vec![("アップル🍎", 5)]),
        }
    }
}
2 changes: 2 additions & 0 deletions src/trie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ use louds_rs::Louds;
pub mod trie;
pub mod trie_builder;

/// A trie for sequences of the type `Label`.
pub struct Trie<Label> {
// LOUDS structure from `louds-rs`.
// NOTE(review): presumably encodes the shape of the tree — confirm against the builder.
louds: Louds,

/// Labels indexed by node number: (LoudsNodeNum - 2) -> TrieLabel
trie_labels: Vec<TrieLabel<Label>>,
}

/// A trie builder for [Trie].
pub struct TrieBuilder<Label> {
// NOTE(review): presumably the intermediate trie that pushed words are
// collected into before the LOUDS-backed `Trie` is built — confirm.
naive_trie: NaiveTrie<Label>,
}
Expand Down
Loading

0 comments on commit 5825661

Please sign in to comment.