Skip to content

Commit

Permalink
feat: fuzzy match (gorules#168)
Browse files Browse the repository at this point in the history
  • Loading branch information
stefan-gorules authored May 23, 2024
1 parent bfc8f94 commit 7ce6ceb
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 1 deletion.
1 change: 1 addition & 0 deletions core/expression/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ nohash-hasher = "0.2.0"
thiserror = { workspace = true }
rust_decimal = { workspace = true, features = ["maths-nopanic"] }
rust_decimal_macros = { workspace = true }
strsim = "0.11.1"

[dev-dependencies]
anyhow = { workspace = true }
Expand Down
5 changes: 5 additions & 0 deletions core/expression/src/compiler/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,11 @@ impl<'arena, 'bytecode_ref> CompilerInner<'arena, 'bytecode_ref> {
self.compile_argument(kind, arguments, 1)?;
Ok(self.emit(Opcode::Matches))
}
// Bytecode for the `FuzzyMatch` built-in: compile argument 0, then argument 1,
// and finally emit `Opcode::FuzzyMatch`. Each `compile_argument` call can fail
// and its error is propagated with `?`.
// NOTE(review): presumably each compiled argument leaves one value on the VM
// stack, so argument 1 ends up on top when the opcode runs — confirm against
// `compile_argument` and the VM handler.
BuiltInFunction::FuzzyMatch => {
self.compile_argument(kind, arguments, 0)?;
self.compile_argument(kind, arguments, 1)?;
Ok(self.emit(Opcode::FuzzyMatch))
}
BuiltInFunction::Extract => {
self.compile_argument(kind, arguments, 0)?;
self.compile_argument(kind, arguments, 1)?;
Expand Down
1 change: 1 addition & 0 deletions core/expression/src/compiler/opcode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ pub enum Opcode<'a> {
StartsWith,
EndsWith,
Matches,
FuzzyMatch,
Extract,
Slice,
Array,
Expand Down
2 changes: 2 additions & 0 deletions core/expression/src/parser/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub enum BuiltInFunction {
EndsWith,
Matches,
Extract,
FuzzyMatch,

// Math
Abs,
Expand Down Expand Up @@ -88,6 +89,7 @@ impl BuiltInFunction {
BuiltInFunction::EndsWith => Arity::Dual,
BuiltInFunction::Matches => Arity::Dual,
BuiltInFunction::Extract => Arity::Dual,
BuiltInFunction::FuzzyMatch => Arity::Dual,

// Math
BuiltInFunction::Abs => Arity::Single,
Expand Down
1 change: 1 addition & 0 deletions core/expression/src/parser/unary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ impl From<&Node<'_>> for UnaryNodeBehaviour {
BuiltInFunction::StartsWith => AsBoolean,
BuiltInFunction::EndsWith => AsBoolean,
BuiltInFunction::Matches => AsBoolean,
BuiltInFunction::FuzzyMatch => CompareWithReference(Equal),
BuiltInFunction::IsNumeric => AsBoolean,
BuiltInFunction::Keys => CompareWithReference(In),
BuiltInFunction::All => AsBoolean,
Expand Down
45 changes: 44 additions & 1 deletion core/expression/src/vm/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use chrono::{Datelike, Timelike};
use regex::Regex;
#[cfg(feature = "regex-lite")]
use regex_lite::Regex;
use rust_decimal::prelude::ToPrimitive;
use rust_decimal::prelude::{FromPrimitive, ToPrimitive};
use rust_decimal::{Decimal, MathematicalOps};
use rust_decimal_macros::dec;

Expand Down Expand Up @@ -989,6 +989,49 @@ impl<'arena, 'parent_ref, 'bytecode_ref> VMInner<'arena, 'parent_ref, 'bytecode_

self.push(Bool(regex.is_match(a)));
}
// Runtime handler for `Opcode::FuzzyMatch`.
//
// Pops two operands (`b` first, then `a`) and pushes a similarity score computed
// with `strsim::normalized_damerau_levenshtein`:
//   * `a` is a String -> pushes a single Number.
//   * `a` is an Array -> every element must be a String; pushes an Array holding
//     one Number per element, in the same order.
// `b` must always be a String. Any other operand type (including a non-String
// array element) returns an `OpcodeErr` with the message "Unsupported type".
Opcode::FuzzyMatch => {
// `b` was pushed last, so it is popped first.
// NOTE(review): presumably `a` is the first call argument and `b` the second —
// confirm against the compiler's emit order.
let b = self.pop()?;
let a = self.pop()?;

// The value every candidate is compared against has to be a string.
let String(b) = b else {
return Err(OpcodeErr {
opcode: "FuzzyMatch".into(),
message: "Unsupported type".into(),
});
};

match a {
// Single string: one similarity score.
String(a) => {
let sim = strsim::normalized_damerau_levenshtein(a, b);
// NDL (normalized Damerau-Levenshtein) yields a value in [0, 1], so the
// f64 -> Decimal conversion is expected to succeed; should `from_f64`
// return `None`, the score falls back to 0.
self.push(Number(Decimal::from_f64(sim).unwrap_or(dec!(0))));
}
// Array of strings: one similarity score per element.
Array(a) => {
// Result buffer is created in `self.bump` with room for every element up front.
let mut sims = BumpVec::with_capacity_in(a.len(), &self.bump);
for v in a.iter() {
// A single non-string element aborts the whole operation with an error;
// no partial result is pushed.
let String(s) = v else {
return Err(OpcodeErr {
opcode: "FuzzyMatch".into(),
message: "Unsupported type".into(),
});
};

// Same conversion and fallback-to-0 behaviour as the single-string case.
let sim =
Decimal::from_f64(strsim::normalized_damerau_levenshtein(s, b))
.unwrap_or(dec!(0));
// Allocate the Number in `self.bump` and store a shared reference to it.
sims.push(&*self.bump.alloc(Number(sim)));
}

self.push(Array(sims.into_bump_slice()))
}
// Any other left-hand operand type is rejected.
_ => {
return Err(OpcodeErr {
opcode: "FuzzyMatch".into(),
message: "Unsupported type".into(),
})
}
}
}
Opcode::Extract => {
let b = self.pop()?;
let a = self.pop()?;
Expand Down
4 changes: 4 additions & 0 deletions core/expression/tests/data/standard.csv
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ contains("Hello, World!", "foo");; false
matches("Hello, World!", "H[a-z]+, W[a-z]+!");; true
matches("Hello, World!", "[0-9]+");; false
extract("2022-09-18", "(\d{4})-(\d{2})-(\d{2})");; ["2022-09-18", "2022", "09", "18"]
fuzzyMatch("hello", "hello");; 1
fuzzyMatch("world", "hello");; 0.2
fuzzyMatch(["hello", "world"], "hello");;[1, 0.2]


# String Slice
string[0:5];{string: 'sample_string'};'sample'
Expand Down

0 comments on commit 7ce6ceb

Please sign in to comment.