Skip to content

Commit

Permalink
lite: fix stack overflow in NFA compiler
Browse files Browse the repository at this point in the history
This commit fixes a bug where the parser could produce a very deeply
nested Hir value beyond the configured nested limit. This was caused by
the fact that the Hir can have some of its nested structures added to it
without a corresponding recursive call in the parser. For example,
repetition operators. This means that even if we don't blow the nest
limit in the parser, the Hir itself can still become nested beyond the
limit. This in turn will make it possible to unintentionally overflow
the stack in subsequent recursion over the Hir value, such as in the
Thompson NFA compiler.

We fix this by checking the nesting limit both on every recursive parse
call and also on the depth of the final Hir value once parsing is
finished but before it has returned to the caller.

Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608
  • Loading branch information
BurntSushi committed Oct 14, 2023
1 parent 5dff4bd commit 466e42c
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 5 deletions.
Binary file not shown.
60 changes: 55 additions & 5 deletions regex-lite/src/hir/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,24 @@ impl<'a> Parser<'a> {
/// own routine.
impl<'a> Parser<'a> {
pub(super) fn parse(&self) -> Result<Hir, Error> {
let hir = self.parse_inner()?;
// While we also check nesting during parsing, that only checks the
// number of recursive parse calls. It does not necessarily cover
// all possible recursive nestings of the Hir itself. For example,
// repetition operators don't require recursive parse calls. So one
// can stack them arbitrarily without overflowing the stack in the
// *parser*. But then if one recurses over the resulting Hir, a stack
// overflow is possible. So here we check the Hir nesting level
// thoroughly to ensure it isn't nested too deeply.
//
// Note that we do still need the nesting limit check in the parser as
// well, since that will avoid overflowing the stack during parse time
// before the complete Hir value is constructed.
check_hir_nesting(&hir, self.config.nest_limit)?;
Ok(hir)
}

fn parse_inner(&self) -> Result<Hir, Error> {
let depth = self.increment_depth()?;
let mut alternates = vec![];
let mut concat = vec![];
Expand Down Expand Up @@ -806,7 +824,7 @@ impl<'a> Parser<'a> {
if self.bump_if("?P<") || self.bump_if("?<") {
let index = self.next_capture_index()?;
let name = Some(Box::from(self.parse_capture_name()?));
let sub = Box::new(self.parse()?);
let sub = Box::new(self.parse_inner()?);
let cap = hir::Capture { index, name, sub };
Ok(Some(Hir::capture(cap)))
} else if self.bump_if("?") {
Expand All @@ -826,11 +844,11 @@ impl<'a> Parser<'a> {
} else {
assert_eq!(':', self.char());
self.bump();
self.parse().map(Some)
self.parse_inner().map(Some)
}
} else {
let index = self.next_capture_index()?;
let sub = Box::new(self.parse()?);
let sub = Box::new(self.parse_inner()?);
let cap = hir::Capture { index, name: None, sub };
Ok(Some(Hir::capture(cap)))
}
Expand Down Expand Up @@ -1263,6 +1281,38 @@ impl<'a> Parser<'a> {
}
}

/// This checks the depth of the given `Hir` value, and if it exceeds the given
/// limit, then an error is returned.
fn check_hir_nesting(hir: &Hir, limit: u32) -> Result<(), Error> {
fn recurse(hir: &Hir, limit: u32, depth: u32) -> Result<(), Error> {
if depth > limit {
return Err(Error::new(ERR_TOO_MUCH_NESTING));
}
let Some(next_depth) = depth.checked_add(1) else {
return Err(Error::new(ERR_TOO_MUCH_NESTING));
};
match *hir.kind() {
HirKind::Empty
| HirKind::Char(_)
| HirKind::Class(_)
| HirKind::Look(_) => Ok(()),
HirKind::Repetition(hir::Repetition { ref sub, .. }) => {
recurse(sub, limit, next_depth)
}
HirKind::Capture(hir::Capture { ref sub, .. }) => {
recurse(sub, limit, next_depth)
}
HirKind::Concat(ref subs) | HirKind::Alternation(ref subs) => {
for sub in subs.iter() {
recurse(sub, limit, next_depth)?;
}
Ok(())
}
}
}
recurse(hir, limit, 0)
}

/// Converts the given Hir to a literal char if the Hir is just a single
/// character. Otherwise this returns an error.
///
Expand Down Expand Up @@ -1344,12 +1394,12 @@ mod tests {
use super::*;

fn p(pattern: &str) -> Hir {
Parser::new(Config::default(), pattern).parse().unwrap()
Parser::new(Config::default(), pattern).parse_inner().unwrap()
}

fn perr(pattern: &str) -> String {
Parser::new(Config::default(), pattern)
.parse()
.parse_inner()
.unwrap_err()
.to_string()
}
Expand Down
17 changes: 17 additions & 0 deletions regex-lite/tests/fuzz/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,23 @@ fn captures_wrong_order_min() {
let _ = run(data);
}

// Simpler regression test from a failure found by OSS-fuzz[1]. This test,
// when it failed, caused a stack overflow. We fixed it by adding another nest
// check on the Hir value itself, since the Hir type can have depth added to
// it without recursive calls in the parser (which is where the existing nest
// check was).
//
// Many thanks to Addison Crump for coming up with this test case[2].
//
// [1]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608
// [2]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=60608#c1
#[test]
fn many_zero_to_many_reps() {
let pat = format!(".{}", "*".repeat(1 << 15));
let Ok(re) = regex_lite::RegexBuilder::new(&pat).build() else { return };
re.is_match("");
}

// This is the fuzz target function. We duplicate it here since this is the
// thing we use to interpret the data. It is ultimately what we want to
// succeed.
Expand Down

0 comments on commit 466e42c

Please sign in to comment.