diff --git a/Cargo.lock b/Cargo.lock index d85f1646771..c58cc2ef80a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -706,7 +706,6 @@ dependencies = [ "nom_locate 4.0.0", "regex", "thiserror", - "unicode-xid 0.2.2", ] [[package]] diff --git a/nextest-filtering/Cargo.toml b/nextest-filtering/Cargo.toml index 314845d80b8..0c0eac4095b 100644 --- a/nextest-filtering/Cargo.toml +++ b/nextest-filtering/Cargo.toml @@ -24,4 +24,3 @@ nom-tracable = "0.8.0" nom_locate = "4.0.0" regex = "1.5.5" thiserror = "1.0.30" -unicode-xid = "0.2.2" diff --git a/nextest-filtering/src/compile.rs b/nextest-filtering/src/compile.rs index cadee9456b0..d98b5e97a85 100644 --- a/nextest-filtering/src/compile.rs +++ b/nextest-filtering/src/compile.rs @@ -7,8 +7,10 @@ use guppy::{ }; use std::collections::HashSet; -use crate::expression::*; -use crate::parsing::{Expr, SetDef}; +use crate::{ + expression::*, + parsing::{Expr, SetDef}, +}; pub(crate) fn compile(expr: &Expr, graph: &PackageGraph) -> FilteringExpr { let in_workspace_packages: Vec<_> = graph diff --git a/nextest-filtering/src/error.rs b/nextest-filtering/src/error.rs index 9060172fdc9..015bc0a83f9 100644 --- a/nextest-filtering/src/error.rs +++ b/nextest-filtering/src/error.rs @@ -16,14 +16,14 @@ pub enum Error { InvalidRegex(#[label("Invalid regex")] SourceSpan), #[error("Expected close regex")] ExpectedCloseRegex(#[label("Missing '/'")] SourceSpan), - #[error("Invalid identifier")] - InvalidIdentifier( - #[label("Identifier can only contain xid_continue characters and ':'")] SourceSpan, - ), - #[error("Expected identifier")] - ExpectedIdentifier(#[label("Missing identifier")] SourceSpan), + #[error("Expected matcher input")] + ExpectedMatcherInput(#[label("Missing matcher content")] SourceSpan), #[error("Unexpected name matcher")] UnexpectedNameMatcher(#[label("This set doesn't take en argument")] SourceSpan), + #[error("Invalid unicode string")] + InvalidUnicodeString(#[label("This is not a valid unicode string")] SourceSpan), + 
#[error("Expected close string")] + ExpectedCloseQuote(#[label("Missing `'`")] SourceSpan), #[error("Expected open parentheses")] ExpectedOpenParenthesis(#[label("Missing '('")] SourceSpan), #[error("Expected close parentheses")] diff --git a/nextest-filtering/src/expression.rs b/nextest-filtering/src/expression.rs index 01d0b751762..c94b10e30d7 100644 --- a/nextest-filtering/src/expression.rs +++ b/nextest-filtering/src/expression.rs @@ -4,8 +4,10 @@ use guppy::{graph::PackageGraph, PackageId}; use std::{cell::RefCell, collections::HashSet}; -use crate::error::{Error, FilteringExprParsingError, State}; -use crate::parsing::{parse, ParsedExpr, Span}; +use crate::{ + error::{Error, FilteringExprParsingError, State}, + parsing::{parse, ParsedExpr, Span}, +}; /// Matcher for name /// diff --git a/nextest-filtering/src/parsing.rs b/nextest-filtering/src/parsing.rs index 717e525cef1..364e254bdcb 100644 --- a/nextest-filtering/src/parsing.rs +++ b/nextest-filtering/src/parsing.rs @@ -16,17 +16,17 @@ use miette::SourceSpan; use nom::{ branch::alt, - bytes::complete::{tag, take_till, take_until, take_while1}, + bytes::complete::{tag, take_till, take_until}, character::complete::char, combinator::{eof, map, recognize}, multi::{fold_many0, many0}, sequence::{delimited, pair, preceded, terminated}, }; use nom_tracable::tracable_parser; -use unicode_xid::UnicodeXID; -use crate::error::*; -use crate::NameMatcher; +mod unicode_string; + +use crate::{error::*, NameMatcher}; pub type Span<'a> = nom_locate::LocatedSpan<&'a str, State<'a>>; type IResult<'a, T> = nom::IResult, T>; @@ -190,89 +190,78 @@ fn ws<'a, T, P: FnMut(Span<'a>) -> IResult<'a, T>>( } } -fn is_identifier_char(c: char) -> bool { - // This is use for NameMatcher::Contains(_) and NameMatcher::Equal(_) - // The output should be valid part of a test-name or a package name. 
- c == ':' || c.is_xid_continue() +// This parse will never fail +#[tracable_parser] +fn parse_matcher_text_no_quote(input: Span) -> IResult> { + map( + take_till::<_, _, nom::error::Error>(|c| c == ')'), + |res: Span| Some(res.fragment().trim().to_string()), + )(input) } #[tracable_parser] -fn parse_identifier_part(input: Span) -> IResult> { - let start = input.location_offset(); - match map( - recognize::<_, _, nom::error::Error, _>(take_while1(is_identifier_char)), - |res: Span| res.fragment().to_string(), - )(input.clone()) - { - Ok((i1, res1)) => { - match recognize::<_, _, nom::error::Error, _>(take_till(|c| c == ')'))(i1.clone()) - { - Ok((i, res)) => { - if res.fragment().trim().is_empty() { - Ok((i1, Some(res1))) - } else { - let end = i.location_offset() - start; - let err = Error::InvalidIdentifier((start, end).into()); +fn parse_matcher_text_quote(input: Span) -> IResult> { + let (input, _) = ws(char('\''))(input)?; + + let (i, res) = + match expect(unicode_string::parse_string, Error::InvalidUnicodeString)(input.clone()) { + Ok((i, res)) => (i, res), + Err(nom::Err::Incomplete(_)) => { + // no closing `'` found for this string + match take_till::<_, _, nom::error::Error>(|c| c == ')')(input.clone()) { + Ok((i, _)) => { + let start = i.location_offset(); + let err = Error::ExpectedCloseQuote((start, 0).into()); i.extra.report_error(err); - Ok((i, None)) + return Ok((i, None)); } + Err(_) => unreachable!(), } - Err(_) => unreachable!(), } - } - Err(_) => { - match recognize::<_, _, nom::error::Error, _>(take_till(|c| c == ')'))(input) { - Ok((i, res)) => { - let end = i.location_offset() - start; - let err = if res.fragment().trim().is_empty() { - Error::ExpectedIdentifier((start, end).into()) - } else { - Error::InvalidIdentifier((start, end).into()) - }; - i.extra.report_error(err); - Ok((i, None)) - } - Err(_) => unreachable!(), - } - } - } + Err(_) => unreachable!(), + }; + + // by construction this can not fail + let (i, _) = char::<_, 
nom::error::Error>('\'')(i).unwrap(); + Ok((i, res)) +} + +// This parse will never fail (because parse_matcher_text_no_quote never fails) +#[tracable_parser] +fn parse_matcher_text(input: Span) -> IResult> { + alt((parse_matcher_text_quote, parse_matcher_text_no_quote))(input) } // This parse will never fail #[tracable_parser] fn parse_contains_matcher(input: Span) -> IResult> { - ws(map(parse_identifier_part, |res: Option| { + map(parse_matcher_text, |res: Option| { res.map(NameMatcher::Contains) - }))(input) + })(input) } #[tracable_parser] fn parse_equal_matcher(input: Span) -> IResult> { ws(map( - preceded(char('='), ws(parse_identifier_part)), + preceded(char('='), parse_matcher_text), |res: Option| res.map(NameMatcher::Equal), ))(input) } #[tracable_parser] fn parse_regex_(input: Span) -> IResult> { - let (i, res) = - match recognize::<_, _, nom::error::Error, _>(take_until("/"))(input.clone()) { - Ok((i, res)) => (i, res), - Err(_) => { - match recognize::<_, _, nom::error::Error, _>(take_till(|c| c == ')'))( - input.clone(), - ) { - Ok((i, res)) => { - let start = i.location_offset(); - let err = Error::ExpectedCloseRegex((start, 0).into()); - i.extra.report_error(err); - (i, res) - } - Err(_) => return Ok((input, None)), - } + let (i, res) = match take_until::<_, _, nom::error::Error>("/")(input.clone()) { + Ok((i, res)) => (i, res), + Err(_) => match take_till::<_, _, nom::error::Error>(|c| c == ')')(input.clone()) { + Ok((i, _)) => { + let start = i.location_offset(); + let err = Error::ExpectedCloseRegex((start, 0).into()); + i.extra.report_error(err); + return Ok((i, None)); } - }; + Err(_) => unreachable!(), + }, + }; match regex::Regex::new(res.fragment()).map(NameMatcher::Regex) { Ok(res) => Ok((i, Some(res))), _ => { @@ -462,6 +451,31 @@ mod tests { ); } + #[test] + fn test_parse_name_matcher_quote() { + assert_eq!( + SetDef::Test(NameMatcher::Contains("something".to_string())), + parse_set("test('something')") + ); + assert_eq!( + 
SetDef::Test(NameMatcher::Equal("something".to_string())), + parse_set("test(='something')") + ); + + assert_eq!( + SetDef::Test(NameMatcher::Contains(r"some'thing".to_string())), + parse_set(r"test('some\'thing')") + ); + assert_eq!( + SetDef::Test(NameMatcher::Contains(r"some(thing)".to_string())), + parse_set(r"test('some(thing)')") + ); + assert_eq!( + SetDef::Test(NameMatcher::Contains(r"some U".to_string())), + parse_set(r"test('some \u{55}')") + ); + } + #[test] fn test_parse_set_def() { assert_eq!(SetDef::All, parse_set("all()")); @@ -625,15 +639,6 @@ mod tests { assert_error!(error, ExpectedCloseRegex, 12, 0); } - #[test] - fn test_invalid_identifier() { - let src = "package(a aa)"; - let mut errors = parse_err(src); - assert_eq!(1, errors.len()); - let error = errors.remove(0); - assert_error!(error, InvalidIdentifier, 8, 4); - } - #[test] fn test_unexpected_argument() { let src = "all(aaa)"; @@ -665,12 +670,19 @@ mod tests { fn test_complex_error() { let src = "all) + package(/not) - deps(expr none)"; let mut errors = parse_err(src); - assert_eq!(3, errors.len(), "{:?}", errors); + assert_eq!(2, errors.len(), "{:?}", errors); let error = errors.remove(0); assert_error!(error, ExpectedOpenParenthesis, 3, 0); let error = errors.remove(0); assert_error!(error, ExpectedCloseRegex, 19, 0); + } + + #[test] + fn test_missing_string_close() { + let src = "test('thing)"; + let mut errors = parse_err(src); + assert_eq!(1, errors.len(), "{:?}", errors); let error = errors.remove(0); - assert_error!(error, InvalidIdentifier, 28, 9); + assert_error!(error, ExpectedCloseQuote, 11, 0); } } diff --git a/nextest-filtering/src/parsing/unicode_string.rs b/nextest-filtering/src/parsing/unicode_string.rs new file mode 100644 index 00000000000..5e9c8c165f9 --- /dev/null +++ b/nextest-filtering/src/parsing/unicode_string.rs @@ -0,0 +1,118 @@ +// Copyright (c) The nextest Contributors +// SPDX-License-Identifier: MIT OR Apache-2.0 + +// Adapted from 
https://github.com/Geal/nom/blob/294ffb3d9e0ade2c3b7ddfff52484b6d643dcce1/examples/string.rs + +use nom::{ + branch::alt, + bytes::streaming::{is_not, take_while_m_n}, + character::streaming::{char, multispace1}, + combinator::{map, map_opt, map_res, value, verify}, + multi::fold_many0, + sequence::{delimited, preceded}, + Slice, +}; +use nom_tracable::tracable_parser; + +use super::{IResult, Span}; + +fn run_str_parser<'a, T, I>(mut inner: I) -> impl FnMut(Span<'a>) -> IResult<'a, T> +where + I: FnMut(&'a str) -> nom::IResult<&'a str, T>, +{ + move |input| match inner(input.fragment()) { + Ok((i, res)) => { + let eaten = input.fragment().len() - i.len(); + Ok((input.slice(eaten..), res)) + } + Err(nom::Err::Error(err)) => { + let nom::error::Error { input: i, code } = err; + let eaten = input.fragment().len() - i.len(); + let err = nom::error::Error { + input: input.slice(eaten..), + code, + }; + Err(nom::Err::Error(err)) + } + Err(nom::Err::Failure(err)) => { + let nom::error::Error { input: i, code } = err; + let eaten = input.fragment().len() - i.len(); + let err = nom::error::Error { + input: input.slice(eaten..), + code, + }; + Err(nom::Err::Failure(err)) + } + Err(nom::Err::Incomplete(err)) => Err(nom::Err::Incomplete(err)), + } +} + +#[tracable_parser] +fn parse_unicode(input: Span) -> IResult { + let parse_hex = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()); + let parse_delimited_hex = preceded(char('u'), delimited(char('{'), parse_hex, char('}'))); + let parse_u32 = map_res(parse_delimited_hex, |hex| u32::from_str_radix(hex, 16)); + run_str_parser(map_opt(parse_u32, std::char::from_u32))(input) +} + +#[tracable_parser] +fn parse_escaped_char(input: Span) -> IResult { + preceded( + char('\\'), + alt(( + parse_unicode, + value('\n', char('n')), + value('\r', char('r')), + value('\t', char('t')), + value('\u{08}', char('b')), + value('\u{0C}', char('f')), + value('\\', char('\\')), + value('/', char('/')), + value('\'', char('\'')), + )), + )(input) 
+} + +#[tracable_parser] +fn parse_escaped_whitespace(input: Span) -> IResult { + preceded(char('\\'), multispace1)(input) +} + +#[tracable_parser] +fn parse_literal(input: Span) -> IResult { + let not_quote_slash = is_not("\'\\"); + verify(not_quote_slash, |s: &Span| !s.fragment().is_empty())(input) +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum StringFragment<'a> { + Literal(&'a str), + EscapedChar(char), + EscapedWS, +} + +#[tracable_parser] +fn parse_fragment(input: Span) -> IResult> { + alt(( + map(parse_literal, |span| { + StringFragment::Literal(span.fragment()) + }), + map(parse_escaped_char, StringFragment::EscapedChar), + value(StringFragment::EscapedWS, parse_escaped_whitespace), + ))(input) +} + +/// Construct a string by consuming the input until the next unescaped `'` +/// +/// Return Err(Incomplete(1)) if no ending `'` is found +#[tracable_parser] +pub(super) fn parse_string(input: Span) -> IResult { + fold_many0(parse_fragment, String::new, |mut string, fragment| { + match fragment { + StringFragment::Literal(s) => string.push_str(s), + StringFragment::EscapedChar(c) => string.push(c), + StringFragment::EscapedWS => {} + } + string + })(input) +} diff --git a/site/src/book/filtering-expression.md b/site/src/book/filtering-expression.md index 78fcb488f8c..2c84d578f03 100644 --- a/site/src/book/filtering-expression.md +++ b/site/src/book/filtering-expression.md @@ -20,9 +20,15 @@ Basic sets: Name matcher: - `text`: match anything containing `text` +- `'unicode_text'`: match anything containing `unicode_text` - `=text`: match anything equal to `text` +- `='unicode_text'`: match anything equal to `unicode_text` - `/reg/`: match anything matching the regex `reg` +Unicode string: +- can contain an escaped quote: `\'` +- can contain a unicode escape sequence: `\u{xxx}` (where `xxx` is a 1 to 6 digit hexadecimal number) + Operations: - `set_1 & set_2` , `set_1 and set_2`: include everything in both `set_1` and `set_2` - `set_1 | set_2`, `set_1 or set_2`, 
`set_1 + set_2`: include everything in either `set_1` or `set_2`