Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: show location info in parse errors #958

Merged
merged 2 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 66 additions & 35 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ pub enum ParserError {

// Use `Parser::expected` instead, if possible
macro_rules! parser_err {
($MSG:expr) => {
Err(ParserError::ParserError($MSG.to_string()))
($MSG:expr, $loc:expr) => {
Err(ParserError::ParserError(format!("{}{}", $MSG, $loc)))
};
}

Expand Down Expand Up @@ -368,8 +368,8 @@ impl<'a> Parser<'a> {
debug!("Parsing sql '{}'...", sql);
let tokens = Tokenizer::new(self.dialect, sql)
.with_unescape(self.options.unescape)
.tokenize()?;
Ok(self.with_tokens(tokens))
.tokenize_with_location()?;
Ok(self.with_tokens_with_locations(tokens))
}

/// Parse potentially multiple statements
Expand Down Expand Up @@ -724,6 +724,7 @@ impl<'a> Parser<'a> {
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
// expression that should parse as the column name "date".
let loc = self.peek_token().location;
return_ok_if_some!(self.maybe_parse(|parser| {
match parser.parse_data_type()? {
DataType::Interval => parser.parse_interval(),
Expand All @@ -734,7 +735,7 @@ impl<'a> Parser<'a> {
// name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of
// an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the
// `type 'string'` syntax for the custom data types at all.
DataType::Custom(..) => parser_err!("dummy"),
DataType::Custom(..) => parser_err!("dummy", loc),
data_type => Ok(Expr::TypedString {
data_type,
value: parser.parse_literal_string()?,
Expand Down Expand Up @@ -909,7 +910,12 @@ impl<'a> Parser<'a> {
let tok = self.next_token();
let key = match tok.token {
Token::Word(word) => word.to_ident(),
_ => return parser_err!(format!("Expected identifier, found: {tok}")),
_ => {
return parser_err!(
format!("Expected identifier, found: {tok}"),
tok.location
)
}
};
Ok(Expr::CompositeAccess {
expr: Box::new(expr),
Expand Down Expand Up @@ -1220,7 +1226,10 @@ impl<'a> Parser<'a> {
r#in: Box::new(from),
})
} else {
parser_err!("Position function must include IN keyword".to_string())
parser_err!(
"Position function must include IN keyword".to_string(),
self.peek_token().location
)
}
}

Expand Down Expand Up @@ -1884,7 +1893,10 @@ impl<'a> Parser<'a> {
}
}
// Can only happen if `get_next_precedence` got out of sync with this function
_ => parser_err!(format!("No infix parser for token {:?}", tok.token)),
_ => parser_err!(
format!("No infix parser for token {:?}", tok.token),
tok.location
),
}
} else if Token::DoubleColon == tok {
self.parse_pg_cast(expr)
Expand Down Expand Up @@ -1935,7 +1947,10 @@ impl<'a> Parser<'a> {
})
} else {
// Can only happen if `get_next_precedence` got out of sync with this function
parser_err!(format!("No infix parser for token {:?}", tok.token))
parser_err!(
format!("No infix parser for token {:?}", tok.token),
tok.location
)
}
}

Expand Down Expand Up @@ -2213,7 +2228,10 @@ impl<'a> Parser<'a> {

/// Report unexpected token
pub fn expected<T>(&self, expected: &str, found: TokenWithLocation) -> Result<T, ParserError> {
parser_err!(format!("Expected {expected}, found: {found}"))
parser_err!(
format!("Expected {expected}, found: {found}"),
found.location
)
}

/// Look for an expected keyword and consume it if it exists
Expand Down Expand Up @@ -2378,13 +2396,14 @@ impl<'a> Parser<'a> {
/// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns `None` if `ALL` is parsed
/// and results in a `ParserError` if both `ALL` and `DISTINCT` are found.
pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {
let loc = self.peek_token().location;
let all = self.parse_keyword(Keyword::ALL);
let distinct = self.parse_keyword(Keyword::DISTINCT);
if !distinct {
return Ok(None);
}
if all {
return parser_err!("Cannot specify both ALL and DISTINCT".to_string());
return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc);
}
let on = self.parse_keyword(Keyword::ON);
if !on {
Expand Down Expand Up @@ -2986,74 +3005,78 @@ impl<'a> Parser<'a> {
let mut admin = vec![];

while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) {
let loc = self
.tokens
.get(self.index - 1)
.map_or(Location { line: 0, column: 0 }, |t| t.location);
match keyword {
Keyword::AUTHORIZATION => {
if authorization_owner.is_some() {
parser_err!("Found multiple AUTHORIZATION")
parser_err!("Found multiple AUTHORIZATION", loc)
} else {
authorization_owner = Some(self.parse_object_name()?);
Ok(())
}
}
Keyword::LOGIN | Keyword::NOLOGIN => {
if login.is_some() {
parser_err!("Found multiple LOGIN or NOLOGIN")
parser_err!("Found multiple LOGIN or NOLOGIN", loc)
} else {
login = Some(keyword == Keyword::LOGIN);
Ok(())
}
}
Keyword::INHERIT | Keyword::NOINHERIT => {
if inherit.is_some() {
parser_err!("Found multiple INHERIT or NOINHERIT")
parser_err!("Found multiple INHERIT or NOINHERIT", loc)
} else {
inherit = Some(keyword == Keyword::INHERIT);
Ok(())
}
}
Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => {
if bypassrls.is_some() {
parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS")
parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc)
} else {
bypassrls = Some(keyword == Keyword::BYPASSRLS);
Ok(())
}
}
Keyword::CREATEDB | Keyword::NOCREATEDB => {
if create_db.is_some() {
parser_err!("Found multiple CREATEDB or NOCREATEDB")
parser_err!("Found multiple CREATEDB or NOCREATEDB", loc)
} else {
create_db = Some(keyword == Keyword::CREATEDB);
Ok(())
}
}
Keyword::CREATEROLE | Keyword::NOCREATEROLE => {
if create_role.is_some() {
parser_err!("Found multiple CREATEROLE or NOCREATEROLE")
parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc)
} else {
create_role = Some(keyword == Keyword::CREATEROLE);
Ok(())
}
}
Keyword::SUPERUSER | Keyword::NOSUPERUSER => {
if superuser.is_some() {
parser_err!("Found multiple SUPERUSER or NOSUPERUSER")
parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc)
} else {
superuser = Some(keyword == Keyword::SUPERUSER);
Ok(())
}
}
Keyword::REPLICATION | Keyword::NOREPLICATION => {
if replication.is_some() {
parser_err!("Found multiple REPLICATION or NOREPLICATION")
parser_err!("Found multiple REPLICATION or NOREPLICATION", loc)
} else {
replication = Some(keyword == Keyword::REPLICATION);
Ok(())
}
}
Keyword::PASSWORD => {
if password.is_some() {
parser_err!("Found multiple PASSWORD")
parser_err!("Found multiple PASSWORD", loc)
} else {
password = if self.parse_keyword(Keyword::NULL) {
Some(Password::NullPassword)
Expand All @@ -3066,7 +3089,7 @@ impl<'a> Parser<'a> {
Keyword::CONNECTION => {
self.expect_keyword(Keyword::LIMIT)?;
if connection_limit.is_some() {
parser_err!("Found multiple CONNECTION LIMIT")
parser_err!("Found multiple CONNECTION LIMIT", loc)
} else {
connection_limit = Some(Expr::Value(self.parse_number_value()?));
Ok(())
Expand All @@ -3075,7 +3098,7 @@ impl<'a> Parser<'a> {
Keyword::VALID => {
self.expect_keyword(Keyword::UNTIL)?;
if valid_until.is_some() {
parser_err!("Found multiple VALID UNTIL")
parser_err!("Found multiple VALID UNTIL", loc)
} else {
valid_until = Some(Expr::Value(self.parse_value()?));
Ok(())
Expand All @@ -3084,14 +3107,14 @@ impl<'a> Parser<'a> {
Keyword::IN => {
if self.parse_keyword(Keyword::ROLE) {
if !in_role.is_empty() {
parser_err!("Found multiple IN ROLE")
parser_err!("Found multiple IN ROLE", loc)
} else {
in_role = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
}
} else if self.parse_keyword(Keyword::GROUP) {
if !in_group.is_empty() {
parser_err!("Found multiple IN GROUP")
parser_err!("Found multiple IN GROUP", loc)
} else {
in_group = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
Expand All @@ -3102,23 +3125,23 @@ impl<'a> Parser<'a> {
}
Keyword::ROLE => {
if !role.is_empty() {
parser_err!("Found multiple ROLE")
parser_err!("Found multiple ROLE", loc)
} else {
role = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
}
}
Keyword::USER => {
if !user.is_empty() {
parser_err!("Found multiple USER")
parser_err!("Found multiple USER", loc)
} else {
user = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
}
}
Keyword::ADMIN => {
if !admin.is_empty() {
parser_err!("Found multiple ADMIN")
parser_err!("Found multiple ADMIN", loc)
} else {
admin = self.parse_comma_separated(Parser::parse_identifier)?;
Ok(())
Expand Down Expand Up @@ -3181,14 +3204,19 @@ impl<'a> Parser<'a> {
// specifying multiple objects to delete in a single statement
let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]);
let names = self.parse_comma_separated(Parser::parse_object_name)?;

let loc = self.peek_token().location;
let cascade = self.parse_keyword(Keyword::CASCADE);
let restrict = self.parse_keyword(Keyword::RESTRICT);
let purge = self.parse_keyword(Keyword::PURGE);
if cascade && restrict {
return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP");
return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc);
}
if object_type == ObjectType::Role && (cascade || restrict || purge) {
return parser_err!("Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE");
return parser_err!(
"Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE",
loc
);
}
Ok(Statement::Drop {
object_type,
Expand Down Expand Up @@ -4446,7 +4474,11 @@ impl<'a> Parser<'a> {
fn parse_literal_char(&mut self) -> Result<char, ParserError> {
let s = self.parse_literal_string()?;
if s.len() != 1 {
return parser_err!(format!("Expect a char, found {s:?}"));
let loc = self
.tokens
.get(self.index - 1)
.map_or(Location { line: 0, column: 0 }, |t| t.location);
return parser_err!(format!("Expect a char, found {s:?}"), loc);
}
Ok(s.chars().next().unwrap())
}
Expand Down Expand Up @@ -4525,7 +4557,7 @@ impl<'a> Parser<'a> {
// (i.e., it returns the input string).
Token::Number(ref n, l) => match n.parse() {
Ok(n) => Ok(Value::Number(n, l)),
Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}")),
Err(e) => parser_err!(format!("Could not parse '{n}' as number: {e}"), location),
},
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
Expand Down Expand Up @@ -6465,10 +6497,11 @@ impl<'a> Parser<'a> {
.parse_keywords(&[Keyword::GRANTED, Keyword::BY])
.then(|| self.parse_identifier().unwrap());

let loc = self.peek_token().location;
let cascade = self.parse_keyword(Keyword::CASCADE);
let restrict = self.parse_keyword(Keyword::RESTRICT);
if cascade && restrict {
return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE");
return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc);
}

Ok(Statement::Revoke {
Expand Down Expand Up @@ -7929,14 +7962,12 @@ mod tests {

#[test]
fn test_parser_error_loc() {
// TODO: Once we thread token locations through the parser, we should update this
// test to assert the locations of the referenced token
let sql = "SELECT this is a syntax error";
let ast = Parser::parse_sql(&GenericDialect, sql);
assert_eq!(
ast,
Err(ParserError::ParserError(
"Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a"
"Expected [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column 16"
.to_string()
))
);
Expand Down
8 changes: 7 additions & 1 deletion src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ use core::fmt::Debug;

use crate::dialect::*;
use crate::parser::{Parser, ParserError};
use crate::tokenizer::Tokenizer;
use crate::{ast::*, parser::ParserOptions};

/// Tests use the methods on this struct to invoke the parser on one or
Expand Down Expand Up @@ -82,8 +83,13 @@ impl TestedDialects {
/// the result is the same for all tested dialects.
pub fn parse_sql_statements(&self, sql: &str) -> Result<Vec<Statement>, ParserError> {
self.one_of_identical_results(|dialect| {
let mut tokenizer = Tokenizer::new(dialect, sql);
if let Some(options) = &self.options {
tokenizer = tokenizer.with_unescape(options.unescape);
}
let tokens = tokenizer.tokenize()?;
self.new_parser(dialect)
.try_with_sql(sql)?
.with_tokens(tokens)
.parse_statements()
})
// To fail the `ensure_multiple_dialects_are_tested` test:
Expand Down
Loading
Loading