Skip to content

Commit

Permalink
Add support for mixed BigQuery table name quoting (#971)
Browse files Browse the repository at this point in the history
Co-authored-by: ifeanyi <ifeanyi@validio.io>
  • Loading branch information
iffyio and iffyio authored Oct 2, 2023
1 parent 6ffc3b3 commit 993769e
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 5 deletions.
21 changes: 21 additions & 0 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5041,6 +5041,27 @@ impl<'a> Parser<'a> {
break;
}
}

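// BigQuery lets the dot-separated parts of a table name be quoted together
// or separately (e.g. `foo.bar.baz`, `foo.bar`.`baz`, `foo`.`bar.baz`), so any
// identifier whose value contains '.' is split into one identifier per part,
// each keeping the original quote style.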
// https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers
if dialect_of!(self is BigQueryDialect)
&& idents.iter().any(|ident| ident.value.contains('.'))
{
idents = idents
.into_iter()
.flat_map(|ident| {
ident
.value
.split('.')
.map(|value| Ident {
value: value.into(),
quote_style: ident.quote_style,
})
.collect::<Vec<_>>()
})
.collect()
}

Ok(ObjectName(idents))
}

Expand Down
18 changes: 18 additions & 0 deletions src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,24 @@ impl TestedDialects {
}
}

/// Parses `query` as a single statement and returns its [`Select`] body.
///
/// The parsing itself is delegated to `one_statement_parses_to`, which
/// receives both `query` and `canonical`; that helper is expected to
/// check that:
///
/// 1. parsing `query` results in the same [`Statement`] as parsing
///    `canonical`, and
///
/// 2. re-serializing the result of parsing `query` produces the
///    `canonical` sql string.
///
/// # Panics
///
/// Panics with "Expected Query" when the parsed statement is not a
/// [`Statement::Query`], and with "Expected SetExpr::Select" when the
/// body of that query is not a [`SetExpr::Select`].
pub fn verified_only_select_with_canonical(&self, query: &str, canonical: &str) -> Select {
    let statement = self.one_statement_parses_to(query, canonical);

    // Unbox the query so that its body can be moved out below.
    let parsed_query = if let Statement::Query(boxed) = statement {
        *boxed
    } else {
        panic!("Expected Query")
    };

    if let SetExpr::Select(select) = *parsed_query.body {
        *select
    } else {
        panic!("Expected SetExpr::Select")
    }
}

/// Ensures that `sql` parses as an [`Expr`], and that
/// re-serializing the parse result produces the same `sql`
/// string (is not modified after a serialization round-trip).
Expand Down
95 changes: 90 additions & 5 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
#[macro_use]
mod test_utils;

use std::ops::Deref;

use sqlparser::ast::*;
use sqlparser::dialect::{BigQueryDialect, GenericDialect};
use test_utils::*;
Expand Down Expand Up @@ -84,9 +86,24 @@ fn parse_raw_literal() {

#[test]
fn parse_table_identifiers() {
fn test_table_ident(ident: &str, expected: Vec<Ident>) {
/// Parses a table identifier ident and verifies that re-serializing the
/// parsed identifier produces the original ident string.
///
/// In some cases, re-serializing the result of the parsed ident is not
/// expected to produce the original ident string. canonical is provided
/// instead as the canonical representation of the identifier for comparison.
/// For example, re-serializing the result of ident `foo.bar` produces
/// the equivalent canonical representation `foo`.`bar`
fn test_table_ident(ident: &str, canonical: Option<&str>, expected: Vec<Ident>) {
let sql = format!("SELECT 1 FROM {ident}");
let select = bigquery().verified_only_select(&sql);
let canonical = canonical.map(|ident| format!("SELECT 1 FROM {ident}"));

let select = if let Some(canonical) = canonical {
bigquery().verified_only_select_with_canonical(&sql, canonical.deref())
} else {
bigquery().verified_only_select(&sql)
};

assert_eq!(
select.from,
vec![TableWithJoins {
Expand All @@ -102,47 +119,115 @@ fn parse_table_identifiers() {
},]
);
}

/// Asserts that `SELECT 1 FROM {ident}` is rejected, i.e. that
/// `bigquery().parse_sql_statements` returns an error for it.
fn test_table_ident_err(ident: &str) {
    let statement = format!("SELECT 1 FROM {ident}");
    let outcome = bigquery().parse_sql_statements(&statement);
    assert!(outcome.is_err());
}

test_table_ident("da-sh-es", vec![Ident::new("da-sh-es")]);
test_table_ident("da-sh-es", None, vec![Ident::new("da-sh-es")]);

test_table_ident("`spa ce`", vec![Ident::with_quote('`', "spa ce")]);
test_table_ident("`spa ce`", None, vec![Ident::with_quote('`', "spa ce")]);

test_table_ident(
"`!@#$%^&*()-=_+`",
None,
vec![Ident::with_quote('`', "!@#$%^&*()-=_+")],
);

test_table_ident(
"_5abc.dataField",
None,
vec![Ident::new("_5abc"), Ident::new("dataField")],
);
test_table_ident(
"`5abc`.dataField",
None,
vec![Ident::with_quote('`', "5abc"), Ident::new("dataField")],
);

test_table_ident_err("5abc.dataField");

test_table_ident(
"abc5.dataField",
None,
vec![Ident::new("abc5"), Ident::new("dataField")],
);

test_table_ident_err("abc5!.dataField");

test_table_ident(
"`GROUP`.dataField",
None,
vec![Ident::with_quote('`', "GROUP"), Ident::new("dataField")],
);

// TODO: this should be error
// test_table_ident_err("GROUP.dataField");

test_table_ident("abc5.GROUP", vec![Ident::new("abc5"), Ident::new("GROUP")]);
test_table_ident(
"abc5.GROUP",
None,
vec![Ident::new("abc5"), Ident::new("GROUP")],
);

test_table_ident(
"`foo.bar.baz`",
Some("`foo`.`bar`.`baz`"),
vec![
Ident::with_quote('`', "foo"),
Ident::with_quote('`', "bar"),
Ident::with_quote('`', "baz"),
],
);

test_table_ident(
"`foo.bar`.`baz`",
Some("`foo`.`bar`.`baz`"),
vec![
Ident::with_quote('`', "foo"),
Ident::with_quote('`', "bar"),
Ident::with_quote('`', "baz"),
],
);

test_table_ident(
"`foo`.`bar.baz`",
Some("`foo`.`bar`.`baz`"),
vec![
Ident::with_quote('`', "foo"),
Ident::with_quote('`', "bar"),
Ident::with_quote('`', "baz"),
],
);

test_table_ident(
"`foo`.`bar`.`baz`",
Some("`foo`.`bar`.`baz`"),
vec![
Ident::with_quote('`', "foo"),
Ident::with_quote('`', "bar"),
Ident::with_quote('`', "baz"),
],
);

test_table_ident(
"`5abc.dataField`",
Some("`5abc`.`dataField`"),
vec![
Ident::with_quote('`', "5abc"),
Ident::with_quote('`', "dataField"),
],
);

test_table_ident(
"`_5abc.da-sh-es`",
Some("`_5abc`.`da-sh-es`"),
vec![
Ident::with_quote('`', "_5abc"),
Ident::with_quote('`', "da-sh-es"),
],
);
}

#[test]
Expand Down

0 comments on commit 993769e

Please sign in to comment.