From c68e9775a22acf00e54b33542b10ac6d1a8cf887 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Fri, 20 Oct 2023 20:33:12 +0200 Subject: [PATCH] Support bigquery `CAST AS x [STRING|DATE] FORMAT` syntax (#978) --- src/ast/mod.rs | 64 ++++++++++++++++++++++++++++++++++-- src/parser/mod.rs | 23 +++++++++++++ tests/sqlparser_bigquery.rs | 35 ++++++++++++++++++-- tests/sqlparser_common.rs | 10 ++++++ tests/sqlparser_postgres.rs | 3 +- tests/sqlparser_snowflake.rs | 1 + 6 files changed, 130 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 87f7ebb37..fc15efbc4 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -322,6 +322,16 @@ impl fmt::Display for JsonOperator { } } +/// Options for `CAST` / `TRY_CAST` +/// BigQuery: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CastFormat { + Value(Value), + ValueAtTimeZone(Value, Value), +} + /// An SQL expression of any type. /// /// The parser does not distinguish between expressions of different types @@ -437,12 +447,18 @@ pub enum Expr { Cast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// TRY_CAST an expression to a different data type e.g. `TRY_CAST(foo AS VARCHAR(123))` // this differs from CAST in the choice of how to implement invalid conversions TryCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// SAFE_CAST an expression to a different data type e.g. `SAFE_CAST(foo AS FLOAT64)` // only available for BigQuery: https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-and-operators#safe_casting @@ -450,6 +466,9 @@ pub enum Expr { SafeCast { expr: Box, data_type: DataType, + // Optional CAST(string_expression AS type FORMAT format_string_expression) as used by BigQuery + // https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax + format: Option, }, /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` AtTimeZone { @@ -597,6 +616,15 @@ pub enum Expr { }, } +impl fmt::Display for CastFormat { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CastFormat::Value(v) => write!(f, "{v}"), + CastFormat::ValueAtTimeZone(v, tz) => write!(f, "{v} AT TIME ZONE {tz}"), + } + } +} + impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { @@ -753,9 +781,39 @@ impl fmt::Display for Expr { write!(f, "{op}{expr}") } } - Expr::Cast { expr, data_type } => write!(f, "CAST({expr} AS {data_type})"), - Expr::TryCast { expr, data_type } => write!(f, "TRY_CAST({expr} AS {data_type})"), - Expr::SafeCast { expr, data_type } => write!(f, "SAFE_CAST({expr} AS {data_type})"), + Expr::Cast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "CAST({expr} AS {data_type})") + } + } + Expr::TryCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "TRY_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "TRY_CAST({expr} AS {data_type})") + } + } + Expr::SafeCast { + expr, + data_type, + format, + } => { + if let Some(format) = format { + write!(f, "SAFE_CAST({expr} AS {data_type} FORMAT {format})") + } else { + write!(f, "SAFE_CAST({expr} AS {data_type})") + } + } Expr::Extract { field, expr } => write!(f, "EXTRACT({field} FROM {expr})"), Expr::Ceil { expr, field } => { if field == &DateTimeField::NoDateTime { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 95f1f8edc..829b299af 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1139,16 +1139,34 @@ impl<'a> Parser<'a> { }) } + pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::FORMAT) { + let value = self.parse_value()?; + if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { + Ok(Some(CastFormat::ValueAtTimeZone( + value, + self.parse_value()?, + ))) + } else { + Ok(Some(CastFormat::Value(value))) + } + } else { + Ok(None) + } + } + /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` pub fn parse_cast_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::Cast { expr: Box::new(expr), data_type, + format, }) } @@ -1158,10 +1176,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::TryCast { expr: Box::new(expr), data_type, + format, }) } @@ -1171,10 +1191,12 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword(Keyword::AS)?; let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::SafeCast { expr: Box::new(expr), data_type, + format, }) } @@ -2101,6 +2123,7 @@ impl<'a> Parser<'a> { Ok(Expr::Cast { expr: Box::new(expr), data_type: self.parse_data_type()?, + format: None, }) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 7a9a8d1c4..b3f683b9a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -304,8 +304,39 @@ fn parse_trailing_comma() { #[test] fn parse_cast_type() { - let sql = r#"SELECT SAFE_CAST(1 AS INT64)"#; - bigquery().verified_only_select(sql); + let sql = r"SELECT SAFE_CAST(1 AS INT64)"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_date_format() { + let sql = + r"SELECT CAST(date_valid_from AS DATE FORMAT 'YYYY-MM-DD') AS date_valid_from FROM foo"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_time_format() { + let sql = r"SELECT CAST(TIME '21:30:00' AS STRING FORMAT 'PM') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_timestamp_format_tz() { + let sql = r"SELECT CAST(TIMESTAMP '2008-12-25 00:00:00+00:00' AS STRING FORMAT 'TZH' AT TIME ZONE 'Asia/Kolkata') AS date_time_to_string"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_string_to_bytes_format() { + let sql = r"SELECT CAST('Hello' AS BYTES FORMAT 'ASCII') AS string_to_bytes"; + bigquery_and_generic().verified_only_select(sql); +} + +#[test] +fn parse_cast_bytes_to_string_format() { + let sql = r"SELECT CAST(B'\x48\x65\x6c\x6c\x6f' AS STRING FORMAT 'ASCII') AS bytes_to_string"; + bigquery_and_generic().verified_only_select(sql); } #[test] diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1511aa76e..ff8bdd7a4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1934,6 +1934,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1944,6 +1945,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::TinyInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1970,6 +1972,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Nvarchar(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1980,6 +1983,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -1990,6 +1994,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2000,6 +2005,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Binary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2010,6 +2016,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Varbinary(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2020,6 +2027,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(None), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2030,6 +2038,7 @@ fn parse_cast() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(Some(50)), + format: None, }, expr_from_projection(only(&select.projection)) ); @@ -2043,6 +2052,7 @@ fn parse_try_cast() { &Expr::TryCast { expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + format: None, }, expr_from_projection(only(&select.projection)) ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index fe336bda7..654723668 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1782,7 +1782,8 @@ fn parse_array_index_expr() { })), data_type: DataType::Array(Some(Box::new(DataType::Array(Some(Box::new( DataType::Int(None) - )))))) + )))))), + format: None, }))), indexes: vec![num[1].clone(), num[2].clone()], }, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e92656d0b..bb988665d 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -167,6 +167,7 @@ fn parse_array() { &Expr::Cast { expr: Box::new(Expr::Identifier(Ident::new("a"))), data_type: DataType::Array(None), + format: None, }, expr_from_projection(only(&select.projection)) );