diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index c0826f2008..a2a76d56c1 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -108,7 +108,7 @@ pub use self::query::{
     TableSampleKind, TableSampleMethod, TableSampleModifier, TableSampleQuantity, TableSampleSeed,
     TableSampleSeedModifier, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity,
     UpdateTableFromKind, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill,
-    XmlNamespaceDefinition, XmlPassingArgument, XmlPassingClause, XmlTableColumn,
+    WithItem, XmlNamespaceDefinition, XmlPassingArgument, XmlPassingClause, XmlTableColumn,
     XmlTableColumnOption,
 };
 
diff --git a/src/ast/query.rs b/src/ast/query.rs
index 1de0e0e9db..8f2efb4a2e 100644
--- a/src/ast/query.rs
+++ b/src/ast/query.rs
@@ -754,8 +754,9 @@ pub struct With {
     pub with_token: AttachedToken,
     /// Whether the `WITH` is recursive (`WITH RECURSIVE`).
     pub recursive: bool,
-    /// The list of CTEs declared by this `WITH` clause.
-    pub cte_tables: Vec<Cte>,
+    /// The items declared by this `WITH` clause: traditional CTEs and,
+    /// for dialects that support it, named expressions.
+    pub items: Vec<WithItem>,
 }
 
 impl fmt::Display for With {
@@ -764,11 +765,41 @@ impl fmt::Display for With {
         if self.recursive {
             f.write_str("RECURSIVE ")?;
         }
-        display_comma_separated(&self.cte_tables).fmt(f)?;
+        display_comma_separated(&self.items).fmt(f)?;
         Ok(())
     }
 }
 
+/// A single item in a `WITH` clause.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum WithItem {
+    /// A traditional common table expression: `name [(cols)] AS [MATERIALIZED] (query)`.
+    Cte(Cte),
+    /// `<expr> AS <alias>` — binds an expression (literal, scalar subquery,
+    /// lambda, …) to a name visible in the surrounding query.
+    ///
+    /// See ClickHouse's [common scalar expressions][1].
+    ///
+    /// [1]: https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions
+    Named {
+        /// The expression bound to the alias.
+        expr: Expr,
+        /// The name the expression is bound to.
+        alias: Ident,
+    },
+}
+
+impl fmt::Display for WithItem {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            WithItem::Cte(cte) => cte.fmt(f),
+            WithItem::Named { expr, alias } => write!(f, "{expr} AS {alias}"),
+        }
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
diff --git a/src/ast/spans.rs b/src/ast/spans.rs
index f6ba895478..9044d2c85e 100644
--- a/src/ast/spans.rs
+++ b/src/ast/spans.rs
@@ -47,7 +47,7 @@ use super::{
     ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript,
     SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, TableFactor, TableObject,
     TableOptionsClustered, TableWithJoins, Update, UpdateTableFromKind, Use, Values, ViewColumnDef,
-    WhileStatement, WildcardAdditionalOptions, With, WithFill,
+    WhileStatement, WildcardAdditionalOptions, With, WithFill, WithItem,
 };
 
 /// Given an iterator of spans, return the [Span::union] of all spans.
@@ -185,12 +185,21 @@ impl Spanned for With {
         let With {
             with_token,
             recursive: _, // bool
-            cte_tables,
+            items,
         } = self;
 
-        union_spans(
-            core::iter::once(with_token.0.span).chain(cte_tables.iter().map(|item| item.span())),
-        )
+        union_spans(core::iter::once(with_token.0.span).chain(items.iter().map(|item| item.span())))
+    }
+}
+
+impl Spanned for WithItem {
+    fn span(&self) -> Span {
+        match self {
+            WithItem::Cte(cte) => cte.span(),
+            WithItem::Named { expr, alias } => {
+                union_spans(core::iter::once(expr.span()).chain(core::iter::once(alias.span)))
+            }
+        }
     }
 }
 
@@ -2716,8 +2725,12 @@ pub mod tests {
         );
 
         let query = test.0.parse_query().unwrap();
-        let cte_span = query.clone().with.unwrap().cte_tables[0].span();
-        let cte_query_span = query.clone().with.unwrap().cte_tables[0].query.span();
+        let cte = match &query.with.as_ref().unwrap().items[0] {
+            WithItem::Cte(cte) => cte,
+            other => panic!("expected a CTE, got {other:?}"),
+        };
+        let cte_span = cte.span();
+        let cte_query_span = cte.query.span();
         let body_span = query.body.span();
 
         // the WITH keyboard is part of the query
diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs
index 6ee60cc993..249e493af5 100644
--- a/src/dialect/clickhouse.rs
+++ b/src/dialect/clickhouse.rs
@@ -153,4 +153,9 @@ impl Dialect for ClickHouseDialect {
     fn supports_comma_separated_trim(&self) -> bool {
         true
     }
+
+    /// See <https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions>
+    fn supports_with_clause_scalar_expression(&self) -> bool {
+        true
+    }
 }
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 9b2ede40d2..03bc88e5cd 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -1745,6 +1745,21 @@ pub trait Dialect: Debug + Any {
         false
     }
 
+    /// Returns true if the dialect allows a `WITH` clause item to bind a
+    /// scalar (or otherwise non-CTE) expression to a name, with the form
+    /// `<expr> AS <alias>` — alongside or instead of the
+    /// traditional `<name> AS (<query>)` CTE form.
+    ///
+    /// For example, in ClickHouse:
+    /// ```sql
+    /// WITH 42 AS answer SELECT answer FROM t
+    /// ```
+    ///
+    /// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions)
+    fn supports_with_clause_scalar_expression(&self) -> bool {
+        false
+    }
+
     /// Returns true if the dialect supports parenthesized multi-column
     /// aliases in SELECT items. For example:
     /// ```sql
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 07497b04f6..2af6b18753 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -14105,7 +14105,7 @@ impl<'a> Parser<'a> {
             Some(With {
                 with_token: with_token.clone().into(),
                 recursive: self.parse_keyword(Keyword::RECURSIVE),
-                cte_tables: self.parse_comma_separated(Parser::parse_cte)?,
+                items: self.parse_comma_separated(Parser::parse_with_item)?,
             })
         } else {
             None
@@ -14639,6 +14639,37 @@ impl<'a> Parser<'a> {
         Ok(cte)
     }
 
+    /// Parse a single item in a `WITH` clause.
+    ///
+    /// In standard SQL this is always a CTE (`name [(cols)] AS (query)`).
+    /// Dialects that enable [`Dialect::supports_with_clause_scalar_expression`]
+    /// — currently only ClickHouse — also accept the reversed form
+    /// `<expr> AS <alias>`, which can be freely interleaved with
+    /// CTEs in the same comma-separated list.
+    pub fn parse_with_item(&mut self) -> Result<WithItem, ParserError> {
+        if !self.dialect.supports_with_clause_scalar_expression() {
+            return self.parse_cte().map(WithItem::Cte);
+        }
+
+        // CTE form must start with an identifier. If the leading token
+        // can't begin one (e.g. `42`, `(SELECT …)`, `(x, y) -> …`), this
+        // is unambiguously the named-expression form.
+        //
+        // NOTE(review): when the speculative CTE parse yields `None` we fall
+        // through to the named-expression path, so a malformed CTE is
+        // presumably reported with a named-expression error — confirm.
+        if matches!(self.peek_token().token, Token::Word(_)) {
+            if let Some(cte) = self.maybe_parse(|p| p.parse_cte())? {
+                return Ok(WithItem::Cte(cte));
+            }
+        }
+
+        let expr = self.parse_expr()?;
+        self.expect_keyword(Keyword::AS)?;
+        let alias = self.parse_identifier()?;
+        Ok(WithItem::Named { expr, alias })
+    }
+
     /// Parse a "query body", which is an expression with roughly the
     /// following grammar:
     /// ```sql
diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs
index 716a3919fc..8eb6aef2f3 100644
--- a/tests/sqlparser_clickhouse.rs
+++ b/tests/sqlparser_clickhouse.rs
@@ -1845,6 +1845,70 @@ fn parse_inner_array_join() {
     }
 }
 
+#[test]
+fn parse_with_clause_named_expression() {
+    // Plain literal scalar.
+    clickhouse().verified_stmt("WITH 42 AS answer SELECT answer FROM t");
+
+    // String literal scalar from the ClickHouse docs.
+    clickhouse().verified_stmt(
+        "WITH '2019-08-01 15:23:00' AS ts_upper_bound SELECT * FROM hits \
+         WHERE EventDate = toDate(ts_upper_bound) AND EventTime <= ts_upper_bound",
+    );
+
+    // Aggregate function call as a named expression.
+    clickhouse().verified_stmt(
+        "WITH sum(bytes) AS s SELECT formatReadableSize(s), \"table\" \
+         FROM system.parts GROUP BY \"table\" ORDER BY s",
+    );
+
+    // Scalar subquery as the bound expression.
+    clickhouse().verified_stmt(
+        "WITH (SELECT sum(bytes) FROM system.parts WHERE active) AS total_disk_usage \
+         SELECT (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage, \"table\" \
+         FROM system.parts GROUP BY \"table\" ORDER BY table_disk_usage DESC LIMIT 10",
+    );
+
+    // Bare-identifier scalar — disambiguation case (`name AS alias` looks like
+    // a CTE prefix but the missing `(` after `AS` makes it a named expression).
+    clickhouse().verified_stmt("WITH user_id AS uid SELECT uid FROM t");
+
+    // Mixing a named expression with a real CTE in the same WITH list.
+    clickhouse().verified_stmt("WITH 1 AS one, cte AS (SELECT 1) SELECT one FROM cte");
+
+    // Lambda as the bound expression (also taken from the docs).
+    clickhouse().verified_stmt(
+        "WITH '.txt' AS extension, (id, extension) -> concat(lower(id), extension) AS gen_name \
+         SELECT gen_name('test', '.sql') AS file_name",
+    );
+}
+
+#[test]
+fn parse_with_clause_named_expression_ast() {
+    let query = clickhouse().verified_query("WITH 42 AS answer SELECT answer FROM t");
+    let with = query.with.as_ref().unwrap();
+    assert!(!with.recursive);
+    assert_eq!(with.items.len(), 1);
+    match &with.items[0] {
+        WithItem::Named { expr, alias } => {
+            assert_eq!(alias.value, "answer");
+            assert!(matches!(expr, Expr::Value(_)));
+        }
+        other => panic!("expected a named expression, got {other:?}"),
+    }
+}
+
+#[test]
+fn parse_with_clause_named_expression_unsupported_in_other_dialects() {
+    // The named-expression form is only enabled for ClickHouse; other
+    // dialects should still reject `WITH 42 AS answer …`.
+    let res = sqlparser::parser::Parser::parse_sql(
+        &GenericDialect {},
+        "WITH 42 AS answer SELECT answer FROM t",
+    );
+    assert!(res.is_err(), "expected parse error, got {res:?}");
+}
+
 fn clickhouse() -> TestedDialects {
     TestedDialects::new(vec![Box::new(ClickHouseDialect {})])
 }
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index 326fbf678e..9678dcbe45 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -7854,18 +7854,21 @@ fn parse_ctes() {
 
     fn assert_ctes_in_select(expected: &[&str], sel: &Query) {
         for (i, exp) in expected.iter().enumerate() {
-            let Cte { alias, query, .. } = &sel.with.as_ref().unwrap().cte_tables[i];
-            assert_eq!(*exp, query.to_string());
-            assert_eq!(false, alias.explicit);
+            let cte = match &sel.with.as_ref().unwrap().items[i] {
+                WithItem::Cte(cte) => cte,
+                other => panic!("expected a CTE, got {other:?}"),
+            };
+            assert_eq!(*exp, cte.query.to_string());
+            assert_eq!(false, cte.alias.explicit);
             assert_eq!(
                 if i == 0 {
                     Ident::new("a")
                 } else {
                     Ident::new("b")
                 },
-                alias.name
+                cte.alias.name
             );
-            assert!(alias.columns.is_empty());
+            assert!(cte.alias.columns.is_empty());
         }
     }
 
@@ -7898,26 +7901,29 @@ fn parse_ctes() {
     // CTE in a CTE...
     let sql = &format!("WITH outer_cte AS ({with}) SELECT * FROM outer_cte");
     let select = verified_query(sql);
-    assert_ctes_in_select(&cte_sqls, &only(&select.with.unwrap().cte_tables).query);
+    let with = select.with.as_ref().unwrap();
+    let outer_cte = match only(&with.items) {
+        WithItem::Cte(cte) => cte,
+        other => panic!("expected a CTE, got {other:?}"),
+    };
+    assert_ctes_in_select(&cte_sqls, &outer_cte.query);
 }
 
 #[test]
 fn parse_cte_renamed_columns() {
     let sql = "WITH cte (col1, col2) AS (SELECT foo, bar FROM baz) SELECT * FROM cte";
     let query = all_dialects().verified_query(sql);
+    let with = query.with.unwrap();
+    let cte = match with.items.first().unwrap() {
+        WithItem::Cte(cte) => cte,
+        other => panic!("expected a CTE, got {other:?}"),
+    };
     assert_eq!(
         vec![
             TableAliasColumnDef::from_name("col1"),
             TableAliasColumnDef::from_name("col2")
         ],
-        query
-            .with
-            .unwrap()
-            .cte_tables
-            .first()
-            .unwrap()
-            .alias
-            .columns
+        cte.alias.columns
     );
 }
 
@@ -7931,8 +7937,8 @@ fn parse_recursive_cte() {
 
     let with = query.with.as_ref().unwrap();
     assert!(with.recursive);
-    assert_eq!(with.cte_tables.len(), 1);
-    let expected = Cte {
+    assert_eq!(with.items.len(), 1);
+    let expected = WithItem::Cte(Cte {
         alias: TableAlias {
             explicit: false,
             name: Ident {
@@ -7947,8 +7953,8 @@ fn parse_recursive_cte() {
         from: None,
         materialized: None,
         closing_paren_token: AttachedToken::empty(),
-    };
-    assert_eq!(with.cte_tables.first().unwrap(), &expected);
+    });
+    assert_eq!(with.items.first().unwrap(), &expected);
 }
 
 #[test]