-
Notifications
You must be signed in to change notification settings - Fork 714
ClickHouse: Support scalar expressions in WITH clause #2333
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -754,8 +754,9 @@ pub struct With { | |||||||||||||||||||
| pub with_token: AttachedToken, | ||||||||||||||||||||
| /// Whether the `WITH` is recursive (`WITH RECURSIVE`). | ||||||||||||||||||||
| pub recursive: bool, | ||||||||||||||||||||
| /// The list of CTEs declared by this `WITH` clause. | ||||||||||||||||||||
| pub cte_tables: Vec<Cte>, | ||||||||||||||||||||
| /// The items declared by this `WITH` clause: traditional CTEs and, | ||||||||||||||||||||
| /// for dialects that support it, named expressions. | ||||||||||||||||||||
| pub items: Vec<WithItem>, | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| impl fmt::Display for With { | ||||||||||||||||||||
|
|
@@ -764,11 +765,41 @@ impl fmt::Display for With { | |||||||||||||||||||
| if self.recursive { | ||||||||||||||||||||
| f.write_str("RECURSIVE ")?; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| display_comma_separated(&self.cte_tables).fmt(f)?; | ||||||||||||||||||||
| display_comma_separated(&self.items).fmt(f)?; | ||||||||||||||||||||
| Ok(()) | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| /// A single item in a `WITH` clause. | ||||||||||||||||||||
| #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] | ||||||||||||||||||||
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||||||||||||||||||||
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] | ||||||||||||||||||||
| pub enum WithItem { | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. repr wise can we do something like? pub enum WithExpression {
Cte(...),
Cse(ExprWithAlias), // can we reuse exprwithalias?
} |
||||||||||||||||||||
| /// A traditional common table expression: `name [(cols)] AS [MATERIALIZED] (query)`. | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||
| Cte(Cte), | ||||||||||||||||||||
| /// `<expr> AS <alias>` — binds an expression (literal, scalar subquery, | ||||||||||||||||||||
| /// lambda, …) to a name visible in the surrounding query. | ||||||||||||||||||||
| /// | ||||||||||||||||||||
| /// See ClickHouse's [common scalar expressions][1]. | ||||||||||||||||||||
| /// | ||||||||||||||||||||
| /// [1]: https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions | ||||||||||||||||||||
|
Comment on lines
+780
to
+785
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||
| Named { | ||||||||||||||||||||
| /// The expression bound to the alias. | ||||||||||||||||||||
| expr: Expr, | ||||||||||||||||||||
| /// The name the expression is bound to. | ||||||||||||||||||||
| alias: Ident, | ||||||||||||||||||||
| }, | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| impl fmt::Display for WithItem { | ||||||||||||||||||||
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||||||||||||||||
| match self { | ||||||||||||||||||||
| WithItem::Cte(cte) => cte.fmt(f), | ||||||||||||||||||||
| WithItem::Named { expr, alias } => write!(f, "{expr} AS {alias}"), | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] | ||||||||||||||||||||
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||||||||||||||||||||
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] | ||||||||||||||||||||
|
|
||||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1745,6 +1745,21 @@ pub trait Dialect: Debug + Any { | |||||||||||||||||||||||||||||||||||||||
| false | ||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||
| /// Returns true if the dialect allows a `WITH` clause item to bind a | ||||||||||||||||||||||||||||||||||||||||
| /// scalar (or otherwise non-CTE) expression to a name, with the form | ||||||||||||||||||||||||||||||||||||||||
| /// `<expression> AS <identifier>` — alongside or instead of the | ||||||||||||||||||||||||||||||||||||||||
| /// traditional `<identifier> AS (<subquery>)` CTE form. | ||||||||||||||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||||||||||||||
| /// For example, in ClickHouse: | ||||||||||||||||||||||||||||||||||||||||
| /// ```sql | ||||||||||||||||||||||||||||||||||||||||
| /// WITH 42 AS answer SELECT answer FROM t | ||||||||||||||||||||||||||||||||||||||||
| /// ``` | ||||||||||||||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||||||||||||||
| /// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions) | ||||||||||||||||||||||||||||||||||||||||
|
Comment on lines
+1748
to
+1758
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||
| fn supports_with_clause_scalar_expression(&self) -> bool { | ||||||||||||||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||
| false | ||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||
| /// Returns true if the dialect supports parenthesized multi-column | ||||||||||||||||||||||||||||||||||||||||
| /// aliases in SELECT items. For example: | ||||||||||||||||||||||||||||||||||||||||
| /// ```sql | ||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -14105,7 +14105,7 @@ impl<'a> Parser<'a> { | |||||||||||||||||
| Some(With { | ||||||||||||||||||
| with_token: with_token.clone().into(), | ||||||||||||||||||
| recursive: self.parse_keyword(Keyword::RECURSIVE), | ||||||||||||||||||
| cte_tables: self.parse_comma_separated(Parser::parse_cte)?, | ||||||||||||||||||
| items: self.parse_comma_separated(Parser::parse_with_item)?, | ||||||||||||||||||
| }) | ||||||||||||||||||
| } else { | ||||||||||||||||||
| None | ||||||||||||||||||
|
|
@@ -14639,6 +14639,33 @@ impl<'a> Parser<'a> { | |||||||||||||||||
| Ok(cte) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| /// Parse a single item in a `WITH` clause. | ||||||||||||||||||
| /// | ||||||||||||||||||
| /// In standard SQL this is always a CTE (`name [(cols)] AS (query)`). | ||||||||||||||||||
| /// Dialects that enable [`Dialect::supports_with_clause_scalar_expression`] | ||||||||||||||||||
| /// — currently only ClickHouse — also accept the reversed form | ||||||||||||||||||
| /// `<expression> AS <identifier>`, which can be freely interleaved with | ||||||||||||||||||
| /// CTEs in the same comma-separated list. | ||||||||||||||||||
|
Comment on lines
+14642
to
+14648
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||
| pub fn parse_with_item(&mut self) -> Result<WithItem, ParserError> { | ||||||||||||||||||
| if !self.dialect.supports_with_clause_scalar_expression() { | ||||||||||||||||||
| return self.parse_cte().map(WithItem::Cte); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // CTE form must start with an identifier. If the leading token | ||||||||||||||||||
| // can't begin one (e.g. `42`, `(SELECT …)`, `(x, y) -> …`), this | ||||||||||||||||||
| // is unambiguously the named-expression form. | ||||||||||||||||||
| if matches!(self.peek_token().token, Token::Word(_)) { | ||||||||||||||||||
| if let Some(cte) = self.maybe_parse(|p| p.parse_cte())? { | ||||||||||||||||||
| return Ok(WithItem::Cte(cte)); | ||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
|
Comment on lines
+14654
to
+14661
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be simplified to use that we do |
||||||||||||||||||
|
|
||||||||||||||||||
| let expr = self.parse_expr()?; | ||||||||||||||||||
| self.expect_keyword(Keyword::AS)?; | ||||||||||||||||||
| let alias = self.parse_identifier()?; | ||||||||||||||||||
| Ok(WithItem::Named { expr, alias }) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| /// Parse a "query body", which is an expression with roughly the | ||||||||||||||||||
| /// following grammar: | ||||||||||||||||||
| /// ```sql | ||||||||||||||||||
|
|
||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1845,6 +1845,70 @@ fn parse_inner_array_join() { | |
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn parse_with_clause_named_expression() { | ||
| // Plain literal scalar. | ||
| clickhouse().verified_stmt("WITH 42 AS answer SELECT answer FROM t"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for the tests, lets use |
||
|
|
||
| // String literal scalar from the ClickHouse docs. | ||
| clickhouse().verified_stmt( | ||
| "WITH '2019-08-01 15:23:00' AS ts_upper_bound SELECT * FROM hits \ | ||
| WHERE EventDate = toDate(ts_upper_bound) AND EventTime <= ts_upper_bound", | ||
| ); | ||
|
|
||
| // Aggregate function call as a named expression. | ||
| clickhouse().verified_stmt( | ||
| "WITH sum(bytes) AS s SELECT formatReadableSize(s), \"table\" \ | ||
| FROM system.parts GROUP BY \"table\" ORDER BY s", | ||
| ); | ||
|
|
||
| // Scalar subquery as the bound expression. | ||
| clickhouse().verified_stmt( | ||
| "WITH (SELECT sum(bytes) FROM system.parts WHERE active) AS total_disk_usage \ | ||
| SELECT (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage, \"table\" \ | ||
| FROM system.parts GROUP BY \"table\" ORDER BY table_disk_usage DESC LIMIT 10", | ||
| ); | ||
|
|
||
| // Bare-identifier scalar — disambiguation case (`name AS alias` looks like | ||
| // a CTE prefix but the missing `(` after `AS` makes it a named expression). | ||
| clickhouse().verified_stmt("WITH user_id AS uid SELECT uid FROM t"); | ||
|
|
||
| // Mixing a named expression with a real CTE in the same WITH list. | ||
| clickhouse().verified_stmt("WITH 1 AS one, cte AS (SELECT 1) SELECT one FROM cte"); | ||
|
|
||
| // Lambda as the bound expression (also taken from the docs). | ||
| clickhouse().verified_stmt( | ||
| "WITH '.txt' AS extension, (id, extension) -> concat(lower(id), extension) AS gen_name \ | ||
| SELECT gen_name('test', '.sql') AS file_name", | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn parse_with_clause_named_expression_ast() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we can merge this with the test above since they cover the same feature |
||
| let query = clickhouse().verified_query("WITH 42 AS answer SELECT answer FROM t"); | ||
| let with = query.with.as_ref().unwrap(); | ||
| assert!(!with.recursive); | ||
| assert_eq!(with.items.len(), 1); | ||
| match &with.items[0] { | ||
| WithItem::Named { expr, alias } => { | ||
| assert_eq!(alias.value, "answer"); | ||
| assert!(matches!(expr, Expr::Value(_))); | ||
| } | ||
| other => panic!("expected a named expression, got {other:?}"), | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn parse_with_clause_named_expression_unsupported_in_other_dialects() { | ||
| // The named-expression form is only enabled for ClickHouse; other | ||
| // dialects should still reject `WITH 42 AS answer …`. | ||
| let res = sqlparser::parser::Parser::parse_sql( | ||
| &GenericDialect {}, | ||
| "WITH 42 AS answer SELECT answer FROM t", | ||
| ); | ||
| assert!(res.is_err(), "expected parse error, got {res:?}"); | ||
| } | ||
|
Comment on lines
+1901
to
+1910
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can skip this, no need to test what the parser does for an invalid query for unsupported dialects |
||
|
|
||
| fn clickhouse() -> TestedDialects { | ||
| TestedDialects::new(vec![Box::new(ClickHouseDialect {})]) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.