diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c0826f2008..6249062348 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2267,12 +2267,58 @@ pub struct WindowSpec { pub window_name: Option, /// `OVER (PARTITION BY ...)` pub partition_by: Vec, + /// Which keyword introduced `partition_by`: `PARTITION BY` or Hive's `DISTRIBUTE BY`. + pub partition_by_kind: WindowPartitionByKind, /// `OVER (ORDER BY ...)` pub order_by: Vec, + /// Which keyword introduced `order_by`: `ORDER BY` or Hive's `SORT BY`. + pub order_by_kind: WindowOrderByKind, /// `OVER (window frame)` pub window_frame: Option, } +/// The keyword that introduces the partitioning clause of a window specification; `Display` writes the keyword text itself. +#[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum WindowPartitionByKind { + /// `PARTITION BY` (the default variant) + #[default] + Partition, + /// Hive `DISTRIBUTE BY`, accepted by the window-spec parser only when the dialect's `supports_window_spec_distribute_sort` returns true + Distribute, +} + +impl fmt::Display for WindowPartitionByKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + Self::Partition => "PARTITION BY", + Self::Distribute => "DISTRIBUTE BY", + }) + } +} + +/// The keyword that introduces the ordering clause of a window specification; `Display` writes the keyword text itself. 
+#[derive(Debug, Default, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum WindowOrderByKind { + /// `ORDER BY` (the default variant) + #[default] + Order, + /// Hive `SORT BY`, accepted by the window-spec parser only when the dialect's `supports_window_spec_distribute_sort` returns true + Sort, +} + +impl fmt::Display for WindowOrderByKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str(match self { + Self::Order => "ORDER BY", + Self::Sort => "SORT BY", + }) + } +} + impl fmt::Display for WindowSpec { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut is_first = true; @@ -2290,7 +2336,8 @@ impl fmt::Display for WindowSpec { is_first = false; write!( f, - "PARTITION BY {}", + "{} {}", + self.partition_by_kind, display_comma_separated(&self.partition_by) )?; } @@ -2299,7 +2346,12 @@ impl fmt::Display for WindowSpec { SpaceOrNewline.fmt(f)?; } is_first = false; - write!(f, "ORDER BY {}", display_comma_separated(&self.order_by))?; + write!( + f, + "{} {}", + self.order_by_kind, + display_comma_separated(&self.order_by) + )?; } if let Some(window_frame) = &self.window_frame { if !is_first { diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 085d1598cb..03c9ed43b6 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -45,6 +45,10 @@ impl Dialect for HiveDialect { true } + fn supports_window_spec_distribute_sort(&self) -> bool { + true + } + fn supports_numeric_prefix(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 9b2ede40d2..bf080cc581 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -341,6 +341,17 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports Hive-style `DISTRIBUTE BY` and `SORT BY` + /// clauses within a window specification. Defaults to false, in which case only `PARTITION BY` / `ORDER BY` are parsed there. 
+ /// + /// Example + /// ```sql + /// SELECT row_number() OVER (DISTRIBUTE BY a SORT BY b) + /// ``` + fn supports_window_spec_distribute_sort(&self) -> bool { + false + } + /// Returns true if the dialect supports `ARRAY_AGG() [WITHIN GROUP (ORDER BY)]` expressions. /// Otherwise, the dialect should expect an `ORDER BY` without the `WITHIN GROUP` clause, e.g. [`ANSI`] /// diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 07497b04f6..17c3ac8ba2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -19941,15 +19941,36 @@ impl<'a> Parser<'a> { _ => None, }; - let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? + let (partition_by_kind, partition_by) = + if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { + ( + WindowPartitionByKind::Partition, + self.parse_comma_separated(Parser::parse_expr)?, + ) + } else if self.dialect.supports_window_spec_distribute_sort() + && self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) + { + ( + WindowPartitionByKind::Distribute, + self.parse_comma_separated(Parser::parse_expr)?, + ) + } else { + (WindowPartitionByKind::Partition, vec![]) + }; + let (order_by_kind, order_by) = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + ( + WindowOrderByKind::Order, + self.parse_comma_separated(Parser::parse_order_by_expr)?, + ) + } else if self.dialect.supports_window_spec_distribute_sort() + && self.parse_keywords(&[Keyword::SORT, Keyword::BY]) + { + ( + WindowOrderByKind::Sort, + self.parse_comma_separated(Parser::parse_order_by_expr)?, + ) } else { - vec![] + (WindowOrderByKind::Order, vec![]) }; let window_frame = if !self.consume_token(&Token::RParen) { @@ -19962,7 +19983,9 @@ impl<'a> Parser<'a> { Ok(WindowSpec { window_name, partition_by, + partition_by_kind, order_by, 
+ order_by_kind, window_frame, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 326fbf678e..b9bc524813 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3091,6 +3091,7 @@ fn parse_select_qualify() { over: Some(WindowType::WindowSpec(WindowSpec { window_name: None, partition_by: vec![Expr::Identifier(Ident::new("p"))], + partition_by_kind: WindowPartitionByKind::Partition, order_by: vec![OrderByExpr { expr: Expr::Identifier(Ident::new("o")), options: OrderByOptions { @@ -3099,6 +3100,7 @@ fn parse_select_qualify() { }, with_fill: None, }], + order_by_kind: WindowOrderByKind::Order, window_frame: None, })), within_group: vec![] @@ -5811,6 +5813,7 @@ fn parse_window_functions() { over: Some(WindowType::WindowSpec(WindowSpec { window_name: None, partition_by: vec![], + partition_by_kind: WindowPartitionByKind::Partition, order_by: vec![OrderByExpr { expr: Expr::Identifier(Ident::new("dt")), options: OrderByOptions { @@ -5819,6 +5822,7 @@ fn parse_window_functions() { }, with_fill: None, }], + order_by_kind: WindowOrderByKind::Order, window_frame: None, })), within_group: vec![], @@ -6033,6 +6037,7 @@ fn test_parse_named_window() { NamedWindowExpr::WindowSpec(WindowSpec { window_name: None, partition_by: vec![], + partition_by_kind: WindowPartitionByKind::Partition, order_by: vec![OrderByExpr { expr: Expr::Identifier(Ident { value: "C12".to_string(), @@ -6045,6 +6050,7 @@ fn test_parse_named_window() { }, with_fill: None, }], + order_by_kind: WindowOrderByKind::Order, window_frame: None, }), ), @@ -6061,7 +6067,9 @@ fn test_parse_named_window() { quote_style: None, span: Span::empty(), })], + partition_by_kind: WindowPartitionByKind::Partition, order_by: vec![], + order_by_kind: WindowOrderByKind::Order, window_frame: None, }), ), diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index c0a15d5b9a..b07ac7f785 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -562,6 +562,13 @@ fn 
test_tample_sample() { hive().verified_stmt("SELECT * FROM source TABLESAMPLE (10 ROWS)"); } +#[test] +fn parse_row_number_window_function() { + hive().verified_stmt( + "SELECT row_number() OVER (DISTRIBUTE BY age SORT BY update_time DESC) AS row_num FROM sdl.xxx", + ); +} + fn hive() -> TestedDialects { TestedDialects::new(vec![Box::new(HiveDialect {})]) } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index f9536bc289..a8ade618f4 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -439,7 +439,9 @@ fn parse_window_function_with_filter() { over: Some(WindowType::WindowSpec(WindowSpec { window_name: None, partition_by: vec![], + partition_by_kind: WindowPartitionByKind::Partition, order_by: vec![], + order_by_kind: WindowOrderByKind::Order, window_frame: None, })), filter: Some(Box::new(Expr::Identifier(Ident::new("y")))),