From dcd6cfa45cda34f57a8537e22b670b67735fe0ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Sun, 24 May 2026 16:56:02 +0300 Subject: [PATCH 1/3] Add debug output to trace GROUP BY ordering --- include/executor/vectorized_operator.hpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/executor/vectorized_operator.hpp b/include/executor/vectorized_operator.hpp index 3a7b77e..f007443 100644 --- a/include/executor/vectorized_operator.hpp +++ b/include/executor/vectorized_operator.hpp @@ -720,6 +720,17 @@ class VectorizedGroupByOperator : public VectorizedOperator { [this](size_t a, size_t b) { return hash_group_keys_[a] < hash_group_keys_[b]; }); + fprintf(stderr, "DEBUG: VectorizedGroupBy sorted %zu groups\n", hash_group_keys_.size()); + for (size_t i = 0; i < sorted_indices_.size() && i < 5; ++i) { + fprintf(stderr, "DEBUG: sorted_indices_[%zu]=%zu key=", i, sorted_indices_[i]); + const auto& key = hash_group_keys_[sorted_indices_[i]]; + for (const auto& k : key) { + fprintf(stderr, "%s ", k.to_string().c_str()); + } + fprintf(stderr, "\n"); + } + } else { + fprintf(stderr, "DEBUG: VectorizedGroupBy hash_group_keys_ is empty\n"); } } From a2d0a5349eb4da3800470a943d08723ec826f94a Mon Sep 17 00:00:00 2001 From: poyrazK <83272398+poyrazK@users.noreply.github.com> Date: Sun, 24 May 2026 13:56:34 +0000 Subject: [PATCH 2/3] style: automated clang-format fixes --- include/executor/vectorized_operator.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/executor/vectorized_operator.hpp b/include/executor/vectorized_operator.hpp index f007443..52baa99 100644 --- a/include/executor/vectorized_operator.hpp +++ b/include/executor/vectorized_operator.hpp @@ -720,7 +720,8 @@ class VectorizedGroupByOperator : public VectorizedOperator { [this](size_t a, size_t b) { return hash_group_keys_[a] < hash_group_keys_[b]; }); - fprintf(stderr, "DEBUG: VectorizedGroupBy sorted %zu groups\n", hash_group_keys_.size()); + fprintf(stderr, "DEBUG: VectorizedGroupBy sorted %zu groups\n", + hash_group_keys_.size()); for (size_t i = 0; i < sorted_indices_.size() && i < 5; ++i) { fprintf(stderr, "DEBUG: sorted_indices_[%zu]=%zu key=", i, sorted_indices_[i]); const auto& key = hash_group_keys_[sorted_indices_[i]]; From 0406906995b1dcc5c04eb260a5db1e4c99b39675 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Poyraz=20K=C3=BC=C3=A7=C3=BCkarslan?= <83272398+PoyrazK@users.noreply.github.com> Date: Sun, 24 May 2026 17:05:19 +0300 Subject: [PATCH 3/3] Fix GROUP BY ordering: sort groups lexicographically before output --- src/executor/operator.cpp | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/executor/operator.cpp b/src/executor/operator.cpp index bc58f73..b22cb64 100644 --- a/src/executor/operator.cpp +++ b/src/executor/operator.cpp @@ -646,8 +646,33 @@ bool AggregateOperator::open() { } groups_.clear(); + + // Sort group keys lexicographically for deterministic GROUP BY ordering + std::vector sorted_keys; + sorted_keys.reserve(groups_map.size()); for (auto& pair : groups_map) { - auto& state = pair.second; + sorted_keys.push_back(pair.first); + } + std::sort(sorted_keys.begin(), sorted_keys.end(), + [&groups_map](const std::string& a, const std::string& b) { + const auto& a_vals = groups_map[a].group_values; + const auto& b_vals = groups_map[b].group_values; + if (a_vals.size() != b_vals.size()) { + return a_vals.size() < b_vals.size(); + } + for (size_t i = 0; i < a_vals.size(); ++i) { + if (a_vals[i] < b_vals[i]) { + return true; + } + if (b_vals[i] < a_vals[i]) { + return false; + } + } + return false; + }); + + for (auto& key : sorted_keys) { + auto& state = groups_map[key]; std::vector row = std::move(state.group_values); for (size_t i = 0; i < aggregates_.size(); ++i) { switch (aggregates_[i].type) {