Skip to content

Commit a09e266

Browse files
author
Saurabh Badenkal
committed
Block all server-rejected SQL patterns in SDK (save round-trip)
All checks in one place (_sql_guardrails) for easy future bypass. BLOCKED (ValidationError -- saves network round-trip): - INSERT/UPDATE/DELETE/DROP/etc. (write statements) - CROSS JOIN, RIGHT JOIN, FULL OUTER JOIN (unsupported join types) - UNION / UNION ALL - HAVING - CTE (WITH ... AS) - Subqueries (IN (SELECT ...), EXISTS (SELECT ...)) WARNED (UserWarning -- query executes, advisory only): - LIKE '%value' (leading-wildcard, full table scan) - FROM a, b (implicit cartesian, server allows but risky) - SELECT * with JOIN (partial expansion) Principle: block what server blocks (save time), warn what server allows but is risky (respect user intent). 14 new tests, 770 total passing.
1 parent 68d32ea commit a09e266

3 files changed

Lines changed: 173 additions & 19 deletions

File tree

src/PowerPlatform/Dataverse/core/_error_codes.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
VALIDATION_SQL_EMPTY = "validation_sql_empty"
4444
VALIDATION_SQL_WRITE_BLOCKED = "validation_sql_write_blocked"
4545
VALIDATION_SQL_CROSS_JOIN_BLOCKED = "validation_sql_cross_join_blocked"
46+
VALIDATION_SQL_UNSUPPORTED_SYNTAX = "validation_sql_unsupported_syntax"
4647
VALIDATION_ENUM_NO_MEMBERS = "validation_enum_no_members"
4748
VALIDATION_ENUM_NON_INT_VALUE = "validation_enum_non_int_value"
4849
VALIDATION_UNSUPPORTED_COLUMN_TYPE = "validation_unsupported_column_type"

src/PowerPlatform/Dataverse/data/_odata.py

Lines changed: 83 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
VALIDATION_SQL_EMPTY,
3131
VALIDATION_SQL_WRITE_BLOCKED,
3232
VALIDATION_SQL_CROSS_JOIN_BLOCKED,
33+
VALIDATION_SQL_UNSUPPORTED_SYNTAX,
3334
METADATA_ENTITYSET_NOT_FOUND,
3435
METADATA_ENTITYSET_NAME_MISSING,
3536
METADATA_TABLE_NOT_FOUND,
@@ -845,6 +846,18 @@ def _do_request(url: str, *, params: Optional[Dict[str, Any]] = None) -> Dict[st
845846
re.IGNORECASE,
846847
)
847848
_SQL_HAS_JOIN_RE = re.compile(r"\bJOIN\b", re.IGNORECASE)
849+
# Server-blocked SQL patterns (save the round-trip by catching early)
850+
_SQL_UNSUPPORTED_JOIN_RE = re.compile(
851+
r"\b(?:CROSS\s+JOIN|RIGHT\s+(?:OUTER\s+)?JOIN|FULL\s+(?:OUTER\s+)?JOIN)\b",
852+
re.IGNORECASE,
853+
)
854+
_SQL_UNION_RE = re.compile(r"\bUNION\b", re.IGNORECASE)
855+
_SQL_HAVING_RE = re.compile(r"\bHAVING\b", re.IGNORECASE)
856+
_SQL_CTE_RE = re.compile(r"^\s*WITH\b", re.IGNORECASE)
857+
_SQL_SUBQUERY_RE = re.compile(
858+
r"\bIN\s*\(\s*SELECT\b|\bEXISTS\s*\(\s*SELECT\b|\(\s*SELECT\b.*\bFROM\b",
859+
re.IGNORECASE,
860+
)
848861

849862
def _expand_select_star(self, sql: str, table: str) -> str:
850863
"""Replace ``SELECT *`` with explicit column names.
@@ -887,26 +900,33 @@ def _expand_select_star(self, sql: str, table: str) -> str:
887900
def _sql_guardrails(self, sql: str) -> str:
888901
"""Apply safety guardrails to a SQL query before sending to the server.
889902
890-
Checks performed (in order):
903+
Checks split into two categories:
891904
892-
1. **Block write statements** -- ``INSERT``, ``UPDATE``, ``DELETE``,
893-
``DROP``, ``TRUNCATE``, ``ALTER``, ``CREATE``, ``EXEC``, ``GRANT``,
894-
``REVOKE``, ``BULK`` are rejected with ``ValidationError``.
895-
2. **Warn on leading-wildcard LIKE** -- ``LIKE '%...'`` patterns
896-
force full table scans and hurt shared database performance.
897-
3. **Warn on implicit cross joins** -- ``FROM a, b`` (comma syntax)
898-
produces cartesian products.
905+
**Blocked** (``ValidationError`` -- saves a server round-trip):
899906
900-
.. note::
901-
The server enforces a 5000-row maximum per query and blocks
902-
``SELECT *`` directly. The SDK handles ``SELECT *`` via
903-
``_expand_select_star``. No client-side TOP injection is needed
904-
because the server already caps results.
907+
1. Write statements (INSERT/UPDATE/DELETE/DROP/etc.)
908+
2. CROSS JOIN, RIGHT JOIN, FULL OUTER JOIN (server rejects these)
909+
3. UNION / UNION ALL (server rejects)
910+
4. HAVING clause (server rejects)
911+
5. CTE / WITH clause (server rejects)
912+
6. Subqueries -- IN (SELECT ...), EXISTS (SELECT ...) (server rejects)
913+
914+
**Warned** (``UserWarning`` -- query still executes):
915+
916+
7. Leading-wildcard LIKE (full table scan)
917+
8. Implicit cross join FROM a, b (cartesian product)
918+
919+
All blocked patterns are also blocked by the server, but catching
920+
them here saves the network round-trip and provides clearer error
921+
messages. To bypass a specific check (e.g., if the server adds
922+
support in the future), all checks are in this single method.
905923
906924
:param sql: The SQL string (already stripped).
907-
:return: Possibly-rewritten SQL string.
908-
:raises ValidationError: If the SQL contains a write statement.
925+
:return: The SQL string (unchanged unless rewritten).
926+
:raises ValidationError: If the SQL contains a blocked pattern.
909927
"""
928+
# --- BLOCKED (save server round-trip) ---
929+
910930
# 1. Block writes
911931
if self._SQL_WRITE_RE.search(sql):
912932
raise ValidationError(
@@ -916,7 +936,53 @@ def _sql_guardrails(self, sql: str) -> str:
916936
subcode=VALIDATION_SQL_WRITE_BLOCKED,
917937
)
918938

919-
# 2. Warn on leading-wildcard LIKE
939+
# 2. Block unsupported JOIN types
940+
m = self._SQL_UNSUPPORTED_JOIN_RE.search(sql)
941+
if m:
942+
raise ValidationError(
943+
f"Unsupported JOIN type: '{m.group(0).strip()}'. "
944+
"Only INNER JOIN and LEFT JOIN are supported by the "
945+
"Dataverse SQL endpoint.",
946+
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
947+
)
948+
949+
# 3. Block UNION
950+
if self._SQL_UNION_RE.search(sql):
951+
raise ValidationError(
952+
"UNION is not supported by the Dataverse SQL endpoint. "
953+
"Execute separate queries and combine results in Python "
954+
"(e.g. pd.concat([df1, df2])).",
955+
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
956+
)
957+
958+
# 4. Block HAVING
959+
if self._SQL_HAVING_RE.search(sql):
960+
raise ValidationError(
961+
"HAVING is not supported by the Dataverse SQL endpoint. "
962+
"Use WHERE to filter before GROUP BY instead.",
963+
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
964+
)
965+
966+
# 5. Block CTE / WITH
967+
if self._SQL_CTE_RE.search(sql):
968+
raise ValidationError(
969+
"CTE (WITH ... AS) is not supported by the Dataverse SQL "
970+
"endpoint. Use separate queries and combine in Python.",
971+
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
972+
)
973+
974+
# 6. Block subqueries
975+
if self._SQL_SUBQUERY_RE.search(sql):
976+
raise ValidationError(
977+
"Subqueries are not supported by the Dataverse SQL "
978+
"endpoint. Use separate SQL calls and combine results "
979+
"in Python (e.g. step 1: get IDs, step 2: WHERE IN).",
980+
subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX,
981+
)
982+
983+
# --- WARNED (query still executes) ---
984+
985+
# 7. Warn on leading-wildcard LIKE
920986
if self._SQL_LEADING_WILDCARD_RE.search(sql):
921987
warnings.warn(
922988
"Query contains a leading-wildcard LIKE pattern "
@@ -927,8 +993,7 @@ def _sql_guardrails(self, sql: str) -> str:
927993
stacklevel=4,
928994
)
929995

930-
# 3. Warn on implicit cross joins (server allows these but they
931-
# produce cartesian products that can stress shared DB resources)
996+
# 8. Warn on implicit cross joins (server allows but risky)
932997
if self._SQL_IMPLICIT_CROSS_JOIN_RE.search(sql):
933998
warnings.warn(
934999
"Query uses an implicit cross join (FROM table1, table2). "

tests/unit/data/test_sql_guardrails.py

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,99 @@ def test_select_not_blocked(self):
7575

7676
def test_select_with_delete_in_where_not_blocked(self):
7777
c = _client()
78-
# "DELETE" appearing in a WHERE value should not trigger the guard
7978
result = c._sql_guardrails("SELECT TOP 10 name FROM account WHERE name = 'DELETE ME'")
8079
assert "SELECT" in result
8180

8281

82+
# ===================================================================
83+
# 1b. Block server-rejected SQL patterns (save round-trip)
84+
# ===================================================================
85+
86+
87+
class TestBlockServerRejectedPatterns:
88+
"""Block SQL patterns the server rejects, to save network round-trip."""
89+
90+
@pytest.mark.parametrize(
91+
"sql,match_text",
92+
[
93+
("SELECT a.name FROM account a CROSS JOIN contact c", "Unsupported JOIN"),
94+
(
95+
"SELECT a.name FROM account a RIGHT JOIN contact c ON a.accountid = c.parentcustomerid",
96+
"Unsupported JOIN",
97+
),
98+
(
99+
"SELECT a.name FROM account a RIGHT OUTER JOIN contact c ON a.accountid = c.parentcustomerid",
100+
"Unsupported JOIN",
101+
),
102+
(
103+
"SELECT a.name FROM account a FULL OUTER JOIN contact c ON a.accountid = c.parentcustomerid",
104+
"Unsupported JOIN",
105+
),
106+
(
107+
"SELECT a.name FROM account a FULL JOIN contact c ON a.accountid = c.parentcustomerid",
108+
"Unsupported JOIN",
109+
),
110+
],
111+
)
112+
def test_unsupported_joins_blocked(self, sql, match_text):
113+
c = _client()
114+
with pytest.raises(ValidationError, match=match_text):
115+
c._sql_guardrails(sql)
116+
117+
def test_inner_join_allowed(self):
118+
c = _client()
119+
result = c._sql_guardrails(
120+
"SELECT TOP 5 a.name FROM account a " "INNER JOIN contact c ON a.accountid = c.parentcustomerid"
121+
)
122+
assert "INNER JOIN" in result
123+
124+
def test_left_join_allowed(self):
125+
c = _client()
126+
result = c._sql_guardrails(
127+
"SELECT TOP 5 a.name FROM account a " "LEFT JOIN contact c ON a.accountid = c.parentcustomerid"
128+
)
129+
assert "LEFT JOIN" in result
130+
131+
def test_union_blocked(self):
132+
c = _client()
133+
with pytest.raises(ValidationError, match="UNION"):
134+
c._sql_guardrails("SELECT name FROM account UNION SELECT fullname FROM contact")
135+
136+
def test_union_all_blocked(self):
137+
c = _client()
138+
with pytest.raises(ValidationError, match="UNION"):
139+
c._sql_guardrails("SELECT name FROM account UNION ALL SELECT fullname FROM contact")
140+
141+
def test_having_blocked(self):
142+
c = _client()
143+
with pytest.raises(ValidationError, match="HAVING"):
144+
c._sql_guardrails("SELECT name, COUNT(*) FROM account GROUP BY name HAVING COUNT(*) > 1")
145+
146+
def test_cte_blocked(self):
147+
c = _client()
148+
with pytest.raises(ValidationError, match="CTE"):
149+
c._sql_guardrails("WITH cte AS (SELECT name FROM account) SELECT * FROM cte")
150+
151+
def test_subquery_in_blocked(self):
152+
c = _client()
153+
with pytest.raises(ValidationError, match="Subquer"):
154+
c._sql_guardrails("SELECT name FROM account WHERE accountid IN (SELECT accountid FROM account)")
155+
156+
def test_subquery_exists_blocked(self):
157+
c = _client()
158+
with pytest.raises(ValidationError, match="Subquer"):
159+
c._sql_guardrails(
160+
"SELECT name FROM account a WHERE EXISTS "
161+
"(SELECT 1 FROM contact c WHERE c.parentcustomerid = a.accountid)"
162+
)
163+
164+
def test_subquery_in_values_not_blocked(self):
165+
"""IN with literal values is fine -- only IN (SELECT ...) is blocked."""
166+
c = _client()
167+
result = c._sql_guardrails("SELECT name FROM account WHERE name IN ('A', 'B', 'C')")
168+
assert "IN" in result
169+
170+
83171
# ===================================================================
84172
# 2. Server enforces TOP 5000 (no client-side injection needed)
85173
# ===================================================================

0 commit comments

Comments
 (0)