3030 VALIDATION_SQL_EMPTY ,
3131 VALIDATION_SQL_WRITE_BLOCKED ,
3232 VALIDATION_SQL_CROSS_JOIN_BLOCKED ,
33+ VALIDATION_SQL_UNSUPPORTED_SYNTAX ,
3334 METADATA_ENTITYSET_NOT_FOUND ,
3435 METADATA_ENTITYSET_NAME_MISSING ,
3536 METADATA_TABLE_NOT_FOUND ,
@@ -845,6 +846,18 @@ def _do_request(url: str, *, params: Optional[Dict[str, Any]] = None) -> Dict[st
845846 re .IGNORECASE ,
846847 )
847848 _SQL_HAS_JOIN_RE = re .compile (r"\bJOIN\b" , re .IGNORECASE )
849+ # Server-blocked SQL patterns (save the round-trip by catching early)
850+ _SQL_UNSUPPORTED_JOIN_RE = re .compile (
851+ r"\b(?:CROSS\s+JOIN|RIGHT\s+(?:OUTER\s+)?JOIN|FULL\s+(?:OUTER\s+)?JOIN)\b" ,
852+ re .IGNORECASE ,
853+ )
854+ _SQL_UNION_RE = re .compile (r"\bUNION\b" , re .IGNORECASE )
855+ _SQL_HAVING_RE = re .compile (r"\bHAVING\b" , re .IGNORECASE )
856+ _SQL_CTE_RE = re .compile (r"^\s*WITH\b" , re .IGNORECASE )
857+ _SQL_SUBQUERY_RE = re .compile (
858+ r"\bIN\s*\(\s*SELECT\b|\bEXISTS\s*\(\s*SELECT\b|\(\s*SELECT\b.*\bFROM\b" ,
859+ re .IGNORECASE ,
860+ )
848861
849862 def _expand_select_star (self , sql : str , table : str ) -> str :
850863 """Replace ``SELECT *`` with explicit column names.
@@ -887,26 +900,33 @@ def _expand_select_star(self, sql: str, table: str) -> str:
887900 def _sql_guardrails (self , sql : str ) -> str :
888901 """Apply safety guardrails to a SQL query before sending to the server.
889902
890- Checks performed (in order) :
903+ Checks split into two categories :
891904
892- 1. **Block write statements** -- ``INSERT``, ``UPDATE``, ``DELETE``,
893- ``DROP``, ``TRUNCATE``, ``ALTER``, ``CREATE``, ``EXEC``, ``GRANT``,
894- ``REVOKE``, ``BULK`` are rejected with ``ValidationError``.
895- 2. **Warn on leading-wildcard LIKE** -- ``LIKE '%...'`` patterns
896- force full table scans and hurt shared database performance.
897- 3. **Warn on implicit cross joins** -- ``FROM a, b`` (comma syntax)
898- produces cartesian products.
905+ **Blocked** (``ValidationError`` -- saves a server round-trip):
899906
900- .. note::
901- The server enforces a 5000-row maximum per query and blocks
902- ``SELECT *`` directly. The SDK handles ``SELECT *`` via
903- ``_expand_select_star``. No client-side TOP injection is needed
904- because the server already caps results.
907+ 1. Write statements (INSERT/UPDATE/DELETE/DROP/etc.)
908+ 2. CROSS JOIN, RIGHT JOIN, FULL OUTER JOIN (server rejects these)
909+ 3. UNION / UNION ALL (server rejects)
910+ 4. HAVING clause (server rejects)
911+ 5. CTE / WITH clause (server rejects)
912+ 6. Subqueries -- IN (SELECT ...), EXISTS (SELECT ...) (server rejects)
913+
914+ **Warned** (``UserWarning`` -- query still executes):
915+
916+ 7. Leading-wildcard LIKE (full table scan)
917+ 8. Implicit cross join FROM a, b (cartesian product)
918+
919+ All blocked patterns are also blocked by the server, but catching
920+ them here saves the network round-trip and provides clearer error
921+ messages. To bypass a specific check (e.g., if the server adds
922+ support in the future), all checks are in this single method.
905923
906924 :param sql: The SQL string (already stripped).
907- :return: Possibly-rewritten SQL string.
908- :raises ValidationError: If the SQL contains a write statement .
925+ :return: The SQL string (unchanged unless rewritten) .
926+ :raises ValidationError: If the SQL contains a blocked pattern .
909927 """
928+ # --- BLOCKED (save server round-trip) ---
929+
910930 # 1. Block writes
911931 if self ._SQL_WRITE_RE .search (sql ):
912932 raise ValidationError (
@@ -916,7 +936,53 @@ def _sql_guardrails(self, sql: str) -> str:
916936 subcode = VALIDATION_SQL_WRITE_BLOCKED ,
917937 )
918938
919- # 2. Warn on leading-wildcard LIKE
939+ # 2. Block unsupported JOIN types
940+ m = self ._SQL_UNSUPPORTED_JOIN_RE .search (sql )
941+ if m :
942+ raise ValidationError (
943+ f"Unsupported JOIN type: '{ m .group (0 ).strip ()} '. "
944+ "Only INNER JOIN and LEFT JOIN are supported by the "
945+ "Dataverse SQL endpoint." ,
946+ subcode = VALIDATION_SQL_UNSUPPORTED_SYNTAX ,
947+ )
948+
949+ # 3. Block UNION
950+ if self ._SQL_UNION_RE .search (sql ):
951+ raise ValidationError (
952+ "UNION is not supported by the Dataverse SQL endpoint. "
953+ "Execute separate queries and combine results in Python "
954+ "(e.g. pd.concat([df1, df2]))." ,
955+ subcode = VALIDATION_SQL_UNSUPPORTED_SYNTAX ,
956+ )
957+
958+ # 4. Block HAVING
959+ if self ._SQL_HAVING_RE .search (sql ):
960+ raise ValidationError (
961+ "HAVING is not supported by the Dataverse SQL endpoint. "
962+ "Use WHERE to filter before GROUP BY instead." ,
963+ subcode = VALIDATION_SQL_UNSUPPORTED_SYNTAX ,
964+ )
965+
966+ # 5. Block CTE / WITH
967+ if self ._SQL_CTE_RE .search (sql ):
968+ raise ValidationError (
969+ "CTE (WITH ... AS) is not supported by the Dataverse SQL "
970+ "endpoint. Use separate queries and combine in Python." ,
971+ subcode = VALIDATION_SQL_UNSUPPORTED_SYNTAX ,
972+ )
973+
974+ # 6. Block subqueries
975+ if self ._SQL_SUBQUERY_RE .search (sql ):
976+ raise ValidationError (
977+ "Subqueries are not supported by the Dataverse SQL "
978+ "endpoint. Use separate SQL calls and combine results "
979+ "in Python (e.g. step 1: get IDs, step 2: WHERE IN)." ,
980+ subcode = VALIDATION_SQL_UNSUPPORTED_SYNTAX ,
981+ )
982+
983+ # --- WARNED (query still executes) ---
984+
985+ # 7. Warn on leading-wildcard LIKE
920986 if self ._SQL_LEADING_WILDCARD_RE .search (sql ):
921987 warnings .warn (
922988 "Query contains a leading-wildcard LIKE pattern "
@@ -927,8 +993,7 @@ def _sql_guardrails(self, sql: str) -> str:
927993 stacklevel = 4 ,
928994 )
929995
930- # 3. Warn on implicit cross joins (server allows these but they
931- # produce cartesian products that can stress shared DB resources)
996+ # 8. Warn on implicit cross joins (server allows but risky)
932997 if self ._SQL_IMPLICIT_CROSS_JOIN_RE .search (sql ):
933998 warnings .warn (
934999 "Query uses an implicit cross join (FROM table1, table2). "
0 commit comments