diff --git a/README.md b/README.md index d6c6403d..a59684a6 100644 --- a/README.md +++ b/README.md @@ -271,6 +271,14 @@ client.dataframe.update("account", df, id_column="accountid", clear_nulls=True) # Delete records by passing a Series of GUIDs client.dataframe.delete("account", new_accounts["accountid"]) + +# SQL query directly to DataFrame (supports JOINs, aggregates, GROUP BY) +df = client.dataframe.sql( + "SELECT a.name, COUNT(c.contactid) as contacts " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid " + "GROUP BY a.name" +) ``` ### Query data @@ -374,19 +382,68 @@ results = (client.query.builder("account") .execute()) ``` -**SQL queries** provide an alternative read-only query syntax: +**SQL queries** provide an alternative read-only query syntax with support for +JOINs, aggregates, GROUP BY, DISTINCT, and OFFSET FETCH pagination: ```python +# Basic query results = client.query.sql( "SELECT TOP 10 accountid, name FROM account WHERE statecode = 0" ) -for record in results: - print(record["name"]) + +# JOINs and aggregates work +results = client.query.sql( + "SELECT a.name, COUNT(c.contactid) as cnt " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid " + "GROUP BY a.name" +) + +# SELECT * is auto-expanded by the SDK +results = client.query.sql("SELECT * FROM account") + +# SQL results directly as a DataFrame +df = client.dataframe.sql( + "SELECT name, revenue FROM account ORDER BY revenue DESC" +) + +# SQL helpers: discover columns and JOINs from metadata +cols = client.query.sql_select("account") # "accountid, name, revenue, ..." 
+join = client.query.sql_join("contact", "account", from_alias="c", to_alias="a") +# Returns: "JOIN account a ON c.parentcustomerid = a.accountid" + +# Build queries using helpers -- no OData knowledge needed +sql = f"SELECT TOP 10 c.fullname, a.name FROM contact c {join}" +df = client.dataframe.sql(sql) + +# Discover all possible JOINs from a table (including polymorphic) +joins = client.query.sql_joins("opportunity") +for j in joins: + print(f"{j['column']:30s} -> {j['target']}.{j['target_pk']}") ``` -**Raw OData queries** are available via `records.get()` for cases where you need direct control over the OData filter string: +**Raw OData queries** are available via `records.get()` for cases where you need direct control over the OData filter string. The SDK provides helpers to eliminate the most error-prone parts: ```python +# Discover columns for $select (returns list ready for select= parameter) +cols = client.query.odata_select("account") +for page in client.records.get("account", select=cols, top=10): + ... 
+ +# Discover $expand navigation properties (auto-resolves PascalCase names) +nav = client.query.odata_expand("contact", "account") +# Returns: "parentcustomerid_account" +for page in client.records.get("contact", select=["fullname"], expand=[nav], top=5): + for r in page: + acct = r.get(nav) or {} + print(f"{r['fullname']} -> {acct.get('name')}") + +# Build @odata.bind for lookup fields (no manual name construction) +bind = client.query.odata_bind("contact", "account", account_id) +# Returns: {"parentcustomerid_account@odata.bind": "/accounts(guid)"} +client.records.create("contact", {"firstname": "Jane", **bind}) + +# Raw OData query with manual parameters for page in client.records.get( "account", select=["name"], @@ -435,6 +492,18 @@ client.tables.add_columns("new_Product", {"new_Category": "string"}) # Remove columns client.tables.remove_columns("new_Product", ["new_Category"]) +# List all columns (attributes) for a table to discover schema +columns = client.tables.list_columns("account") +for col in columns: + print(f"{col['LogicalName']} ({col.get('AttributeType')})") + +# List only specific properties +columns = client.tables.list_columns( + "account", + select=["LogicalName", "SchemaName", "AttributeType"], + filter="AttributeType eq 'String'", +) + # Clean up client.tables.delete("new_Product") ``` @@ -487,6 +556,16 @@ rel = client.tables.get_relationship("new_Department_Employee") if rel: print(f"Found: {rel['SchemaName']}") +# List all relationships +rels = client.tables.list_relationships() +for rel in rels: + print(f"{rel['SchemaName']} ({rel.get('@odata.type')})") + +# List relationships for a specific table (one-to-many + many-to-one + many-to-many) +account_rels = client.tables.list_table_relationships("account") +for rel in account_rels: + print(f"{rel['SchemaName']} -> {rel.get('@odata.type')}") + # Delete a relationship client.tables.delete_relationship(result['relationship_id']) ``` diff --git a/examples/README.md b/examples/README.md index 
26fe8da4..89d601f7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -40,6 +40,19 @@ Deep-dive into production-ready patterns and specialized functionality: - Column metadata management and multi-language support - Interactive cleanup and best practices +- **`sql_examples.py`** - **SQL QUERY END-TO-END** 🔍 + - Schema discovery before writing SQL (list_columns, list_relationships) + - Full SQL capabilities: SELECT, WHERE, TOP, ORDER BY, LIKE, IN, BETWEEN + - JOINs (INNER, LEFT, multi-table), GROUP BY, DISTINCT, aggregates + - OFFSET FETCH for server-side pagination + - SELECT * auto-expansion (SDK rewrites for server compatibility) + - Polymorphic lookups via SQL (ownerid, customerid, createdby) + - SQL read -> DataFrame transform -> SDK write-back (full round-trip) + - SQL-driven bulk create, update, and delete patterns + - SQL to DataFrame via `client.dataframe.sql()` + - Limitations with SDK fallbacks (writes, subqueries, functions) + - Complete reference table: SQL vs SDK method mapping + - **`file_upload.py`** - **FILE OPERATIONS** 📎 - File upload to Dataverse file columns with chunking - Advanced file handling patterns @@ -68,6 +81,9 @@ python examples/basic/functional_testing.py ```bash # Comprehensive walkthrough with production patterns python examples/advanced/walkthrough.py + +# SQL queries end-to-end with SDK fallbacks for unsupported operations +python examples/advanced/sql_examples.py ``` ## 🎯 Quick Start Recommendations @@ -75,6 +91,7 @@ python examples/advanced/walkthrough.py - **New to the SDK?** → Start with `examples/basic/installation_example.py` - **Need to test/validate?** → Use `examples/basic/functional_testing.py` - **Want to see all features?** → Run `examples/advanced/walkthrough.py` +- **Using SQL queries?** → Run `examples/advanced/sql_examples.py` - **Building production apps?** → Study patterns in `examples/advanced/walkthrough.py` ## 📋 Prerequisites diff --git a/examples/advanced/sql_examples.py 
b/examples/advanced/sql_examples.py new file mode 100644 index 00000000..ac418da8 --- /dev/null +++ b/examples/advanced/sql_examples.py @@ -0,0 +1,1212 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +End-to-end SQL query examples -- pure SQL workflows in Dataverse. + +This example demonstrates everything a SQL developer can do through the +Python SDK's ``client.query.sql()`` and ``client.dataframe.sql()`` methods, +based on extensive testing of the Dataverse SQL endpoint (353 test queries). + +Capabilities PROVEN to work: +- SELECT with specific columns, SELECT * (auto-expanded by SDK) +- INNER JOIN, LEFT JOIN (up to 6+ tables) +- COUNT(*), SUM(), AVG(), MIN(), MAX() aggregates +- GROUP BY, DISTINCT, DISTINCT TOP +- WHERE (=, !=, >, <, >=, <=, LIKE, IN, NOT IN, IS NULL, IS NOT NULL, BETWEEN) +- TOP N (0-5000), ORDER BY col [ASC|DESC] +- OFFSET ... FETCH NEXT (server-side pagination) +- Table and column aliases +- Polymorphic lookups (ownerid, customerid) via separate JOINs +- Audit trail (createdby, modifiedby) via systemuser JOINs +- SQL read -> DataFrame transform -> SDK write-back (full round-trip) +- AND/OR, NOT IN, NOT LIKE boolean logic +- Deep JOINs (5-8 tables) with no server depth limit +- SQL helper functions: sql_columns, sql_select, sql_joins, sql_join +- OData helper functions: odata_select, odata_expands, odata_expand, odata_bind +- SQL vs OData side-by-side comparison + +Not supported (server rejects): +- INSERT/UPDATE/DELETE (read-only) -> use client.dataframe.create/update/delete +- Subqueries, CTE, HAVING, UNION +- RIGHT JOIN, FULL OUTER JOIN, CROSS JOIN +- CASE, COALESCE, CAST, string/date/math functions +- Window functions (ROW_NUMBER, RANK) + +Prerequisites: +- pip install PowerPlatform-Dataverse-Client azure-identity +""" + +import sys +import json +import time +from collections import defaultdict +from enum import IntEnum + +import pandas as pd +from azure.identity import InteractiveBrowserCredential 
+from PowerPlatform.Dataverse.client import DataverseClient +from PowerPlatform.Dataverse.core.errors import MetadataError +import requests + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def log_call(description): + print(f"\n-> {description}") + + +def heading(section_num, title): + print(f"\n{'=' * 80}") + print(f"{section_num}. {title}") + print("=" * 80) + + +def backoff(op, *, delays=(0, 2, 5, 10, 20, 20)): + """Retry an operation with exponential back-off.""" + last = None + total_delay = 0 + attempts = 0 + for d in delays: + if d: + time.sleep(d) + total_delay += d + attempts += 1 + try: + result = op() + if attempts > 1: + print(f" [INFO] Backoff succeeded after {attempts - 1} " f"retry(s); waited {total_delay}s total.") + return result + except Exception as ex: + last = ex + continue + if last: + if attempts: + print( + f" [WARN] Backoff exhausted after {max(attempts - 1, 0)} retry(s); waited {total_delay}s total." + f"\n [ERROR] {last}" + ) + raise last + + +class Region(IntEnum): + NORTH = 1 + SOUTH = 2 + EAST = 3 + WEST = 4 + + +def main(): + print("=" * 80) + print("Dataverse SDK -- SQL End-to-End (Pure SQL Workflows)") + print("=" * 80) + + heading(1, "Setup & Authentication") + base_url = input("Enter Dataverse org URL (e.g. 
https://yourorg.crm.dynamics.com): ").strip() + if not base_url: + print("No URL entered; exiting.") + sys.exit(1) + base_url = base_url.rstrip("/") + + log_call("InteractiveBrowserCredential()") + credential = InteractiveBrowserCredential() + + log_call(f"DataverseClient(base_url='{base_url}', credential=...)") + with DataverseClient(base_url=base_url, credential=credential) as client: + print(f"[OK] Connected to: {base_url}") + _run_examples(client) + + +def _run_examples(client): + parent_table = "new_SQLDemoTeam" + child_table = "new_SQLDemoTask" + + # ================================================================== + # 2. Seed demo data (SDK writes -- SQL is read-only) + # ================================================================== + heading(2, "Seed Demo Data (SDK Writes -- SQL Is Read-Only)") + print( + "[INFO] SQL is read-only (no INSERT/UPDATE/DELETE). We use the SDK's\n" + "records namespace to seed data, then query it all via SQL." + ) + + log_call(f"client.tables.get('{parent_table}')") + info = backoff(lambda: client.tables.get(parent_table)) + if info: + print(f"[OK] Table already exists: {parent_table}") + else: + log_call(f"client.tables.create('{parent_table}', ...)") + info = backoff( + lambda: client.tables.create( + parent_table, + { + "new_Code": "string", + "new_Region": Region, + "new_Budget": "decimal", + "new_Active": "bool", + }, + ) + ) + print(f"[OK] Created table: {parent_table}") + + log_call(f"client.tables.get('{child_table}')") + info2 = backoff(lambda: client.tables.get(child_table)) + if info2: + print(f"[OK] Table already exists: {child_table}") + else: + log_call(f"client.tables.create('{child_table}', ...)") + info2 = backoff( + lambda: client.tables.create( + child_table, + { + "new_Title": "string", + "new_Hours": "int", + "new_Done": "bool", + "new_Priority": "int", + }, + ) + ) + print(f"[OK] Created table: {child_table}") + + # Create lookup so tasks reference teams via JOIN + print("\n[INFO] Creating lookup field 
so tasks reference teams via JOIN...") + try: + backoff( + lambda: client.tables.create_lookup_field( + referencing_table=child_table, + lookup_field_name="new_TeamId", + referenced_table=parent_table, + display_name="Team", + ) + ) + print("[OK] Created lookup: new_TeamId on tasks -> teams") + except Exception as e: + msg = str(e).lower() + if "already exists" in msg or "duplicate" in msg: + print("[OK] Lookup already exists (skipped)") + else: + raise + + log_call(f"client.records.create('{parent_table}', [...])") + teams = [ + {"new_Code": "ALPHA", "new_Region": Region.NORTH, "new_Budget": 50000, "new_Active": True}, + {"new_Code": "BRAVO", "new_Region": Region.SOUTH, "new_Budget": 75000, "new_Active": True}, + {"new_Code": "CHARLIE", "new_Region": Region.EAST, "new_Budget": 30000, "new_Active": False}, + {"new_Code": "DELTA", "new_Region": Region.WEST, "new_Budget": 90000, "new_Active": True}, + {"new_Code": "ECHO", "new_Region": Region.NORTH, "new_Budget": 42000, "new_Active": True}, + ] + team_ids = backoff(lambda: client.records.create(parent_table, teams)) + print(f"[OK] Seeded {len(team_ids)} teams") + + parent_logical = parent_table.lower() + parent_set = f"{parent_logical}s" + try: + tinfo = client.tables.get(parent_table) + if tinfo: + parent_set = tinfo.get("entity_set_name", parent_set) + except Exception: + pass + + log_call(f"client.records.create('{child_table}', [...])") + tasks = [ + { + "new_Title": "Design mockups", + "new_Hours": 8, + "new_Done": True, + "new_Priority": 2, + f"new_TeamId@odata.bind": f"/{parent_set}({team_ids[0]})", + }, + { + "new_Title": "Write unit tests", + "new_Hours": 12, + "new_Done": False, + "new_Priority": 3, + f"new_TeamId@odata.bind": f"/{parent_set}({team_ids[0]})", + }, + { + "new_Title": "Code review", + "new_Hours": 3, + "new_Done": True, + "new_Priority": 1, + f"new_TeamId@odata.bind": f"/{parent_set}({team_ids[1]})", + }, + { + "new_Title": "Deploy to staging", + "new_Hours": 5, + "new_Done": False, + 
"new_Priority": 3, + f"new_TeamId@odata.bind": f"/{parent_set}({team_ids[1]})", + }, + { + "new_Title": "Update docs", + "new_Hours": 4, + "new_Done": True, + "new_Priority": 1, + f"new_TeamId@odata.bind": f"/{parent_set}({team_ids[2]})", + }, + { + "new_Title": "Performance tuning", + "new_Hours": 10, + "new_Done": False, + "new_Priority": 2, + f"new_TeamId@odata.bind": f"/{parent_set}({team_ids[3]})", + }, + { + "new_Title": "Security audit", + "new_Hours": 6, + "new_Done": False, + "new_Priority": 3, + f"new_TeamId@odata.bind": f"/{parent_set}({team_ids[4]})", + }, + ] + task_ids = backoff(lambda: client.records.create(child_table, tasks)) + print(f"[OK] Seeded {len(task_ids)} tasks (with team lookups)") + + parent_id_col = f"{parent_logical}id" + + try: + # ============================================================== + # 3. Schema discovery + # ============================================================== + heading(3, "Schema Discovery Before Writing SQL") + log_call(f"client.tables.list_columns('{parent_table}', select=[...])") + columns = backoff( + lambda: client.tables.list_columns( + parent_table, + select=["LogicalName", "SchemaName", "AttributeType"], + ) + ) + custom_cols = [c for c in columns if c.get("LogicalName", "").startswith("new_")] + print(f"[OK] Custom columns on {parent_table}:") + for col in custom_cols: + print(f" {col['LogicalName']:30s} Type: {col.get('AttributeType', 'N/A')}") + + log_call(f"client.tables.list_table_relationships('{child_table}', ...)") + rels = backoff( + lambda: client.tables.list_table_relationships( + child_table, + select=["SchemaName"], + ) + ) + print(f"[OK] Relationships on {child_table}: {len(rels)}") + + # ============================================================== + # 4. 
Basic SELECT + # ============================================================== + heading(4, "Basic SQL -- SELECT Specific Columns") + sql = f"SELECT new_code, new_budget, new_active FROM {parent_table}" + log_call(f'client.query.sql("{sql}")') + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] {len(results)} rows:") + for r in results: + print(f" {r.get('new_code', ''):<12s} Budget={r.get('new_budget')} Active={r.get('new_active')}") + + # ============================================================== + # 5. SELECT * (auto-expanded by SDK) + # ============================================================== + heading(5, "SELECT * (Auto-Expanded by SDK)") + print( + "The server blocks SELECT * directly. The SDK auto-resolves\n" + "all column names via list_columns() and rewrites the query." + ) + sql = f"SELECT * FROM {parent_table}" + log_call(f'client.query.sql("{sql}")') + results = backoff(lambda: client.query.sql(sql)) + if results: + keys = [k for k in results[0].keys() if not k.startswith("@")] + print(f"[OK] {len(results)} rows, {len(keys)} columns") + + # ============================================================== + # 6. WHERE clause + # ============================================================== + heading(6, "SQL -- WHERE (=, >, <, IN, IS NULL, BETWEEN)") + sql = f"SELECT new_code, new_budget FROM {parent_table} WHERE new_budget > 40000" + log_call(f'client.query.sql("{sql}")') + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] budget > 40000: {len(results)} rows") + + sql = f"SELECT new_code FROM {parent_table} WHERE new_code IN ('ALPHA', 'DELTA')" + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] IN clause: {[r.get('new_code') for r in results]}") + + sql = f"SELECT new_title FROM {child_table} WHERE new_priority IS NOT NULL" + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] IS NOT NULL: {len(results)} tasks") + + # ============================================================== + # 7. 
LIKE + # ============================================================== + heading(7, "SQL -- LIKE Pattern Matching") + sql = f"SELECT new_title FROM {child_table} WHERE new_title LIKE '%test%'" + log_call(f'client.query.sql("{sql}")') + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] LIKE '%test%': {len(results)} matches") + + # ============================================================== + # 8. TOP + ORDER BY + # ============================================================== + heading(8, "SQL -- TOP N + ORDER BY") + sql = f"SELECT TOP 3 new_code, new_budget FROM {parent_table} ORDER BY new_budget DESC" + log_call(f'client.query.sql("{sql}")') + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] Top 3 by budget:") + for r in results: + print(f" {r.get('new_code', ''):<12s} Budget={r.get('new_budget')}") + + # ============================================================== + # 9. Aliases + # ============================================================== + heading(9, "SQL -- Table and Column Aliases") + sql = ( + f"SELECT t.new_code AS team_code, t.new_budget AS budget " + f"FROM {parent_table} AS t WHERE t.new_active = 1" + ) + log_call(f'client.query.sql("{sql}")') + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] Aliased results: {len(results)} rows") + + # ============================================================== + # 10. DISTINCT + # ============================================================== + heading(10, "SQL -- DISTINCT") + sql = f"SELECT DISTINCT new_region FROM {parent_table}" + log_call(f'client.query.sql("{sql}")') + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] Distinct regions: {[r.get('new_region') for r in results]}") + + sql = f"SELECT DISTINCT TOP 2 new_region FROM {parent_table}" + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] DISTINCT TOP 2: {[r.get('new_region') for r in results]}") + + # ============================================================== + # 11. 
Aggregates: COUNT, SUM, AVG, MIN, MAX + # ============================================================== + heading(11, "SQL -- Aggregates (All Run on Server)") + sql = ( + f"SELECT COUNT(*) as cnt, SUM(new_budget) as total, " + f"AVG(new_budget) as avg_b, MIN(new_budget) as min_b, " + f"MAX(new_budget) as max_b FROM {parent_table}" + ) + log_call('client.query.sql("SELECT COUNT, SUM, AVG, MIN, MAX...")') + results = backoff(lambda: client.query.sql(sql)) + if results: + print(f"[OK] {json.dumps(dict(results[0]), indent=2)}") + + # ============================================================== + # 12. GROUP BY + # ============================================================== + heading(12, "SQL -- GROUP BY (Server-Side)") + sql = ( + f"SELECT new_region, COUNT(*) as team_count, " + f"SUM(new_budget) as total_budget " + f"FROM {parent_table} GROUP BY new_region" + ) + log_call(f'client.query.sql("{sql}")') + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] {len(results)} groups:") + for r in results: + print(f" Region={r.get('new_region')} Count={r.get('team_count')} Total={r.get('total_budget')}") + + # ============================================================== + # 13. INNER JOIN + # ============================================================== + heading(13, "SQL -- INNER JOIN") + print("Use the lookup attribute's logical name (e.g. new_teamid) for JOINs.") + + # Use sql_join() to auto-discover the relationship and build + # the JOIN clause with proper aliases. 
+ lookup_col = "new_teamid" # Lookup logical name, NOT _..._value + join_clause = client.query.sql_join( + from_table=child_table, + to_table=parent_table, + from_alias="tk", + to_alias="t", + ) + print(f"[INFO] Lookup column: {lookup_col}") + print(f"[INFO] Generated JOIN: {join_clause}") + + sql = f"SELECT t.new_code, tk.new_title, tk.new_hours " f"FROM {child_table} tk " f"{join_clause}" + log_call('client.query.sql("...INNER JOIN...")') + try: + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] JOIN: {len(results)} rows") + for r in results[:5]: + print( + f" Team={r.get('new_code', ''):<10s} Task={r.get('new_title', ''):<25s} Hours={r.get('new_hours')}" + ) + except Exception as e: + print(f"[WARN] JOIN failed: {e}") + + # ============================================================== + # 14. LEFT JOIN + # ============================================================== + heading(14, "SQL -- LEFT JOIN") + sql = ( + f"SELECT t.new_code, tk.new_title " + f"FROM {parent_table} t " + f"LEFT JOIN {child_table} tk ON t.{parent_id_col} = tk.{lookup_col}" + ) # lookup_col = logical name, NOT _..._value + log_call('client.query.sql("...LEFT JOIN...")') + try: + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] LEFT JOIN: {len(results)} rows") + except Exception as e: + print(f"[WARN] LEFT JOIN failed: {e}") + + # ============================================================== + # 15. 
JOIN + GROUP BY + aggregates + # ============================================================== + heading(15, "SQL -- JOIN + GROUP BY + Aggregates") + sql = ( + f"SELECT t.new_code, COUNT(tk.new_sqldemotaskid) as task_count, " + f"SUM(tk.new_hours) as total_hours " + f"FROM {parent_table} t " + f"JOIN {child_table} tk ON t.{parent_id_col} = tk.{lookup_col} " # logical name + f"GROUP BY t.new_code" + ) + log_call('client.query.sql("...JOIN...GROUP BY...")') + try: + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] {len(results)} groups:") + for r in results: + print(f" Team={r.get('new_code', ''):<10s} Tasks={r.get('task_count')} Hours={r.get('total_hours')}") + except Exception as e: + print(f"[WARN] JOIN+GROUP BY failed: {e}") + + # ============================================================== + # 16. OFFSET FETCH (server-side pagination) + # ============================================================== + heading(16, "SQL -- OFFSET FETCH (Server-Side Pagination)") + page_size = 3 + for pg in range(1, 4): + offset = (pg - 1) * page_size + sql = ( + f"SELECT new_title, new_hours FROM {child_table} " + f"ORDER BY new_hours " + f"OFFSET {offset} ROWS FETCH NEXT {page_size} ROWS ONLY" + ) + log_call(f"Page {pg}: OFFSET {offset} FETCH NEXT {page_size}") + results = backoff(lambda sql=sql: client.query.sql(sql)) + print(f" Page {pg}: {len(results)} rows") + for r in results: + print(f" {r.get('new_title', ''):<25s} Hours={r.get('new_hours')}") + if len(results) < page_size: + break + + # ============================================================== + # 17. 
SQL to DataFrame + # ============================================================== + heading(17, "SQL to DataFrame (client.dataframe.sql)") + print("Get SQL results directly as a pandas DataFrame.") + sql = f"SELECT new_code, new_budget, new_region " f"FROM {parent_table} ORDER BY new_budget DESC" + log_call(f'client.dataframe.sql("{sql}")') + df = backoff(lambda: client.dataframe.sql(sql)) + print(f"[OK] DataFrame: {len(df)} rows x {len(df.columns)} columns") + print(df.to_string(index=False)) + print(f"\n Mean budget: {df['new_budget'].mean():,.2f}") + print(f" Budget by region:\n{df.groupby('new_region')['new_budget'].sum()}") + + # ============================================================== + # 18. SQL to DataFrame with JOINs + # ============================================================== + heading(18, "SQL to DataFrame -- JOIN Query") + sql = ( + f"SELECT t.new_code, tk.new_title, tk.new_hours " + f"FROM {child_table} tk " + f"JOIN {parent_table} t ON tk.{lookup_col} = t.{parent_id_col}" + ) + log_call('client.dataframe.sql("...JOIN...")') + try: + df_j = backoff(lambda: client.dataframe.sql(sql)) + print(f"[OK] {len(df_j)} rows") + print(df_j.to_string(index=False)) + print("\n-- Pivot: hours by team --") + print(df_j.groupby("new_code")["new_hours"].agg(["sum", "mean", "count"]).to_string()) + except Exception as e: + print(f"[WARN] {e}") + + # ============================================================== + # 19. 
Built-in table JOINs + # ============================================================== + heading(19, "Built-In Table JOINs (account -> contact)") + sql = "SELECT a.name, c.fullname FROM account a " "INNER JOIN contact c ON a.accountid = c.parentcustomerid" + log_call('client.query.sql("...account JOIN contact...")') + try: + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] {len(results)} rows") + for r in results[:5]: + print(f" Account={r.get('name', ''):<25s} Contact={r.get('fullname', '')}") + except Exception as e: + print(f"[INFO] {e}") + + # ============================================================== + # 20. LIMITATION: Writes require SDK + # ============================================================== + heading(20, "LIMITATION: Writes Require SDK (Read-Only SQL)") + sql = f"SELECT new_sqldemotaskid, new_title " f"FROM {child_table} WHERE new_done = 0" + incomplete = backoff(lambda: client.query.sql(sql)) + print(f"[OK] SQL found {len(incomplete)} incomplete tasks") + if incomplete: + fid = incomplete[0].get("new_sqldemotaskid") + if fid: + backoff(lambda: client.records.update(child_table, fid, {"new_Done": True})) + print(f"[OK] Updated via SDK: '{incomplete[0].get('new_title')}'") + + # ============================================================== + # 21. 
LIMITATION: No subqueries + # ============================================================== + heading(21, "LIMITATION: No Subqueries -- Chain SQL Calls") + sql1 = f"SELECT {parent_id_col} FROM {parent_table} WHERE new_budget > 50000" + big = backoff(lambda: client.query.sql(sql1)) + big_ids = [r.get(parent_id_col) for r in big if r.get(parent_id_col)] + print(f"[OK] Step 1: {len(big_ids)} teams with budget > 50000") + if big_ids: + id_list = ", ".join(f"'{i}'" for i in big_ids) + sql2 = f"SELECT new_title FROM {child_table} " f"WHERE {lookup_col} IN ({id_list})" + tasks_r = backoff(lambda: client.query.sql(sql2)) + print(f"[OK] Step 2: {len(tasks_r)} tasks for big-budget teams") + + # ============================================================== + # 22. LIMITATION: No functions + # ============================================================== + heading(22, "LIMITATION: No Functions -- Post-Process in Python") + sql = f"SELECT new_code, new_budget FROM {parent_table}" + rows = backoff(lambda: client.query.sql(sql)) + print("[OK] Post-processing (CASE equivalent):") + for r in rows: + b = float(r.get("new_budget") or 0) + tier = "HIGH" if b > 60000 else "MEDIUM" if b > 35000 else "LOW" + print(f" {r.get('new_code', ''):<12s} Budget={b:>10,.2f} Tier={tier}") + + # ============================================================== + # 23. Polymorphic lookups via SQL (ownerid, customerid) + # ============================================================== + heading(23, "Polymorphic Lookups via SQL (ownerid, customerid)") + print( + "Some Dataverse lookup columns are POLYMORPHIC -- the GUID can\n" + "point to different entity types (e.g. ownerid -> systemuser OR\n" + "team, customerid -> account OR contact).\n" + "\n" + "SQL pattern: INNER JOIN acts as both a join AND a type filter.\n" + "If the GUID points to a different type, the JOIN simply returns\n" + "no row -- so you get exactly the records of the type you joined." + ) + + # 23a. 
Discover lookup columns on a table + print("\n-- 23a. Discover lookup columns on account --") + log_call("client.tables.list_columns('account', filter=Lookup)") + try: + acct_cols = backoff( + lambda: client.tables.list_columns( + "account", + select=["LogicalName", "AttributeType"], + filter="AttributeType eq 'Lookup' or AttributeType eq 'Owner' or AttributeType eq 'Customer'", + ) + ) + lookup_names = sorted( + c.get("LogicalName", "") + for c in acct_cols + if c.get("LogicalName", "").startswith("_") and c.get("LogicalName", "").endswith("_value") + ) + print(f"[OK] Lookup columns on account ({len(lookup_names)} found):") + for ln in lookup_names[:10]: + print(f" {ln}") + if len(lookup_names) > 10: + print(f" ... and {len(lookup_names) - 10} more") + except Exception as e: + print(f"[INFO] Lookup discovery skipped: {e}") + + # 23b. Discover polymorphic targets via relationship metadata + print("\n-- 23b. Discover which entities a polymorphic lookup targets --") + log_call("client.tables.list_table_relationships('account', ...)") + try: + acct_rels = backoff( + lambda: client.tables.list_table_relationships( + "account", + select=["SchemaName", "ReferencedEntity", "ReferencingEntity", "ReferencingAttribute"], + ) + ) + by_attr = defaultdict(list) + for rel in acct_rels: + attr = rel.get("ReferencingAttribute", "") + ref = rel.get("ReferencedEntity", "") + if attr and ref and rel.get("ReferencingEntity", "").lower() == "account": + by_attr[attr].append(ref) + print("[OK] Lookup targets on account:") + for attr, targets in sorted(by_attr.items()): + tag = "POLYMORPHIC" if len(targets) > 1 else "regular" + print(f" {attr:<35s} -> {', '.join(targets):<30s} [{tag}]") + except Exception as e: + print(f"[INFO] Relationship discovery skipped: {e}") + + # 23c. Resolve ownerid (polymorphic: systemuser or team) + print("\n-- 23c. Resolve ownerid via SQL JOINs --") + print("ownerid is polymorphic (systemuser or team). 
Use separate\n" "JOINs and combine in a DataFrame.") + try: + # Records owned by users + log_call("SQL: account JOIN systemuser ON _ownerid_value") + df_user_owned = backoff( + lambda: client.dataframe.sql( + "SELECT TOP 5 a.name, su.fullname as owner_name " + "FROM account a " + "INNER JOIN systemuser su ON a._ownerid_value = su.systemuserid" + ) + ) + df_user_owned["owner_type"] = "User" + + # Records owned by teams + log_call("SQL: account JOIN team ON _ownerid_value") + df_team_owned = backoff( + lambda: client.dataframe.sql( + "SELECT TOP 5 a.name, t.name as owner_name " + "FROM account a " + "INNER JOIN team t ON a._ownerid_value = t.teamid" + ) + ) + df_team_owned["owner_type"] = "Team" + + df_owners = pd.concat([df_user_owned, df_team_owned], ignore_index=True) + print(f"[OK] Owner resolution: {len(df_owners)} rows") + print(f" User-owned: {len(df_user_owned)}") + print(f" Team-owned: {len(df_team_owned)}") + if not df_owners.empty: + print(df_owners.to_string(index=False)) + except Exception as e: + print(f"[INFO] Owner resolution skipped (may have no data): {e}") + + # 23d. Track created-by and modified-by (common audit pattern) + print("\n-- 23d. Audit trail: who created/modified records (via SQL) --") + try: + log_call("SQL: account JOIN systemuser (createdby + modifiedby)") + results = backoff( + lambda: client.query.sql( + "SELECT TOP 5 a.name, " + "creator.fullname as created_by, " + "modifier.fullname as modified_by " + "FROM account a " + "JOIN systemuser creator ON a._createdby_value = creator.systemuserid " + "JOIN systemuser modifier ON a._modifiedby_value = modifier.systemuserid" + ) + ) + print(f"[OK] Audit trail: {len(results)} rows") + for r in results[:5]: + print( + f" {r.get('name', ''):<25s} " + f"Created: {r.get('created_by', ''):<20s} " + f"Modified: {r.get('modified_by', '')}" + ) + except Exception as e: + print(f"[INFO] Audit trail skipped: {e}") + + # ============================================================== + # 24. 
SQL Read -> DataFrame Transform -> SDK Write-Back + # ============================================================== + heading(24, "SQL Read -> DataFrame Transform -> SDK Write-Back") + print( + "The full bidirectional workflow for SQL users:\n" + " 1. SQL query -> DataFrame (read)\n" + " 2. pandas -> Transform (compute)\n" + " 3. DataFrame -> SDK write-back (create/update/delete)\n" + "\n" + "This is how SQL developers do end-to-end work without\n" + "learning OData or the Web API." + ) + + # Read current state via SQL + sql = f"SELECT new_sqldemotaskid, new_title, new_hours, new_done " f"FROM {child_table}" + log_call(f'client.dataframe.sql("{sql}")') + df_tasks = backoff(lambda: client.dataframe.sql(sql)) + print(f"[OK] Read {len(df_tasks)} tasks via SQL") + print(df_tasks.to_string(index=False)) + + # Transform: bump hours by 1 for incomplete tasks + mask = df_tasks["new_done"] == False # noqa: E712 + original_hours = df_tasks.loc[mask, "new_hours"].copy() + df_tasks.loc[mask, "new_hours"] = df_tasks.loc[mask, "new_hours"] + 1 + changed = mask.sum() + print(f"\n[OK] Bumped hours +1 for {changed} incomplete tasks (in DataFrame)") + + # Write back via SDK + if changed > 0: + updates = df_tasks.loc[mask, ["new_sqldemotaskid", "new_hours"]] + log_call(f"client.dataframe.update('{child_table}', ..., id_column='new_sqldemotaskid')") + backoff(lambda: client.dataframe.update(child_table, updates, id_column="new_sqldemotaskid")) + print(f"[OK] Wrote back {len(updates)} updated rows via DataFrame") + + # Verify with SQL + verify = backoff( + lambda: client.dataframe.sql(f"SELECT new_title, new_hours FROM {child_table} WHERE new_done = 0") + ) + print(f"[OK] Verified via SQL -- incomplete tasks now:") + print(verify.to_string(index=False)) + + # Restore original values + df_tasks.loc[mask, "new_hours"] = original_hours + restore = df_tasks.loc[mask, ["new_sqldemotaskid", "new_hours"]] + backoff(lambda: client.dataframe.update(child_table, restore, 
id_column="new_sqldemotaskid")) + print("[OK] Restored original hours") + + # ============================================================== + # 25. SQL-driven bulk create from query results + # ============================================================== + heading(25, "SQL-Driven Bulk Create (Query -> Transform -> Insert)") + print( + "Pattern: query existing data with SQL, transform it,\n" + "then create new records via DataFrame -- all without\n" + "learning OData syntax." + ) + + # Read teams via SQL + sql = f"SELECT new_code, new_budget FROM {parent_table} WHERE new_active = 1" + log_call(f'client.dataframe.sql("{sql}")') + df_active = backoff(lambda: client.dataframe.sql(sql)) + print(f"[OK] Read {len(df_active)} active teams via SQL") + + # Transform: create a new task for each active team + new_tasks = pd.DataFrame( + { + "new_Title": [f"Review budget for {code}" for code in df_active["new_code"]], + "new_Hours": [2] * len(df_active), + "new_Done": [False] * len(df_active), + "new_Priority": [1] * len(df_active), + } + ) + log_call(f"client.dataframe.create('{child_table}', DataFrame({len(new_tasks)} rows))") + new_ids = backoff(lambda: client.dataframe.create(child_table, new_tasks)) + print(f"[OK] Created {len(new_ids)} new tasks from SQL query results") + + # Verify with SQL + verify_sql = f"SELECT new_title, new_hours FROM {child_table} " f"WHERE new_title LIKE 'Review budget%'" + created_tasks = backoff(lambda: client.query.sql(verify_sql)) + print(f"[OK] Verified via SQL: {len(created_tasks)} 'Review budget' tasks") + + # Clean up the created tasks + backoff(lambda: client.dataframe.delete(child_table, new_ids)) + print(f"[OK] Cleaned up {len(new_ids)} demo tasks") + + # ============================================================== + # 26. 
SQL-driven bulk delete + # ============================================================== + heading(26, "SQL-Driven Bulk Delete (Query -> Filter -> Delete)") + print("Pattern: find records with SQL, filter in pandas,\n" "then delete via DataFrame -- pure SQL thinking.") + + # Create some temp records to demonstrate + temp = pd.DataFrame( + { + "new_Title": ["TEMP: delete me 1", "TEMP: delete me 2", "TEMP: keep me"], + "new_Hours": [1, 2, 3], + "new_Done": [False, False, False], + "new_Priority": [1, 1, 1], + } + ) + temp_ids = backoff(lambda: client.dataframe.create(child_table, temp)) + print(f"[OK] Created {len(temp_ids)} temp records") + + # SQL to find, pandas to filter, SDK to delete + sql = f"SELECT new_sqldemotaskid, new_title FROM {child_table} WHERE new_title LIKE 'TEMP:%'" + df_temp = backoff(lambda: client.dataframe.sql(sql)) + print(f"[OK] SQL found {len(df_temp)} TEMP records") + + # Filter in pandas: only delete the "delete me" ones + to_delete = df_temp[df_temp["new_title"].str.contains("delete me")] + print(f"[OK] Pandas filtered to {len(to_delete)} records to delete") + + if not to_delete.empty: + log_call("client.dataframe.delete(...)") + backoff(lambda: client.dataframe.delete(child_table, to_delete["new_sqldemotaskid"])) + print(f"[OK] Deleted {len(to_delete)} records via DataFrame") + + # Verify the "keep me" record survived + remaining = backoff( + lambda: client.query.sql(f"SELECT new_title FROM {child_table} WHERE new_title LIKE 'TEMP:%'") + ) + print(f"[OK] Remaining TEMP records: {len(remaining)}") + for r in remaining: + print(f" {r.get('new_title')}") + + # Clean up the surviving temp record + keep_ids = [ + r.get("new_sqldemotaskid") + for r in backoff( + lambda: client.query.sql(f"SELECT new_sqldemotaskid FROM {child_table} WHERE new_title LIKE 'TEMP:%'") + ) + if r.get("new_sqldemotaskid") + ] + for kid in keep_ids: + backoff(lambda kid=kid: client.records.delete(child_table, kid)) + + # 
============================================================== + # 27. AND/OR, NOT IN, NOT LIKE + # ============================================================== + heading(27, "SQL -- AND/OR, NOT IN, NOT LIKE") + sql = f"SELECT new_code, new_budget FROM {parent_table} " f"WHERE new_active = 1 AND new_budget > 40000" + log_call(f'client.query.sql("{sql}")') + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] AND: {len(results)} rows") + + sql = f"SELECT new_code FROM {parent_table} " f"WHERE new_code = 'ALPHA' OR new_code = 'DELTA'" + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] OR: {[r.get('new_code') for r in results]}") + + sql = ( + f"SELECT new_code FROM {parent_table} " + f"WHERE new_active = 1 AND (new_budget > 80000 OR new_budget < 45000)" + ) + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] AND + OR with parens: {len(results)} rows") + + sql = f"SELECT new_code FROM {parent_table} WHERE new_code NOT IN ('ALPHA')" + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] NOT IN: {[r.get('new_code') for r in results]}") + + sql = f"SELECT new_title FROM {child_table} WHERE new_title NOT LIKE 'Design%'" + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] NOT LIKE: {len(results)} rows") + + # ============================================================== + # 28. Deep JOINs (5-8 tables) + # ============================================================== + heading(28, "Deep JOINs (5+ Tables) -- No Depth Limit") + print( + "SQL JOINs have no server-imposed depth limit (tested up to 15\n" + "tables). Each JOIN uses indexed foreign key lookups, so\n" + "performance stays consistent. Most real-world queries use\n" + "2-4 tables; deeper JOINs are available when needed." 
+ ) + + sql = ( + "SELECT TOP 3 a.name, c.fullname, o.name as opp, " + "su.fullname as owner, bu.name as bu " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid " + "JOIN opportunity o ON a.accountid = o.parentaccountid " + "JOIN systemuser su ON a.ownerid = su.systemuserid " + "JOIN businessunit bu ON su.businessunitid = bu.businessunitid" + ) + log_call("5-table JOIN") + try: + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] 5-table JOIN: {len(results)} rows") + except Exception as e: + print(f"[INFO] {e}") + + sql = ( + "SELECT TOP 3 a.name, c.fullname, o.name as opp, " + "su.fullname as owner, bu.name as bu, t.name as team, " + "cr.fullname as creator, md.fullname as modifier " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid " + "JOIN opportunity o ON a.accountid = o.parentaccountid " + "JOIN systemuser su ON a.ownerid = su.systemuserid " + "JOIN businessunit bu ON su.businessunitid = bu.businessunitid " + "JOIN team t ON bu.businessunitid = t.businessunitid " + "JOIN systemuser cr ON a.createdby = cr.systemuserid " + "JOIN systemuser md ON a.modifiedby = md.systemuserid" + ) + log_call("8-table JOIN") + try: + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] 8-table JOIN: {len(results)} rows") + except Exception as e: + print(f"[INFO] {e}") + + # ============================================================== + # 29. SQL Helper Functions + # ============================================================== + heading(29, "SQL Helper Functions (query.sql_*)") + print( + "The SDK provides helper functions that auto-discover column\n" + "names and JOIN clauses from metadata -- no guessing needed." 
+ ) + + # sql_columns + log_call(f"client.query.sql_columns('{parent_table}')") + cols = client.query.sql_columns(parent_table) + print(f"[OK] {len(cols)} columns:") + for c in cols[:5]: + print(f" {c['name']:30s} Type: {c['type']:15s} PK={c['is_pk']}") + + # sql_select + log_call(f"client.query.sql_select('{parent_table}')") + select_str = client.query.sql_select(parent_table) + print(f"[OK] SELECT list: {select_str[:60]}...") + + # sql_joins + log_call(f"client.query.sql_joins('{child_table}')") + joins = client.query.sql_joins(child_table) + print(f"[OK] {len(joins)} possible JOINs:") + for j in joins[:5]: + print(f" {j['column']:25s} -> {j['target']}.{j['target_pk']}") + + # sql_join (auto-generate JOIN clause) + log_call(f"client.query.sql_join('{child_table}', '{parent_table}', ...)") + try: + join_clause = client.query.sql_join(child_table, parent_table, from_alias="tk", to_alias="t") + print(f"[OK] {join_clause}") + + sql = f"SELECT TOP 3 tk.new_title, t.new_code FROM {child_table} tk {join_clause}" + results = backoff(lambda: client.query.sql(sql)) + print(f"[OK] Live query with sql_join(): {len(results)} rows") + except Exception as e: + print(f"[WARN] {e}") + + # ============================================================== + # 30. OData Helper Functions + # ============================================================== + heading(30, "OData Helper Functions (query.odata_*)") + print( + "Parallel helpers for OData/records.get() users -- auto-discover\n" + "navigation properties and build @odata.bind payloads." 
+ )
+
+ # odata_select
+ log_call(f"client.query.odata_select('{parent_table}')")
+ odata_cols = client.query.odata_select(parent_table)
+ print(f"[OK] {len(odata_cols)} columns for $select: {odata_cols[:5]}...")
+
+ # odata_expands
+ log_call(f"client.query.odata_expands('{child_table}')")
+ try:
+ expands = client.query.odata_expands(child_table)
+ print(f"[OK] {len(expands)} expand targets:")
+ for e in expands[:5]:
+ print(f" nav={e['nav_property']:30s} -> {e['target_table']}")
+ except Exception as e:
+ print(f"[WARN] {e}")
+
+ # odata_expand (single target)
+ try:
+ nav = client.query.odata_expand(child_table, parent_table)
+ print(f"\n[OK] odata_expand('{child_table}', '{parent_table}') = '{nav}'")
+ print(" Usage: client.records.get('" + child_table + "', expand=['" + nav + "'])")
+ except Exception as e:
+ print(f"[WARN] {e}")
+
+ # odata_bind
+ log_call("client.query.odata_bind(...)")
+ try:
+ bind = client.query.odata_bind(child_table, parent_table, team_ids[0])
+ print(f"[OK] {bind}")
+ print(" Merge into create/update payload: {'new_Title': 'X', **bind}")
+ except Exception as e:
+ print(f"[WARN] {e}")
+
+ # ==============================================================
+ # 31. SQL vs OData Comparison
+ # ==============================================================
+ heading(31, "SQL vs OData -- Side-by-Side Comparison")
+ print("Both SQL and OData can query Dataverse. Here's how they compare.")
+
+ print("""
++-------------------------------+------------------------+------------------------+
+| Capability | SQL (client.query.sql) | OData (records.get) |
++-------------------------------+------------------------+------------------------+
+| Read data | YES | YES |
+| Write data | NO (read-only) | YES (create/update/del)|
+| JOIN depth | No limit (tested 15) | $expand 10-level max |
+| JOIN types | INNER, LEFT | $expand (single-valued)|
+| Aggregates (COUNT, SUM, etc.) 
| YES (server-side) | Limited ($apply) | +| GROUP BY | YES (server-side) | Via $apply (complex) | +| DISTINCT | YES | Not directly | +| Pagination | OFFSET FETCH | @odata.nextLink | +| Max results | 5000 per query | 5000 per page | +| Column discovery | sql_columns/sql_select | odata_select | +| JOIN discovery | sql_joins/sql_join | odata_expands/expand | +| Lookup binding | N/A (read-only) | odata_bind | +| SELECT * | YES (SDK auto-expands) | Not applicable | +| Polymorphic lookups | Separate JOINs | $expand by nav prop | +| Return format | list[Record] / DF | pages of Record / DF | +| Subqueries | NO (chain SQL calls) | NO ($filter only) | +| Functions (CASE, CAST, etc.) | NO | NO | ++-------------------------------+------------------------+------------------------+ + +When to use SQL: + - Complex JOINs across 3+ tables + - Aggregates and GROUP BY + - DISTINCT queries + - Familiar SQL syntax preferred + - Read-only analysis / reporting + +When to use OData (records.get): + - Need to write data (create/update/delete) + - Simple single-table or 1-level expand queries + - Need automatic paging (nextLink) + - Prefer typed QueryBuilder API +""") + + # Live comparison: same query via SQL and OData + print("-- Live comparison: account + contact --") + import time as _time + + # SQL version + t0 = _time.time() + try: + sql_rows = backoff( + lambda: client.query.sql( + "SELECT TOP 5 a.name, c.fullname " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid" + ) + ) + sql_time = _time.time() - t0 + print(f" SQL JOIN: {len(sql_rows)} rows in {sql_time:.2f}s") + except Exception as e: + sql_time = _time.time() - t0 + print(f" SQL JOIN: error ({sql_time:.2f}s): {e}") + + # OData version (expand) + t0 = _time.time() + try: + odata_rows = [] + for page in backoff( + lambda: client.records.get( + "account", + select=["name"], + expand=["contact_customer_accounts"], + top=5, + ) + ): + odata_rows.extend(page) + odata_time = _time.time() - t0 + print(f" OData 
$expand: {len(odata_rows)} rows in {odata_time:.2f}s") + except Exception as e: + odata_time = _time.time() - t0 + print(f" OData $expand: error ({odata_time:.2f}s): {e}") + + # ============================================================== + # 32. Anti-Patterns & Best Practices + # ============================================================== + heading(32, "IMPORTANT: Anti-Patterns & Best Practices") + print(""" +=== ANTI-PATTERNS (avoid these -- they hurt shared database performance) === + +1. CARTESIAN PRODUCTS (FROM table1, table2 without ON) + BAD: SELECT a.name, c.fullname FROM account a, contact c + WHY: Produces rows_a * rows_b intermediate rows. With 5000-row tables, + that's 25 MILLION rows the server must process before capping at 5000. + FIX: Always use explicit JOIN with ON clause. + +2. LEADING-WILDCARD LIKE (LIKE '%value') + BAD: SELECT name FROM account WHERE name LIKE '%corp' + WHY: Forces a FULL TABLE SCAN -- cannot use indexes. On tables with + millions of rows, this monopolizes shared database resources and + slows down OTHER users' queries on the same database. + FIX: Use trailing wildcards: LIKE 'corp%' (uses indexes efficiently). + If you must search mid-string, add TOP to limit scan scope. + +3. NO FILTER ON LARGE SYSTEM TABLES + BAD: SELECT name FROM role + WHY: System tables (role, asyncoperation, sdkmessageprocessingstep) + can have 5000+ rows. Unfiltered queries return max rows. + FIX: Always add WHERE filters and TOP when querying system tables. + +4. SELECT * ON WIDE TABLES + BAD: SELECT * FROM account (307 columns!) + WHY: The SDK auto-expands * into all 260+ non-virtual columns. + Every column is transferred over the network. + NOTE: With JOINs, SELECT * only expands the FIRST (FROM) table's + columns -- joined table columns will NOT be included. + Example: SELECT * FROM account a JOIN contact c ON ... + expands to account columns only; contact columns are missing. 
+ FIX: List only the columns you need: SELECT name, revenue FROM account
+ Or use the SDK helper:
+ cols = client.query.sql_select("account")
+ sql = f"SELECT TOP 10 {cols} FROM account"
+ For JOINs, always specify columns from each table explicitly:
+ SELECT a.name, c.fullname FROM account a JOIN contact c ON ...
+
+5. DEEP JOINS WITHOUT TOP
+ OK: SELECT TOP 100 a.name, ... FROM account a JOIN ... (15 tables)
+ BAD: SELECT a.name, ... FROM account a JOIN ... (15 tables, no TOP)
+ WHY: Deep JOINs are safe with proper FK relationships and TOP.
+ Without TOP, the server processes up to 5000 rows across all joins.
+ FIX: Always include TOP N for multi-table JOINs.
+
+SDK guardrails:
+ - Write statements and unsupported syntax (CROSS/RIGHT/FULL JOIN,
+ UNION, HAVING, CTE, subqueries) -> ValidationError (blocked).
+ - Patterns #1 (cartesian FROM a, b), #2 (leading-wildcard LIKE),
+ and #4 (SELECT * + JOIN) -> UserWarning (advisory).
+ - Server enforces 5000-row cap on all queries (#3, #5).
+ - Use sql_columns() or sql_select() to discover valid column names.
+ - Use sql_joins() or sql_join() to discover valid JOIN clauses.
+""")
+
+ # ==============================================================
+ # 33. 
Summary + # ============================================================== + heading(33, "Summary -- SQL Capabilities Reference") + print(""" ++-------------------------------+----------+----------------------------------------+ +| Feature | SQL | Notes / SDK Fallback | ++-------------------------------+----------+----------------------------------------+ +| SELECT col1, col2 | YES | Use LogicalName (lowercase) | +| SELECT * | YES (*) | SDK auto-expands via list_columns() | +| WHERE =, !=, >, <, LIKE, IN | YES | | +| AND, OR, parentheses | YES | Full boolean logic | +| NOT IN, NOT LIKE | YES | | +| IS NULL, IS NOT NULL, BETWEEN | YES | | +| TOP N (0-5000) | YES | Max 5000 per query | +| ORDER BY col [ASC|DESC] | YES | Multiple columns supported | +| OFFSET n FETCH NEXT m | YES | Server-side pagination | +| Table/Column aliases | YES | | +| DISTINCT / DISTINCT TOP | YES | Works with JOINs too | +| COUNT, SUM, AVG, MIN, MAX | YES | All 5 standard aggregates | +| GROUP BY | YES | Server-side grouping | +| INNER JOIN | YES | 15+ tables tested (no depth limit) | +| LEFT JOIN | YES | | +| Self JOIN | YES | Same table with different aliases | +| SQL -> DataFrame | YES | client.dataframe.sql(query) | +| Polymorphic lookups | YES | Separate JOINs per target type | +| Nested polymorphic chains | YES | e.g. opp -> acct -> contact -> owner | +| Audit trail (createdby, etc.) 
| YES | JOIN to systemuser | +| SQL read -> DF write-back | YES | dataframe.sql() + .update()/.create() | +| SQL column discovery | YES | query.sql_columns() / sql_select() | +| SQL JOIN discovery | YES | query.sql_joins() / sql_join() | +| OData column discovery | YES | query.odata_select() | +| OData expand discovery | YES | query.odata_expands() / odata_expand() | +| OData bind builder | YES | query.odata_bind() | ++-------------------------------+----------+----------------------------------------+ +| HAVING | NO | Filter before GROUP BY | +| Subqueries / CTE | NO | Chain multiple SQL calls | +| RIGHT/FULL OUTER/CROSS JOIN | NO | Rewrite as LEFT/INNER JOIN | +| UNION / UNION ALL | NO | Separate queries + pd.concat() | +| CASE, COALESCE, CAST | NO | Post-process in Python/pandas | +| String/Date/Math functions | NO | Post-process in Python/pandas | +| Window fns (ROW_NUMBER, RANK) | NO | Post-process in Python/pandas | +| INSERT / UPDATE / DELETE | NO | dataframe.create/update/delete() | ++-------------------------------+----------+----------------------------------------+ + +SQL-First Workflow (no OData knowledge needed): + 1. Discover schema: cols = client.query.sql_columns("account") + 2. Discover JOINs: joins = client.query.sql_joins("contact") + 3. Build JOIN: j = client.query.sql_join("contact", "account", from_alias="c", to_alias="a") + 4. Query with SQL: df = client.dataframe.sql(f"SELECT c.fullname, a.name FROM contact c {j}") + 5. Transform: df["col"] = df["col"] * 1.1 + 6. Write back: client.dataframe.update("account", df, id_column="accountid") + 7. 
Verify: df2 = client.dataframe.sql("SELECT ...") +""") + + finally: + heading(34, "Cleanup") + for tbl in [child_table, parent_table]: + log_call(f"client.tables.delete('{tbl}')") + try: + backoff(lambda tbl=tbl: client.tables.delete(tbl)) + print(f"[OK] Deleted table: {tbl}") + except Exception as ex: + code = getattr(getattr(ex, "response", None), "status_code", None) + if isinstance(ex, (requests.exceptions.HTTPError, MetadataError)) and code == 404: + print(f"[OK] Table already removed: {tbl}") + else: + print(f"[WARN] Could not delete {tbl}: {ex}") + + print("\n" + "=" * 80) + print("SQL Examples Complete!") + print("=" * 80) + + +if __name__ == "__main__": + main() diff --git a/src/PowerPlatform/Dataverse/core/_error_codes.py b/src/PowerPlatform/Dataverse/core/_error_codes.py index 12703198..139d8dd4 100644 --- a/src/PowerPlatform/Dataverse/core/_error_codes.py +++ b/src/PowerPlatform/Dataverse/core/_error_codes.py @@ -9,6 +9,8 @@ SQL parsing errors, and metadata operation errors. 
""" +__all__ = [] + # HTTP subcode constants HTTP_400 = "http_400" HTTP_401 = "http_401" @@ -41,6 +43,9 @@ # Validation subcodes VALIDATION_SQL_NOT_STRING = "validation_sql_not_string" VALIDATION_SQL_EMPTY = "validation_sql_empty" +VALIDATION_SQL_WRITE_BLOCKED = "validation_sql_write_blocked" +VALIDATION_SQL_CROSS_JOIN_BLOCKED = "validation_sql_cross_join_blocked" +VALIDATION_SQL_UNSUPPORTED_SYNTAX = "validation_sql_unsupported_syntax" VALIDATION_ENUM_NO_MEMBERS = "validation_enum_no_members" VALIDATION_ENUM_NON_INT_VALUE = "validation_enum_non_int_value" VALIDATION_UNSUPPORTED_COLUMN_TYPE = "validation_unsupported_column_type" diff --git a/src/PowerPlatform/Dataverse/data/_odata.py b/src/PowerPlatform/Dataverse/data/_odata.py index a3ccaefe..8e7e7e48 100644 --- a/src/PowerPlatform/Dataverse/data/_odata.py +++ b/src/PowerPlatform/Dataverse/data/_odata.py @@ -5,6 +5,8 @@ from __future__ import annotations +__all__ = [] + from typing import Any, Dict, Optional, List, Union, Iterable, Callable from enum import Enum from dataclasses import dataclass, field @@ -13,6 +15,7 @@ import re import json import uuid +import warnings from datetime import datetime, timezone import importlib.resources as ir from contextlib import contextmanager @@ -37,6 +40,8 @@ _is_transient_status, VALIDATION_SQL_NOT_STRING, VALIDATION_SQL_EMPTY, + VALIDATION_SQL_WRITE_BLOCKED, + VALIDATION_SQL_UNSUPPORTED_SYNTAX, VALIDATION_UNSUPPORTED_COLUMN_TYPE, METADATA_ENTITYSET_NOT_FOUND, METADATA_ENTITYSET_NAME_MISSING, @@ -123,7 +128,7 @@ def _lowercase_keys(record: Dict[str, Any]) -> Dict[str, Any]: def _lowercase_list(items: Optional[List[str]]) -> Optional[List[str]]: """Convert all strings in a list to lowercase for case-insensitive column names. - Used for $select, $orderby, $expand parameters where column names must be lowercase. + Used for $select and $orderby parameters where column names must be lowercase. 
""" if not items: return items @@ -552,8 +557,8 @@ def _delete_multiple( ) -> Optional[str]: """Delete many records by GUID list via the ``BulkDelete`` action. - :param logical_name: Logical (singular) entity name. - :type logical_name: ``str`` + :param table_schema_name: Schema name of the table. + :type table_schema_name: ``str`` :param ids: GUIDs of records to delete. :type ids: ``list[str]`` @@ -750,6 +755,186 @@ def _do_request(url: str, *, params: Optional[Dict[str, Any]] = None) -> Dict[st yield [x for x in items if isinstance(x, dict)] next_link = data.get("@odata.nextLink") or data.get("odata.nextLink") if isinstance(data, dict) else None + # ----------------------- SELECT * detection ----------------------- + _SELECT_STAR_RE = re.compile( + r"\bSELECT\b(\s+(?:DISTINCT\s+)?(?:TOP\s+\d+(?:\s+PERCENT)?\s+)?)\*\s", + re.IGNORECASE, + ) + + # ----------------------- SQL guardrail patterns -------------------- + _SQL_WRITE_RE = re.compile( + r"^\s*(?:INSERT|UPDATE|DELETE|DROP|TRUNCATE|ALTER|CREATE|EXEC|GRANT|REVOKE|BULK)\b", + re.IGNORECASE, + ) + _SQL_COMMENT_RE = re.compile(r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/|--[^\n]*", re.DOTALL) + _SQL_LEADING_WILDCARD_RE = re.compile(r"\bLIKE\s+'%[^']", re.IGNORECASE) + _SQL_IMPLICIT_CROSS_JOIN_RE = re.compile( + r"\bFROM\s+[A-Za-z0-9_]+(?:\s+[A-Za-z0-9_]+)?\s*,\s*[A-Za-z0-9_]+", + re.IGNORECASE, + ) + _SQL_HAS_JOIN_RE = re.compile(r"\bJOIN\b", re.IGNORECASE) + # Server-blocked SQL patterns (save the round-trip by catching early) + _SQL_UNSUPPORTED_JOIN_RE = re.compile( + r"\b(?:CROSS\s+JOIN|RIGHT\s+(?:OUTER\s+)?JOIN|FULL\s+(?:OUTER\s+)?JOIN)\b", + re.IGNORECASE, + ) + _SQL_UNION_RE = re.compile(r"\bUNION\b", re.IGNORECASE) + _SQL_HAVING_RE = re.compile(r"\bHAVING\b", re.IGNORECASE) + _SQL_CTE_RE = re.compile(r"^\s*WITH\b", re.IGNORECASE) + _SQL_SUBQUERY_RE = re.compile( + r"\bIN\s*\(\s*SELECT\b|\bEXISTS\s*\(\s*SELECT\b|\(\s*SELECT\b.*\bFROM\b", + re.IGNORECASE, + ) + + def _expand_select_star(self, sql: str, table: str) -> 
str: + """Replace ``SELECT *`` with explicit column names. + + When the Dataverse SQL endpoint receives ``SELECT *`` it returns + an error ("SELECT * is not supported"). This helper resolves all + columns via ``_list_columns`` and rewrites the query so the user + never has to know the server limitation. + + For JOIN queries, the expansion only includes columns from the first + (FROM) table. A warning is emitted so the user knows to specify + columns explicitly for multi-table queries. + """ + if not self._SELECT_STAR_RE.search(sql): + return sql + + # Warn on SELECT * with JOINs -- expansion uses only the FROM table + if self._SQL_HAS_JOIN_RE.search(sql): + warnings.warn( + "SELECT * with JOIN: the SDK expands * using columns from " + "the first table only. Columns from joined tables will not " + "be included. Specify columns explicitly for JOINs " + "(e.g. SELECT a.name, c.fullname FROM account a " + "JOIN contact c ON ...).", + UserWarning, + stacklevel=4, + ) + + cols = self._list_columns( + table, + select=["LogicalName"], + filter="AttributeType ne 'Virtual'", + ) + col_names = sorted({c["LogicalName"] for c in cols if "LogicalName" in c}) + if not col_names: + return sql # Fallback: let the server decide + col_list = ", ".join(col_names) + return self._SELECT_STAR_RE.sub(lambda m: f"SELECT{m.group(1)}{col_list} ", sql, count=1) + + def _sql_guardrails(self, sql: str) -> str: + """Apply safety guardrails to a SQL query before sending to the server. + + Checks split into two categories: + + **Blocked** (``ValidationError`` -- saves a server round-trip): + + 1. Write statements (INSERT/UPDATE/DELETE/DROP/etc.) + 2. CROSS JOIN, RIGHT JOIN, FULL OUTER JOIN (server rejects these) + 3. UNION / UNION ALL (server rejects) + 4. HAVING clause (server rejects) + 5. CTE / WITH clause (server rejects) + 6. Subqueries -- IN (SELECT ...), EXISTS (SELECT ...) (server rejects) + + **Warned** (``UserWarning`` -- query still executes): + + 7. 
Leading-wildcard LIKE (full table scan) + 8. Implicit cross join FROM a, b (cartesian product) + + All blocked patterns are also blocked by the server, but catching + them here saves the network round-trip and provides clearer error + messages. To bypass a specific check (e.g., if the server adds + support in the future), all checks are in this single method. + + :param sql: The SQL string (already stripped). + :return: The SQL string (unchanged unless rewritten). + :raises ValidationError: If the SQL contains a blocked pattern. + """ + # --- BLOCKED (save server round-trip) --- + + # 1. Block writes (strip SQL comments first to catch comment-prefixed writes) + sql_no_comments = self._SQL_COMMENT_RE.sub(" ", sql).strip() + if self._SQL_WRITE_RE.search(sql_no_comments): + raise ValidationError( + "SQL endpoint is read-only. Use client.records or " + "client.dataframe for write operations " + "(INSERT/UPDATE/DELETE are not supported).", + subcode=VALIDATION_SQL_WRITE_BLOCKED, + ) + + # 2. Block unsupported JOIN types + m = self._SQL_UNSUPPORTED_JOIN_RE.search(sql) + if m: + raise ValidationError( + f"Unsupported JOIN type: '{m.group(0).strip()}'. " + "Only INNER JOIN and LEFT JOIN are supported by the " + "Dataverse SQL endpoint.", + subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX, + ) + + # 3. Block UNION + if self._SQL_UNION_RE.search(sql): + raise ValidationError( + "UNION is not supported by the Dataverse SQL endpoint. " + "Execute separate queries and combine results in Python " + "(e.g. pd.concat([df1, df2])).", + subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX, + ) + + # 4. Block HAVING + if self._SQL_HAVING_RE.search(sql): + raise ValidationError( + "HAVING is not supported by the Dataverse SQL endpoint. " + "Use WHERE to filter before GROUP BY instead.", + subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX, + ) + + # 5. Block CTE / WITH + if self._SQL_CTE_RE.search(sql): + raise ValidationError( + "CTE (WITH ... AS) is not supported by the Dataverse SQL " + "endpoint. 
Use separate queries and combine in Python.", + subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX, + ) + + # 6. Block subqueries + if self._SQL_SUBQUERY_RE.search(sql): + raise ValidationError( + "Subqueries are not supported by the Dataverse SQL " + "endpoint. Use separate SQL calls and combine results " + "in Python (e.g. step 1: get IDs, step 2: WHERE IN).", + subcode=VALIDATION_SQL_UNSUPPORTED_SYNTAX, + ) + + # --- WARNED (query still executes) --- + + # 7. Warn on leading-wildcard LIKE + if self._SQL_LEADING_WILDCARD_RE.search(sql): + warnings.warn( + "Query contains a leading-wildcard LIKE pattern " + "(e.g. LIKE '%value'). This forces a full table scan " + "and may degrade performance on large tables. " + "Prefer trailing wildcards (LIKE 'value%') when possible.", + UserWarning, + stacklevel=4, + ) + + # 8. Warn on implicit cross joins (server allows but risky) + if self._SQL_IMPLICIT_CROSS_JOIN_RE.search(sql): + warnings.warn( + "Query uses an implicit cross join (FROM table1, table2). " + "This produces a cartesian product that can generate " + "millions of intermediate rows and degrade shared database " + "performance. Use explicit JOIN...ON syntax instead: " + "FROM table1 a JOIN table2 b ON a.column = b.column", + UserWarning, + stacklevel=4, + ) + + return sql + # --------------------------- SQL Custom API ------------------------- def _query_sql(self, sql: str) -> list[dict[str, Any]]: """Execute a read-only SQL SELECT using the Dataverse Web API ``?sql=`` capability. @@ -764,13 +949,38 @@ def _query_sql(self, sql: str) -> list[dict[str, Any]]: :raises MetadataError: If logical table name resolution fails. .. note:: - Endpoint form: ``GET /{entity_set}?sql=``. The client extracts the logical table name, resolves the entity set (metadata cached), then issues the request. Only a constrained SELECT subset is supported by the platform. + Endpoint form: ``GET /{entity_set}?sql=``. 
The client + extracts the logical table name, resolves the entity set (metadata + cached), then issues the request. ``SELECT *`` is automatically + expanded into explicit column names because the server blocks it. """ if not isinstance(sql, str): raise ValidationError("sql must be a string", subcode=VALIDATION_SQL_NOT_STRING) if not sql.strip(): raise ValidationError("sql must be a non-empty string", subcode=VALIDATION_SQL_EMPTY) sql = sql.strip() + + # Block write statements FIRST (before table extraction, since + # UPDATE/INSERT/DELETE don't have FROM clauses). + # Strip SQL comments to catch e.g. /**/DELETE or --\\nDELETE. + sql_no_comments = self._SQL_COMMENT_RE.sub(" ", sql).strip() + if self._SQL_WRITE_RE.search(sql_no_comments): + raise ValidationError( + "SQL endpoint is read-only. Use client.records or " + "client.dataframe for write operations " + "(INSERT/UPDATE/DELETE are not supported).", + subcode=VALIDATION_SQL_WRITE_BLOCKED, + ) + + # Extract logical table name via helper (robust to identifiers ending with 'from') + logical = self._extract_logical_table(sql) + + # Auto-expand SELECT * into explicit column names + sql = self._expand_select_star(sql, logical) + + # Apply safety guardrails (block unsupported syntax, warn on risky patterns) + sql = self._sql_guardrails(sql) + r = self._execute_raw(self._build_sql(sql)) try: body = r.json() @@ -969,6 +1179,50 @@ def _get_attribute_metadata( return item return None + def _list_columns( + self, + table_schema_name: str, + *, + select: Optional[List[str]] = None, + filter: Optional[str] = None, + ) -> List[Dict[str, Any]]: + """List all attribute (column) definitions for a table. + + Issues ``GET EntityDefinitions({MetadataId})/Attributes`` with optional + ``$select`` and ``$filter`` query parameters. + + :param table_schema_name: Schema name of the table + (e.g. ``"account"`` or ``"new_Product"``). 
+ :type table_schema_name: ``str`` + :param select: Optional list of property names to project via + ``$select``. Values are passed as-is (PascalCase). + :type select: ``list[str]`` or ``None`` + :param filter: Optional OData ``$filter`` expression. For example, + ``"AttributeType eq 'String'"`` returns only string columns. + :type filter: ``str`` or ``None`` + + :return: List of raw attribute metadata dictionaries (may be empty). + :rtype: ``list[dict[str, Any]]`` + + :raises MetadataError: If the table is not found. + :raises HttpError: If the Web API request fails. + """ + ent = self._get_entity_by_table_schema_name(table_schema_name) + if not ent or not ent.get("MetadataId"): + raise MetadataError( + f"Table '{table_schema_name}' not found.", + subcode=METADATA_TABLE_NOT_FOUND, + ) + metadata_id = ent["MetadataId"] + url = f"{self.api}/EntityDefinitions({metadata_id})/Attributes" + params: Dict[str, str] = {} + if select: + params["$select"] = ",".join(select) + if filter: + params["$filter"] = filter + r = self._request("get", url, params=params) + return r.json().get("value", []) + def _wait_for_attribute_visibility( self, entity_set: str, @@ -2104,7 +2358,17 @@ def _build_lookup_field_models( Returns ``(LookupAttributeMetadata, OneToManyRelationshipMetadata)``. Used by both the batch resolver and ``TableOperations.create_lookup_field`` to avoid duplicating the metadata assembly logic. + + Note: ``referencing_table`` and ``referenced_table`` are lowercased + automatically because Dataverse stores entity logical names in + lowercase. ``lookup_field_name`` is kept as-is (it is a SchemaName). """ + # Dataverse logical names are always lowercase. Callers may pass + # SchemaName-cased values (e.g. "new_SQLTeam"); normalise here so + # the relationship metadata uses valid logical names. 
+ referencing_lower = referencing_table.lower() + referenced_lower = referenced_table.lower() + lookup = LookupAttributeMetadata( schema_name=lookup_field_name, display_name=Label( @@ -2121,12 +2385,12 @@ def _build_lookup_field_models( lookup.description = Label( localized_labels=[LocalizedLabel(label=description, language_code=language_code)] ) - rel_name = f"{referenced_table}_{referencing_table}_{lookup_field_name}" + rel_name = f"{referenced_lower}_{referencing_lower}_{lookup_field_name}" relationship = OneToManyRelationshipMetadata( schema_name=rel_name, - referenced_entity=referenced_table, - referencing_entity=referencing_table, - referenced_attribute=f"{referenced_table}id", + referenced_entity=referenced_lower, + referencing_entity=referencing_lower, + referenced_attribute=f"{referenced_lower}id", cascade_configuration=CascadeConfiguration(delete=cascade_delete), ) return lookup, relationship diff --git a/src/PowerPlatform/Dataverse/data/_relationships.py b/src/PowerPlatform/Dataverse/data/_relationships.py index c2099a53..30250238 100644 --- a/src/PowerPlatform/Dataverse/data/_relationships.py +++ b/src/PowerPlatform/Dataverse/data/_relationships.py @@ -9,8 +9,10 @@ from __future__ import annotations +__all__ = [] + import re -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional class _RelationshipOperationsMixin: @@ -142,6 +144,96 @@ def _get_relationship(self, schema_name: str) -> Optional[Dict[str, Any]]: results = data.get("value", []) return results[0] if results else None + def _list_relationships( + self, + *, + filter: Optional[str] = None, + select: Optional[List[str]] = None, + ) -> List[Dict[str, Any]]: + """List all relationship definitions. + + Issues ``GET /RelationshipDefinitions`` with optional ``$filter`` and + ``$select`` query parameters. + + :param filter: Optional OData ``$filter`` expression. 
For example, + ``"RelationshipType eq Microsoft.Dynamics.CRM.RelationshipType'OneToManyRelationship'"`` + returns only one-to-many relationships. + :type filter: ``str`` or ``None`` + :param select: Optional list of property names to project via + ``$select``. Values are passed as-is (PascalCase). + :type select: ``list[str]`` or ``None`` + + :return: List of raw relationship metadata dictionaries (may be empty). + :rtype: ``list[dict[str, Any]]`` + + :raises HttpError: If the Web API request fails. + """ + url = f"{self.api}/RelationshipDefinitions" + params: Dict[str, str] = {} + if filter: + params["$filter"] = filter + if select: + params["$select"] = ",".join(select) + r = self._request("get", url, headers=self._headers(), params=params) + return r.json().get("value", []) + + def _list_table_relationships( + self, + table_schema_name: str, + *, + filter: Optional[str] = None, + select: Optional[List[str]] = None, + ) -> List[Dict[str, Any]]: + """List all relationships for a specific table. + + Issues ``GET EntityDefinitions({MetadataId})/OneToManyRelationships``, + ``GET EntityDefinitions({MetadataId})/ManyToOneRelationships``, and + ``GET EntityDefinitions({MetadataId})/ManyToManyRelationships``, + then combines the results. + + :param table_schema_name: Schema name of the table (e.g. ``"account"``). + :type table_schema_name: ``str`` + :param filter: Optional OData ``$filter`` expression applied to each + sub-request. + :type filter: ``str`` or ``None`` + :param select: Optional list of property names to project via + ``$select``. Values are passed as-is (PascalCase). + :type select: ``list[str]`` or ``None`` + + :return: Combined list of one-to-many, many-to-one, and many-to-many + relationship metadata dictionaries (may be empty). + :rtype: ``list[dict[str, Any]]`` + + :raises MetadataError: If the table is not found. + :raises HttpError: If the Web API request fails. 
+ """ + from ..core.errors import MetadataError + from ..core._error_codes import METADATA_TABLE_NOT_FOUND + + ent = self._get_entity_by_table_schema_name(table_schema_name) + if not ent or not ent.get("MetadataId"): + raise MetadataError( + f"Table '{table_schema_name}' not found.", + subcode=METADATA_TABLE_NOT_FOUND, + ) + + metadata_id = ent["MetadataId"] + params: Dict[str, str] = {} + if filter: + params["$filter"] = filter + if select: + params["$select"] = ",".join(select) + + one_to_many_url = f"{self.api}/EntityDefinitions({metadata_id})/OneToManyRelationships" + many_to_one_url = f"{self.api}/EntityDefinitions({metadata_id})/ManyToOneRelationships" + many_to_many_url = f"{self.api}/EntityDefinitions({metadata_id})/ManyToManyRelationships" + + r1 = self._request("get", one_to_many_url, headers=self._headers(), params=params) + r2 = self._request("get", many_to_one_url, headers=self._headers(), params=params) + r3 = self._request("get", many_to_many_url, headers=self._headers(), params=params) + + return r1.json().get("value", []) + r2.json().get("value", []) + r3.json().get("value", []) + def _extract_id_from_header(self, header_value: Optional[str]) -> Optional[str]: """ Extract a GUID from an OData-EntityId header value. diff --git a/src/PowerPlatform/Dataverse/operations/dataframe.py b/src/PowerPlatform/Dataverse/operations/dataframe.py index 89e31e7f..e6ec2033 100644 --- a/src/PowerPlatform/Dataverse/operations/dataframe.py +++ b/src/PowerPlatform/Dataverse/operations/dataframe.py @@ -51,6 +51,48 @@ class DataFrameOperations: def __init__(self, client: DataverseClient) -> None: self._client = client + # --------------------------------------------------------------------- sql + + def sql(self, sql: str) -> pd.DataFrame: + """Execute a SQL query and return the results as a pandas DataFrame. + + Delegates to :meth:`~PowerPlatform.Dataverse.operations.query.QueryOperations.sql` + and converts the list of records into a single DataFrame. 
+ + :param sql: Supported SQL SELECT statement. + :type sql: :class:`str` + + :return: DataFrame containing all result rows. Returns an empty + DataFrame when no rows match. + :rtype: ~pandas.DataFrame + + :raises ~PowerPlatform.Dataverse.core.errors.ValidationError: + If ``sql`` is not a string or is empty. + + Example: + SQL query to DataFrame:: + + df = client.dataframe.sql( + "SELECT TOP 100 name, revenue FROM account " + "WHERE statecode = 0 ORDER BY revenue" + ) + print(f"Got {len(df)} rows") + print(df.head()) + + Aggregate query to DataFrame:: + + df = client.dataframe.sql( + "SELECT a.name, COUNT(c.contactid) as cnt " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid " + "GROUP BY a.name" + ) + """ + rows = self._client.query.sql(sql) + if not rows: + return pd.DataFrame() + return pd.DataFrame.from_records([r.data for r in rows]) + # -------------------------------------------------------------------- get def get( diff --git a/src/PowerPlatform/Dataverse/operations/query.py b/src/PowerPlatform/Dataverse/operations/query.py index e0dc6972..5926dffe 100644 --- a/src/PowerPlatform/Dataverse/operations/query.py +++ b/src/PowerPlatform/Dataverse/operations/query.py @@ -5,7 +5,7 @@ from __future__ import annotations -from typing import List, TYPE_CHECKING +from typing import Any, Dict, List, Optional, TYPE_CHECKING from ..models.record import Record @@ -95,9 +95,24 @@ def builder(self, table: str) -> QueryBuilder: def sql(self, sql: str) -> List[Record]: """Execute a read-only SQL query using the Dataverse Web API. - The SQL query must follow the supported subset: a single SELECT - statement with optional WHERE, TOP (integer literal), ORDER BY (column - names only), and a simple table alias after FROM. 
+ The Dataverse SQL endpoint supports a broad subset of T-SQL:: + + SELECT / SELECT DISTINCT / SELECT TOP N (0-5000) + FROM table [alias] + INNER JOIN / LEFT JOIN (multi-table, no depth limit) + WHERE (=, !=, >, <, >=, <=, LIKE, IN, NOT IN, IS NULL, + IS NOT NULL, BETWEEN, AND, OR, nested parentheses) + GROUP BY column + ORDER BY column [ASC|DESC] + OFFSET n ROWS FETCH NEXT m ROWS ONLY + COUNT(*), SUM(), AVG(), MIN(), MAX() + + ``SELECT *`` is automatically expanded into explicit column names + by the SDK (the server rejects ``*`` directly). + + Not supported: subqueries, CTE, HAVING, UNION, RIGHT/FULL/CROSS + JOIN, CASE, COALESCE, window functions, string/date/math functions, + INSERT/UPDATE/DELETE. For writes, use ``client.records`` methods. :param sql: Supported SQL SELECT statement. :type sql: :class:`str` @@ -110,22 +125,478 @@ def sql(self, sql: str) -> List[Record]: If ``sql`` is not a string or is empty. Example: - Basic SQL query:: + Basic query:: + + rows = client.query.sql( + "SELECT TOP 10 name FROM account ORDER BY name" + ) + + JOIN with aggregation:: rows = client.query.sql( - "SELECT TOP 10 accountid, name FROM account " - "WHERE name LIKE 'C%' ORDER BY name" + "SELECT a.name, COUNT(c.contactid) as cnt " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid " + "GROUP BY a.name" ) - for row in rows: - print(row["name"]) - Query with alias:: + SELECT * (auto-expanded by SDK):: rows = client.query.sql( - "SELECT a.name, a.telephone1 FROM account AS a " - "WHERE a.statecode = 0" + "SELECT * FROM account" ) """ with self._client._scoped_odata() as od: rows = od._query_sql(sql) return [Record.from_api_response("", row) for row in rows] + + # --------------------------------------------------------------- sql_columns + + def sql_columns( + self, + table: str, + *, + include_system: bool = False, + ) -> List[Dict[str, Any]]: + """Return a simplified list of SQL-usable columns for a table. 
+ + Each dict contains ``name`` (logical name for SQL), ``type`` + (Dataverse attribute type), ``is_pk`` (primary key flag), and + ``label`` (display name). Virtual columns are always excluded + because the SQL endpoint cannot query them. + + :param table: Schema name of the table (e.g. ``"account"``). + :type table: :class:`str` + :param include_system: When ``False`` (default), columns that end + with common system suffixes (``_base``, ``versionnumber``, + ``timezoneruleversionnumber``, ``utcconversiontimezonecode``, + ``importsequencenumber``, ``overriddencreatedon``) are excluded. + :type include_system: :class:`bool` + + :return: List of column metadata dicts. + :rtype: list[dict[str, typing.Any]] + + Example:: + + cols = client.query.sql_columns("account") + for c in cols: + print(f"{c['name']:30s} {c['type']:20s} PK={c['is_pk']}") + """ + _SYSTEM_SUFFIXES = ( + "_base", + "versionnumber", + "timezoneruleversionnumber", + "utcconversiontimezonecode", + "importsequencenumber", + "overriddencreatedon", + ) + + raw = self._client.tables.list_columns( + table, + select=[ + "LogicalName", + "SchemaName", + "AttributeType", + "IsPrimaryId", + "IsPrimaryName", + "DisplayName", + "AttributeOf", + ], + filter="AttributeType ne 'Virtual'", + ) + result: List[Dict[str, Any]] = [] + for c in raw: + name = c.get("LogicalName", "") + if not name: + continue + if not include_system and any(name.endswith(s) for s in _SYSTEM_SUFFIXES): + continue + # Skip computed display-name columns (AttributeOf is set, meaning + # they are auto-generated from a lookup column) + if c.get("AttributeOf"): + continue + # Extract display label + label = "" + dn = c.get("DisplayName") + if isinstance(dn, dict): + ul = dn.get("UserLocalizedLabel") + if isinstance(ul, dict): + label = ul.get("Label", "") + result.append( + { + "name": name, + "type": c.get("AttributeType", ""), + "is_pk": bool(c.get("IsPrimaryId")), + "is_name": bool(c.get("IsPrimaryName")), + "label": label, + } + ) + 
result.sort(key=lambda x: (not x["is_pk"], not x["is_name"], x["name"])) + return result + + # --------------------------------------------------------------- sql_select + + def sql_select( + self, + table: str, + *, + include_system: bool = False, + ) -> str: + """Return a comma-separated column list for use in SQL SELECT. + + Excludes virtual columns and optionally system columns. The result + can be embedded directly in a SQL query string. + + :param table: Schema name of the table (e.g. ``"account"``). + :type table: :class:`str` + :param include_system: Include system columns (default ``False``). + :type include_system: :class:`bool` + + :return: Comma-separated column names. + :rtype: :class:`str` + + Example:: + + cols = client.query.sql_select("account") + sql = f"SELECT TOP 10 {cols} FROM account" + df = client.dataframe.sql(sql) + """ + columns = self.sql_columns(table, include_system=include_system) + return ", ".join(c["name"] for c in columns) + + # --------------------------------------------------------------- sql_joins + + def sql_joins( + self, + table: str, + ) -> List[Dict[str, Any]]: + """Discover all possible SQL JOINs from a table. + + Returns one entry per outgoing lookup relationship, with the + exact column names needed for SQL ``JOIN ... ON`` clauses. + + For **polymorphic** lookups (e.g. ``customerid`` targeting both + ``account`` and ``contact``), multiple entries are returned with + the same ``column`` but different ``target`` values. + + :param table: Schema name of the table (e.g. ``"contact"``). + :type table: :class:`str` + + :return: List of JOIN metadata dicts, each containing: + + - ``column`` -- the lookup attribute on this table (use in ON clause) + - ``target`` -- the referenced entity name + - ``target_pk`` -- the referenced entity's primary key column + - ``relationship`` -- the schema name of the relationship + - ``join_clause`` -- a ready-to-use ``JOIN ... ON ...`` fragment + + :rtype: list[dict[str, typing.Any]] + + .. 
note:: + + The ``join_clause`` value references the source table by its + **full name** (e.g. ``ON contact.col = ...``), so the FROM + clause must also use the unaliased table name. For queries + that need aliases, use :meth:`sql_join` instead. + + Example:: + + joins = client.query.sql_joins("contact") + for j in joins: + print(f"{j['column']:30s} -> {j['target']}.{j['target_pk']}") + print(f" {j['join_clause']}") + + # Use in a query (no alias on the FROM table) + j = next(j for j in joins if j['target'] == 'account') + sql = f"SELECT TOP 10 contact.fullname, a.name FROM contact {j['join_clause']}" + """ + table_lower = table.lower() + rels = self._client.tables.list_table_relationships(table) + + result: List[Dict[str, Any]] = [] + for r in rels: + ref_entity = (r.get("ReferencingEntity") or "").lower() + if ref_entity != table_lower: + continue + col = r.get("ReferencingAttribute", "") + target = r.get("ReferencedEntity", "") + target_pk = r.get("ReferencedAttribute", "") + schema = r.get("SchemaName", "") + if not all([col, target, target_pk]): + continue + + # Generate a short alias for the target table + alias = target[0] if target else "j" + join_clause = f"JOIN {target} {alias} " f"ON {table_lower}.{col} = {alias}.{target_pk}" + + result.append( + { + "column": col, + "target": target, + "target_pk": target_pk, + "relationship": schema, + "join_clause": join_clause, + } + ) + + result.sort(key=lambda x: (x["target"], x["column"])) + return result + + # --------------------------------------------------------------- sql_join + + def sql_join( + self, + from_table: str, + to_table: str, + *, + from_alias: Optional[str] = None, + to_alias: Optional[str] = None, + ) -> str: + """Generate a SQL JOIN clause between two tables. + + Discovers the relationship automatically via metadata. If multiple + relationships exist (e.g. polymorphic lookups), picks the first + match. Use :meth:`sql_joins` to see all options. 
+ + :param from_table: Schema name of the FROM table (e.g. ``"contact"``). + :type from_table: :class:`str` + :param to_table: Schema name of the target table (e.g. ``"account"``). + :type to_table: :class:`str` + :param from_alias: Optional alias for the FROM table in the JOIN + clause. If ``None``, uses the full table name. + :type from_alias: :class:`str` or None + :param to_alias: Optional alias for the target table. If ``None``, + uses the first letter of the target table name. + :type to_alias: :class:`str` or None + + :return: A ready-to-use ``JOIN ... ON ...`` clause. + :rtype: :class:`str` + + :raises ValueError: If no relationship is found between the tables. + + Example:: + + j = client.query.sql_join("contact", "account", from_alias="c", to_alias="a") + # Returns: "JOIN account a ON c.parentcustomerid = a.accountid" + sql = f"SELECT TOP 10 c.fullname, a.name FROM contact c {j}" + df = client.dataframe.sql(sql) + """ + to_lower = to_table.lower() + joins = self.sql_joins(from_table) + match = [j for j in joins if j["target"].lower() == to_lower] + if not match: + raise ValueError( + f"No relationship found from '{from_table}' to '{to_table}'. " + f"Use client.query.sql_joins('{from_table}') to see available targets." + ) + + j = match[0] + src = from_alias or from_table.lower() + tgt = to_alias or to_lower[0] + return f"JOIN {to_lower} {tgt} " f"ON {src}.{j['column']} = {tgt}.{j['target_pk']}" + + # =========================================================== + # OData helpers -- eliminate friction for records.get() users + # =========================================================== + + # -------------------------------------------------------- odata_select + + def odata_select( + self, + table: str, + *, + include_system: bool = False, + ) -> List[str]: + """Return a list of column logical names suitable for ``$select``. + + Can be passed directly to ``client.records.get(table, select=...)``. + + :param table: Schema name of the table (e.g. 
``"account"``). + :type table: :class:`str` + :param include_system: Include system columns (default ``False``). + :type include_system: :class:`bool` + + :return: List of lowercase column logical names. + :rtype: list[str] + + Example:: + + cols = client.query.odata_select("account") + for page in client.records.get("account", select=cols, top=10): + for r in page: + print(r) + """ + columns = self.sql_columns(table, include_system=include_system) + return [c["name"] for c in columns] + + # ------------------------------------------------------- odata_expands + + def odata_expands( + self, + table: str, + ) -> List[Dict[str, Any]]: + """Discover all ``$expand`` navigation properties from a table. + + Returns entries for each outgoing lookup (single-valued navigation + property). Each entry contains the exact PascalCase navigation + property name needed for ``$expand`` and ``@odata.bind``, plus + the target entity set name. + + :param table: Schema name of the table (e.g. ``"contact"``). + :type table: :class:`str` + + :return: List of dicts, each with: + + - ``nav_property`` -- PascalCase navigation property for $expand + - ``target_table`` -- target entity logical name + - ``target_entity_set`` -- target entity set (for @odata.bind) + - ``lookup_attribute`` -- the lookup column logical name + - ``relationship`` -- relationship schema name + + :rtype: list[dict[str, typing.Any]] + + Example:: + + expands = client.query.odata_expands("contact") + for e in expands: + print(f"expand={e['nav_property']} -> {e['target_table']}") + + # Use in a query + e = next(e for e in expands if e['target_table'] == 'account') + for page in client.records.get("contact", + select=["fullname"], + expand=[e['nav_property']]): + ... 
+ """ + table_lower = table.lower() + rels = self._client.tables.list_table_relationships(table) + + result: List[Dict[str, Any]] = [] + for r in rels: + ref_entity = (r.get("ReferencingEntity") or "").lower() + if ref_entity != table_lower: + continue + nav_prop = r.get("ReferencingEntityNavigationPropertyName", "") + target = r.get("ReferencedEntity", "") + lookup_attr = r.get("ReferencingAttribute", "") + schema = r.get("SchemaName", "") + if not nav_prop or not target: + continue + + # Resolve entity set name for @odata.bind + target_set = "" + try: + with self._client._scoped_odata() as od: + target_set = od._entity_set_from_schema_name(target) + except (KeyError, AttributeError, ValueError): + pass # Entity set resolution failed; target_set stays empty + + result.append( + { + "nav_property": nav_prop, + "target_table": target, + "target_entity_set": target_set, + "lookup_attribute": lookup_attr, + "relationship": schema, + } + ) + + result.sort(key=lambda x: (x["target_table"], x["nav_property"])) + return result + + # -------------------------------------------------------- odata_expand + + def odata_expand( + self, + from_table: str, + to_table: str, + ) -> str: + """Return the navigation property name to ``$expand`` from one table to another. + + Discovers via relationship metadata. Returns the exact PascalCase + string for the ``expand=`` parameter. + + :param from_table: Schema name of the source table (e.g. ``"contact"``). + :type from_table: :class:`str` + :param to_table: Schema name of the target table (e.g. ``"account"``). + :type to_table: :class:`str` + + :return: The navigation property name (PascalCase). + :rtype: :class:`str` + + :raises ValueError: If no navigation property found for the target. + + Example:: + + nav = client.query.odata_expand("contact", "account") + # Returns e.g. 
"parentcustomerid_account" + for page in client.records.get("contact", + select=["fullname"], + expand=[nav], + top=5): + for r in page: + acct = r.get(nav) or {} + print(f"{r['fullname']} -> {acct.get('name', 'N/A')}") + """ + to_lower = to_table.lower() + expands = self.odata_expands(from_table) + match = [e for e in expands if e["target_table"].lower() == to_lower] + if not match: + raise ValueError( + f"No navigation property found from '{from_table}' to " + f"'{to_table}'. Use client.query.odata_expands('{from_table}') " + f"to see available targets." + ) + return match[0]["nav_property"] + + # --------------------------------------------------------- odata_bind + + def odata_bind( + self, + from_table: str, + to_table: str, + target_id: str, + ) -> Dict[str, str]: + """Build an ``@odata.bind`` entry for setting a lookup field. + + Auto-discovers the navigation property name and entity set name + from metadata. Returns a single-entry dict that can be merged + into a create or update payload. + + :param from_table: Schema name of the entity being created/updated. + :type from_table: :class:`str` + :param to_table: Schema name of the target entity the lookup points to. + :type to_table: :class:`str` + :param target_id: GUID of the target record. + :type target_id: :class:`str` + + :return: A dict like ``{"NavProp@odata.bind": "/entityset(guid)"}``. + :rtype: dict[str, str] + + :raises ValueError: If no relationship found between the tables. 
+ + Example:: + + # Instead of manually constructing: + # {"parentcustomerid_account@odata.bind": "/accounts(guid)"} + # Just do: + bind = client.query.odata_bind("contact", "account", acct_id) + client.records.create("contact", { + "firstname": "Jane", + "lastname": "Doe", + **bind, + }) + """ + to_lower = to_table.lower() + expands = self.odata_expands(from_table) + match = [e for e in expands if e["target_table"].lower() == to_lower and e["target_entity_set"]] + if not match: + raise ValueError( + f"No relationship found from '{from_table}' to '{to_table}'. " + f"Use client.query.odata_expands('{from_table}') to see options." + ) + + e = match[0] + key = f"{e['nav_property']}@odata.bind" + value = f"/{e['target_entity_set']}({target_id})" + return {key: value} diff --git a/src/PowerPlatform/Dataverse/operations/tables.py b/src/PowerPlatform/Dataverse/operations/tables.py index e25c5a14..02af4a51 100644 --- a/src/PowerPlatform/Dataverse/operations/tables.py +++ b/src/PowerPlatform/Dataverse/operations/tables.py @@ -690,3 +690,143 @@ def delete_alternate_key(self, table: str, key_id: str) -> None: """ with self._client._scoped_odata() as od: od._delete_alternate_key(table, key_id) + + # -------------------------------------------------------- list_columns + + def list_columns( + self, + table: str, + *, + select: Optional[List[str]] = None, + filter: Optional[str] = None, + ) -> List[Dict[str, Any]]: + """List all attribute (column) definitions for a table. + + :param table: Schema name of the table (e.g. ``"account"`` or + ``"new_Product"``). + :type table: :class:`str` + :param select: Optional list of property names to project via + ``$select``. Values are passed as-is (PascalCase). + :type select: list[str] or None + :param filter: Optional OData ``$filter`` expression. For example, + ``"AttributeType eq 'String'"`` returns only string columns. + :type filter: :class:`str` or None + + :return: List of raw attribute metadata dictionaries. 
+ :rtype: list[dict[str, typing.Any]] + + :raises ~PowerPlatform.Dataverse.core.errors.MetadataError: + If the table is not found. + :raises ~PowerPlatform.Dataverse.core.errors.HttpError: + If the Web API request fails. + + Example:: + + # List all columns on the account table + columns = client.tables.list_columns("account") + for col in columns: + print(f"{col['LogicalName']} ({col.get('AttributeType')})") + + # List only specific properties + columns = client.tables.list_columns( + "account", + select=["LogicalName", "SchemaName", "AttributeType"], + ) + + # Filter to only string attributes + columns = client.tables.list_columns( + "account", + filter="AttributeType eq 'String'", + ) + """ + with self._client._scoped_odata() as od: + return od._list_columns(table, select=select, filter=filter) + + # ------------------------------------------------- list_relationships + + def list_relationships( + self, + *, + filter: Optional[str] = None, + select: Optional[List[str]] = None, + ) -> List[Dict[str, Any]]: + """List all relationship definitions in the environment. + + :param filter: Optional OData ``$filter`` expression. For example, + ``"RelationshipType eq Microsoft.Dynamics.CRM.RelationshipType'OneToManyRelationship'"`` + returns only one-to-many relationships. + :type filter: :class:`str` or None + :param select: Optional list of property names to project via + ``$select``. Values are passed as-is (PascalCase). + :type select: list[str] or None + + :return: List of raw relationship metadata dictionaries. + :rtype: list[dict[str, typing.Any]] + + :raises ~PowerPlatform.Dataverse.core.errors.HttpError: + If the Web API request fails. 
+ + Example:: + + # List all relationships + rels = client.tables.list_relationships() + for rel in rels: + print(f"{rel['SchemaName']} ({rel.get('@odata.type')})") + + # Filter by type + one_to_many = client.tables.list_relationships( + filter="RelationshipType eq Microsoft.Dynamics.CRM.RelationshipType'OneToManyRelationship'" + ) + + # Select specific properties + rels = client.tables.list_relationships( + select=["SchemaName", "ReferencedEntity", "ReferencingEntity"] + ) + """ + with self._client._scoped_odata() as od: + return od._list_relationships(filter=filter, select=select) + + # --------------------------------------------- list_table_relationships + + def list_table_relationships( + self, + table: str, + *, + filter: Optional[str] = None, + select: Optional[List[str]] = None, + ) -> List[Dict[str, Any]]: + """List all relationships for a specific table. + + Combines one-to-many, many-to-one, and many-to-many relationships + for the given table by querying + ``EntityDefinitions({id})/OneToManyRelationships``, + ``EntityDefinitions({id})/ManyToOneRelationships``, and + ``EntityDefinitions({id})/ManyToManyRelationships``. + + :param table: Schema name of the table (e.g. ``"account"``). + :type table: :class:`str` + :param filter: Optional OData ``$filter`` expression applied to each + sub-request. + :type filter: :class:`str` or None + :param select: Optional list of property names to project via + ``$select``. Values are passed as-is (PascalCase). + :type select: list[str] or None + + :return: Combined list of one-to-many, many-to-one, and many-to-many + relationship metadata dictionaries. + :rtype: list[dict[str, typing.Any]] + + :raises ~PowerPlatform.Dataverse.core.errors.MetadataError: + If the table is not found. + :raises ~PowerPlatform.Dataverse.core.errors.HttpError: + If the Web API request fails. 
+ + Example:: + + # List all relationships for the account table + rels = client.tables.list_table_relationships("account") + for rel in rels: + print(f"{rel['SchemaName']} -> {rel.get('@odata.type')}") + """ + with self._client._scoped_odata() as od: + return od._list_table_relationships(table, filter=filter, select=select) diff --git a/tests/unit/data/test_list_columns.py b/tests/unit/data/test_list_columns.py new file mode 100644 index 00000000..30156ca9 --- /dev/null +++ b/tests/unit/data/test_list_columns.py @@ -0,0 +1,148 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Tests for _list_columns data-layer method.""" + +import unittest +from unittest.mock import MagicMock, Mock + +from PowerPlatform.Dataverse.data._odata import _ODataClient +from PowerPlatform.Dataverse.core.errors import MetadataError + + +class MockODataClient: + """Minimal stand-in for _ODataClient that exposes only what _list_columns needs.""" + + def __init__(self, api_base: str): + self.api = api_base + self._mock_request = MagicMock() + + def _request(self, method, url, **kwargs): + return self._mock_request(method, url, **kwargs) + + def _escape_odata_quotes(self, value: str) -> str: + return value.replace("'", "''") + + # Delegate to the real implementation under test + def _get_entity_by_table_schema_name(self, table_schema_name, headers=None): + return self._mock_get_entity(table_schema_name) + + _mock_get_entity = None + + # Attach the real _list_columns from _ODataClient + _list_columns = _ODataClient._list_columns + + +class TestListColumns(unittest.TestCase): + """Tests for _ODataClient._list_columns.""" + + def setUp(self): + self.client = MockODataClient("https://example.crm.dynamics.com/api/data/v9.2") + self.client._mock_get_entity = MagicMock( + return_value={ + "MetadataId": "ent-guid-1", + "LogicalName": "account", + "SchemaName": "Account", + } + ) + + # ------------------------------------------------ URL construction + + def 
test_uses_entity_metadata_id_in_url(self): + """_list_columns() should build the URL with the entity MetadataId.""" + mock_response = Mock() + mock_response.json.return_value = {"value": []} + self.client._mock_request.return_value = mock_response + + self.client._list_columns("account") + + call_args = self.client._mock_request.call_args + self.assertEqual(call_args[0][0], "get") + self.assertIn("EntityDefinitions(ent-guid-1)/Attributes", call_args[0][1]) + + def test_no_params_when_no_select_or_filter(self): + """_list_columns() with no select/filter should send no $select or $filter.""" + mock_response = Mock() + mock_response.json.return_value = {"value": []} + self.client._mock_request.return_value = mock_response + + self.client._list_columns("account") + + call_args = self.client._mock_request.call_args + params = call_args[1].get("params", {}) + self.assertNotIn("$select", params) + self.assertNotIn("$filter", params) + + # ------------------------------------------------ $select param + + def test_select_param_is_joined(self): + """_list_columns() should join select list into a comma-separated $select.""" + mock_response = Mock() + mock_response.json.return_value = {"value": []} + self.client._mock_request.return_value = mock_response + + self.client._list_columns("account", select=["LogicalName", "AttributeType"]) + + call_args = self.client._mock_request.call_args + params = call_args[1].get("params", {}) + self.assertEqual(params["$select"], "LogicalName,AttributeType") + + # ------------------------------------------------ $filter param + + def test_filter_param_is_passed_through(self): + """_list_columns() should forward the filter string as $filter.""" + mock_response = Mock() + mock_response.json.return_value = {"value": []} + self.client._mock_request.return_value = mock_response + + self.client._list_columns("account", filter="AttributeType eq 'String'") + + call_args = self.client._mock_request.call_args + params = call_args[1].get("params", {}) 
+ self.assertEqual(params["$filter"], "AttributeType eq 'String'") + + # ------------------------------------------------ return value + + def test_returns_value_array(self): + """_list_columns() should return the 'value' array from the response.""" + expected = [ + {"LogicalName": "name", "AttributeType": "String"}, + {"LogicalName": "accountid", "AttributeType": "Uniqueidentifier"}, + ] + mock_response = Mock() + mock_response.json.return_value = {"value": expected} + self.client._mock_request.return_value = mock_response + + result = self.client._list_columns("account") + + self.assertEqual(result, expected) + + def test_returns_empty_list_when_no_value_key(self): + """_list_columns() should return [] when response has no 'value' key.""" + mock_response = Mock() + mock_response.json.return_value = {} + self.client._mock_request.return_value = mock_response + + result = self.client._list_columns("account") + + self.assertEqual(result, []) + + # ------------------------------------------------ MetadataError + + def test_raises_metadata_error_when_table_not_found(self): + """_list_columns() should raise MetadataError when entity is not found.""" + self.client._mock_get_entity.return_value = None + + with self.assertRaises(MetadataError): + self.client._list_columns("nonexistent_table") + + def test_raises_metadata_error_when_entity_missing_metadata_id(self): + """_list_columns() should raise MetadataError when MetadataId is absent.""" + self.client._mock_get_entity.return_value = {"LogicalName": "account"} + + with self.assertRaises(MetadataError): + self.client._list_columns("account") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/data/test_relationships.py b/tests/unit/data/test_relationships.py index c67b3f1e..5b84aa3f 100644 --- a/tests/unit/data/test_relationships.py +++ b/tests/unit/data/test_relationships.py @@ -289,5 +289,206 @@ def test_get_relationship_returns_none_when_not_found(self): self.assertIsNone(result) +class 
MockODataClientWithEntityLookup(MockODataClient): + """Extended mock client that also supports _get_entity_by_table_schema_name.""" + + def __init__(self, api_base: str): + super().__init__(api_base) + self._mock_get_entity = MagicMock() + + def _get_entity_by_table_schema_name(self, table_schema_name, headers=None): + return self._mock_get_entity(table_schema_name) + + +class TestListRelationships(unittest.TestCase): + """Tests for _list_relationships method.""" + + def setUp(self): + """Set up test fixtures.""" + self.client = MockODataClient("https://example.crm.dynamics.com/api/data/v9.2") + + def test_list_relationships_url(self): + """Test that correct URL is used.""" + mock_response = Mock() + mock_response.json.return_value = {"value": []} + self.client._mock_request.return_value = mock_response + + self.client._list_relationships() + + call_args = self.client._mock_request.call_args + self.assertEqual(call_args[0][0], "get") + self.assertEqual( + call_args[0][1], + "https://example.crm.dynamics.com/api/data/v9.2/RelationshipDefinitions", + ) + + def test_list_relationships_no_params_by_default(self): + """Test that no $filter or $select are sent when not specified.""" + mock_response = Mock() + mock_response.json.return_value = {"value": []} + self.client._mock_request.return_value = mock_response + + self.client._list_relationships() + + call_args = self.client._mock_request.call_args + params = call_args[1].get("params", {}) + self.assertNotIn("$filter", params) + self.assertNotIn("$select", params) + + def test_list_relationships_filter_param(self): + """Test that $filter is forwarded.""" + mock_response = Mock() + mock_response.json.return_value = {"value": []} + self.client._mock_request.return_value = mock_response + + self.client._list_relationships(filter="RelationshipType eq 'OneToManyRelationship'") + + call_args = self.client._mock_request.call_args + params = call_args[1].get("params", {}) + self.assertEqual(params["$filter"], "RelationshipType 
eq 'OneToManyRelationship'") + + def test_list_relationships_select_param(self): + """Test that $select is joined from list.""" + mock_response = Mock() + mock_response.json.return_value = {"value": []} + self.client._mock_request.return_value = mock_response + + self.client._list_relationships(select=["SchemaName", "ReferencedEntity"]) + + call_args = self.client._mock_request.call_args + params = call_args[1].get("params", {}) + self.assertEqual(params["$select"], "SchemaName,ReferencedEntity") + + def test_list_relationships_returns_value_array(self): + """Test that the 'value' array is returned.""" + expected = [ + {"SchemaName": "new_account_orders", "MetadataId": "rel-1"}, + {"SchemaName": "new_emp_proj", "MetadataId": "rel-2"}, + ] + mock_response = Mock() + mock_response.json.return_value = {"value": expected} + self.client._mock_request.return_value = mock_response + + result = self.client._list_relationships() + + self.assertEqual(result, expected) + + def test_list_relationships_returns_empty_list_when_no_value(self): + """Test that [] is returned when response has no 'value' key.""" + mock_response = Mock() + mock_response.json.return_value = {} + self.client._mock_request.return_value = mock_response + + result = self.client._list_relationships() + + self.assertEqual(result, []) + + +class TestListTableRelationships(unittest.TestCase): + """Tests for _list_table_relationships method.""" + + def setUp(self): + """Set up test fixtures.""" + self.client = MockODataClientWithEntityLookup("https://example.crm.dynamics.com/api/data/v9.2") + self.client._mock_get_entity.return_value = { + "MetadataId": "ent-guid-1", + "LogicalName": "account", + "SchemaName": "Account", + } + + def _make_response(self, value): + r = Mock() + r.json.return_value = {"value": value} + return r + + def test_uses_one_to_many_and_many_to_many_urls(self): + """Test that OneToMany, ManyToOne, and ManyToMany URLs are queried.""" + self.client._mock_request.side_effect = [ + 
self._make_response([]), + self._make_response([]), + self._make_response([]), + ] + + self.client._list_table_relationships("account") + + calls = self.client._mock_request.call_args_list + self.assertEqual(len(calls), 3) + urls = [call[0][1] for call in calls] + self.assertTrue(any("OneToManyRelationships" in u for u in urls)) + self.assertTrue(any("ManyToOneRelationships" in u for u in urls)) + self.assertTrue(any("ManyToManyRelationships" in u for u in urls)) + + def test_uses_metadata_id_in_urls(self): + """Test that the entity MetadataId is used in all three URLs.""" + self.client._mock_request.side_effect = [ + self._make_response([]), + self._make_response([]), + self._make_response([]), + ] + + self.client._list_table_relationships("account") + + calls = self.client._mock_request.call_args_list + for call in calls: + self.assertIn("ent-guid-1", call[0][1]) + + def test_combines_one_to_many_and_many_to_many_results(self): + """Test that results from all three sub-requests are combined.""" + one_to_many = [{"SchemaName": "rel_1tm", "MetadataId": "r1"}] + many_to_one = [{"SchemaName": "rel_mt1", "MetadataId": "r2"}] + many_to_many = [{"SchemaName": "rel_mtm", "MetadataId": "r3"}] + self.client._mock_request.side_effect = [ + self._make_response(one_to_many), + self._make_response(many_to_one), + self._make_response(many_to_many), + ] + + result = self.client._list_table_relationships("account") + + self.assertEqual(len(result), 3) + self.assertEqual(result[0]["SchemaName"], "rel_1tm") + self.assertEqual(result[1]["SchemaName"], "rel_mt1") + self.assertEqual(result[2]["SchemaName"], "rel_mtm") + + def test_filter_param_is_forwarded(self): + """Test that $filter is sent to all three sub-requests.""" + self.client._mock_request.side_effect = [ + self._make_response([]), + self._make_response([]), + self._make_response([]), + ] + + self.client._list_table_relationships("account", filter="IsManaged eq false") + + calls = self.client._mock_request.call_args_list + 
for call in calls: + params = call[1].get("params", {}) + self.assertEqual(params["$filter"], "IsManaged eq false") + + def test_select_param_is_forwarded(self): + """Test that $select is sent to all three sub-requests.""" + self.client._mock_request.side_effect = [ + self._make_response([]), + self._make_response([]), + self._make_response([]), + ] + + self.client._list_table_relationships("account", select=["SchemaName", "MetadataId"]) + + calls = self.client._mock_request.call_args_list + for call in calls: + params = call[1].get("params", {}) + self.assertEqual(params["$select"], "SchemaName,MetadataId") + + def test_raises_metadata_error_when_table_not_found(self): + """Test that MetadataError is raised when entity is not found.""" + from PowerPlatform.Dataverse.core.errors import MetadataError + + self.client._mock_get_entity.return_value = None + + with self.assertRaises(MetadataError): + self.client._list_table_relationships("nonexistent_table") + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/data/test_select_star_expansion.py b/tests/unit/data/test_select_star_expansion.py new file mode 100644 index 00000000..e29ef1a9 --- /dev/null +++ b/tests/unit/data/test_select_star_expansion.py @@ -0,0 +1,259 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +"""Unit tests for SELECT * auto-expansion in _query_sql.""" + +import pytest +from unittest.mock import MagicMock +from urllib.parse import parse_qs, urlparse + +from PowerPlatform.Dataverse.data._odata import _ODataClient + + +class DummyAuth: + def _acquire_token(self, scope): + class T: + access_token = "x" + + return T() + + +def _client(): + return _ODataClient(DummyAuth(), "https://org.example", None) + + +# --- _expand_select_star --- + + +class TestExpandSelectStar: + """Tests for _ODataClient._expand_select_star.""" + + def test_no_star_unchanged(self): + c = _client() + c._list_columns = MagicMock() + sql = "SELECT name, revenue FROM account WHERE statecode = 0" + result = c._expand_select_star(sql, "account") + assert result == sql + c._list_columns.assert_not_called() + + def test_basic_select_star(self): + c = _client() + c._list_columns = MagicMock( + return_value=[ + {"LogicalName": "name"}, + {"LogicalName": "accountid"}, + {"LogicalName": "revenue"}, + ] + ) + sql = "SELECT * FROM account" + result = c._expand_select_star(sql, "account") + assert "SELECT accountid, name, revenue FROM" in result + assert "*" not in result + + def test_select_star_with_top(self): + c = _client() + c._list_columns = MagicMock( + return_value=[ + {"LogicalName": "name"}, + {"LogicalName": "accountid"}, + ] + ) + sql = "SELECT TOP 10 * FROM account" + result = c._expand_select_star(sql, "account") + assert "TOP 10" in result + assert "accountid, name" in result + assert "*" not in result + + def test_select_star_with_distinct(self): + c = _client() + c._list_columns = MagicMock( + return_value=[ + {"LogicalName": "name"}, + ] + ) + sql = "SELECT DISTINCT * FROM account" + result = c._expand_select_star(sql, "account") + assert "DISTINCT" in result + assert "name" in result + assert "*" not in result + + def test_select_star_with_distinct_top(self): + c = _client() + c._list_columns = MagicMock( + return_value=[ + {"LogicalName": "name"}, + {"LogicalName": 
"accountid"}, + ] + ) + sql = "SELECT DISTINCT TOP 5 * FROM account" + result = c._expand_select_star(sql, "account") + assert "DISTINCT" in result + assert "TOP 5" in result + assert "accountid, name" in result + assert "*" not in result + + def test_star_in_count_not_expanded(self): + c = _client() + c._list_columns = MagicMock() + sql = "SELECT COUNT(*) FROM account" + result = c._expand_select_star(sql, "account") + # COUNT(*) should NOT trigger expansion since the * is inside parens + assert result == sql + c._list_columns.assert_not_called() + + def test_skips_virtual_columns(self): + c = _client() + c._list_columns = MagicMock( + return_value=[ + {"LogicalName": "name"}, + ] + ) + sql = "SELECT * FROM account" + c._expand_select_star(sql, "account") + c._list_columns.assert_called_once_with( + "account", + select=["LogicalName"], + filter="AttributeType ne 'Virtual'", + ) + + def test_empty_columns_unchanged(self): + c = _client() + c._list_columns = MagicMock(return_value=[]) + sql = "SELECT * FROM account" + result = c._expand_select_star(sql, "account") + assert result == sql + + def test_where_clause_preserved(self): + c = _client() + c._list_columns = MagicMock( + return_value=[ + {"LogicalName": "name"}, + {"LogicalName": "accountid"}, + ] + ) + sql = "SELECT * FROM account WHERE statecode = 0" + result = c._expand_select_star(sql, "account") + assert "WHERE statecode = 0" in result + assert "*" not in result + + def test_case_insensitive_select(self): + c = _client() + c._list_columns = MagicMock( + return_value=[ + {"LogicalName": "name"}, + ] + ) + sql = "select * from account" + result = c._expand_select_star(sql, "account") + assert "name" in result + assert "*" not in result + + +# --- _SELECT_STAR_RE pattern tests --- + + +class TestSelectStarRegex: + """Verify the regex correctly identifies SELECT * patterns.""" + + def test_matches_simple_star(self): + assert _ODataClient._SELECT_STAR_RE.search("SELECT * FROM account") + + def 
test_matches_star_with_top(self): + assert _ODataClient._SELECT_STAR_RE.search("SELECT TOP 10 * FROM account") + + def test_matches_star_with_distinct(self): + assert _ODataClient._SELECT_STAR_RE.search("SELECT DISTINCT * FROM account") + + def test_matches_star_with_distinct_top(self): + assert _ODataClient._SELECT_STAR_RE.search("SELECT DISTINCT TOP 50 * FROM account") + + def test_no_match_count_star(self): + assert not _ODataClient._SELECT_STAR_RE.search("SELECT COUNT(*) FROM account") + + def test_no_match_named_columns(self): + assert not _ODataClient._SELECT_STAR_RE.search("SELECT name, revenue FROM account") + + def test_matches_top_percent(self): + assert _ODataClient._SELECT_STAR_RE.search("SELECT TOP 50 PERCENT * FROM account") + + def test_case_insensitive(self): + assert _ODataClient._SELECT_STAR_RE.search("select * from account") + + def test_no_match_alias_star(self): + # a.* is not SELECT * -- it's a table-qualified wildcard not at toplevel + assert not _ODataClient._SELECT_STAR_RE.search("SELECT a.name, b.* FROM account a") + + +# --- Integration: _query_sql calls _expand_select_star --- + + +@pytest.mark.filterwarnings("ignore::UserWarning") +class TestQuerySqlSelectStarIntegration: + """Verify _query_sql calls _expand_select_star when SELECT * is used.""" + + def test_query_sql_expands_select_star(self): + c = _client() + c._entity_set_from_schema_name = MagicMock(return_value="accounts") + c._list_columns = MagicMock( + return_value=[ + {"LogicalName": "name"}, + {"LogicalName": "accountid"}, + ] + ) + mock_response = MagicMock() + mock_response.json.return_value = {"value": [{"name": "Contoso", "accountid": "1"}]} + mock_response.status_code = 200 + c._request = MagicMock(return_value=mock_response) + + rows = c._query_sql("SELECT * FROM account") + + # Verify _list_columns was called (SELECT * expansion) + c._list_columns.assert_called_once() + # Verify the SQL sent to server has explicit columns, not * + call_args = c._request.call_args + 
sent_url = call_args[0][1] + sent_sql = parse_qs(urlparse(sent_url).query)["sql"][0] + assert "*" not in sent_sql or "COUNT(*)" in sent_sql + assert "accountid" in sent_sql + assert "name" in sent_sql + assert len(rows) == 1 + + def test_query_sql_skips_expansion_for_named_columns(self): + c = _client() + c._entity_set_from_schema_name = MagicMock(return_value="accounts") + c._list_columns = MagicMock() + mock_response = MagicMock() + mock_response.json.return_value = {"value": [{"name": "Contoso"}]} + mock_response.status_code = 200 + c._request = MagicMock(return_value=mock_response) + + c._query_sql("SELECT name FROM account") + + # _list_columns should NOT be called for explicit column queries + c._list_columns.assert_not_called() + + def test_query_sql_skips_expansion_for_count_star(self): + c = _client() + c._entity_set_from_schema_name = MagicMock(return_value="accounts") + c._list_columns = MagicMock() + mock_response = MagicMock() + mock_response.json.return_value = {"value": [{"cnt": 42}]} + mock_response.status_code = 200 + c._request = MagicMock(return_value=mock_response) + + c._query_sql("SELECT COUNT(*) FROM account") + + c._list_columns.assert_not_called() + + def test_query_sql_with_join_no_star_no_expansion(self): + c = _client() + c._entity_set_from_schema_name = MagicMock(return_value="accounts") + c._list_columns = MagicMock() + mock_response = MagicMock() + mock_response.json.return_value = {"value": []} + mock_response.status_code = 200 + c._request = MagicMock(return_value=mock_response) + + c._query_sql("SELECT a.name, c.fullname FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid") + + c._list_columns.assert_not_called() diff --git a/tests/unit/data/test_sql_guardrails.py b/tests/unit/data/test_sql_guardrails.py new file mode 100644 index 00000000..839e01b0 --- /dev/null +++ b/tests/unit/data/test_sql_guardrails.py @@ -0,0 +1,386 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. 
+ +"""Unit tests for SQL guardrails in _query_sql.""" + +import warnings + +import pytest +from unittest.mock import MagicMock +from urllib.parse import parse_qs, urlparse + +from PowerPlatform.Dataverse.core.errors import ValidationError +from PowerPlatform.Dataverse.data._odata import _ODataClient + + +class DummyAuth: + def _acquire_token(self, scope): + class T: + access_token = "x" + + return T() + + +def _client(): + return _ODataClient(DummyAuth(), "https://org.example", None) + + +# =================================================================== +# 1. Block write statements +# =================================================================== + + +class TestBlockWriteStatements: + """Write SQL (INSERT/UPDATE/DELETE/DROP/etc.) must be blocked.""" + + @pytest.mark.parametrize( + "sql", + [ + "DELETE FROM account WHERE name = 'test'", + "UPDATE account SET name = 'hacked' WHERE 1=1", + "INSERT INTO account (name) VALUES ('injected')", + "DROP TABLE account", + "TRUNCATE TABLE account", + "ALTER TABLE account ADD hackedcol VARCHAR(100)", + "CREATE TABLE hacked (id INT)", + "EXEC sp_helptext 'account'", + "GRANT SELECT ON account TO public", + "REVOKE SELECT ON account FROM public", + "BULK INSERT account FROM '/tmp/data.csv'", + ], + ) + def test_write_statement_raises(self, sql): + c = _client() + with pytest.raises(ValidationError, match="read-only"): + c._sql_guardrails(sql) + + @pytest.mark.parametrize( + "sql", + [ + "delete FROM account", + " DELETE FROM account", + " update account SET x = 1", + "\n\tINSERT INTO t (c) VALUES (1)", + ], + ) + def test_write_case_insensitive_and_whitespace(self, sql): + c = _client() + with pytest.raises(ValidationError): + c._sql_guardrails(sql) + + def test_select_not_blocked(self): + c = _client() + # SELECT should pass through without raising + result = c._sql_guardrails("SELECT TOP 10 name FROM account") + assert "SELECT" in result + + def test_select_with_delete_in_where_not_blocked(self): + c = _client() + 
result = c._sql_guardrails("SELECT TOP 10 name FROM account WHERE name = 'DELETE ME'") + assert "SELECT" in result + + @pytest.mark.parametrize( + "sql", + [ + "/* comment */ DELETE FROM account", + "-- line comment\nDELETE FROM account", + "/* multi\nline */ DROP TABLE account", + ], + ) + def test_comment_prefixed_writes_blocked(self, sql): + c = _client() + with pytest.raises(ValidationError, match="read-only"): + c._sql_guardrails(sql) + + +# =================================================================== +# 1b. Block server-rejected SQL patterns (save round-trip) +# =================================================================== + + +class TestBlockServerRejectedPatterns: + """Block SQL patterns the server rejects, to save network round-trip.""" + + @pytest.mark.parametrize( + "sql,match_text", + [ + ("SELECT a.name FROM account a CROSS JOIN contact c", "Unsupported JOIN"), + ( + "SELECT a.name FROM account a RIGHT JOIN contact c ON a.accountid = c.parentcustomerid", + "Unsupported JOIN", + ), + ( + "SELECT a.name FROM account a RIGHT OUTER JOIN contact c ON a.accountid = c.parentcustomerid", + "Unsupported JOIN", + ), + ( + "SELECT a.name FROM account a FULL OUTER JOIN contact c ON a.accountid = c.parentcustomerid", + "Unsupported JOIN", + ), + ( + "SELECT a.name FROM account a FULL JOIN contact c ON a.accountid = c.parentcustomerid", + "Unsupported JOIN", + ), + ], + ) + def test_unsupported_joins_blocked(self, sql, match_text): + c = _client() + with pytest.raises(ValidationError, match=match_text): + c._sql_guardrails(sql) + + def test_inner_join_allowed(self): + c = _client() + result = c._sql_guardrails( + "SELECT TOP 5 a.name FROM account a " "INNER JOIN contact c ON a.accountid = c.parentcustomerid" + ) + assert "INNER JOIN" in result + + def test_left_join_allowed(self): + c = _client() + result = c._sql_guardrails( + "SELECT TOP 5 a.name FROM account a " "LEFT JOIN contact c ON a.accountid = c.parentcustomerid" + ) + assert "LEFT JOIN" in 
result + + def test_union_blocked(self): + c = _client() + with pytest.raises(ValidationError, match="UNION"): + c._sql_guardrails("SELECT name FROM account UNION SELECT fullname FROM contact") + + def test_union_all_blocked(self): + c = _client() + with pytest.raises(ValidationError, match="UNION"): + c._sql_guardrails("SELECT name FROM account UNION ALL SELECT fullname FROM contact") + + def test_having_blocked(self): + c = _client() + with pytest.raises(ValidationError, match="HAVING"): + c._sql_guardrails("SELECT name, COUNT(*) FROM account GROUP BY name HAVING COUNT(*) > 1") + + def test_cte_blocked(self): + c = _client() + with pytest.raises(ValidationError, match="CTE"): + c._sql_guardrails("WITH cte AS (SELECT name FROM account) SELECT * FROM cte") + + def test_subquery_in_blocked(self): + c = _client() + with pytest.raises(ValidationError, match="Subquer"): + c._sql_guardrails("SELECT name FROM account WHERE accountid IN (SELECT accountid FROM account)") + + def test_subquery_exists_blocked(self): + c = _client() + with pytest.raises(ValidationError, match="Subquer"): + c._sql_guardrails( + "SELECT name FROM account a WHERE EXISTS " + "(SELECT 1 FROM contact c WHERE c.parentcustomerid = a.accountid)" + ) + + def test_subquery_in_values_not_blocked(self): + """IN with literal values is fine -- only IN (SELECT ...) is blocked.""" + c = _client() + result = c._sql_guardrails("SELECT name FROM account WHERE name IN ('A', 'B', 'C')") + assert "IN" in result + + +# =================================================================== +# 2. 
Server enforces TOP 5000 (no client-side injection needed) +# =================================================================== + + +class TestNoTopInjection: + """Verify the SDK does NOT inject TOP -- server handles the 5000 cap.""" + + def test_no_top_passes_through_unchanged(self): + c = _client() + sql = "SELECT name FROM account" + result = c._sql_guardrails(sql) + assert result == sql + assert "TOP" not in result + + def test_existing_top_not_modified(self): + c = _client() + result = c._sql_guardrails("SELECT TOP 100 name FROM account") + assert "TOP 100" in result + + def test_offset_passes_through(self): + c = _client() + sql = "SELECT name FROM account ORDER BY name OFFSET 10 ROWS FETCH NEXT 5 ROWS ONLY" + result = c._sql_guardrails(sql) + assert result == sql + + def test_join_without_top_not_modified(self): + c = _client() + sql = "SELECT a.name, c.fullname FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid" + result = c._sql_guardrails(sql) + assert result == sql + assert "TOP" not in result + + +# =================================================================== +# 3. Warn on leading-wildcard LIKE +# =================================================================== + + +class TestLeadingWildcardWarning: + """LIKE '%...' 
patterns should emit a UserWarning.""" + + def test_leading_wildcard_warns(self): + c = _client() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._sql_guardrails("SELECT TOP 10 name FROM account WHERE name LIKE '%test'") + like_warnings = [x for x in w if "leading-wildcard" in str(x.message).lower()] + assert len(like_warnings) == 1 + + def test_mid_wildcard_warns(self): + c = _client() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._sql_guardrails("SELECT TOP 10 name FROM account WHERE name LIKE '%test%'") + like_warnings = [x for x in w if "leading-wildcard" in str(x.message).lower()] + assert len(like_warnings) == 1 + + def test_trailing_wildcard_no_warning(self): + c = _client() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._sql_guardrails("SELECT TOP 10 name FROM account WHERE name LIKE 'test%'") + like_warnings = [x for x in w if "leading-wildcard" in str(x.message).lower()] + assert len(like_warnings) == 0 + + def test_no_like_no_warning(self): + c = _client() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._sql_guardrails("SELECT TOP 10 name FROM account") + like_warnings = [x for x in w if "wildcard" in str(x.message).lower()] + assert len(like_warnings) == 0 + + +# =================================================================== +# 4. 
Warn on implicit cross joins (server allows, SDK warns) +# =================================================================== + + +class TestImplicitCrossJoinWarning: + """FROM a, b (comma syntax) should emit UserWarning (not error).""" + + def test_comma_join_warns(self): + c = _client() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._sql_guardrails("SELECT TOP 10 a.name, c.fullname FROM account a, contact c") + cross_warnings = [x for x in w if "cross join" in str(x.message).lower()] + assert len(cross_warnings) == 1 + + def test_explicit_join_no_warning(self): + c = _client() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._sql_guardrails( + "SELECT TOP 10 a.name FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid" + ) + cross_warnings = [x for x in w if "cross join" in str(x.message).lower()] + assert len(cross_warnings) == 0 + + def test_single_table_no_warning(self): + c = _client() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._sql_guardrails("SELECT TOP 10 name FROM account") + cross_warnings = [x for x in w if "cross join" in str(x.message).lower()] + assert len(cross_warnings) == 0 + + +# =================================================================== +# 5. 
SELECT * with JOIN warning (from _expand_select_star) +# =================================================================== + + +class TestSelectStarJoinWarning: + """SELECT * with JOIN should warn that only first table columns are used.""" + + def test_select_star_with_join_warns(self): + c = _client() + c._list_columns = MagicMock(return_value=[{"LogicalName": "name"}, {"LogicalName": "accountid"}]) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._expand_select_star( + "SELECT * FROM account a JOIN contact c ON a.accountid = c.parentcustomerid", + "account", + ) + join_warnings = [x for x in w if "JOIN" in str(x.message)] + assert len(join_warnings) == 1 + assert "first table only" in str(join_warnings[0].message) + + def test_select_star_no_join_no_warning(self): + c = _client() + c._list_columns = MagicMock(return_value=[{"LogicalName": "name"}]) + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._expand_select_star("SELECT * FROM account", "account") + join_warnings = [x for x in w if "JOIN" in str(x.message)] + assert len(join_warnings) == 0 + + def test_no_star_with_join_no_warning(self): + c = _client() + c._list_columns = MagicMock() + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + c._expand_select_star( + "SELECT a.name, c.fullname FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid", + "account", + ) + # _list_columns not called (no star), so no JOIN warning + c._list_columns.assert_not_called() + join_warnings = [x for x in w if "JOIN" in str(x.message)] + assert len(join_warnings) == 0 + + +# =================================================================== +# 6. 
Integration: _query_sql applies guardrails +# =================================================================== + + +class TestQuerySqlGuardrailIntegration: + """Verify _query_sql applies guardrails before sending to server.""" + + def test_write_blocked_before_server_call(self): + c = _client() + c._request = MagicMock() + with pytest.raises(ValidationError, match="read-only"): + c._query_sql("DELETE FROM account WHERE name = 'x'") + c._request.assert_not_called() + + def test_no_top_injection_in_server_request(self): + """Server manages the 5000 cap -- SDK should not inject TOP.""" + c = _client() + c._entity_set_from_schema_name = MagicMock(return_value="accounts") + mock_response = MagicMock() + mock_response.json.return_value = {"value": []} + mock_response.status_code = 200 + c._request = MagicMock(return_value=mock_response) + + c._query_sql("SELECT name FROM account") + + call_args = c._request.call_args + sent_url = call_args[0][1] + sent_sql = parse_qs(urlparse(sent_url).query)["sql"][0] + # SDK should NOT inject TOP 5000 + assert "TOP 5000" not in sent_sql + assert sent_sql == "SELECT name FROM account" + + def test_explicit_top_preserved_in_server_request(self): + c = _client() + c._entity_set_from_schema_name = MagicMock(return_value="accounts") + mock_response = MagicMock() + mock_response.json.return_value = {"value": []} + mock_response.status_code = 200 + c._request = MagicMock(return_value=mock_response) + + c._query_sql("SELECT TOP 50 name FROM account") + + call_args = c._request.call_args + sent_url = call_args[0][1] + sent_sql = parse_qs(urlparse(sent_url).query)["sql"][0] + assert "TOP 50" in sent_sql diff --git a/tests/unit/data/test_sql_parse.py b/tests/unit/data/test_sql_parse.py index 12c25a9c..886d4d08 100644 --- a/tests/unit/data/test_sql_parse.py +++ b/tests/unit/data/test_sql_parse.py @@ -72,6 +72,114 @@ def test_from_as_value_not_table(): assert c._extract_logical_table(sql) == "incident" +# --- JOIN syntax (multi-table SQL) --- + 
+ +def test_inner_join_extracts_first_table(): + c = _client() + sql = "SELECT a.name, c.fullname FROM account a " "INNER JOIN contact c ON a.accountid = c.parentcustomerid" + assert c._extract_logical_table(sql) == "account" + + +def test_left_join_extracts_first_table(): + c = _client() + sql = "SELECT a.name FROM account a " "LEFT JOIN contact c ON a.accountid = c.parentcustomerid" + assert c._extract_logical_table(sql) == "account" + + +def test_multi_join_extracts_first_table(): + c = _client() + sql = ( + "SELECT a.name, c.fullname, o.name " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid " + "JOIN opportunity o ON a.accountid = o.parentaccountid" + ) + assert c._extract_logical_table(sql) == "account" + + +def test_join_with_alias(): + c = _client() + sql = "SELECT t.name FROM account AS t JOIN contact c ON t.accountid = c.parentcustomerid" + assert c._extract_logical_table(sql) == "account" + + +def test_table_alias_without_as(): + c = _client() + sql = "SELECT a.name FROM account a WHERE a.statecode = 0" + assert c._extract_logical_table(sql) == "account" + + +def test_table_alias_with_as(): + c = _client() + sql = "SELECT a.name FROM account AS a WHERE a.statecode = 0" + assert c._extract_logical_table(sql) == "account" + + +def test_custom_table_with_join(): + c = _client() + sql = ( + "SELECT t.new_code, tk.new_title " + "FROM new_sqldemotask tk " + "INNER JOIN new_sqldemoteam t ON tk._new_teamid_value = t.new_sqldemoteamid" + ) + assert c._extract_logical_table(sql) == "new_sqldemotask" + + +def test_aggregate_with_join(): + c = _client() + sql = ( + "SELECT a.name, COUNT(c.contactid) as cnt " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid " + "GROUP BY a.name" + ) + assert c._extract_logical_table(sql) == "account" + + +def test_offset_fetch(): + c = _client() + sql = "SELECT name FROM account " "ORDER BY name OFFSET 10 ROWS FETCH NEXT 5 ROWS ONLY" + assert c._extract_logical_table(sql) == "account" 
+ + +def test_polymorphic_owner_join(): + c = _client() + sql = "SELECT a.name, su.fullname " "FROM account a " "JOIN systemuser su ON a._ownerid_value = su.systemuserid" + assert c._extract_logical_table(sql) == "account" + + +def test_audit_trail_multi_join(): + c = _client() + sql = ( + "SELECT a.name, creator.fullname, modifier.fullname " + "FROM account a " + "JOIN systemuser creator ON a._createdby_value = creator.systemuserid " + "JOIN systemuser modifier ON a._modifiedby_value = modifier.systemuserid" + ) + assert c._extract_logical_table(sql) == "account" + + +def test_select_star(): + c = _client() + assert c._extract_logical_table("SELECT * FROM account") == "account" + + +def test_select_star_with_where(): + c = _client() + assert c._extract_logical_table("SELECT * FROM account WHERE statecode = 0") == "account" + + +def test_distinct_top(): + c = _client() + assert c._extract_logical_table("SELECT DISTINCT TOP 5 name FROM account") == "account" + + +def test_count_star(): + c = _client() + assert c._extract_logical_table("SELECT COUNT(*) FROM account") == "account" + + # --------------------------------------------------------------------------- # _build_sql URL encoding # --------------------------------------------------------------------------- diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index cfad101e..056764d3 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -333,6 +333,34 @@ def test_referenced_attribute_auto_generated(self): self.assertEqual(relationship.referenced_attribute, "accountid") + def test_mixed_case_table_names_lowered(self): + """Test that mixed-case table names are auto-lowered to logical names. + + Only table names (entity logical names) are lowered. + lookup_field_name is a SchemaName and keeps its original casing. 
+ """ + self.client.tables.create_lookup_field( + referencing_table="new_SQLTask", + lookup_field_name="new_TeamId", + referenced_table="new_SQLTeam", + ) + + call_args = self.client.tables.create_one_to_many_relationship.call_args + lookup = call_args[0][0] + relationship = call_args[0][1] + + # Entity names must be lowercased (Dataverse logical names) + self.assertEqual(relationship.referenced_entity, "new_sqlteam") + self.assertEqual(relationship.referencing_entity, "new_sqltask") + self.assertEqual(relationship.referenced_attribute, "new_sqlteamid") + + # Schema_name: table names lowered, lookup_field_name keeps casing + self.assertEqual(relationship.schema_name, "new_sqlteam_new_sqltask_new_TeamId") + + # Display name defaults to original (un-lowered) referenced_table + label_dict = lookup.display_name.to_dict() + self.assertEqual(label_dict["LocalizedLabels"][0]["Label"], "new_SQLTeam") + def test_returns_result(self): """Test that the method returns the result from create_one_to_many_relationship.""" expected_result = { diff --git a/tests/unit/test_dataframe_operations.py b/tests/unit/test_dataframe_operations.py index c83ae05b..567d4371 100644 --- a/tests/unit/test_dataframe_operations.py +++ b/tests/unit/test_dataframe_operations.py @@ -35,6 +35,141 @@ def setUp(self): self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) self.client._odata = MagicMock() + +class TestDataFrameSql(unittest.TestCase): + """Tests for client.dataframe.sql().""" + + def setUp(self): + self.mock_credential = MagicMock(spec=TokenCredential) + self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) + self.client._odata = MagicMock() + + def test_sql_returns_dataframe(self): + """sql() should return a DataFrame from SQL results.""" + raw_rows = [ + {"accountid": "1", "name": "Contoso"}, + {"accountid": "2", "name": "Fabrikam"}, + ] + self.client._odata._query_sql.return_value = raw_rows + df = 
self.client.dataframe.sql("SELECT accountid, name FROM account") + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(len(df), 2) + self.assertEqual(df.iloc[0]["name"], "Contoso") + self.assertEqual(df.iloc[1]["name"], "Fabrikam") + + def test_sql_empty_result(self): + """sql() should return an empty DataFrame when no rows match.""" + self.client._odata._query_sql.return_value = [] + df = self.client.dataframe.sql("SELECT name FROM account WHERE name = 'None'") + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(len(df), 0) + + def test_sql_aggregate(self): + """sql() should handle aggregate results as DataFrame.""" + self.client._odata._query_sql.return_value = [{"cnt": 42}] + df = self.client.dataframe.sql("SELECT COUNT(*) as cnt FROM account") + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(len(df), 1) + self.assertEqual(df.iloc[0]["cnt"], 42) + + def test_sql_join(self): + """sql() should handle JOIN results as DataFrame.""" + raw = [ + {"name": "Contoso", "fullname": "John Doe"}, + {"name": "Fabrikam", "fullname": "Jane Smith"}, + ] + self.client._odata._query_sql.return_value = raw + df = self.client.dataframe.sql( + "SELECT a.name, c.fullname FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid" + ) + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(len(df), 2) + self.assertIn("name", df.columns) + self.assertIn("fullname", df.columns) + + def test_sql_group_by(self): + """sql() should handle GROUP BY results as DataFrame.""" + raw = [ + {"new_region": 1, "cnt": 3, "total": 167000}, + {"new_region": 2, "cnt": 1, "total": 75000}, + ] + self.client._odata._query_sql.return_value = raw + df = self.client.dataframe.sql( + "SELECT new_region, COUNT(*) as cnt, SUM(new_budget) as total " "FROM new_sqldemoteam GROUP BY new_region" + ) + self.assertIsInstance(df, pd.DataFrame) + self.assertEqual(len(df), 2) + self.assertIn("new_region", df.columns) + self.assertIn("cnt", df.columns) + self.assertIn("total", 
df.columns) + + def test_sql_distinct(self): + """sql() should handle DISTINCT results as DataFrame.""" + raw = [{"name": "Contoso"}, {"name": "Fabrikam"}] + self.client._odata._query_sql.return_value = raw + df = self.client.dataframe.sql("SELECT DISTINCT name FROM account") + self.assertEqual(len(df), 2) + + def test_sql_select_star(self): + """sql() should handle SELECT * (auto-expanded) as DataFrame.""" + raw = [{"accountid": "1", "name": "Contoso", "revenue": 1000}] + self.client._odata._query_sql.return_value = raw + df = self.client.dataframe.sql("SELECT * FROM account") + self.assertEqual(len(df), 1) + self.assertIn("accountid", df.columns) + self.assertIn("name", df.columns) + + def test_sql_polymorphic_owner_join(self): + """sql() should handle polymorphic lookup JOIN to DataFrame.""" + raw = [ + {"name": "Contoso", "owner_name": "Admin"}, + {"name": "Fabrikam", "owner_name": "Manager"}, + ] + self.client._odata._query_sql.return_value = raw + df = self.client.dataframe.sql( + "SELECT a.name, su.fullname as owner_name " + "FROM account a " + "JOIN systemuser su ON a._ownerid_value = su.systemuserid" + ) + self.assertEqual(len(df), 2) + self.assertIn("owner_name", df.columns) + + def test_sql_multi_aggregate(self): + """sql() should handle all 5 aggregate functions.""" + raw = [{"cnt": 10, "total": 500, "avg_v": 50.0, "min_v": 10, "max_v": 100}] + self.client._odata._query_sql.return_value = raw + df = self.client.dataframe.sql( + "SELECT COUNT(*) as cnt, SUM(revenue) as total, " + "AVG(revenue) as avg_v, MIN(revenue) as min_v, MAX(revenue) as max_v " + "FROM account" + ) + self.assertEqual(len(df), 1) + self.assertEqual(df.iloc[0]["cnt"], 10) + self.assertEqual(df.iloc[0]["max_v"], 100) + + def test_sql_offset_fetch(self): + """sql() should handle OFFSET FETCH pagination results.""" + raw = [{"name": "Row1"}, {"name": "Row2"}] + self.client._odata._query_sql.return_value = raw + df = self.client.dataframe.sql("SELECT name FROM account ORDER BY name OFFSET 
10 ROWS FETCH NEXT 2 ROWS ONLY") + self.assertEqual(len(df), 2) + + def test_sql_join_with_group_by(self): + """sql() should handle JOIN + GROUP BY + aggregates.""" + raw = [ + {"name": "Contoso", "contact_count": 5}, + {"name": "Fabrikam", "contact_count": 3}, + ] + self.client._odata._query_sql.return_value = raw + df = self.client.dataframe.sql( + "SELECT a.name, COUNT(c.contactid) as contact_count " + "FROM account a " + "JOIN contact c ON a.accountid = c.parentcustomerid " + "GROUP BY a.name" + ) + self.assertEqual(len(df), 2) + self.assertIn("contact_count", df.columns) + def test_get_single_record(self): """record_id returns a one-row DataFrame using result.data.""" self.client._odata._get.return_value = {"accountid": "guid-1", "name": "Contoso"} diff --git a/tests/unit/test_query_operations.py b/tests/unit/test_query_operations.py index 2d025b52..f5f02fbe 100644 --- a/tests/unit/test_query_operations.py +++ b/tests/unit/test_query_operations.py @@ -55,6 +55,108 @@ def test_sql_empty_result(self): self.assertIsInstance(result, list) self.assertEqual(result, []) + def test_sql_join(self): + """sql() should handle JOIN SQL and return Record objects.""" + raw_rows = [ + {"name": "Contoso", "fullname": "John Doe"}, + {"name": "Fabrikam", "fullname": "Jane Smith"}, + ] + self.client._odata._query_sql.return_value = raw_rows + + result = self.client.query.sql( + "SELECT a.name, c.fullname FROM account a " "JOIN contact c ON a.accountid = c.parentcustomerid" + ) + + self.assertEqual(len(result), 2) + self.assertIsInstance(result[0], Record) + self.assertEqual(result[0]["name"], "Contoso") + self.assertEqual(result[0]["fullname"], "John Doe") + + def test_sql_aggregate(self): + """sql() should handle aggregate results.""" + self.client._odata._query_sql.return_value = [{"cnt": 42}] + + result = self.client.query.sql("SELECT COUNT(*) as cnt FROM account") + + self.assertEqual(len(result), 1) + self.assertEqual(result[0]["cnt"], 42) + + def test_sql_group_by(self): + 
"""sql() should handle GROUP BY results.""" + raw = [ + {"statecode": 0, "cnt": 100}, + {"statecode": 1, "cnt": 5}, + ] + self.client._odata._query_sql.return_value = raw + + result = self.client.query.sql("SELECT statecode, COUNT(*) as cnt FROM account GROUP BY statecode") + + self.assertEqual(len(result), 2) + self.assertEqual(result[0]["statecode"], 0) + self.assertEqual(result[0]["cnt"], 100) + + def test_sql_distinct(self): + """sql() should handle DISTINCT results.""" + raw = [{"name": "Contoso"}, {"name": "Fabrikam"}] + self.client._odata._query_sql.return_value = raw + + result = self.client.query.sql("SELECT DISTINCT name FROM account") + + self.assertEqual(len(result), 2) + + def test_sql_polymorphic_owner_join(self): + """sql() should handle polymorphic lookup JOINs (ownerid -> systemuser).""" + raw = [ + {"name": "Contoso", "owner_name": "Admin User"}, + ] + self.client._odata._query_sql.return_value = raw + + result = self.client.query.sql( + "SELECT a.name, su.fullname as owner_name " + "FROM account a " + "JOIN systemuser su ON a._ownerid_value = su.systemuserid" + ) + + self.assertEqual(len(result), 1) + self.assertEqual(result[0]["owner_name"], "Admin User") + + def test_sql_audit_trail_multi_join(self): + """sql() should handle multi-JOIN for audit trail (createdby + modifiedby).""" + raw = [ + {"name": "Contoso", "created_by": "User A", "modified_by": "User B"}, + ] + self.client._odata._query_sql.return_value = raw + + result = self.client.query.sql( + "SELECT a.name, creator.fullname as created_by, modifier.fullname as modified_by " + "FROM account a " + "JOIN systemuser creator ON a._createdby_value = creator.systemuserid " + "JOIN systemuser modifier ON a._modifiedby_value = modifier.systemuserid" + ) + + self.assertEqual(len(result), 1) + self.assertEqual(result[0]["created_by"], "User A") + + def test_sql_offset_fetch(self): + """sql() should handle OFFSET FETCH pagination SQL.""" + raw = [{"name": "Page2-Row1"}] + 
self.client._odata._query_sql.return_value = raw + + result = self.client.query.sql("SELECT name FROM account ORDER BY name OFFSET 10 ROWS FETCH NEXT 5 ROWS ONLY") + + self.assertEqual(len(result), 1) + self.client._odata._query_sql.assert_called_once() + + def test_sql_select_star(self): + """sql() should handle SELECT * (auto-expanded by _query_sql).""" + raw = [{"accountid": "1", "name": "Contoso", "revenue": 1000}] + self.client._odata._query_sql.return_value = raw + + result = self.client.query.sql("SELECT * FROM account") + + self.assertEqual(len(result), 1) + self.assertEqual(result[0]["name"], "Contoso") + # ----------------------------------------------------------------- builder def test_builder_returns_query_builder(self): @@ -277,5 +379,556 @@ def test_builder_to_dataframe(self): pd.testing.assert_frame_equal(result, expected_df) +# =================================================================== +# SQL Helper Tests +# =================================================================== + + +class TestSqlColumns(unittest.TestCase): + """Tests for client.query.sql_columns().""" + + def setUp(self): + self.mock_credential = MagicMock(spec=TokenCredential) + self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) + self.client._odata = MagicMock() + + def _mock_columns(self, columns): + self.client._odata._list_columns.return_value = columns + + def test_basic_columns(self): + self._mock_columns( + [ + { + "LogicalName": "accountid", + "AttributeType": "Uniqueidentifier", + "IsPrimaryId": True, + "IsPrimaryName": False, + "DisplayName": {"UserLocalizedLabel": {"Label": "Account"}}, + }, + { + "LogicalName": "name", + "AttributeType": "String", + "IsPrimaryId": False, + "IsPrimaryName": True, + "DisplayName": {"UserLocalizedLabel": {"Label": "Account Name"}}, + }, + { + "LogicalName": "revenue", + "AttributeType": "Money", + "IsPrimaryId": False, + "IsPrimaryName": False, + "DisplayName": {"UserLocalizedLabel": {"Label": 
"Annual Revenue"}}, + }, + ] + ) + cols = self.client.query.sql_columns("account") + self.assertEqual(len(cols), 3) + # PK first, then name, then alphabetical + self.assertEqual(cols[0]["name"], "accountid") + self.assertTrue(cols[0]["is_pk"]) + self.assertEqual(cols[1]["name"], "name") + self.assertTrue(cols[1]["is_name"]) + self.assertEqual(cols[2]["name"], "revenue") + self.assertEqual(cols[2]["label"], "Annual Revenue") + + def test_excludes_system_columns(self): + self._mock_columns( + [ + { + "LogicalName": "name", + "AttributeType": "String", + "IsPrimaryId": False, + "IsPrimaryName": True, + "DisplayName": {"UserLocalizedLabel": {"Label": "Name"}}, + }, + { + "LogicalName": "revenue_base", + "AttributeType": "Money", + "IsPrimaryId": False, + "IsPrimaryName": False, + "DisplayName": {"UserLocalizedLabel": {"Label": "Revenue Base"}}, + }, + { + "LogicalName": "versionnumber", + "AttributeType": "BigInt", + "IsPrimaryId": False, + "IsPrimaryName": False, + "DisplayName": {"UserLocalizedLabel": {"Label": "Version"}}, + }, + ] + ) + cols = self.client.query.sql_columns("account", include_system=False) + names = [c["name"] for c in cols] + self.assertIn("name", names) + self.assertNotIn("revenue_base", names) + self.assertNotIn("versionnumber", names) + + def test_include_system_columns(self): + self._mock_columns( + [ + { + "LogicalName": "name", + "AttributeType": "String", + "IsPrimaryId": False, + "IsPrimaryName": False, + "DisplayName": {"UserLocalizedLabel": {"Label": "Name"}}, + }, + { + "LogicalName": "versionnumber", + "AttributeType": "BigInt", + "IsPrimaryId": False, + "IsPrimaryName": False, + "DisplayName": {"UserLocalizedLabel": {"Label": "Version"}}, + }, + ] + ) + cols = self.client.query.sql_columns("account", include_system=True) + names = [c["name"] for c in cols] + self.assertIn("versionnumber", names) + + def test_empty_table(self): + self._mock_columns([]) + cols = self.client.query.sql_columns("account") + self.assertEqual(cols, []) + + 
def test_excludes_attribute_of_columns(self): + """Columns with AttributeOf set (computed display names) should be excluded.""" + self._mock_columns( + [ + { + "LogicalName": "name", + "AttributeType": "String", + "IsPrimaryId": False, + "IsPrimaryName": True, + "DisplayName": {}, + }, + { + "LogicalName": "createdbyname", + "AttributeType": "String", + "IsPrimaryId": False, + "IsPrimaryName": False, + "DisplayName": {}, + "AttributeOf": "createdby", + }, + ] + ) + cols = self.client.query.sql_columns("account") + names = [c["name"] for c in cols] + self.assertIn("name", names) + self.assertNotIn("createdbyname", names) + + +class TestSqlSelect(unittest.TestCase): + """Tests for client.query.sql_select().""" + + def setUp(self): + self.mock_credential = MagicMock(spec=TokenCredential) + self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) + self.client._odata = MagicMock() + + def test_returns_comma_separated(self): + self.client._odata._list_columns.return_value = [ + { + "LogicalName": "accountid", + "AttributeType": "Uniqueidentifier", + "IsPrimaryId": True, + "IsPrimaryName": False, + "DisplayName": {}, + }, + { + "LogicalName": "name", + "AttributeType": "String", + "IsPrimaryId": False, + "IsPrimaryName": True, + "DisplayName": {}, + }, + { + "LogicalName": "revenue", + "AttributeType": "Money", + "IsPrimaryId": False, + "IsPrimaryName": False, + "DisplayName": {}, + }, + ] + result = self.client.query.sql_select("account") + self.assertIn("accountid", result) + self.assertIn("name", result) + self.assertIn("revenue", result) + self.assertEqual(result.count(","), 2) # 3 cols = 2 commas + + +class TestSqlJoins(unittest.TestCase): + """Tests for client.query.sql_joins().""" + + def setUp(self): + self.mock_credential = MagicMock(spec=TokenCredential) + self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) + self.client._odata = MagicMock() + + def _mock_rels(self, rels): + 
self.client._odata._list_table_relationships.return_value = rels + + def test_outgoing_lookups(self): + self._mock_rels( + [ + { + "ReferencingEntity": "contact", + "ReferencingAttribute": "parentcustomerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "SchemaName": "contact_customer_accounts", + }, + ] + ) + joins = self.client.query.sql_joins("contact") + self.assertEqual(len(joins), 1) + j = joins[0] + self.assertEqual(j["column"], "parentcustomerid") + self.assertEqual(j["target"], "account") + self.assertEqual(j["target_pk"], "accountid") + self.assertIn("JOIN account", j["join_clause"]) + self.assertIn("parentcustomerid", j["join_clause"]) + + def test_ignores_incoming_rels(self): + self._mock_rels( + [ + # This is an incoming relationship (account is referenced, not referencing) + { + "ReferencingEntity": "opportunity", + "ReferencingAttribute": "customerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "SchemaName": "opp_customer_accounts", + }, + ] + ) + joins = self.client.query.sql_joins("account") + self.assertEqual(len(joins), 0) + + def test_polymorphic_returns_multiple(self): + self._mock_rels( + [ + { + "ReferencingEntity": "opportunity", + "ReferencingAttribute": "customerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "SchemaName": "opp_customer_accounts", + }, + { + "ReferencingEntity": "opportunity", + "ReferencingAttribute": "customerid", + "ReferencedEntity": "contact", + "ReferencedAttribute": "contactid", + "SchemaName": "opp_customer_contacts", + }, + ] + ) + joins = self.client.query.sql_joins("opportunity") + self.assertEqual(len(joins), 2) + targets = {j["target"] for j in joins} + self.assertEqual(targets, {"account", "contact"}) + # Both use the same source column + self.assertTrue(all(j["column"] == "customerid" for j in joins)) + + def test_empty_relationships(self): + self._mock_rels([]) + joins = self.client.query.sql_joins("account") + 
self.assertEqual(joins, []) + + +class TestSqlJoin(unittest.TestCase): + """Tests for client.query.sql_join().""" + + def setUp(self): + self.mock_credential = MagicMock(spec=TokenCredential) + self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) + self.client._odata = MagicMock() + + def _mock_rels(self, rels): + self.client._odata._list_table_relationships.return_value = rels + + def test_generates_join_clause(self): + self._mock_rels( + [ + { + "ReferencingEntity": "contact", + "ReferencingAttribute": "parentcustomerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "SchemaName": "contact_customer_accounts", + }, + ] + ) + result = self.client.query.sql_join("contact", "account", from_alias="c", to_alias="a") + self.assertEqual(result, "JOIN account a ON c.parentcustomerid = a.accountid") + + def test_default_aliases(self): + self._mock_rels( + [ + { + "ReferencingEntity": "contact", + "ReferencingAttribute": "parentcustomerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "SchemaName": "contact_customer_accounts", + }, + ] + ) + result = self.client.query.sql_join("contact", "account") + self.assertIn("JOIN account a ON contact.parentcustomerid = a.accountid", result) + + def test_no_relationship_raises(self): + self._mock_rels([]) + with self.assertRaises(ValueError) as ctx: + self.client.query.sql_join("contact", "nonexistent") + self.assertIn("No relationship found", str(ctx.exception)) + + def test_case_insensitive_target(self): + self._mock_rels( + [ + { + "ReferencingEntity": "contact", + "ReferencingAttribute": "ownerid", + "ReferencedEntity": "systemuser", + "ReferencedAttribute": "systemuserid", + "SchemaName": "contact_owner", + }, + ] + ) + result = self.client.query.sql_join("contact", "SystemUser", from_alias="c", to_alias="su") + self.assertIn("JOIN systemuser su", result) + self.assertIn("c.ownerid = su.systemuserid", result) + + +# 
=================================================================== +# OData Helper Tests +# =================================================================== + + +class TestOdataSelect(unittest.TestCase): + """Tests for client.query.odata_select().""" + + def setUp(self): + self.mock_credential = MagicMock(spec=TokenCredential) + self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) + self.client._odata = MagicMock() + + def test_returns_list_of_strings(self): + self.client._odata._list_columns.return_value = [ + { + "LogicalName": "accountid", + "AttributeType": "Uniqueidentifier", + "IsPrimaryId": True, + "IsPrimaryName": False, + "DisplayName": {}, + }, + { + "LogicalName": "name", + "AttributeType": "String", + "IsPrimaryId": False, + "IsPrimaryName": True, + "DisplayName": {}, + }, + ] + result = self.client.query.odata_select("account") + self.assertIsInstance(result, list) + self.assertIn("accountid", result) + self.assertIn("name", result) + + def test_result_usable_in_records_get(self): + """odata_select returns list that matches records.get(select=) format.""" + self.client._odata._list_columns.return_value = [ + { + "LogicalName": "name", + "AttributeType": "String", + "IsPrimaryId": False, + "IsPrimaryName": True, + "DisplayName": {}, + }, + ] + cols = self.client.query.odata_select("account") + self.assertEqual(cols, ["name"]) + + +class TestOdataExpands(unittest.TestCase): + """Tests for client.query.odata_expands().""" + + def setUp(self): + self.mock_credential = MagicMock(spec=TokenCredential) + self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) + self.client._odata = MagicMock() + + def _mock_rels(self, rels): + self.client._odata._list_table_relationships.return_value = rels + + def _mock_entity_set(self, name): + self.client._odata._entity_set_from_schema_name.return_value = name + + def test_outgoing_lookups(self): + self._mock_rels( + [ + { + "ReferencingEntity": 
"contact", + "ReferencingAttribute": "parentcustomerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "ReferencingEntityNavigationPropertyName": "parentcustomerid_account", + "SchemaName": "contact_customer_accounts", + }, + ] + ) + self._mock_entity_set("accounts") + expands = self.client.query.odata_expands("contact") + self.assertEqual(len(expands), 1) + e = expands[0] + self.assertEqual(e["nav_property"], "parentcustomerid_account") + self.assertEqual(e["target_table"], "account") + self.assertEqual(e["target_entity_set"], "accounts") + self.assertEqual(e["lookup_attribute"], "parentcustomerid") + + def test_ignores_incoming_rels(self): + self._mock_rels( + [ + { + "ReferencingEntity": "opportunity", + "ReferencingAttribute": "customerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "ReferencingEntityNavigationPropertyName": "customerid_account", + "SchemaName": "opp_customer", + }, + ] + ) + expands = self.client.query.odata_expands("account") + self.assertEqual(len(expands), 0) + + def test_polymorphic_returns_multiple(self): + self._mock_rels( + [ + { + "ReferencingEntity": "opportunity", + "ReferencingAttribute": "customerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "ReferencingEntityNavigationPropertyName": "customerid_account", + "SchemaName": "opp_customer_accounts", + }, + { + "ReferencingEntity": "opportunity", + "ReferencingAttribute": "customerid", + "ReferencedEntity": "contact", + "ReferencedAttribute": "contactid", + "ReferencingEntityNavigationPropertyName": "customerid_contact", + "SchemaName": "opp_customer_contacts", + }, + ] + ) + self._mock_entity_set("accounts") + expands = self.client.query.odata_expands("opportunity") + self.assertEqual(len(expands), 2) + nav_props = {e["nav_property"] for e in expands} + self.assertEqual(nav_props, {"customerid_account", "customerid_contact"}) + + +class TestOdataExpand(unittest.TestCase): + """Tests for 
client.query.odata_expand().""" + + def setUp(self): + self.mock_credential = MagicMock(spec=TokenCredential) + self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) + self.client._odata = MagicMock() + + def test_returns_nav_property(self): + self.client._odata._list_table_relationships.return_value = [ + { + "ReferencingEntity": "contact", + "ReferencingAttribute": "parentcustomerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "ReferencingEntityNavigationPropertyName": "parentcustomerid_account", + "SchemaName": "contact_customer_accounts", + }, + ] + self.client._odata._entity_set_from_schema_name.return_value = "accounts" + result = self.client.query.odata_expand("contact", "account") + self.assertEqual(result, "parentcustomerid_account") + + def test_no_relationship_raises(self): + self.client._odata._list_table_relationships.return_value = [] + with self.assertRaises(ValueError) as ctx: + self.client.query.odata_expand("contact", "nonexistent") + self.assertIn("No navigation property", str(ctx.exception)) + + def test_case_insensitive_target(self): + self.client._odata._list_table_relationships.return_value = [ + { + "ReferencingEntity": "contact", + "ReferencingAttribute": "ownerid", + "ReferencedEntity": "systemuser", + "ReferencedAttribute": "systemuserid", + "ReferencingEntityNavigationPropertyName": "ownerid_systemuser", + "SchemaName": "contact_owner", + }, + ] + self.client._odata._entity_set_from_schema_name.return_value = "systemusers" + result = self.client.query.odata_expand("contact", "SystemUser") + self.assertEqual(result, "ownerid_systemuser") + + +class TestOdataBind(unittest.TestCase): + """Tests for client.query.odata_bind().""" + + def setUp(self): + self.mock_credential = MagicMock(spec=TokenCredential) + self.client = DataverseClient("https://example.crm.dynamics.com", self.mock_credential) + self.client._odata = MagicMock() + + def test_returns_bind_dict(self): + 
self.client._odata._list_table_relationships.return_value = [ + { + "ReferencingEntity": "contact", + "ReferencingAttribute": "parentcustomerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "ReferencingEntityNavigationPropertyName": "parentcustomerid_account", + "SchemaName": "contact_customer_accounts", + }, + ] + self.client._odata._entity_set_from_schema_name.return_value = "accounts" + + guid = "12345678-1234-1234-1234-123456789abc" + result = self.client.query.odata_bind("contact", "account", guid) + self.assertIsInstance(result, dict) + self.assertEqual(len(result), 1) + key = list(result.keys())[0] + self.assertEqual(key, "parentcustomerid_account@odata.bind") + self.assertEqual(result[key], f"/accounts({guid})") + + def test_no_relationship_raises(self): + self.client._odata._list_table_relationships.return_value = [] + with self.assertRaises(ValueError): + self.client.query.odata_bind("contact", "nonexistent", "guid") + + def test_usable_in_create_payload(self): + """Result can be merged into a create payload via **spread.""" + self.client._odata._list_table_relationships.return_value = [ + { + "ReferencingEntity": "contact", + "ReferencingAttribute": "parentcustomerid", + "ReferencedEntity": "account", + "ReferencedAttribute": "accountid", + "ReferencingEntityNavigationPropertyName": "parentcustomerid_account", + "SchemaName": "contact_customer_accounts", + }, + ] + self.client._odata._entity_set_from_schema_name.return_value = "accounts" + + bind = self.client.query.odata_bind("contact", "account", "some-guid") + payload = {"firstname": "Jane", "lastname": "Doe", **bind} + self.assertIn("parentcustomerid_account@odata.bind", payload) + self.assertEqual(payload["firstname"], "Jane") + + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_tables_operations.py b/tests/unit/test_tables_operations.py index 69f57a58..f96fea3c 100644 --- a/tests/unit/test_tables_operations.py +++ 
b/tests/unit/test_tables_operations.py @@ -398,6 +398,96 @@ def test_delete_alternate_key(self): self.client._odata._delete_alternate_key.assert_called_once_with("new_Product", "key-guid-1") + # -------------------------------------------------------- list_columns + + def test_list_columns(self): + """list_columns() should delegate to _list_columns and return the list.""" + expected = [ + {"LogicalName": "name", "AttributeType": "String"}, + {"LogicalName": "accountid", "AttributeType": "Uniqueidentifier"}, + ] + self.client._odata._list_columns.return_value = expected + + result = self.client.tables.list_columns("account") + + self.client._odata._list_columns.assert_called_once_with("account", select=None, filter=None) + self.assertEqual(result, expected) + + def test_list_columns_with_select_and_filter(self): + """list_columns() should forward select and filter to _list_columns.""" + self.client._odata._list_columns.return_value = [] + + self.client.tables.list_columns( + "account", + select=["LogicalName", "AttributeType"], + filter="AttributeType eq 'String'", + ) + + self.client._odata._list_columns.assert_called_once_with( + "account", + select=["LogicalName", "AttributeType"], + filter="AttributeType eq 'String'", + ) + + # ------------------------------------------------- list_relationships + + def test_list_relationships(self): + """list_relationships() should delegate to _list_relationships and return the list.""" + expected = [ + {"SchemaName": "new_account_orders", "MetadataId": "rel-1"}, + ] + self.client._odata._list_relationships.return_value = expected + + result = self.client.tables.list_relationships() + + self.client._odata._list_relationships.assert_called_once_with(filter=None, select=None) + self.assertEqual(result, expected) + + def test_list_relationships_with_filter_and_select(self): + """list_relationships() should forward filter and select to _list_relationships.""" + self.client._odata._list_relationships.return_value = [] + + 
self.client.tables.list_relationships( + filter="RelationshipType eq 'OneToManyRelationship'", + select=["SchemaName", "ReferencedEntity"], + ) + + self.client._odata._list_relationships.assert_called_once_with( + filter="RelationshipType eq 'OneToManyRelationship'", + select=["SchemaName", "ReferencedEntity"], + ) + + # --------------------------------------------- list_table_relationships + + def test_list_table_relationships(self): + """list_table_relationships() should delegate to _list_table_relationships.""" + expected = [ + {"SchemaName": "rel_1tm", "MetadataId": "r1"}, + {"SchemaName": "rel_mtm", "MetadataId": "r2"}, + ] + self.client._odata._list_table_relationships.return_value = expected + + result = self.client.tables.list_table_relationships("account") + + self.client._odata._list_table_relationships.assert_called_once_with("account", filter=None, select=None) + self.assertEqual(result, expected) + + def test_list_table_relationships_with_filter_and_select(self): + """list_table_relationships() should forward filter and select.""" + self.client._odata._list_table_relationships.return_value = [] + + self.client.tables.list_table_relationships( + "account", + filter="IsManaged eq false", + select=["SchemaName"], + ) + + self.client._odata._list_table_relationships.assert_called_once_with( + "account", + filter="IsManaged eq false", + select=["SchemaName"], + ) + if __name__ == "__main__": unittest.main()