diff --git a/justfile b/justfile index 2a687966..74c9d5aa 100644 --- a/justfile +++ b/justfile @@ -260,7 +260,7 @@ test: # Run unit tests only [group('test')] test-unit: - go test -v -p 1 . ./configresolve/... ./duckdbservice/... ./server/... ./transpiler/... ./internal/... + go test -v -p 1 . ./configresolve/... ./duckdbservice/... ./server/... ./transpiler/... ./internal/... ./tests/manifests/... # Run cache-proxy tests [group('test')] @@ -286,7 +286,7 @@ test-configstore-integration: # Run Kubernetes-only control plane package tests [group('test')] test-controlplane-k8s: - go test -v -count=1 -tags kubernetes ./controlplane ./controlplane/admin ./controlplane/provisioner + go test -v -count=1 -tags kubernetes . ./controlplane ./controlplane/admin ./controlplane/provisioner # Print the test impact plan for the current branch [group('test')] diff --git a/k8s/README.md b/k8s/README.md index dcc154f5..6038599c 100644 --- a/k8s/README.md +++ b/k8s/README.md @@ -43,13 +43,16 @@ The control plane handles TLS, authentication, PostgreSQL wire protocol, and SQL | File | Description | |------|-------------| | `namespace.yaml` | `duckgres` namespace | -| `rbac.yaml` | Control-plane and shared worker ServiceAccounts, Role (pods + secrets), RoleBinding | +| `rbac.yaml` | Control-plane, shared worker, and query-log writer ServiceAccounts plus required Roles/RoleBindings | | `configmap.yaml` | Shared duckgres config (users, extensions, data dir) | | `secret.yaml` | Bearer token secret (auto-populated by CP if empty) | | `managed-warehouse-secrets.yaml` | Local secret payloads referenced by the seeded managed-warehouse contract | | `worker-identity.yaml` | Local worker ServiceAccount referenced by the seeded managed-warehouse contract | | `networkpolicy.yaml` | Restricts worker ingress to CP pods only | | `control-plane-multitenant-local.yaml` | Optional OrbStack-oriented shared-worker control-plane manifest | +| `query-log-kafka-config.example.yaml` | Example ConfigMap for enabling Kafka query-log producer and writer settings | +| `query-log-writer.yaml` | Disabled-by-default query-log writer Deployment and metrics Service | +| `query-log-writer-alerts.example.yaml` | Example PrometheusRule alerts for writer failures, drops, and high retry volume | | `kind/config-store.overlay.yaml` | Compose overlay that attaches local dependency containers to the external Docker `kind` network | | `kind/config-store.seed.sql` | Kind-oriented managed-warehouse seed for the shared-worker flow | | `kind/control-plane.yaml` | Kind-first shared-worker control-plane manifest used by local dev and CI | @@ -90,6 +93,73 @@ For seamless planned deployments, use a rolling strategy with overlap and enough That gives the old replica time to fail readiness, stop taking new pgwire sessions, keep existing pgwire and Flight sessions alive during the drain window, and then force shutdown at the timeout boundary if sessions remain. +## Query Log Kafka Writer + +By default, query logging writes directly to each tenant's DuckLake-backed +`ducklake.system.query_log`. To route query logs through Kafka instead, deploy +the producer config and query-log writer: + +```bash +cp k8s/query-log-kafka-config.example.yaml /tmp/duckgres-query-log-kafka.yaml +# edit brokers, topic, config_store, aws_region, and k8s_worker_namespace +kubectl apply -f k8s/namespace.yaml +kubectl apply -f k8s/rbac.yaml +kubectl apply -f k8s/networkpolicy.yaml +kubectl apply -f /tmp/duckgres-query-log-kafka.yaml +kubectl apply -f k8s/query-log-writer.yaml +# apply or upgrade the relevant control-plane manifest/chart so the +# DUCKGRES_QUERY_LOG_* env refs exist before restarting control-plane pods +kubectl apply -f k8s/kind/control-plane.yaml +kubectl -n duckgres scale deploy/duckgres-query-log-writer --replicas=1 +kubectl -n duckgres rollout restart deploy/duckgres-control-plane +``` + +The control-plane manifests already include optional `DUCKGRES_QUERY_LOG_*` +environment variables from the `duckgres-query-log-kafka` ConfigMap. When the +ConfigMap is absent, direct DuckLake logging remains unchanged. When present +with `sink: kafka`, control-plane pods publish query-log events to Kafka and the +writer consumes the configured topic using `group_id`. The writer Deployment +requires `config_store`, `brokers`, and `topic` from the ConfigMap so missing +runtime config fails at pod start instead of retrying events without a tenant +target. + +Multiple writer replicas can use the same `group_id`; Kafka partition ownership +keeps each partition assigned to one active consumer in the group. Scale replicas +up to the topic partition count when throughput requires it. + +The writer is a privileged infrastructure service. It never executes logged SQL; +it resolves the target tenant by `org_id`, attaches that tenant's DuckLake, and +inserts generated rows into `ducklake.system.query_log`. The reference RBAC +grants read-only Duckling CR access plus `get` on Secrets in the `duckgres` +namespace. If managed-warehouse SecretRefs point to other namespaces, grant the +same get-only Secret access for those namespaces. Kubernetes RBAC does not grant +cloud credentials: when `aws_region` is set and Duckling-backed tenants require +STS, bind the `duckgres-query-log-writer` ServiceAccount to an IAM/Pod +Identity/IRSA role with the same STS permissions needed to resolve tenant object +store credentials. + +Rollback: + +```bash +kubectl -n duckgres scale deploy/duckgres-query-log-writer --replicas=0 +kubectl -n duckgres delete configmap duckgres-query-log-kafka +kubectl -n duckgres rollout restart deploy/duckgres-control-plane +``` + +Useful checks: + +```bash +kubectl -n duckgres get deploy duckgres-query-log-writer +kubectl -n duckgres logs deploy/duckgres-query-log-writer --tail=200 +kubectl -n duckgres port-forward svc/duckgres-query-log-writer-metrics 9090:9090 +curl -s localhost:9090/metrics | rg 'duckgres_query_log_kafka_writer' +``` + +If your cluster runs the Prometheus Operator, adapt and apply +`k8s/query-log-writer-alerts.example.yaml`. Kafka consumer lag usually comes +from your Kafka exporter rather than Duckgres itself, so wire a lag alert from +that metric source alongside these writer-process alerts. + ## Local Development with kind The primary shared-worker workflow now uses [`kind`](https://kind.sigs.k8s.io/). Prerequisites: Docker, `kubectl`, `kind`, and `just`. diff --git a/k8s/control-plane-multitenant-local.yaml b/k8s/control-plane-multitenant-local.yaml index c0d939df..90fa9691 100644 --- a/k8s/control-plane-multitenant-local.yaml +++ b/k8s/control-plane-multitenant-local.yaml @@ -42,6 +42,30 @@ spec: value: "500m" - name: DUCKGRES_K8S_WORKER_MEMORY_REQUEST value: "512Mi" + - name: DUCKGRES_QUERY_LOG_SINK + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: sink + optional: true + - name: DUCKGRES_QUERY_LOG_KAFKA_BROKERS + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: brokers + optional: true + - name: DUCKGRES_QUERY_LOG_KAFKA_TOPIC + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: topic + optional: true + - name: DUCKGRES_QUERY_LOG_KAFKA_CLIENT_ID + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: client_id + optional: true args: - "--mode" - "control-plane" diff --git a/k8s/kind/control-plane.yaml b/k8s/kind/control-plane.yaml index e2326e41..49b403cd 100644 --- a/k8s/kind/control-plane.yaml +++ b/k8s/kind/control-plane.yaml @@ -42,6 +42,30 @@ spec: value: "500m" - name: DUCKGRES_K8S_WORKER_MEMORY_REQUEST value: "512Mi" + - name: DUCKGRES_QUERY_LOG_SINK + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: sink + optional: true + - name: DUCKGRES_QUERY_LOG_KAFKA_BROKERS + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: brokers + optional: true + - name: DUCKGRES_QUERY_LOG_KAFKA_TOPIC + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: topic + optional: true + - name: DUCKGRES_QUERY_LOG_KAFKA_CLIENT_ID + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: client_id + optional: true args: - "--mode" - "control-plane" diff --git a/k8s/namespace.yaml b/k8s/namespace.yaml index b4ed35ac..28b970c0 100644 --- a/k8s/namespace.yaml +++ b/k8s/namespace.yaml @@ -2,3 +2,8 @@ apiVersion: v1 kind: Namespace metadata: name: duckgres +--- +apiVersion: v1 +kind: Namespace +metadata: + name: ducklings diff --git a/k8s/networkpolicy.yaml b/k8s/networkpolicy.yaml index c58c844e..fc8fa579 100644 --- a/k8s/networkpolicy.yaml +++ b/k8s/networkpolicy.yaml @@ -77,5 +77,49 @@ spec: protocol: TCP - port: 5432 protocol: TCP + # Kafka query-log producer sink. Adjust or extend in production when + # brokers listen on different ports. + - port: 9092 + protocol: TCP + - port: 9093 + protocol: TCP + - port: 9094 + protocol: TCP - port: 8816 protocol: TCP +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: duckgres-query-log-writer-boundaries + namespace: duckgres +spec: + podSelector: + matchLabels: + app: duckgres-query-log-writer + policyTypes: + - Egress + egress: + - ports: + - port: 53 + protocol: UDP + - port: 53 + protocol: TCP + # Kubernetes API, STS, object stores, and extension bootstrap. + - port: 80 + protocol: TCP + - port: 443 + protocol: TCP + # Tenant DuckLake metadata stores. + - port: 5432 + protocol: TCP + # Local MinIO/object-store development endpoint. + - port: 9000 + protocol: TCP + # Kafka query-log consumer. + - port: 9092 + protocol: TCP + - port: 9093 + protocol: TCP + - port: 9094 + protocol: TCP diff --git a/k8s/query-log-kafka-config.example.yaml b/k8s/query-log-kafka-config.example.yaml new file mode 100644 index 00000000..afc86e16 --- /dev/null +++ b/k8s/query-log-kafka-config.example.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: duckgres-query-log-kafka + namespace: duckgres +data: + # Set sink=kafka on control-plane pods to publish query-log events instead of + # writing directly to DuckLake. The query-log-writer consumes the same topic + # and writes events into each tenant's ducklake.system.query_log table. + sink: kafka + brokers: "kafka:9092" + topic: "duckgres_query_log" + client_id: "duckgres-query-log" + group_id: "duckgres-query-log-writer" + + # Query-log writer runtime. Use the same config store as the control plane. + config_store: "postgres://duckgres:duckgres@duckgres-config-store:5432/duckgres_config?sslmode=disable" + aws_region: "" + k8s_worker_namespace: "duckgres" diff --git a/k8s/query-log-writer-alerts.example.yaml b/k8s/query-log-writer-alerts.example.yaml new file mode 100644 index 00000000..c4c8a3cb --- /dev/null +++ b/k8s/query-log-writer-alerts.example.yaml @@ -0,0 +1,33 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: duckgres-query-log-writer + namespace: duckgres +spec: + groups: + - name: duckgres-query-log-writer + rules: + - alert: DuckgresQueryLogWriterFailures + expr: increase(duckgres_query_log_kafka_writer_events_total{outcome="failed"}[10m]) > 0 + for: 5m + labels: + severity: warning + annotations: + summary: Duckgres query-log writer is failing to process events + description: Query-log writer failures usually mean Kafka, tenant DuckLake attach, or query_log inserts are failing. Check writer logs before offsets pile up. + - alert: DuckgresQueryLogWriterDrops + expr: increase(duckgres_query_log_kafka_writer_events_total{outcome="dropped"}[10m]) > 0 + for: 5m + labels: + severity: warning + annotations: + summary: Duckgres query-log writer is dropping events + description: Dropped query-log events are intentionally committed invalid or non-targetable events. Check writer logs for the drop reason. + - alert: DuckgresQueryLogWriterRetriesHigh + expr: increase(duckgres_query_log_kafka_writer_events_total{outcome="retried"}[10m]) > 100 + for: 10m + labels: + severity: warning + annotations: + summary: Duckgres query-log writer retry rate is high + description: The writer is retrying Kafka events repeatedly. Check Kafka commit errors, DuckLake attach errors, and tenant metadata-store connectivity. diff --git a/k8s/query-log-writer.yaml b/k8s/query-log-writer.yaml new file mode 100644 index 00000000..0f904f4b --- /dev/null +++ b/k8s/query-log-writer.yaml @@ -0,0 +1,126 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: duckgres-query-log-writer + namespace: duckgres + labels: + app: duckgres-query-log-writer +spec: + # Disabled by default for the reference manifests. Apply + # query-log-kafka-config.example.yaml with real values, then scale this up. + replicas: 0 + selector: + matchLabels: + app: duckgres-query-log-writer + template: + metadata: + labels: + app: duckgres-query-log-writer + annotations: + prometheus.io/scrape: "true" + prometheus.io/path: /metrics + prometheus.io/port: "9090" + spec: + serviceAccountName: duckgres-query-log-writer + securityContext: + runAsNonRoot: true + runAsUser: 1000 + containers: + - name: query-log-writer + image: duckgres:test + imagePullPolicy: IfNotPresent + args: + - "--mode" + - "query-log-writer" + - "--config" + - "/etc/duckgres/duckgres.yaml" + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: DUCKGRES_CONFIG_STORE + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: config_store + - name: DUCKGRES_QUERY_LOG_KAFKA_BROKERS + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: brokers + - name: DUCKGRES_QUERY_LOG_KAFKA_TOPIC + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: topic + - name: DUCKGRES_QUERY_LOG_KAFKA_CLIENT_ID + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: client_id + optional: true + - name: DUCKGRES_QUERY_LOG_KAFKA_GROUP_ID + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: group_id + optional: true + - name: DUCKGRES_AWS_REGION + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: aws_region + optional: true + - name: DUCKGRES_K8S_WORKER_NAMESPACE + valueFrom: + configMapKeyRef: + name: duckgres-query-log-kafka + key: k8s_worker_namespace + optional: true + ports: + - name: metrics + containerPort: 9090 + protocol: TCP + volumeMounts: + - name: config + mountPath: /etc/duckgres + readOnly: true + - name: data + mountPath: /data + securityContext: + allowPrivilegeEscalation: false + resources: + requests: + cpu: "100m" + memory: "256Mi" + limits: + cpu: "1" + memory: "1Gi" + volumes: + - name: config + configMap: + name: duckgres-config + - name: data + emptyDir: {} +--- +apiVersion: v1 +kind: Service +metadata: + name: duckgres-query-log-writer-metrics + namespace: duckgres + labels: + app: duckgres-query-log-writer +spec: + type: ClusterIP + ports: + - name: metrics + port: 9090 + targetPort: metrics + protocol: TCP + selector: + app: duckgres-query-log-writer diff --git a/k8s/rbac.yaml b/k8s/rbac.yaml index e3959815..860d9e25 100644 --- a/k8s/rbac.yaml +++ b/k8s/rbac.yaml @@ -11,6 +11,12 @@ metadata: namespace: duckgres automountServiceAccountToken: false --- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: duckgres-query-log-writer + namespace: duckgres +--- apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: @@ -47,3 +53,56 @@ roleRef: kind: Role name: duckgres-control-plane apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: duckgres-query-log-writer-runtime + namespace: duckgres +rules: + # Resolve SecretRef-backed metadata-store and object-store credentials for + # tenant DuckLake attachments. Grant equivalent get-only Roles in any + # additional namespaces referenced by managed-warehouse SecretRefs. + - apiGroups: [""] + resources: ["secrets"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: duckgres-query-log-writer-runtime + namespace: duckgres +subjects: + - kind: ServiceAccount + name: duckgres-query-log-writer + namespace: duckgres +roleRef: + kind: Role + name: duckgres-query-log-writer-runtime + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: duckgres-query-log-writer-ducklings + namespace: ducklings +rules: + # Read Duckling status to reuse the same tenant activation target and + # temporary object-store credential resolution as workers. + - apiGroups: ["k8s.posthog.com"] + resources: ["ducklings"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: duckgres-query-log-writer-ducklings + namespace: ducklings +subjects: + - kind: ServiceAccount + name: duckgres-query-log-writer + namespace: duckgres +roleRef: + kind: Role + name: duckgres-query-log-writer-ducklings + apiGroup: rbac.authorization.k8s.io diff --git a/querylog_writer_mode_kubernetes.go b/querylog_writer_mode_kubernetes.go index 3a199555..51b788e2 100644 --- a/querylog_writer_mode_kubernetes.go +++ b/querylog_writer_mode_kubernetes.go @@ -28,6 +28,8 @@ type staticQueryLogDuckLakeConfigResolver struct { cfg server.Config } +var queryLogWriterBootstrapBundledExtensions = server.BootstrapBundledExtensions + func (r staticQueryLogDuckLakeConfigResolver) ResolveQueryLogDuckLakeConfig(context.Context, string) (server.QueryLogDuckLakeResolvedConfig, error) { if r.cfg.DuckLake.MetadataStore == "" { return server.QueryLogDuckLakeResolvedConfig{}, errors.New("querylog: static DuckLake metadata store is required") @@ -83,6 +85,9 @@ func runQueryLogWriter(ctx context.Context, cfg server.Config, resolved configre if !cfg.QueryLog.Enabled { return errors.New("querylog: query log is disabled") } + if err := queryLogWriterBootstrapBundledExtensions(cfg.DataDir); err != nil { + return fmt.Errorf("querylog: bootstrap bundled DuckDB extensions: %w", err) + } metricsSrv := cliboot.InitMetrics() defer shutdownQueryLogWriterMetrics(metricsSrv) diff --git a/querylog_writer_mode_kubernetes_test.go b/querylog_writer_mode_kubernetes_test.go new file mode 100644 index 00000000..22d10e0f --- /dev/null +++ b/querylog_writer_mode_kubernetes_test.go @@ -0,0 +1,45 @@ +//go:build kubernetes + +package main + +import ( + "context" + "errors" + "testing" + + "github.com/posthog/duckgres/configresolve" + "github.com/posthog/duckgres/server" +) + +func TestRunQueryLogWriterBootstrapsBundledExtensions(t *testing.T) { + prevBootstrap := queryLogWriterBootstrapBundledExtensions + t.Cleanup(func() { + queryLogWriterBootstrapBundledExtensions = prevBootstrap + }) + + bootstrapErr := errors.New("bootstrap failed") + var gotDataDir string + queryLogWriterBootstrapBundledExtensions = func(dataDir string) error { + gotDataDir = dataDir + return bootstrapErr + } + + cfg := server.Config{ + DataDir: "/tmp/query-log-writer-test-data", + QueryLog: server.QueryLogConfig{ + Enabled: true, + Kafka: server.QueryLogKafkaConfig{ + Brokers: []string{"localhost:9092"}, + Topic: "query-log", + }, + }, + } + + err := runQueryLogWriter(context.Background(), cfg, configresolve.Resolved{}) + if !errors.Is(err, bootstrapErr) { + t.Fatalf("expected bootstrap error, got %v", err) + } + if gotDataDir != cfg.DataDir { + t.Fatalf("bootstrap data dir = %q, want %q", gotDataDir, cfg.DataDir) + } +} diff --git a/tests/e2e-mw-dev/manifests.tmpl.yaml b/tests/e2e-mw-dev/manifests.tmpl.yaml index 1c739436..cc825993 100644 --- a/tests/e2e-mw-dev/manifests.tmpl.yaml +++ b/tests/e2e-mw-dev/manifests.tmpl.yaml @@ -285,6 +285,17 @@ spec: # settle and two org-default-profile propagation waits — which drop # from 40s each to CONFIG_POLL_SETTLE (12s) on the back of it. - { name: DUCKGRES_CONFIG_POLL_INTERVAL, value: "5s" } + # Optional query-log Kafka producer wiring. The ConfigMap is not + # created by default, so the harness keeps direct DuckLake query-log + # writes unless a follow-up e2e path explicitly enables Kafka. + - name: DUCKGRES_QUERY_LOG_SINK + valueFrom: { configMapKeyRef: { name: duckgres-query-log-kafka, key: sink, optional: true } } + - name: DUCKGRES_QUERY_LOG_KAFKA_BROKERS + valueFrom: { configMapKeyRef: { name: duckgres-query-log-kafka, key: brokers, optional: true } } + - name: DUCKGRES_QUERY_LOG_KAFKA_TOPIC + valueFrom: { configMapKeyRef: { name: duckgres-query-log-kafka, key: topic, optional: true } } + - name: DUCKGRES_QUERY_LOG_KAFKA_CLIENT_ID + valueFrom: { configMapKeyRef: { name: duckgres-query-log-kafka, key: client_id, optional: true } } - name: DUCKGRES_INTERNAL_SECRET valueFrom: { secretKeyRef: { name: duckgres-tokens, key: internal-secret } } # Previous internal secrets still accepted during rotation; the diff --git a/tests/manifests/manifests_test.go b/tests/manifests/manifests_test.go index e90e8749..e7f36d1b 100644 --- a/tests/manifests/manifests_test.go +++ b/tests/manifests/manifests_test.go @@ -55,6 +55,18 @@ func TestControlPlaneRBACDefinesSharedWorkerServiceAccount(t *testing.T) { } } +func TestNamespaceManifestIncludesDucklingsNamespace(t *testing.T) { + content := readManifest(t, "k8s", "namespace.yaml") + for _, want := range []string{ + "name: duckgres", + "name: ducklings", + } { + if !strings.Contains(content, want) { + t.Fatalf("expected %q in k8s/namespace.yaml", want) + } + } +} + func TestNetworkPolicyAllowsControlPlaneToReachWorkerGRPC(t *testing.T) { manifest := readManifest(t, "k8s", "networkpolicy.yaml") @@ -81,6 +93,152 @@ func TestNetworkPolicyAllowsControlPlaneToReachWorkerGRPC(t *testing.T) { } } +func TestQueryLogWriterManifestDefinesDeployment(t *testing.T) { + content := readManifest(t, "k8s", "query-log-writer.yaml") + for _, want := range []string{ + "kind: Deployment", + "name: duckgres-query-log-writer", + "serviceAccountName: duckgres-query-log-writer", + "- \"--mode\"", + "- \"query-log-writer\"", + "name: metrics", + "containerPort: 9090", + "name: DUCKGRES_CONFIG_STORE", + "name: DUCKGRES_QUERY_LOG_KAFKA_BROKERS", + "name: DUCKGRES_QUERY_LOG_KAFKA_TOPIC", + "name: DUCKGRES_QUERY_LOG_KAFKA_GROUP_ID", + "name: config", + "mountPath: /etc/duckgres", + "name: data", + "mountPath: /data", + } { + if !strings.Contains(content, want) { + t.Fatalf("expected %q in k8s/query-log-writer.yaml", want) + } + } +} + +func TestQueryLogWriterManifestRequiresRuntimeConfig(t *testing.T) { + content := readManifest(t, "k8s", "query-log-writer.yaml") + for _, name := range []string{ + "DUCKGRES_CONFIG_STORE", + "DUCKGRES_QUERY_LOG_KAFKA_BROKERS", + "DUCKGRES_QUERY_LOG_KAFKA_TOPIC", + } { + block := envBlock(t, content, name) + if strings.Contains(block, "optional: true") { + t.Fatalf("expected %s to be required in k8s/query-log-writer.yaml", name) + } + } +} + +func TestControlPlaneManifestsExposeOptionalKafkaQueryLogEnv(t *testing.T) { + for _, parts := range [][]string{ + {"k8s", "control-plane-multitenant-local.yaml"}, + {"k8s", "kind", "control-plane.yaml"}, + } { + content := readManifest(t, parts...) + for _, want := range []string{ + "name: DUCKGRES_QUERY_LOG_SINK", + "name: DUCKGRES_QUERY_LOG_KAFKA_BROKERS", + "name: DUCKGRES_QUERY_LOG_KAFKA_TOPIC", + "name: DUCKGRES_QUERY_LOG_KAFKA_CLIENT_ID", + "optional: true", + } { + if !strings.Contains(content, want) { + t.Fatalf("expected %q in %s", want, filepath.Join(parts...)) + } + } + } +} + +func TestRBACIncludesQueryLogWriterAccess(t *testing.T) { + content := readManifest(t, "k8s", "rbac.yaml") + for _, want := range []string{ + "name: duckgres-query-log-writer", + "name: duckgres-query-log-writer-runtime", + `resources: ["secrets"]`, + `verbs: ["get"]`, + "name: duckgres-query-log-writer-ducklings", + "namespace: ducklings", + `apiGroups: ["k8s.posthog.com"]`, + `resources: ["ducklings"]`, + `verbs: ["get"]`, + } { + if !strings.Contains(content, want) { + t.Fatalf("expected %q in k8s/rbac.yaml", want) + } + } + if strings.Contains(content, "kind: ClusterRole\nmetadata:\n name: duckgres-query-log-writer-ducklings") || + strings.Contains(content, "kind: ClusterRoleBinding\nmetadata:\n name: duckgres-query-log-writer-ducklings") { + t.Fatal("expected query-log writer Duckling access to be namespace-scoped") + } +} + +func TestNetworkPolicyAllowsQueryLogKafkaTraffic(t *testing.T) { + manifest := readManifest(t, "k8s", "networkpolicy.yaml") + + var controlPlaneDoc string + var writerDoc string + for _, doc := range strings.Split(manifest, "---") { + switch { + case strings.Contains(doc, "name: duckgres-control-plane-boundaries"): + controlPlaneDoc = doc + case strings.Contains(doc, "name: duckgres-query-log-writer-boundaries"): + writerDoc = doc + } + } + if controlPlaneDoc == "" { + t.Fatal("could not find control-plane network policy in k8s/networkpolicy.yaml") + } + if writerDoc == "" { + t.Fatal("could not find query-log writer network policy in k8s/networkpolicy.yaml") + } + for _, doc := range []struct { + name string + body string + }{ + {name: "control-plane", body: controlPlaneDoc}, + {name: "query-log-writer", body: writerDoc}, + } { + for _, want := range []string{ + "- port: 9092", + "protocol: TCP", + } { + if !strings.Contains(doc.body, want) { + t.Fatalf("expected %q in %s network policy", want, doc.name) + } + } + } + if !strings.Contains(writerDoc, "- port: 80") { + t.Fatal("expected query-log writer network policy to allow HTTP extension bootstrap/object-store egress") + } +} + +func TestQueryLogWriterAlertsReferenceWriterMetrics(t *testing.T) { + content := readManifest(t, "k8s", "query-log-writer-alerts.example.yaml") + for _, want := range []string{ + "kind: PrometheusRule", + "DuckgresQueryLogWriterFailures", + "DuckgresQueryLogWriterDrops", + "DuckgresQueryLogWriterRetriesHigh", + `duckgres_query_log_kafka_writer_events_total{outcome="failed"}`, + `duckgres_query_log_kafka_writer_events_total{outcome="dropped"}`, + `duckgres_query_log_kafka_writer_events_total{outcome="retried"}`, + } { + if !strings.Contains(content, want) { + t.Fatalf("expected %q in k8s/query-log-writer-alerts.example.yaml", want) + } + } +} + +func TestManifestTestsRunInUnitRecipe(t *testing.T) { + content := readManifest(t, "justfile") + if !strings.Contains(content, "./tests/manifests/...") { + t.Fatal("expected just test-unit to include ./tests/manifests/...") + } +} + // readManifest reads a file relative to the project root (located by walking up // to the go.mod), matching how the manifests ship in the repo. func readManifest(t *testing.T, parts ...string) string { @@ -110,3 +268,16 @@ func projectRoot(t *testing.T) string { dir = parent } } + +func envBlock(t *testing.T, content, name string) string { + t.Helper() + start := strings.Index(content, "name: "+name) + if start == -1 { + t.Fatalf("could not find env var %s", name) + } + rest := content[start:] + if next := strings.Index(rest[len("name: "+name):], "\n - name: "); next != -1 { + return rest[:len("name: "+name)+next] + } + return rest +}