diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 6141667b42c..7e21b51824f 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -6382,6 +6382,12 @@ filesystem: # Local filesystem storage directory. # CLI flag: -runtime-config.filesystem.dir [dir: <string> | default = ""] + +# [Experimental] Synthetic tenant ID used as fallback for runtime config +# defaults. When set, overrides for this tenant ID apply to all tenants without +# per-tenant overrides, before falling back to CLI flag defaults. +# CLI flag: -runtime-config.default-tenant-id +[default_tenant_id: <string> | default = ""] ``` ### `s3_sse_config` diff --git a/docs/configuration/v1-guarantees.md b/docs/configuration/v1-guarantees.md index e52c65a8a5b..c313c1a7087 100644 --- a/docs/configuration/v1-guarantees.md +++ b/docs/configuration/v1-guarantees.md @@ -133,3 +133,5 @@ Currently experimental features are: - Ingester: Active Series Tracker - Per-tenant `active_series_trackers` configuration in runtime config overrides - Counts active series matching PromQL label matchers and exposes `cortex_ingester_active_series_per_tracker` metric +- Runtime Config: Default Tenant ID + - `-runtime-config.default-tenant-id` (string) - Synthetic tenant ID used as fallback for runtime config defaults. Overrides for this tenant apply to all tenants without per-tenant overrides. 
diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index c5b71e68a49..623d7430da4 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -206,7 +206,7 @@ func (t *Cortex) initRuntimeConfig() (services.Service, error) { } func (t *Cortex) initOverridesConfig() (services.Service, error) { - t.OverridesConfig = validation.NewOverrides(t.Cfg.LimitsConfig, t.TenantLimits) + t.OverridesConfig = validation.NewOverridesWithDefaultTenantID(t.Cfg.LimitsConfig, t.TenantLimits, t.Cfg.RuntimeConfig.DefaultTenantID) // overrides don't have operational state, nor do they need to do anything more in starting/stopping phase, // so there is no need to return any service. return nil, nil diff --git a/pkg/util/runtimeconfig/manager.go b/pkg/util/runtimeconfig/manager.go index 7479f5cdcac..bf98b651e31 100644 --- a/pkg/util/runtimeconfig/manager.go +++ b/pkg/util/runtimeconfig/manager.go @@ -36,12 +36,19 @@ type Config struct { Loader Loader `yaml:"-"` StorageConfig bucket.Config `yaml:",inline"` + + // DefaultTenantID is the synthetic tenant ID used as a fallback for default + // runtime config values. When set, overrides for this tenant ID are applied + // to all tenants that do not have their own per-tenant override, before + // falling back to CLI flag defaults. This is an experimental feature. + DefaultTenantID string `yaml:"default_tenant_id"` } // RegisterFlags registers flags. func (mc *Config) RegisterFlags(f *flag.FlagSet) { f.StringVar(&mc.LoadPath, "runtime-config.file", "", "File with the configuration that can be updated in runtime.") f.DurationVar(&mc.ReloadPeriod, "runtime-config.reload-period", 10*time.Second, "How often to check runtime config file.") + f.StringVar(&mc.DefaultTenantID, "runtime-config.default-tenant-id", "", "[Experimental] Synthetic tenant ID used as fallback for runtime config defaults. 
When set, overrides for this tenant ID apply to all tenants without per-tenant overrides, before falling back to CLI flag defaults.") mc.StorageConfig.RegisterFlagsWithPrefixAndBackend("runtime-config.", f, bucket.Filesystem) } diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index f16eb03548f..e2e421ab4ff 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -690,8 +690,9 @@ type TenantLimits interface { // Overrides periodically fetch a set of per-user overrides, and provides convenience // functions for fetching the correct value. type Overrides struct { - defaultLimits *Limits - tenantLimits TenantLimits + defaultLimits *Limits + tenantLimits TenantLimits + defaultTenantID string } // NewOverrides makes a new Overrides. @@ -702,6 +703,18 @@ func NewOverrides(defaults Limits, tenantLimits TenantLimits) *Overrides { } } +// NewOverridesWithDefaultTenantID creates Overrides with a synthetic default tenant ID. +// When a tenant has no per-tenant override and defaultTenantID is non-empty, +// the overrides for defaultTenantID are used before falling back to CLI flag defaults. +// This is an experimental feature. +func NewOverridesWithDefaultTenantID(defaults Limits, tenantLimits TenantLimits, defaultTenantID string) *Overrides { + return &Overrides{ + tenantLimits: tenantLimits, + defaultLimits: &defaults, + defaultTenantID: defaultTenantID, + } +} + // IngestionRate returns the limit on ingester rate (samples per second). 
func (o *Overrides) IngestionRate(userID string) float64 { return o.GetOverridesForUser(userID).IngestionRate @@ -1288,6 +1301,12 @@ func (o *Overrides) GetOverridesForUser(userID string) *Limits { if l != nil { return l } + if o.defaultTenantID != "" && userID != o.defaultTenantID { + l = o.tenantLimits.ByUserID(o.defaultTenantID) + if l != nil { + return l + } + } } return o.defaultLimits } diff --git a/pkg/util/validation/limits_test.go b/pkg/util/validation/limits_test.go index 6c5813e80ba..2c95a255ce0 100644 --- a/pkg/util/validation/limits_test.go +++ b/pkg/util/validation/limits_test.go @@ -266,6 +266,81 @@ func TestOverridesManager_GetOverrides(t *testing.T) { require.Equal(t, 0, ov.MaxLabelsSizeBytes("user2")) } +func TestOverrides_DefaultTenantFallback(t *testing.T) { + defaultTenantID := "__default__" + + tenantLimits := map[string]*Limits{} + + cliDefaults := Limits{ + IngestionRate: 10000, + MaxGlobalSeriesPerUser: 100000, + MaxLabelNamesPerSeries: 30, + } + + // Set up default tenant override with higher ingestion rate + defaultTenantOverride := cliDefaults + defaultTenantOverride.IngestionRate = 50000 + defaultTenantOverride.MaxGlobalSeriesPerUser = 500000 + tenantLimits[defaultTenantID] = &defaultTenantOverride + + ov := NewOverridesWithDefaultTenantID(cliDefaults, newMockTenantLimits(tenantLimits), defaultTenantID) + + // Tenant without override should get default tenant values + require.Equal(t, float64(50000), ov.IngestionRate("user1")) + require.Equal(t, 500000, ov.MaxGlobalSeriesPerUser("user1")) + require.Equal(t, 30, ov.MaxLabelNamesPerSeries("user1")) + + // Add a per-tenant override for user2 + user2Limits := cliDefaults + user2Limits.IngestionRate = 200000 + tenantLimits["user2"] = &user2Limits + + // user2 should get its own override, not the default tenant + require.Equal(t, float64(200000), ov.IngestionRate("user2")) + // user2's MaxGlobalSeriesPerUser comes from its own override (which copied cliDefaults) + require.Equal(t, 100000, 
ov.MaxGlobalSeriesPerUser("user2")) + + // The default tenant itself should get its own limits (not recurse) + require.Equal(t, float64(50000), ov.IngestionRate(defaultTenantID)) + require.Equal(t, 500000, ov.MaxGlobalSeriesPerUser(defaultTenantID)) +} + +func TestOverrides_DefaultTenantDisabled(t *testing.T) { + tenantLimits := map[string]*Limits{} + + cliDefaults := Limits{ + IngestionRate: 10000, + MaxGlobalSeriesPerUser: 100000, + } + + // Default tenant override exists in the map but feature is disabled (empty ID) + defaultOverride := cliDefaults + defaultOverride.IngestionRate = 50000 + tenantLimits["__default__"] = &defaultOverride + + ov := NewOverridesWithDefaultTenantID(cliDefaults, newMockTenantLimits(tenantLimits), "") + + // With empty defaultTenantID, feature is disabled — should get CLI defaults + require.Equal(t, float64(10000), ov.IngestionRate("user1")) + require.Equal(t, 100000, ov.MaxGlobalSeriesPerUser("user1")) +} + +func TestOverrides_DefaultTenantNotInMap(t *testing.T) { + tenantLimits := map[string]*Limits{} + + cliDefaults := Limits{ + IngestionRate: 10000, + MaxGlobalSeriesPerUser: 100000, + } + + // Feature enabled but no __default__ entry in runtime config + ov := NewOverridesWithDefaultTenantID(cliDefaults, newMockTenantLimits(tenantLimits), "__default__") + + // Should fall through to CLI defaults + require.Equal(t, float64(10000), ov.IngestionRate("user1")) + require.Equal(t, 100000, ov.MaxGlobalSeriesPerUser("user1")) +} + func TestLimitsLoadingFromYaml(t *testing.T) { SetDefaultLimitsForYAMLUnmarshalling(Limits{ MaxLabelNameLength: 100, diff --git a/schemas/cortex-config-schema.json b/schemas/cortex-config-schema.json index 915c689d158..aa40e36734d 100644 --- a/schemas/cortex-config-schema.json +++ b/schemas/cortex-config-schema.json @@ -7810,6 +7810,11 @@ "type": "string", "x-cli-flag": "runtime-config.backend" }, + "default_tenant_id": { + "description": "[Experimental] Synthetic tenant ID used as fallback for runtime config 
defaults. When set, overrides for this tenant ID apply to all tenants without per-tenant overrides, before falling back to CLI flag defaults.", + "type": "string", + "x-cli-flag": "runtime-config.default-tenant-id" + }, "file": { "description": "File with the configuration that can be updated in runtime.", "type": "string",