diff --git a/PROJECT b/PROJECT index 30fabe51c..2d2ba3573 100644 --- a/PROJECT +++ b/PROJECT @@ -23,4 +23,12 @@ resources: kind: NVIDIADriver path: github.com/NVIDIA/gpu-operator/api/v1alpha1 version: v1alpha1 +- api: + crdVersion: v1 + controller: true + domain: com + group: nvidia + kind: GPUClusterConfig + path: github.com/NVIDIA/gpu-operator/api/v1alpha1 + version: v1alpha1 version: "3" diff --git a/api/nvidia/v1alpha1/gpuclusterconfig_types.go b/api/nvidia/v1alpha1/gpuclusterconfig_types.go new file mode 100644 index 000000000..bff2887e0 --- /dev/null +++ b/api/nvidia/v1alpha1/gpuclusterconfig_types.go @@ -0,0 +1,238 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package v1alpha1 + +import ( + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" +) + +const ( + GPUClusterConfigCRDName = "GPUClusterConfig" +) + +const ( + // Ignored marks a duplicate GPUClusterConfig that the singleton controller does not reconcile. + Ignored State = "ignored" +) + +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. + +// GPUClusterConfigSpec defines the desired state of GPUClusterConfig, the DRA-based +// software-enablement stack. Unlike ClusterPolicy, it does not manage the NVIDIA driver +// or the device-plugin; the driver is installed separately (host-installed or via an +// NVIDIADriver CR) and GPUClusterConfig waits for driver readiness before proceeding. +type GPUClusterConfigSpec struct { + // DRADriver defines the spec for the NVIDIA DRA driver stack (gpus + computeDomains). + DRADriver DRADriverSpec `json:"draDriver"` + + // DCGM defines the spec for the standalone NVIDIA DCGM hostengine. Disabled by default; + // when disabled, dcgm-exporter uses its embedded nv-hostengine. NOTE: the reused enabled + // field carries no server-side default and its IsEnabled() treats nil as enabled, so the + // controller must default nil enabled to disabled here. + DCGM *nvidiav1.DCGMSpec `json:"dcgm,omitempty"` + + // DCGMExporter defines the spec for NVIDIA DCGM Exporter. Enabled by default, but the + // reused enabled field carries no server-side default; the controller defaults nil enabled. + DCGMExporter *nvidiav1.DCGMExporterSpec `json:"dcgmExporter,omitempty"` + + // GFD defines the spec for the standalone GPU Feature Discovery operand. Enabled by default, + // but the reused enabled field carries no server-side default; the controller defaults nil enabled. + GFD *nvidiav1.GPUFeatureDiscoverySpec `json:"gfd,omitempty"` + + // HostPaths defines the host paths used in host-path volumes for various components. + HostPaths nvidiav1.HostPathsSpec `json:"hostPaths,omitempty"` + + // Daemonsets defines the common configuration applied to all DaemonSets deployed + // by the GPUClusterConfig controller. + Daemonsets nvidiav1.DaemonsetsSpec `json:"daemonsets,omitempty"` +} + +// DRADriverSpec defines the spec for the NVIDIA DRA driver stack. There is no top-level +// enabled toggle; enablement is per capability (gpus / computeDomains). +type DRADriverSpec struct { + // NVIDIA DRA driver image repository + // +kubebuilder:validation:Optional + Repository string `json:"repository,omitempty"` + + // NVIDIA DRA driver image name + // +kubebuilder:validation:Pattern=[a-zA-Z0-9\-]+ + Image string `json:"image,omitempty"` + + // NVIDIA DRA driver image tag + // +kubebuilder:validation:Optional + Version string `json:"version,omitempty"` + + // Image pull policy + // +kubebuilder:validation:Optional + ImagePullPolicy string `json:"imagePullPolicy,omitempty"` + + // Image pull secrets + // +kubebuilder:validation:Optional + ImagePullSecrets []string `json:"imagePullSecrets,omitempty"` + + // FeatureGates is a map of feature gate names to a boolean enabling or disabling each. + // It is rendered as the FEATURE_GATES environment variable on the DRA driver containers. + // +kubebuilder:validation:Optional + FeatureGates map[string]bool `json:"featureGates,omitempty"` + + // GPUs configures the gpu.nvidia.com capability of the DRA driver. + GPUs DRADriverGPUsSpec `json:"gpus,omitempty"` + + // ComputeDomains configures the compute-domain capability of the DRA driver. + ComputeDomains DRADriverComputeDomainsSpec `json:"computeDomains,omitempty"` +} + +// IsGPUsEnabled returns true if the gpus capability of the DRA driver is enabled. +func (d *DRADriverSpec) IsGPUsEnabled() bool { + return d.GPUs.Enabled != nil && *d.GPUs.Enabled +} + +// IsComputeDomainsEnabled returns true if the computeDomains capability of the DRA driver is enabled. +func (d *DRADriverSpec) IsComputeDomainsEnabled() bool { + return d.ComputeDomains.Enabled != nil && *d.ComputeDomains.Enabled +} + +// DRADriverGPUsSpec configures the gpus capability of the DRA driver. It maps onto the +// gpus container of the upstream kubelet-plugin DaemonSet. +type DRADriverGPUsSpec struct { + // Enabled indicates if the gpus capability of the DRA driver is enabled. + // +kubebuilder:default=true + Enabled *bool `json:"enabled,omitempty"` + + // KubeletPlugin configures the kubelet-plugin workload for the gpus capability. + KubeletPlugin DRADriverKubeletPluginSpec `json:"kubeletPlugin,omitempty"` +} + +// DRADriverComputeDomainsSpec configures the computeDomains capability of the DRA driver. +// The kubeletPlugin maps onto the computeDomains container of the upstream kubelet-plugin +// DaemonSet; the controller is a separate Deployment. +type DRADriverComputeDomainsSpec struct { + // Enabled indicates if the computeDomains capability of the DRA driver is enabled. + // +kubebuilder:default=true + Enabled *bool `json:"enabled,omitempty"` + + // Controller configures the compute-domain controller Deployment. + Controller DRADriverControllerSpec `json:"controller,omitempty"` + + // KubeletPlugin configures the kubelet-plugin workload for the computeDomains capability. + KubeletPlugin DRADriverKubeletPluginSpec `json:"kubeletPlugin,omitempty"` +} + +// DRADriverKubeletPluginSpec defines configuration for a DRA driver kubelet-plugin container. +// Per-component scheduling fields augment/override the shared daemonsets defaults for this +// workload. The gpus and computeDomains kubelet-plugin blocks map onto the two containers of +// a single kubelet-plugin DaemonSet, so the renderer reconciles pod-level scheduling when +// both blocks set it. +type DRADriverKubeletPluginSpec struct { + // Optional: List of environment variables + // +kubebuilder:validation:Optional + Env []nvidiav1.EnvVar `json:"env,omitempty"` + + // Optional: Define resources requests and limits for the kubelet-plugin container + // +kubebuilder:validation:Optional + Resources *nvidiav1.ResourceRequirements `json:"resources,omitempty"` + + // HealthcheckPort is the port running a gRPC health service checked by a livenessProbe. + // Set to a negative value to disable the service and the probe. + // +kubebuilder:validation:Optional + HealthcheckPort *int32 `json:"healthcheckPort,omitempty"` + + // +kubebuilder:validation:Optional + // PriorityClassName for the kubelet-plugin DaemonSet pods + PriorityClassName string `json:"priorityClassName,omitempty"` + + // +kubebuilder:validation:Optional + // NodeSelector for the kubelet-plugin DaemonSet pods + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + + // +kubebuilder:validation:Optional + // Tolerations for the kubelet-plugin DaemonSet pods + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + + // +kubebuilder:validation:Optional + // Affinity rules for the kubelet-plugin DaemonSet pods + Affinity *corev1.Affinity `json:"affinity,omitempty"` +} + +// DRADriverControllerSpec defines configuration for the compute-domain controller Deployment. +// As a Deployment (not a DaemonSet) it carries its own scheduling configuration rather than +// inheriting the shared daemonsets defaults. +type DRADriverControllerSpec struct { + // Optional: List of environment variables + // +kubebuilder:validation:Optional + Env []nvidiav1.EnvVar `json:"env,omitempty"` + + // Optional: Define resources requests and limits for the controller container + // +kubebuilder:validation:Optional + Resources *nvidiav1.ResourceRequirements `json:"resources,omitempty"` + + // +kubebuilder:validation:Optional + // PriorityClassName for the controller Deployment pods + PriorityClassName string `json:"priorityClassName,omitempty"` + + // +kubebuilder:validation:Optional + // NodeSelector for the controller Deployment pods + NodeSelector map[string]string `json:"nodeSelector,omitempty"` + + // +kubebuilder:validation:Optional + // Tolerations for the controller Deployment pods + Tolerations []corev1.Toleration `json:"tolerations,omitempty"` + + // +kubebuilder:validation:Optional + // Affinity rules for the controller Deployment pods + Affinity *corev1.Affinity `json:"affinity,omitempty"` +} + +// GPUClusterConfigStatus defines the observed state of GPUClusterConfig +type GPUClusterConfigStatus struct { + // +kubebuilder:validation:Enum=ignored;ready;notReady;disabled + // State indicates the status of the GPUClusterConfig instance + State State `json:"state"` + // Namespace indicates the namespace in which the operator and operands are installed + Namespace string `json:"namespace,omitempty"` + // Conditions is a list of conditions representing the GPUClusterConfig's current state. + Conditions []metav1.Condition `json:"conditions,omitempty"` +} + +// +genclient +// +genclient:nonNamespaced +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status +//+kubebuilder:resource:scope=Cluster,shortName={"gcc"} +//+kubebuilder:printcolumn:name="Status",type=string,JSONPath=`.status.state`,priority=0 +//+kubebuilder:printcolumn:name="Age",type=string,JSONPath=`.metadata.creationTimestamp`,priority=0 + +// GPUClusterConfig is the Schema for the gpuclusterconfigs API +type GPUClusterConfig struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec GPUClusterConfigSpec `json:"spec,omitempty"` + Status GPUClusterConfigStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// GPUClusterConfigList contains a list of GPUClusterConfig +type GPUClusterConfigList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []GPUClusterConfig `json:"items"` +} diff --git a/api/nvidia/v1alpha1/groupversion_info.go b/api/nvidia/v1alpha1/groupversion_info.go index 76c5b1300..131d924c9 100644 --- a/api/nvidia/v1alpha1/groupversion_info.go +++ b/api/nvidia/v1alpha1/groupversion_info.go @@ -38,6 +38,7 @@ var ( func addKnownTypes(scheme *runtime.Scheme) error { scheme.AddKnownTypes(SchemeGroupVersion, &NVIDIADriver{}, &NVIDIADriverList{}) + scheme.AddKnownTypes(SchemeGroupVersion, &GPUClusterConfig{}, &GPUClusterConfigList{}) metav1.AddToGroupVersion(scheme, SchemeGroupVersion) return nil } diff --git a/api/nvidia/v1alpha1/zz_generated.deepcopy.go b/api/nvidia/v1alpha1/zz_generated.deepcopy.go index 466ecebbe..70c5b716d 100644 --- a/api/nvidia/v1alpha1/zz_generated.deepcopy.go +++ b/api/nvidia/v1alpha1/zz_generated.deepcopy.go @@ -21,7 +21,8 @@ package v1alpha1 import ( - "k8s.io/api/core/v1" + "github.com/NVIDIA/gpu-operator/api/nvidia/v1" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" ) @@ -41,6 +42,171 @@ func (in *ContainerProbeSpec) DeepCopy() *ContainerProbeSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DRADriverComputeDomainsSpec) DeepCopyInto(out *DRADriverComputeDomainsSpec) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } + in.Controller.DeepCopyInto(&out.Controller) + in.KubeletPlugin.DeepCopyInto(&out.KubeletPlugin) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DRADriverComputeDomainsSpec. +func (in *DRADriverComputeDomainsSpec) DeepCopy() *DRADriverComputeDomainsSpec { + if in == nil { + return nil + } + out := new(DRADriverComputeDomainsSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DRADriverControllerSpec) DeepCopyInto(out *DRADriverControllerSpec) { + *out = *in + if in.Env != nil { + in, out := &in.Env, &out.Env + *out = make([]v1.EnvVar, len(*in)) + copy(*out, *in) + } + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(v1.ResourceRequirements) + (*in).DeepCopyInto(*out) + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Tolerations != nil { + in, out := &in.Tolerations, &out.Tolerations + *out = make([]corev1.Toleration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Affinity != nil { + in, out := &in.Affinity, &out.Affinity + *out = new(corev1.Affinity) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DRADriverControllerSpec. +func (in *DRADriverControllerSpec) DeepCopy() *DRADriverControllerSpec { + if in == nil { + return nil + } + out := new(DRADriverControllerSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DRADriverGPUsSpec) DeepCopyInto(out *DRADriverGPUsSpec) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = new(bool) + **out = **in + } + in.KubeletPlugin.DeepCopyInto(&out.KubeletPlugin) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DRADriverGPUsSpec. +func (in *DRADriverGPUsSpec) DeepCopy() *DRADriverGPUsSpec { + if in == nil { + return nil + } + out := new(DRADriverGPUsSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DRADriverKubeletPluginSpec) DeepCopyInto(out *DRADriverKubeletPluginSpec) { + *out = *in + if in.Env != nil { + in, out := &in.Env, &out.Env + *out = make([]v1.EnvVar, len(*in)) + copy(*out, *in) + } + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(v1.ResourceRequirements) + (*in).DeepCopyInto(*out) + } + if in.HealthcheckPort != nil { + in, out := &in.HealthcheckPort, &out.HealthcheckPort + *out = new(int32) + **out = **in + } + if in.NodeSelector != nil { + in, out := &in.NodeSelector, &out.NodeSelector + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Tolerations != nil { + in, out := &in.Tolerations, &out.Tolerations + *out = make([]corev1.Toleration, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.Affinity != nil { + in, out := &in.Affinity, &out.Affinity + *out = new(corev1.Affinity) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DRADriverKubeletPluginSpec. +func (in *DRADriverKubeletPluginSpec) DeepCopy() *DRADriverKubeletPluginSpec { + if in == nil { + return nil + } + out := new(DRADriverKubeletPluginSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DRADriverSpec) DeepCopyInto(out *DRADriverSpec) { + *out = *in + if in.ImagePullSecrets != nil { + in, out := &in.ImagePullSecrets, &out.ImagePullSecrets + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.FeatureGates != nil { + in, out := &in.FeatureGates, &out.FeatureGates + *out = make(map[string]bool, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + in.GPUs.DeepCopyInto(&out.GPUs) + in.ComputeDomains.DeepCopyInto(&out.ComputeDomains) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DRADriverSpec. +func (in *DRADriverSpec) DeepCopy() *DRADriverSpec { + if in == nil { + return nil + } + out := new(DRADriverSpec) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DriverCertConfigSpec) DeepCopyInto(out *DriverCertConfigSpec) { *out = *in @@ -166,6 +332,120 @@ func (in *GDRCopySpec) DeepCopy() *GDRCopySpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GPUClusterConfig) DeepCopyInto(out *GPUClusterConfig) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUClusterConfig. +func (in *GPUClusterConfig) DeepCopy() *GPUClusterConfig { + if in == nil { + return nil + } + out := new(GPUClusterConfig) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GPUClusterConfig) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GPUClusterConfigList) DeepCopyInto(out *GPUClusterConfigList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]GPUClusterConfig, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUClusterConfigList. +func (in *GPUClusterConfigList) DeepCopy() *GPUClusterConfigList { + if in == nil { + return nil + } + out := new(GPUClusterConfigList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *GPUClusterConfigList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GPUClusterConfigSpec) DeepCopyInto(out *GPUClusterConfigSpec) { + *out = *in + in.DRADriver.DeepCopyInto(&out.DRADriver) + if in.DCGM != nil { + in, out := &in.DCGM, &out.DCGM + *out = new(v1.DCGMSpec) + (*in).DeepCopyInto(*out) + } + if in.DCGMExporter != nil { + in, out := &in.DCGMExporter, &out.DCGMExporter + *out = new(v1.DCGMExporterSpec) + (*in).DeepCopyInto(*out) + } + if in.GFD != nil { + in, out := &in.GFD, &out.GFD + *out = new(v1.GPUFeatureDiscoverySpec) + (*in).DeepCopyInto(*out) + } + out.HostPaths = in.HostPaths + in.Daemonsets.DeepCopyInto(&out.Daemonsets) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUClusterConfigSpec. +func (in *GPUClusterConfigSpec) DeepCopy() *GPUClusterConfigSpec { + if in == nil { + return nil + } + out := new(GPUClusterConfigSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *GPUClusterConfigStatus) DeepCopyInto(out *GPUClusterConfigStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new GPUClusterConfigStatus. +func (in *GPUClusterConfigStatus) DeepCopy() *GPUClusterConfigStatus { + if in == nil { + return nil + } + out := new(GPUClusterConfigStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *GPUDirectRDMASpec) DeepCopyInto(out *GPUDirectRDMASpec) { *out = *in @@ -398,7 +678,7 @@ func (in *NVIDIADriverSpec) DeepCopyInto(out *NVIDIADriverSpec) { } if in.NodeAffinity != nil { in, out := &in.NodeAffinity, &out.NodeAffinity - *out = new(v1.NodeAffinity) + *out = new(corev1.NodeAffinity) (*in).DeepCopyInto(*out) } if in.Labels != nil { @@ -417,14 +697,14 @@ func (in *NVIDIADriverSpec) DeepCopyInto(out *NVIDIADriverSpec) { } if in.Tolerations != nil { in, out := &in.Tolerations, &out.Tolerations - *out = make([]v1.Toleration, len(*in)) + *out = make([]corev1.Toleration, len(*in)) for i := range *in { (*in)[i].DeepCopyInto(&(*out)[i]) } } if in.PodSecurityContext != nil { in, out := &in.PodSecurityContext, &out.PodSecurityContext - *out = new(v1.PodSecurityContext) + *out = new(corev1.PodSecurityContext) (*in).DeepCopyInto(*out) } if in.HostNetwork != nil { @@ -471,14 +751,14 @@ func (in *ResourceRequirements) DeepCopyInto(out *ResourceRequirements) { *out = *in if in.Limits != nil { in, out := &in.Limits, &out.Limits - *out = make(v1.ResourceList, len(*in)) + *out = make(corev1.ResourceList, len(*in)) for key, val := range *in { (*out)[key] = val.DeepCopy() } } if in.Requests != nil { in, out := &in.Requests, &out.Requests - *out = make(v1.ResourceList, len(*in)) + *out = make(corev1.ResourceList, len(*in)) for key, val := range *in { (*out)[key] = val.DeepCopy() } diff --git a/api/versioned/typed/nvidia/v1alpha1/fake/fake_gpuclusterconfig.go b/api/versioned/typed/nvidia/v1alpha1/fake/fake_gpuclusterconfig.go new file mode 100644 index 000000000..bb26ab66d --- /dev/null +++ b/api/versioned/typed/nvidia/v1alpha1/fake/fake_gpuclusterconfig.go @@ -0,0 +1,52 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +// Code generated by client-gen. DO NOT EDIT. + +package fake + +import ( + v1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1" + nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/versioned/typed/nvidia/v1alpha1" + gentype "k8s.io/client-go/gentype" +) + +// fakeGPUClusterConfigs implements GPUClusterConfigInterface +type fakeGPUClusterConfigs struct { + *gentype.FakeClientWithList[*v1alpha1.GPUClusterConfig, *v1alpha1.GPUClusterConfigList] + Fake *FakeNvidiaV1alpha1 +} + +func newFakeGPUClusterConfigs(fake *FakeNvidiaV1alpha1) nvidiav1alpha1.GPUClusterConfigInterface { + return &fakeGPUClusterConfigs{ + gentype.NewFakeClientWithList[*v1alpha1.GPUClusterConfig, *v1alpha1.GPUClusterConfigList]( + fake.Fake, + "", + v1alpha1.SchemeGroupVersion.WithResource("gpuclusterconfigs"), + v1alpha1.SchemeGroupVersion.WithKind("GPUClusterConfig"), + func() *v1alpha1.GPUClusterConfig { return &v1alpha1.GPUClusterConfig{} }, + func() *v1alpha1.GPUClusterConfigList { return &v1alpha1.GPUClusterConfigList{} }, + func(dst, src *v1alpha1.GPUClusterConfigList) { dst.ListMeta = src.ListMeta }, + func(list *v1alpha1.GPUClusterConfigList) []*v1alpha1.GPUClusterConfig { + return gentype.ToPointerSlice(list.Items) + }, + func(list *v1alpha1.GPUClusterConfigList, items []*v1alpha1.GPUClusterConfig) { + list.Items = gentype.FromPointerSlice(items) + }, + ), + fake, + } +} diff --git a/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidia_client.go b/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidia_client.go index f91c593a0..acc74ec57 100644 --- a/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidia_client.go +++ b/api/versioned/typed/nvidia/v1alpha1/fake/fake_nvidia_client.go @@ -28,6 +28,10 @@ type FakeNvidiaV1alpha1 struct { *testing.Fake } +func (c *FakeNvidiaV1alpha1) GPUClusterConfigs() v1alpha1.GPUClusterConfigInterface { + return newFakeGPUClusterConfigs(c) +} + func (c *FakeNvidiaV1alpha1) NVIDIADrivers() v1alpha1.NVIDIADriverInterface { return newFakeNVIDIADrivers(c) } diff --git a/api/versioned/typed/nvidia/v1alpha1/generated_expansion.go b/api/versioned/typed/nvidia/v1alpha1/generated_expansion.go index f5e236215..e56ce9d82 100644 --- a/api/versioned/typed/nvidia/v1alpha1/generated_expansion.go +++ b/api/versioned/typed/nvidia/v1alpha1/generated_expansion.go @@ -18,4 +18,6 @@ package v1alpha1 +type GPUClusterConfigExpansion interface{} + type NVIDIADriverExpansion interface{} diff --git a/api/versioned/typed/nvidia/v1alpha1/gpuclusterconfig.go b/api/versioned/typed/nvidia/v1alpha1/gpuclusterconfig.go new file mode 100644 index 000000000..7582ba143 --- /dev/null +++ b/api/versioned/typed/nvidia/v1alpha1/gpuclusterconfig.go @@ -0,0 +1,70 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +// Code generated by client-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + context "context" + + nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1" + scheme "github.com/NVIDIA/gpu-operator/api/versioned/scheme" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + types "k8s.io/apimachinery/pkg/types" + watch "k8s.io/apimachinery/pkg/watch" + gentype "k8s.io/client-go/gentype" +) + +// GPUClusterConfigsGetter has a method to return a GPUClusterConfigInterface. +// A group's client should implement this interface. +type GPUClusterConfigsGetter interface { + GPUClusterConfigs() GPUClusterConfigInterface +} + +// GPUClusterConfigInterface has methods to work with GPUClusterConfig resources. +type GPUClusterConfigInterface interface { + Create(ctx context.Context, gPUClusterConfig *nvidiav1alpha1.GPUClusterConfig, opts v1.CreateOptions) (*nvidiav1alpha1.GPUClusterConfig, error) + Update(ctx context.Context, gPUClusterConfig *nvidiav1alpha1.GPUClusterConfig, opts v1.UpdateOptions) (*nvidiav1alpha1.GPUClusterConfig, error) + // Add a +genclient:noStatus comment above the type to avoid generating UpdateStatus(). + UpdateStatus(ctx context.Context, gPUClusterConfig *nvidiav1alpha1.GPUClusterConfig, opts v1.UpdateOptions) (*nvidiav1alpha1.GPUClusterConfig, error) + Delete(ctx context.Context, name string, opts v1.DeleteOptions) error + DeleteCollection(ctx context.Context, opts v1.DeleteOptions, listOpts v1.ListOptions) error + Get(ctx context.Context, name string, opts v1.GetOptions) (*nvidiav1alpha1.GPUClusterConfig, error) + List(ctx context.Context, opts v1.ListOptions) (*nvidiav1alpha1.GPUClusterConfigList, error) + Watch(ctx context.Context, opts v1.ListOptions) (watch.Interface, error) + Patch(ctx context.Context, name string, pt types.PatchType, data []byte, opts v1.PatchOptions, subresources ...string) (result *nvidiav1alpha1.GPUClusterConfig, err error) + GPUClusterConfigExpansion +} + +// gPUClusterConfigs implements GPUClusterConfigInterface +type gPUClusterConfigs struct { + *gentype.ClientWithList[*nvidiav1alpha1.GPUClusterConfig, *nvidiav1alpha1.GPUClusterConfigList] +} + +// newGPUClusterConfigs returns a GPUClusterConfigs +func newGPUClusterConfigs(c *NvidiaV1alpha1Client) *gPUClusterConfigs { + return &gPUClusterConfigs{ + gentype.NewClientWithList[*nvidiav1alpha1.GPUClusterConfig, *nvidiav1alpha1.GPUClusterConfigList]( + "gpuclusterconfigs", + c.RESTClient(), + scheme.ParameterCodec, + "", + func() *nvidiav1alpha1.GPUClusterConfig { return &nvidiav1alpha1.GPUClusterConfig{} }, + func() *nvidiav1alpha1.GPUClusterConfigList { return &nvidiav1alpha1.GPUClusterConfigList{} }, + ), + } +} diff --git a/api/versioned/typed/nvidia/v1alpha1/nvidia_client.go b/api/versioned/typed/nvidia/v1alpha1/nvidia_client.go index 7e1e1db0a..e22b1b527 100644 --- a/api/versioned/typed/nvidia/v1alpha1/nvidia_client.go +++ b/api/versioned/typed/nvidia/v1alpha1/nvidia_client.go @@ -28,6 +28,7 @@ import ( type NvidiaV1alpha1Interface interface { RESTClient() rest.Interface + GPUClusterConfigsGetter NVIDIADriversGetter } @@ -36,6 +37,10 @@ type NvidiaV1alpha1Client struct { restClient rest.Interface } +func (c *NvidiaV1alpha1Client) GPUClusterConfigs() GPUClusterConfigInterface { + return newGPUClusterConfigs(c) +} + func (c *NvidiaV1alpha1Client) NVIDIADrivers() NVIDIADriverInterface { return newNVIDIADrivers(c) } diff --git a/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml b/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml index 4cde9ec97..7e1941480 100644 --- a/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml +++ b/bundle/manifests/gpu-operator-certified.clusterserviceversion.yaml @@ -320,6 +320,37 @@ spec: path: state x-descriptors: - 'urn:alm:descriptor:text' + - name: gpuclusterconfigs.nvidia.com + kind: GPUClusterConfig + version: v1alpha1 + group: nvidia.com + displayName: GPUClusterConfig + description: GPUClusterConfig allows you to configure the DRA-based GPU software stack + resources: + - kind: ServiceAccount + name: '' + version: v1 + - kind: DaemonSet + name: '' + version: apps/v1 + - kind: Deployment + name: '' + version: apps/v1 + - kind: ConfigMap + name: '' + version: v1 + - kind: Pod + name: '' + version: v1 + - kind: status + name: '' + version: v1 + statusDescriptors: + - description: The current state of the GPUClusterConfig. + displayName: State + path: state + x-descriptors: + - 'urn:alm:descriptor:text' - name: clusterpolicies.nvidia.com kind: ClusterPolicy version: v1 @@ -635,6 +666,9 @@ spec: - clusterpolicies - clusterpolicies/finalizers - clusterpolicies/status + - gpuclusterconfigs + - gpuclusterconfigs/finalizers + - gpuclusterconfigs/status - nvidiadrivers - nvidiadrivers/finalizers - nvidiadrivers/status diff --git a/bundle/manifests/nvidia.com_gpuclusterconfigs.yaml b/bundle/manifests/nvidia.com_gpuclusterconfigs.yaml new file mode 100644 index 000000000..cc3706c15 --- /dev/null +++ b/bundle/manifests/nvidia.com_gpuclusterconfigs.yaml @@ -0,0 +1,4059 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: gpuclusterconfigs.nvidia.com +spec: + group: nvidia.com + names: + kind: GPUClusterConfig + listKind: GPUClusterConfigList + plural: gpuclusterconfigs + shortNames: + - gcc + singular: gpuclusterconfig + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.state + name: Status + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: GPUClusterConfig is the Schema for the gpuclusterconfigs API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + GPUClusterConfigSpec defines the desired state of GPUClusterConfig, the DRA-based + software-enablement stack. Unlike ClusterPolicy, it does not manage the NVIDIA driver + or the device-plugin; the driver is installed separately (host-installed or via an + NVIDIADriver CR) and GPUClusterConfig waits for driver readiness before proceeding. + properties: + daemonsets: + description: |- + Daemonsets defines the common configuration applied to all DaemonSets deployed + by the GPUClusterConfig controller. + properties: + annotations: + additionalProperties: + type: string + description: |- + Optional: Annotations is an unstructured key value map stored with a resource that may be + set by external tools to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + type: object + labels: + additionalProperties: + type: string + description: |- + Optional: Map of string keys and values that can be used to organize and categorize + (scope and select) objects. May match selectors of replication controllers + and services. + type: object + podSecurityContext: + description: 'Optional: Set pod-level security context for all + DaemonSet pods (applies as defaults to all containers)' + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxChangePolicy: + description: |- + seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + It has no effect on nodes that do not support SELinux or to volumes does not support SELinux. + Valid values are "MountOption" and "Recursive". + + "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + + "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + This requires all Pods that share the same volume to use the same SELinux label. + It is not possible to share the same volume among privileged and unprivileged Pods. + Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + CSIDriver instance. Other volumes are always re-labelled recursively. + "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + + If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + and "Recursive" for all other volumes. + + This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + + All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + Note that this field cannot be set when spec.os.name is windows. + type: string + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + priorityClassName: + type: string + rollingUpdate: + description: 'Optional: Configuration for rolling update of all + DaemonSet pods' + properties: + maxUnavailable: + type: string + type: object + tolerations: + description: 'Optional: Set tolerations' + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + updateStrategy: + default: RollingUpdate + enum: + - RollingUpdate + - OnDelete + type: string + type: object + dcgm: + description: |- + DCGM defines the spec for the standalone NVIDIA DCGM hostengine. Disabled by default; + when disabled, dcgm-exporter uses its embedded nv-hostengine. NOTE: the reused enabled + field carries no server-side default and its IsEnabled() treats nil as enabled, so the + controller must default nil enabled to disabled here. + properties: + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + enabled: + description: Enabled indicates if deployment of NVIDIA DCGM Hostengine + as a separate pod is enabled. + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + hostNetwork: + description: HostNetwork indicates whether the DCGM pod uses the + host's network namespace. + type: boolean + hostPort: + description: 'Deprecated: HostPort represents host port that needs + to be bound for DCGM engine (Default: 5555)' + format: int32 + type: integer + image: + description: NVIDIA DCGM image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: NVIDIA DCGM image repository + type: string + resources: + description: 'Optional: Define resources requests and limits for + each pod' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + version: + description: NVIDIA DCGM image tag + type: string + type: object + dcgmExporter: + description: |- + DCGMExporter defines the spec for NVIDIA DCGM Exporter. Enabled by default, but the + reused enabled field carries no server-side default; the controller defaults nil enabled. + properties: + annotations: + additionalProperties: + type: string + description: |- + Optional: Annotations is an unstructured key value map stored with a resource that may be + set by external tools to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + type: object + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + config: + description: 'Optional: Custom metrics configuration for NVIDIA + DCGM Exporter' + properties: + name: + description: ConfigMap name with file dcgm-metrics.csv for + metrics to be collected by NVIDIA DCGM Exporter + type: string + type: object + enablePodLabels: + description: |- + Enable Kubernetes pod labels as Prometheus label dimensions in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean + enablePodUID: + description: |- + Enable Kubernetes pod UID as a Prometheus label dimension in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean + enabled: + description: Enabled indicates if deployment of NVIDIA DCGM Exporter + through operator is enabled + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + hostNetwork: + description: HostNetwork allows the DCGM-Exporter daemon set to + expose metrics port on the host's network namespace. + type: boolean + hostPID: + description: HostPID allows the DCGM-Exporter daemon set to access + the host's PID namespace + type: boolean + hpcJobMapping: + description: 'Optional: HPC job mapping configuration for NVIDIA + DCGM Exporter' + properties: + directory: + description: |- + Directory path where HPC job mapping files are created by the workload manager + Defaults to /var/lib/dcgm-exporter/job-mapping if not specified + type: string + enabled: + description: Enable HPC job mapping for DCGM Exporter + type: boolean + type: object + image: + description: NVIDIA DCGM Exporter image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + podLabelAllowlistRegex: + description: |- + Regex list for filtering which Kubernetes pod labels are included in DCGM exporter metrics. + Empty means all pod labels are included. + items: + type: string + type: array + repository: + description: NVIDIA DCGM Exporter image repository + type: string + resources: + description: 'Optional: Define resources requests and limits for + each pod' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + service: + description: 'Optional: Service configuration for NVIDIA DCGM + Exporter' + properties: + internalTrafficPolicy: + description: InternalTrafficPolicy describes how nodes distribute + service traffic they receive on the ClusterIP. + type: string + type: + description: Type represents the ServiceType which describes + ingress methods for a service + type: string + type: object + serviceMonitor: + description: 'Optional: ServiceMonitor configuration for NVIDIA + DCGM Exporter' + properties: + additionalLabels: + additionalProperties: + type: string + description: AdditionalLabels to add to ServiceMonitor instance + type: object + enabled: + description: Enabled indicates if ServiceMonitor is deployed + type: boolean + honorLabels: + description: HonorLabels chooses the metric’s labels on collisions + with target labels. + type: boolean + interval: + description: |- + Interval at which metrics should be scraped. If not specified, Prometheus’ global scrape interval is used. + Supported units: y, w, d, h, m, s, ms + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + relabelings: + description: Relabelings allows to rewrite labels on metric + sets + items: + description: |- + RelabelConfig allows dynamic rewriting of the label set for targets, alerts, + scraped samples and remote write samples. + + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + properties: + action: + default: replace + description: |- + action to perform based on the regex matching. + + `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0. + `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0. + + Default: "Replace" + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase + - keepequal + - KeepEqual + - dropequal + - DropEqual + type: string + modulus: + description: |- + modulus to take of the hash of the source label values. + + Only applicable when the action is `HashMod`. + format: int64 + type: integer + regex: + description: regex defines the regular expression against + which the extracted value is matched. + type: string + replacement: + description: |- + replacement value against which a Replace action is performed if the + regular expression matches. + + Regex capture groups are available. + type: string + separator: + description: separator defines the string between concatenated + SourceLabels. + type: string + sourceLabels: + description: |- + sourceLabels defines the source labels select values from existing labels. Their content is + concatenated using the configured Separator and matched against the + configured regular expression. + items: + description: |- + LabelName is a valid Prometheus label name. + For Prometheus 3.x, a label name is valid if it contains UTF-8 characters. + For Prometheus 2.x, a label name is only valid if it contains ASCII characters, letters, numbers, as well as underscores. + type: string + type: array + targetLabel: + description: |- + targetLabel defines the label to which the resulting string is written in a replacement. + + It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`, + `KeepEqual` and `DropEqual` actions. + + Regex capture groups are available. + type: string + type: object + type: array + scrapeTimeout: + description: |- + ScrapeTimeout to use when scraping metrics. Must not be greater than Interval. + If not specified, Prometheus' global scrape timeout is used. + Supported units: y, w, d, h, m, s, ms + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + type: object + version: + description: NVIDIA DCGM Exporter image tag + type: string + type: object + draDriver: + description: DRADriver defines the spec for the NVIDIA DRA driver + stack (gpus + computeDomains). + properties: + computeDomains: + description: ComputeDomains configures the compute-domain capability + of the DRA driver. + properties: + controller: + description: Controller configures the compute-domain controller + Deployment. + properties: + affinity: + description: Affinity rules for the controller Deployment + pods + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + nodeSelector: + additionalProperties: + type: string + description: NodeSelector for the controller Deployment + pods + type: object + priorityClassName: + description: PriorityClassName for the controller Deployment + pods + type: string + resources: + description: 'Optional: Define resources requests and + limits for the controller container' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: Tolerations for the controller Deployment + pods + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + type: object + enabled: + default: true + description: Enabled indicates if the computeDomains capability + of the DRA driver is enabled. + type: boolean + kubeletPlugin: + description: KubeletPlugin configures the kubelet-plugin workload + for the computeDomains capability. + properties: + affinity: + description: Affinity rules for the kubelet-plugin DaemonSet + pods + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + healthcheckPort: + description: |- + HealthcheckPort is the port running a gRPC health service checked by a livenessProbe. + Set to a negative value to disable the service and the probe. + format: int32 + type: integer + nodeSelector: + additionalProperties: + type: string + description: NodeSelector for the kubelet-plugin DaemonSet + pods + type: object + priorityClassName: + description: PriorityClassName for the kubelet-plugin + DaemonSet pods + type: string + resources: + description: 'Optional: Define resources requests and + limits for the kubelet-plugin container' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: Tolerations for the kubelet-plugin DaemonSet + pods + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + type: object + type: object + featureGates: + additionalProperties: + type: boolean + description: |- + FeatureGates is a map of feature gate names to a boolean enabling or disabling each. + It is rendered as the FEATURE_GATES environment variable on the DRA driver containers. + type: object + gpus: + description: GPUs configures the gpu.nvidia.com capability of + the DRA driver. + properties: + enabled: + default: true + description: Enabled indicates if the gpus capability of the + DRA driver is enabled. + type: boolean + kubeletPlugin: + description: KubeletPlugin configures the kubelet-plugin workload + for the gpus capability. + properties: + affinity: + description: Affinity rules for the kubelet-plugin DaemonSet + pods + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + healthcheckPort: + description: |- + HealthcheckPort is the port running a gRPC health service checked by a livenessProbe. + Set to a negative value to disable the service and the probe. + format: int32 + type: integer + nodeSelector: + additionalProperties: + type: string + description: NodeSelector for the kubelet-plugin DaemonSet + pods + type: object + priorityClassName: + description: PriorityClassName for the kubelet-plugin + DaemonSet pods + type: string + resources: + description: 'Optional: Define resources requests and + limits for the kubelet-plugin container' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: Tolerations for the kubelet-plugin DaemonSet + pods + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + type: object + type: object + image: + description: NVIDIA DRA driver image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: NVIDIA DRA driver image repository + type: string + version: + description: NVIDIA DRA driver image tag + type: string + type: object + gfd: + description: |- + GFD defines the spec for the standalone GPU Feature Discovery operand. Enabled by default, + but the reused enabled field carries no server-side default; the controller defaults nil enabled. + properties: + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + enabled: + description: Enabled indicates if deployment of GPU Feature Discovery + Plugin is enabled. + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + hostNetwork: + description: HostNetwork indicates whether the GPU Feature Discovery + pod uses the host's network namespace. + type: boolean + image: + description: GFD image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: GFD image repository + type: string + resources: + description: 'Optional: Define resources requests and limits for + each pod' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + version: + description: GFD image tag + type: string + type: object + hostPaths: + description: HostPaths defines the host paths used in host-path volumes + for various components. + properties: + driverInstallDir: + description: |- + DriverInstallDir represents the root at which driver files including libraries, + config files, and executables can be found. + type: string + kubeletRootDir: + default: /var/lib/kubelet + description: |- + KubeletRootDir represents the location of the kubelet root directory. + If empty, it will default to "/var/lib/kubelet". + type: string + rootFS: + description: |- + RootFS represents the path to the root filesystem of the host. + This is used by components that need to interact with the host filesystem + and as such this must be a chroot-able filesystem. + Examples include the MIG Manager and Toolkit Container which may need to + stop, start, or restart systemd services. + type: string + type: object + required: + - draDriver + type: object + status: + description: GPUClusterConfigStatus defines the observed state of GPUClusterConfig + properties: + conditions: + description: Conditions is a list of conditions representing the GPUClusterConfig's + current state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + namespace: + description: Namespace indicates the namespace in which the operator + and operands are installed + type: string + state: + description: State indicates the status of the GPUClusterConfig instance + enum: + - ignored + - ready + - notReady + - disabled + type: string + required: + - state + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/bundle/manifests/resource.nvidia.com_computedomaincliques.yaml b/bundle/manifests/resource.nvidia.com_computedomaincliques.yaml new file mode 100644 index 000000000..b3eddef71 --- /dev/null +++ b/bundle/manifests/resource.nvidia.com_computedomaincliques.yaml @@ -0,0 +1,84 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.1 + name: computedomaincliques.resource.nvidia.com +spec: + group: resource.nvidia.com + names: + kind: ComputeDomainClique + listKind: ComputeDomainCliqueList + plural: computedomaincliques + singular: computedomainclique + scope: Namespaced + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: |- + ComputeDomainClique holds information about a specific clique within a ComputeDomain. + It is created in the driver namespace and named as ".". + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + daemons: + items: + description: ComputeDomainDaemonInfo provides information about each + daemon in a ComputeDomainClique. + properties: + cliqueID: + type: string + index: + description: |- + The Index field is used to ensure a consistent IP-to-DNS name + mapping across all machines within an IMEX domain. Each node's index + directly determines its DNS name within a given NVLink partition + (i.e. clique). In other words, the 2-tuple of (CliqueID, Index) will + always be unique. This field is marked as optional (but not + omitempty) in order to support downgrades and avoid an API bump. + type: integer + ipAddress: + type: string + nodeName: + type: string + status: + default: NotReady + description: |- + The Status field tracks the readiness of the IMEX daemon running on + this node. It gets switched to Ready whenever the IMEX daemon is + ready to broker GPU memory exchanges and switches to NotReady when + it is not. It is marked as optional in order to support downgrades + and avoid an API bump. + enum: + - Ready + - NotReady + type: string + required: + - cliqueID + - ipAddress + - nodeName + type: object + type: array + x-kubernetes-list-map-keys: + - nodeName + x-kubernetes-list-type: map + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + type: object + served: true + storage: true diff --git a/bundle/manifests/resource.nvidia.com_computedomains.yaml b/bundle/manifests/resource.nvidia.com_computedomains.yaml new file mode 100644 index 000000000..5a28ae17c --- /dev/null +++ b/bundle/manifests/resource.nvidia.com_computedomains.yaml @@ -0,0 +1,162 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.1 + name: computedomains.resource.nvidia.com +spec: + group: resource.nvidia.com + names: + kind: ComputeDomain + listKind: ComputeDomainList + plural: computedomains + singular: computedomain + scope: Namespaced + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: ComputeDomain prepares a set of nodes to run a multi-node workload + in. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ComputeDomainSpec provides the spec for a ComputeDomain. + properties: + channel: + description: ComputeDomainChannelSpec provides the spec for a channel + used to run a workload inside a ComputeDomain. + properties: + allocationMode: + default: Single + description: |- + Allows for requesting all IMEX channels (the maximum per IMEX domain) or + precisely one. + enum: + - All + - Single + type: string + resourceClaimTemplate: + description: ComputeDomainResourceClaimTemplate provides the details + of the ResourceClaimTemplate to generate. + properties: + name: + type: string + required: + - name + type: object + required: + - resourceClaimTemplate + type: object + numNodes: + description: |- + Intended number of IMEX daemons (i.e., individual compute nodes) in the + ComputeDomain. Must be zero or greater. + + With `featureGates.IMEXDaemonsWithDNSNames=true` (the default), this is + recommended to be set to zero. Workload must implement and consult its + own source of truth for the number of workers online before trying to + share GPU memory (and hence triggering IMEX interaction). When non-zero, + `numNodes` is used only for automatically updating the global + ComputeDomain `Status` (indicating `Ready` when the number of ready IMEX + daemons equals `numNodes`). In this mode, a `numNodes` value greater than + zero in particular does not gate the startup of IMEX daemons: individual + IMEX daemons are started immediately without waiting for its peers, and + any workload pod gets released right after its local IMEX daemon has + started. + + With `featureGates.IMEXDaemonsWithDNSNames=false`, `numNodes` must be set + to the expected number of worker nodes joining the ComputeDomain. In that + mode, all workload pods are held back (with containers in state + `ContainerCreating`) until the underlying IMEX domain has been joined by + `numNodes` IMEX daemons. Pods from more than `numNodes` nodes trying to + join the ComputeDomain may lead to unexpected behavior. + + The `numNodes` parameter is deprecated and will be removed in the next + API version. + type: integer + required: + - channel + - numNodes + type: object + x-kubernetes-validations: + - message: A computeDomain.spec is immutable + rule: self == oldSelf + status: + description: |- + Global ComputeDomain status. Can be used to guide debugging efforts. + Workload however should not rely on inspecting this field at any point + during its lifecycle. + properties: + nodes: + items: + description: ComputeDomainNode provides information about each node + added to a ComputeDomain. + properties: + cliqueID: + type: string + index: + description: |- + The Index field is used to ensure a consistent IP-to-DNS name + mapping across all machines within an IMEX domain. Each node's index + directly determines its DNS name within a given NVLink partition + (i.e. clique). In other words, the 2-tuple of (CliqueID, Index) will + always be unique. This field is marked as optional (but not + omitempty) in order to support downgrades and avoid an API bump. + type: integer + ipAddress: + type: string + name: + type: string + status: + default: NotReady + description: |- + The Status field tracks the readiness of the IMEX daemon running on + this node. It gets switched to Ready whenever the IMEX daemon is + ready to broker GPU memory exchanges and switches to NotReady when + it is not. It is marked as optional in order to support downgrades + and avoid an API bump. + enum: + - Ready + - NotReady + type: string + required: + - cliqueID + - ipAddress + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + status: + default: NotReady + enum: + - Ready + - NotReady + type: string + required: + - status + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/cmd/dra-driver-validator/driver.go b/cmd/dra-driver-validator/driver.go new file mode 100644 index 000000000..2b36bfc55 --- /dev/null +++ b/cmd/dra-driver-validator/driver.go @@ -0,0 +1,52 @@ +/* +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +package main + +// driverInfo contains the location of an NVIDIA driver installation as consumed +// by the DRA kubelet-plugin containers via the driver-ready env contract. +// +// driverRoot is the absolute path of the driver install on the host (the plugin's +// NVIDIA_DRIVER_ROOT, used as the target root when generating CDI specifications). +// +// driverRootCtrPath is the path where that driver install is mounted inside the +// kubelet-plugin containers (the plugin's DRIVER_ROOT_CTR_PATH, used to read driver +// files and compute the dev root). +// +// The plugin reads only these two values; it derives its own dev root from +// driverRootCtrPath (see cmd/gpu-kubelet-plugin/device_state.go in k8s-dra-driver-gpu). +type driverInfo struct { + driverRoot string + driverRootCtrPath string +} + +func getDriverInfo(isHostDriver bool, hostRoot string, driverInstallDir string, driverInstallDirCtrPath string) driverInfo { + if isHostDriver { + // Driver installed directly on the host root; the plugin reads it via /host. + return driverInfo{ + driverRoot: hostRoot, + driverRootCtrPath: hostRootCtrPath, + } + } + + // Containerized driver. Unlike nvidia-validator (which hardcodes "/driver-root"), + // the DRA stack mounts the containerized driver at the same path it occupies on + // the host (driverInstallDirCtrPath, e.g. /run/nvidia/driver), so we emit that. + return driverInfo{ + driverRoot: driverInstallDir, + driverRootCtrPath: driverInstallDirCtrPath, + } +} diff --git a/cmd/dra-driver-validator/find.go b/cmd/dra-driver-validator/find.go new file mode 100644 index 000000000..0328a7545 --- /dev/null +++ b/cmd/dra-driver-validator/find.go @@ -0,0 +1,70 @@ +/* +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +package main + +import ( + "fmt" + "path/filepath" +) + +type root string + +// getDriverLibraryPath returns path to `libnvidia-ml.so.1` in the driver root. +// The folder for this file is also expected to be the location of other driver files. +func (r root) getDriverLibraryPath() (string, error) { + return r.findFile("libnvidia-ml.so.1", + "/usr/lib64", + "/usr/lib/x86_64-linux-gnu", + "/usr/lib/aarch64-linux-gnu", + "/lib64", + "/lib/x86_64-linux-gnu", + "/lib/aarch64-linux-gnu", + ) +} + +// getNvidiaSMIPath returns path to the `nvidia-smi` executable in the driver root. +func (r root) getNvidiaSMIPath() (string, error) { + return r.findFile("nvidia-smi", "/usr/bin", "/usr/sbin", "/bin", "/sbin") +} + +// findFile searches the root for a specified file. +// A number of folders can be specified to search in addition to the root itself. +// If the file represents a symlink, this is resolved and the final path is returned. +func (r root) findFile(name string, searchIn ...string) (string, error) { + + for _, d := range append([]string{"/"}, searchIn...) { + l := filepath.Join(string(r), d, name) + candidate, err := resolveLink(l) + if err != nil { + continue + } + return candidate, nil + } + + return "", fmt.Errorf("error locating %q", name) +} + +// resolveLink finds the target of a symlink or the file itself in the +// case of a regular file. +// This is equivalent to running `readlink -f ${l}`. +func resolveLink(l string) (string, error) { + resolved, err := filepath.EvalSymlinks(l) + if err != nil { + return "", fmt.Errorf("error resolving link '%s': %w", l, err) + } + return resolved, nil +} diff --git a/cmd/dra-driver-validator/main.go b/cmd/dra-driver-validator/main.go new file mode 100644 index 000000000..bcd822daf --- /dev/null +++ b/cmd/dra-driver-validator/main.go @@ -0,0 +1,300 @@ +/* +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +// Command dra-driver-validator is the init container for the DRA driver +// kubelet-plugin DaemonSet. It validates that an NVIDIA driver is installed and +// writes /run/nvidia/validations/driver-ready with the env contract that the +// kubelet-plugin containers source on startup (NVIDIA_DRIVER_ROOT, DRIVER_ROOT_CTR_PATH). +// +// Unlike nvidia-validator, it validates with `nvidia-smi --version` only (never +// full nvidia-smi), so validation is safe when GPUs are bound to vfio-pci for +// passthrough: full nvidia-smi would initialize the GPUs and fail or hang. +package main + +import ( + "context" + "fmt" + "os" + "os/exec" + "os/signal" + "path/filepath" + "slices" + "strings" + "syscall" + "time" + + pathrs "github.com/cyphar/filepath-securejoin/pathrs-lite" + log "github.com/sirupsen/logrus" + cli "github.com/urfave/cli/v3" + + "github.com/NVIDIA/gpu-operator/internal/info" +) + +const ( + // defaultStatusPath is the directory where validation status files are written. + defaultStatusPath = "/run/nvidia/validations" + // driverStatusFile is the status file signalling driver readiness; its content + // is the env contract sourced by the kubelet-plugin containers. + driverStatusFile = "driver-ready" + // hostRootCtrPath is where the host root filesystem is mounted in the container. + hostRootCtrPath = "/host" + // defaultDriverInstallDirCtrPath is where a containerized driver installation is + // mounted in the container. The DRA stack mounts it at the same path it occupies + // on the host, and the kubelet-plugin containers mount the same path and source + // DRIVER_ROOT_CTR_PATH from driver-ready. + defaultDriverInstallDirCtrPath = "/run/nvidia/driver" + // hostNvidiaSMIPath is the expected location of nvidia-smi within the host root. + hostNvidiaSMIPath = "/usr/bin/nvidia-smi" + // defaultSleepIntervalSeconds is the retry interval between validation attempts. + defaultSleepIntervalSeconds = 10 +) + +var ( + outputDirFlag string + hostRootFlag string + driverInstallDirFlag string + driverInstallDirCtrPathFlag string + sleepIntervalSecondsFlag int + debugFlag bool +) + +func main() { + c := cli.Command{} + c.Name = "dra-driver-validator" + c.Usage = "Validate the NVIDIA driver for the DRA driver kubelet plugin" + c.Version = info.GetVersionString() + + c.Flags = []cli.Flag{ + &cli.BoolFlag{ + Name: "debug", + Aliases: []string{"d"}, + Usage: "Enable debug-level logging", + Destination: &debugFlag, + Sources: cli.EnvVars("DEBUG"), + }, + &cli.StringFlag{ + Name: "output-dir", + Usage: "Directory where the driver-ready status file is written", + Value: defaultStatusPath, + Destination: &outputDirFlag, + Sources: cli.EnvVars("OUTPUT_DIR"), + }, + &cli.StringFlag{ + Name: "host-root", + Usage: "Root path of the host filesystem (mounted at /host in the container)", + Value: "/", + Destination: &hostRootFlag, + Sources: cli.EnvVars("HOST_ROOT"), + }, + &cli.StringFlag{ + Name: "driver-install-dir", + Usage: "Path on the host where a containerized driver installation is made available", + Value: "/run/nvidia/driver", + Destination: &driverInstallDirFlag, + Sources: cli.EnvVars("DRIVER_INSTALL_DIR"), + }, + &cli.StringFlag{ + Name: "driver-install-dir-ctr-path", + Usage: "Path where the containerized driver installation is mounted in this container", + Value: defaultDriverInstallDirCtrPath, + Destination: &driverInstallDirCtrPathFlag, + Sources: cli.EnvVars("DRIVER_INSTALL_DIR_CTR_PATH"), + }, + &cli.IntFlag{ + Name: "sleep-interval-seconds", + Usage: "Seconds to wait between validation attempts", + Value: defaultSleepIntervalSeconds, + Destination: &sleepIntervalSecondsFlag, + Sources: cli.EnvVars("SLEEP_INTERVAL_SECONDS"), + }, + } + + c.Before = func(ctx context.Context, _ *cli.Command) (context.Context, error) { + if debugFlag { + log.SetLevel(log.DebugLevel) + } + return ctx, nil + } + c.Action = run + + log.Infof("version: %s", c.Version) + + // DS pods may be terminated (SIGTERM) and re-created when the operator's driver + // container creates a mount under /run/nvidia. Exit cleanly so Kubernetes retries. + go handleSignal() + + if err := c.Run(context.Background(), os.Args); err != nil { + log.Errorf("%v", err) + os.Exit(1) + } +} + +func run(ctx context.Context, _ *cli.Command) error { + if err := os.MkdirAll(outputDirFlag, 0755); err != nil { + return fmt.Errorf("failed to create status directory %q: %w", outputDirFlag, err) + } + + for { + info, err := runValidation() + if err != nil { + log.Warningf("driver not ready, retrying in %ds: %v", sleepIntervalSecondsFlag, err) + time.Sleep(time.Duration(sleepIntervalSecondsFlag) * time.Second) + continue + } + if err := writeDriverReady(info); err != nil { + return fmt.Errorf("failed to write %s: %w", driverStatusFile, err) + } + log.Infof("driver is ready; wrote %s/%s", outputDirFlag, driverStatusFile) + return nil + } +} + +func handleSignal() { + stop := make(chan os.Signal, 1) + signal.Notify(stop, os.Interrupt, + syscall.SIGTERM, syscall.SIGHUP, syscall.SIGINT, syscall.SIGQUIT) + s := <-stop + log.Fatalf("Exiting due to signal [%v] notification for pid [%d]", s.String(), os.Getpid()) +} + +// runValidation mirrors nvidia-validator's host-then-container probe, but validates +// with `nvidia-smi --version` only. It returns the env-contract driverInfo on success. +func runValidation() (driverInfo, error) { + hostErr := validateHostDriver() + if hostErr == nil { + log.Info("Detected a pre-installed driver on the host") + return getDriverInfo(true, hostRootFlag, hostRootFlag, hostRootCtrPath), nil + } + log.Infof("No pre-installed driver detected on the host: %v", hostErr) + + log.Info("Validating containerized driver installation") + if err := validateContainerDriver(); err != nil { + return driverInfo{}, err + } + return getDriverInfo(false, hostRootFlag, driverInstallDirFlag, driverInstallDirCtrPathFlag), nil +} + +// validateHostDriver checks for a driver installed directly on the host root by +// running `nvidia-smi --version` inside the mounted host filesystem. +func validateHostDriver() error { + fileInfo, err := resolveHostNvidiaSMI(hostRootCtrPath) + if err != nil { + return err + } + if fileInfo.Size() == 0 { + return fmt.Errorf("empty 'nvidia-smi' file found on the host") + } + // chroot into the host root so nvidia-smi resolves its libraries from the host. + return runStreaming(nvidiaSMIVersionCommand("chroot", hostRootCtrPath, "nvidia-smi")) +} + +// validateContainerDriver checks a containerized driver installation mounted at +// driverInstallDirCtrPath by running `nvidia-smi --version` against it. +func validateContainerDriver() error { + driverRoot := root(driverInstallDirCtrPathFlag) + + driverLibraryPath, err := driverRoot.getDriverLibraryPath() + if err != nil { + return fmt.Errorf("failed to locate driver libraries: %w", err) + } + nvidiaSMIPath, err := driverRoot.getNvidiaSMIPath() + if err != nil { + return fmt.Errorf("failed to locate nvidia-smi: %w", err) + } + + cmd := nvidiaSMIVersionCommand(nvidiaSMIPath) + // nvidia-smi needs libnvidia-ml.so.1 on its load path. + cmd.Env = setEnvVar(os.Environ(), "LD_PRELOAD", prependPathListEnvvar("LD_PRELOAD", driverLibraryPath)) + return runStreaming(cmd) +} + +// resolveHostNvidiaSMI opens and stats nvidia-smi within the mounted host root. +func resolveHostNvidiaSMI(hostRoot string) (os.FileInfo, error) { + f, err := pathrs.OpenInRoot(hostRoot, hostNvidiaSMIPath) + if err != nil { + return nil, fmt.Errorf("failed to open 'nvidia-smi' on the host: %w", err) + } + defer f.Close() + + fileInfo, err := f.Stat() + if err != nil { + return nil, fmt.Errorf("failed to stat 'nvidia-smi' on the host: %w", err) + } + return fileInfo, nil +} + +// nvidiaSMIVersionCommand builds the nvidia-smi invocation. It always appends +// "--version": full nvidia-smi would initialize GPUs and fail/hang when GPUs are +// bound to vfio-pci for passthrough. +func nvidiaSMIVersionCommand(nvidiaSMIPath string, prefixArgs ...string) *exec.Cmd { + args := append(slices.Clone(prefixArgs), "--version") + return exec.Command(nvidiaSMIPath, args...) +} + +// runStreaming runs cmd with its output forwarded to the validator's own streams, +// so nvidia-smi output (and errors) land in the init container's logs. +func runStreaming(cmd *exec.Cmd) error { + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() +} + +// writeDriverReady writes the driver-ready env contract sourced by the +// kubelet-plugin containers. Only NVIDIA_DRIVER_ROOT and DRIVER_ROOT_CTR_PATH are +// emitted: those are the only values the DRA kubelet plugins read (see +// cmd/gpu-kubelet-plugin/main.go in k8s-dra-driver-gpu). The plugin derives its +// dev root from DRIVER_ROOT_CTR_PATH itself. +func writeDriverReady(info driverInfo) error { + content := fmt.Sprintf("NVIDIA_DRIVER_ROOT=%s\nDRIVER_ROOT_CTR_PATH=%s\n", + info.driverRoot, info.driverRootCtrPath) + + return writeStatusFile(filepath.Join(outputDirFlag, driverStatusFile), content) +} + +// writeStatusFile atomically writes content to statusFile via a temp file + rename. +func writeStatusFile(statusFile, content string) error { + tmpFile, err := os.CreateTemp(filepath.Dir(statusFile), filepath.Base(statusFile)+".*.tmp") + if err != nil { + return fmt.Errorf("failed to create temporary status file: %w", err) + } + // Best-effort cleanup; on success the rename moves the temp file away first. + defer func() { _ = os.Remove(tmpFile.Name()) }() //nolint:gosec + + if _, err := tmpFile.WriteString(content); err != nil { + tmpFile.Close() + return fmt.Errorf("failed to write temporary status file: %w", err) + } + if err := tmpFile.Close(); err != nil { + return fmt.Errorf("failed to close temporary status file: %w", err) + } + if err := os.Rename(tmpFile.Name(), statusFile); err != nil { + return fmt.Errorf("error moving temporary file to %q: %w", statusFile, err) + } + return nil +} + +func prependPathListEnvvar(envvar string, prepend ...string) string { + current := filepath.SplitList(os.Getenv(envvar)) + return strings.Join(append(prepend, current...), string(filepath.ListSeparator)) +} + +func setEnvVar(envvars []string, key, value string) []string { + envvars = slices.DeleteFunc(envvars, func(e string) bool { + k, _, _ := strings.Cut(e, "=") + return k == key + }) + return append(envvars, key+"="+value) +} diff --git a/cmd/dra-driver-validator/main_test.go b/cmd/dra-driver-validator/main_test.go new file mode 100644 index 000000000..acc9d932b --- /dev/null +++ b/cmd/dra-driver-validator/main_test.go @@ -0,0 +1,119 @@ +/* +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/ + +package main + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestWriteDriverReady(t *testing.T) { + testCases := []struct { + description string + info driverInfo + expected string + }{ + { + description: "host driver", + info: getDriverInfo(true, "/", "/run/nvidia/driver", "/run/nvidia/driver"), + expected: "NVIDIA_DRIVER_ROOT=/\n" + + "DRIVER_ROOT_CTR_PATH=/host\n", + }, + { + description: "containerized driver", + info: getDriverInfo(false, "/", "/run/nvidia/driver", "/run/nvidia/driver"), + expected: "NVIDIA_DRIVER_ROOT=/run/nvidia/driver\n" + + "DRIVER_ROOT_CTR_PATH=/run/nvidia/driver\n", + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + outputDirFlag = t.TempDir() + require.NoError(t, writeDriverReady(tc.info)) + + data, err := os.ReadFile(filepath.Join(outputDirFlag, driverStatusFile)) + require.NoError(t, err) + require.Equal(t, tc.expected, string(data)) + }) + } +} + +func TestGetDriverInfo(t *testing.T) { + t.Run("host driver", func(t *testing.T) { + // Host driver: NVIDIA_DRIVER_ROOT is the host root, read in-container via /host. + info := getDriverInfo(true, "/", "/run/nvidia/driver", "/run/nvidia/driver") + require.Equal(t, "/", info.driverRoot) + require.Equal(t, "/host", info.driverRootCtrPath) + }) + + t.Run("containerized driver", func(t *testing.T) { + // Containerized driver: emitted in-container path is the configured ctr path + // (the DRA stack mounts the driver at the same path on host and in-container), + // not nvidia-validator's fixed /driver-root. + info := getDriverInfo(false, "/", "/run/nvidia/driver", "/run/nvidia/driver") + require.Equal(t, "/run/nvidia/driver", info.driverRoot) + require.Equal(t, "/run/nvidia/driver", info.driverRootCtrPath) + }) +} + +func TestFindNvidiaBinaries(t *testing.T) { + driverRoot := t.TempDir() + writeFile(t, filepath.Join(driverRoot, "usr/bin/nvidia-smi"), "fake nvidia-smi") + writeFile(t, filepath.Join(driverRoot, "usr/lib64/libnvidia-ml.so.1"), "fake libnvidia-ml") + + r := root(driverRoot) + + smiPath, err := r.getNvidiaSMIPath() + require.NoError(t, err) + require.Equal(t, filepath.Join(driverRoot, "usr/bin/nvidia-smi"), smiPath) + + libPath, err := r.getDriverLibraryPath() + require.NoError(t, err) + require.Equal(t, filepath.Join(driverRoot, "usr/lib64/libnvidia-ml.so.1"), libPath) + + // An empty root finds neither. + empty := root(t.TempDir()) + _, err = empty.getNvidiaSMIPath() + require.Error(t, err) + _, err = empty.getDriverLibraryPath() + require.Error(t, err) +} + +// TestNvidiaSMIArgsAlwaysVersion guards the passthrough-safety invariant: the +// validator must only ever invoke `nvidia-smi --version`, never full nvidia-smi. +func TestNvidiaSMIArgsAlwaysVersion(t *testing.T) { + t.Run("container probe", func(t *testing.T) { + cmd := nvidiaSMIVersionCommand("/driver-root/usr/bin/nvidia-smi") + require.Equal(t, []string{"/driver-root/usr/bin/nvidia-smi", "--version"}, cmd.Args) + }) + + t.Run("host probe via chroot", func(t *testing.T) { + cmd := nvidiaSMIVersionCommand("chroot", "/host", "nvidia-smi") + require.Equal(t, []string{"chroot", "/host", "nvidia-smi", "--version"}, cmd.Args) + require.Equal(t, "--version", cmd.Args[len(cmd.Args)-1]) + }) +} + +func writeFile(t *testing.T, path, content string) { + t.Helper() + require.NoError(t, os.MkdirAll(filepath.Dir(path), 0750)) + require.NoError(t, os.WriteFile(path, []byte(content), 0600)) +} diff --git a/cmd/gpu-operator/main.go b/cmd/gpu-operator/main.go index 727c13ed9..abb105e09 100644 --- a/cmd/gpu-operator/main.go +++ b/cmd/gpu-operator/main.go @@ -210,6 +210,16 @@ func main() { setupLog.Error(err, "unable to create controller", "controller", "NVIDIADriver") os.Exit(1) } + + if err = (&controllers.GPUClusterConfigReconciler{ + Namespace: operatorNamespace, + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + ClusterInfo: clusterInfo, + }).SetupWithManager(ctx, mgr); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "GPUClusterConfig") + os.Exit(1) + } // +kubebuilder:scaffold:builder if err := mgr.AddHealthzCheck("health", healthz.Ping); err != nil { setupLog.Error(err, "unable to set up health check") diff --git a/config/crd/bases/nvidia.com_gpuclusterconfigs.yaml b/config/crd/bases/nvidia.com_gpuclusterconfigs.yaml new file mode 100644 index 000000000..cc3706c15 --- /dev/null +++ b/config/crd/bases/nvidia.com_gpuclusterconfigs.yaml @@ -0,0 +1,4059 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: gpuclusterconfigs.nvidia.com +spec: + group: nvidia.com + names: + kind: GPUClusterConfig + listKind: GPUClusterConfigList + plural: gpuclusterconfigs + shortNames: + - gcc + singular: gpuclusterconfig + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.state + name: Status + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: GPUClusterConfig is the Schema for the gpuclusterconfigs API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + GPUClusterConfigSpec defines the desired state of GPUClusterConfig, the DRA-based + software-enablement stack. Unlike ClusterPolicy, it does not manage the NVIDIA driver + or the device-plugin; the driver is installed separately (host-installed or via an + NVIDIADriver CR) and GPUClusterConfig waits for driver readiness before proceeding. + properties: + daemonsets: + description: |- + Daemonsets defines the common configuration applied to all DaemonSets deployed + by the GPUClusterConfig controller. + properties: + annotations: + additionalProperties: + type: string + description: |- + Optional: Annotations is an unstructured key value map stored with a resource that may be + set by external tools to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + type: object + labels: + additionalProperties: + type: string + description: |- + Optional: Map of string keys and values that can be used to organize and categorize + (scope and select) objects. May match selectors of replication controllers + and services. + type: object + podSecurityContext: + description: 'Optional: Set pod-level security context for all + DaemonSet pods (applies as defaults to all containers)' + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxChangePolicy: + description: |- + seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + It has no effect on nodes that do not support SELinux or to volumes does not support SELinux. + Valid values are "MountOption" and "Recursive". + + "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + + "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + This requires all Pods that share the same volume to use the same SELinux label. + It is not possible to share the same volume among privileged and unprivileged Pods. + Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + CSIDriver instance. Other volumes are always re-labelled recursively. + "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + + If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + and "Recursive" for all other volumes. + + This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + + All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + Note that this field cannot be set when spec.os.name is windows. + type: string + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + priorityClassName: + type: string + rollingUpdate: + description: 'Optional: Configuration for rolling update of all + DaemonSet pods' + properties: + maxUnavailable: + type: string + type: object + tolerations: + description: 'Optional: Set tolerations' + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + updateStrategy: + default: RollingUpdate + enum: + - RollingUpdate + - OnDelete + type: string + type: object + dcgm: + description: |- + DCGM defines the spec for the standalone NVIDIA DCGM hostengine. Disabled by default; + when disabled, dcgm-exporter uses its embedded nv-hostengine. NOTE: the reused enabled + field carries no server-side default and its IsEnabled() treats nil as enabled, so the + controller must default nil enabled to disabled here. + properties: + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + enabled: + description: Enabled indicates if deployment of NVIDIA DCGM Hostengine + as a separate pod is enabled. + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + hostNetwork: + description: HostNetwork indicates whether the DCGM pod uses the + host's network namespace. + type: boolean + hostPort: + description: 'Deprecated: HostPort represents host port that needs + to be bound for DCGM engine (Default: 5555)' + format: int32 + type: integer + image: + description: NVIDIA DCGM image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: NVIDIA DCGM image repository + type: string + resources: + description: 'Optional: Define resources requests and limits for + each pod' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + version: + description: NVIDIA DCGM image tag + type: string + type: object + dcgmExporter: + description: |- + DCGMExporter defines the spec for NVIDIA DCGM Exporter. Enabled by default, but the + reused enabled field carries no server-side default; the controller defaults nil enabled. + properties: + annotations: + additionalProperties: + type: string + description: |- + Optional: Annotations is an unstructured key value map stored with a resource that may be + set by external tools to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + type: object + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + config: + description: 'Optional: Custom metrics configuration for NVIDIA + DCGM Exporter' + properties: + name: + description: ConfigMap name with file dcgm-metrics.csv for + metrics to be collected by NVIDIA DCGM Exporter + type: string + type: object + enablePodLabels: + description: |- + Enable Kubernetes pod labels as Prometheus label dimensions in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean + enablePodUID: + description: |- + Enable Kubernetes pod UID as a Prometheus label dimension in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean + enabled: + description: Enabled indicates if deployment of NVIDIA DCGM Exporter + through operator is enabled + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + hostNetwork: + description: HostNetwork allows the DCGM-Exporter daemon set to + expose metrics port on the host's network namespace. + type: boolean + hostPID: + description: HostPID allows the DCGM-Exporter daemon set to access + the host's PID namespace + type: boolean + hpcJobMapping: + description: 'Optional: HPC job mapping configuration for NVIDIA + DCGM Exporter' + properties: + directory: + description: |- + Directory path where HPC job mapping files are created by the workload manager + Defaults to /var/lib/dcgm-exporter/job-mapping if not specified + type: string + enabled: + description: Enable HPC job mapping for DCGM Exporter + type: boolean + type: object + image: + description: NVIDIA DCGM Exporter image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + podLabelAllowlistRegex: + description: |- + Regex list for filtering which Kubernetes pod labels are included in DCGM exporter metrics. + Empty means all pod labels are included. + items: + type: string + type: array + repository: + description: NVIDIA DCGM Exporter image repository + type: string + resources: + description: 'Optional: Define resources requests and limits for + each pod' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + service: + description: 'Optional: Service configuration for NVIDIA DCGM + Exporter' + properties: + internalTrafficPolicy: + description: InternalTrafficPolicy describes how nodes distribute + service traffic they receive on the ClusterIP. + type: string + type: + description: Type represents the ServiceType which describes + ingress methods for a service + type: string + type: object + serviceMonitor: + description: 'Optional: ServiceMonitor configuration for NVIDIA + DCGM Exporter' + properties: + additionalLabels: + additionalProperties: + type: string + description: AdditionalLabels to add to ServiceMonitor instance + type: object + enabled: + description: Enabled indicates if ServiceMonitor is deployed + type: boolean + honorLabels: + description: HonorLabels chooses the metric’s labels on collisions + with target labels. + type: boolean + interval: + description: |- + Interval at which metrics should be scraped. If not specified, Prometheus’ global scrape interval is used. + Supported units: y, w, d, h, m, s, ms + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + relabelings: + description: Relabelings allows to rewrite labels on metric + sets + items: + description: |- + RelabelConfig allows dynamic rewriting of the label set for targets, alerts, + scraped samples and remote write samples. + + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + properties: + action: + default: replace + description: |- + action to perform based on the regex matching. + + `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0. + `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0. + + Default: "Replace" + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase + - keepequal + - KeepEqual + - dropequal + - DropEqual + type: string + modulus: + description: |- + modulus to take of the hash of the source label values. + + Only applicable when the action is `HashMod`. + format: int64 + type: integer + regex: + description: regex defines the regular expression against + which the extracted value is matched. + type: string + replacement: + description: |- + replacement value against which a Replace action is performed if the + regular expression matches. + + Regex capture groups are available. + type: string + separator: + description: separator defines the string between concatenated + SourceLabels. + type: string + sourceLabels: + description: |- + sourceLabels defines the source labels select values from existing labels. Their content is + concatenated using the configured Separator and matched against the + configured regular expression. + items: + description: |- + LabelName is a valid Prometheus label name. + For Prometheus 3.x, a label name is valid if it contains UTF-8 characters. + For Prometheus 2.x, a label name is only valid if it contains ASCII characters, letters, numbers, as well as underscores. + type: string + type: array + targetLabel: + description: |- + targetLabel defines the label to which the resulting string is written in a replacement. + + It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`, + `KeepEqual` and `DropEqual` actions. + + Regex capture groups are available. + type: string + type: object + type: array + scrapeTimeout: + description: |- + ScrapeTimeout to use when scraping metrics. Must not be greater than Interval. + If not specified, Prometheus' global scrape timeout is used. + Supported units: y, w, d, h, m, s, ms + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + type: object + version: + description: NVIDIA DCGM Exporter image tag + type: string + type: object + draDriver: + description: DRADriver defines the spec for the NVIDIA DRA driver + stack (gpus + computeDomains). + properties: + computeDomains: + description: ComputeDomains configures the compute-domain capability + of the DRA driver. + properties: + controller: + description: Controller configures the compute-domain controller + Deployment. + properties: + affinity: + description: Affinity rules for the controller Deployment + pods + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + nodeSelector: + additionalProperties: + type: string + description: NodeSelector for the controller Deployment + pods + type: object + priorityClassName: + description: PriorityClassName for the controller Deployment + pods + type: string + resources: + description: 'Optional: Define resources requests and + limits for the controller container' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: Tolerations for the controller Deployment + pods + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + type: object + enabled: + default: true + description: Enabled indicates if the computeDomains capability + of the DRA driver is enabled. + type: boolean + kubeletPlugin: + description: KubeletPlugin configures the kubelet-plugin workload + for the computeDomains capability. + properties: + affinity: + description: Affinity rules for the kubelet-plugin DaemonSet + pods + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + healthcheckPort: + description: |- + HealthcheckPort is the port running a gRPC health service checked by a livenessProbe. + Set to a negative value to disable the service and the probe. + format: int32 + type: integer + nodeSelector: + additionalProperties: + type: string + description: NodeSelector for the kubelet-plugin DaemonSet + pods + type: object + priorityClassName: + description: PriorityClassName for the kubelet-plugin + DaemonSet pods + type: string + resources: + description: 'Optional: Define resources requests and + limits for the kubelet-plugin container' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: Tolerations for the kubelet-plugin DaemonSet + pods + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + type: object + type: object + featureGates: + additionalProperties: + type: boolean + description: |- + FeatureGates is a map of feature gate names to a boolean enabling or disabling each. + It is rendered as the FEATURE_GATES environment variable on the DRA driver containers. + type: object + gpus: + description: GPUs configures the gpu.nvidia.com capability of + the DRA driver. + properties: + enabled: + default: true + description: Enabled indicates if the gpus capability of the + DRA driver is enabled. + type: boolean + kubeletPlugin: + description: KubeletPlugin configures the kubelet-plugin workload + for the gpus capability. + properties: + affinity: + description: Affinity rules for the kubelet-plugin DaemonSet + pods + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + healthcheckPort: + description: |- + HealthcheckPort is the port running a gRPC health service checked by a livenessProbe. + Set to a negative value to disable the service and the probe. + format: int32 + type: integer + nodeSelector: + additionalProperties: + type: string + description: NodeSelector for the kubelet-plugin DaemonSet + pods + type: object + priorityClassName: + description: PriorityClassName for the kubelet-plugin + DaemonSet pods + type: string + resources: + description: 'Optional: Define resources requests and + limits for the kubelet-plugin container' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: Tolerations for the kubelet-plugin DaemonSet + pods + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + type: object + type: object + image: + description: NVIDIA DRA driver image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: NVIDIA DRA driver image repository + type: string + version: + description: NVIDIA DRA driver image tag + type: string + type: object + gfd: + description: |- + GFD defines the spec for the standalone GPU Feature Discovery operand. Enabled by default, + but the reused enabled field carries no server-side default; the controller defaults nil enabled. + properties: + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + enabled: + description: Enabled indicates if deployment of GPU Feature Discovery + Plugin is enabled. + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + hostNetwork: + description: HostNetwork indicates whether the GPU Feature Discovery + pod uses the host's network namespace. + type: boolean + image: + description: GFD image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: GFD image repository + type: string + resources: + description: 'Optional: Define resources requests and limits for + each pod' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + version: + description: GFD image tag + type: string + type: object + hostPaths: + description: HostPaths defines the host paths used in host-path volumes + for various components. + properties: + driverInstallDir: + description: |- + DriverInstallDir represents the root at which driver files including libraries, + config files, and executables can be found. + type: string + kubeletRootDir: + default: /var/lib/kubelet + description: |- + KubeletRootDir represents the location of the kubelet root directory. + If empty, it will default to "/var/lib/kubelet". + type: string + rootFS: + description: |- + RootFS represents the path to the root filesystem of the host. + This is used by components that need to interact with the host filesystem + and as such this must be a chroot-able filesystem. + Examples include the MIG Manager and Toolkit Container which may need to + stop, start, or restart systemd services. + type: string + type: object + required: + - draDriver + type: object + status: + description: GPUClusterConfigStatus defines the observed state of GPUClusterConfig + properties: + conditions: + description: Conditions is a list of conditions representing the GPUClusterConfig's + current state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + namespace: + description: Namespace indicates the namespace in which the operator + and operands are installed + type: string + state: + description: State indicates the status of the GPUClusterConfig instance + enum: + - ignored + - ready + - notReady + - disabled + type: string + required: + - state + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 3ed72c95a..b65f3fcd6 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -4,6 +4,7 @@ resources: - bases/nvidia.com_clusterpolicies.yaml - bases/nvidia.com_nvidiadrivers.yaml +- bases/nvidia.com_gpuclusterconfigs.yaml # +kubebuilder:scaffold:crdkustomizeresource patchesStrategicMerge: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 7a631b9e3..34958f58a 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -132,6 +132,7 @@ rules: - nvidia.com resources: - '*' + - gpuclusterconfigs - nvidiadrivers verbs: - create @@ -144,12 +145,14 @@ rules: - apiGroups: - nvidia.com resources: + - gpuclusterconfigs/finalizers - nvidiadrivers/finalizers verbs: - update - apiGroups: - nvidia.com resources: + - gpuclusterconfigs/status - nvidiadrivers/status verbs: - get diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index 0cb8687a3..cef26cd79 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -2,4 +2,5 @@ resources: - v1_clusterpolicy.yaml - nvidia_v1alpha1_nvidiadriver.yaml +- nvidia_v1alpha1_gpuclusterconfig.yaml # +kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/samples/nvidia_v1alpha1_gpuclusterconfig.yaml b/config/samples/nvidia_v1alpha1_gpuclusterconfig.yaml new file mode 100644 index 000000000..ae5d975ec --- /dev/null +++ b/config/samples/nvidia_v1alpha1_gpuclusterconfig.yaml @@ -0,0 +1,99 @@ +apiVersion: nvidia.com/v1alpha1 +kind: GPUClusterConfig +metadata: + name: gpuclusterconfig-sample +spec: + draDriver: + repository: nvcr.io/nvidia + image: k8s-dra-driver-gpu + version: v0.1.0 + imagePullPolicy: IfNotPresent + imagePullSecrets: [] + # NOTE: a driver-validation init container config block (e.g. `validator:`) is + # intentionally omitted for now and will be added soon. + gpus: + enabled: true + kubeletPlugin: + env: [] + resources: {} + computeDomains: + enabled: true + controller: + env: [] + resources: {} + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + kubeletPlugin: + env: [] + resources: {} + # standalone dcgm hostengine + dcgm: + # disabled by default to use embedded nv-hostengine by exporter + enabled: false + repository: nvcr.io/nvidia/cloud-native + image: dcgm + version: 4.5.2-1-ubuntu22.04 + imagePullPolicy: IfNotPresent + imagePullSecrets: [] + args: [] + env: [] + resources: {} + hostNetwork: false + dcgmExporter: + enabled: true + repository: nvcr.io/nvidia/k8s + image: dcgm-exporter + version: 4.5.1-4.8.0-distroless + imagePullPolicy: IfNotPresent + imagePullSecrets: [] + args: [] + annotations: {} + env: [] + resources: {} + hostPID: false + hostNetwork: false + # Enable Kubernetes pod labels / pod UID as Prometheus label dimensions on exported metrics. + # NOTE: both require the controller to grant dcgm-exporter cluster-level read access to pods. + enablePodLabels: false + enablePodUID: false + # Regex allowlist filtering which pod labels are included as metric dimensions (empty = all). + podLabelAllowlistRegex: [] + service: + internalTrafficPolicy: Cluster + serviceMonitor: + enabled: false + interval: 15s + scrapeTimeout: 10s + honorLabels: false + additionalLabels: {} + relabelings: [] + gfd: + enabled: true + repository: nvcr.io/nvidia + image: k8s-device-plugin + version: v0.19.2 + imagePullPolicy: IfNotPresent + imagePullSecrets: [] + args: [] + env: [] + resources: {} + hostNetwork: false + # Host paths used in host-path volumes for various components + hostPaths: + rootFS: "/" + driverInstallDir: "/run/nvidia/driver" + kubeletRootDir: "/var/lib/kubelet" + # Common configuration for all daemonsets deployed by the GPUClusterConfig controller + daemonsets: + labels: {} + annotations: {} + priorityClassName: system-node-critical + tolerations: + - key: nvidia.com/gpu + operator: Exists + effect: NoSchedule + updateStrategy: "RollingUpdate" + rollingUpdate: + maxUnavailable: "1" diff --git a/controllers/clusterinfo/clusterinfo.go b/controllers/clusterinfo/clusterinfo.go index bf9839d9e..39d162b20 100644 --- a/controllers/clusterinfo/clusterinfo.go +++ b/controllers/clusterinfo/clusterinfo.go @@ -19,6 +19,7 @@ package clusterinfo import ( "context" "fmt" + "slices" "strings" configv1 "github.com/openshift/api/config/v1" @@ -28,6 +29,8 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/discovery" corev1client "k8s.io/client-go/kubernetes/typed/core/v1" "k8s.io/client-go/rest" "sigs.k8s.io/controller-runtime/pkg/client/config" @@ -42,6 +45,7 @@ type Interface interface { GetOpenshiftVersion() (string, error) GetOpenshiftDriverToolkitImages() map[string]string GetOpenshiftProxySpec() (*configv1.ProxySpec, error) + GetDRAResourceGVR() (schema.GroupVersionResource, bool, error) } type clusterInfo struct { ctx context.Context @@ -52,6 +56,8 @@ type clusterInfo struct { openshiftVersion string openshiftDriverToolkitImages map[string]string proxySpec *configv1.ProxySpec + draResourceGVR schema.GroupVersionResource + draSupported bool } // New creates a new instance of clusterinfo API @@ -87,6 +93,13 @@ func New(ctx context.Context, opts ...Option) (Interface, error) { l.openshiftDriverToolkitImages = getOpenshiftDTKImages(ctx, l.config) + draResourceGVR, draSupported, err := getDRAResourceGVR(ctx, l.config) + if err != nil { + return nil, fmt.Errorf("failed to determine DRA support: %w", err) + } + l.draResourceGVR = draResourceGVR + l.draSupported = draSupported + return l, nil } @@ -145,6 +158,59 @@ func (l *clusterInfo) GetOpenshiftProxySpec() (*configv1.ProxySpec, error) { return getOpenshiftProxySpec(l.ctx, l.config) } +// GetDRAResourceGVR returns the GroupVersionResource to use for resource.k8s.io +// DeviceClass objects and whether DRA is supported in the cluster. When DRA is +// not supported the returned bool is false and the GVR is empty. +func (l *clusterInfo) GetDRAResourceGVR() (schema.GroupVersionResource, bool, error) { + if l.oneshot { + return l.draResourceGVR, l.draSupported, nil + } + + return getDRAResourceGVR(l.ctx, l.config) +} + +// getDRAResourceGVR discovers whether the cluster serves the resource.k8s.io +// DeviceClass resource and, if so, the preferred version to use (v1 over v1beta2 +// over v1beta1). +func getDRAResourceGVR(ctx context.Context, config *rest.Config) (schema.GroupVersionResource, bool, error) { + logger := log.FromContext(ctx) + var gvr schema.GroupVersionResource + + discoveryClient, err := discovery.NewDiscoveryClientForConfig(config) + if err != nil { + return gvr, false, fmt.Errorf("error building discovery client: %w", err) + } + + apiResourceLists, err := discoveryClient.ServerPreferredResources() + if err != nil { + return gvr, false, fmt.Errorf("error getting server resources from discovery client: %w", err) + } + + var groupVersions []string + for _, list := range apiResourceLists { + for _, resource := range list.APIResources { + if resource.Kind == "DeviceClass" { + groupVersions = append(groupVersions, list.GroupVersion) + } + } + } + + if len(groupVersions) == 0 { + return gvr, false, nil + } + logger.V(consts.LogLevelInfo).Info("Discovered DeviceClass resource", + "groupVersions", strings.Join(groupVersions, ", ")) + + for _, version := range []string{"v1", "v1beta2", "v1beta1"} { + if slices.Contains(groupVersions, "resource.k8s.io/"+version) { + return schema.GroupVersionResource{Group: "resource.k8s.io", Version: version, Resource: "deviceclasses"}, true, nil + } + } + + return gvr, false, fmt.Errorf("could not determine the GVR for the DeviceClass resource from discovered group/versions: %s", + strings.Join(groupVersions, ", ")) +} + func getContainerRuntime(ctx context.Context, config *rest.Config) (string, error) { logger := log.FromContext(ctx) diff --git a/controllers/gpuclusterconfig_controller.go b/controllers/gpuclusterconfig_controller.go new file mode 100644 index 000000000..7f3bd3f17 --- /dev/null +++ b/controllers/gpuclusterconfig_controller.go @@ -0,0 +1,233 @@ +/* +Copyright 2025. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package controllers + +import ( + "context" + "fmt" + "time" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/workqueue" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" + + nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1" + "github.com/NVIDIA/gpu-operator/controllers/clusterinfo" + "github.com/NVIDIA/gpu-operator/internal/conditions" + "github.com/NVIDIA/gpu-operator/internal/consts" + "github.com/NVIDIA/gpu-operator/internal/state" +) + +// GPUClusterConfigReconciler reconciles a GPUClusterConfig object +type GPUClusterConfigReconciler struct { + client.Client + Scheme *runtime.Scheme + ClusterInfo clusterinfo.Interface + Namespace string + + stateManager state.Manager + conditionUpdater conditions.Updater + + // singleton is the GPUClusterConfig that owns reconciliation; the first instance to + // reconcile claims it (first-wins), mirroring ClusterPolicy. + singleton *nvidiav1alpha1.GPUClusterConfig +} + +//+kubebuilder:rbac:groups=nvidia.com,resources=gpuclusterconfigs,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=nvidia.com,resources=gpuclusterconfigs/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=nvidia.com,resources=gpuclusterconfigs/finalizers,verbs=update + +func (r *GPUClusterConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + logger.V(consts.LogLevelInfo).Info("Reconciling GPUClusterConfig") + + instance := &nvidiav1alpha1.GPUClusterConfig{} + if err := r.Get(ctx, req.NamespacedName, instance); err != nil { + if apierrors.IsNotFound(err) { + // Deleted; owned objects are garbage-collected, so there is nothing to clean up. + return ctrl.Result{}, nil + } + wrappedErr := fmt.Errorf("error getting GPUClusterConfig object: %w", err) + logger.Error(err, "error getting GPUClusterConfig object") + instance.Status.State = nvidiav1alpha1.NotReady + if condErr := r.conditionUpdater.SetConditionsError(ctx, instance, conditions.ReconcileFailed, wrappedErr.Error()); condErr != nil { + logger.Error(condErr, "failed to set condition") + } + return ctrl.Result{}, wrappedErr + } + + // Singleton, first-wins (mirroring ClusterPolicy): the first instance to reconcile + // claims ownership; any other instance is marked Ignored and skipped. The owner is + // held in memory, so the choice resets on operator restart. + if r.singleton != nil && r.singleton.Name != instance.Name { + logger.V(consts.LogLevelWarning).Info("Multiple GPUClusterConfig instances found, ignoring this one", + "name", instance.Name, "owner", r.singleton.Name) + if err := r.updateCrStatus(ctx, instance, nvidiav1alpha1.Ignored); err != nil { + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + r.singleton = instance + + infoCatalog := state.NewInfoCatalog() + infoCatalog.Add(state.InfoTypeClusterInfo, r.ClusterInfo) + + managerStatus := r.stateManager.SyncState(ctx, instance, infoCatalog) + + if err := r.updateCrStatus(ctx, instance, nvidiav1alpha1.State(managerStatus.Status)); err != nil { + return ctrl.Result{}, err + } + + if managerStatus.Status != state.SyncStateReady { + logger.Info("GPUClusterConfig instance is not ready") + var errorInfo error + for _, result := range managerStatus.StatesStatus { + if result.Status != state.SyncStateReady && result.ErrInfo != nil { + errorInfo = result.ErrInfo + if condErr := r.conditionUpdater.SetConditionsError(ctx, instance, conditions.ReconcileFailed, fmt.Sprintf("Error syncing state %s: %v", result.StateName, errorInfo)); condErr != nil { + logger.Error(condErr, "failed to set condition") + } + break + } + } + // if no errors are reported from any state, then we are waiting on operand pods + if errorInfo == nil { + if condErr := r.conditionUpdater.SetConditionsError(ctx, instance, conditions.OperandNotReady, "Waiting for operand pods to be ready"); condErr != nil { + logger.Error(condErr, "failed to set condition") + } + } + return ctrl.Result{RequeueAfter: time.Second * 5}, nil + } + + if condErr := r.conditionUpdater.SetConditionsReady(ctx, instance, conditions.Reconciled, "All resources have been successfully reconciled"); condErr != nil { + logger.Error(condErr, "failed to set condition") + return ctrl.Result{}, condErr + } + return ctrl.Result{}, nil +} + +// updateCrStatus writes desired to the CR's status, skipping the write when it is already current. +func (r *GPUClusterConfigReconciler) updateCrStatus(ctx context.Context, cr *nvidiav1alpha1.GPUClusterConfig, desired nvidiav1alpha1.State) error { + reqLogger := log.FromContext(ctx) + + // Refetch to avoid a resourceVersion conflict. + instance := &nvidiav1alpha1.GPUClusterConfig{} + if err := r.Get(ctx, types.NamespacedName{Name: cr.Name}, instance); err != nil { + reqLogger.Error(err, "Failed to get GPUClusterConfig instance for status update") + return err + } + + if instance.Status.State == desired && instance.Status.Namespace == r.Namespace { + return nil + } + instance.Status.State = desired + instance.Status.Namespace = r.Namespace + + reqLogger.V(consts.LogLevelInfo).Info("Updating CR Status", "Status", instance.Status) + if err := r.Status().Update(ctx, instance); err != nil { + reqLogger.Error(err, "Failed to update CR status") + return err + } + cr.Status.State = instance.Status.State + cr.Status.Namespace = instance.Status.Namespace + return nil +} + +// enqueueAllGPUClusterConfigs enqueues every instance so each is reconciled when any +// instance or owned resource changes. +func (r *GPUClusterConfigReconciler) enqueueAllGPUClusterConfigs(ctx context.Context) []reconcile.Request { + logger := log.FromContext(ctx) + list := &nvidiav1alpha1.GPUClusterConfigList{} + + if err := r.List(ctx, list); err != nil { + logger.Error(err, "Unable to list GPUClusterConfig resources") + return []reconcile.Request{} + } + + reconcileRequests := make([]reconcile.Request, 0, len(list.Items)) + for _, config := range list.Items { + reconcileRequests = append(reconcileRequests, + reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: config.GetName(), + }, + }) + } + + return reconcileRequests +} + +func (r *GPUClusterConfigReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { + // The state manager renders the DRA driver operand for the GPUClusterConfig. + stateManager, err := state.NewManager( + nvidiav1alpha1.GPUClusterConfigCRDName, + r.Namespace, + mgr.GetClient(), + mgr.GetScheme()) + if err != nil { + return fmt.Errorf("error creating state manager: %v", err) + } + r.stateManager = stateManager + + r.conditionUpdater = conditions.NewGPUClusterConfigUpdater(mgr.GetClient()) + + c, err := controller.New("gpu-cluster-config-controller", mgr, controller.Options{ + Reconciler: r, + MaxConcurrentReconciles: 1, + RateLimiter: workqueue.NewTypedItemExponentialFailureRateLimiter[reconcile.Request](minDelayCR, maxDelayCR), + }) + if err != nil { + return err + } + + gpuClusterConfigMapFn := func(ctx context.Context, _ *nvidiav1alpha1.GPUClusterConfig) []reconcile.Request { + return r.enqueueAllGPUClusterConfigs(ctx) + } + + err = c.Watch(source.Kind( + mgr.GetCache(), + &nvidiav1alpha1.GPUClusterConfig{}, + handler.TypedEnqueueRequestsFromMapFunc(gpuClusterConfigMapFn), + predicate.TypedGenerationChangedPredicate[*nvidiav1alpha1.GPUClusterConfig]{}, + ), + ) + if err != nil { + return err + } + + // Watch the secondary resources each state manager owns. + watchSources := stateManager.GetWatchSources(mgr) + for _, watchSource := range watchSources { + err = c.Watch( + watchSource, + ) + if err != nil { + return fmt.Errorf("error setting up Watch for source type %v: %w", watchSource, err) + } + } + + return nil +} diff --git a/controllers/gpuclusterconfig_controller_test.go b/controllers/gpuclusterconfig_controller_test.go new file mode 100644 index 000000000..4378ba831 --- /dev/null +++ b/controllers/gpuclusterconfig_controller_test.go @@ -0,0 +1,156 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package controllers + +import ( + "context" + "sort" + "testing" + + "github.com/stretchr/testify/require" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1" + "github.com/NVIDIA/gpu-operator/internal/conditions" + "github.com/NVIDIA/gpu-operator/internal/state" +) + +// newGPUClusterConfigReconciler builds a reconciler over a fake client seeded with objs. The +// status subresource is registered so Status().Update persists. +func newGPUClusterConfigReconciler(t *testing.T, objs ...client.Object) (*GPUClusterConfigReconciler, client.Client) { + t.Helper() + + scheme := runtime.NewScheme() + require.NoError(t, nvidiav1alpha1.AddToScheme(scheme)) + + c := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(objs...). + WithStatusSubresource(&nvidiav1alpha1.GPUClusterConfig{}). + Build() + + return &GPUClusterConfigReconciler{ + Client: c, + Scheme: scheme, + Namespace: "test-namespace", + stateManager: &fakeStateManager{results: state.Results{Status: state.SyncStateReady}}, + conditionUpdater: &FakeConditionUpdater{}, + }, c +} + +// fakeStateManager returns canned SyncState results so the controller tests don't load +// real manifests. GetWatchSources is promoted from the embedded (nil) interface and is +// never called here — only SetupWithManager calls it, which these tests skip. +type fakeStateManager struct { + state.Manager + results state.Results +} + +func (f *fakeStateManager) SyncState(_ context.Context, _ interface{}, _ state.InfoCatalog) state.Results { + return f.results +} + +func gccReconcile(t *testing.T, r *GPUClusterConfigReconciler, name string) { + t.Helper() + _, err := r.Reconcile(context.Background(), gccRequest(name)) + require.NoError(t, err) +} + +func gccRequest(name string) ctrl.Request { + return ctrl.Request{NamespacedName: types.NamespacedName{Name: name}} +} + +func gccState(t *testing.T, c client.Client, name string) nvidiav1alpha1.State { + t.Helper() + instance := &nvidiav1alpha1.GPUClusterConfig{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: name}, instance)) + return instance.Status.State +} + +// Empty state set, so SyncState reports ready. +func TestGPUClusterConfigReconcileReady(t *testing.T) { + cfg := &nvidiav1alpha1.GPUClusterConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "config"}, + } + r, c := newGPUClusterConfigReconciler(t, cfg) + + gccReconcile(t, r, cfg.Name) + + instance := &nvidiav1alpha1.GPUClusterConfig{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: cfg.Name}, instance)) + require.Equal(t, nvidiav1alpha1.Ready, instance.Status.State) + require.Equal(t, "test-namespace", instance.Status.Namespace) +} + +func TestGPUClusterConfigReconcileNotFound(t *testing.T) { + r, _ := newGPUClusterConfigReconciler(t) + + res, err := r.Reconcile(context.Background(), gccRequest("missing")) + require.NoError(t, err) + require.Zero(t, res) +} + +// First-reconciled wins (mirroring ClusterPolicy): whichever instance reconciles first +// claims ownership, regardless of name or creationTimestamp. +func TestGPUClusterConfigSingleton(t *testing.T) { + first := &nvidiav1alpha1.GPUClusterConfig{ObjectMeta: metav1.ObjectMeta{Name: "first"}} + second := &nvidiav1alpha1.GPUClusterConfig{ObjectMeta: metav1.ObjectMeta{Name: "second"}} + r, c := newGPUClusterConfigReconciler(t, first, second) + + gccReconcile(t, r, first.Name) + require.Equal(t, nvidiav1alpha1.Ready, gccState(t, c, first.Name)) + + gccReconcile(t, r, second.Name) + require.Equal(t, nvidiav1alpha1.Ignored, gccState(t, c, second.Name)) +} + +// Matching ClusterPolicy, an ignored duplicate carries no status condition. +func TestGPUClusterConfigDuplicateNoCondition(t *testing.T) { + owner := &nvidiav1alpha1.GPUClusterConfig{ObjectMeta: metav1.ObjectMeta{Name: "owner"}} + duplicate := &nvidiav1alpha1.GPUClusterConfig{ObjectMeta: metav1.ObjectMeta{Name: "duplicate"}} + r, c := newGPUClusterConfigReconciler(t, owner, duplicate) + r.conditionUpdater = conditions.NewGPUClusterConfigUpdater(c) + + gccReconcile(t, r, owner.Name) // owner reconciles first, claiming ownership + gccReconcile(t, r, duplicate.Name) // duplicate is ignored + + instance := &nvidiav1alpha1.GPUClusterConfig{} + require.NoError(t, c.Get(context.Background(), types.NamespacedName{Name: duplicate.Name}, instance)) + require.Equal(t, nvidiav1alpha1.Ignored, instance.Status.State) + require.Nil(t, meta.FindStatusCondition(instance.Status.Conditions, conditions.Error)) + require.Nil(t, meta.FindStatusCondition(instance.Status.Conditions, conditions.Ready)) +} + +func TestEnqueueAllGPUClusterConfigs(t *testing.T) { + r, _ := newGPUClusterConfigReconciler(t, + &nvidiav1alpha1.GPUClusterConfig{ObjectMeta: metav1.ObjectMeta{Name: "config-a"}}, + &nvidiav1alpha1.GPUClusterConfig{ObjectMeta: metav1.ObjectMeta{Name: "config-b"}}, + ) + + requests := r.enqueueAllGPUClusterConfigs(context.Background()) + + require.Len(t, requests, 2) + got := []string{requests[0].String(), requests[1].String()} + sort.Strings(got) + require.Equal(t, []string{"/config-a", "/config-b"}, got) +} diff --git a/deployments/gpu-operator/crds/nvidia.com_gpuclusterconfigs.yaml b/deployments/gpu-operator/crds/nvidia.com_gpuclusterconfigs.yaml new file mode 100644 index 000000000..cc3706c15 --- /dev/null +++ b/deployments/gpu-operator/crds/nvidia.com_gpuclusterconfigs.yaml @@ -0,0 +1,4059 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.20.1 + name: gpuclusterconfigs.nvidia.com +spec: + group: nvidia.com + names: + kind: GPUClusterConfig + listKind: GPUClusterConfigList + plural: gpuclusterconfigs + shortNames: + - gcc + singular: gpuclusterconfig + scope: Cluster + versions: + - additionalPrinterColumns: + - jsonPath: .status.state + name: Status + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: string + name: v1alpha1 + schema: + openAPIV3Schema: + description: GPUClusterConfig is the Schema for the gpuclusterconfigs API + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: |- + GPUClusterConfigSpec defines the desired state of GPUClusterConfig, the DRA-based + software-enablement stack. Unlike ClusterPolicy, it does not manage the NVIDIA driver + or the device-plugin; the driver is installed separately (host-installed or via an + NVIDIADriver CR) and GPUClusterConfig waits for driver readiness before proceeding. + properties: + daemonsets: + description: |- + Daemonsets defines the common configuration applied to all DaemonSets deployed + by the GPUClusterConfig controller. + properties: + annotations: + additionalProperties: + type: string + description: |- + Optional: Annotations is an unstructured key value map stored with a resource that may be + set by external tools to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + type: object + labels: + additionalProperties: + type: string + description: |- + Optional: Map of string keys and values that can be used to organize and categorize + (scope and select) objects. May match selectors of replication controllers + and services. + type: object + podSecurityContext: + description: 'Optional: Set pod-level security context for all + DaemonSet pods (applies as defaults to all containers)' + properties: + appArmorProfile: + description: |- + appArmorProfile is the AppArmor options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile loaded on the node that should be used. + The profile must be preconfigured on the node to work. + Must match the loaded name of the profile. + Must be set if and only if type is "Localhost". + type: string + type: + description: |- + type indicates which kind of AppArmor profile will be applied. + Valid options are: + Localhost - a profile pre-loaded on the node. + RuntimeDefault - the container runtime's default profile. + Unconfined - no AppArmor enforcement. + type: string + required: + - type + type: object + fsGroup: + description: |- + A special supplemental group that applies to all containers in a pod. + Some volume types allow the Kubelet to change the ownership of that volume + to be owned by the pod: + + 1. The owning GID will be the FSGroup + 2. The setgid bit is set (new files created in the volume will be owned by FSGroup) + 3. The permission bits are OR'd with rw-rw---- + + If unset, the Kubelet will not modify the ownership and permissions of any volume. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + fsGroupChangePolicy: + description: |- + fsGroupChangePolicy defines behavior of changing ownership and permission of the volume + before being exposed inside Pod. This field will only apply to + volume types which support fsGroup based ownership(and permissions). + It will have no effect on ephemeral volume types such as: secret, configmaps + and emptydir. + Valid values are "OnRootMismatch" and "Always". If not specified, "Always" is used. + Note that this field cannot be set when spec.os.name is windows. + type: string + runAsGroup: + description: |- + The GID to run the entrypoint of the container process. + Uses runtime default if unset. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + runAsNonRoot: + description: |- + Indicates that the container must run as a non-root user. + If true, the Kubelet will validate the image at runtime to ensure that it + does not run as UID 0 (root) and fail to start the container if it does. + If unset or false, no such validation will be performed. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: boolean + runAsUser: + description: |- + The UID to run the entrypoint of the container process. + Defaults to user specified in image metadata if unspecified. + May also be set in SecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence + for that container. + Note that this field cannot be set when spec.os.name is windows. + format: int64 + type: integer + seLinuxChangePolicy: + description: |- + seLinuxChangePolicy defines how the container's SELinux label is applied to all volumes used by the Pod. + It has no effect on nodes that do not support SELinux or to volumes does not support SELinux. + Valid values are "MountOption" and "Recursive". + + "Recursive" means relabeling of all files on all Pod volumes by the container runtime. + This may be slow for large volumes, but allows mixing privileged and unprivileged Pods sharing the same volume on the same node. + + "MountOption" mounts all eligible Pod volumes with `-o context` mount option. + This requires all Pods that share the same volume to use the same SELinux label. + It is not possible to share the same volume among privileged and unprivileged Pods. + Eligible volumes are in-tree FibreChannel and iSCSI volumes, and all CSI volumes + whose CSI driver announces SELinux support by setting spec.seLinuxMount: true in their + CSIDriver instance. Other volumes are always re-labelled recursively. + "MountOption" value is allowed only when SELinuxMount feature gate is enabled. + + If not specified and SELinuxMount feature gate is enabled, "MountOption" is used. + If not specified and SELinuxMount feature gate is disabled, "MountOption" is used for ReadWriteOncePod volumes + and "Recursive" for all other volumes. + + This field affects only Pods that have SELinux label set, either in PodSecurityContext or in SecurityContext of all containers. + + All Pods that use the same volume should use the same seLinuxChangePolicy, otherwise some pods can get stuck in ContainerCreating state. + Note that this field cannot be set when spec.os.name is windows. + type: string + seLinuxOptions: + description: |- + The SELinux context to be applied to all containers. + If unspecified, the container runtime will allocate a random SELinux context for each + container. May also be set in SecurityContext. If set in + both SecurityContext and PodSecurityContext, the value specified in SecurityContext + takes precedence for that container. + Note that this field cannot be set when spec.os.name is windows. + properties: + level: + description: Level is SELinux level label that applies + to the container. + type: string + role: + description: Role is a SELinux role label that applies + to the container. + type: string + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + seccompProfile: + description: |- + The seccomp options to use by the containers in this pod. + Note that this field cannot be set when spec.os.name is windows. + properties: + localhostProfile: + description: |- + localhostProfile indicates a profile defined in a file on the node should be used. + The profile must be preconfigured on the node to work. + Must be a descending path, relative to the kubelet's configured seccomp profile location. + Must be set if type is "Localhost". Must NOT be set for any other type. + type: string + type: + description: |- + type indicates which kind of seccomp profile will be applied. + Valid options are: + + Localhost - a profile defined in a file on the node should be used. + RuntimeDefault - the container runtime default profile should be used. + Unconfined - no profile should be applied. + type: string + required: + - type + type: object + supplementalGroups: + description: |- + A list of groups applied to the first process run in each container, in + addition to the container's primary GID and fsGroup (if specified). If + the SupplementalGroupsPolicy feature is enabled, the + supplementalGroupsPolicy field determines whether these are in addition + to or instead of any group memberships defined in the container image. + If unspecified, no additional groups are added, though group memberships + defined in the container image may still be used, depending on the + supplementalGroupsPolicy field. + Note that this field cannot be set when spec.os.name is windows. + items: + format: int64 + type: integer + type: array + x-kubernetes-list-type: atomic + supplementalGroupsPolicy: + description: |- + Defines how supplemental groups of the first container processes are calculated. + Valid values are "Merge" and "Strict". If not specified, "Merge" is used. + (Alpha) Using the field requires the SupplementalGroupsPolicy feature gate to be enabled + and the container runtime must implement support for this feature. + Note that this field cannot be set when spec.os.name is windows. + type: string + sysctls: + description: |- + Sysctls hold a list of namespaced sysctls used for the pod. Pods with unsupported + sysctls (by the container runtime) might fail to launch. + Note that this field cannot be set when spec.os.name is windows. + items: + description: Sysctl defines a kernel parameter to be set + properties: + name: + description: Name of a property to set + type: string + value: + description: Value of a property to set + type: string + required: + - name + - value + type: object + type: array + x-kubernetes-list-type: atomic + windowsOptions: + description: |- + The Windows specific settings applied to all containers. + If unspecified, the options within a container's SecurityContext will be used. + If set in both SecurityContext and PodSecurityContext, the value specified in SecurityContext takes precedence. + Note that this field cannot be set when spec.os.name is linux. + properties: + gmsaCredentialSpec: + description: |- + GMSACredentialSpec is where the GMSA admission webhook + (https://github.com/kubernetes-sigs/windows-gmsa) inlines the contents of the + GMSA credential spec named by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + hostProcess: + description: |- + HostProcess determines if a container should be run as a 'Host Process' container. + All of a Pod's containers must have the same effective HostProcess value + (it is not allowed to have a mix of HostProcess containers and non-HostProcess containers). + In addition, if HostProcess is true then HostNetwork must also be set to true. + type: boolean + runAsUserName: + description: |- + The UserName in Windows to run the entrypoint of the container process. + Defaults to the user specified in image metadata if unspecified. + May also be set in PodSecurityContext. If set in both SecurityContext and + PodSecurityContext, the value specified in SecurityContext takes precedence. + type: string + type: object + type: object + priorityClassName: + type: string + rollingUpdate: + description: 'Optional: Configuration for rolling update of all + DaemonSet pods' + properties: + maxUnavailable: + type: string + type: object + tolerations: + description: 'Optional: Set tolerations' + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + updateStrategy: + default: RollingUpdate + enum: + - RollingUpdate + - OnDelete + type: string + type: object + dcgm: + description: |- + DCGM defines the spec for the standalone NVIDIA DCGM hostengine. Disabled by default; + when disabled, dcgm-exporter uses its embedded nv-hostengine. NOTE: the reused enabled + field carries no server-side default and its IsEnabled() treats nil as enabled, so the + controller must default nil enabled to disabled here. + properties: + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + enabled: + description: Enabled indicates if deployment of NVIDIA DCGM Hostengine + as a separate pod is enabled. + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + hostNetwork: + description: HostNetwork indicates whether the DCGM pod uses the + host's network namespace. + type: boolean + hostPort: + description: 'Deprecated: HostPort represents host port that needs + to be bound for DCGM engine (Default: 5555)' + format: int32 + type: integer + image: + description: NVIDIA DCGM image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: NVIDIA DCGM image repository + type: string + resources: + description: 'Optional: Define resources requests and limits for + each pod' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + version: + description: NVIDIA DCGM image tag + type: string + type: object + dcgmExporter: + description: |- + DCGMExporter defines the spec for NVIDIA DCGM Exporter. Enabled by default, but the + reused enabled field carries no server-side default; the controller defaults nil enabled. + properties: + annotations: + additionalProperties: + type: string + description: |- + Optional: Annotations is an unstructured key value map stored with a resource that may be + set by external tools to store and retrieve arbitrary metadata. They are not + queryable and should be preserved when modifying objects. + type: object + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + config: + description: 'Optional: Custom metrics configuration for NVIDIA + DCGM Exporter' + properties: + name: + description: ConfigMap name with file dcgm-metrics.csv for + metrics to be collected by NVIDIA DCGM Exporter + type: string + type: object + enablePodLabels: + description: |- + Enable Kubernetes pod labels as Prometheus label dimensions in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean + enablePodUID: + description: |- + Enable Kubernetes pod UID as a Prometheus label dimension in DCGM exporter metrics. + (Requires cluster-level read access to pods.) + type: boolean + enabled: + description: Enabled indicates if deployment of NVIDIA DCGM Exporter + through operator is enabled + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + hostNetwork: + description: HostNetwork allows the DCGM-Exporter daemon set to + expose metrics port on the host's network namespace. + type: boolean + hostPID: + description: HostPID allows the DCGM-Exporter daemon set to access + the host's PID namespace + type: boolean + hpcJobMapping: + description: 'Optional: HPC job mapping configuration for NVIDIA + DCGM Exporter' + properties: + directory: + description: |- + Directory path where HPC job mapping files are created by the workload manager + Defaults to /var/lib/dcgm-exporter/job-mapping if not specified + type: string + enabled: + description: Enable HPC job mapping for DCGM Exporter + type: boolean + type: object + image: + description: NVIDIA DCGM Exporter image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + podLabelAllowlistRegex: + description: |- + Regex list for filtering which Kubernetes pod labels are included in DCGM exporter metrics. + Empty means all pod labels are included. + items: + type: string + type: array + repository: + description: NVIDIA DCGM Exporter image repository + type: string + resources: + description: 'Optional: Define resources requests and limits for + each pod' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + service: + description: 'Optional: Service configuration for NVIDIA DCGM + Exporter' + properties: + internalTrafficPolicy: + description: InternalTrafficPolicy describes how nodes distribute + service traffic they receive on the ClusterIP. + type: string + type: + description: Type represents the ServiceType which describes + ingress methods for a service + type: string + type: object + serviceMonitor: + description: 'Optional: ServiceMonitor configuration for NVIDIA + DCGM Exporter' + properties: + additionalLabels: + additionalProperties: + type: string + description: AdditionalLabels to add to ServiceMonitor instance + type: object + enabled: + description: Enabled indicates if ServiceMonitor is deployed + type: boolean + honorLabels: + description: HonorLabels chooses the metric’s labels on collisions + with target labels. + type: boolean + interval: + description: |- + Interval at which metrics should be scraped. If not specified, Prometheus’ global scrape interval is used. + Supported units: y, w, d, h, m, s, ms + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + relabelings: + description: Relabelings allows to rewrite labels on metric + sets + items: + description: |- + RelabelConfig allows dynamic rewriting of the label set for targets, alerts, + scraped samples and remote write samples. + + More info: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config + properties: + action: + default: replace + description: |- + action to perform based on the regex matching. + + `Uppercase` and `Lowercase` actions require Prometheus >= v2.36.0. + `DropEqual` and `KeepEqual` actions require Prometheus >= v2.41.0. + + Default: "Replace" + enum: + - replace + - Replace + - keep + - Keep + - drop + - Drop + - hashmod + - HashMod + - labelmap + - LabelMap + - labeldrop + - LabelDrop + - labelkeep + - LabelKeep + - lowercase + - Lowercase + - uppercase + - Uppercase + - keepequal + - KeepEqual + - dropequal + - DropEqual + type: string + modulus: + description: |- + modulus to take of the hash of the source label values. + + Only applicable when the action is `HashMod`. + format: int64 + type: integer + regex: + description: regex defines the regular expression against + which the extracted value is matched. + type: string + replacement: + description: |- + replacement value against which a Replace action is performed if the + regular expression matches. + + Regex capture groups are available. + type: string + separator: + description: separator defines the string between concatenated + SourceLabels. + type: string + sourceLabels: + description: |- + sourceLabels defines the source labels select values from existing labels. Their content is + concatenated using the configured Separator and matched against the + configured regular expression. + items: + description: |- + LabelName is a valid Prometheus label name. + For Prometheus 3.x, a label name is valid if it contains UTF-8 characters. + For Prometheus 2.x, a label name is only valid if it contains ASCII characters, letters, numbers, as well as underscores. + type: string + type: array + targetLabel: + description: |- + targetLabel defines the label to which the resulting string is written in a replacement. + + It is mandatory for `Replace`, `HashMod`, `Lowercase`, `Uppercase`, + `KeepEqual` and `DropEqual` actions. + + Regex capture groups are available. + type: string + type: object + type: array + scrapeTimeout: + description: |- + ScrapeTimeout to use when scraping metrics. Must not be greater than Interval. + If not specified, Prometheus' global scrape timeout is used. + Supported units: y, w, d, h, m, s, ms + pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$ + type: string + type: object + version: + description: NVIDIA DCGM Exporter image tag + type: string + type: object + draDriver: + description: DRADriver defines the spec for the NVIDIA DRA driver + stack (gpus + computeDomains). + properties: + computeDomains: + description: ComputeDomains configures the compute-domain capability + of the DRA driver. + properties: + controller: + description: Controller configures the compute-domain controller + Deployment. + properties: + affinity: + description: Affinity rules for the controller Deployment + pods + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + nodeSelector: + additionalProperties: + type: string + description: NodeSelector for the controller Deployment + pods + type: object + priorityClassName: + description: PriorityClassName for the controller Deployment + pods + type: string + resources: + description: 'Optional: Define resources requests and + limits for the controller container' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: Tolerations for the controller Deployment + pods + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + type: object + enabled: + default: true + description: Enabled indicates if the computeDomains capability + of the DRA driver is enabled. + type: boolean + kubeletPlugin: + description: KubeletPlugin configures the kubelet-plugin workload + for the computeDomains capability. + properties: + affinity: + description: Affinity rules for the kubelet-plugin DaemonSet + pods + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + healthcheckPort: + description: |- + HealthcheckPort is the port running a gRPC health service checked by a livenessProbe. + Set to a negative value to disable the service and the probe. + format: int32 + type: integer + nodeSelector: + additionalProperties: + type: string + description: NodeSelector for the kubelet-plugin DaemonSet + pods + type: object + priorityClassName: + description: PriorityClassName for the kubelet-plugin + DaemonSet pods + type: string + resources: + description: 'Optional: Define resources requests and + limits for the kubelet-plugin container' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: Tolerations for the kubelet-plugin DaemonSet + pods + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + type: object + type: object + featureGates: + additionalProperties: + type: boolean + description: |- + FeatureGates is a map of feature gate names to a boolean enabling or disabling each. + It is rendered as the FEATURE_GATES environment variable on the DRA driver containers. + type: object + gpus: + description: GPUs configures the gpu.nvidia.com capability of + the DRA driver. + properties: + enabled: + default: true + description: Enabled indicates if the gpus capability of the + DRA driver is enabled. + type: boolean + kubeletPlugin: + description: KubeletPlugin configures the kubelet-plugin workload + for the gpus capability. + properties: + affinity: + description: Affinity rules for the kubelet-plugin DaemonSet + pods + properties: + nodeAffinity: + description: Describes node affinity scheduling rules + for the pod. + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node matches the corresponding matchExpressions; the + node(s) with the highest sum are the most preferred. + items: + description: |- + An empty preferred scheduling term matches all objects with implicit weight 0 + (i.e. it's a no-op). A null preferred scheduling term matches no objects (i.e. is also a no-op). + properties: + preference: + description: A node selector term, associated + with the corresponding weight. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + weight: + description: Weight associated with matching + the corresponding nodeSelectorTerm, in + the range 1-100. + format: int32 + type: integer + required: + - preference + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to an update), the system + may or may not try to eventually evict the pod from its node. + properties: + nodeSelectorTerms: + description: Required. A list of node selector + terms. The terms are ORed. + items: + description: |- + A null or empty node selector term matches no objects. The requirements of + them are ANDed. + The TopologySelectorTerm type implements a subset of the NodeSelectorTerm. + properties: + matchExpressions: + description: A list of node selector + requirements by node's labels. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchFields: + description: A list of node selector + requirements by node's fields. + items: + description: |- + A node selector requirement is a selector that contains values, a key, and an operator + that relates the key and values. + properties: + key: + description: The label key that + the selector applies to. + type: string + operator: + description: |- + Represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists, DoesNotExist. Gt, and Lt. + type: string + values: + description: |- + An array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. If the operator is Gt or Lt, the values + array must have a single element, which will be interpreted as an integer. + This array is replaced during a strategic merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + type: object + x-kubernetes-map-type: atomic + type: array + x-kubernetes-list-type: atomic + required: + - nodeSelectorTerms + type: object + x-kubernetes-map-type: atomic + type: object + podAffinity: + description: Describes pod affinity scheduling rules + (e.g. co-locate this pod in the same node, zone, + etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling affinity expressions, etc.), + compute a sum by iterating through the elements of this field and adding + "weight" to the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + podAntiAffinity: + description: Describes pod anti-affinity scheduling + rules (e.g. avoid putting this pod in the same node, + zone, etc. as some other pod(s)). + properties: + preferredDuringSchedulingIgnoredDuringExecution: + description: |- + The scheduler will prefer to schedule pods to nodes that satisfy + the anti-affinity expressions specified by this field, but it may choose + a node that violates one or more of the expressions. The node that is + most preferred is the one with the greatest sum of weights, i.e. + for each node that meets all of the scheduling requirements (resource + request, requiredDuringScheduling anti-affinity expressions, etc.), + compute a sum by iterating through the elements of this field and subtracting + "weight" from the sum if the node has pods which matches the corresponding podAffinityTerm; the + node(s) with the highest sum are the most preferred. + items: + description: The weights of all of the matched + WeightedPodAffinityTerm fields are added per-node + to find the most preferred node(s) + properties: + podAffinityTerm: + description: Required. A pod affinity term, + associated with the corresponding weight. + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is + a list of label selector requirements. + The requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + weight: + description: |- + weight associated with matching the corresponding podAffinityTerm, + in the range 1-100. + format: int32 + type: integer + required: + - podAffinityTerm + - weight + type: object + type: array + x-kubernetes-list-type: atomic + requiredDuringSchedulingIgnoredDuringExecution: + description: |- + If the anti-affinity requirements specified by this field are not met at + scheduling time, the pod will not be scheduled onto the node. + If the anti-affinity requirements specified by this field cease to be met + at some point during pod execution (e.g. due to a pod label update), the + system may or may not try to eventually evict the pod from its node. + When there are multiple elements, the lists of nodes corresponding to each + podAffinityTerm are intersected, i.e. all terms must be satisfied. + items: + description: |- + Defines a set of pods (namely those matching the labelSelector + relative to the given namespace(s)) that this pod should be + co-located (affinity) or not co-located (anti-affinity) with, + where co-located is defined as running on a node whose value of + the label with key matches that of any node on which + a pod of the set of pods is running + properties: + labelSelector: + description: |- + A label query over a set of resources, in this case pods. + If it's null, this PodAffinityTerm matches with no Pods. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + matchLabelKeys: + description: |- + MatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key in (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both matchLabelKeys and labelSelector. + Also, matchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + mismatchLabelKeys: + description: |- + MismatchLabelKeys is a set of pod label keys to select which pods will + be taken into consideration. The keys are used to lookup values from the + incoming pod labels, those key-value labels are merged with `labelSelector` as `key notin (value)` + to select the group of existing pods which pods will be taken into consideration + for the incoming pod's pod (anti) affinity. Keys that don't exist in the incoming + pod labels will be ignored. The default value is empty. + The same key is forbidden to exist in both mismatchLabelKeys and labelSelector. + Also, mismatchLabelKeys cannot be set when labelSelector isn't set. + items: + type: string + type: array + x-kubernetes-list-type: atomic + namespaceSelector: + description: |- + A label query over the set of namespaces that the term applies to. + The term is applied to the union of the namespaces selected by this field + and the ones listed in the namespaces field. + null selector and null or empty namespaces list means "this pod's namespace". + An empty selector ({}) matches all namespaces. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The + requirements are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label + key that the selector applies + to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + namespaces: + description: |- + namespaces specifies a static list of namespace names that the term applies to. + The term is applied to the union of the namespaces listed in this field + and the ones selected by namespaceSelector. + null or empty namespaces list and null namespaceSelector means "this pod's namespace". + items: + type: string + type: array + x-kubernetes-list-type: atomic + topologyKey: + description: |- + This pod should be co-located (affinity) or not co-located (anti-affinity) with the pods matching + the labelSelector in the specified namespaces, where co-located is defined as running on a node + whose value of the label with key topologyKey matches that of any node on which any of the + selected pods is running. + Empty topologyKey is not allowed. + type: string + required: + - topologyKey + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: object + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable + present in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + healthcheckPort: + description: |- + HealthcheckPort is the port running a gRPC health service checked by a livenessProbe. + Set to a negative value to disable the service and the probe. + format: int32 + type: integer + nodeSelector: + additionalProperties: + type: string + description: NodeSelector for the kubelet-plugin DaemonSet + pods + type: object + priorityClassName: + description: PriorityClassName for the kubelet-plugin + DaemonSet pods + type: string + resources: + description: 'Optional: Define resources requests and + limits for the kubelet-plugin container' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + tolerations: + description: Tolerations for the kubelet-plugin DaemonSet + pods + items: + description: |- + The pod this Toleration is attached to tolerates any taint that matches + the triple using the matching operator . + properties: + effect: + description: |- + Effect indicates the taint effect to match. Empty means match all taint effects. + When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: |- + Key is the taint key that the toleration applies to. Empty means match all taint keys. + If the key is empty, operator must be Exists; this combination means to match all values and all keys. + type: string + operator: + description: |- + Operator represents a key's relationship to the value. + Valid operators are Exists, Equal, Lt, and Gt. Defaults to Equal. + Exists is equivalent to wildcard for value, so that a pod can + tolerate all taints of a particular category. + Lt and Gt perform numeric comparisons (requires feature gate TaintTolerationComparisonOperators). + type: string + tolerationSeconds: + description: |- + TolerationSeconds represents the period of time the toleration (which must be + of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default, + it is not set, which means tolerate the taint forever (do not evict). Zero and + negative values will be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: |- + Value is the taint value the toleration matches to. + If the operator is Exists, the value should be empty, otherwise just a regular string. + type: string + type: object + type: array + type: object + type: object + image: + description: NVIDIA DRA driver image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: NVIDIA DRA driver image repository + type: string + version: + description: NVIDIA DRA driver image tag + type: string + type: object + gfd: + description: |- + GFD defines the spec for the standalone GPU Feature Discovery operand. Enabled by default, + but the reused enabled field carries no server-side default; the controller defaults nil enabled. + properties: + args: + description: 'Optional: List of arguments' + items: + type: string + type: array + enabled: + description: Enabled indicates if deployment of GPU Feature Discovery + Plugin is enabled. + type: boolean + env: + description: 'Optional: List of environment variables' + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. + type: string + value: + description: Value of the environment variable. + type: string + required: + - name + type: object + type: array + hostNetwork: + description: HostNetwork indicates whether the GPU Feature Discovery + pod uses the host's network namespace. + type: boolean + image: + description: GFD image name + pattern: '[a-zA-Z0-9\-]+' + type: string + imagePullPolicy: + description: Image pull policy + type: string + imagePullSecrets: + description: Image pull secrets + items: + type: string + type: array + repository: + description: GFD image repository + type: string + resources: + description: 'Optional: Define resources requests and limits for + each pod' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + version: + description: GFD image tag + type: string + type: object + hostPaths: + description: HostPaths defines the host paths used in host-path volumes + for various components. + properties: + driverInstallDir: + description: |- + DriverInstallDir represents the root at which driver files including libraries, + config files, and executables can be found. + type: string + kubeletRootDir: + default: /var/lib/kubelet + description: |- + KubeletRootDir represents the location of the kubelet root directory. + If empty, it will default to "/var/lib/kubelet". + type: string + rootFS: + description: |- + RootFS represents the path to the root filesystem of the host. + This is used by components that need to interact with the host filesystem + and as such this must be a chroot-able filesystem. + Examples include the MIG Manager and Toolkit Container which may need to + stop, start, or restart systemd services. + type: string + type: object + required: + - draDriver + type: object + status: + description: GPUClusterConfigStatus defines the observed state of GPUClusterConfig + properties: + conditions: + description: Conditions is a list of conditions representing the GPUClusterConfig's + current state. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + namespace: + description: Namespace indicates the namespace in which the operator + and operands are installed + type: string + state: + description: State indicates the status of the GPUClusterConfig instance + enum: + - ignored + - ready + - notReady + - disabled + type: string + required: + - state + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/deployments/gpu-operator/crds/resource.nvidia.com_computedomaincliques.yaml b/deployments/gpu-operator/crds/resource.nvidia.com_computedomaincliques.yaml new file mode 100644 index 000000000..b3eddef71 --- /dev/null +++ b/deployments/gpu-operator/crds/resource.nvidia.com_computedomaincliques.yaml @@ -0,0 +1,84 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.1 + name: computedomaincliques.resource.nvidia.com +spec: + group: resource.nvidia.com + names: + kind: ComputeDomainClique + listKind: ComputeDomainCliqueList + plural: computedomaincliques + singular: computedomainclique + scope: Namespaced + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: |- + ComputeDomainClique holds information about a specific clique within a ComputeDomain. + It is created in the driver namespace and named as ".". + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + daemons: + items: + description: ComputeDomainDaemonInfo provides information about each + daemon in a ComputeDomainClique. + properties: + cliqueID: + type: string + index: + description: |- + The Index field is used to ensure a consistent IP-to-DNS name + mapping across all machines within an IMEX domain. Each node's index + directly determines its DNS name within a given NVLink partition + (i.e. clique). In other words, the 2-tuple of (CliqueID, Index) will + always be unique. This field is marked as optional (but not + omitempty) in order to support downgrades and avoid an API bump. + type: integer + ipAddress: + type: string + nodeName: + type: string + status: + default: NotReady + description: |- + The Status field tracks the readiness of the IMEX daemon running on + this node. It gets switched to Ready whenever the IMEX daemon is + ready to broker GPU memory exchanges and switches to NotReady when + it is not. It is marked as optional in order to support downgrades + and avoid an API bump. + enum: + - Ready + - NotReady + type: string + required: + - cliqueID + - ipAddress + - nodeName + type: object + type: array + x-kubernetes-list-map-keys: + - nodeName + x-kubernetes-list-type: map + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + type: object + served: true + storage: true diff --git a/deployments/gpu-operator/crds/resource.nvidia.com_computedomains.yaml b/deployments/gpu-operator/crds/resource.nvidia.com_computedomains.yaml new file mode 100644 index 000000000..5a28ae17c --- /dev/null +++ b/deployments/gpu-operator/crds/resource.nvidia.com_computedomains.yaml @@ -0,0 +1,162 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.17.1 + name: computedomains.resource.nvidia.com +spec: + group: resource.nvidia.com + names: + kind: ComputeDomain + listKind: ComputeDomainList + plural: computedomains + singular: computedomain + scope: Namespaced + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + description: ComputeDomain prepares a set of nodes to run a multi-node workload + in. + properties: + apiVersion: + description: |- + APIVersion defines the versioned schema of this representation of an object. + Servers should convert recognized schemas to the latest internal value, and + may reject unrecognized values. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources + type: string + kind: + description: |- + Kind is a string value representing the REST resource this object represents. + Servers may infer this from the endpoint the client submits requests to. + Cannot be updated. + In CamelCase. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + metadata: + type: object + spec: + description: ComputeDomainSpec provides the spec for a ComputeDomain. + properties: + channel: + description: ComputeDomainChannelSpec provides the spec for a channel + used to run a workload inside a ComputeDomain. + properties: + allocationMode: + default: Single + description: |- + Allows for requesting all IMEX channels (the maximum per IMEX domain) or + precisely one. + enum: + - All + - Single + type: string + resourceClaimTemplate: + description: ComputeDomainResourceClaimTemplate provides the details + of the ResourceClaimTemplate to generate. + properties: + name: + type: string + required: + - name + type: object + required: + - resourceClaimTemplate + type: object + numNodes: + description: |- + Intended number of IMEX daemons (i.e., individual compute nodes) in the + ComputeDomain. Must be zero or greater. + + With `featureGates.IMEXDaemonsWithDNSNames=true` (the default), this is + recommended to be set to zero. Workload must implement and consult its + own source of truth for the number of workers online before trying to + share GPU memory (and hence triggering IMEX interaction). When non-zero, + `numNodes` is used only for automatically updating the global + ComputeDomain `Status` (indicating `Ready` when the number of ready IMEX + daemons equals `numNodes`). In this mode, a `numNodes` value greater than + zero in particular does not gate the startup of IMEX daemons: individual + IMEX daemons are started immediately without waiting for its peers, and + any workload pod gets released right after its local IMEX daemon has + started. + + With `featureGates.IMEXDaemonsWithDNSNames=false`, `numNodes` must be set + to the expected number of worker nodes joining the ComputeDomain. In that + mode, all workload pods are held back (with containers in state + `ContainerCreating`) until the underlying IMEX domain has been joined by + `numNodes` IMEX daemons. Pods from more than `numNodes` nodes trying to + join the ComputeDomain may lead to unexpected behavior. + + The `numNodes` parameter is deprecated and will be removed in the next + API version. + type: integer + required: + - channel + - numNodes + type: object + x-kubernetes-validations: + - message: A computeDomain.spec is immutable + rule: self == oldSelf + status: + description: |- + Global ComputeDomain status. Can be used to guide debugging efforts. + Workload however should not rely on inspecting this field at any point + during its lifecycle. + properties: + nodes: + items: + description: ComputeDomainNode provides information about each node + added to a ComputeDomain. + properties: + cliqueID: + type: string + index: + description: |- + The Index field is used to ensure a consistent IP-to-DNS name + mapping across all machines within an IMEX domain. Each node's index + directly determines its DNS name within a given NVLink partition + (i.e. clique). In other words, the 2-tuple of (CliqueID, Index) will + always be unique. This field is marked as optional (but not + omitempty) in order to support downgrades and avoid an API bump. + type: integer + ipAddress: + type: string + name: + type: string + status: + default: NotReady + description: |- + The Status field tracks the readiness of the IMEX daemon running on + this node. It gets switched to Ready whenever the IMEX daemon is + ready to broker GPU memory exchanges and switches to NotReady when + it is not. It is marked as optional in order to support downgrades + and avoid an API bump. + enum: + - Ready + - NotReady + type: string + required: + - cliqueID + - ipAddress + - name + type: object + type: array + x-kubernetes-list-map-keys: + - name + x-kubernetes-list-type: map + status: + default: NotReady + enum: + - Ready + - NotReady + type: string + required: + - status + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/deployments/gpu-operator/templates/_helpers.tpl b/deployments/gpu-operator/templates/_helpers.tpl index 305c9d1fe..088a8132c 100644 --- a/deployments/gpu-operator/templates/_helpers.tpl +++ b/deployments/gpu-operator/templates/_helpers.tpl @@ -78,3 +78,10 @@ Full image name with tag {{- define "driver-manager.fullimage" -}} {{- .Values.driver.manager.repository -}}/{{- .Values.driver.manager.image -}}:{{- .Values.driver.manager.version -}} {{- end }} + +{{/* +Full image name with tag +*/}} +{{- define "validator.fullimage" -}} +{{- .Values.validator.repository -}}/{{- .Values.validator.image -}}:{{- .Values.validator.version | default .Chart.AppVersion -}} +{{- end }} diff --git a/deployments/gpu-operator/templates/clusterrole.yaml b/deployments/gpu-operator/templates/clusterrole.yaml index 2af291e22..14242cac6 100644 --- a/deployments/gpu-operator/templates/clusterrole.yaml +++ b/deployments/gpu-operator/templates/clusterrole.yaml @@ -107,6 +107,9 @@ rules: - clusterpolicies - clusterpolicies/finalizers - clusterpolicies/status + - gpuclusterconfigs + - gpuclusterconfigs/finalizers + - gpuclusterconfigs/status - nvidiadrivers - nvidiadrivers/finalizers - nvidiadrivers/status @@ -153,3 +156,110 @@ rules: {{- if .Values.operator.cleanupCRD }} - delete {{- end }} +- apiGroups: + - resource.nvidia.com + resources: + - computedomains + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - resource.nvidia.com + resources: + - computedomains/status + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - resource.nvidia.com + resources: + - computedomaincliques + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - create + - update +- apiGroups: + - resource.k8s.io + resources: + - resourceclaims + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - resource.k8s.io + resources: + - resourceclaimtemplates + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - resource.k8s.io + resources: + - deviceclasses + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - resource.k8s.io + resources: + - resourceslices + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - resource.k8s.io + resources: + - resourceclaims/status + verbs: + - update +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - patch diff --git a/deployments/gpu-operator/templates/operator.yaml b/deployments/gpu-operator/templates/operator.yaml index c6ec3bf4d..e1a72b27d 100644 --- a/deployments/gpu-operator/templates/operator.yaml +++ b/deployments/gpu-operator/templates/operator.yaml @@ -59,6 +59,8 @@ spec: fieldPath: metadata.namespace - name: "DRIVER_MANAGER_IMAGE" value: "{{ include "driver-manager.fullimage" . }}" + - name: "VALIDATOR_IMAGE" + value: "{{ include "validator.fullimage" . }}" livenessProbe: httpGet: path: /healthz diff --git a/deployments/gpu-operator/templates/role.yaml b/deployments/gpu-operator/templates/role.yaml index dc4674c57..05c3db2f2 100644 --- a/deployments/gpu-operator/templates/role.yaml +++ b/deployments/gpu-operator/templates/role.yaml @@ -40,6 +40,18 @@ rules: - update - patch - delete +- apiGroups: + - apps + resources: + - deployments + verbs: + - create + - get + - list + - watch + - update + - patch + - delete - apiGroups: - "" resources: diff --git a/docker/Dockerfile b/docker/Dockerfile index 03a928131..59e74fdd1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -109,6 +109,7 @@ ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/busybox COPY --from=builder /workspace/gpu-operator /usr/bin/ COPY --from=builder /workspace/manage-crds /usr/bin/ COPY --from=builder /workspace/nvidia-validator /usr/bin/ +COPY --from=builder /workspace/dra-driver-validator /usr/bin/ COPY --from=sample-builder /build/vectorAdd /usr/bin/vectorAdd ARG CUDA_SAMPLES_VERSION COPY --from=sample-builder /usr/local/cuda-${CUDA_SAMPLES_VERSION}/compat /usr/local/cuda/compat diff --git a/internal/conditions/gpuclusterconfig.go b/internal/conditions/gpuclusterconfig.go new file mode 100644 index 000000000..cd8dc511a --- /dev/null +++ b/internal/conditions/gpuclusterconfig.go @@ -0,0 +1,119 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package conditions + +import ( + "context" + "fmt" + + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1" +) + +// gpuClusterConfigUpdater is the Updater implementation for GPUClusterConfig conditions. +type gpuClusterConfigUpdater struct { + client client.Client +} + +// NewGPUClusterConfigUpdater returns an Updater that manages GPUClusterConfig status conditions. +func NewGPUClusterConfigUpdater(client client.Client) Updater { + return &gpuClusterConfigUpdater{client: client} +} + +func (u *gpuClusterConfigUpdater) SetConditionsReady(ctx context.Context, cr any, reason, message string) error { + gpuClusterConfigCr, ok := cr.(*nvidiav1alpha1.GPUClusterConfig) + if !ok { + return fmt.Errorf("provided object is not a *nvidiav1alpha1.GPUClusterConfig") + } + return u.setConditions(ctx, gpuClusterConfigCr, Ready, reason, message) +} + +func (u *gpuClusterConfigUpdater) SetConditionsError(ctx context.Context, cr any, reason, message string) error { + gpuClusterConfigCr, ok := cr.(*nvidiav1alpha1.GPUClusterConfig) + if !ok { + return fmt.Errorf("provided object is not a *nvidiav1alpha1.GPUClusterConfig") + } + return u.setConditions(ctx, gpuClusterConfigCr, Error, reason, message) +} + +func (u *gpuClusterConfigUpdater) updateConditions(ctx context.Context, cr *nvidiav1alpha1.GPUClusterConfig, statusType, reason, message string) error { + // Refetch to avoid a resourceVersion conflict. + instance := &nvidiav1alpha1.GPUClusterConfig{} + if err := u.client.Get(ctx, types.NamespacedName{Name: cr.Name}, instance); err != nil { + return fmt.Errorf("failed to get GPUClusterConfig instance for status update: %w", err) + } + + switch statusType { + case Ready: + meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ + Type: Ready, + Status: metav1.ConditionTrue, + Reason: reason, + Message: message, + }) + + meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ + Type: Error, + Status: metav1.ConditionFalse, + Reason: Ready, + }) + case Error: + meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ + Type: Ready, + Status: metav1.ConditionFalse, + Reason: Error, + }) + + meta.SetStatusCondition(&instance.Status.Conditions, metav1.Condition{ + Type: Error, + Status: metav1.ConditionTrue, + Reason: reason, + Message: message, + }) + + if instance.Status.State == "" { + instance.Status.State = cr.Status.State + } + if instance.Status.State == "" { + instance.Status.State = nvidiav1alpha1.NotReady + } + default: + return fmt.Errorf("unknown status type provided: %s", statusType) + } + + return u.client.Status().Update(ctx, instance) +} + +// setConditions retries on conflict to absorb concurrent status writes. +func (u *gpuClusterConfigUpdater) setConditions(ctx context.Context, cr *nvidiav1alpha1.GPUClusterConfig, statusType, reason, message string) error { + reqLogger := log.FromContext(ctx) + + err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { + return u.updateConditions(ctx, cr, statusType, reason, message) + }) + + if err != nil { + reqLogger.Error(err, "Failed to update GPUClusterConfig status after retries", "name", cr.Name) + } + return err +} diff --git a/internal/state/dra_driver.go b/internal/state/dra_driver.go new file mode 100644 index 000000000..09af2cfae --- /dev/null +++ b/internal/state/dra_driver.go @@ -0,0 +1,293 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package state + +import ( + "context" + "fmt" + "maps" + "sort" + "strings" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/source" + + nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" + nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1" + "github.com/NVIDIA/gpu-operator/controllers/clusterinfo" + "github.com/NVIDIA/gpu-operator/internal/consts" + "github.com/NVIDIA/gpu-operator/internal/image" + "github.com/NVIDIA/gpu-operator/internal/render" + "github.com/NVIDIA/gpu-operator/internal/utils" +) + +const ( + // draDriverImageEnvName is the fallback env var for the DRA driver image when the + // CR does not specify repository/image/version. + draDriverImageEnvName = "DRA_DRIVER_IMAGE" + // draValidatorImageEnvName is the env var for the gpu-operator image that ships the + // dra-driver-validator binary used by the kubelet-plugin init container. + draValidatorImageEnvName = "VALIDATOR_IMAGE" +) + +type stateDRADriver struct { + stateSkel +} + +var _ State = (*stateDRADriver)(nil) + +func NewStateDRADriver( + k8sClient client.Client, + namespace string, + scheme *runtime.Scheme, + manifestDir string) (State, error) { + + files, err := utils.GetFilesWithSuffix(manifestDir, render.ManifestFileSuffix...) + if err != nil { + return nil, fmt.Errorf("failed to get files from manifest directory: %v", err) + } + + renderer := render.NewRenderer(files) + state := &stateDRADriver{ + stateSkel: stateSkel{ + name: "state-dra-driver", + description: "NVIDIA DRA driver deployed in the cluster", + client: k8sClient, + namespace: namespace, + scheme: scheme, + renderer: renderer, + }, + } + return state, nil +} + +func (s *stateDRADriver) Sync(ctx context.Context, customResource interface{}, infoCatalog InfoCatalog) (SyncState, error) { + cr, ok := customResource.(*nvidiav1alpha1.GPUClusterConfig) + if !ok { + return SyncStateError, fmt.Errorf("GPUClusterConfig CR not provided as input to Sync()") + } + + objs, err := s.getManifestObjects(ctx, cr, infoCatalog) + if err != nil { + return SyncStateNotReady, fmt.Errorf("failed to create k8s objects from manifests: %w", err) + } + + if len(objs) == 0 { + // No DRA capability is enabled; nothing to render. + return SyncStateIgnore, nil + } + + // Create objects if they don't exist, update objects if they do exist. Owner + // references make every object (including the cluster-scoped DeviceClasses and + // ClusterRoles) garbage-collected when the GPUClusterConfig CR is deleted. + err = s.createOrUpdateObjs(ctx, func(obj *unstructured.Unstructured) error { + if err := controllerutil.SetControllerReference(cr, obj, s.scheme); err != nil { + return fmt.Errorf("failed to set controller reference for object: %w", err) + } + return nil + }, objs) + if err != nil { + return SyncStateNotReady, fmt.Errorf("failed to create/update objects: %w", err) + } + + syncState, err := s.getSyncState(ctx, objs) + if err != nil { + return SyncStateNotReady, fmt.Errorf("failed to get sync state: %w", err) + } + return syncState, nil +} + +func (s *stateDRADriver) GetWatchSources(mgr ctrlManager) map[string]SyncingSource { + wr := make(map[string]SyncingSource) + wr["DaemonSet"] = source.Kind( + mgr.GetCache(), + &appsv1.DaemonSet{}, + handler.TypedEnqueueRequestForOwner[*appsv1.DaemonSet](mgr.GetScheme(), mgr.GetRESTMapper(), + &nvidiav1alpha1.GPUClusterConfig{}, handler.OnlyControllerOwner()), + ) + return wr +} + +func (s *stateDRADriver) getManifestObjects(ctx context.Context, cr *nvidiav1alpha1.GPUClusterConfig, infoCatalog InfoCatalog) ([]*unstructured.Unstructured, error) { + logger := log.FromContext(ctx) + + // No DRA capability enabled: nothing to render. + if !cr.Spec.DRADriver.IsGPUsEnabled() && !cr.Spec.DRADriver.IsComputeDomainsEnabled() { + logger.V(consts.LogLevelInfo).Info("No DRA driver capability is enabled, skipping render") + return []*unstructured.Unstructured{}, nil + } + + info := infoCatalog.Get(InfoTypeClusterInfo) + if info == nil { + return nil, fmt.Errorf("failed to get cluster info from info catalog") + } + clusterInfo := info.(clusterinfo.Interface) + + gvr, draSupported, err := clusterInfo.GetDRAResourceGVR() + if err != nil { + return nil, fmt.Errorf("failed to determine DRA support: %w", err) + } + if !draSupported { + return nil, fmt.Errorf("the resource.k8s.io DeviceClass API is not served by the cluster; " + + "ensure Dynamic Resource Allocation is enabled on the API server and kubelet") + } + + draDriverSpec, err := getDRADriverSpec(&cr.Spec.DRADriver) + if err != nil { + return nil, fmt.Errorf("failed to construct DRA driver spec: %w", err) + } + + hostPaths := cr.Spec.HostPaths + daemonsets := cr.Spec.Daemonsets + priorityClassName, nodeSelector, tolerations, affinity := mergeKubeletPluginScheduling(&cr.Spec.DRADriver, &daemonsets) + renderData := &draDriverRenderData{ + DRADriver: draDriverSpec, + HostPaths: &hostPaths, + Daemonsets: &daemonsets, + Namespace: s.namespace, + DeviceClassAPIVersion: gvr.Group + "/" + gvr.Version, + FeatureGates: renderDRAFeatureGates(cr.Spec.DRADriver.FeatureGates), + KubeletPluginPriorityClassName: priorityClassName, + KubeletPluginNodeSelector: nodeSelector, + KubeletPluginTolerations: tolerations, + KubeletPluginAffinity: affinity, + } + + objs, err := s.renderManifestObjects(ctx, renderData) + if err != nil { + return nil, fmt.Errorf("failed to render manifests: %w", err) + } + return objs, nil +} + +func (s *stateDRADriver) renderManifestObjects(ctx context.Context, renderData *draDriverRenderData) ([]*unstructured.Unstructured, error) { + logger := log.FromContext(ctx) + logger.V(consts.LogLevelDebug).Info("Rendering DRA driver objects", "data", renderData) + + objs, err := s.renderer.RenderObjects( + &render.TemplatingData{ + Data: renderData, + }, + ) + if err != nil { + return nil, fmt.Errorf("failed to render kubernetes manifests: %w", err) + } + return objs, nil +} + +// getDRADriverSpec builds the render-time DRA driver spec, resolving the DRA driver +// image (from the CR, falling back to DRA_DRIVER_IMAGE) and the init-container image +// (the gpu-operator image carrying dra-driver-validator, from VALIDATOR_IMAGE). +func getDRADriverSpec(spec *nvidiav1alpha1.DRADriverSpec) (*draDriverSpec, error) { + imagePath, err := image.ImagePath(spec.Repository, spec.Image, spec.Version, draDriverImageEnvName) + if err != nil { + return nil, fmt.Errorf("failed to construct DRA driver image path: %w", err) + } + + initImagePath, err := image.ImagePath("", "", "", draValidatorImageEnvName) + if err != nil { + return nil, fmt.Errorf("failed to construct DRA driver validator image path: %w", err) + } + + return &draDriverSpec{ + Spec: spec, + ImagePath: imagePath, + InitImagePath: initImagePath, + }, nil +} + +// mergeKubeletPluginScheduling computes the pod-level scheduling for the single +// kubelet-plugin DaemonSet, which hosts both the gpus and computeDomains containers. +// Tolerations are the union of the daemonsets defaults and each enabled capability's +// kubeletPlugin tolerations; priorityClassName and affinity use the precedence +// computeDomains > gpus > daemonsets default (a disabled capability does not +// contribute). A nil affinity lets the template fall back to the default GPU-present +// node affinity. +func mergeKubeletPluginScheduling(spec *nvidiav1alpha1.DRADriverSpec, daemonsets *nvidiav1.DaemonsetsSpec) (string, map[string]string, []corev1.Toleration, *corev1.Affinity) { + gpus := spec.GPUs.KubeletPlugin + cd := spec.ComputeDomains.KubeletPlugin + gpusEnabled := spec.IsGPUsEnabled() + cdEnabled := spec.IsComputeDomainsEnabled() + + priorityClassName := "system-node-critical" + if daemonsets.PriorityClassName != "" { + priorityClassName = daemonsets.PriorityClassName + } + if gpusEnabled && gpus.PriorityClassName != "" { + priorityClassName = gpus.PriorityClassName + } + if cdEnabled && cd.PriorityClassName != "" { + priorityClassName = cd.PriorityClassName + } + + tolerations := append([]corev1.Toleration{}, daemonsets.Tolerations...) + if gpusEnabled { + tolerations = append(tolerations, gpus.Tolerations...) + } + if cdEnabled { + tolerations = append(tolerations, cd.Tolerations...) + } + + nodeSelector := map[string]string{} + if gpusEnabled { + maps.Copy(nodeSelector, gpus.NodeSelector) + } + if cdEnabled { + maps.Copy(nodeSelector, cd.NodeSelector) + } + if len(nodeSelector) == 0 { + nodeSelector = nil + } + + var affinity *corev1.Affinity + if gpusEnabled && gpus.Affinity != nil { + affinity = gpus.Affinity + } + if cdEnabled && cd.Affinity != nil { + affinity = cd.Affinity + } + + return priorityClassName, nodeSelector, tolerations, affinity +} + +// renderDRAFeatureGates renders the feature-gate map as the FEATURE_GATES env value +// (comma-separated Key=Value with a trailing comma, matching the upstream +// k8s-dra-driver-gpu Helm chart). Keys are sorted so the rendered value is a pure +// function of the input and reconciles do not churn the pod spec. Empty when none. +func renderDRAFeatureGates(gates map[string]bool) string { + if len(gates) == 0 { + return "" + } + names := make([]string, 0, len(gates)) + for name := range gates { + names = append(names, name) + } + sort.Strings(names) + + var b strings.Builder + for _, name := range names { + fmt.Fprintf(&b, "%s=%t,", name, gates[name]) + } + return b.String() +} diff --git a/internal/state/dra_driver_test.go b/internal/state/dra_driver_test.go new file mode 100644 index 000000000..65babc0e1 --- /dev/null +++ b/internal/state/dra_driver_test.go @@ -0,0 +1,357 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package state + +import ( + "context" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client/fake" + + nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" + nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1" +) + +const draManifestDir = "../../manifests/state-dra-driver" + +func newTestDRAState(t *testing.T) *stateDRADriver { + t.Helper() + s, err := NewStateDRADriver(fake.NewClientBuilder().Build(), "test-operator", runtime.NewScheme(), draManifestDir) + require.NoError(t, err) + return s.(*stateDRADriver) +} + +func sampleGPUClusterConfig() *nvidiav1alpha1.GPUClusterConfig { + return &nvidiav1alpha1.GPUClusterConfig{ + ObjectMeta: metav1.ObjectMeta{Name: "gpuclusterconfig-sample"}, + Spec: nvidiav1alpha1.GPUClusterConfigSpec{ + DRADriver: nvidiav1alpha1.DRADriverSpec{ + Repository: "nvcr.io/nvidia", + Image: "k8s-dra-driver-gpu", + Version: "v0.1.0", + ImagePullPolicy: "IfNotPresent", + FeatureGates: map[string]bool{"MPSSupport": true, "AdminAccess": false}, + GPUs: nvidiav1alpha1.DRADriverGPUsSpec{ + Enabled: ptr.To(true), + }, + }, + Daemonsets: nvidiav1.DaemonsetsSpec{ + Tolerations: []corev1.Toleration{{ + Key: "nvidia.com/gpu", + Operator: corev1.TolerationOpExists, + Effect: corev1.TaintEffectNoSchedule, + }}, + PriorityClassName: "system-node-critical", + }, + HostPaths: nvidiav1.HostPathsSpec{ + RootFS: "/", + DriverInstallDir: "/run/nvidia/driver", + KubeletRootDir: "/var/lib/kubelet", + }, + }, + } +} + +func draSupportedCatalog() InfoCatalog { + catalog := NewInfoCatalog() + catalog.Add(InfoTypeClusterInfo, testClusterInfo{ + draSupported: true, + draResourceGVR: schema.GroupVersionResource{Group: "resource.k8s.io", Version: "v1", Resource: "deviceclasses"}, + }) + return catalog +} + +func TestDRADriverRenderGPUsCut(t *testing.T) { + t.Setenv("VALIDATOR_IMAGE", "nvcr.io/nvidia/gpu-operator-validator:test") + s := newTestDRAState(t) + + objs, err := s.getManifestObjects(context.Background(), sampleGPUClusterConfig(), draSupportedCatalog()) + require.NoError(t, err) + require.NotEmpty(t, objs) + + kinds := map[string]int{} + for _, o := range objs { + kinds[o.GetKind()]++ + } + assert.Equal(t, 1, kinds["ServiceAccount"]) + assert.Equal(t, 1, kinds["ClusterRole"]) + assert.Equal(t, 1, kinds["ClusterRoleBinding"]) + assert.Equal(t, 2, kinds["DeviceClass"], "expected gpu.nvidia.com and mig.nvidia.com") + assert.Equal(t, 1, kinds["DaemonSet"]) + + // DeviceClass apiVersion is injected from discovery, not hardcoded. + for _, o := range objs { + if o.GetKind() == "DeviceClass" { + assert.Equal(t, "resource.k8s.io/v1", o.GetAPIVersion()) + } + } + + ds := findDaemonSet(t, objs) + podSpec := ds.Spec.Template.Spec + + // Init container is the DRA validator, privileged, with the verified mount plan + // and no /driver-root symlink machinery. + require.Len(t, podSpec.InitContainers, 1) + initCtr := podSpec.InitContainers[0] + assert.Equal(t, "dra-driver-validator", initCtr.Name) + assert.Equal(t, "nvcr.io/nvidia/gpu-operator-validator:test", initCtr.Image) + require.NotNil(t, initCtr.SecurityContext) + require.NotNil(t, initCtr.SecurityContext.Privileged) + assert.True(t, *initCtr.SecurityContext.Privileged) + assert.ElementsMatch(t, + []string{"host-root", "driver-install-dir", "validations"}, + mountNames(initCtr.VolumeMounts)) + + // Single gpus container in this cut. + require.Len(t, podSpec.Containers, 1) + gpus := podSpec.Containers[0] + assert.Equal(t, "gpus", gpus.Name) + assert.Equal(t, "nvcr.io/nvidia/k8s-dra-driver-gpu:v0.1.0", gpus.Image) + + // Sources the driver-ready contract and execs the plugin (no prestart.sh). + args := strings.Join(gpus.Args, "\n") + assert.Contains(t, args, "source /run/nvidia/validations/driver-ready") + assert.Contains(t, args, "exec gpu-kubelet-plugin") + + env := envMap(gpus.Env) + assert.Equal(t, "nvcr.io/nvidia/k8s-dra-driver-gpu:v0.1.0", env["IMAGE_NAME"]) + // FEATURE_GATES is sorted and matches the upstream trailing-comma format. + assert.Equal(t, "AdminAccess=false,MPSSupport=true,", env["FEATURE_GATES"]) + // NVIDIA_DRIVER_ROOT / DRIVER_ROOT_CTR_PATH must be sourced, never hardcoded. + _, hasRoot := env["NVIDIA_DRIVER_ROOT"] + assert.False(t, hasRoot, "NVIDIA_DRIVER_ROOT must come from driver-ready, not the pod spec") + _, hasCtrPath := env["DRIVER_ROOT_CTR_PATH"] + assert.False(t, hasCtrPath, "DRIVER_ROOT_CTR_PATH must come from driver-ready, not the pod spec") + + // No /driver-root or /driver-root-parent volumes; validations is an emptyDir. + for _, v := range podSpec.Volumes { + assert.NotEqual(t, "driver-root", v.Name) + assert.NotEqual(t, "driver-root-parent", v.Name) + } + assert.NotNil(t, findVolume(t, ds, "validations").EmptyDir) + // The host-vs-containerized driver branch is a per-node runtime probe, so the + // plugin container must carry both mounts. + assert.NotNil(t, findVolume(t, ds, "host-root").HostPath) + assert.NotNil(t, findVolume(t, ds, "driver-install-dir").HostPath) + + // Shared daemonsets toleration is applied. + require.NotEmpty(t, podSpec.Tolerations) + assert.Equal(t, "nvidia.com/gpu", podSpec.Tolerations[0].Key) +} + +func TestDRADriverRenderGPUsDisabled(t *testing.T) { + s := newTestDRAState(t) + cr := sampleGPUClusterConfig() + cr.Spec.DRADriver.GPUs.Enabled = ptr.To(false) + + objs, err := s.getManifestObjects(context.Background(), cr, draSupportedCatalog()) + require.NoError(t, err) + assert.Empty(t, objs, "nothing should render when the gpus capability is disabled") +} + +func TestDRADriverRenderDRAUnsupported(t *testing.T) { + t.Setenv("VALIDATOR_IMAGE", "nvcr.io/nvidia/gpu-operator-validator:test") + s := newTestDRAState(t) + + catalog := NewInfoCatalog() + catalog.Add(InfoTypeClusterInfo, testClusterInfo{draSupported: false}) + + _, err := s.getManifestObjects(context.Background(), sampleGPUClusterConfig(), catalog) + require.Error(t, err, "rendering must fail when the resource.k8s.io DeviceClass API is absent") +} + +func findDaemonSet(t *testing.T, objs []*unstructured.Unstructured) *appsv1.DaemonSet { + t.Helper() + for _, o := range objs { + if o.GetKind() == "DaemonSet" { + ds := &appsv1.DaemonSet{} + require.NoError(t, runtime.DefaultUnstructuredConverter.FromUnstructured(o.Object, ds)) + return ds + } + } + t.Fatal("DaemonSet not found in rendered objects") + return nil +} + +func findVolume(t *testing.T, ds *appsv1.DaemonSet, name string) corev1.Volume { + t.Helper() + for _, v := range ds.Spec.Template.Spec.Volumes { + if v.Name == name { + return v + } + } + t.Fatalf("volume %q not found", name) + return corev1.Volume{} +} + +func mountNames(mounts []corev1.VolumeMount) []string { + names := make([]string, 0, len(mounts)) + for _, m := range mounts { + names = append(names, m.Name) + } + return names +} + +func envMap(env []corev1.EnvVar) map[string]string { + m := make(map[string]string, len(env)) + for _, e := range env { + m[e.Name] = e.Value + } + return m +} + +func findDeployment(t *testing.T, objs []*unstructured.Unstructured) *appsv1.Deployment { + t.Helper() + for _, o := range objs { + if o.GetKind() == "Deployment" { + dep := &appsv1.Deployment{} + require.NoError(t, runtime.DefaultUnstructuredConverter.FromUnstructured(o.Object, dep)) + return dep + } + } + t.Fatal("Deployment not found in rendered objects") + return nil +} + +func containerByName(t *testing.T, ds *appsv1.DaemonSet, name string) corev1.Container { + t.Helper() + for _, c := range ds.Spec.Template.Spec.Containers { + if c.Name == name { + return c + } + } + t.Fatalf("container %q not found", name) + return corev1.Container{} +} + +func TestDRADriverRenderComputeDomains(t *testing.T) { + t.Setenv("VALIDATOR_IMAGE", "nvcr.io/nvidia/gpu-operator-validator:test") + s := newTestDRAState(t) + cr := sampleGPUClusterConfig() + cr.Spec.DRADriver.ComputeDomains.Enabled = ptr.To(true) + + objs, err := s.getManifestObjects(context.Background(), cr, draSupportedCatalog()) + require.NoError(t, err) + + kinds := map[string]int{} + names := map[string]bool{} + for _, o := range objs { + kinds[o.GetKind()]++ + names[o.GetKind()+"/"+o.GetName()] = true + } + // gpu + mig + compute-domain-daemon + compute-domain-default-channel + assert.Equal(t, 4, kinds["DeviceClass"]) + assert.True(t, names["DeviceClass/compute-domain-daemon.nvidia.com"]) + assert.True(t, names["DeviceClass/compute-domain-default-channel.nvidia.com"]) + // compute-domain controller Deployment + its SA/RBAC and the daemon SA/RBAC + assert.Equal(t, 1, kinds["Deployment"]) + assert.True(t, names["Deployment/nvidia-dra-driver-controller"]) + assert.True(t, names["ServiceAccount/nvidia-dra-driver-controller"]) + assert.True(t, names["ServiceAccount/compute-domain-daemon-service-account"]) + assert.True(t, names["ServiceAccount/nvidia-dra-driver-kubeletplugin"]) + // kubelet-plugin computedomaincliques Role appears only when computeDomains is on + assert.True(t, names["Role/nvidia-dra-driver-kubeletplugin"]) + + // DaemonSet now hosts both containers. + ds := findDaemonSet(t, objs) + require.Len(t, ds.Spec.Template.Spec.Containers, 2) + cnames := make([]string, 0, 2) + for _, c := range ds.Spec.Template.Spec.Containers { + cnames = append(cnames, c.Name) + } + assert.ElementsMatch(t, []string{"gpus", "compute-domains"}, cnames) +} + +func TestDRADriverComputeDomainsContainerAndController(t *testing.T) { + t.Setenv("VALIDATOR_IMAGE", "nvcr.io/nvidia/gpu-operator-validator:test") + s := newTestDRAState(t) + cr := sampleGPUClusterConfig() + cr.Spec.DRADriver.ComputeDomains.Enabled = ptr.To(true) + + objs, err := s.getManifestObjects(context.Background(), cr, draSupportedCatalog()) + require.NoError(t, err) + + ds := findDaemonSet(t, objs) + cd := containerByName(t, ds, "compute-domains") + assert.Equal(t, "nvcr.io/nvidia/k8s-dra-driver-gpu:v0.1.0", cd.Image) + assert.Contains(t, strings.Join(cd.Args, "\n"), "source /run/nvidia/validations/driver-ready") + assert.Contains(t, strings.Join(cd.Args, "\n"), "exec compute-domain-kubelet-plugin") + assert.Equal(t, "51515", envMap(cd.Env)["HEALTHCHECK_PORT"]) + + dep := findDeployment(t, objs) + depCtr := dep.Spec.Template.Spec.Containers[0] + assert.Equal(t, "nvcr.io/nvidia/k8s-dra-driver-gpu:v0.1.0", depCtr.Image) + assert.Equal(t, "nvidia-dra-driver-controller", dep.Spec.Template.Spec.ServiceAccountName) + depEnv := envMap(depCtr.Env) + assert.Equal(t, "false", depEnv["LEADER_ELECTION_ENABLED"]) + assert.Equal(t, "test-operator", depEnv["LEADER_ELECTION_LEASE_LOCK_NAMESPACE"]) +} + +func TestDRADriverComputeDomainsOnly(t *testing.T) { + t.Setenv("VALIDATOR_IMAGE", "nvcr.io/nvidia/gpu-operator-validator:test") + s := newTestDRAState(t) + cr := sampleGPUClusterConfig() + cr.Spec.DRADriver.GPUs.Enabled = ptr.To(false) + cr.Spec.DRADriver.ComputeDomains.Enabled = ptr.To(true) + + objs, err := s.getManifestObjects(context.Background(), cr, draSupportedCatalog()) + require.NoError(t, err) + + ds := findDaemonSet(t, objs) + require.Len(t, ds.Spec.Template.Spec.Containers, 1) + assert.Equal(t, "compute-domains", ds.Spec.Template.Spec.Containers[0].Name) + // gpu/mig DeviceClasses must not render when gpus is disabled. + for _, o := range objs { + if o.GetKind() == "DeviceClass" { + assert.NotEqual(t, "gpu.nvidia.com", o.GetName()) + assert.NotEqual(t, "mig.nvidia.com", o.GetName()) + } + } +} + +func TestDRADriverSchedulingMerge(t *testing.T) { + t.Setenv("VALIDATOR_IMAGE", "nvcr.io/nvidia/gpu-operator-validator:test") + s := newTestDRAState(t) + cr := sampleGPUClusterConfig() + cr.Spec.DRADriver.ComputeDomains.Enabled = ptr.To(true) + cr.Spec.DRADriver.GPUs.KubeletPlugin.Tolerations = []corev1.Toleration{{Key: "gpu-tol", Operator: corev1.TolerationOpExists}} + cr.Spec.DRADriver.ComputeDomains.KubeletPlugin.Tolerations = []corev1.Toleration{{Key: "cd-tol", Operator: corev1.TolerationOpExists}} + cr.Spec.DRADriver.ComputeDomains.KubeletPlugin.PriorityClassName = "cd-priority" + + objs, err := s.getManifestObjects(context.Background(), cr, draSupportedCatalog()) + require.NoError(t, err) + ds := findDaemonSet(t, objs) + + keys := make([]string, 0, len(ds.Spec.Template.Spec.Tolerations)) + for _, tol := range ds.Spec.Template.Spec.Tolerations { + keys = append(keys, tol.Key) + } + // union of daemonsets default + gpus + computeDomains tolerations + assert.Subset(t, keys, []string{"nvidia.com/gpu", "gpu-tol", "cd-tol"}) + // precedence computeDomains > gpus > daemonsets + assert.Equal(t, "cd-priority", ds.Spec.Template.Spec.PriorityClassName) +} diff --git a/internal/state/driver_test.go b/internal/state/driver_test.go index 02b2736f2..d96f68f2e 100644 --- a/internal/state/driver_test.go +++ b/internal/state/driver_test.go @@ -31,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/serializer/json" apitypes "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" @@ -43,13 +44,15 @@ import ( ) const ( - manifestDir = "../../manifests" + manifestDir = "../../manifests/state-driver" manifestResultDir = "./testdata/golden" ) type testClusterInfo struct { runtime string openshiftVersion string + draResourceGVR schema.GroupVersionResource + draSupported bool } func (i testClusterInfo) GetContainerRuntime() (string, error) { @@ -68,6 +71,10 @@ func (i testClusterInfo) GetOpenshiftProxySpec() (*configv1.ProxySpec, error) { return nil, nil } +func (i testClusterInfo) GetDRAResourceGVR() (schema.GroupVersionResource, bool, error) { + return i.draResourceGVR, i.draSupported, nil +} + func getYAMLString(objs []*unstructured.Unstructured) (string, error) { s := json.NewSerializerWithOptions(json.DefaultMetaFactory, scheme.Scheme, scheme.Scheme, json.SerializerOptions{Yaml: true, Pretty: false, Strict: false}) diff --git a/internal/state/manager.go b/internal/state/manager.go index e0cfc66a6..396615a52 100644 --- a/internal/state/manager.go +++ b/internal/state/manager.go @@ -114,6 +114,8 @@ func newStates(crdKind string, namespace string, k8sClient client.Client, scheme switch crdKind { case nvidiav1alpha1.NVIDIADriverCRDName: return newNVIDIADriverStates(k8sClient, namespace, scheme) + case nvidiav1alpha1.GPUClusterConfigCRDName: + return newGPUClusterConfigStates(k8sClient, namespace, scheme) default: break } @@ -128,3 +130,13 @@ func newNVIDIADriverStates(k8sClient client.Client, namespace string, scheme *ru return []State{driverState}, nil } + +// newGPUClusterConfigStates returns the states reconciled for a GPUClusterConfig. +func newGPUClusterConfigStates(k8sClient client.Client, namespace string, scheme *runtime.Scheme) ([]State, error) { + draDriverState, err := NewStateDRADriver(k8sClient, namespace, scheme, "/opt/gpu-operator/manifests/state-dra-driver") + if err != nil { + return nil, fmt.Errorf("failed to create DRA driver state: %v", err) + } + + return []State{draDriverState}, nil +} diff --git a/internal/state/state_skel.go b/internal/state/state_skel.go index ff26d7ccb..9773d08c5 100644 --- a/internal/state/state_skel.go +++ b/internal/state/state_skel.go @@ -268,7 +268,7 @@ func (s *stateSkel) createOrUpdateObjs( if desiredObjectHash == currentObjHash { reqLogger.V(consts.LogLevelDebug).Info("Object is unchanged, so skipping update", "Kind", desiredObj.GetKind(), "Name", desiredObj.GetName()) - return nil + continue } } } @@ -407,6 +407,12 @@ func (s *stateSkel) getSyncState(ctx context.Context, objs []*unstructured.Unstr return SyncStateNotReady, err } } + if found.GetKind() == "Deployment" { + if ready, err := s.isDeploymentReady(found, reqLogger); err != nil || !ready { + reqLogger.V(consts.LogLevelInfo).Info("Object is not ready", "Kind:", obj.GetKind(), "Name", obj.GetName()) + return SyncStateNotReady, err + } + } reqLogger.V(consts.LogLevelInfo).Info("Object is ready", "Kind:", obj.GetKind(), "Name", obj.GetName()) } return SyncStateReady, nil @@ -444,6 +450,38 @@ func (s *stateSkel) isDaemonSetReady(uds *unstructured.Unstructured, reqLogger l return false, nil } +// isDeploymentReady checks if a deployment is ready +func (s *stateSkel) isDeploymentReady(ud *unstructured.Unstructured, reqLogger logr.Logger) (bool, error) { + buf, err := ud.MarshalJSON() + if err != nil { + return false, fmt.Errorf("failed to marshall unstructured deployment object: %w", err) + } + + dep := &appsv1.Deployment{} + if err = json.Unmarshal(buf, dep); err != nil { + return false, fmt.Errorf("failed to unmarshall to deployment object: %w", err) + } + + desired := int32(1) + if dep.Spec.Replicas != nil { + desired = *dep.Spec.Replicas + } + + reqLogger.V(consts.LogLevelDebug).Info( + "Check deployment state", + "DesiredReplicas:", desired, + "UpdatedReplicas:", dep.Status.UpdatedReplicas, + "AvailableReplicas:", dep.Status.AvailableReplicas, + "ObservedGeneration:", dep.Status.ObservedGeneration) + + if dep.Status.ObservedGeneration >= dep.Generation && + dep.Status.UpdatedReplicas == desired && + dep.Status.AvailableReplicas == desired { + return true, nil + } + return false, nil +} + // Check if provided attrTypes are present in NodeAttributes.Attributes // nolint func (s *stateSkel) checkAttributesExist(attrs nodeinfo.NodeAttributes, attrTypes ...nodeinfo.AttributeType) error { diff --git a/internal/state/types.go b/internal/state/types.go index 6b776078d..eb18b49b3 100644 --- a/internal/state/types.go +++ b/internal/state/types.go @@ -17,9 +17,11 @@ package state import ( + corev1 "k8s.io/api/core/v1" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/source" + nvidiav1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1" ) @@ -50,3 +52,32 @@ type gdrcopyDriverSpec struct { Spec *nvidiav1alpha1.GDRCopySpec ImagePath string } + +// draDriverSpec is a wrapper of DRADriverSpec with the fully-qualified image paths +// populated: ImagePath for the DRA driver containers and InitImagePath for the +// driver-validation init container (shipped in the gpu-operator image). +type draDriverSpec struct { + Spec *nvidiav1alpha1.DRADriverSpec + ImagePath string + InitImagePath string +} + +// draDriverRenderData is the templating data for the DRA driver manifests. +type draDriverRenderData struct { + DRADriver *draDriverSpec + HostPaths *nvidiav1.HostPathsSpec + Daemonsets *nvidiav1.DaemonsetsSpec + Namespace string + // DeviceClassAPIVersion is the apiVersion to render on DeviceClass objects, + // determined from the resource.k8s.io version served by the cluster. + DeviceClassAPIVersion string + // FeatureGates is the pre-rendered FEATURE_GATES env value (empty when none). + FeatureGates string + // The kubelet-plugin DaemonSet hosts both the gpus and computeDomains containers, + // so these pod-level scheduling fields are merged from the daemonsets defaults and + // the per-capability kubeletPlugin blocks (see mergeKubeletPluginScheduling). + KubeletPluginPriorityClassName string + KubeletPluginNodeSelector map[string]string + KubeletPluginTolerations []corev1.Toleration + KubeletPluginAffinity *corev1.Affinity +} diff --git a/manifests/state-dra-driver/0100_kubeletplugin-service_account.yaml b/manifests/state-dra-driver/0100_kubeletplugin-service_account.yaml new file mode 100644 index 000000000..029065bdd --- /dev/null +++ b/manifests/state-dra-driver/0100_kubeletplugin-service_account.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: nvidia-dra-driver-kubeletplugin + namespace: {{ .Namespace }} + labels: + app.kubernetes.io/name: nvidia-dra-driver + app.kubernetes.io/component: nvidia-dra-driver-kubelet-plugin diff --git a/manifests/state-dra-driver/0110_controller-rbac.yaml b/manifests/state-dra-driver/0110_controller-rbac.yaml new file mode 100644 index 000000000..73f672d01 --- /dev/null +++ b/manifests/state-dra-driver/0110_controller-rbac.yaml @@ -0,0 +1,122 @@ +{{- if .DRADriver.Spec.IsComputeDomainsEnabled }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: nvidia-dra-driver-controller + namespace: {{ .Namespace }} + labels: + app.kubernetes.io/name: nvidia-dra-driver + app.kubernetes.io/component: nvidia-dra-driver-controller +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: nvidia-dra-driver-controller +rules: +- apiGroups: + - resource.nvidia.com + resources: + - computedomains + verbs: + - get + - list + - watch + - update +- apiGroups: + - resource.nvidia.com + resources: + - computedomains/status + verbs: + - update +- apiGroups: + - resource.k8s.io + resources: + - resourceclaimtemplates + verbs: + - get + - list + - watch + - create + - update + - delete +- apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - get + - create + - update +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - update +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: nvidia-dra-driver-controller + namespace: {{ .Namespace }} +rules: +- apiGroups: + - apps + resources: + - daemonsets + verbs: + - get + - list + - watch + - create + - update + - patch + - delete +- apiGroups: + - resource.nvidia.com + resources: + - computedomaincliques + verbs: + - get + - list + - watch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: nvidia-dra-driver-controller +subjects: +- kind: ServiceAccount + name: nvidia-dra-driver-controller + namespace: {{ .Namespace }} +roleRef: + kind: ClusterRole + name: nvidia-dra-driver-controller + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: nvidia-dra-driver-controller + namespace: {{ .Namespace }} +subjects: +- kind: ServiceAccount + name: nvidia-dra-driver-controller + namespace: {{ .Namespace }} +roleRef: + kind: Role + name: nvidia-dra-driver-controller + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/manifests/state-dra-driver/0120_compute-domain-daemon-rbac.yaml b/manifests/state-dra-driver/0120_compute-domain-daemon-rbac.yaml new file mode 100644 index 000000000..f00d8b05a --- /dev/null +++ b/manifests/state-dra-driver/0120_compute-domain-daemon-rbac.yaml @@ -0,0 +1,60 @@ +{{- if .DRADriver.Spec.IsComputeDomainsEnabled }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: compute-domain-daemon-service-account + namespace: {{ .Namespace }} + labels: + app.kubernetes.io/name: nvidia-dra-driver + app.kubernetes.io/component: compute-domain-daemon +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: compute-domain-daemon +rules: +- apiGroups: + - resource.nvidia.com + resources: + - computedomains + - computedomains/status + verbs: + - get + - list + - watch + - update + - patch +- apiGroups: + - resource.nvidia.com + resources: + - computedomaincliques + verbs: + - get + - list + - watch + - create + - update + - patch +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch + - patch +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: compute-domain-daemon +subjects: +- kind: ServiceAccount + name: compute-domain-daemon-service-account + namespace: {{ .Namespace }} +roleRef: + kind: ClusterRole + name: compute-domain-daemon + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/manifests/state-dra-driver/0130_kubeletplugin-computedomain-role.yaml b/manifests/state-dra-driver/0130_kubeletplugin-computedomain-role.yaml new file mode 100644 index 000000000..f59418bb9 --- /dev/null +++ b/manifests/state-dra-driver/0130_kubeletplugin-computedomain-role.yaml @@ -0,0 +1,28 @@ +{{- if .DRADriver.Spec.IsComputeDomainsEnabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: nvidia-dra-driver-kubeletplugin + namespace: {{ .Namespace }} +rules: +- apiGroups: + - resource.nvidia.com + resources: + - computedomaincliques + verbs: + - get +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: nvidia-dra-driver-kubeletplugin + namespace: {{ .Namespace }} +subjects: +- kind: ServiceAccount + name: nvidia-dra-driver-kubeletplugin + namespace: {{ .Namespace }} +roleRef: + kind: Role + name: nvidia-dra-driver-kubeletplugin + apiGroup: rbac.authorization.k8s.io +{{- end }} diff --git a/manifests/state-dra-driver/0200_kubeletplugin-clusterrole.yaml b/manifests/state-dra-driver/0200_kubeletplugin-clusterrole.yaml new file mode 100644 index 000000000..850940f91 --- /dev/null +++ b/manifests/state-dra-driver/0200_kubeletplugin-clusterrole.yaml @@ -0,0 +1,49 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: nvidia-dra-driver-kubeletplugin +rules: +- apiGroups: + - resource.nvidia.com + resources: + - computedomains + verbs: + - get + - list + - watch +- apiGroups: + - resource.k8s.io + resources: + - resourceclaims + verbs: + - get + - list + - watch +- apiGroups: + - resource.k8s.io + resources: + - resourceslices + verbs: + - get + - list + - watch + - create + - update + - delete +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - watch + - update +- apiGroups: + - "" + resources: + - pods + verbs: + - get + - list + - watch diff --git a/manifests/state-dra-driver/0300_kubeletplugin-clusterrolebinding.yaml b/manifests/state-dra-driver/0300_kubeletplugin-clusterrolebinding.yaml new file mode 100644 index 000000000..5fa721031 --- /dev/null +++ b/manifests/state-dra-driver/0300_kubeletplugin-clusterrolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: nvidia-dra-driver-kubeletplugin +subjects: +- kind: ServiceAccount + name: nvidia-dra-driver-kubeletplugin + namespace: {{ .Namespace }} +roleRef: + kind: ClusterRole + name: nvidia-dra-driver-kubeletplugin + apiGroup: rbac.authorization.k8s.io diff --git a/manifests/state-dra-driver/0400_deviceclass-gpu.yaml b/manifests/state-dra-driver/0400_deviceclass-gpu.yaml new file mode 100644 index 000000000..4c6f131d7 --- /dev/null +++ b/manifests/state-dra-driver/0400_deviceclass-gpu.yaml @@ -0,0 +1,10 @@ +{{- if .DRADriver.Spec.IsGPUsEnabled }} +apiVersion: {{ .DeviceClassAPIVersion }} +kind: DeviceClass +metadata: + name: gpu.nvidia.com +spec: + selectors: + - cel: + expression: "device.driver == 'gpu.nvidia.com' && device.attributes['gpu.nvidia.com'].type == 'gpu'" +{{- end }} diff --git a/manifests/state-dra-driver/0410_deviceclass-mig.yaml b/manifests/state-dra-driver/0410_deviceclass-mig.yaml new file mode 100644 index 000000000..e092d33c4 --- /dev/null +++ b/manifests/state-dra-driver/0410_deviceclass-mig.yaml @@ -0,0 +1,10 @@ +{{- if .DRADriver.Spec.IsGPUsEnabled }} +apiVersion: {{ .DeviceClassAPIVersion }} +kind: DeviceClass +metadata: + name: mig.nvidia.com +spec: + selectors: + - cel: + expression: "device.driver == 'gpu.nvidia.com' && device.attributes['gpu.nvidia.com'].type == 'mig'" +{{- end }} diff --git a/manifests/state-dra-driver/0420_deviceclass-compute-domains.yaml b/manifests/state-dra-driver/0420_deviceclass-compute-domains.yaml new file mode 100644 index 000000000..0dd1f3ae4 --- /dev/null +++ b/manifests/state-dra-driver/0420_deviceclass-compute-domains.yaml @@ -0,0 +1,19 @@ +{{- if .DRADriver.Spec.IsComputeDomainsEnabled }} +apiVersion: {{ .DeviceClassAPIVersion }} +kind: DeviceClass +metadata: + name: compute-domain-daemon.nvidia.com +spec: + selectors: + - cel: + expression: "device.driver == 'compute-domain.nvidia.com' && device.attributes['compute-domain.nvidia.com'].type == 'daemon'" +--- +apiVersion: {{ .DeviceClassAPIVersion }} +kind: DeviceClass +metadata: + name: compute-domain-default-channel.nvidia.com +spec: + selectors: + - cel: + expression: "device.driver == 'compute-domain.nvidia.com' && device.attributes['compute-domain.nvidia.com'].type == 'channel' && device.attributes['compute-domain.nvidia.com'].id == 0" +{{- end }} diff --git a/manifests/state-dra-driver/0500_daemonset.yaml b/manifests/state-dra-driver/0500_daemonset.yaml new file mode 100644 index 000000000..ea6072d1c --- /dev/null +++ b/manifests/state-dra-driver/0500_daemonset.yaml @@ -0,0 +1,312 @@ +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: nvidia-dra-driver-kubelet-plugin + namespace: {{ .Namespace }} + labels: + app.kubernetes.io/name: nvidia-dra-driver + app.kubernetes.io/component: nvidia-dra-driver-kubelet-plugin + app: nvidia-dra-driver-kubelet-plugin +spec: + selector: + matchLabels: + app: nvidia-dra-driver-kubelet-plugin + updateStrategy: + type: RollingUpdate + rollingUpdate: + maxUnavailable: "100%" + template: + metadata: + labels: + app.kubernetes.io/name: nvidia-dra-driver + app.kubernetes.io/component: nvidia-dra-driver-kubelet-plugin + app: nvidia-dra-driver-kubelet-plugin + spec: + priorityClassName: {{ .KubeletPluginPriorityClassName }} + serviceAccountName: nvidia-dra-driver-kubeletplugin + {{- if .DRADriver.Spec.ImagePullSecrets }} + imagePullSecrets: + {{- range .DRADriver.Spec.ImagePullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} + {{- if .KubeletPluginNodeSelector }} + nodeSelector: {{ .KubeletPluginNodeSelector | toJson }} + {{- end }} + {{- if .KubeletPluginAffinity }} + affinity: {{ .KubeletPluginAffinity | toJson }} + {{- else }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: feature.node.kubernetes.io/pci-10de.present + operator: In + values: + - "true" + - matchExpressions: + - key: feature.node.kubernetes.io/cpu-model.vendor_id + operator: In + values: + - NVIDIA + - matchExpressions: + - key: nvidia.com/gpu.present + operator: In + values: + - "true" + {{- end }} + {{- if .KubeletPluginTolerations }} + tolerations: + {{- range .KubeletPluginTolerations }} + - {{ . | toJson }} + {{- end }} + {{- end }} + initContainers: + - name: dra-driver-validator + image: {{ .DRADriver.InitImagePath }} + {{- if .DRADriver.Spec.ImagePullPolicy }} + imagePullPolicy: {{ .DRADriver.Spec.ImagePullPolicy }} + {{- end }} + command: + - dra-driver-validator + securityContext: + privileged: true + # The validator ships in the gpu-operator image (USER 65532); it must run + # as root to chroot into the host root for the host-driver probe. + runAsUser: 0 + env: + - name: HOST_ROOT + value: {{ .HostPaths.RootFS | default "/" | quote }} + - name: DRIVER_INSTALL_DIR + value: {{ .HostPaths.DriverInstallDir | default "/run/nvidia/driver" | quote }} + volumeMounts: + - name: host-root + mountPath: /host + mountPropagation: HostToContainer + readOnly: true + - name: driver-install-dir + mountPath: /run/nvidia/driver + mountPropagation: HostToContainer + - name: validations + mountPath: /run/nvidia/validations + containers: + {{- if .DRADriver.Spec.IsGPUsEnabled }} + - name: gpus + image: {{ .DRADriver.ImagePath }} + {{- if .DRADriver.Spec.ImagePullPolicy }} + imagePullPolicy: {{ .DRADriver.Spec.ImagePullPolicy }} + {{- end }} + securityContext: + privileged: true + command: + - bash + - -c + args: + - |- + # Export the driver-ready env contract (NVIDIA_DRIVER_ROOT, + # DRIVER_ROOT_CTR_PATH) written by the dra-driver-validator init container. + set -a + source /run/nvidia/validations/driver-ready + set +a + # Conditionally mask the params file to prevent this container from + # recreating any missing GPU device nodes (e.g. under nvkind). + if [ "${MASK_NVIDIA_DRIVER_PARAMS}" = "true" ]; then + cp /proc/driver/nvidia/params /root/gpu-params + sed -i 's/^ModifyDeviceFiles: 1$/ModifyDeviceFiles: 0/' /root/gpu-params + mount --bind /root/gpu-params /proc/driver/nvidia/params + fi + exec gpu-kubelet-plugin -v "${LOG_VERBOSITY}" + env: + - name: HTTP_ENDPOINT + value: ":8080" + - name: METRICS_PATH + value: /metrics + - name: LOG_VERBOSITY + value: "4" + - name: MASK_NVIDIA_DRIVER_PARAMS + value: "" + - name: NVIDIA_VISIBLE_DEVICES + value: void + - name: CDI_ROOT + value: /var/run/cdi + - name: NVIDIA_MIG_CONFIG_DEVICES + value: all + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: IMAGE_NAME + value: {{ .DRADriver.ImagePath | quote }} + - name: KUBELET_REGISTRAR_DIRECTORY_PATH + value: {{ printf "%s/plugins_registry" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + - name: KUBELET_PLUGINS_DIRECTORY_PATH + value: {{ printf "%s/plugins" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + - name: HEALTHCHECK_PORT + value: "51516" + {{- if .FeatureGates }} + - name: FEATURE_GATES + value: {{ .FeatureGates | quote }} + {{- end }} + {{- range .DRADriver.Spec.GPUs.KubeletPlugin.Env }} + - name: {{ .Name }} + value: {{ .Value | quote }} + {{- end }} + startupProbe: + grpc: + port: 51516 + service: liveness + failureThreshold: 600 + periodSeconds: 1 + timeoutSeconds: 10 + livenessProbe: + grpc: + port: 51516 + service: liveness + failureThreshold: 3 + periodSeconds: 30 + timeoutSeconds: 10 + {{- if .DRADriver.Spec.GPUs.KubeletPlugin.Resources }} + resources: {{ .DRADriver.Spec.GPUs.KubeletPlugin.Resources | toJson }} + {{- end }} + volumeMounts: + - name: plugins-registry + mountPath: {{ printf "%s/plugins_registry" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + - name: plugins + mountPath: {{ printf "%s/plugins" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + mountPropagation: Bidirectional + - name: cdi + mountPath: /var/run/cdi + - name: validations + mountPath: /run/nvidia/validations + readOnly: true + - name: host-root + mountPath: /host + mountPropagation: HostToContainer + readOnly: true + - name: driver-install-dir + mountPath: /run/nvidia/driver + mountPropagation: HostToContainer + {{- end }} + {{- if .DRADriver.Spec.IsComputeDomainsEnabled }} + - name: compute-domains + image: {{ .DRADriver.ImagePath }} + {{- if .DRADriver.Spec.ImagePullPolicy }} + imagePullPolicy: {{ .DRADriver.Spec.ImagePullPolicy }} + {{- end }} + securityContext: + privileged: true + command: + - bash + - -c + args: + - |- + # Export the driver-ready env contract (NVIDIA_DRIVER_ROOT, + # DRIVER_ROOT_CTR_PATH) written by the dra-driver-validator init container. + set -a + source /run/nvidia/validations/driver-ready + set +a + # Conditionally mask the params file to prevent this container from + # recreating any missing GPU device nodes (e.g. under nvkind). + if [ "${MASK_NVIDIA_DRIVER_PARAMS}" = "true" ]; then + cp /proc/driver/nvidia/params /root/gpu-params + sed -i 's/^ModifyDeviceFiles: 1$/ModifyDeviceFiles: 0/' /root/gpu-params + mount --bind /root/gpu-params /proc/driver/nvidia/params + fi + exec compute-domain-kubelet-plugin -v "${LOG_VERBOSITY}" + env: + - name: HTTP_ENDPOINT + value: ":8081" + - name: METRICS_PATH + value: /metrics + - name: LOG_VERBOSITY + value: "4" + - name: MASK_NVIDIA_DRIVER_PARAMS + value: "" + - name: NVIDIA_VISIBLE_DEVICES + value: void + - name: CDI_ROOT + value: /var/run/cdi + - name: NVIDIA_MIG_CONFIG_DEVICES + value: all + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: KUBELET_REGISTRAR_DIRECTORY_PATH + value: {{ printf "%s/plugins_registry" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + - name: KUBELET_PLUGINS_DIRECTORY_PATH + value: {{ printf "%s/plugins" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + - name: HEALTHCHECK_PORT + value: "51515" + {{- if .FeatureGates }} + - name: FEATURE_GATES + value: {{ .FeatureGates | quote }} + {{- end }} + {{- range .DRADriver.Spec.ComputeDomains.KubeletPlugin.Env }} + - name: {{ .Name }} + value: {{ .Value | quote }} + {{- end }} + startupProbe: + grpc: + port: 51515 + service: liveness + failureThreshold: 600 + periodSeconds: 1 + timeoutSeconds: 10 + livenessProbe: + grpc: + port: 51515 + service: liveness + failureThreshold: 3 + periodSeconds: 10 + timeoutSeconds: 10 + {{- if .DRADriver.Spec.ComputeDomains.KubeletPlugin.Resources }} + resources: {{ .DRADriver.Spec.ComputeDomains.KubeletPlugin.Resources | toJson }} + {{- end }} + volumeMounts: + - name: plugins-registry + mountPath: {{ printf "%s/plugins_registry" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + - name: plugins + mountPath: {{ printf "%s/plugins" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + mountPropagation: Bidirectional + - name: cdi + mountPath: /var/run/cdi + - name: validations + mountPath: /run/nvidia/validations + readOnly: true + - name: host-root + mountPath: /host + mountPropagation: HostToContainer + readOnly: true + - name: driver-install-dir + mountPath: /run/nvidia/driver + mountPropagation: HostToContainer + {{- end }} + volumes: + - name: plugins-registry + hostPath: + path: {{ printf "%s/plugins_registry" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + - name: plugins + hostPath: + path: {{ printf "%s/plugins" (.HostPaths.KubeletRootDir | default "/var/lib/kubelet") | quote }} + - name: cdi + hostPath: + path: /var/run/cdi + - name: host-root + hostPath: + path: {{ .HostPaths.RootFS | default "/" | quote }} + - name: driver-install-dir + hostPath: + path: {{ .HostPaths.DriverInstallDir | default "/run/nvidia/driver" | quote }} + type: DirectoryOrCreate + - name: validations + emptyDir: {} diff --git a/manifests/state-dra-driver/0600_controller-deployment.yaml b/manifests/state-dra-driver/0600_controller-deployment.yaml new file mode 100644 index 000000000..e9a4bd88f --- /dev/null +++ b/manifests/state-dra-driver/0600_controller-deployment.yaml @@ -0,0 +1,123 @@ +{{- if .DRADriver.Spec.IsComputeDomainsEnabled }} +{{- $controller := .DRADriver.Spec.ComputeDomains.Controller }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: nvidia-dra-driver-controller + namespace: {{ .Namespace }} + labels: + app.kubernetes.io/name: nvidia-dra-driver + app.kubernetes.io/component: nvidia-dra-driver-controller + app: nvidia-dra-driver-controller +spec: + replicas: 1 + selector: + matchLabels: + app: nvidia-dra-driver-controller + template: + metadata: + labels: + app.kubernetes.io/name: nvidia-dra-driver + app.kubernetes.io/component: nvidia-dra-driver-controller + app: nvidia-dra-driver-controller + spec: + priorityClassName: {{ $controller.PriorityClassName | default "system-node-critical" }} + serviceAccountName: nvidia-dra-driver-controller + {{- if .DRADriver.Spec.ImagePullSecrets }} + imagePullSecrets: + {{- range .DRADriver.Spec.ImagePullSecrets }} + - name: {{ . }} + {{- end }} + {{- end }} + {{- if $controller.NodeSelector }} + nodeSelector: {{ $controller.NodeSelector | toJson }} + {{- end }} + {{- if $controller.Affinity }} + affinity: {{ $controller.Affinity | toJson }} + {{- else }} + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + weight: 100 + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - podAffinityTerm: + labelSelector: + matchLabels: + app: nvidia-dra-driver-controller + topologyKey: kubernetes.io/hostname + weight: 100 + {{- end }} + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + operator: Exists + - effect: NoSchedule + key: node-role.kubernetes.io/master + operator: Exists + - key: CriticalAddonsOnly + operator: Exists + {{- range $controller.Tolerations }} + - {{ . | toJson }} + {{- end }} + containers: + - name: compute-domain + image: {{ .DRADriver.ImagePath }} + {{- if .DRADriver.Spec.ImagePullPolicy }} + imagePullPolicy: {{ .DRADriver.Spec.ImagePullPolicy }} + {{- end }} + command: + - compute-domain-controller + - -v + - "$(LOG_VERBOSITY)" + env: + - name: HTTP_ENDPOINT + value: ":8080" + - name: METRICS_PATH + value: /metrics + - name: PPROF_PATH + value: "" + - name: LOG_VERBOSITY + value: "4" + - name: LOG_VERBOSITY_CD_DAEMON + value: "4" + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: IMAGE_NAME + value: {{ .DRADriver.ImagePath | quote }} + - name: NVIDIA_VISIBLE_DEVICES + value: void + - name: LEADER_ELECTION_ENABLED + value: "false" + - name: LEADER_ELECTION_LEASE_LOCK_NAME + value: nvidia-dra-driver-controller + - name: LEADER_ELECTION_LEASE_LOCK_NAMESPACE + value: {{ .Namespace | quote }} + - name: LEADER_ELECTION_LEASE_DURATION + value: 15s + - name: LEADER_ELECTION_RENEW_DEADLINE + value: 10s + - name: LEADER_ELECTION_RETRY_PERIOD + value: 2s + {{- if .FeatureGates }} + - name: FEATURE_GATES + value: {{ .FeatureGates | quote }} + {{- end }} + {{- range $controller.Env }} + - name: {{ .Name }} + value: {{ .Value | quote }} + {{- end }} + {{- if $controller.Resources }} + resources: {{ $controller.Resources | toJson }} + {{- end }} +{{- end }}