diff --git a/controllers/clusterpolicy_controller.go b/controllers/clusterpolicy_controller.go index d16d2d445..6f205e5c4 100644 --- a/controllers/clusterpolicy_controller.go +++ b/controllers/clusterpolicy_controller.go @@ -170,6 +170,12 @@ func (r *ClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Reques } } + // Keep the helm.sh/chart label on running operand pods current. Best-effort: a failure + // must not fail the reconcile, since the authoritative chart label lives on the DaemonSets. + if err := clusterPolicyCtrl.reconcileOperandPodLabels(ctx); err != nil { + r.Log.Error(err, "failed to reconcile chart label on operand pods; continuing") + } + // if any state is not ready, requeue for reconcile after 5 seconds if overallStatus != gpuv1.Ready { clusterPolicyCtrl.operatorMetrics.reconciliationStatus.Set(reconciliationStatusNotReady) diff --git a/controllers/object_controls.go b/controllers/object_controls.go index 3c068c4c8..88b8b8fd7 100644 --- a/controllers/object_controls.go +++ b/controllers/object_controls.go @@ -793,7 +793,9 @@ func applyCommonDaemonsetMetadata(obj *appsv1.DaemonSet, dsSpec *gpuv1.Daemonset // if the user specifies an override of the "app" or the "app.kubernetes.io/part-of" key, we skip it. // DaemonSet pod selectors are immutable, so we still want the pods to be selectable as before and working // with the existing daemon set selectors. - if labelKey == "app" || labelKey == "app.kubernetes.io/part-of" { + // helm.sh/chart is also skipped to avoid churning the pod-template hash on chart upgrades + // (see consts.HelmChartLabelKey); it remains on the DaemonSet object metadata. + if labelKey == "app" || labelKey == "app.kubernetes.io/part-of" || labelKey == consts.HelmChartLabelKey { continue } obj.Spec.Template.Labels[labelKey] = labelValue diff --git a/controllers/state_manager.go b/controllers/state_manager.go index 29eef5cf2..d29d9f1e1 100644 --- a/controllers/state_manager.go +++ b/controllers/state_manager.go @@ -36,6 +36,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/config" gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" + "github.com/NVIDIA/gpu-operator/internal/consts" ) const ( @@ -621,6 +622,53 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) { return clusterHasNFDLabels, gpuNodesTotal, nil } +// reconcileOperandPodLabels patches the current helm.sh/chart value onto running operand +// pods. The label is kept off the pod template (see consts.HelmChartLabelKey) so a chart +// upgrade does not change the pod-template hash and trigger a DaemonSet rollout; patching +// the live pods instead preserves pod-level chart traceability without recreating them. +// Pods are selected by app.kubernetes.io/managed-by (stable across releases) and the +// desired value is read from the same daemonsets.labels applied to the DaemonSet metadata. +func (n *ClusterPolicyController) reconcileOperandPodLabels(ctx context.Context) error { + managedBy := n.singleton.Spec.Daemonsets.Labels[consts.AppManagedByLabelKey] + if managedBy == "" { + // Without a stable selector we cannot safely target operand pods; nothing to do. + return nil + } + desiredChart := n.singleton.Spec.Daemonsets.Labels[consts.HelmChartLabelKey] + + podList := &corev1.PodList{} + opts := []client.ListOption{ + client.InNamespace(n.operatorNamespace), + client.MatchingLabels{consts.AppManagedByLabelKey: managedBy}, + } + if err := n.client.List(ctx, podList, opts...); err != nil { + return fmt.Errorf("unable to list operand pods to reconcile %q label: %w", consts.HelmChartLabelKey, err) + } + + for i := range podList.Items { + pod := &podList.Items[i] + if pod.Labels[consts.HelmChartLabelKey] == desiredChart { + // Already up-to-date; avoid a needless write (and pod update event). + continue + } + original := pod.DeepCopy() + if desiredChart == "" { + delete(pod.Labels, consts.HelmChartLabelKey) + } else { + if pod.Labels == nil { + pod.Labels = map[string]string{} + } + pod.Labels[consts.HelmChartLabelKey] = desiredChart + } + if err := n.client.Patch(ctx, pod, client.MergeFrom(original)); err != nil { + return fmt.Errorf("unable to patch %q label on pod %s/%s: %w", consts.HelmChartLabelKey, pod.Namespace, pod.Name, err) + } + n.logger.V(consts.LogLevelInfo).Info("Reconciled chart label on operand pod", + "pod", pod.Name, "label", consts.HelmChartLabelKey, "value", desiredChart) + } + return nil +} + func getRuntimeString(node corev1.Node) (gpuv1.Runtime, error) { // ContainerRuntimeVersion string will look like :// runtimeVer := node.Status.NodeInfo.ContainerRuntimeVersion diff --git a/controllers/state_manager_test.go b/controllers/state_manager_test.go index 6585de196..e9e168439 100644 --- a/controllers/state_manager_test.go +++ b/controllers/state_manager_test.go @@ -21,14 +21,17 @@ import ( "errors" "testing" + "github.com/go-logr/logr" "github.com/stretchr/testify/require" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client/fake" gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1" + "github.com/NVIDIA/gpu-operator/internal/consts" ) func TestGetGPUNodeOSInfo(t *testing.T) { @@ -223,6 +226,77 @@ func TestHasNFDLabels(t *testing.T) { } } +func TestReconcileOperandPodLabels(t *testing.T) { + const ( + ns = "gpu-operator" + managedBy = "gpu-operator" + oldChart = "gpu-operator-v1.0.0" + newChart = "gpu-operator-v1.0.1" + ) + + operandPod := func(name string, labels map[string]string) *corev1.Pod { + return &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns, Labels: labels}, + } + } + + stale := operandPod("driver-stale", map[string]string{ + consts.AppManagedByLabelKey: managedBy, + consts.HelmChartLabelKey: oldChart, + }) + missing := operandPod("toolkit-missing", map[string]string{ + consts.AppManagedByLabelKey: managedBy, + }) + upToDate := operandPod("plugin-uptodate", map[string]string{ + consts.AppManagedByLabelKey: managedBy, + consts.HelmChartLabelKey: newChart, + }) + // Not an operand (e.g. the operator's own pod, managed by Helm): must be left untouched. + foreign := operandPod("operator", map[string]string{ + consts.AppManagedByLabelKey: "Helm", + consts.HelmChartLabelKey: oldChart, + }) + + scheme := runtime.NewScheme() + require.NoError(t, corev1.AddToScheme(scheme)) + cl := fake.NewClientBuilder(). + WithScheme(scheme). + WithObjects(stale, missing, upToDate, foreign). + Build() + + n := ClusterPolicyController{ + ctx: context.Background(), + client: cl, + logger: logr.Discard(), + operatorNamespace: ns, + singleton: &gpuv1.ClusterPolicy{ + Spec: gpuv1.ClusterPolicySpec{ + Daemonsets: gpuv1.DaemonsetsSpec{ + Labels: map[string]string{ + consts.AppManagedByLabelKey: managedBy, + consts.HelmChartLabelKey: newChart, + }, + }, + }, + }, + } + + require.NoError(t, n.reconcileOperandPodLabels(context.Background())) + + get := func(name string) *corev1.Pod { + p := &corev1.Pod{} + require.NoError(t, cl.Get(context.Background(), types.NamespacedName{Namespace: ns, Name: name}, p)) + return p + } + + // Operand pods (managed-by gpu-operator) are synced to the current chart version. + require.Equal(t, newChart, get("driver-stale").Labels[consts.HelmChartLabelKey], "stale label should be updated") + require.Equal(t, newChart, get("toolkit-missing").Labels[consts.HelmChartLabelKey], "missing label should be added") + require.Equal(t, newChart, get("plugin-uptodate").Labels[consts.HelmChartLabelKey], "up-to-date label should be unchanged") + // Non-operand pod is selected out and left as-is. + require.Equal(t, oldChart, get("operator").Labels[consts.HelmChartLabelKey], "foreign pod must not be touched") +} + func TestHasMIGManagerLabel(t *testing.T) { tests := []struct { labels map[string]string diff --git a/controllers/transforms_test.go b/controllers/transforms_test.go index 30310c1b2..c26db661b 100644 --- a/controllers/transforms_test.go +++ b/controllers/transforms_test.go @@ -817,6 +817,9 @@ func TestApplyCommonDaemonsetMetadata(t *testing.T) { "key": "value", "app": "value", "app.kubernetes.io/part-of": "value", + // helm.sh/chart must not be propagated to the pod template, otherwise a + // chart-version change would churn the pod-template hash and trigger a rollout. + "helm.sh/chart": "gpu-operator-v1.2.3", }}, expectedDs: NewDaemonset().WithPodLabels(map[string]string{ "key": "value", diff --git a/deployments/gpu-operator/templates/nvidiadriver.yaml b/deployments/gpu-operator/templates/nvidiadriver.yaml index 1a059a490..c81ac743f 100644 --- a/deployments/gpu-operator/templates/nvidiadriver.yaml +++ b/deployments/gpu-operator/templates/nvidiadriver.yaml @@ -14,9 +14,8 @@ spec: {{- if .Values.daemonsets.annotations }} annotations: {{ toYaml .Values.daemonsets.annotations | nindent 6 }} {{- end }} - {{- if .Values.daemonsets.labels }} - labels: {{ toYaml .Values.daemonsets.labels | nindent 6 }} - {{- end }} + labels: + {{- include "gpu-operator.operand-labels" . | nindent 4 }} {{- if .Values.driver.nvidiaDriverCRD.nodeSelector }} nodeSelector: {{ toYaml .Values.driver.nvidiaDriverCRD.nodeSelector | nindent 6 }} {{- end }} diff --git a/internal/consts/consts.go b/internal/consts/consts.go index c2850f419..1d4b5ff24 100644 --- a/internal/consts/consts.go +++ b/internal/consts/consts.go @@ -46,6 +46,14 @@ const ( OcpDriverToolkitIdentificationLabel = "openshift.driver-toolkit" NfdOSTreeVersionLabelKey = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION" + // HelmChartLabelKey is the standard Helm "-" label. It is kept off operand + // pod templates (it changes every chart upgrade and would trigger a rollout) but retained on + // DaemonSet object metadata for chart traceability. + HelmChartLabelKey = "helm.sh/chart" + // AppManagedByLabelKey identifies the managing controller. Operand objects carry the stable + // value "gpu-operator", making it a safe selector for operand pods. + AppManagedByLabelKey = "app.kubernetes.io/managed-by" + // NvidiaAnnotationHashKey indicates annotation name for last applied hash by gpu-operator NvidiaAnnotationHashKey = "nvidia.com/last-applied-hash" diff --git a/internal/state/driver.go b/internal/state/driver.go index 355910a8e..f907f947f 100644 --- a/internal/state/driver.go +++ b/internal/state/driver.go @@ -158,6 +158,12 @@ func (s *stateDriver) Sync(ctx context.Context, customResource interface{}, info return SyncStateNotReady, fmt.Errorf("failed to create/update objects: %v", err) } + // Keep the helm.sh/chart label on running driver pods current without rolling them. + // Best-effort and cosmetic — the authoritative chart label lives on the DaemonSet object. + if err := s.reconcileDriverPodLabels(ctx, cr); err != nil { + log.FromContext(ctx).V(consts.LogLevelWarning).Info("failed to reconcile chart label on driver pods; continuing", "error", err) + } + // Check objects status syncState, err := s.getSyncState(ctx, objs) if err != nil { @@ -166,6 +172,50 @@ func (s *stateDriver) Sync(ctx context.Context, customResource interface{}, info return syncState, nil } +// reconcileDriverPodLabels patches the current helm.sh/chart value (from the CR labels) onto +// running driver pods. The label is kept off the pod template (see podTemplateLabels) so a +// chart upgrade does not change the pod-template hash and trigger a driver rollout; patching +// the live pods instead preserves chart traceability without disrupting workloads. Pods are +// located via the DaemonSets this CR owns. +func (s *stateDriver) reconcileDriverPodLabels(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver) error { + desiredChart := cr.Spec.Labels[consts.HelmChartLabelKey] + + dsList := &appsv1.DaemonSetList{} + if err := s.client.List(ctx, dsList, client.MatchingFields{consts.NVIDIADriverControllerIndexKey: cr.Name}); err != nil { + return fmt.Errorf("failed to list driver DaemonSets owned by NVIDIADriver %q: %w", cr.Name, err) + } + + for i := range dsList.Items { + ds := &dsList.Items[i] + podList := &corev1.PodList{} + if err := s.client.List(ctx, podList, + client.InNamespace(ds.Namespace), + client.MatchingLabels(ds.Spec.Selector.MatchLabels)); err != nil { + return fmt.Errorf("failed to list pods for driver DaemonSet %q: %w", ds.Name, err) + } + for j := range podList.Items { + pod := &podList.Items[j] + if pod.Labels[consts.HelmChartLabelKey] == desiredChart { + // Already up-to-date; avoid a needless write (and pod update event). + continue + } + original := pod.DeepCopy() + if desiredChart == "" { + delete(pod.Labels, consts.HelmChartLabelKey) + } else { + if pod.Labels == nil { + pod.Labels = map[string]string{} + } + pod.Labels[consts.HelmChartLabelKey] = desiredChart + } + if err := s.client.Patch(ctx, pod, client.MergeFrom(original)); err != nil { + return fmt.Errorf("failed to patch %q label on pod %s/%s: %w", consts.HelmChartLabelKey, pod.Namespace, pod.Name, err) + } + } + } + return nil +} + func (s *stateDriver) GetWatchSources(mgr ctrlManager) map[string]SyncingSource { wr := make(map[string]SyncingSource) nvDriverPredicate := predicate.NewTypedPredicateFuncs(func(ds *appsv1.DaemonSet) bool { @@ -597,15 +647,31 @@ func getDriverSpec(cr *nvidiav1alpha1.NVIDIADriver, nodePool nodePool) (*driverS spec.Labels = sanitizeDriverLabels(spec.Labels) return &driverSpec{ - Spec: spec, - AppName: nvidiaDriverAppName, - Name: nvidiaDriverName, - ImagePath: imagePath, - ManagerImagePath: managerImagePath, - OSVersion: nodePool.osTag, + Spec: spec, + AppName: nvidiaDriverAppName, + Name: nvidiaDriverName, + ImagePath: imagePath, + ManagerImagePath: managerImagePath, + OSVersion: nodePool.osTag, + PodTemplateLabels: podTemplateLabels(spec.Labels), }, nil } +// podTemplateLabels copies labels for the pod template, excluding helm.sh/chart: its value +// changes on every chart upgrade, and applying it to the pod template would change the +// DaemonSet's controller revision hash and trigger an unnecessary (disruptive) rollout. +// The full label set still lands on the DaemonSet object metadata via Spec.Labels. +func podTemplateLabels(labels map[string]string) map[string]string { + podLabels := make(map[string]string, len(labels)) + for k, v := range labels { + if k == consts.HelmChartLabelKey { + continue + } + podLabels[k] = v + } + return podLabels +} + func getGDSSpec(spec *nvidiav1alpha1.NVIDIADriverSpec, pool nodePool) (*gdsDriverSpec, error) { if spec == nil || !spec.IsGDSEnabled() { // note: GDS is optional in the NvidiaDriver CRD diff --git a/internal/state/driver_test.go b/internal/state/driver_test.go index 02b2736f2..07b94bca6 100644 --- a/internal/state/driver_test.go +++ b/internal/state/driver_test.go @@ -27,6 +27,7 @@ import ( configv1 "github.com/openshift/api/config/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -35,6 +36,7 @@ import ( apitypes "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" nvidiav1alpha1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1alpha1" @@ -315,6 +317,7 @@ func TestDriverSpec(t *testing.T) { renderData := getMinimalDriverRenderData() renderData.Driver.Spec = driverSpec + renderData.Driver.PodTemplateLabels = podTemplateLabels(driverSpec.Labels) objs, err := stateDriver.renderer.RenderObjects( &render.TemplatingData{ @@ -331,6 +334,119 @@ func TestDriverSpec(t *testing.T) { require.Equal(t, string(o), actual) } +// TestDriverPodTemplateExcludesChartLabel verifies that the helm.sh/chart label is applied +// to the rendered DaemonSet's object metadata (for chart traceability) but is excluded from +// its pod template, so a chart-version change does not churn the pod-template revision hash. +func TestDriverPodTemplateExcludesChartLabel(t *testing.T) { + state, err := NewStateDriver(nil, "", nil, manifestDir) + require.Nil(t, err) + stateDriver, ok := state.(*stateDriver) + require.True(t, ok) + + labels := sanitizeDriverLabels(map[string]string{ + "custom-label": "custom-value", + consts.HelmChartLabelKey: "gpu-operator-v1.2.3", + }) + + renderData := getMinimalDriverRenderData() + renderData.Driver.Spec.Labels = labels + renderData.Driver.PodTemplateLabels = podTemplateLabels(labels) + + objs, err := stateDriver.renderer.RenderObjects(&render.TemplatingData{Data: renderData}) + require.Nil(t, err) + + ds, err := getDaemonsetFromObjects(objs) + require.Nil(t, err) + + // On the DaemonSet object metadata: chart label present. + require.Equal(t, "gpu-operator-v1.2.3", ds.Labels[consts.HelmChartLabelKey], + "helm.sh/chart should be retained on the DaemonSet object metadata") + require.Equal(t, "custom-value", ds.Labels["custom-label"]) + + // On the pod template: chart label excluded, other labels retained. + _, found := ds.Spec.Template.Labels[consts.HelmChartLabelKey] + require.False(t, found, "helm.sh/chart must not be propagated to the pod template") + require.Equal(t, "custom-value", ds.Spec.Template.Labels["custom-label"]) +} + +// TestReconcileDriverPodLabels verifies that the NVIDIADriver controller patches the current +// helm.sh/chart value (from the CR labels) onto the live driver pods it owns, without touching +// unrelated pods. +func TestReconcileDriverPodLabels(t *testing.T) { + const ( + ns = "test-ns" + crName = "test-driver" + appLabel = "nvidia-gpu-driver-ubuntu22.04-abc" + oldChart = "gpu-operator-v1.0.0" + newChart = "gpu-operator-v1.0.1" + ) + + cr := &nvidiav1alpha1.NVIDIADriver{ + ObjectMeta: metav1.ObjectMeta{Name: crName}, + Spec: nvidiav1alpha1.NVIDIADriverSpec{ + Labels: map[string]string{consts.HelmChartLabelKey: newChart}, + }, + } + + ds := &appsv1.DaemonSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: appLabel, + Namespace: ns, + OwnerReferences: []metav1.OwnerReference{{ + APIVersion: nvidiav1alpha1.SchemeGroupVersion.String(), + Kind: nvidiav1alpha1.NVIDIADriverCRDName, + Name: crName, + Controller: ptr.To(true), + }}, + }, + Spec: appsv1.DaemonSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"app": appLabel}}, + }, + } + + driverPod := func(name, chart string) *corev1.Pod { + labels := map[string]string{"app": appLabel} + if chart != "" { + labels[consts.HelmChartLabelKey] = chart + } + return &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns, Labels: labels}} + } + stalePod := driverPod("driver-stale", oldChart) + missingPod := driverPod("driver-missing", "") + // A pod in the namespace that is not owned by this driver DaemonSet must be left untouched. + otherPod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{ + Name: "other", Namespace: ns, + Labels: map[string]string{"app": "something-else", consts.HelmChartLabelKey: oldChart}, + }} + + // Mirror the DaemonSet ownership index registered by the NVIDIADriver controller. + indexFunc := func(rawObj client.Object) []string { + owner := metav1.GetControllerOf(rawObj.(*appsv1.DaemonSet)) + if owner == nil || owner.Kind != nvidiav1alpha1.NVIDIADriverCRDName { + return nil + } + return []string{owner.Name} + } + + cl := fake.NewClientBuilder(). + WithScheme(scheme.Scheme). + WithObjects(ds, stalePod, missingPod, otherPod). + WithIndex(&appsv1.DaemonSet{}, consts.NVIDIADriverControllerIndexKey, indexFunc). + Build() + + s := &stateDriver{stateSkel: stateSkel{client: cl, namespace: ns}} + require.NoError(t, s.reconcileDriverPodLabels(context.Background(), cr)) + + get := func(name string) *corev1.Pod { + p := &corev1.Pod{} + require.NoError(t, cl.Get(context.Background(), apitypes.NamespacedName{Namespace: ns, Name: name}, p)) + return p + } + require.Equal(t, newChart, get("driver-stale").Labels[consts.HelmChartLabelKey], "stale label should be updated") + require.Equal(t, newChart, get("driver-missing").Labels[consts.HelmChartLabelKey], "missing label should be added") + require.Equal(t, oldChart, get("other").Labels[consts.HelmChartLabelKey], "non-driver pod must not be touched") +} + func TestDriverGDS(t *testing.T) { const ( testName = "driver-gds" diff --git a/internal/state/types.go b/internal/state/types.go index 6b776078d..0f8fec65f 100644 --- a/internal/state/types.go +++ b/internal/state/types.go @@ -35,6 +35,9 @@ type driverSpec struct { ImagePath string ManagerImagePath string OSVersion string + // PodTemplateLabels is Spec.Labels with helm.sh/chart removed (see podTemplateLabels), so a + // chart-version change does not churn the pod template and trigger a driver rollout. + PodTemplateLabels map[string]string } // gdsDriverSpec is a wrapper of GPUDirectStorageSpec with an additional ImagePath field diff --git a/manifests/state-driver/0500_daemonset.yaml b/manifests/state-driver/0500_daemonset.yaml index 5b9c6c62f..b54e264aa 100644 --- a/manifests/state-driver/0500_daemonset.yaml +++ b/manifests/state-driver/0500_daemonset.yaml @@ -61,8 +61,8 @@ spec: {{- else }} app.kubernetes.io/component: "nvidia-driver" {{- end }} - {{- if .Driver.Spec.Labels }} - {{- .Driver.Spec.Labels | yaml | nindent 8 }} + {{- if .Driver.PodTemplateLabels }} + {{- .Driver.PodTemplateLabels | yaml | nindent 8 }} {{- end }} spec: {{- if eq .Driver.Spec.DriverType "vgpu-host-manager" }}