Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions controllers/clusterpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ func (r *ClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Reques
}
}

// Keep the helm.sh/chart label on running operand pods current. Best-effort: a failure
// must not fail the reconcile, since the authoritative chart label lives on the DaemonSets.
if err := clusterPolicyCtrl.reconcileOperandPodLabels(ctx); err != nil {
r.Log.Error(err, "failed to reconcile chart label on operand pods; continuing")
}

// if any state is not ready, requeue for reconcile after 5 seconds
if overallStatus != gpuv1.Ready {
clusterPolicyCtrl.operatorMetrics.reconciliationStatus.Set(reconciliationStatusNotReady)
Expand Down
4 changes: 3 additions & 1 deletion controllers/object_controls.go
Original file line number Diff line number Diff line change
Expand Up @@ -793,7 +793,9 @@ func applyCommonDaemonsetMetadata(obj *appsv1.DaemonSet, dsSpec *gpuv1.Daemonset
// if the user specifies an override of the "app" or the "app.kubernetes.io/part-of" key, we skip it.
// DaemonSet pod selectors are immutable, so we still want the pods to be selectable as before and working
// with the existing daemon set selectors.
if labelKey == "app" || labelKey == "app.kubernetes.io/part-of" {
// helm.sh/chart is also skipped to avoid churning the pod-template hash on chart upgrades
// (see consts.HelmChartLabelKey); it remains on the DaemonSet object metadata.
if labelKey == "app" || labelKey == "app.kubernetes.io/part-of" || labelKey == consts.HelmChartLabelKey {
continue
}
obj.Spec.Template.Labels[labelKey] = labelValue
Expand Down
48 changes: 48 additions & 0 deletions controllers/state_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client/config"

gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
"github.com/NVIDIA/gpu-operator/internal/consts"
)

const (
Expand Down Expand Up @@ -621,6 +622,53 @@ func (n *ClusterPolicyController) labelGPUNodes() (bool, int, error) {
return clusterHasNFDLabels, gpuNodesTotal, nil
}

// reconcileOperandPodLabels patches the current helm.sh/chart value onto running operand
// pods. The label is kept off the pod template (see consts.HelmChartLabelKey) so a chart
// upgrade does not change the pod-template hash and trigger a DaemonSet rollout; patching
// the live pods instead preserves pod-level chart traceability without recreating them.
// Pods are selected by app.kubernetes.io/managed-by (stable across releases) and the
// desired value is read from the same daemonsets.labels applied to the DaemonSet metadata.
func (n *ClusterPolicyController) reconcileOperandPodLabels(ctx context.Context) error {
managedBy := n.singleton.Spec.Daemonsets.Labels[consts.AppManagedByLabelKey]
if managedBy == "" {
// Without a stable selector we cannot safely target operand pods; nothing to do.
return nil
}
desiredChart := n.singleton.Spec.Daemonsets.Labels[consts.HelmChartLabelKey]

podList := &corev1.PodList{}
opts := []client.ListOption{
client.InNamespace(n.operatorNamespace),
client.MatchingLabels{consts.AppManagedByLabelKey: managedBy},
}
if err := n.client.List(ctx, podList, opts...); err != nil {
return fmt.Errorf("unable to list operand pods to reconcile %q label: %w", consts.HelmChartLabelKey, err)
}

for i := range podList.Items {
pod := &podList.Items[i]
if pod.Labels[consts.HelmChartLabelKey] == desiredChart {
// Already up-to-date; avoid a needless write (and pod update event).
continue
}
original := pod.DeepCopy()
if desiredChart == "" {
delete(pod.Labels, consts.HelmChartLabelKey)
} else {
if pod.Labels == nil {
pod.Labels = map[string]string{}
}
pod.Labels[consts.HelmChartLabelKey] = desiredChart
}
if err := n.client.Patch(ctx, pod, client.MergeFrom(original)); err != nil {
return fmt.Errorf("unable to patch %q label on pod %s/%s: %w", consts.HelmChartLabelKey, pod.Namespace, pod.Name, err)
}
n.logger.V(consts.LogLevelInfo).Info("Reconciled chart label on operand pod",
"pod", pod.Name, "label", consts.HelmChartLabelKey, "value", desiredChart)
}
return nil
}

func getRuntimeString(node corev1.Node) (gpuv1.Runtime, error) {
// ContainerRuntimeVersion string will look like <runtime>://<x.y.z>
runtimeVer := node.Status.NodeInfo.ContainerRuntimeVersion
Expand Down
74 changes: 74 additions & 0 deletions controllers/state_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,17 @@ import (
"errors"
"testing"

"github.com/go-logr/logr"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client/fake"

gpuv1 "github.com/NVIDIA/gpu-operator/api/nvidia/v1"
"github.com/NVIDIA/gpu-operator/internal/consts"
)

func TestGetGPUNodeOSInfo(t *testing.T) {
Expand Down Expand Up @@ -223,6 +226,77 @@ func TestHasNFDLabels(t *testing.T) {
}
}

func TestReconcileOperandPodLabels(t *testing.T) {
const (
ns = "gpu-operator"
managedBy = "gpu-operator"
oldChart = "gpu-operator-v1.0.0"
newChart = "gpu-operator-v1.0.1"
)

operandPod := func(name string, labels map[string]string) *corev1.Pod {
return &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: ns, Labels: labels},
}
}

stale := operandPod("driver-stale", map[string]string{
consts.AppManagedByLabelKey: managedBy,
consts.HelmChartLabelKey: oldChart,
})
missing := operandPod("toolkit-missing", map[string]string{
consts.AppManagedByLabelKey: managedBy,
})
upToDate := operandPod("plugin-uptodate", map[string]string{
consts.AppManagedByLabelKey: managedBy,
consts.HelmChartLabelKey: newChart,
})
// Not an operand (e.g. the operator's own pod, managed by Helm): must be left untouched.
foreign := operandPod("operator", map[string]string{
consts.AppManagedByLabelKey: "Helm",
consts.HelmChartLabelKey: oldChart,
})

scheme := runtime.NewScheme()
require.NoError(t, corev1.AddToScheme(scheme))
cl := fake.NewClientBuilder().
WithScheme(scheme).
WithObjects(stale, missing, upToDate, foreign).
Build()

n := ClusterPolicyController{
ctx: context.Background(),
client: cl,
logger: logr.Discard(),
operatorNamespace: ns,
singleton: &gpuv1.ClusterPolicy{
Spec: gpuv1.ClusterPolicySpec{
Daemonsets: gpuv1.DaemonsetsSpec{
Labels: map[string]string{
consts.AppManagedByLabelKey: managedBy,
consts.HelmChartLabelKey: newChart,
},
},
},
},
}

require.NoError(t, n.reconcileOperandPodLabels(context.Background()))

get := func(name string) *corev1.Pod {
p := &corev1.Pod{}
require.NoError(t, cl.Get(context.Background(), types.NamespacedName{Namespace: ns, Name: name}, p))
return p
}

// Operand pods (managed-by gpu-operator) are synced to the current chart version.
require.Equal(t, newChart, get("driver-stale").Labels[consts.HelmChartLabelKey], "stale label should be updated")
require.Equal(t, newChart, get("toolkit-missing").Labels[consts.HelmChartLabelKey], "missing label should be added")
require.Equal(t, newChart, get("plugin-uptodate").Labels[consts.HelmChartLabelKey], "up-to-date label should be unchanged")
// Non-operand pod is selected out and left as-is.
require.Equal(t, oldChart, get("operator").Labels[consts.HelmChartLabelKey], "foreign pod must not be touched")
}

func TestHasMIGManagerLabel(t *testing.T) {
tests := []struct {
labels map[string]string
Expand Down
3 changes: 3 additions & 0 deletions controllers/transforms_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -817,6 +817,9 @@ func TestApplyCommonDaemonsetMetadata(t *testing.T) {
"key": "value",
"app": "value",
"app.kubernetes.io/part-of": "value",
// helm.sh/chart must not be propagated to the pod template, otherwise a
// chart-version change would churn the pod-template hash and trigger a rollout.
"helm.sh/chart": "gpu-operator-v1.2.3",
}},
expectedDs: NewDaemonset().WithPodLabels(map[string]string{
"key": "value",
Expand Down
5 changes: 2 additions & 3 deletions deployments/gpu-operator/templates/nvidiadriver.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@ spec:
{{- if .Values.daemonsets.annotations }}
annotations: {{ toYaml .Values.daemonsets.annotations | nindent 6 }}
{{- end }}
{{- if .Values.daemonsets.labels }}
labels: {{ toYaml .Values.daemonsets.labels | nindent 6 }}
{{- end }}
labels:
{{- include "gpu-operator.operand-labels" . | nindent 4 }}
{{- if .Values.driver.nvidiaDriverCRD.nodeSelector }}
nodeSelector: {{ toYaml .Values.driver.nvidiaDriverCRD.nodeSelector | nindent 6 }}
{{- end }}
Expand Down
8 changes: 8 additions & 0 deletions internal/consts/consts.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,14 @@ const (
OcpDriverToolkitIdentificationLabel = "openshift.driver-toolkit"
NfdOSTreeVersionLabelKey = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION"

// HelmChartLabelKey is the standard Helm "<chart>-<version>" label. It is kept off operand
// pod templates (it changes every chart upgrade and would trigger a rollout) but retained on
// DaemonSet object metadata for chart traceability.
HelmChartLabelKey = "helm.sh/chart"
// AppManagedByLabelKey identifies the managing controller. Operand objects carry the stable
// value "gpu-operator", making it a safe selector for operand pods.
AppManagedByLabelKey = "app.kubernetes.io/managed-by"

// NvidiaAnnotationHashKey indicates annotation name for last applied hash by gpu-operator
NvidiaAnnotationHashKey = "nvidia.com/last-applied-hash"

Expand Down
78 changes: 72 additions & 6 deletions internal/state/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,12 @@ func (s *stateDriver) Sync(ctx context.Context, customResource interface{}, info
return SyncStateNotReady, fmt.Errorf("failed to create/update objects: %v", err)
}

// Keep the helm.sh/chart label on running driver pods current without rolling them.
// Best-effort and cosmetic — the authoritative chart label lives on the DaemonSet object.
if err := s.reconcileDriverPodLabels(ctx, cr); err != nil {
log.FromContext(ctx).V(consts.LogLevelWarning).Info("failed to reconcile chart label on driver pods; continuing", "error", err)
}

// Check objects status
syncState, err := s.getSyncState(ctx, objs)
if err != nil {
Expand All @@ -166,6 +172,50 @@ func (s *stateDriver) Sync(ctx context.Context, customResource interface{}, info
return syncState, nil
}

// reconcileDriverPodLabels patches the current helm.sh/chart value (from the CR labels) onto
// running driver pods. The label is kept off the pod template (see podTemplateLabels) so a
// chart upgrade does not change the pod-template hash and trigger a driver rollout; patching
// the live pods instead preserves chart traceability without disrupting workloads. Pods are
// located via the DaemonSets this CR owns.
func (s *stateDriver) reconcileDriverPodLabels(ctx context.Context, cr *nvidiav1alpha1.NVIDIADriver) error {
desiredChart := cr.Spec.Labels[consts.HelmChartLabelKey]

dsList := &appsv1.DaemonSetList{}
if err := s.client.List(ctx, dsList, client.MatchingFields{consts.NVIDIADriverControllerIndexKey: cr.Name}); err != nil {
return fmt.Errorf("failed to list driver DaemonSets owned by NVIDIADriver %q: %w", cr.Name, err)
}

for i := range dsList.Items {
ds := &dsList.Items[i]
podList := &corev1.PodList{}
if err := s.client.List(ctx, podList,
client.InNamespace(ds.Namespace),
client.MatchingLabels(ds.Spec.Selector.MatchLabels)); err != nil {
return fmt.Errorf("failed to list pods for driver DaemonSet %q: %w", ds.Name, err)
}
for j := range podList.Items {
pod := &podList.Items[j]
if pod.Labels[consts.HelmChartLabelKey] == desiredChart {
// Already up-to-date; avoid a needless write (and pod update event).
continue
}
original := pod.DeepCopy()
if desiredChart == "" {
delete(pod.Labels, consts.HelmChartLabelKey)
} else {
if pod.Labels == nil {
pod.Labels = map[string]string{}
}
pod.Labels[consts.HelmChartLabelKey] = desiredChart
}
if err := s.client.Patch(ctx, pod, client.MergeFrom(original)); err != nil {
return fmt.Errorf("failed to patch %q label on pod %s/%s: %w", consts.HelmChartLabelKey, pod.Namespace, pod.Name, err)
}
}
}
return nil
}

func (s *stateDriver) GetWatchSources(mgr ctrlManager) map[string]SyncingSource {
wr := make(map[string]SyncingSource)
nvDriverPredicate := predicate.NewTypedPredicateFuncs(func(ds *appsv1.DaemonSet) bool {
Expand Down Expand Up @@ -597,15 +647,31 @@ func getDriverSpec(cr *nvidiav1alpha1.NVIDIADriver, nodePool nodePool) (*driverS
spec.Labels = sanitizeDriverLabels(spec.Labels)

return &driverSpec{
Spec: spec,
AppName: nvidiaDriverAppName,
Name: nvidiaDriverName,
ImagePath: imagePath,
ManagerImagePath: managerImagePath,
OSVersion: nodePool.osTag,
Spec: spec,
AppName: nvidiaDriverAppName,
Name: nvidiaDriverName,
ImagePath: imagePath,
ManagerImagePath: managerImagePath,
OSVersion: nodePool.osTag,
PodTemplateLabels: podTemplateLabels(spec.Labels),
}, nil
}

// podTemplateLabels copies labels for the pod template, excluding helm.sh/chart: its value
// changes on every chart upgrade, and applying it to the pod template would change the
// DaemonSet's controller revision hash and trigger an unnecessary (disruptive) rollout.
// The full label set still lands on the DaemonSet object metadata via Spec.Labels.
func podTemplateLabels(labels map[string]string) map[string]string {
podLabels := make(map[string]string, len(labels))
for k, v := range labels {
if k == consts.HelmChartLabelKey {
continue
}
podLabels[k] = v
}
return podLabels
}

func getGDSSpec(spec *nvidiav1alpha1.NVIDIADriverSpec, pool nodePool) (*gdsDriverSpec, error) {
if spec == nil || !spec.IsGDSEnabled() {
// note: GDS is optional in the NvidiaDriver CRD
Expand Down
Loading
Loading