-
Notifications
You must be signed in to change notification settings - Fork 511
fix: drop containerd config mounts when NRI is enabled #2514
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1379,11 +1379,46 @@ func TransformToolkit(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, n | |||||||||||||||||
| func transformForRuntime(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, runtime string, container *corev1.Container) error { | ||||||||||||||||||
| setContainerEnv(container, "RUNTIME", runtime) | ||||||||||||||||||
|
|
||||||||||||||||||
| if runtime == gpuv1.Containerd.String() { | ||||||||||||||||||
| // Set the runtime class name that is to be configured for containerd | ||||||||||||||||||
| setContainerEnv(container, "CONTAINERD_RUNTIME_CLASS", getRuntimeClassName(config)) | ||||||||||||||||||
| // In NRI plugin mode the toolkit installer runs the noop runtime configurer | ||||||||||||||||||
| // (see nvidia-ctk-installer's NewConfigurer): it does not read or modify the | ||||||||||||||||||
| // container runtime configuration and does not restart the runtime. Mounting the | ||||||||||||||||||
| // runtime config files and socket is therefore unnecessary in this mode, and the | ||||||||||||||||||
| // hostPath mounts (e.g. /etc/containerd/conf.d with DirectoryOrCreate) break on | ||||||||||||||||||
| // read-only hosts such as Talos. Only the NRI socket mount (below) is required. | ||||||||||||||||||
| if config.CDI.IsNRIPluginEnabled() { | ||||||||||||||||||
| // setup mounts for the runtime NRI socket file | ||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not move L1389-L1401 to a private the same way as you have done in |
||||||||||||||||||
| nriSocketFile := getContainerEnv(container, "NRI_SOCKET") | ||||||||||||||||||
| if nriSocketFile == "" { | ||||||||||||||||||
| nriSocketFile = DefaultRuntimeNRISocketFile | ||||||||||||||||||
| } | ||||||||||||||||||
| setContainerEnv(container, "NRI_SOCKET", path.Join(DefaultRuntimeNRISocketTargetDir, path.Base(nriSocketFile)) | ||||||||||||||||||
|
|
||||||||||||||||||
| nriVolMountSocketName := "nri-socket" | ||||||||||||||||||
| nriVolMountSocket := corev1.VolumeMount{Name: nriVolMountSocketName, MountPath: DefaultRuntimeNRISocketTargetDir} | ||||||||||||||||||
| container.VolumeMounts = append(container.VolumeMounts, nriVolMountSocket) | ||||||||||||||||||
|
|
||||||||||||||||||
| nriSocketVol := corev1.Volume{Name: nriVolMountSocketName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(nriSocketFile), Type: ptr.To(corev1.HostPathDirectoryOrCreate)}}} | ||||||||||||||||||
| obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, nriSocketVol) | ||||||||||||||||||
|
frezbo marked this conversation as resolved.
|
||||||||||||||||||
| } else { | ||||||||||||||||||
| if runtime == gpuv1.Containerd.String() { | ||||||||||||||||||
| // Set the runtime class name that is to be configured for containerd | ||||||||||||||||||
| setContainerEnv(container, "CONTAINERD_RUNTIME_CLASS", getRuntimeClassName(config)) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| if err := transformRuntimeConfigAndSocketMounts(obj, runtime, container); err != nil { | ||||||||||||||||||
| return err | ||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| return nil | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // transformRuntimeConfigAndSocketMounts configures the toolkit container with the | ||||||||||||||||||
| // mounts and environment required for the toolkit installer to update the container | ||||||||||||||||||
| // runtime configuration (top-level config, drop-in config, and runtime socket). This | ||||||||||||||||||
| // is not required when the NRI plugin is enabled, since the installer does not modify | ||||||||||||||||||
| // the runtime configuration in that mode. | ||||||||||||||||||
|
Comment on lines
+1416
to
+1420
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's leave out the parts that are more or less obvious.
Suggested change
|
||||||||||||||||||
| func transformRuntimeConfigAndSocketMounts(obj *appsv1.DaemonSet, runtime string, container *corev1.Container) error { | ||||||||||||||||||
| // For runtime config files we have top-level configs and drop-in files. | ||||||||||||||||||
| // These are supported as follows: | ||||||||||||||||||
| // * Docker only supports top-level config files. | ||||||||||||||||||
|
|
@@ -1489,22 +1524,6 @@ func transformForRuntime(obj *appsv1.DaemonSet, config *gpuv1.ClusterPolicySpec, | |||||||||||||||||
| obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, socketVol) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| if config.CDI.IsNRIPluginEnabled() { | ||||||||||||||||||
| // setup mounts for the runtime NRI socket file | ||||||||||||||||||
| nriSocketFile := getContainerEnv(container, "NRI_SOCKET") | ||||||||||||||||||
| if nriSocketFile == "" { | ||||||||||||||||||
| nriSocketFile = DefaultRuntimeNRISocketFile | ||||||||||||||||||
| } | ||||||||||||||||||
| setContainerEnv(container, "NRI_SOCKET", DefaultRuntimeNRISocketTargetDir+path.Base(nriSocketFile)) | ||||||||||||||||||
|
|
||||||||||||||||||
| nriVolMountSocketName := "nri-socket" | ||||||||||||||||||
| nriVolMountSocket := corev1.VolumeMount{Name: nriVolMountSocketName, MountPath: DefaultRuntimeNRISocketTargetDir} | ||||||||||||||||||
| container.VolumeMounts = append(container.VolumeMounts, nriVolMountSocket) | ||||||||||||||||||
|
|
||||||||||||||||||
| nriSocketVol := corev1.Volume{Name: nriVolMountSocketName, VolumeSource: corev1.VolumeSource{HostPath: &corev1.HostPathVolumeSource{Path: path.Dir(nriSocketFile), Type: ptr.To(corev1.HostPathDirectoryOrCreate)}}} | ||||||||||||||||||
| obj.Spec.Template.Spec.Volumes = append(obj.Spec.Template.Spec.Volumes, nriSocketVol) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| return nil | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,7 @@ | |
| package controllers | ||
|
|
||
| import ( | ||
| "path" | ||
| "path/filepath" | ||
| "strings" | ||
| "testing" | ||
|
|
@@ -390,6 +391,7 @@ func TestTransformForRuntime(t *testing.T) { | |
| testCases := []struct { | ||
| description string | ||
| runtime gpuv1.Runtime | ||
| clusterPolicy *gpuv1.ClusterPolicySpec | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Instead of modifying the test case struct here, you can add a new test case to the |
||
| input Daemonset | ||
| expectedOutput Daemonset | ||
| }{ | ||
|
|
@@ -558,11 +560,43 @@ func TestTransformForRuntime(t *testing.T) { | |
| }, | ||
| }), | ||
| }, | ||
| { | ||
| // With the NRI plugin enabled, the toolkit installer does not modify the | ||
| // container runtime configuration, so no runtime config / drop-in / socket | ||
| // mounts are added -- only the NRI socket mount. | ||
| description: "containerd with NRI plugin enabled", | ||
| runtime: gpuv1.Containerd, | ||
| clusterPolicy: &gpuv1.ClusterPolicySpec{ | ||
| Operator: gpuv1.OperatorSpec{RuntimeClass: DefaultRuntimeClass}, | ||
| CDI: gpuv1.CDIConfigSpec{ | ||
| Enabled: ptr.To(true), | ||
| NRIPluginEnabled: ptr.To(true), | ||
| }, | ||
| }, | ||
| input: NewDaemonset(). | ||
| WithContainer(corev1.Container{Name: "test-ctr"}), | ||
| expectedOutput: NewDaemonset(). | ||
| WithHostPathVolume("nri-socket", filepath.Dir(DefaultRuntimeNRISocketFile), ptr.To(corev1.HostPathDirectoryOrCreate)). | ||
| WithContainer(corev1.Container{ | ||
| Name: "test-ctr", | ||
| Env: []corev1.EnvVar{ | ||
| {Name: "RUNTIME", Value: gpuv1.Containerd.String()}, | ||
| {Name: "NRI_SOCKET", Value: path.Join(DefaultRuntimeNRISocketTargetDir, path.Base(DefaultRuntimeNRISocketFile))}, | ||
| }, | ||
| VolumeMounts: []corev1.VolumeMount{ | ||
| {Name: "nri-socket", MountPath: DefaultRuntimeNRISocketTargetDir}, | ||
|
frezbo marked this conversation as resolved.
|
||
| }, | ||
| }), | ||
| }, | ||
| } | ||
|
|
||
| cp := &gpuv1.ClusterPolicySpec{Operator: gpuv1.OperatorSpec{RuntimeClass: DefaultRuntimeClass}} | ||
| defaultCP := &gpuv1.ClusterPolicySpec{Operator: gpuv1.OperatorSpec{RuntimeClass: DefaultRuntimeClass}} | ||
| for _, tc := range testCases { | ||
| t.Run(tc.description, func(t *testing.T) { | ||
| cp := defaultCP | ||
| if tc.clusterPolicy != nil { | ||
| cp = tc.clusterPolicy | ||
| } | ||
|
frezbo marked this conversation as resolved.
|
||
| // pass pointer to the target container | ||
| err := transformForRuntime(tc.input.DaemonSet, cp, tc.runtime.String(), &tc.input.Spec.Template.Spec.Containers[0]) | ||
| require.NoError(t, err) | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.