From a819cfdab413d5699dd5fc90e92144a69fcb0acb Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 20 Sep 2024 20:26:00 +0200 Subject: [PATCH 1/2] Revert "Merge pull request #703 from elezar/fix-no-persistenced-flag" This reverts commit c02b144ed4de77261178c883c76b5cedbfd361a3, reversing changes made to b061446694a04e1f63471016eb577151d1830f80. Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime-hook/main.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmd/nvidia-container-runtime-hook/main.go b/cmd/nvidia-container-runtime-hook/main.go index a4a963f4..f53a649a 100644 --- a/cmd/nvidia-container-runtime-hook/main.go +++ b/cmd/nvidia-container-runtime-hook/main.go @@ -90,6 +90,11 @@ func doPrestart() { args := []string{getCLIPath(cli)} + // Only include the nvidia-persistenced socket if it is explicitly enabled. + if !hook.Features.IncludePersistencedSocket.IsEnabled() { + args = append(args, "--no-persistenced") + } + if cli.Root != "" { args = append(args, fmt.Sprintf("--root=%s", cli.Root)) } @@ -112,11 +117,6 @@ func doPrestart() { } args = append(args, "configure") - // Only include the nvidia-persistenced socket if it is explicitly enabled. - if !hook.Features.IncludePersistencedSocket.IsEnabled() { - args = append(args, "--no-persistenced") - } - if ldconfigPath := cli.NormalizeLDConfigPath(); ldconfigPath != "" { args = append(args, fmt.Sprintf("--ldconfig=%s", ldconfigPath)) } From 5145b0a4b6ff2b3a7f54c20b689ca420f3940f4f Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Fri, 20 Sep 2024 20:26:16 +0200 Subject: [PATCH 2/2] Revert "Merge pull request #694 from elezar/add-opt-in-to-sockets" This reverts commit b061446694a04e1f63471016eb577151d1830f80, reversing changes made to c490baab6331ee0b7cb56f9d3c619207adb364e5. Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime-hook/main.go | 6 --- cmd/nvidia-ctk/cdi/generate/generate.go | 8 --- internal/config/features.go | 65 +++++++++++------------ internal/discover/ipc.go | 13 ++--- internal/modifier/cdi.go | 1 - internal/modifier/gated.go | 8 +-- pkg/nvcdi/common-nvml.go | 2 +- pkg/nvcdi/driver-nvml.go | 22 ++++---- pkg/nvcdi/lib.go | 5 -- pkg/nvcdi/management.go | 2 +- pkg/nvcdi/options.go | 11 ---- tools/container/toolkit/toolkit.go | 12 ----- 12 files changed, 52 insertions(+), 103 deletions(-) diff --git a/cmd/nvidia-container-runtime-hook/main.go b/cmd/nvidia-container-runtime-hook/main.go index f53a649a..faaf0b51 100644 --- a/cmd/nvidia-container-runtime-hook/main.go +++ b/cmd/nvidia-container-runtime-hook/main.go @@ -89,12 +89,6 @@ func doPrestart() { rootfs := getRootfsPath(container) args := []string{getCLIPath(cli)} - - // Only include the nvidia-persistenced socket if it is explicitly enabled. - if !hook.Features.IncludePersistencedSocket.IsEnabled() { - args = append(args, "--no-persistenced") - } - if cli.Root != "" { args = append(args, fmt.Sprintf("--root=%s", cli.Root)) } diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index 9c2da04f..9f9e994b 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -60,8 +60,6 @@ type options struct { files cli.StringSlice ignorePatterns cli.StringSlice } - - includePersistencedSocket bool } // NewCommand constructs a generate-cdi command with the specified logger @@ -171,11 +169,6 @@ func (m command) build() *cli.Command { Usage: "Specify a pattern the CSV mount specifications.", Destination: &opts.csv.ignorePatterns, }, - &cli.BoolFlag{ - Name: "include-persistenced-socket", - Usage: "Include the nvidia-persistenced socket in the generated CDI specification.", - Destination: &opts.includePersistencedSocket, - }, } return &c @@ -280,7 +273,6 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) { nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()), nvcdi.WithCSVFiles(opts.csv.files.Value()), nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()), - nvcdi.WithOptInFeature("include-persistenced-socket", opts.includePersistencedSocket), ) if err != nil { return nil, fmt.Errorf("failed to create CDI library: %v", err) diff --git a/internal/config/features.go b/internal/config/features.go index cbbecf3c..dfc6b165 100644 --- a/internal/config/features.go +++ b/internal/config/features.go @@ -19,11 +19,10 @@ package config type featureName string const ( - FeatureGDS = featureName("gds") - FeatureMOFED = featureName("mofed") - FeatureNVSWITCH = featureName("nvswitch") - FeatureGDRCopy = featureName("gdrcopy") - FeatureIncludePersistencedSocket = featureName("include-persistenced-socket") + FeatureGDS = featureName("gds") + FeatureMOFED = featureName("mofed") + FeatureNVSWITCH = featureName("nvswitch") + FeatureGDRCopy = featureName("gdrcopy") ) // features specifies a set of named features. @@ -32,57 +31,53 @@ type features struct { MOFED *feature `toml:"mofed,omitempty"` NVSWITCH *feature `toml:"nvswitch,omitempty"` GDRCopy *feature `toml:"gdrcopy,omitempty"` - // IncludePersistencedSocket enables the injection of the nvidia-persistenced - // socket into containers. - IncludePersistencedSocket *feature `toml:"include-persistenced-socket,omitempty"` } type feature bool -// IsEnabledInEnvironment checks whether a specified named feature is enabled. +// IsEnabled checks whether a specified named feature is enabled. // An optional list of environments to check for feature-specific environment // variables can also be supplied. -func (fs features) IsEnabledInEnvironment(n featureName, in ...getenver) bool { +func (fs features) IsEnabled(n featureName, in ...getenver) bool { + featureEnvvars := map[featureName]string{ + FeatureGDS: "NVIDIA_GDS", + FeatureMOFED: "NVIDIA_MOFED", + FeatureNVSWITCH: "NVIDIA_NVSWITCH", + FeatureGDRCopy: "NVIDIA_GDRCOPY", + } + + envvar := featureEnvvars[n] switch n { - // Features with envvar overrides case FeatureGDS: - return fs.GDS.isEnabledWithEnvvarOverride("NVIDIA_GDS", in...) + return fs.GDS.isEnabled(envvar, in...) case FeatureMOFED: - return fs.MOFED.isEnabledWithEnvvarOverride("NVIDIA_MOFED", in...) + return fs.MOFED.isEnabled(envvar, in...) case FeatureNVSWITCH: - return fs.NVSWITCH.isEnabledWithEnvvarOverride("NVIDIA_NVSWITCH", in...) + return fs.NVSWITCH.isEnabled(envvar, in...) case FeatureGDRCopy: - return fs.GDRCopy.isEnabledWithEnvvarOverride("NVIDIA_GDRCOPY", in...) - // Features without envvar overrides - case FeatureIncludePersistencedSocket: - return fs.IncludePersistencedSocket.IsEnabled() + return fs.GDRCopy.isEnabled(envvar, in...) default: return false } } -// IsEnabled checks whether a feature is enabled. -func (f *feature) IsEnabled() bool { - if f != nil { - return bool(*f) - } - return false -} - -// isEnabledWithEnvvarOverride checks whether a feature is enabled and allows an envvar to overide the feature. +// isEnabled checks whether a feature is enabled. // If the enabled value is explicitly set, this is returned, otherwise the // associated envvar is checked in the specified getenver for the string "enabled" // A CUDA container / image can be passed here. -func (f *feature) isEnabledWithEnvvarOverride(envvar string, ins ...getenver) bool { - if envvar != "" { - for _, in := range ins { - if in.Getenv(envvar) == "enabled" { - return true - } +func (f *feature) isEnabled(envvar string, ins ...getenver) bool { + if f != nil { + return bool(*f) + } + if envvar == "" { + return false + } + for _, in := range ins { + if in.Getenv(envvar) == "enabled" { + return true } } - - return f.IsEnabled() + return false } type getenver interface { diff --git a/internal/discover/ipc.go b/internal/discover/ipc.go index f3d106b5..f636290f 100644 --- a/internal/discover/ipc.go +++ b/internal/discover/ipc.go @@ -24,13 +24,7 @@ import ( type ipcMounts mounts // NewIPCDiscoverer creats a discoverer for NVIDIA IPC sockets. -func NewIPCDiscoverer(logger logger.Interface, driverRoot string, includePersistencedSocket bool) (Discover, error) { - var requiredSockets []string - if includePersistencedSocket { - requiredSockets = append(requiredSockets, "/nvidia-persistenced/socket") - } - requiredSockets = append(requiredSockets, "/nvidia-fabricmanager/socket") - +func NewIPCDiscoverer(logger logger.Interface, driverRoot string) (Discover, error) { sockets := newMounts( logger, lookup.NewFileLocator( @@ -40,7 +34,10 @@ func NewIPCDiscoverer(logger logger.Interface, driverRoot string, includePersist lookup.WithCount(1), ), driverRoot, - requiredSockets, + []string{ + "/nvidia-persistenced/socket", + "/nvidia-fabricmanager/socket", + }, ) mps := newMounts( diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go index d3e4e935..c5af4f88 100644 --- a/internal/modifier/cdi.go +++ b/internal/modifier/cdi.go @@ -189,7 +189,6 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devic nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root), nvcdi.WithVendor("runtime.nvidia.com"), nvcdi.WithClass("gpu"), - nvcdi.WithOptInFeature("include-persistenced-socket", cfg.Features.IncludePersistencedSocket.IsEnabled()), ) if err != nil { return nil, fmt.Errorf("failed to construct CDI library: %w", err) diff --git a/internal/modifier/gated.go b/internal/modifier/gated.go index 70322b35..5bed3eaf 100644 --- a/internal/modifier/gated.go +++ b/internal/modifier/gated.go @@ -46,7 +46,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image driverRoot := cfg.NVIDIAContainerCLIConfig.Root devRoot := cfg.NVIDIAContainerCLIConfig.Root - if cfg.Features.IsEnabledInEnvironment(config.FeatureGDS, image) { + if cfg.Features.IsEnabled(config.FeatureGDS, image) { d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot) if err != nil { return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err) @@ -54,7 +54,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image discoverers = append(discoverers, d) } - if cfg.Features.IsEnabledInEnvironment(config.FeatureMOFED, image) { + if cfg.Features.IsEnabled(config.FeatureMOFED, image) { d, err := discover.NewMOFEDDiscoverer(logger, devRoot) if err != nil { return nil, fmt.Errorf("failed to construct discoverer for MOFED devices: %w", err) @@ -62,7 +62,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image discoverers = append(discoverers, d) } - if cfg.Features.IsEnabledInEnvironment(config.FeatureNVSWITCH, image) { + if cfg.Features.IsEnabled(config.FeatureNVSWITCH, image) { d, err := discover.NewNvSwitchDiscoverer(logger, devRoot) if err != nil { return nil, fmt.Errorf("failed to construct discoverer for NVSWITCH devices: %w", err) @@ -70,7 +70,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image discoverers = append(discoverers, d) } - if cfg.Features.IsEnabledInEnvironment(config.FeatureGDRCopy, image) { + if cfg.Features.IsEnabled(config.FeatureGDRCopy, image) { d, err := discover.NewGDRCopyDiscoverer(logger, devRoot) if err != nil { return nil, fmt.Errorf("failed to construct discoverer for GDRCopy devices: %w", err) diff --git a/pkg/nvcdi/common-nvml.go b/pkg/nvcdi/common-nvml.go index 6e9661cb..4dd1bc35 100644 --- a/pkg/nvcdi/common-nvml.go +++ b/pkg/nvcdi/common-nvml.go @@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) { l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err) } - driverFiles, err := l.NewDriverDiscoverer() + driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err) } diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 59ff91c2..8fb39888 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -34,41 +34,41 @@ import ( // NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation. // The supplied NVML Library is used to query the expected driver version. -func (l *nvmllib) NewDriverDiscoverer() (discover.Discover, error) { - if r := l.nvmllib.Init(); r != nvml.SUCCESS { +func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) { + if r := nvmllib.Init(); r != nvml.SUCCESS { return nil, fmt.Errorf("failed to initialize NVML: %v", r) } defer func() { - if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS { - l.logger.Warningf("failed to shutdown NVML: %v", r) + if r := nvmllib.Shutdown(); r != nvml.SUCCESS { + logger.Warningf("failed to shutdown NVML: %v", r) } }() - version, r := l.nvmllib.SystemGetDriverVersion() + version, r := nvmllib.SystemGetDriverVersion() if r != nvml.SUCCESS { return nil, fmt.Errorf("failed to determine driver version: %v", r) } - return (*nvcdilib)(l).newDriverVersionDiscoverer(version) + return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version) } -func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover, error) { - libraries, err := NewDriverLibraryDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, version) +func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) { + libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err) } - ipcs, err := discover.NewIPCDiscoverer(l.logger, l.driver.Root, l.optInFeatures["include-persistenced-socket"]) + ipcs, err := discover.NewIPCDiscoverer(logger, driver.Root) if err != nil { return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err) } - firmwares, err := NewDriverFirmwareDiscoverer(l.logger, l.driver.Root, version) + firmwares, err := NewDriverFirmwareDiscoverer(logger, driver.Root, version) if err != nil { return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err) } - binaries := NewDriverBinariesDiscoverer(l.logger, l.driver.Root) + binaries := NewDriverBinariesDiscoverer(logger, driver.Root) d := discover.Merge( libraries, diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index e535763b..d2db3b6c 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -63,8 +63,6 @@ type nvcdilib struct { infolib info.Interface mergedDeviceOptions []transform.MergedDeviceOption - - optInFeatures map[string]bool } // New creates a new nvcdi library @@ -132,9 +130,6 @@ func New(opts ...Option) (Interface, error) { if l.vendor == "" { l.vendor = "management.nvidia.com" } - // For management specifications we always allow the fabricmanager and - // persistenced sockets. - WithOptInFeature("include-persistenced-socket", true)(l) lib = (*managementlib)(l) case ModeNvml: lib = (*nvmllib)(l) diff --git a/pkg/nvcdi/management.go b/pkg/nvcdi/management.go index 8f7709af..4648e5bb 100644 --- a/pkg/nvcdi/management.go +++ b/pkg/nvcdi/management.go @@ -66,7 +66,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) { return nil, fmt.Errorf("failed to get CUDA version: %v", err) } - driver, err := (*nvcdilib)(m).newDriverVersionDiscoverer(version) + driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version) if err != nil { return nil, fmt.Errorf("failed to create driver library discoverer: %v", err) } diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index 762b5ac6..417687b9 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -155,14 +155,3 @@ func WithLibrarySearchPaths(paths []string) Option { o.librarySearchPaths = paths } } - -// WithOptInFeature sets a specific opt-in feature. -// Note that previous opt-in-features are not removed. -func WithOptInFeature(feature string, enabled bool) Option { - return func(n *nvcdilib) { - if n.optInFeatures == nil { - n.optInFeatures = make(map[string]bool) - } - n.optInFeatures[feature] = enabled - } -} diff --git a/tools/container/toolkit/toolkit.go b/tools/container/toolkit/toolkit.go index c9d0db6a..8175ed4e 100644 --- a/tools/container/toolkit/toolkit.go +++ b/tools/container/toolkit/toolkit.go @@ -81,8 +81,6 @@ type options struct { acceptNVIDIAVisibleDevicesAsVolumeMounts bool ignoreErrors bool - - optInFeatures cli.StringSlice } func main() { @@ -252,12 +250,6 @@ func main() { Destination: &opts.createDeviceNodes, EnvVars: []string{"CREATE_DEVICE_NODES"}, }, - &cli.StringSliceFlag{ - Name: "opt-in-feature", - Hidden: true, - Destination: &opts.optInFeatures, - EnvVars: []string{"NVIDIA_CONTAINER_TOOLKIT_OPT_IN_FEATURES"}, - }, } // Update the subcommand flags with the common subcommand flags @@ -526,10 +518,6 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai "nvidia-container-runtime.runtimes": opts.ContainerRuntimeRuntimes, "nvidia-container-cli.debug": opts.ContainerCLIDebug, } - for _, feature := range opts.optInFeatures.Value() { - optionalConfigValues["features."+feature] = true - } - for key, value := range optionalConfigValues { if !c.IsSet(key) { log.Infof("Skipping unset option: %v", key)