Skip to content

Commit

Permalink
Use opt-in features for CDI mode
Browse files Browse the repository at this point in the history
Signed-off-by: Evan Lezar <elezar@nvidia.com>
  • Loading branch information
elezar committed Sep 16, 2024
1 parent 38fc55a commit 4d9924d
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 18 deletions.
15 changes: 10 additions & 5 deletions internal/discover/ipc.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,15 @@ import (
type ipcMounts mounts

// NewIPCDiscoverer creates a discoverer for NVIDIA IPC sockets.
func NewIPCDiscoverer(logger logger.Interface, driverRoot string) (Discover, error) {
func NewIPCDiscoverer(logger logger.Interface, driverRoot string, allowPersistenced bool, allowFabricmanager bool) (Discover, error) {
var requiredSockets []string
if allowPersistenced {
requiredSockets = append(requiredSockets, "/nvidia-persistenced/socket")
}
if allowFabricmanager {
requiredSockets = append(requiredSockets, "/nvidia-fabricmanager/socket")
}

sockets := newMounts(
logger,
lookup.NewFileLocator(
Expand All @@ -34,10 +42,7 @@ func NewIPCDiscoverer(logger logger.Interface, driverRoot string) (Discover, err
lookup.WithCount(1),
),
driverRoot,
[]string{
"/nvidia-persistenced/socket",
"/nvidia-fabricmanager/socket",
},
requiredSockets,
)

mps := newMounts(
Expand Down
2 changes: 2 additions & 0 deletions internal/modifier/cdi.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devic
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
nvcdi.WithVendor("runtime.nvidia.com"),
nvcdi.WithClass("gpu"),
nvcdi.WithOptInFeature("allow-fabricmanager", cfg.Features.AllowFabricmanager.IsEnabled()),
nvcdi.WithOptInFeature("allow-persistenced", cfg.Features.AllowPersistenced.IsEnabled()),
)
if err != nil {
return nil, fmt.Errorf("failed to construct CDI library: %w", err)
Expand Down
2 changes: 1 addition & 1 deletion pkg/nvcdi/common-nvml.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
}

driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib)
driverFiles, err := l.NewDriverDiscoverer()
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
}
Expand Down
22 changes: 11 additions & 11 deletions pkg/nvcdi/driver-nvml.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,41 +34,41 @@ import (

// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
// The supplied NVML Library is used to query the expected driver version.
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) {
if r := nvmllib.Init(); r != nvml.SUCCESS {
func (l *nvmllib) NewDriverDiscoverer() (discover.Discover, error) {
if r := l.nvmllib.Init(); r != nvml.SUCCESS {
return nil, fmt.Errorf("failed to initialize NVML: %v", r)
}
defer func() {
if r := nvmllib.Shutdown(); r != nvml.SUCCESS {
logger.Warningf("failed to shutdown NVML: %v", r)
if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS {
l.logger.Warningf("failed to shutdown NVML: %v", r)
}
}()

version, r := nvmllib.SystemGetDriverVersion()
version, r := l.nvmllib.SystemGetDriverVersion()
if r != nvml.SUCCESS {
return nil, fmt.Errorf("failed to determine driver version: %v", r)
}

return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
return (*nvcdilib)(l).newDriverVersionDiscoverer(version)
}

func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) {
libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover, error) {
libraries, err := NewDriverLibraryDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, version)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err)
}

ipcs, err := discover.NewIPCDiscoverer(logger, driver.Root)
ipcs, err := discover.NewIPCDiscoverer(l.logger, l.driver.Root, l.optInFeatures["allow-persistenced"], l.optInFeatures["allow-fabricmanager"])
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err)
}

firmwares, err := NewDriverFirmwareDiscoverer(logger, driver.Root, version)
firmwares, err := NewDriverFirmwareDiscoverer(l.logger, l.driver.Root, version)
if err != nil {
return nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err)
}

binaries := NewDriverBinariesDiscoverer(logger, driver.Root)
binaries := NewDriverBinariesDiscoverer(l.logger, l.driver.Root)

d := discover.Merge(
libraries,
Expand Down
2 changes: 2 additions & 0 deletions pkg/nvcdi/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ type nvcdilib struct {
infolib info.Interface

mergedDeviceOptions []transform.MergedDeviceOption

optInFeatures map[string]bool
}

// New creates a new nvcdi library
Expand Down
2 changes: 1 addition & 1 deletion pkg/nvcdi/management.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
return nil, fmt.Errorf("failed to get CUDA version: %v", err)
}

driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version)
driver, err := (*nvcdilib)(m).newDriverVersionDiscoverer(version)
if err != nil {
return nil, fmt.Errorf("failed to create driver library discoverer: %v", err)
}
Expand Down
11 changes: 11 additions & 0 deletions pkg/nvcdi/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,14 @@ func WithLibrarySearchPaths(paths []string) Option {
o.librarySearchPaths = paths
}
}

// WithOptInFeature returns an Option that records whether the named opt-in
// feature is enabled.
// Entries stored by earlier options are kept; only the entry for the given
// feature is added or overwritten.
func WithOptInFeature(feature string, enabled bool) Option {
	return func(lib *nvcdilib) {
		// Allocate the map on first use; assigning into a nil map would panic.
		features := lib.optInFeatures
		if features == nil {
			features = make(map[string]bool)
			lib.optInFeatures = features
		}
		features[feature] = enabled
	}
}

0 comments on commit 4d9924d

Please sign in to comment.