mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-07-07 05:22:06 -07:00
update devicelib/nnlib to detect compute capability
This commit is contained in:
parent
29c2375f5a
commit
e8620919a7
5 changed files with 70 additions and 21 deletions
|
@ -131,8 +131,8 @@ class ConvertSubprocessor(SubprocessorBase):
|
|||
from nnlib import nnlib
|
||||
#model process ate all GPU mem,
|
||||
#so we cannot use GPU for any TF operations in converter processes (for example image_utils.TFLabConverter)
|
||||
#therefore forcing prefer_DeviceConfig to CPU only
|
||||
nnlib.prefer_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
|
||||
#therefore forcing active_DeviceConfig to CPU only
|
||||
nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
|
||||
|
||||
return None
|
||||
|
||||
|
|
|
@ -109,13 +109,13 @@ class ModelBase(object):
|
|||
self.write_preview_history = session_write_preview_history
|
||||
self.target_epoch = session_target_epoch
|
||||
self.batch_size = session_batch_size
|
||||
self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
|
||||
|
||||
self.device_config = nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options)
|
||||
nnlib.import_all ( nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options) )
|
||||
self.device_config = nnlib.active_DeviceConfig
|
||||
|
||||
self.created_vram_gb = self.options['created_vram_gb'] if 'created_vram_gb' in self.options.keys() else self.device_config.gpu_total_vram_gb
|
||||
|
||||
self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
|
||||
nnlib.import_all (self.device_config)
|
||||
self.onInitialize(**in_options)
|
||||
|
||||
if self.debug or self.batch_size == 0:
|
||||
|
|
|
@ -7,7 +7,9 @@ class devicelib:
|
|||
force_gpu_idxs = None
|
||||
choose_worst_gpu = False
|
||||
gpu_idxs = []
|
||||
gpu_names = []
|
||||
gpu_total_vram_gb = 0
|
||||
gpu_compute_caps = []
|
||||
allow_growth = True
|
||||
use_fp16 = False
|
||||
cpu_only = False
|
||||
|
@ -47,11 +49,15 @@ class devicelib:
|
|||
else:
|
||||
self.gpu_idxs = [gpu_idx]
|
||||
|
||||
if len(self.gpu_idxs) == 0:
|
||||
self.cpu_only = True
|
||||
else:
|
||||
self.cpu_only = False
|
||||
self.cpu_only = (len(self.gpu_idxs) == 0)
|
||||
|
||||
if not self.cpu_only:
|
||||
self.gpu_total_vram_gb = devicelib.getDeviceVRAMTotalGb ( self.gpu_idxs[0] )
|
||||
self.gpu_names = []
|
||||
self.gpu_compute_caps = []
|
||||
for gpu_idx in self.gpu_idxs:
|
||||
self.gpu_names += [devicelib.getDeviceName(gpu_idx)]
|
||||
self.gpu_compute_caps += [ devicelib.getDeviceComputeCapability ( gpu_idx ) ]
|
||||
|
||||
@staticmethod
|
||||
def hasNVML():
|
||||
|
@ -207,3 +213,15 @@ class devicelib:
|
|||
except:
|
||||
pass
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def getDeviceComputeCapability(idx):
|
||||
result = 0
|
||||
try:
|
||||
nvmlInit()
|
||||
if idx < nvmlDeviceGetCount():
|
||||
result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
|
||||
nvmlShutdown()
|
||||
except:
|
||||
pass
|
||||
return result[0] * 10 + result[1]
|
|
@ -9,7 +9,7 @@ from .devicelib import devicelib
|
|||
class nnlib(object):
|
||||
device = devicelib #forwards nnlib.devicelib to device in order to use nnlib as standalone lib
|
||||
DeviceConfig = devicelib.Config
|
||||
prefer_DeviceConfig = DeviceConfig() #default is one best GPU
|
||||
active_DeviceConfig = DeviceConfig() #default is one best GPU
|
||||
|
||||
dlib = None
|
||||
keras = None
|
||||
|
@ -126,11 +126,6 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
|||
if nnlib.tf is not None:
|
||||
return nnlib.code_import_tf
|
||||
|
||||
if device_config is None:
|
||||
device_config = nnlib.prefer_DeviceConfig
|
||||
else:
|
||||
nnlib.prefer_DeviceConfig = device_config
|
||||
|
||||
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
|
||||
suppressor = std_utils.suppress_stdout_stderr().__enter__()
|
||||
else:
|
||||
|
@ -144,6 +139,26 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
|||
import tensorflow as tf
|
||||
nnlib.tf = tf
|
||||
|
||||
if device_config is None:
|
||||
device_config = nnlib.active_DeviceConfig
|
||||
|
||||
tf_ver = [int(x) for x in tf.VERSION.split('.')]
|
||||
req_cap = 35
|
||||
if tf_ver[0] > 1 or (tf_ver[0] == 1 and tf_ver[1] >= 11):
|
||||
req_cap = 37
|
||||
|
||||
if not device_config.cpu_only and device_config.gpu_compute_caps[0] < req_cap:
|
||||
if suppressor is not None:
|
||||
suppressor.__exit__()
|
||||
|
||||
print ("%s does not meet minimum required compute capability: %d.%d. Falling back to CPU mode." % ( device_config.gpu_names[0], req_cap // 10, req_cap % 10 ) )
|
||||
device_config = nnlib.DeviceConfig(cpu_only=True)
|
||||
|
||||
if suppressor is not None:
|
||||
suppressor.__enter__()
|
||||
|
||||
nnlib.active_DeviceConfig = device_config
|
||||
|
||||
if device_config.cpu_only:
|
||||
config = tf.ConfigProto( device_count = {'GPU': 0} )
|
||||
else:
|
||||
|
@ -161,6 +176,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
|||
if suppressor is not None:
|
||||
suppressor.__exit__()
|
||||
|
||||
|
||||
nnlib.__initialize_tf_functions()
|
||||
nnlib.code_import_tf = compile (nnlib.code_import_tf_string,'','exec')
|
||||
return nnlib.code_import_tf
|
||||
|
@ -367,7 +383,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
|||
return nnlib.code_import_keras
|
||||
|
||||
nnlib.import_tf(device_config)
|
||||
device_config = nnlib.prefer_DeviceConfig
|
||||
device_config = nnlib.active_DeviceConfig
|
||||
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
|
||||
suppressor = std_utils.suppress_stdout_stderr().__enter__()
|
||||
|
||||
|
|
|
@ -1699,3 +1699,18 @@ def nvmlDeviceGetTopologyCommonAncestor(device1, device2):
|
|||
ret = fn(device1, device2, byref(c_level))
|
||||
_nvmlCheckReturn(ret)
|
||||
return c_level.value
|
||||
|
||||
#DeepFaceLab additions
|
||||
def nvmlDeviceGetCudaComputeCapability(device):
    """Query NVML for the CUDA compute capability of *device*.

    Returns a ``(major, minor)`` tuple of ints.
    Raises ``NVMLError`` when the underlying NVML call does not succeed.
    """
    major = c_int()
    minor = c_int()

    # Resolve the native entry point lazily, like the rest of this module.
    query = _nvmlGetFunctionPointer("nvmlDeviceGetCudaComputeCapability")
    status = query(device, byref(major), byref(minor))

    if status != NVML_SUCCESS:
        raise NVMLError(status)

    return major.value, minor.value
|
Loading…
Add table
Add a link
Reference in a new issue