diff --git a/mainscripts/Converter.py b/mainscripts/Converter.py
index ae34362..80b4738 100644
--- a/mainscripts/Converter.py
+++ b/mainscripts/Converter.py
@@ -131,8 +131,8 @@ class ConvertSubprocessor(SubprocessorBase):
 
             from nnlib import nnlib
             #model process ate all GPU mem,
             #so we cannot use GPU for any TF operations in converter processes (for example image_utils.TFLabConverter)
-            #therefore forcing prefer_DeviceConfig to CPU only
-            nnlib.prefer_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
+            #therefore forcing active_DeviceConfig to CPU only
+            nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
 
         return None
diff --git a/models/ModelBase.py b/models/ModelBase.py
index 654068a..d421d4d 100644
--- a/models/ModelBase.py
+++ b/models/ModelBase.py
@@ -109,13 +109,13 @@ class ModelBase(object):
             self.write_preview_history = session_write_preview_history
             self.target_epoch = session_target_epoch
             self.batch_size = session_batch_size
-
-        self.device_config = nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options)
-
+        self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
+
+        nnlib.import_all ( nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options) )
+        self.device_config = nnlib.active_DeviceConfig
+
         self.created_vram_gb = self.options['created_vram_gb'] if 'created_vram_gb' in self.options.keys() else self.device_config.gpu_total_vram_gb
 
-        self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
-        nnlib.import_all (self.device_config)
         self.onInitialize(**in_options)
 
         if self.debug or self.batch_size == 0:
diff --git a/nnlib/devicelib.py b/nnlib/devicelib.py
index 5f1bd00..c0248dd 100644
--- a/nnlib/devicelib.py
+++ b/nnlib/devicelib.py
@@ -7,7 +7,9 @@ class devicelib:
         force_gpu_idxs = None
         choose_worst_gpu = False
         gpu_idxs = []
+        gpu_names = []
         gpu_total_vram_gb = 0
+        gpu_compute_caps = []
         allow_growth = True
         use_fp16 = False
         cpu_only = False
@@ -47,12 +49,16 @@ class devicelib:
                 else:
                     self.gpu_idxs = [gpu_idx]
 
-            if len(self.gpu_idxs) == 0:
-                self.cpu_only = True
-            else:
-                self.cpu_only = False
+            self.cpu_only = (len(self.gpu_idxs) == 0)
+
+            if not self.cpu_only:
                 self.gpu_total_vram_gb = devicelib.getDeviceVRAMTotalGb ( self.gpu_idxs[0] )
-
+                self.gpu_names = []
+                self.gpu_compute_caps = []
+                for gpu_idx in self.gpu_idxs:
+                    self.gpu_names += [devicelib.getDeviceName(gpu_idx)]
+                    self.gpu_compute_caps += [ devicelib.getDeviceComputeCapability ( gpu_idx ) ]
+
     @staticmethod
     def hasNVML():
         try:
@@ -206,4 +212,16 @@ class devicelib:
                 nvmlShutdown()
             except:
                 pass
-        return result
\ No newline at end of file
+        return result
+
+    @staticmethod
+    def getDeviceComputeCapability(idx):
+        result = (0, 0)
+        try:
+            nvmlInit()
+            if idx < nvmlDeviceGetCount():
+                result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
+            nvmlShutdown()
+        except:
+            pass
+        return result[0] * 10 + result[1]
\ No newline at end of file
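For reference, getDeviceComputeCapability packs NVML's (major, minor) pair into a single integer (major * 10 + minor, so capability 6.1 becomes 61), which is the form the req_cap check in nnlib.py below compares against. A minimal sketch of how the new Config fields could be inspected, assuming the patched devicelib is importable and NVML is available (this snippet is illustrative only, not part of the patch):

    from nnlib.devicelib import devicelib

    config = devicelib.Config()  # default behaviour: pick the single best GPU
    if config.cpu_only:
        print("No usable GPU detected; CPU-only mode.")
    else:
        for idx, name, cap in zip(config.gpu_idxs, config.gpu_names, config.gpu_compute_caps):
            # cap is packed as major * 10 + minor, e.g. 61 for compute capability 6.1
            print("GPU %d: %s, compute capability %d.%d" % (idx, name, cap // 10, cap % 10))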
diff --git a/nnlib/nnlib.py b/nnlib/nnlib.py
index f730b2a..2ebf116 100644
--- a/nnlib/nnlib.py
+++ b/nnlib/nnlib.py
@@ -9,7 +9,7 @@ from .devicelib import devicelib
 class nnlib(object):
     device = devicelib #forwards nnlib.devicelib to device in order to use nnlib as standalone lib
     DeviceConfig = devicelib.Config
-    prefer_DeviceConfig = DeviceConfig() #default is one best GPU
+    active_DeviceConfig = DeviceConfig() #default is one best GPU
 
     dlib = None
     keras = None
@@ -125,12 +125,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
     def import_tf(device_config = None):
         if nnlib.tf is not None:
             return nnlib.code_import_tf
-
-        if device_config is None:
-            device_config = nnlib.prefer_DeviceConfig
-        else:
-            nnlib.prefer_DeviceConfig = device_config
-
+
         if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
             suppressor = std_utils.suppress_stdout_stderr().__enter__()
         else:
@@ -144,6 +139,26 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
         import tensorflow as tf
         nnlib.tf = tf
 
+        if device_config is None:
+            device_config = nnlib.active_DeviceConfig
+
+        tf_ver = [int(x) for x in tf.VERSION.split('.')]
+        req_cap = 35
+        if tf_ver[0] > 1 or (tf_ver[0] == 1 and tf_ver[1] >= 11):
+            req_cap = 37
+
+        if not device_config.cpu_only and device_config.gpu_compute_caps[0] < req_cap:
+            if suppressor is not None:
+                suppressor.__exit__()
+
+            print ("%s does not meet the minimum required compute capability %d.%d. Falling back to CPU mode." % ( device_config.gpu_names[0], req_cap // 10, req_cap % 10 ) )
+            device_config = nnlib.DeviceConfig(cpu_only=True)
+
+            if suppressor is not None:
+                suppressor.__enter__()
+
+        nnlib.active_DeviceConfig = device_config
+
         if device_config.cpu_only:
             config = tf.ConfigProto( device_count = {'GPU': 0} )
         else:
@@ -160,6 +175,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
 
         if suppressor is not None:
             suppressor.__exit__()
+
         nnlib.__initialize_tf_functions()
         nnlib.code_import_tf = compile (nnlib.code_import_tf_string,'','exec')
 
@@ -367,7 +383,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
             return nnlib.code_import_keras
 
         nnlib.import_tf(device_config)
-        device_config = nnlib.prefer_DeviceConfig
+        device_config = nnlib.active_DeviceConfig
 
         if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
             suppressor = std_utils.suppress_stdout_stderr().__enter__()
diff --git a/nnlib/pynvml.py b/nnlib/pynvml.py
index c4b2600..113fdc2 100644
--- a/nnlib/pynvml.py
+++ b/nnlib/pynvml.py
@@ -1699,3 +1699,18 @@ def nvmlDeviceGetTopologyCommonAncestor(device1, device2):
     ret = fn(device1, device2, byref(c_level))
     _nvmlCheckReturn(ret)
     return c_level.value
+
+#DeepFaceLab additions
+def nvmlDeviceGetCudaComputeCapability(device):
+    c_major = c_int()
+    c_minor = c_int()
+    fn = _nvmlGetFunctionPointer("nvmlDeviceGetCudaComputeCapability")
+
+    # query the (major, minor) compute capability of the device
+    ret = fn(device, byref(c_major), byref(c_minor))
+
+    # raise on any NVML error
+    if (ret != NVML_SUCCESS):
+        raise NVMLError(ret)
+
+    return c_major.value, c_minor.value
\ No newline at end of file
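The capability gate added to import_tf amounts to a small pure function; a sketch restating the same rule (the helper name min_required_cap is hypothetical, not part of the diff):

    def min_required_cap(tf_version_string):
        tf_ver = [int(x) for x in tf_version_string.split('.')]
        # per the check above: TF 1.11+ requires compute capability 3.7, older builds 3.5
        if tf_ver[0] > 1 or (tf_ver[0] == 1 and tf_ver[1] >= 11):
            return 37
        return 35

    assert min_required_cap('1.10.0') == 35
    assert min_required_cap('1.11.0') == 37

With this change, a GPU whose packed capability (devicelib.Config.gpu_compute_caps) falls below the threshold is switched to a CPU-only DeviceConfig up front, rather than being handed to TensorFlow.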