mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-07-07 05:22:06 -07:00
update devicelib/nnlib to detect compute capability
This commit is contained in:
parent
29c2375f5a
commit
e8620919a7
5 changed files with 70 additions and 21 deletions
|
@ -131,8 +131,8 @@ class ConvertSubprocessor(SubprocessorBase):
|
|||
from nnlib import nnlib
|
||||
#model process ate all GPU mem,
|
||||
#so we cannot use GPU for any TF operations in converter processes (for example image_utils.TFLabConverter)
|
||||
#therefore forcing prefer_DeviceConfig to CPU only
|
||||
nnlib.prefer_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
|
||||
#therefore forcing active_DeviceConfig to CPU only
|
||||
nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
|
||||
|
||||
return None
|
||||
|
||||
|
|
|
@ -109,13 +109,13 @@ class ModelBase(object):
|
|||
self.write_preview_history = session_write_preview_history
|
||||
self.target_epoch = session_target_epoch
|
||||
self.batch_size = session_batch_size
|
||||
self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
|
||||
|
||||
self.device_config = nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options)
|
||||
nnlib.import_all ( nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options) )
|
||||
self.device_config = nnlib.active_DeviceConfig
|
||||
|
||||
self.created_vram_gb = self.options['created_vram_gb'] if 'created_vram_gb' in self.options.keys() else self.device_config.gpu_total_vram_gb
|
||||
|
||||
self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
|
||||
nnlib.import_all (self.device_config)
|
||||
self.onInitialize(**in_options)
|
||||
|
||||
if self.debug or self.batch_size == 0:
|
||||
|
|
|
@ -7,7 +7,9 @@ class devicelib:
|
|||
force_gpu_idxs = None
|
||||
choose_worst_gpu = False
|
||||
gpu_idxs = []
|
||||
gpu_names = []
|
||||
gpu_total_vram_gb = 0
|
||||
gpu_compute_caps = []
|
||||
allow_growth = True
|
||||
use_fp16 = False
|
||||
cpu_only = False
|
||||
|
@ -47,11 +49,15 @@ class devicelib:
|
|||
else:
|
||||
self.gpu_idxs = [gpu_idx]
|
||||
|
||||
if len(self.gpu_idxs) == 0:
|
||||
self.cpu_only = True
|
||||
else:
|
||||
self.cpu_only = False
|
||||
self.cpu_only = (len(self.gpu_idxs) == 0)
|
||||
|
||||
if not self.cpu_only:
|
||||
self.gpu_total_vram_gb = devicelib.getDeviceVRAMTotalGb ( self.gpu_idxs[0] )
|
||||
self.gpu_names = []
|
||||
self.gpu_compute_caps = []
|
||||
for gpu_idx in self.gpu_idxs:
|
||||
self.gpu_names += [devicelib.getDeviceName(gpu_idx)]
|
||||
self.gpu_compute_caps += [ devicelib.getDeviceComputeCapability ( gpu_idx ) ]
|
||||
|
||||
@staticmethod
|
||||
def hasNVML():
|
||||
|
@ -207,3 +213,15 @@ class devicelib:
|
|||
except:
|
||||
pass
|
||||
return result
|
||||
|
||||
@staticmethod
|
||||
def getDeviceComputeCapability(idx):
|
||||
result = 0
|
||||
try:
|
||||
nvmlInit()
|
||||
if idx < nvmlDeviceGetCount():
|
||||
result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
|
||||
nvmlShutdown()
|
||||
except:
|
||||
pass
|
||||
return result[0] * 10 + result[1]
|
|
@ -9,7 +9,7 @@ from .devicelib import devicelib
|
|||
class nnlib(object):
|
||||
device = devicelib #forwards nnlib.devicelib to device in order to use nnlib as standalone lib
|
||||
DeviceConfig = devicelib.Config
|
||||
prefer_DeviceConfig = DeviceConfig() #default is one best GPU
|
||||
active_DeviceConfig = DeviceConfig() #default is one best GPU
|
||||
|
||||
dlib = None
|
||||
keras = None
|
||||
|
@ -126,11 +126,6 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
|||
if nnlib.tf is not None:
|
||||
return nnlib.code_import_tf
|
||||
|
||||
if device_config is None:
|
||||
device_config = nnlib.prefer_DeviceConfig
|
||||
else:
|
||||
nnlib.prefer_DeviceConfig = device_config
|
||||
|
||||
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
|
||||
suppressor = std_utils.suppress_stdout_stderr().__enter__()
|
||||
else:
|
||||
|
@ -144,6 +139,26 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
|||
import tensorflow as tf
|
||||
nnlib.tf = tf
|
||||
|
||||
if device_config is None:
|
||||
device_config = nnlib.active_DeviceConfig
|
||||
|
||||
tf_ver = [int(x) for x in tf.VERSION.split('.')]
|
||||
req_cap = 35
|
||||
if tf_ver[0] > 1 or (tf_ver[0] == 1 and tf_ver[1] >= 11):
|
||||
req_cap = 37
|
||||
|
||||
if not device_config.cpu_only and device_config.gpu_compute_caps[0] < req_cap:
|
||||
if suppressor is not None:
|
||||
suppressor.__exit__()
|
||||
|
||||
print ("%s does not meet minimum required compute capability: %d.%d. Falling back to CPU mode." % ( device_config.gpu_names[0], req_cap // 10, req_cap % 10 ) )
|
||||
device_config = nnlib.DeviceConfig(cpu_only=True)
|
||||
|
||||
if suppressor is not None:
|
||||
suppressor.__enter__()
|
||||
|
||||
nnlib.active_DeviceConfig = device_config
|
||||
|
||||
if device_config.cpu_only:
|
||||
config = tf.ConfigProto( device_count = {'GPU': 0} )
|
||||
else:
|
||||
|
@ -161,6 +176,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
|||
if suppressor is not None:
|
||||
suppressor.__exit__()
|
||||
|
||||
|
||||
nnlib.__initialize_tf_functions()
|
||||
nnlib.code_import_tf = compile (nnlib.code_import_tf_string,'','exec')
|
||||
return nnlib.code_import_tf
|
||||
|
@ -367,7 +383,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
|||
return nnlib.code_import_keras
|
||||
|
||||
nnlib.import_tf(device_config)
|
||||
device_config = nnlib.prefer_DeviceConfig
|
||||
device_config = nnlib.active_DeviceConfig
|
||||
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
|
||||
suppressor = std_utils.suppress_stdout_stderr().__enter__()
|
||||
|
||||
|
|
|
@ -1699,3 +1699,18 @@ def nvmlDeviceGetTopologyCommonAncestor(device1, device2):
|
|||
ret = fn(device1, device2, byref(c_level))
|
||||
_nvmlCheckReturn(ret)
|
||||
return c_level.value
|
||||
|
||||
#DeepFaceLab additions
|
||||
def nvmlDeviceGetCudaComputeCapability(device):
    """Query NVML for the CUDA compute capability of *device*.

    Returns a ``(major, minor)`` tuple of ints.
    Raises ``NVMLError`` when the underlying NVML call does not succeed.
    """
    major = c_int()
    minor = c_int()

    # Resolve the native entry point lazily, like the rest of this module.
    query = _nvmlGetFunctionPointer("nvmlDeviceGetCudaComputeCapability")
    status = query(device, byref(major), byref(minor))

    if status != NVML_SUCCESS:
        raise NVMLError(status)

    return major.value, minor.value
|
Loading…
Add table
Add a link
Reference in a new issue