Mirror of https://github.com/iperov/DeepFaceLab.git, synced 2025-07-06 21:12:07 -07:00.
update devicelib/nnlib to detect compute capability
This commit is contained in:
parent
29c2375f5a
commit
e8620919a7
5 changed files with 70 additions and 21 deletions
|
@ -131,8 +131,8 @@ class ConvertSubprocessor(SubprocessorBase):
|
||||||
from nnlib import nnlib
|
from nnlib import nnlib
|
||||||
#model process ate all GPU mem,
|
#model process ate all GPU mem,
|
||||||
#so we cannot use GPU for any TF operations in converter processes (for example image_utils.TFLabConverter)
|
#so we cannot use GPU for any TF operations in converter processes (for example image_utils.TFLabConverter)
|
||||||
#therefore forcing prefer_DeviceConfig to CPU only
|
#therefore forcing active_DeviceConfig to CPU only
|
||||||
nnlib.prefer_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
|
nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
@ -109,13 +109,13 @@ class ModelBase(object):
|
||||||
self.write_preview_history = session_write_preview_history
|
self.write_preview_history = session_write_preview_history
|
||||||
self.target_epoch = session_target_epoch
|
self.target_epoch = session_target_epoch
|
||||||
self.batch_size = session_batch_size
|
self.batch_size = session_batch_size
|
||||||
|
self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
|
||||||
self.device_config = nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options)
|
|
||||||
|
nnlib.import_all ( nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options) )
|
||||||
|
self.device_config = nnlib.active_DeviceConfig
|
||||||
|
|
||||||
self.created_vram_gb = self.options['created_vram_gb'] if 'created_vram_gb' in self.options.keys() else self.device_config.gpu_total_vram_gb
|
self.created_vram_gb = self.options['created_vram_gb'] if 'created_vram_gb' in self.options.keys() else self.device_config.gpu_total_vram_gb
|
||||||
|
|
||||||
self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
|
|
||||||
nnlib.import_all (self.device_config)
|
|
||||||
self.onInitialize(**in_options)
|
self.onInitialize(**in_options)
|
||||||
|
|
||||||
if self.debug or self.batch_size == 0:
|
if self.debug or self.batch_size == 0:
|
||||||
|
|
|
@ -7,7 +7,9 @@ class devicelib:
|
||||||
force_gpu_idxs = None
|
force_gpu_idxs = None
|
||||||
choose_worst_gpu = False
|
choose_worst_gpu = False
|
||||||
gpu_idxs = []
|
gpu_idxs = []
|
||||||
|
gpu_names = []
|
||||||
gpu_total_vram_gb = 0
|
gpu_total_vram_gb = 0
|
||||||
|
gpu_compute_caps = []
|
||||||
allow_growth = True
|
allow_growth = True
|
||||||
use_fp16 = False
|
use_fp16 = False
|
||||||
cpu_only = False
|
cpu_only = False
|
||||||
|
@ -47,12 +49,16 @@ class devicelib:
|
||||||
else:
|
else:
|
||||||
self.gpu_idxs = [gpu_idx]
|
self.gpu_idxs = [gpu_idx]
|
||||||
|
|
||||||
if len(self.gpu_idxs) == 0:
|
self.cpu_only = (len(self.gpu_idxs) == 0)
|
||||||
self.cpu_only = True
|
|
||||||
else:
|
if not self.cpu_only:
|
||||||
self.cpu_only = False
|
|
||||||
self.gpu_total_vram_gb = devicelib.getDeviceVRAMTotalGb ( self.gpu_idxs[0] )
|
self.gpu_total_vram_gb = devicelib.getDeviceVRAMTotalGb ( self.gpu_idxs[0] )
|
||||||
|
self.gpu_names = []
|
||||||
|
self.gpu_compute_caps = []
|
||||||
|
for gpu_idx in self.gpu_idxs:
|
||||||
|
self.gpu_names += [devicelib.getDeviceName(gpu_idx)]
|
||||||
|
self.gpu_compute_caps += [ devicelib.getDeviceComputeCapability ( gpu_idx ) ]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def hasNVML():
|
def hasNVML():
|
||||||
try:
|
try:
|
||||||
|
@ -206,4 +212,16 @@ class devicelib:
|
||||||
nvmlShutdown()
|
nvmlShutdown()
|
||||||
except:
|
except:
|
||||||
pass
|
pass
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def getDeviceComputeCapability(idx):
    """Return the CUDA compute capability of device *idx* encoded as
    ``major * 10 + minor`` (e.g. 3.5 -> 35).

    Returns 0 when NVML is unavailable, the index is out of range,
    or the query fails for any reason (best-effort, never raises).
    """
    # Default to a (0, 0) tuple so the arithmetic below is safe even
    # when the NVML query never runs.  The original initialized this
    # to the bare int 0, which made ``result[0]`` raise TypeError on
    # any NVML failure instead of falling back gracefully.
    result = (0, 0)
    try:
        nvmlInit()
        if idx < nvmlDeviceGetCount():
            # nvmlDeviceGetCudaComputeCapability returns (major, minor)
            result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
        nvmlShutdown()
    except:
        # Bare except matches the file's other NVML helpers: any
        # failure (NVML missing, init error) yields the default.
        pass
    return result[0] * 10 + result[1]
|
|
@ -9,7 +9,7 @@ from .devicelib import devicelib
|
||||||
class nnlib(object):
|
class nnlib(object):
|
||||||
device = devicelib #forwards nnlib.devicelib to device in order to use nnlib as standalone lib
|
device = devicelib #forwards nnlib.devicelib to device in order to use nnlib as standalone lib
|
||||||
DeviceConfig = devicelib.Config
|
DeviceConfig = devicelib.Config
|
||||||
prefer_DeviceConfig = DeviceConfig() #default is one best GPU
|
active_DeviceConfig = DeviceConfig() #default is one best GPU
|
||||||
|
|
||||||
dlib = None
|
dlib = None
|
||||||
keras = None
|
keras = None
|
||||||
|
@ -125,12 +125,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
||||||
def import_tf(device_config = None):
|
def import_tf(device_config = None):
|
||||||
if nnlib.tf is not None:
|
if nnlib.tf is not None:
|
||||||
return nnlib.code_import_tf
|
return nnlib.code_import_tf
|
||||||
|
|
||||||
if device_config is None:
|
|
||||||
device_config = nnlib.prefer_DeviceConfig
|
|
||||||
else:
|
|
||||||
nnlib.prefer_DeviceConfig = device_config
|
|
||||||
|
|
||||||
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
|
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
|
||||||
suppressor = std_utils.suppress_stdout_stderr().__enter__()
|
suppressor = std_utils.suppress_stdout_stderr().__enter__()
|
||||||
else:
|
else:
|
||||||
|
@ -144,6 +139,26 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
nnlib.tf = tf
|
nnlib.tf = tf
|
||||||
|
|
||||||
|
if device_config is None:
|
||||||
|
device_config = nnlib.active_DeviceConfig
|
||||||
|
|
||||||
|
tf_ver = [int(x) for x in tf.VERSION.split('.')]
|
||||||
|
req_cap = 35
|
||||||
|
if tf_ver[0] > 1 or (tf_ver[0] == 1 and tf_ver[1] >= 11):
|
||||||
|
req_cap = 37
|
||||||
|
|
||||||
|
if not device_config.cpu_only and device_config.gpu_compute_caps[0] < req_cap:
|
||||||
|
if suppressor is not None:
|
||||||
|
suppressor.__exit__()
|
||||||
|
|
||||||
|
print ("%s does not meet minimum required compute capability: %d.%d. Falling back to CPU mode." % ( device_config.gpu_names[0], req_cap // 10, req_cap % 10 ) )
|
||||||
|
device_config = nnlib.DeviceConfig(cpu_only=True)
|
||||||
|
|
||||||
|
if suppressor is not None:
|
||||||
|
suppressor.__enter__()
|
||||||
|
|
||||||
|
nnlib.active_DeviceConfig = device_config
|
||||||
|
|
||||||
if device_config.cpu_only:
|
if device_config.cpu_only:
|
||||||
config = tf.ConfigProto( device_count = {'GPU': 0} )
|
config = tf.ConfigProto( device_count = {'GPU': 0} )
|
||||||
else:
|
else:
|
||||||
|
@ -160,6 +175,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
||||||
|
|
||||||
if suppressor is not None:
|
if suppressor is not None:
|
||||||
suppressor.__exit__()
|
suppressor.__exit__()
|
||||||
|
|
||||||
|
|
||||||
nnlib.__initialize_tf_functions()
|
nnlib.__initialize_tf_functions()
|
||||||
nnlib.code_import_tf = compile (nnlib.code_import_tf_string,'','exec')
|
nnlib.code_import_tf = compile (nnlib.code_import_tf_string,'','exec')
|
||||||
|
@ -367,7 +383,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
|
||||||
return nnlib.code_import_keras
|
return nnlib.code_import_keras
|
||||||
|
|
||||||
nnlib.import_tf(device_config)
|
nnlib.import_tf(device_config)
|
||||||
device_config = nnlib.prefer_DeviceConfig
|
device_config = nnlib.active_DeviceConfig
|
||||||
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
|
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
|
||||||
suppressor = std_utils.suppress_stdout_stderr().__enter__()
|
suppressor = std_utils.suppress_stdout_stderr().__enter__()
|
||||||
|
|
||||||
|
|
|
@ -1699,3 +1699,18 @@ def nvmlDeviceGetTopologyCommonAncestor(device1, device2):
|
||||||
ret = fn(device1, device2, byref(c_level))
|
ret = fn(device1, device2, byref(c_level))
|
||||||
_nvmlCheckReturn(ret)
|
_nvmlCheckReturn(ret)
|
||||||
return c_level.value
|
return c_level.value
|
||||||
|
|
||||||
|
#DeepFaceLab additions
def nvmlDeviceGetCudaComputeCapability(device):
    """Query the CUDA compute capability of an NVML *device* handle.

    Returns:
        (major, minor) tuple of ints, e.g. (3, 5).

    Raises:
        NVMLError: if the underlying NVML call does not return
        NVML_SUCCESS.
    """
    c_major = c_int()
    c_minor = c_int()
    fn = _nvmlGetFunctionPointer("nvmlDeviceGetCudaComputeCapability")

    # NVML fills the two c_int out-parameters in place.
    # (The previous "# get the count" / "insufficient size" comments
    # were copy-pasted from a buffer-query wrapper and did not apply.)
    ret = fn(device, byref(c_major), byref(c_minor))
    if (ret != NVML_SUCCESS):
        raise NVMLError(ret)

    return c_major.value, c_minor.value
|
Loading…
Add table
Add a link
Reference in a new issue