update devicelib/nnlib to detect compute capability

This commit is contained in:
iperov 2019-01-08 10:48:06 +04:00
parent 29c2375f5a
commit e8620919a7
5 changed files with 70 additions and 21 deletions

View file

@ -131,8 +131,8 @@ class ConvertSubprocessor(SubprocessorBase):
from nnlib import nnlib from nnlib import nnlib
#model process ate all GPU mem, #model process ate all GPU mem,
#so we cannot use GPU for any TF operations in converter processes (for example image_utils.TFLabConverter) #so we cannot use GPU for any TF operations in converter processes (for example image_utils.TFLabConverter)
#therefore forcing prefer_DeviceConfig to CPU only #therefore forcing active_DeviceConfig to CPU only
nnlib.prefer_DeviceConfig = nnlib.DeviceConfig (cpu_only=True) nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True)
return None return None

View file

@ -109,13 +109,13 @@ class ModelBase(object):
self.write_preview_history = session_write_preview_history self.write_preview_history = session_write_preview_history
self.target_epoch = session_target_epoch self.target_epoch = session_target_epoch
self.batch_size = session_batch_size self.batch_size = session_batch_size
self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
self.device_config = nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options) nnlib.import_all ( nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options) )
self.device_config = nnlib.active_DeviceConfig
self.created_vram_gb = self.options['created_vram_gb'] if 'created_vram_gb' in self.options.keys() else self.device_config.gpu_total_vram_gb self.created_vram_gb = self.options['created_vram_gb'] if 'created_vram_gb' in self.options.keys() else self.device_config.gpu_total_vram_gb
self.onInitializeOptions(self.epoch == 0, ask_for_session_options)
nnlib.import_all (self.device_config)
self.onInitialize(**in_options) self.onInitialize(**in_options)
if self.debug or self.batch_size == 0: if self.debug or self.batch_size == 0:

View file

@ -7,7 +7,9 @@ class devicelib:
force_gpu_idxs = None force_gpu_idxs = None
choose_worst_gpu = False choose_worst_gpu = False
gpu_idxs = [] gpu_idxs = []
gpu_names = []
gpu_total_vram_gb = 0 gpu_total_vram_gb = 0
gpu_compute_caps = []
allow_growth = True allow_growth = True
use_fp16 = False use_fp16 = False
cpu_only = False cpu_only = False
@ -47,11 +49,15 @@ class devicelib:
else: else:
self.gpu_idxs = [gpu_idx] self.gpu_idxs = [gpu_idx]
if len(self.gpu_idxs) == 0: self.cpu_only = (len(self.gpu_idxs) == 0)
self.cpu_only = True
else: if not self.cpu_only:
self.cpu_only = False
self.gpu_total_vram_gb = devicelib.getDeviceVRAMTotalGb ( self.gpu_idxs[0] ) self.gpu_total_vram_gb = devicelib.getDeviceVRAMTotalGb ( self.gpu_idxs[0] )
self.gpu_names = []
self.gpu_compute_caps = []
for gpu_idx in self.gpu_idxs:
self.gpu_names += [devicelib.getDeviceName(gpu_idx)]
self.gpu_compute_caps += [ devicelib.getDeviceComputeCapability ( gpu_idx ) ]
@staticmethod @staticmethod
def hasNVML(): def hasNVML():
@ -207,3 +213,15 @@ class devicelib:
except: except:
pass pass
return result return result
@staticmethod
def getDeviceComputeCapability(idx):
result = 0
try:
nvmlInit()
if idx < nvmlDeviceGetCount():
result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
nvmlShutdown()
except:
pass
return result[0] * 10 + result[1]

View file

@ -9,7 +9,7 @@ from .devicelib import devicelib
class nnlib(object): class nnlib(object):
device = devicelib #forwards nnlib.devicelib to device in order to use nnlib as standalone lib device = devicelib #forwards nnlib.devicelib to device in order to use nnlib as standalone lib
DeviceConfig = devicelib.Config DeviceConfig = devicelib.Config
prefer_DeviceConfig = DeviceConfig() #default is one best GPU active_DeviceConfig = DeviceConfig() #default is one best GPU
dlib = None dlib = None
keras = None keras = None
@ -126,11 +126,6 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
if nnlib.tf is not None: if nnlib.tf is not None:
return nnlib.code_import_tf return nnlib.code_import_tf
if device_config is None:
device_config = nnlib.prefer_DeviceConfig
else:
nnlib.prefer_DeviceConfig = device_config
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
suppressor = std_utils.suppress_stdout_stderr().__enter__() suppressor = std_utils.suppress_stdout_stderr().__enter__()
else: else:
@ -144,6 +139,26 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
import tensorflow as tf import tensorflow as tf
nnlib.tf = tf nnlib.tf = tf
if device_config is None:
device_config = nnlib.active_DeviceConfig
tf_ver = [int(x) for x in tf.VERSION.split('.')]
req_cap = 35
if tf_ver[0] > 1 or (tf_ver[0] == 1 and tf_ver[1] >= 11):
req_cap = 37
if not device_config.cpu_only and device_config.gpu_compute_caps[0] < req_cap:
if suppressor is not None:
suppressor.__exit__()
print ("%s does not meet minimum required compute capability: %d.%d. Falling back to CPU mode." % ( device_config.gpu_names[0], req_cap // 10, req_cap % 10 ) )
device_config = nnlib.DeviceConfig(cpu_only=True)
if suppressor is not None:
suppressor.__enter__()
nnlib.active_DeviceConfig = device_config
if device_config.cpu_only: if device_config.cpu_only:
config = tf.ConfigProto( device_count = {'GPU': 0} ) config = tf.ConfigProto( device_count = {'GPU': 0} )
else: else:
@ -161,6 +176,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
if suppressor is not None: if suppressor is not None:
suppressor.__exit__() suppressor.__exit__()
nnlib.__initialize_tf_functions() nnlib.__initialize_tf_functions()
nnlib.code_import_tf = compile (nnlib.code_import_tf_string,'','exec') nnlib.code_import_tf = compile (nnlib.code_import_tf_string,'','exec')
return nnlib.code_import_tf return nnlib.code_import_tf
@ -367,7 +383,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
return nnlib.code_import_keras return nnlib.code_import_keras
nnlib.import_tf(device_config) nnlib.import_tf(device_config)
device_config = nnlib.prefer_DeviceConfig device_config = nnlib.active_DeviceConfig
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
suppressor = std_utils.suppress_stdout_stderr().__enter__() suppressor = std_utils.suppress_stdout_stderr().__enter__()

View file

@ -1699,3 +1699,18 @@ def nvmlDeviceGetTopologyCommonAncestor(device1, device2):
ret = fn(device1, device2, byref(c_level)) ret = fn(device1, device2, byref(c_level))
_nvmlCheckReturn(ret) _nvmlCheckReturn(ret)
return c_level.value return c_level.value
#DeepFaceLab additions
def nvmlDeviceGetCudaComputeCapability(device):
    """Return the CUDA compute capability of *device* as a (major, minor) tuple.

    *device* is an NVML device handle (from nvmlDeviceGetHandleByIndex).
    Raises NVMLError if the underlying NVML call does not return NVML_SUCCESS.
    """
    c_major = c_int()
    c_minor = c_int()
    fn = _nvmlGetFunctionPointer("nvmlDeviceGetCudaComputeCapability")
    # query major/minor into the ctypes out-parameters
    ret = fn(device, byref(c_major), byref(c_minor))
    # propagate any NVML failure as an exception, matching the other wrappers
    if (ret != NVML_SUCCESS):
        raise NVMLError(ret)
    return c_major.value, c_minor.value