DeepFaceLab/nnlib/device.py
iperov 5587c93e01 fix ConverterMasked.py,
changing requirements
changing device.py ENV vars
2019-03-07 19:48:12 +04:00

345 lines
14 KiB
Python

import os
import json
import numpy as np
from .pynvml import *
# Minimum CUDA compute capability required by the tensorflow build, encoded as
# major*10 + minor (35 means 3.5). Override it for a custom build by setting
# the DFL_TF_MIN_REQ_CAP environment variable.
# It cannot be derived from tensorflow.version, because that would require
# importing tensorflow here.
tf_min_req_cap = int(os.getenv("DFL_TF_MIN_REQ_CAP", 35))
class device:
    """Static helpers for enumerating compute devices of the active backend.

    Every method dispatches on the class attribute ``backend``, which the
    code handles as one of "plaidML", "tensorflow", "tensorflow-generic" or
    "tensorflow-cpu" (``None`` means no backend has been chosen yet).

    * "plaidML" paths read the module-level ``plaidML_devices`` list and
      ``plaidML_devices_count``.
    * "tensorflow" paths query NVML through the ``nvml*`` functions.
    * "tensorflow-generic" is modelled as exactly one device with index 0.
    * Any other backend exposes no devices: list getters return ``[]``,
      index getters return ``-1`` and numeric getters return ``0``.
    """

    backend = None

    class Config():
        """Resolved device selection for one run.

        After construction the instance exposes:
          backend           - backend name; forced to "tensorflow-cpu" when
                              no usable GPU was selected.
          cpu_only          - True when no GPU will be used.
          gpu_idxs          - selected device indexes.
          gpu_names, gpu_compute_caps, gpu_vram_gb
                            - per-device info, parallel to gpu_idxs.
          multi_gpu         - reset to False when fewer than two devices of
                              the same model are available.
          force_gpu_idx, force_gpu_idxs, choose_worst_gpu, allow_growth,
          use_fp16          - the options as passed in (the GPU-related ones
                              keep their class defaults in CPU-only mode).
        """

        # Class-level defaults, kept for backward compatibility. The list
        # attributes are always shadowed by per-instance lists in __init__.
        force_gpu_idx = -1
        multi_gpu = False
        force_gpu_idxs = None
        choose_worst_gpu = False
        gpu_idxs = []
        gpu_names = []
        gpu_compute_caps = []
        gpu_vram_gb = []
        allow_growth = True
        use_fp16 = False
        cpu_only = False
        backend = None

        def __init__ (self, force_gpu_idx = -1,
                            multi_gpu = False,
                            force_gpu_idxs = None,
                            choose_worst_gpu = False,
                            allow_growth = True,
                            use_fp16 = False,
                            cpu_only = False,
                            **in_options):
            """Pick the devices to use.

            force_gpu_idx    - preferred device index; ignored when negative
                               or not a valid device.
            multi_gpu        - use every valid device whose name equals the
                               chosen device's name.
            force_gpu_idxs   - comma separated device indexes, e.g. "0,1";
                               takes precedence over force_gpu_idx. Invalid
                               indexes and blank entries are skipped; a
                               non-numeric entry raises ValueError.
            choose_worst_gpu - when no index is forced, pick the device with
                               the least memory instead of the most.
            allow_growth, use_fp16 - stored as-is for the caller.
            cpu_only         - skip GPU selection entirely.
            in_options       - extra keyword options; accepted and ignored.
            """
            self.backend = device.backend
            self.use_fp16 = use_fp16

            # Fresh lists per instance so nothing ever mutates the shared
            # class-level defaults.
            self.gpu_idxs = []
            self.gpu_names = []
            self.gpu_compute_caps = []
            self.gpu_vram_gb = []

            self.cpu_only = cpu_only or (self.backend == "tensorflow-cpu")

            if not self.cpu_only:
                self.force_gpu_idx = force_gpu_idx
                self.multi_gpu = multi_gpu
                self.force_gpu_idxs = force_gpu_idxs
                self.choose_worst_gpu = choose_worst_gpu
                self.allow_growth = allow_growth

                if force_gpu_idxs is not None:
                    for token in force_gpu_idxs.split(','):
                        if not token.strip():
                            continue  # tolerate "0,1," and similar
                        idx = int(token)
                        if device.isValidDeviceIdx(idx):
                            self.gpu_idxs.append(idx)
                else:
                    if force_gpu_idx >= 0 and device.isValidDeviceIdx(force_gpu_idx):
                        gpu_idx = force_gpu_idx
                    elif choose_worst_gpu:
                        gpu_idx = device.getWorstValidDeviceIdx()
                    else:
                        gpu_idx = device.getBestValidDeviceIdx()

                    if gpu_idx is not None and gpu_idx != -1:
                        if self.multi_gpu:
                            self.gpu_idxs = device.getDeviceIdxsEqualModel(gpu_idx)
                            if len(self.gpu_idxs) <= 1:
                                self.multi_gpu = False
                        else:
                            self.gpu_idxs = [gpu_idx]

                self.cpu_only = (len(self.gpu_idxs) == 0)

            if self.cpu_only:
                self.backend = "tensorflow-cpu"
            else:
                for gpu_idx in self.gpu_idxs:
                    self.gpu_names.append(device.getDeviceName(gpu_idx))
                    self.gpu_compute_caps.append(device.getDeviceComputeCapability(gpu_idx))
                    self.gpu_vram_gb.append(device.getDeviceVRAMTotalGb(gpu_idx))

    @staticmethod
    def _getDeviceTotalMemory(idx):
        """Return the total memory of device `idx` as reported by the backend.

        Only meaningful for the "plaidML" and "tensorflow" backends. The value
        is compared against GB * 1024**3 elsewhere, i.e. it is used as bytes.
        """
        if device.backend == "plaidML":
            return plaidML_devices[idx]['globalMemSize']
        return nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(idx)).total

    @staticmethod
    def getValidDeviceIdxsEnumerator():
        """Yield the indexes of devices usable with the current backend.

        For "tensorflow" only devices whose compute capability is at least
        `tf_min_req_cap` are yielded. Unknown backends yield nothing.
        """
        if device.backend == "plaidML":
            for i in range(plaidML_devices_count):
                yield i
        elif device.backend == "tensorflow":
            for gpu_idx in range(nvmlDeviceGetCount()):
                if device.getDeviceComputeCapability(gpu_idx) >= tf_min_req_cap:
                    yield gpu_idx
        elif device.backend == "tensorflow-generic":
            yield 0

    @staticmethod
    def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb):
        """Return valid device indexes having at least `totalmemsize_gb` GB.

        "tensorflow-generic" always reports its single device [0], since its
        memory size cannot be queried.
        """
        if device.backend == "tensorflow-generic":
            return [0]

        min_total = totalmemsize_gb*1024*1024*1024
        return [ i for i in device.getValidDeviceIdxsEnumerator()
                 if device._getDeviceTotalMemory(i) >= min_total ]

    @staticmethod
    def getAllDevicesIdxsList():
        """Return the indexes of all devices, valid or not ([] if none)."""
        if device.backend == "plaidML":
            return [ *range(plaidML_devices_count) ]
        elif device.backend == "tensorflow":
            return [ *range(nvmlDeviceGetCount() ) ]
        elif device.backend == "tensorflow-generic":
            return [0]
        return []

    @staticmethod
    def getValidDevicesIdxsWithNamesList():
        """Return a list of (index, name) tuples for the usable devices.

        The CPU backend is reported as the single entry (0, 'CPU').
        """
        if device.backend == "tensorflow-cpu":
            return [ (0, 'CPU') ]
        return [ (i, device.getDeviceName(i)) for i in device.getValidDeviceIdxsEnumerator() ]

    @staticmethod
    def getDeviceVRAMTotalGb (idx):
        """Return the total memory of device `idx` in GB, or 0 if unknown.

        The "tensorflow" value is rounded to a whole number, the "plaidML"
        value is not; "tensorflow-generic" always reports 2.
        """
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return device._getDeviceTotalMemory(idx) / (1024*1024*1024)
        elif device.backend == "tensorflow":
            if idx < nvmlDeviceGetCount():
                return round ( device._getDeviceTotalMemory(idx) / (1024*1024*1024) )
        elif device.backend == "tensorflow-generic":
            return 2
        return 0

    @staticmethod
    def getBestValidDeviceIdx():
        """Return the valid device index with the most memory, or -1 if none.

        On ties the lowest index wins.
        """
        if device.backend == "tensorflow-generic":
            return 0

        best_idx = -1
        best_mem = 0
        for i in device.getValidDeviceIdxsEnumerator():
            total = device._getDeviceTotalMemory(i)
            if total > best_mem:
                best_idx = i
                best_mem = total
        return best_idx

    @staticmethod
    def getWorstValidDeviceIdx():
        """Return the valid device index with the least memory, or -1 if none.

        On ties the lowest index wins.
        """
        if device.backend == "tensorflow-generic":
            return 0

        worst_idx = -1
        worst_mem = None  # None = nothing seen yet; avoids depending on sys.maxsize
        for i in device.getValidDeviceIdxsEnumerator():
            total = device._getDeviceTotalMemory(i)
            if worst_mem is None or total < worst_mem:
                worst_idx = i
                worst_mem = total
        return worst_idx

    @staticmethod
    def isValidDeviceIdx(idx):
        """Return True if `idx` is one of the usable device indexes."""
        return any( i == idx for i in device.getValidDeviceIdxsEnumerator() )

    @staticmethod
    def getDeviceIdxsEqualModel(idx):
        """Return the valid device indexes whose name equals that of `idx`.

        Returns [] when `idx` has no known name.
        """
        if device.backend == "tensorflow-generic":
            return [0] if idx == 0 else []

        idx_name = device.getDeviceName(idx)
        if idx_name is None:
            return []
        return [ i for i in device.getValidDeviceIdxsEnumerator()
                 if device.getDeviceName(i) == idx_name ]

    @staticmethod
    def getDeviceName (idx):
        """Return the name of device `idx`, or None if it is unknown."""
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['description']
        elif device.backend == "tensorflow":
            if idx < nvmlDeviceGetCount():
                return nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
        elif device.backend == "tensorflow-generic":
            if idx == 0:
                return "Generic GeForce GPU"
        return None

    @staticmethod
    def getDeviceID (idx):
        """Return the decoded plaidML id of device `idx`.

        Returns None for other backends or an out-of-range index.
        """
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['id'].decode()
        return None

    @staticmethod
    def getDeviceComputeCapability(idx):
        """Return the compute capability of device `idx` as major*10 + minor.

        "plaidML" devices and the single "tensorflow-generic" device report
        99 so that they always pass the minimum-capability check. Unknown
        devices and backends report 0 (this used to raise TypeError).
        """
        if device.backend == "plaidML":
            return 99
        elif device.backend == "tensorflow":
            if idx < nvmlDeviceGetCount():
                cap = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
                return cap[0] * 10 + cap[1]
        elif device.backend == "tensorflow-generic":
            return 99 if idx == 0 else 0
        return 0
# Environment switches (a value of "1" enables them):
#   DFL_FORCE_PLAIDML - for the OpenCL build: use plaidML even if an NVIDIA device is found
#   DFL_FORCE_TF_CPU  - for the OpenCL build: use tensorflow on CPU if plaidML failed
force_plaidML = os.environ.get("DFL_FORCE_PLAIDML", "0") == "1"
force_tf_cpu = os.environ.get("DFL_FORCE_TF_CPU", "0") == "1"

has_nvml = False      # becomes True once NVML has been initialised
has_nvml_cap = False  # becomes True once NVML reports a capable CUDA device

# Set DFL_FORCE_HAS_NVIDIA_DEVICE=1 if
# - your NVIDIA device cannot be seen by OpenCL
# - this is the CUDA build of DFL
has_nvidia_device = os.environ.get("DFL_FORCE_HAS_NVIDIA_DEVICE", "0") == "1"

plaidML_devices = []

# Use the plaidML OpenCL backend to enumerate system devices and to detect
# whether an NVIDIA device is present. Discovery is best effort: if plaidML is
# missing or fails, the list simply stays as filled so far.
try:
    os.environ['PLAIDML_EXPERIMENTAL'] = 'false' # lets plaidML work without running 'plaidml-setup'
    import plaidml
    ctx = plaidml.Context()
    try:
        for d in plaidml.devices(ctx, return_all=True)[0]:
            details = json.loads(d.details)
            if details['type'] == 'CPU': # skip OpenCL CPU devices
                continue
            if 'nvidia' in details['vendor'].lower():
                has_nvidia_device = True
            plaidML_devices += [ {'id':d.id,
                                  'globalMemSize' : int(details['globalMemSize']),
                                  'description' : d.description.decode()
                                 }]
    finally:
        # Release the context even when enumeration fails part-way.
        ctx.shutdown()
except Exception:
    # Deliberately ignored (plaidML is optional); unlike a bare `except:`
    # this still lets KeyboardInterrupt / SystemExit propagate.
    pass

plaidML_devices_count = len(plaidML_devices)
# Choosing the backend.
if device.backend is None and not force_tf_cpu:
    # First try to load NVML and detect CUDA devices for the tensorflow backend.
    # This is done even when force_plaidML is set, so that tensorflow can
    # still be chosen if plaidML turns out to be unusable.
    try:
        nvmlInit()
        has_nvml = True
        device.backend = "tensorflow" # set the tensorflow backend in order to use the device.* query functions

        gpu_idxs = device.getAllDevicesIdxsList()
        gpu_caps = np.array ( [ device.getDeviceComputeCapability(gpu_idx) for gpu_idx in gpu_idxs ] )

        if not np.any (gpu_caps >= tf_min_req_cap):
            if not force_plaidML:
                print ("No CUDA devices found with minimum required compute capability: %d.%d. Falling back to OpenCL mode." % (tf_min_req_cap // 10, tf_min_req_cap % 10) )
            device.backend = None
            nvmlShutdown()
        else:
            has_nvml_cap = True
    except Exception:
        # An exception occurs here when NVML (NVSMI) is not installed.
        device.backend = None
        has_nvml = False

    # Try plaidML when it is forced (even if an NVIDIA device was found), or
    # when the tensorflow backend failed and there is no NVIDIA device.
    if force_plaidML or (device.backend is None and not has_nvidia_device):
        if plaidML_devices_count == 0:
            # No capable OpenCL devices found; fall through to the tensorflow choices below.
            device.backend = None
        else:
            device.backend = "plaidML"

if device.backend is None:
    if force_tf_cpu:
        device.backend = "tensorflow-cpu"
    elif not has_nvml:
        if has_nvidia_device:
            # Some notebook systems have an NVIDIA card without NVSMI in the official drivers.
            # In that case assume a system with one capable GPU and let tensorflow choose the best GPU.
            device.backend = "tensorflow-generic"
        else:
            # No NVSMI and no NVIDIA cards, and plaidML failed as well: CPU only.
            device.backend = "tensorflow-cpu"
    else:
        if has_nvml_cap:
            # NVSMI and capable CUDA devices are present, but forced plaidML failed: choose tensorflow.
            device.backend = "tensorflow"
        else:
            # NVSMI is present but there are no capable CUDA devices, and plaidML failed as well: CPU only.
            device.backend = "tensorflow-cpu"