DeepFaceLab/nnlib/device.py
iperov 72ba6b103c added support for AMD video cards
added Intel's plaidML backend to use the OpenCL engine. Check the new requirements.
smart backend selection in device.py
the env var 'force_plaidML' can be set to force the use of plaidML
all tf functions transferred to pure keras
MTCNN transferred to pure keras, but it runs slowly on plaidML (forced to CPU in that case)
default batch size for all models and VRAM sizes is now 4; feel free to adjust it on your own
SAE: default style options are now ZERO, because there are no best values for all scenes; set them on your own.
SAE: brought back the pixel_loss option; feel free to enable it on your own.
SAE: added option multiscale_decoder, default true; disable it to get behaviour 100% identical to the H, DF, and LIAEF models.
fixed converter output to .png
added Linux fork reference to doc/doc_build_and_repository_info.md
2019-02-19 17:33:12 +04:00
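A minimal usage sketch for the new env var (an assumption based on this commit message; the import path follows this file's location, and backend selection runs at module import time, as the code at the bottom of the file shows):

    import os
    os.environ['force_plaidML'] = '1'    # must be set before the import below
    from nnlib.device import device      # backend selection happens at import time
    print(device.backend)                # "plaidML" if a capable OpenCL device exists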


import os
import json
import sys # needed for sys.maxsize in getWorstValidDeviceIdx below
import numpy as np
from .pynvml import *

tf_min_req_cap = 37 # minimum required compute capability for tensorflow-gpu==1.11.0
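# (Capability values are encoded as major*10 + minor, so 37 means compute
#  capability 3.7; see getDeviceComputeCapability below.)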

class device:
    backend = None

    class Config():
        force_gpu_idx = -1
        multi_gpu = False
        force_gpu_idxs = None
        choose_worst_gpu = False
        gpu_idxs = []
        gpu_names = []
        gpu_compute_caps = []
        gpu_vram_gb = []
        allow_growth = True
        use_fp16 = False
        cpu_only = False
        backend = None
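        # These class-level defaults are overridden per instance in __init__ below.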

        def __init__ (self, force_gpu_idx = -1,
                            multi_gpu = False,
                            force_gpu_idxs = None,
                            choose_worst_gpu = False,
                            allow_growth = True,
                            use_fp16 = False,
                            cpu_only = False,
                            **in_options):

            self.backend = device.backend
            self.use_fp16 = use_fp16
            self.cpu_only = cpu_only

            if not self.cpu_only:
                self.cpu_only = (self.backend == "tensorflow-cpu")

            if not self.cpu_only:
                self.force_gpu_idx = force_gpu_idx
                self.multi_gpu = multi_gpu
                self.force_gpu_idxs = force_gpu_idxs
                self.choose_worst_gpu = choose_worst_gpu
                self.allow_growth = allow_growth
                self.gpu_idxs = []

                if force_gpu_idxs is not None:
                    # a comma-separated string of device indices, e.g. "0,1"
                    for idx in force_gpu_idxs.split(','):
                        idx = int(idx)
                        if device.isValidDeviceIdx(idx):
                            self.gpu_idxs.append(idx)
                else:
                    if force_gpu_idx >= 0 and device.isValidDeviceIdx(force_gpu_idx):
                        gpu_idx = force_gpu_idx
                    elif choose_worst_gpu:
                        gpu_idx = device.getWorstValidDeviceIdx()
                    else:
                        gpu_idx = device.getBestValidDeviceIdx()

                    if gpu_idx != -1:
                        if self.multi_gpu:
                            self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx )
                            if len(self.gpu_idxs) <= 1:
                                self.multi_gpu = False
                        else:
                            self.gpu_idxs = [gpu_idx]

                self.cpu_only = (len(self.gpu_idxs) == 0)

            if not self.cpu_only:
                self.gpu_names = []
                self.gpu_compute_caps = []
                self.gpu_vram_gb = []
                for gpu_idx in self.gpu_idxs:
                    self.gpu_names += [ device.getDeviceName(gpu_idx) ]
                    self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ]
                    self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ]
                self.cpu_only = (len(self.gpu_idxs) == 0)

            if self.cpu_only:
                self.backend = "tensorflow-cpu"
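        # Usage sketch (values illustrative): pin two specific GPUs.
        #   cfg = device.Config(force_gpu_idxs="0,1", allow_growth=True)
        #   print(cfg.gpu_names, cfg.gpu_vram_gb)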

    @staticmethod
    def getValidDeviceIdxsEnumerator():
        if device.backend == "plaidML":
            for i in range(plaidML_devices_count):
                yield i
        elif device.backend == "tensorflow":
            for gpu_idx in range(nvmlDeviceGetCount()):
                cap = device.getDeviceComputeCapability (gpu_idx)
                if cap >= tf_min_req_cap:
                    yield gpu_idx
        elif device.backend == "tensorflow-generic":
            yield 0
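    # Example: indices of all usable devices for the active backend.
    #   valid_idxs = list(device.getValidDeviceIdxsEnumerator())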

    @staticmethod
    def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb):
        result = []
        if device.backend == "plaidML":
            for i in device.getValidDeviceIdxsEnumerator():
                if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024:
                    result.append (i)
        elif device.backend == "tensorflow":
            for i in device.getValidDeviceIdxsEnumerator():
                handle = nvmlDeviceGetHandleByIndex(i)
                memInfo = nvmlDeviceGetMemoryInfo( handle )
                if (memInfo.total) >= totalmemsize_gb*1024*1024*1024:
                    result.append (i)
        elif device.backend == "tensorflow-generic":
            return [0]
        return result
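    # Example (threshold illustrative): devices with at least 6 GB of VRAM.
    #   big_enough = device.getValidDevicesWithAtLeastTotalMemoryGB(6)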

    @staticmethod
    def getAllDevicesIdxsList():
        if device.backend == "plaidML":
            return [ *range(plaidML_devices_count) ]
        elif device.backend == "tensorflow":
            return [ *range(nvmlDeviceGetCount() ) ]
        elif device.backend == "tensorflow-generic":
            return [0]

    @staticmethod
    def getValidDevicesIdxsWithNamesList():
        if device.backend == "plaidML":
            return [ (i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ]
        elif device.backend == "tensorflow":
            return [ (i, nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() ) for i in device.getValidDeviceIdxsEnumerator() ]
        elif device.backend == "tensorflow-cpu":
            return [ (0, 'CPU') ]
        elif device.backend == "tensorflow-generic":
            return [ (0, device.getDeviceName(0) ) ]
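    # Example result (names illustrative):
    #   [(0, 'GeForce GTX 1080 Ti'), (1, 'GeForce GTX 1060')]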

    @staticmethod
    def getDeviceVRAMTotalGb (idx):
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024)
        elif device.backend == "tensorflow":
            if idx < nvmlDeviceGetCount():
                memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(idx) )
                return round ( memInfo.total / (1024*1024*1024) )
        elif device.backend == "tensorflow-generic":
            return 2 # no NVSMI info available; assume a conservative 2 GB
        return 0 # invalid index for the active backend
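    # Note: the tensorflow branch rounds to whole GB, while plaidML returns a float.
    #   vram_gb = device.getDeviceVRAMTotalGb(0)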

    @staticmethod
    def getBestValidDeviceIdx():
        if device.backend == "plaidML":
            idx = -1
            idx_mem = 0
            for i in device.getValidDeviceIdxsEnumerator():
                total = plaidML_devices[i]['globalMemSize']
                if total > idx_mem:
                    idx = i
                    idx_mem = total
            return idx
        elif device.backend == "tensorflow":
            idx = -1
            idx_mem = 0
            for i in device.getValidDeviceIdxsEnumerator():
                memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) )
                if memInfo.total > idx_mem:
                    idx = i
                    idx_mem = memInfo.total
            return idx
        elif device.backend == "tensorflow-generic":
            return 0
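    # Example: pick the GPU with the most VRAM, then resolve its name.
    #   idx = device.getBestValidDeviceIdx()
    #   name = device.getDeviceName(idx) if idx != -1 else None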

    @staticmethod
    def getWorstValidDeviceIdx():
        if device.backend == "plaidML":
            idx = -1
            idx_mem = sys.maxsize
            for i in device.getValidDeviceIdxsEnumerator():
                total = plaidML_devices[i]['globalMemSize']
                if total < idx_mem:
                    idx = i
                    idx_mem = total
            return idx
        elif device.backend == "tensorflow":
            idx = -1
            idx_mem = sys.maxsize
            for i in device.getValidDeviceIdxsEnumerator():
                memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) )
                if memInfo.total < idx_mem:
                    idx = i
                    idx_mem = memInfo.total
            return idx
        elif device.backend == "tensorflow-generic":
            return 0
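    # Mirror of getBestValidDeviceIdx; used when choose_worst_gpu is set in Config.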

    @staticmethod
    def isValidDeviceIdx(idx):
        if device.backend in ("plaidML", "tensorflow"):
            return idx in [*device.getValidDeviceIdxsEnumerator()]
        elif device.backend == "tensorflow-generic":
            return (idx == 0)

    @staticmethod
    def getDeviceIdxsEqualModel(idx):
        if device.backend == "plaidML":
            result = []
            idx_name = plaidML_devices[idx]['description']
            for i in device.getValidDeviceIdxsEnumerator():
                if plaidML_devices[i]['description'] == idx_name:
                    result.append (i)
            return result
        elif device.backend == "tensorflow":
            result = []
            idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
            for i in device.getValidDeviceIdxsEnumerator():
                if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name:
                    result.append (i)
            return result
        elif device.backend == "tensorflow-generic":
            return [0] if idx == 0 else []
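    # Example: all devices of the same model as device 0; multi_gpu mode uses this
    # to spread a model across identical cards.
    #   peers = device.getDeviceIdxsEqualModel(0)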

    @staticmethod
    def getDeviceName (idx):
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['description']
        elif device.backend == "tensorflow":
            if idx < nvmlDeviceGetCount():
                return nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
        elif device.backend == "tensorflow-generic":
            if idx == 0:
                return "Generic GeForce GPU"
        return None

    @staticmethod
    def getDeviceID (idx):
        if device.backend == "plaidML":
            if idx < plaidML_devices_count:
                return plaidML_devices[idx]['id'].decode()
        return None

    @staticmethod
    def getDeviceComputeCapability(idx):
        result = 0
        if device.backend == "plaidML":
            return 99
        elif device.backend == "tensorflow":
            if idx < nvmlDeviceGetCount():
                result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
        elif device.backend == "tensorflow-generic":
            return 99 if idx == 0 else 0
        if result == 0:
            return 0 # invalid index, or a backend without CUDA capability info
        return result[0] * 10 + result[1]
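    # Encoding example: an NVML tuple (6, 1) becomes 61, i.e. compute capability 6.1.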

force_plaidML = os.environ.get("force_plaidML", "0") == "1"
has_nvml = False
has_nvml_cap = False
has_nvidia_device = False
plaidML_devices = []

# Use the plaidML OpenCL backend to enumerate system devices and detect an NVIDIA card.
try:
    os.environ['PLAIDML_EXPERIMENTAL'] = 'false' # lets plaidML work without running 'plaidml-setup' first
    import plaidml
    ctx = plaidml.Context()
    for d in plaidml.devices(ctx, return_all=True)[0]:
        details = json.loads(d.details)
        if 'nvidia' in details['vendor'].lower():
            has_nvidia_device = True
        plaidML_devices += [ {'id'            : d.id,
                              'globalMemSize' : int(details['globalMemSize']),
                              'description'   : d.description.decode()
                             } ]
    ctx.shutdown()
except:
    pass

plaidML_devices_count = len(plaidML_devices)
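# Example entry in plaidML_devices (values illustrative):
#   {'id': b'opencl_amd_gfx900.0', 'globalMemSize': 8589934592, 'description': 'AMD gfx900 (OpenCL)'}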

# Choose the backend.
if device.backend is None:
    # First try to load NVSMI and detect CUDA devices for the tensorflow backend,
    # even if force_plaidML is set: should plaidML fail, tensorflow remains a fallback.
    try:
        nvmlInit()
        has_nvml = True
        device.backend = "tensorflow" # set tensorflow backend so the device.*() helpers work
        gpu_idxs = device.getAllDevicesIdxsList()
        gpu_caps = np.array ( [ device.getDeviceComputeCapability(gpu_idx) for gpu_idx in gpu_idxs ] )

        if len ( np.ndarray.flatten ( np.argwhere (gpu_caps >= tf_min_req_cap) ) ) == 0:
            if not force_plaidML:
                print ("No CUDA devices found with minimum required compute capability: %d.%d. Falling back to OpenCL mode." % (tf_min_req_cap // 10, tf_min_req_cap % 10) )
            device.backend = None
            nvmlShutdown()
        else:
            has_nvml_cap = True
    except:
        # if NVSMI is not installed, the exception lands here
        device.backend = None
        has_nvml = False

if device.backend is None or force_plaidML:
    # the tensorflow backend failed or plaidML is forced; try the plaidML backend
    if plaidML_devices_count == 0:
        print ("plaidML: No capable OpenCL devices found. Falling back to tensorflow backend.")
        device.backend = None
    else:
        device.backend = "plaidML"

if device.backend is None:
    if not has_nvml:
        if has_nvidia_device:
            # some notebook systems ship an NVIDIA card without NVSMI in the official drivers;
            # assume a single capable GPU and let tensorflow pick the best device itself
            device.backend = "tensorflow-generic"
        else:
            # no NVSMI and no NVIDIA card, and plaidML failed too: CPU only
            device.backend = "tensorflow-cpu"
    else:
        if has_nvml_cap:
            # NVSMI and capable CUDA devices are present, but force_plaidML failed: choose tensorflow
            device.backend = "tensorflow"
        else:
            # NVSMI is present but no capable CUDA devices, and plaidML failed too: CPU only
            device.backend = "tensorflow-cpu"
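
# Smoke test (a sketch; run from the repository root so the relative import of
# .pynvml resolves):
#   python -c "from nnlib.device import device; print(device.backend)"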