mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-07-05 20:42:11 -07:00
Added AMD/Intel card support via DirectX 12 (DirectML backend)
This commit is contained in:
parent
fc4a49c3e7
commit
fdb143ff47
7 changed files with 166 additions and 116 deletions
|
@ -1,12 +1,19 @@
|
||||||
import sys
|
import sys
|
||||||
import ctypes
|
import ctypes
|
||||||
import os
|
import os
|
||||||
|
import multiprocessing
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from core.interact import interact as io
|
||||||
|
|
||||||
|
|
||||||
class Device(object):
|
class Device(object):
|
||||||
def __init__(self, index, name, total_mem, free_mem, cc=0):
|
def __init__(self, index, tf_dev_type, name, total_mem, free_mem):
|
||||||
self.index = index
|
self.index = index
|
||||||
|
self.tf_dev_type = tf_dev_type
|
||||||
self.name = name
|
self.name = name
|
||||||
self.cc = cc
|
|
||||||
self.total_mem = total_mem
|
self.total_mem = total_mem
|
||||||
self.total_mem_gb = total_mem / 1024**3
|
self.total_mem_gb = total_mem / 1024**3
|
||||||
self.free_mem = free_mem
|
self.free_mem = free_mem
|
||||||
|
@ -82,12 +89,134 @@ class Devices(object):
|
||||||
result.append (device)
|
result.append (device)
|
||||||
return Devices(result)
|
return Devices(result)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_tf_devices_proc(q : multiprocessing.Queue):
|
||||||
|
|
||||||
|
compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache_ALL')
|
||||||
|
os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
|
||||||
|
if not compute_cache_path.exists():
|
||||||
|
io.log_info("Caching GPU kernels...")
|
||||||
|
compute_cache_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
import tensorflow
|
||||||
|
|
||||||
|
tf_version = tensorflow.version.VERSION
|
||||||
|
#if tf_version is None:
|
||||||
|
# tf_version = tensorflow.version.GIT_VERSION
|
||||||
|
if tf_version[0] == 'v':
|
||||||
|
tf_version = tf_version[1:]
|
||||||
|
if tf_version[0] == '2':
|
||||||
|
tf = tensorflow.compat.v1
|
||||||
|
else:
|
||||||
|
tf = tensorflow
|
||||||
|
|
||||||
|
import logging
|
||||||
|
# Disable tensorflow warnings
|
||||||
|
tf_logger = logging.getLogger('tensorflow')
|
||||||
|
tf_logger.setLevel(logging.ERROR)
|
||||||
|
|
||||||
|
from tensorflow.python.client import device_lib
|
||||||
|
|
||||||
|
devices = []
|
||||||
|
|
||||||
|
physical_devices = device_lib.list_local_devices()
|
||||||
|
physical_devices_f = {}
|
||||||
|
for dev in physical_devices:
|
||||||
|
dev_type = dev.device_type
|
||||||
|
dev_tf_name = dev.name
|
||||||
|
dev_tf_name = dev_tf_name[ dev_tf_name.index(dev_type) : ]
|
||||||
|
|
||||||
|
dev_idx = int(dev_tf_name.split(':')[-1])
|
||||||
|
|
||||||
|
if dev_type in ['GPU','DML']:
|
||||||
|
dev_name = dev_tf_name
|
||||||
|
|
||||||
|
dev_desc = dev.physical_device_desc
|
||||||
|
if len(dev_desc) != 0:
|
||||||
|
if dev_desc[0] == '{':
|
||||||
|
dev_desc_json = json.loads(dev_desc)
|
||||||
|
dev_desc_json_name = dev_desc_json.get('name',None)
|
||||||
|
if dev_desc_json_name is not None:
|
||||||
|
dev_name = dev_desc_json_name
|
||||||
|
else:
|
||||||
|
for param, value in ( v.split(':') for v in dev_desc.split(',') ):
|
||||||
|
param = param.strip()
|
||||||
|
value = value.strip()
|
||||||
|
if param == 'name':
|
||||||
|
dev_name = value
|
||||||
|
break
|
||||||
|
|
||||||
|
physical_devices_f[dev_idx] = (dev_type, dev_name, dev.memory_limit)
|
||||||
|
|
||||||
|
q.put(physical_devices_f)
|
||||||
|
time.sleep(0.1)
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def initialize_main_env():
|
def initialize_main_env():
|
||||||
os.environ['NN_DEVICES_INITIALIZED'] = '1'
|
if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 0:
|
||||||
os.environ['NN_DEVICES_COUNT'] = '0'
|
return
|
||||||
|
|
||||||
|
if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
|
||||||
|
os.environ.pop('CUDA_VISIBLE_DEVICES')
|
||||||
|
|
||||||
os.environ['CUDA_CACHE_MAXSIZE'] = '2147483647'
|
os.environ['CUDA_CACHE_MAXSIZE'] = '2147483647'
|
||||||
|
os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
|
||||||
|
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # tf log errors only
|
||||||
|
|
||||||
|
q = multiprocessing.Queue()
|
||||||
|
p = multiprocessing.Process(target=Devices._get_tf_devices_proc, args=(q,), daemon=True)
|
||||||
|
p.start()
|
||||||
|
p.join()
|
||||||
|
|
||||||
|
visible_devices = q.get()
|
||||||
|
|
||||||
|
os.environ['NN_DEVICES_INITIALIZED'] = '1'
|
||||||
|
os.environ['NN_DEVICES_COUNT'] = str(len(visible_devices))
|
||||||
|
|
||||||
|
for i in visible_devices:
|
||||||
|
dev_type, name, total_mem = visible_devices[i]
|
||||||
|
|
||||||
|
os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'] = dev_type
|
||||||
|
os.environ[f'NN_DEVICE_{i}_NAME'] = name
|
||||||
|
os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(total_mem)
|
||||||
|
os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(total_mem)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def getDevices():
|
||||||
|
if Devices.all_devices is None:
|
||||||
|
if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1:
|
||||||
|
raise Exception("nn devices are not initialized. Run initialize_main_env() in main process.")
|
||||||
|
devices = []
|
||||||
|
for i in range ( int(os.environ['NN_DEVICES_COUNT']) ):
|
||||||
|
devices.append ( Device(index=i,
|
||||||
|
tf_dev_type=os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'],
|
||||||
|
name=os.environ[f'NN_DEVICE_{i}_NAME'],
|
||||||
|
total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']),
|
||||||
|
free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']), )
|
||||||
|
)
|
||||||
|
Devices.all_devices = Devices(devices)
|
||||||
|
|
||||||
|
return Devices.all_devices
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# {'name' : name.split(b'\0', 1)[0].decode(),
|
||||||
|
# 'total_mem' : totalMem.value
|
||||||
|
# }
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
|
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
|
||||||
libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
|
libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
|
||||||
for libname in libnames:
|
for libname in libnames:
|
||||||
|
@ -139,70 +268,4 @@ class Devices(object):
|
||||||
os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem'])
|
os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem'])
|
||||||
os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem'])
|
os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem'])
|
||||||
os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc'])
|
os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc'])
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def getDevices():
|
|
||||||
if Devices.all_devices is None:
|
|
||||||
if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1:
|
|
||||||
raise Exception("nn devices are not initialized. Run initialize_main_env() in main process.")
|
|
||||||
devices = []
|
|
||||||
for i in range ( int(os.environ['NN_DEVICES_COUNT']) ):
|
|
||||||
devices.append ( Device(index=i,
|
|
||||||
name=os.environ[f'NN_DEVICE_{i}_NAME'],
|
|
||||||
total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']),
|
|
||||||
free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']),
|
|
||||||
cc=int(os.environ[f'NN_DEVICE_{i}_CC']) ))
|
|
||||||
Devices.all_devices = Devices(devices)
|
|
||||||
|
|
||||||
return Devices.all_devices
|
|
||||||
|
|
||||||
"""
|
|
||||||
if Devices.all_devices is None:
|
|
||||||
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
|
|
||||||
|
|
||||||
libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
|
|
||||||
for libname in libnames:
|
|
||||||
try:
|
|
||||||
cuda = ctypes.CDLL(libname)
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
return Devices([])
|
|
||||||
|
|
||||||
nGpus = ctypes.c_int()
|
|
||||||
name = b' ' * 200
|
|
||||||
cc_major = ctypes.c_int()
|
|
||||||
cc_minor = ctypes.c_int()
|
|
||||||
freeMem = ctypes.c_size_t()
|
|
||||||
totalMem = ctypes.c_size_t()
|
|
||||||
|
|
||||||
result = ctypes.c_int()
|
|
||||||
device = ctypes.c_int()
|
|
||||||
context = ctypes.c_void_p()
|
|
||||||
error_str = ctypes.c_char_p()
|
|
||||||
|
|
||||||
devices = []
|
|
||||||
|
|
||||||
if cuda.cuInit(0) == 0 and \
|
|
||||||
cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0:
|
|
||||||
for i in range(nGpus.value):
|
|
||||||
if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \
|
|
||||||
cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \
|
|
||||||
cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0:
|
|
||||||
if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0:
|
|
||||||
cc = cc_major.value * 10 + cc_minor.value
|
|
||||||
if cc >= min_cc:
|
|
||||||
devices.append ( Device(index=i,
|
|
||||||
name=name.split(b'\0', 1)[0].decode(),
|
|
||||||
total_mem=totalMem.value,
|
|
||||||
free_mem=freeMem.value,
|
|
||||||
cc=cc) )
|
|
||||||
cuda.cuCtxDetach(context)
|
|
||||||
Devices.all_devices = Devices(devices)
|
|
||||||
return Devices.all_devices
|
|
||||||
"""
|
"""
|
|
@ -33,8 +33,8 @@ class nn():
|
||||||
tf = None
|
tf = None
|
||||||
tf_sess = None
|
tf_sess = None
|
||||||
tf_sess_config = None
|
tf_sess_config = None
|
||||||
tf_default_device = None
|
tf_default_device_name = None
|
||||||
|
|
||||||
data_format = None
|
data_format = None
|
||||||
conv2d_ch_axis = None
|
conv2d_ch_axis = None
|
||||||
conv2d_spatial_axes = None
|
conv2d_spatial_axes = None
|
||||||
|
@ -50,9 +50,6 @@ class nn():
|
||||||
nn.setCurrentDeviceConfig(device_config)
|
nn.setCurrentDeviceConfig(device_config)
|
||||||
|
|
||||||
# Manipulate environment variables before import tensorflow
|
# Manipulate environment variables before import tensorflow
|
||||||
|
|
||||||
if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
|
|
||||||
os.environ.pop('CUDA_VISIBLE_DEVICES')
|
|
||||||
|
|
||||||
first_run = False
|
first_run = False
|
||||||
if len(device_config.devices) != 0:
|
if len(device_config.devices) != 0:
|
||||||
|
@ -68,22 +65,19 @@ class nn():
|
||||||
compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache' + devices_str)
|
compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache' + devices_str)
|
||||||
if not compute_cache_path.exists():
|
if not compute_cache_path.exists():
|
||||||
first_run = True
|
first_run = True
|
||||||
|
compute_cache_path.mkdir(parents=True, exist_ok=True)
|
||||||
os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
|
os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
|
||||||
|
|
||||||
os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
|
|
||||||
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # tf log errors only
|
|
||||||
|
|
||||||
if first_run:
|
if first_run:
|
||||||
io.log_info("Caching GPU kernels...")
|
io.log_info("Caching GPU kernels...")
|
||||||
|
|
||||||
import tensorflow
|
import tensorflow
|
||||||
|
|
||||||
tf_version = getattr(tensorflow,'VERSION', None)
|
tf_version = tensorflow.version.VERSION
|
||||||
if tf_version is None:
|
#if tf_version is None:
|
||||||
tf_version = tensorflow.version.GIT_VERSION
|
# tf_version = tensorflow.version.GIT_VERSION
|
||||||
if tf_version[0] == 'v':
|
if tf_version[0] == 'v':
|
||||||
tf_version = tf_version[1:]
|
tf_version = tf_version[1:]
|
||||||
|
|
||||||
if tf_version[0] == '2':
|
if tf_version[0] == '2':
|
||||||
tf = tensorflow.compat.v1
|
tf = tensorflow.compat.v1
|
||||||
else:
|
else:
|
||||||
|
@ -108,13 +102,14 @@ class nn():
|
||||||
|
|
||||||
# Configure tensorflow session-config
|
# Configure tensorflow session-config
|
||||||
if len(device_config.devices) == 0:
|
if len(device_config.devices) == 0:
|
||||||
nn.tf_default_device = "/CPU:0"
|
|
||||||
config = tf.ConfigProto(device_count={'GPU': 0})
|
config = tf.ConfigProto(device_count={'GPU': 0})
|
||||||
|
nn.tf_default_device_name = '/CPU:0'
|
||||||
else:
|
else:
|
||||||
nn.tf_default_device = "/GPU:0"
|
nn.tf_default_device_name = f'/{device_config.devices[0].tf_dev_type}:0'
|
||||||
|
|
||||||
config = tf.ConfigProto()
|
config = tf.ConfigProto()
|
||||||
config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices])
|
config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices])
|
||||||
|
|
||||||
config.gpu_options.force_gpu_compatible = True
|
config.gpu_options.force_gpu_compatible = True
|
||||||
config.gpu_options.allow_growth = True
|
config.gpu_options.allow_growth = True
|
||||||
nn.tf_sess_config = config
|
nn.tf_sess_config = config
|
||||||
|
@ -202,14 +197,6 @@ class nn():
|
||||||
nn.tf_sess.close()
|
nn.tf_sess.close()
|
||||||
nn.tf_sess = None
|
nn.tf_sess = None
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def get_current_device():
|
|
||||||
# Undocumented access to last tf.device(...)
|
|
||||||
objs = nn.tf.get_default_graph()._device_function_stack.peek_objs()
|
|
||||||
if len(objs) != 0:
|
|
||||||
return objs[0].display_name
|
|
||||||
return nn.tf_default_device
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def ask_choose_device_idxs(choose_only_one=False, allow_cpu=True, suggest_best_multi_gpu=False, suggest_all_gpu=False):
|
def ask_choose_device_idxs(choose_only_one=False, allow_cpu=True, suggest_best_multi_gpu=False, suggest_all_gpu=False):
|
||||||
devices = Devices.getDevices()
|
devices = Devices.getDevices()
|
||||||
|
|
|
@ -161,11 +161,11 @@ class FaceEnhancer(object):
|
||||||
if not model_path.exists():
|
if not model_path.exists():
|
||||||
raise Exception("Unable to load FaceEnhancer.npy")
|
raise Exception("Unable to load FaceEnhancer.npy")
|
||||||
|
|
||||||
with tf.device ('/CPU:0' if place_model_on_cpu else '/GPU:0'):
|
with tf.device ('/CPU:0' if place_model_on_cpu else nn.tf_default_device_name):
|
||||||
self.model = FaceEnhancer()
|
self.model = FaceEnhancer()
|
||||||
self.model.load_weights (model_path)
|
self.model.load_weights (model_path)
|
||||||
|
|
||||||
with tf.device ('/CPU:0' if run_on_cpu else '/GPU:0'):
|
with tf.device ('/CPU:0' if run_on_cpu else nn.tf_default_device_name):
|
||||||
self.model.build_for_run ([ (tf.float32, nn.get4Dshape (192,192,3) ),
|
self.model.build_for_run ([ (tf.float32, nn.get4Dshape (192,192,3) ),
|
||||||
(tf.float32, (None,1,) ),
|
(tf.float32, (None,1,) ),
|
||||||
(tf.float32, (None,1,) ),
|
(tf.float32, (None,1,) ),
|
||||||
|
|
|
@ -39,7 +39,7 @@ class XSegNet(object):
|
||||||
self.target_t = tf.placeholder (nn.floatx, nn.get4Dshape(resolution,resolution,1) )
|
self.target_t = tf.placeholder (nn.floatx, nn.get4Dshape(resolution,resolution,1) )
|
||||||
|
|
||||||
# Initializing model classes
|
# Initializing model classes
|
||||||
with tf.device ('/CPU:0' if place_model_on_cpu else '/GPU:0'):
|
with tf.device ('/CPU:0' if place_model_on_cpu else nn.tf_default_device_name):
|
||||||
self.model = nn.XSeg(3, 32, 1, name=name)
|
self.model = nn.XSeg(3, 32, 1, name=name)
|
||||||
self.model_weights = self.model.get_weights()
|
self.model_weights = self.model.get_weights()
|
||||||
if training:
|
if training:
|
||||||
|
@ -53,7 +53,7 @@ class XSegNet(object):
|
||||||
self.model_filename_list += [ [self.model, f'{model_name}.npy'] ]
|
self.model_filename_list += [ [self.model, f'{model_name}.npy'] ]
|
||||||
|
|
||||||
if not training:
|
if not training:
|
||||||
with tf.device ('/CPU:0' if run_on_cpu else '/GPU:0'):
|
with tf.device ('/CPU:0' if run_on_cpu else nn.tf_default_device_name):
|
||||||
_, pred = self.model(self.input_t)
|
_, pred = self.model(self.input_t)
|
||||||
|
|
||||||
def net_run(input_np):
|
def net_run(input_np):
|
||||||
|
|
|
@ -31,7 +31,7 @@ class QModel(ModelBase):
|
||||||
masked_training = True
|
masked_training = True
|
||||||
|
|
||||||
models_opt_on_gpu = len(devices) >= 1 and all([dev.total_mem_gb >= 4 for dev in devices])
|
models_opt_on_gpu = len(devices) >= 1 and all([dev.total_mem_gb >= 4 for dev in devices])
|
||||||
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
|
models_opt_device = nn.tf_default_device_name if models_opt_on_gpu and self.is_training else '/CPU:0'
|
||||||
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
|
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
|
||||||
|
|
||||||
input_ch = 3
|
input_ch = 3
|
||||||
|
@ -96,7 +96,7 @@ class QModel(ModelBase):
|
||||||
gpu_src_dst_loss_gvs = []
|
gpu_src_dst_loss_gvs = []
|
||||||
|
|
||||||
for gpu_id in range(gpu_count):
|
for gpu_id in range(gpu_count):
|
||||||
with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ):
|
with tf.device( f'/{devices[gpu_id].tf_dev_type}:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ):
|
||||||
batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu )
|
batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu )
|
||||||
with tf.device(f'/CPU:0'):
|
with tf.device(f'/CPU:0'):
|
||||||
# slice on CPU, otherwise all batch data will be transfered to GPU first
|
# slice on CPU, otherwise all batch data will be transfered to GPU first
|
||||||
|
@ -190,7 +190,7 @@ class QModel(ModelBase):
|
||||||
self.AE_view = AE_view
|
self.AE_view = AE_view
|
||||||
else:
|
else:
|
||||||
# Initializing merge function
|
# Initializing merge function
|
||||||
with tf.device( f'/GPU:0' if len(devices) != 0 else f'/CPU:0'):
|
with tf.device( nn.tf_default_device_name if len(devices) != 0 else f'/CPU:0'):
|
||||||
gpu_dst_code = self.inter(self.encoder(self.warped_dst))
|
gpu_dst_code = self.inter(self.encoder(self.warped_dst))
|
||||||
gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder_src(gpu_dst_code)
|
gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder_src(gpu_dst_code)
|
||||||
_, gpu_pred_dst_dstm = self.decoder_dst(gpu_dst_code)
|
_, gpu_pred_dst_dstm = self.decoder_dst(gpu_dst_code)
|
||||||
|
|
|
@ -235,9 +235,10 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
ct_mode = self.options['ct_mode']
|
ct_mode = self.options['ct_mode']
|
||||||
if ct_mode == 'none':
|
if ct_mode == 'none':
|
||||||
ct_mode = None
|
ct_mode = None
|
||||||
|
|
||||||
|
|
||||||
models_opt_on_gpu = False if len(devices) == 0 else self.options['models_opt_on_gpu']
|
models_opt_on_gpu = False if len(devices) == 0 else self.options['models_opt_on_gpu']
|
||||||
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
|
models_opt_device = nn.tf_default_device_name if models_opt_on_gpu and self.is_training else '/CPU:0'
|
||||||
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
|
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
|
||||||
|
|
||||||
input_ch=3
|
input_ch=3
|
||||||
|
@ -336,7 +337,6 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
bs_per_gpu = max(1, self.get_batch_size() // gpu_count)
|
bs_per_gpu = max(1, self.get_batch_size() // gpu_count)
|
||||||
self.set_batch_size( gpu_count*bs_per_gpu)
|
self.set_batch_size( gpu_count*bs_per_gpu)
|
||||||
|
|
||||||
|
|
||||||
# Compute losses per GPU
|
# Compute losses per GPU
|
||||||
gpu_pred_src_src_list = []
|
gpu_pred_src_src_list = []
|
||||||
gpu_pred_dst_dst_list = []
|
gpu_pred_dst_dst_list = []
|
||||||
|
@ -350,9 +350,9 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
gpu_G_loss_gvs = []
|
gpu_G_loss_gvs = []
|
||||||
gpu_D_code_loss_gvs = []
|
gpu_D_code_loss_gvs = []
|
||||||
gpu_D_src_dst_loss_gvs = []
|
gpu_D_src_dst_loss_gvs = []
|
||||||
|
|
||||||
for gpu_id in range(gpu_count):
|
for gpu_id in range(gpu_count):
|
||||||
with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ):
|
with tf.device( f'/{devices[gpu_id].tf_dev_type}:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ):
|
||||||
|
|
||||||
with tf.device(f'/CPU:0'):
|
with tf.device(f'/CPU:0'):
|
||||||
# slice on CPU, otherwise all batch data will be transfered to GPU first
|
# slice on CPU, otherwise all batch data will be transfered to GPU first
|
||||||
batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu )
|
batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu )
|
||||||
|
@ -360,10 +360,10 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
gpu_warped_dst = self.warped_dst [batch_slice,:,:,:]
|
gpu_warped_dst = self.warped_dst [batch_slice,:,:,:]
|
||||||
gpu_target_src = self.target_src [batch_slice,:,:,:]
|
gpu_target_src = self.target_src [batch_slice,:,:,:]
|
||||||
gpu_target_dst = self.target_dst [batch_slice,:,:,:]
|
gpu_target_dst = self.target_dst [batch_slice,:,:,:]
|
||||||
gpu_target_srcm = self.target_srcm[batch_slice,:,:,:]
|
gpu_target_srcm = self.target_srcm[batch_slice,:,:,:]
|
||||||
gpu_target_srcm_em = self.target_srcm_em[batch_slice,:,:,:]
|
gpu_target_srcm_em = self.target_srcm_em[batch_slice,:,:,:]
|
||||||
gpu_target_dstm = self.target_dstm[batch_slice,:,:,:]
|
gpu_target_dstm = self.target_dstm[batch_slice,:,:,:]
|
||||||
gpu_target_dstm_em = self.target_dstm_em[batch_slice,:,:,:]
|
gpu_target_dstm_em = self.target_dstm_em[batch_slice,:,:,:]
|
||||||
|
|
||||||
# process model tensors
|
# process model tensors
|
||||||
if 'df' in archi_type:
|
if 'df' in archi_type:
|
||||||
|
@ -571,7 +571,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
self.AE_view = AE_view
|
self.AE_view = AE_view
|
||||||
else:
|
else:
|
||||||
# Initializing merge function
|
# Initializing merge function
|
||||||
with tf.device( f'/GPU:0' if len(devices) != 0 else f'/CPU:0'):
|
with tf.device( nn.tf_default_device_name if len(devices) != 0 else f'/CPU:0'):
|
||||||
if 'df' in archi_type:
|
if 'df' in archi_type:
|
||||||
gpu_dst_code = self.inter(self.encoder(self.warped_dst))
|
gpu_dst_code = self.inter(self.encoder(self.warped_dst))
|
||||||
gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder_src(gpu_dst_code)
|
gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder_src(gpu_dst_code)
|
||||||
|
|
|
@ -52,7 +52,7 @@ class XSegModel(ModelBase):
|
||||||
'head' : FaceType.HEAD}[ self.options['face_type'] ]
|
'head' : FaceType.HEAD}[ self.options['face_type'] ]
|
||||||
|
|
||||||
place_model_on_cpu = len(devices) == 0
|
place_model_on_cpu = len(devices) == 0
|
||||||
models_opt_device = '/CPU:0' if place_model_on_cpu else '/GPU:0'
|
models_opt_device = '/CPU:0' if place_model_on_cpu else nn.tf_default_device_name
|
||||||
|
|
||||||
bgr_shape = nn.get4Dshape(resolution,resolution,3)
|
bgr_shape = nn.get4Dshape(resolution,resolution,3)
|
||||||
mask_shape = nn.get4Dshape(resolution,resolution,1)
|
mask_shape = nn.get4Dshape(resolution,resolution,1)
|
||||||
|
@ -83,7 +83,7 @@ class XSegModel(ModelBase):
|
||||||
for gpu_id in range(gpu_count):
|
for gpu_id in range(gpu_count):
|
||||||
|
|
||||||
|
|
||||||
with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ):
|
with tf.device(f'/{devices[gpu_id].tf_dev_type}:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ):
|
||||||
with tf.device(f'/CPU:0'):
|
with tf.device(f'/CPU:0'):
|
||||||
# slice on CPU, otherwise all batch data will be transfered to GPU first
|
# slice on CPU, otherwise all batch data will be transfered to GPU first
|
||||||
batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu )
|
batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu )
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue