mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-08-19 21:13:20 -07:00
Upgraded to TF version 1.13.2
Removed the wait at first launch for most graphics cards. Increased speed of training by 10-20%, but you have to retrain all models from scratch. SAEHD: added option 'use float16' Experimental option. Reduces the model size by half. Increases the speed of training. Decreases the accuracy of the model. The model may collapse or not train. Model may not learn the mask in large resolutions. true_face_training option is replaced by "True face power". 0.0000 .. 1.0 Experimental option. Discriminates the result face to be more like the src face. Higher value - stronger discrimination. Comparison - https://i.imgur.com/czScS9q.png
This commit is contained in:
parent
a3dfcb91b9
commit
76ca79216e
49 changed files with 1320 additions and 1297 deletions
202
core/leras/nn.py
202
core/leras/nn.py
|
@ -1,51 +1,67 @@
|
|||
"""
|
||||
Leras.
|
||||
Leras.
|
||||
|
||||
like lighter keras.
|
||||
This is my lightweight neural network library written from scratch
|
||||
based on pure tensorflow without keras.
|
||||
|
||||
Provides:
|
||||
+ full freedom of tensorflow operations without keras model's restrictions
|
||||
+ full freedom of tensorflow operations without keras model's restrictions
|
||||
+ easy model operations like in PyTorch, but in graph mode (no eager execution)
|
||||
+ convenient and understandable logic
|
||||
|
||||
Reasons why we cannot import tensorflow or any tensorflow.sub modules right here:
|
||||
1) change env variables based on DeviceConfig before import tensorflow
|
||||
2) multiprocesses will import tensorflow every spawn
|
||||
|
||||
NCHW speed up training for 10-20%.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
from core.interact import interact as io
|
||||
|
||||
from .device import Devices
|
||||
|
||||
|
||||
class nn():
|
||||
current_DeviceConfig = None
|
||||
|
||||
tf = None
|
||||
tf_sess = None
|
||||
tf_sess_config = None
|
||||
|
||||
tf_default_device = None
|
||||
|
||||
data_format = None
|
||||
conv2d_ch_axis = None
|
||||
conv2d_spatial_axes = None
|
||||
|
||||
tf_floatx = None
|
||||
np_floatx = None
|
||||
|
||||
# Tensor ops
|
||||
tf_get_value = None
|
||||
tf_batch_set_value = None
|
||||
tf_gradients = None
|
||||
tf_average_gv_list = None
|
||||
tf_average_tensor_list = None
|
||||
tf_dot = None
|
||||
tf_concat = None
|
||||
tf_gelu = None
|
||||
tf_upsample2d = None
|
||||
tf_upsample2d_bilinear = None
|
||||
tf_flatten = None
|
||||
tf_reshape_4D = None
|
||||
tf_random_binomial = None
|
||||
tf_gaussian_blur = None
|
||||
tf_style_loss = None
|
||||
tf_channel_histogram = None
|
||||
tf_histogram = None
|
||||
tf_dssim = None
|
||||
|
||||
tf_space_to_depth = None
|
||||
tf_depth_to_space = None
|
||||
|
||||
# Layers
|
||||
Saveable = None
|
||||
LayerBase = None
|
||||
|
@ -55,16 +71,17 @@ class nn():
|
|||
BlurPool = None
|
||||
Dense = None
|
||||
BatchNorm2D = None
|
||||
|
||||
|
||||
# Initializers
|
||||
initializers = None
|
||||
|
||||
|
||||
# Optimizers
|
||||
TFBaseOptimizer = None
|
||||
TFRMSpropOptimizer = None
|
||||
|
||||
|
||||
@staticmethod
|
||||
def initialize(device_config=None):
|
||||
def initialize(device_config=None, floatx="float32", data_format="NHWC"):
|
||||
|
||||
if nn.tf is None:
|
||||
if device_config is None:
|
||||
device_config = nn.getCurrentDeviceConfig()
|
||||
|
@ -74,11 +91,8 @@ class nn():
|
|||
if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
|
||||
os.environ.pop('CUDA_VISIBLE_DEVICES')
|
||||
|
||||
os.environ['CUDA_CACHE_MAXSIZE'] = '536870912' #512Mb (32mb default)
|
||||
|
||||
first_run = False
|
||||
|
||||
if not device_config.cpu_only:
|
||||
if len(device_config.devices) != 0:
|
||||
if sys.platform[0:3] == 'win':
|
||||
if all( [ x.name == device_config.devices[0].name for x in device_config.devices ] ):
|
||||
devices_str = "_" + device_config.devices[0].name.replace(' ','_')
|
||||
|
@ -86,27 +100,33 @@ class nn():
|
|||
devices_str = ""
|
||||
for device in device_config.devices:
|
||||
devices_str += "_" + device.name.replace(' ','_')
|
||||
|
||||
|
||||
compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache' + devices_str)
|
||||
if not compute_cache_path.exists():
|
||||
first_run = True
|
||||
os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
|
||||
|
||||
os.environ['CUDA_CACHE_MAXSIZE'] = '536870912' #512Mb (32mb default)
|
||||
os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
|
||||
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # tf log errors only
|
||||
|
||||
import warnings
|
||||
warnings.simplefilter(action='ignore', category=FutureWarning)
|
||||
|
||||
|
||||
if first_run:
|
||||
io.log_info("Caching GPU kernels...")
|
||||
|
||||
import tensorflow as tf
|
||||
import tensorflow as tf
|
||||
import logging
|
||||
logging.getLogger('tensorflow').setLevel(logging.ERROR)
|
||||
|
||||
nn.tf = tf
|
||||
|
||||
if device_config.cpu_only:
|
||||
|
||||
if len(device_config.devices) == 0:
|
||||
nn.tf_default_device = "/CPU:0"
|
||||
config = tf.ConfigProto(device_count={'GPU': 0})
|
||||
else:
|
||||
else:
|
||||
nn.tf_default_device = "/GPU:0"
|
||||
config = tf.ConfigProto()
|
||||
config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices])
|
||||
|
||||
|
@ -114,26 +134,81 @@ class nn():
|
|||
config.gpu_options.allow_growth = True
|
||||
nn.tf_sess_config = config
|
||||
|
||||
nn.tf_floatx = nn.tf.float32 #nn.tf.float16 if device_config.use_fp16 else nn.tf.float32
|
||||
nn.np_floatx = nn.tf_floatx.as_numpy_dtype
|
||||
|
||||
from .tensor_ops import initialize_tensor_ops
|
||||
from .layers import initialize_layers
|
||||
from .initializers import initialize_initializers
|
||||
from .optimizers import initialize_optimizers
|
||||
|
||||
|
||||
initialize_tensor_ops(nn)
|
||||
initialize_layers(nn)
|
||||
initialize_initializers(nn)
|
||||
initialize_optimizers(nn)
|
||||
|
||||
|
||||
if nn.tf_sess is None:
|
||||
nn.tf_sess = tf.Session(config=nn.tf_sess_config)
|
||||
|
||||
|
||||
if floatx == "float32":
|
||||
floatx = nn.tf.float32
|
||||
elif floatx == "float16":
|
||||
floatx = nn.tf.float16
|
||||
else:
|
||||
raise ValueError(f"unsupported floatx {floatx}")
|
||||
nn.set_floatx(floatx)
|
||||
nn.set_data_format(data_format)
|
||||
|
||||
@staticmethod
|
||||
def initialize_main_env():
|
||||
Devices.initialize_main_env()
|
||||
|
||||
|
||||
@staticmethod
|
||||
def set_floatx(tf_dtype):
|
||||
"""
|
||||
set default float type for all layers when dtype is None for them
|
||||
"""
|
||||
nn.tf_floatx = tf_dtype
|
||||
nn.np_floatx = tf_dtype.as_numpy_dtype
|
||||
|
||||
@staticmethod
|
||||
def set_data_format(data_format):
|
||||
if data_format != "NHWC" and data_format != "NCHW":
|
||||
raise ValueError(f"unsupported data_format {data_format}")
|
||||
nn.data_format = data_format
|
||||
|
||||
if data_format == "NHWC":
|
||||
nn.conv2d_ch_axis = 3
|
||||
nn.conv2d_spatial_axes = [1,2]
|
||||
elif data_format == "NCHW":
|
||||
nn.conv2d_ch_axis = 1
|
||||
nn.conv2d_spatial_axes = [2,3]
|
||||
|
||||
@staticmethod
|
||||
def get4Dshape ( w, h, c, data_format=None ):
|
||||
"""
|
||||
returns 4D shape based on current data_format
|
||||
"""
|
||||
if data_format is None:
|
||||
data_format = nn.data_format
|
||||
|
||||
if data_format == "NHWC":
|
||||
return (None,h,w,c)
|
||||
else:
|
||||
return (None,c,h,w)
|
||||
|
||||
@staticmethod
|
||||
def to_data_format( x, to_data_format, from_data_format=None):
|
||||
if from_data_format is None:
|
||||
from_data_format = nn.data_format
|
||||
|
||||
if to_data_format == from_data_format:
|
||||
return x
|
||||
|
||||
if to_data_format == "NHWC":
|
||||
return np.transpose(x, (0,2,3,1) )
|
||||
elif to_data_format == "NCHW":
|
||||
return np.transpose(x, (0,3,1,2) )
|
||||
else:
|
||||
raise ValueError(f"unsupported to_data_format {to_data_format}")
|
||||
|
||||
@staticmethod
|
||||
def getCurrentDeviceConfig():
|
||||
if nn.current_DeviceConfig is None:
|
||||
|
@ -151,27 +226,34 @@ class nn():
|
|||
nn.tf.reset_default_graph()
|
||||
nn.tf_sess.close()
|
||||
nn.tf_sess = nn.tf.Session(config=nn.tf_sess_config)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def tf_close_session():
|
||||
def tf_close_session():
|
||||
if nn.tf_sess is not None:
|
||||
nn.tf.reset_default_graph()
|
||||
nn.tf_sess.close()
|
||||
nn.tf_sess = None
|
||||
|
||||
|
||||
@staticmethod
|
||||
def tf_get_current_device():
|
||||
# Undocumented access to last tf.device(...)
|
||||
objs = nn.tf.get_default_graph()._device_function_stack.peek_objs()
|
||||
if len(objs) != 0:
|
||||
return objs[0].display_name
|
||||
return nn.tf_default_device
|
||||
|
||||
@staticmethod
|
||||
def ask_choose_device_idxs(choose_only_one=False, allow_cpu=True, suggest_best_multi_gpu=False, suggest_all_gpu=False, return_device_config=False):
|
||||
devices = Devices.getDevices()
|
||||
if len(devices) == 0:
|
||||
return []
|
||||
|
||||
|
||||
all_devices_indexes = [device.index for device in devices]
|
||||
|
||||
|
||||
if choose_only_one:
|
||||
suggest_best_multi_gpu = False
|
||||
suggest_all_gpu = False
|
||||
|
||||
|
||||
if suggest_all_gpu:
|
||||
best_device_indexes = all_devices_indexes
|
||||
elif suggest_best_multi_gpu:
|
||||
|
@ -179,84 +261,84 @@ class nn():
|
|||
else:
|
||||
best_device_indexes = [ devices.get_best_device().index ]
|
||||
best_device_indexes = ",".join([str(x) for x in best_device_indexes])
|
||||
|
||||
|
||||
io.log_info ("")
|
||||
if choose_only_one:
|
||||
io.log_info ("Choose one GPU idx.")
|
||||
else:
|
||||
io.log_info ("Choose one or several GPU idxs (separated by comma).")
|
||||
io.log_info ("")
|
||||
|
||||
|
||||
if allow_cpu:
|
||||
io.log_info ("[CPU] : CPU")
|
||||
for device in devices:
|
||||
io.log_info (f" [{device.index}] : {device.name}")
|
||||
|
||||
|
||||
io.log_info ("")
|
||||
|
||||
|
||||
while True:
|
||||
try:
|
||||
if choose_only_one:
|
||||
choosed_idxs = io.input_str("Which GPU index to choose?", best_device_indexes)
|
||||
else:
|
||||
choosed_idxs = io.input_str("Which GPU indexes to choose?", best_device_indexes)
|
||||
|
||||
|
||||
if allow_cpu and choosed_idxs.lower() == "cpu":
|
||||
choosed_idxs = []
|
||||
break
|
||||
|
||||
|
||||
choosed_idxs = [ int(x) for x in choosed_idxs.split(',') ]
|
||||
|
||||
|
||||
if choose_only_one:
|
||||
if len(choosed_idxs) == 1:
|
||||
break
|
||||
break
|
||||
else:
|
||||
if all( [idx in all_devices_indexes for idx in choosed_idxs] ):
|
||||
break
|
||||
except:
|
||||
pass
|
||||
io.log_info ("")
|
||||
|
||||
|
||||
if return_device_config:
|
||||
return nn.DeviceConfig.GPUIndexes(choosed_idxs)
|
||||
else:
|
||||
else:
|
||||
return choosed_idxs
|
||||
|
||||
class DeviceConfig():
|
||||
class DeviceConfig():
|
||||
def __init__ (self, devices=None):
|
||||
devices = devices or []
|
||||
|
||||
devices = devices or []
|
||||
|
||||
if not isinstance(devices, Devices):
|
||||
devices = Devices(devices)
|
||||
|
||||
self.devices = devices
|
||||
self.cpu_only = len(devices) == 0
|
||||
|
||||
|
||||
self.devices = devices
|
||||
self.cpu_only = len(devices) == 0
|
||||
|
||||
@staticmethod
|
||||
def BestGPU():
|
||||
def BestGPU():
|
||||
devices = Devices.getDevices()
|
||||
if len(devices) == 0:
|
||||
return nn.DeviceConfig.CPU()
|
||||
|
||||
|
||||
return nn.DeviceConfig([devices.get_best_device()])
|
||||
|
||||
|
||||
@staticmethod
|
||||
def WorstGPU():
|
||||
def WorstGPU():
|
||||
devices = Devices.getDevices()
|
||||
if len(devices) == 0:
|
||||
return nn.DeviceConfig.CPU()
|
||||
|
||||
|
||||
return nn.DeviceConfig([devices.get_worst_device()])
|
||||
|
||||
|
||||
@staticmethod
|
||||
def GPUIndexes(indexes):
|
||||
if len(indexes) != 0:
|
||||
devices = Devices.getDevices().get_devices_from_index_list(indexes)
|
||||
else:
|
||||
devices = []
|
||||
|
||||
|
||||
return nn.DeviceConfig(devices)
|
||||
|
||||
|
||||
@staticmethod
|
||||
def CPU():
|
||||
def CPU():
|
||||
return nn.DeviceConfig([])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue