mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-07-06 13:02:15 -07:00
SAEHD: added new option GAN power 0.0 .. 10.0 Train the network in Generative Adversarial manner. Forces the neural network to learn small details of the face. You can enable/disable this option at any time, but better to enable it when the network is trained enough. Typical value is 1.0 GAN power with pretrain mode will not work. Example of enabling GAN on 81k iters +5k iters https://i.imgur.com/OdXHLhU.jpg https://i.imgur.com/CYAJmJx.jpg dfhd: default Decoder dimensions are now 48 the preview for 256 res is now correctly displayed fixed model naming/renaming/removing Improvements for those involved in post-processing in AfterEffects: Codec is reverted back to x264 in order to properly use in AfterEffects and video players. Merger now always outputs the mask to workspace\data_dst\merged_mask removed raw modes except raw-rgb raw-rgb mode now outputs selected face mask_mode (before square mask) 'export alpha mask' button is replaced by 'show alpha mask'. You can view the alpha mask without recompute the frames. 8) 'merged *.bat' now also output 'result_mask.' video file. 8) 'merged lossless' now uses x264 lossless codec (before PNG codec) result_mask video file is always lossless. Thus you can use result_mask video file as mask layer in the AfterEffects.
353 lines
No EOL
14 KiB
Python
353 lines
No EOL
14 KiB
Python
import numpy as np
|
|
|
|
def initialize_tensor_ops(nn):
|
|
tf = nn.tf
|
|
from tensorflow.python.ops import array_ops, random_ops, math_ops, sparse_ops, gradients
|
|
from tensorflow.python.framework import sparse_tensor
|
|
|
|
def tf_get_value(tensor):
|
|
return nn.tf_sess.run (tensor)
|
|
nn.tf_get_value = tf_get_value
|
|
|
|
|
|
def tf_batch_set_value(tuples):
|
|
if len(tuples) != 0:
|
|
with nn.tf.device('/CPU:0'):
|
|
assign_ops = []
|
|
feed_dict = {}
|
|
|
|
for x, value in tuples:
|
|
if isinstance(value, nn.tf.Operation):
|
|
assign_ops.append(value)
|
|
else:
|
|
value = np.asarray(value, dtype=x.dtype.as_numpy_dtype)
|
|
assign_placeholder = nn.tf.placeholder( x.dtype.base_dtype, shape=[None]*value.ndim )
|
|
assign_op = nn.tf.assign (x, assign_placeholder )
|
|
assign_ops.append(assign_op)
|
|
feed_dict[assign_placeholder] = value
|
|
|
|
nn.tf_sess.run(assign_ops, feed_dict=feed_dict)
|
|
nn.tf_batch_set_value = tf_batch_set_value
|
|
|
|
def tf_init_weights(weights):
|
|
ops = []
|
|
|
|
ca_tuples_w = []
|
|
ca_tuples = []
|
|
for w in weights:
|
|
initializer = w.initializer
|
|
for input in initializer.inputs:
|
|
if "_cai_" in input.name:
|
|
ca_tuples_w.append (w)
|
|
ca_tuples.append ( (w.shape.as_list(), w.dtype.as_numpy_dtype) )
|
|
break
|
|
else:
|
|
ops.append (initializer)
|
|
|
|
if len(ops) != 0:
|
|
nn.tf_sess.run (ops)
|
|
|
|
if len(ca_tuples) != 0:
|
|
nn.tf_batch_set_value( [*zip(ca_tuples_w, nn.initializers.ca.generate_batch (ca_tuples))] )
|
|
nn.tf_init_weights = tf_init_weights
|
|
|
|
def tf_gradients ( loss, vars ):
|
|
grads = gradients.gradients(loss, vars, colocate_gradients_with_ops=True )
|
|
gv = [*zip(grads,vars)]
|
|
for g,v in gv:
|
|
if g is None:
|
|
raise Exception(f"No gradient for variable {v.name}")
|
|
return gv
|
|
nn.tf_gradients = tf_gradients
|
|
|
|
def tf_average_gv_list(grad_var_list, tf_device_string=None):
|
|
if len(grad_var_list) == 1:
|
|
return grad_var_list[0]
|
|
|
|
e = tf.device(tf_device_string) if tf_device_string is not None else None
|
|
if e is not None: e.__enter__()
|
|
result = []
|
|
for i, (gv) in enumerate(grad_var_list):
|
|
for j,(g,v) in enumerate(gv):
|
|
g = tf.expand_dims(g, 0)
|
|
if i == 0:
|
|
result += [ [[g], v] ]
|
|
else:
|
|
result[j][0] += [g]
|
|
|
|
for i,(gs,v) in enumerate(result):
|
|
result[i] = ( tf.reduce_mean( tf.concat (gs, 0), 0 ), v )
|
|
if e is not None: e.__exit__(None,None,None)
|
|
return result
|
|
nn.tf_average_gv_list = tf_average_gv_list
|
|
|
|
def tf_average_tensor_list(tensors_list, tf_device_string=None):
|
|
if len(tensors_list) == 1:
|
|
return tensors_list[0]
|
|
|
|
e = tf.device(tf_device_string) if tf_device_string is not None else None
|
|
if e is not None: e.__enter__()
|
|
result = tf.reduce_mean(tf.concat ([tf.expand_dims(t, 0) for t in tensors_list], 0), 0)
|
|
if e is not None: e.__exit__(None,None,None)
|
|
return result
|
|
nn.tf_average_tensor_list = tf_average_tensor_list
|
|
|
|
def tf_concat (tensors_list, axis):
|
|
"""
|
|
Better version.
|
|
"""
|
|
if len(tensors_list) == 1:
|
|
return tensors_list[0]
|
|
return tf.concat(tensors_list, axis)
|
|
nn.tf_concat = tf_concat
|
|
|
|
def tf_gelu(x):
|
|
cdf = 0.5 * (1.0 + tf.nn.tanh((np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3)))))
|
|
return x * cdf
|
|
nn.tf_gelu = tf_gelu
|
|
|
|
def tf_upsample2d(x, size=2):
|
|
if nn.data_format == "NCHW":
|
|
b,c,h,w = x.shape.as_list()
|
|
x = tf.reshape (x, (-1,c,h,1,w,1) )
|
|
x = tf.tile(x, (1,1,1,size,1,size) )
|
|
x = tf.reshape (x, (-1,c,h*size,w*size) )
|
|
return x
|
|
else:
|
|
return tf.image.resize_nearest_neighbor(x, (x.shape[1]*size, x.shape[2]*size) )
|
|
nn.tf_upsample2d = tf_upsample2d
|
|
|
|
def tf_upsample2d_bilinear(x, size=2):
|
|
return tf.image.resize_images(x, (x.shape[1]*size, x.shape[2]*size) )
|
|
nn.tf_upsample2d_bilinear = tf_upsample2d_bilinear
|
|
|
|
def tf_flatten(x):
|
|
if nn.data_format == "NHWC":
|
|
# match NCHW version in order to switch data_format without problems
|
|
x = tf.transpose(x, (0,3,1,2) )
|
|
return tf.reshape (x, (-1, np.prod(x.shape[1:])) )
|
|
|
|
nn.tf_flatten = tf_flatten
|
|
|
|
def tf_reshape_4D(x, w,h,c):
|
|
if nn.data_format == "NHWC":
|
|
# match NCHW version in order to switch data_format without problems
|
|
x = tf.reshape (x, (-1,c,h,w))
|
|
x = tf.transpose(x, (0,2,3,1) )
|
|
return x
|
|
else:
|
|
return tf.reshape (x, (-1,c,h,w))
|
|
nn.tf_reshape_4D = tf_reshape_4D
|
|
|
|
def tf_random_binomial(shape, p=0.0, dtype=None, seed=None):
|
|
if dtype is None:
|
|
dtype=tf.float32
|
|
|
|
if seed is None:
|
|
seed = np.random.randint(10e6)
|
|
return array_ops.where(
|
|
random_ops.random_uniform(shape, dtype=tf.float16, seed=seed) < p,
|
|
array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype))
|
|
nn.tf_random_binomial = tf_random_binomial
|
|
|
|
def tf_gaussian_blur(input, radius=2.0):
|
|
def gaussian(x, mu, sigma):
|
|
return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2))
|
|
|
|
def make_kernel(sigma):
|
|
kernel_size = max(3, int(2 * 2 * sigma + 1))
|
|
mean = np.floor(0.5 * kernel_size)
|
|
kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)])
|
|
np_kernel = np.outer(kernel_1d, kernel_1d).astype(np.float32)
|
|
kernel = np_kernel / np.sum(np_kernel)
|
|
return kernel, kernel_size
|
|
|
|
gauss_kernel, kernel_size = make_kernel(radius)
|
|
padding = kernel_size//2
|
|
if padding != 0:
|
|
if nn.data_format == "NHWC":
|
|
padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ]
|
|
else:
|
|
padding = [ [0,0], [0,0], [padding,padding], [padding,padding] ]
|
|
else:
|
|
padding = None
|
|
gauss_kernel = gauss_kernel[:,:,None,None]
|
|
|
|
outputs = []
|
|
for i in range(input.shape[nn.conv2d_ch_axis]):
|
|
x = input[:,:,:,i:i+1] if nn.data_format == "NHWC" \
|
|
else input[:,i:i+1,:,:]
|
|
|
|
if padding is not None:
|
|
x = tf.pad (x, padding)
|
|
outputs += [ tf.nn.conv2d(x, tf.constant(gauss_kernel, dtype=input.dtype ), strides=[1,1,1,1], padding="VALID", data_format=nn.data_format) ]
|
|
|
|
return tf.concat (outputs, axis=nn.conv2d_ch_axis)
|
|
nn.tf_gaussian_blur = tf_gaussian_blur
|
|
|
|
def tf_style_loss(target, style, gaussian_blur_radius=0.0, loss_weight=1.0, step_size=1):
|
|
def sd(content, style, loss_weight):
|
|
content_nc = content.shape[ nn.conv2d_ch_axis ]
|
|
style_nc = style.shape[nn.conv2d_ch_axis]
|
|
if content_nc != style_nc:
|
|
raise Exception("style_loss() content_nc != style_nc")
|
|
c_mean, c_var = tf.nn.moments(content, axes=nn.conv2d_spatial_axes, keep_dims=True)
|
|
s_mean, s_var = tf.nn.moments(style, axes=nn.conv2d_spatial_axes, keep_dims=True)
|
|
c_std, s_std = tf.sqrt(c_var + 1e-5), tf.sqrt(s_var + 1e-5)
|
|
mean_loss = tf.reduce_sum(tf.square(c_mean-s_mean), axis=[1,2,3])
|
|
std_loss = tf.reduce_sum(tf.square(c_std-s_std), axis=[1,2,3])
|
|
return (mean_loss + std_loss) * ( loss_weight / content_nc.value )
|
|
|
|
if gaussian_blur_radius > 0.0:
|
|
target = tf_gaussian_blur(target, gaussian_blur_radius)
|
|
style = tf_gaussian_blur(style, gaussian_blur_radius)
|
|
|
|
return sd( target, style, loss_weight=loss_weight )
|
|
|
|
nn.tf_style_loss = tf_style_loss
|
|
|
|
def tf_dssim(img1,img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03):
|
|
if img1.dtype != img2.dtype:
|
|
raise ValueError("img1.dtype != img2.dtype")
|
|
|
|
not_float32 = img1.dtype != tf.float32
|
|
|
|
if not_float32:
|
|
img_dtype = img1.dtype
|
|
img1 = tf.cast(img1, tf.float32)
|
|
img2 = tf.cast(img2, tf.float32)
|
|
|
|
kernel = np.arange(0, filter_size, dtype=np.float32)
|
|
kernel -= (filter_size - 1 ) / 2.0
|
|
kernel = kernel**2
|
|
kernel *= ( -0.5 / (filter_sigma**2) )
|
|
kernel = np.reshape (kernel, (1,-1)) + np.reshape(kernel, (-1,1) )
|
|
kernel = tf.constant ( np.reshape (kernel, (1,-1)), dtype=tf.float32 )
|
|
kernel = tf.nn.softmax(kernel)
|
|
kernel = tf.reshape (kernel, (filter_size, filter_size, 1, 1))
|
|
kernel = tf.tile (kernel, (1,1, img1.shape[ nn.conv2d_ch_axis ] ,1))
|
|
|
|
def reducer(x):
|
|
return tf.nn.depthwise_conv2d(x, kernel, strides=[1,1,1,1], padding='VALID', data_format=nn.data_format)
|
|
|
|
c1 = (k1 * max_val) ** 2
|
|
c2 = (k2 * max_val) ** 2
|
|
|
|
mean0 = reducer(img1)
|
|
mean1 = reducer(img2)
|
|
num0 = mean0 * mean1 * 2.0
|
|
den0 = tf.square(mean0) + tf.square(mean1)
|
|
luminance = (num0 + c1) / (den0 + c1)
|
|
|
|
num1 = reducer(img1 * img2) * 2.0
|
|
den1 = reducer(tf.square(img1) + tf.square(img2))
|
|
c2 *= 1.0 #compensation factor
|
|
cs = (num1 - num0 + c2) / (den1 - den0 + c2)
|
|
|
|
ssim_val = tf.reduce_mean(luminance * cs, axis=nn.conv2d_spatial_axes )
|
|
dssim = (1.0 - ssim_val ) / 2.0
|
|
|
|
if not_float32:
|
|
dssim = tf.cast(dssim, img_dtype)
|
|
return dssim
|
|
|
|
nn.tf_dssim = tf_dssim
|
|
|
|
def tf_space_to_depth(x, size):
|
|
if nn.data_format == "NHWC":
|
|
# match NCHW version in order to switch data_format without problems
|
|
b,h,w,c = x.shape.as_list()
|
|
oh, ow = h // size, w // size
|
|
x = tf.reshape(x, (-1, size, oh, size, ow, c))
|
|
x = tf.transpose(x, (0, 2, 4, 1, 3, 5))
|
|
x = tf.reshape(x, (-1, oh, ow, size* size* c ))
|
|
return x
|
|
else:
|
|
return tf.space_to_depth(x, size, data_format=nn.data_format)
|
|
nn.tf_space_to_depth = tf_space_to_depth
|
|
|
|
def tf_depth_to_space(x, size):
|
|
if nn.data_format == "NHWC":
|
|
# match NCHW version in order to switch data_format without problems
|
|
|
|
b,h,w,c = x.shape.as_list()
|
|
oh, ow = h * size, w * size
|
|
oc = c // (size * size)
|
|
|
|
x = tf.reshape(x, (-1, h, w, size, size, oc, ) )
|
|
x = tf.transpose(x, (0, 1, 3, 2, 4, 5))
|
|
x = tf.reshape(x, (-1, oh, ow, oc, ))
|
|
return x
|
|
else:
|
|
return tf.depth_to_space(x, size, data_format=nn.data_format)
|
|
nn.tf_depth_to_space = tf_depth_to_space
|
|
|
|
def tf_rgb_to_lab(srgb):
|
|
srgb_pixels = tf.reshape(srgb, [-1, 3])
|
|
linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32)
|
|
exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32)
|
|
rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask
|
|
rgb_to_xyz = tf.constant([
|
|
# X Y Z
|
|
[0.412453, 0.212671, 0.019334], # R
|
|
[0.357580, 0.715160, 0.119193], # G
|
|
[0.180423, 0.072169, 0.950227], # B
|
|
])
|
|
xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz)
|
|
|
|
xyz_normalized_pixels = tf.multiply(xyz_pixels, [1/0.950456, 1.0, 1/1.088754])
|
|
|
|
epsilon = 6/29
|
|
linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32)
|
|
exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32)
|
|
fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4/29) * linear_mask + (xyz_normalized_pixels ** (1/3)) * exponential_mask
|
|
|
|
fxfyfz_to_lab = tf.constant([
|
|
# l a b
|
|
[ 0.0, 500.0, 0.0], # fx
|
|
[116.0, -500.0, 200.0], # fy
|
|
[ 0.0, 0.0, -200.0], # fz
|
|
])
|
|
lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0])
|
|
return tf.reshape(lab_pixels, tf.shape(srgb))
|
|
nn.tf_rgb_to_lab = tf_rgb_to_lab
|
|
|
|
def tf_suppress_lower_mean(t, eps=0.00001):
|
|
if t.shape.ndims != 1:
|
|
raise ValueError("tf_suppress_lower_mean: t rank must be 1")
|
|
t_mean_eps = tf.reduce_mean(t) - eps
|
|
q = tf.clip_by_value(t, t_mean_eps, tf.reduce_max(t) )
|
|
q = tf.clip_by_value(q-t_mean_eps, 0, eps)
|
|
q = q * (t/eps)
|
|
return q
|
|
"""
|
|
class GeLU(KL.Layer):
|
|
Gaussian Error Linear Unit.
|
|
A smoother version of ReLU generally used
|
|
in the BERT or BERT architecture based models.
|
|
Original paper: https://arxiv.org/abs/1606.08415
|
|
Input shape:
|
|
Arbitrary. Use the keyword argument `input_shape`
|
|
(tuple of integers, does not include the samples axis)
|
|
when using this layer as the first layer in a model.
|
|
Output shape:
|
|
Same shape as the input.
|
|
|
|
def __init__(self, approximate=True, **kwargs):
|
|
super(GeLU, self).__init__(**kwargs)
|
|
self.approximate = approximate
|
|
self.supports_masking = True
|
|
|
|
def call(self, inputs):
|
|
cdf = 0.5 * (1.0 + K.tanh((np.sqrt(2 / np.pi) * (inputs + 0.044715 * K.pow(inputs, 3)))))
|
|
return inputs * cdf
|
|
|
|
def get_config(self):
|
|
config = {'approximate': self.approximate}
|
|
base_config = super(GeLU, self).get_config()
|
|
return dict(list(base_config.items()) + list(config.items()))
|
|
|
|
def compute_output_shape(self, input_shape):
|
|
return input_shape
|
|
nn.GeLU = GeLU
|
|
""" |