added support for AMD video cards

added Intel's plaidML backend to use the OpenCL engine; check the new requirements.
smart backend selection in device.py
the env var 'force_plaidML' can be set to force the plaidML backend (see the sketch after this list)
all TF functions transferred to pure Keras
MTCNN transferred to pure Keras, but it runs slowly on plaidML (forced to CPU in that case)
default batch size for all models and VRAM sizes is now 4; feel free to adjust it on your own
SAE: default style options are now ZERO, because no single values are best for all scenes; set them on your own.
SAE: brought back the pixel_loss option; feel free to enable it on your own.
SAE: added the multiscale_decoder option (default: true); you can disable it to get behaviour 100% identical to the H, DF and LIAEF models.
fixed converter output to .png
added Linux fork reference to doc/doc_build_and_repository_info.md
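A minimal sketch of forcing the plaidML backend (it mirrors the commented-out line in main.py below; assumes device.py reads the variable before the first nnlib import):

import os
os.environ['force_plaidML'] = '1'  # must be set before nnlib/device.py is first imported
from nnlib import nnlib            # backend selection happens at import time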
iperov 2019-02-19 17:33:12 +04:00
parent 3a9d450281
commit 72ba6b103c
24 changed files with 2694 additions and 1489 deletions

__dev/port.py (new file, 344 lines)

@@ -0,0 +1,344 @@
#import FaceLandmarksExtractor
import numpy as np
import dlib
import torch
import keras
from keras import backend as K
from keras import layers as KL
import math
import os
import time
import code
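# Dev script: rebuild the PyTorch FAN (2DFAN-4) face-landmark network from the
# face_alignment package as a Keras model, transplant the weights, and cross-check
# that both implementations produce the same heatmaps and landmarks.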
class TorchBatchNorm2D(keras.engine.topology.Layer):
def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, **kwargs):
super(TorchBatchNorm2D, self).__init__(**kwargs)
self.supports_masking = True
self.axis = axis
self.momentum = momentum
self.epsilon = epsilon
def build(self, input_shape):
dim = input_shape[self.axis]
if dim is None:
raise ValueError('Axis ' + str(self.axis) + ' of '
'input tensor should have a defined dimension '
'but the layer received an input with shape ' +
str(input_shape) + '.')
shape = (dim,)
self.gamma = self.add_weight(shape=shape, name='gamma', initializer='ones', regularizer=None, constraint=None)
self.beta = self.add_weight(shape=shape, name='beta', initializer='zeros', regularizer=None, constraint=None)
self.moving_mean = self.add_weight(shape=shape, name='moving_mean', initializer='zeros', trainable=False)
self.moving_variance = self.add_weight(shape=shape, name='moving_variance', initializer='ones', trainable=False)
self.built = True
def call(self, inputs, training=None):
input_shape = K.int_shape(inputs)
broadcast_shape = [1] * len(input_shape)
broadcast_shape[self.axis] = input_shape[self.axis]
broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape)
broadcast_moving_variance = K.reshape(self.moving_variance, broadcast_shape)
broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
broadcast_beta = K.reshape(self.beta, broadcast_shape)
invstd = K.ones (shape=broadcast_shape, dtype='float32') / K.sqrt(broadcast_moving_variance + K.constant(self.epsilon, dtype='float32'))
return (inputs - broadcast_moving_mean) * invstd * broadcast_gamma + broadcast_beta
def get_config(self):
config = { 'axis': self.axis, 'momentum': self.momentum, 'epsilon': self.epsilon }
base_config = super(TorchBatchNorm2D, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
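# Convert PyTorch Conv2d weights (out_ch, in_ch, kH, kW) to the Keras kernel
# layout (kH, kW, in_ch, out_ch); the bias vector is passed through unchanged.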
def t2kw_conv2d (src):
if src.bias is not None:
return [ np.moveaxis(src.weight.data.cpu().numpy(), [0,1,2,3], [3,2,0,1]), src.bias.data.cpu().numpy() ]
else:
return [ np.moveaxis(src.weight.data.cpu().numpy(), [0,1,2,3], [3,2,0,1])]
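# Pack PyTorch BatchNorm2d parameters in the order Keras set_weights expects:
# [gamma, beta, moving_mean, moving_variance].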
def t2kw_bn2d(src):
return [ src.weight.data.cpu().numpy(), src.bias.data.cpu().numpy(), src.running_mean.cpu().numpy(), src.running_var.cpu().numpy() ]
import face_alignment
fa = face_alignment.FaceAlignment(face_alignment.LandmarksType._2D,enable_cuda=False,enable_cudnn=False,use_cnn_face_detector=True).face_alignemnt_net
fa.eval()
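# FAN ConvBlock: three pre-activation BN->ReLU->Conv3x3 branches concatenated
# (out/2 + out/4 + out/4 channels), plus an identity or 1x1-conv shortcut.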
def KerasConvBlock(in_planes, out_planes, input, srctorch):
out1 = TorchBatchNorm2D(axis=1, momentum=0.1, epsilon=1e-05, weights=t2kw_bn2d(srctorch.bn1) )(input)
out1 = KL.Activation( keras.backend.relu ) (out1)
out1 = KL.ZeroPadding2D(padding=(1, 1), data_format='channels_first')(out1)
out1 = KL.convolutional.Conv2D( int(out_planes/2), kernel_size=3, strides=1, data_format='channels_first', padding='valid', use_bias = False, weights=t2kw_conv2d(srctorch.conv1) ) (out1)
out2 = TorchBatchNorm2D(axis=1, momentum=0.1, epsilon=1e-05, weights=t2kw_bn2d(srctorch.bn2) )(out1)
out2 = KL.Activation( keras.backend.relu ) (out2)
out2 = KL.ZeroPadding2D(padding=(1, 1), data_format='channels_first')(out2)
out2 = KL.convolutional.Conv2D( int(out_planes/4), kernel_size=3, strides=1, data_format='channels_first', padding='valid', use_bias = False, weights=t2kw_conv2d(srctorch.conv2) ) (out2)
out3 = TorchBatchNorm2D(axis=1, momentum=0.1, epsilon=1e-05, weights=t2kw_bn2d(srctorch.bn3) )(out2)
out3 = KL.Activation( keras.backend.relu ) (out3)
out3 = KL.ZeroPadding2D(padding=(1, 1), data_format='channels_first')(out3)
out3 = KL.convolutional.Conv2D( int(out_planes/4), kernel_size=3, strides=1, data_format='channels_first', padding='valid', use_bias = False, weights=t2kw_conv2d(srctorch.conv3) ) (out3)
out3 = KL.Concatenate(axis=1)([out1, out2, out3])
if in_planes != out_planes:
downsample = TorchBatchNorm2D(axis=1, momentum=0.1, epsilon=1e-05, weights=t2kw_bn2d(srctorch.downsample[0]) )(input)
downsample = KL.Activation( keras.backend.relu ) (downsample)
downsample = KL.convolutional.Conv2D( out_planes, kernel_size=1, strides=1, data_format='channels_first', padding='valid', use_bias = False, weights=t2kw_conv2d(srctorch.downsample[2]) ) (downsample)
out3 = KL.add ( [out3, downsample] )
else:
out3 = KL.add ( [out3, input] )
return out3
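# Recursive hourglass: a full-resolution branch plus an average-pooled branch
# that recurses (depth-1) before upsampling; the two paths are summed.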
def KerasHourGlass (depth, input, srctorch):
up1 = KerasConvBlock(256, 256, input, srctorch._modules['b1_%d' % (depth)])
low1 = KL.AveragePooling2D (pool_size=2, strides=2, data_format='channels_first', padding='valid' )(input)
low1 = KerasConvBlock (256, 256, low1, srctorch._modules['b2_%d' % (depth)])
if depth > 1:
low2 = KerasHourGlass (depth-1, low1, srctorch)
else:
low2 = KerasConvBlock(256, 256, low1, srctorch._modules['b2_plus_%d' % (depth)])
low3 = KerasConvBlock(256, 256, low2, srctorch._modules['b3_%d' % (depth)])
up2 = KL.UpSampling2D(size=2, data_format='channels_first') (low3)
return KL.add ( [up1, up2] )
model_path = os.path.join( os.path.dirname(__file__) , "2DFAN-4.h5" )
if os.path.exists (model_path):
t = time.time()
model = keras.models.load_model (model_path, custom_objects={'TorchBatchNorm2D': TorchBatchNorm2D} )
print ('load takes = %f' %( time.time() - t ) )
else:
_input = keras.layers.Input ( shape=(3, 256,256) )
x = KL.ZeroPadding2D(padding=(3, 3), data_format='channels_first')(_input)
x = KL.convolutional.Conv2D( 64, kernel_size=7, strides=2, data_format='channels_first', padding='valid', weights=t2kw_conv2d(fa.conv1) ) (x)
x = TorchBatchNorm2D(axis=1, momentum=0.1, epsilon=1e-05, weights=t2kw_bn2d(fa.bn1) )(x)
x = KL.Activation( keras.backend.relu ) (x)
x = KerasConvBlock (64, 128, x, fa.conv2)
x = KL.AveragePooling2D (pool_size=2, strides=2, data_format='channels_first', padding='valid' ) (x)
x = KerasConvBlock (128, 128, x, fa.conv3)
x = KerasConvBlock (128, 256, x, fa.conv4)
outputs = []
previous = x
for i in range(4):
ll = KerasHourGlass (4, previous, fa._modules['m%d' % (i) ])
ll = KerasConvBlock (256,256, ll, fa._modules['top_m_%d' % (i)])
ll = KL.convolutional.Conv2D(256, kernel_size=1, strides=1, data_format='channels_first', padding='valid', weights=t2kw_conv2d( fa._modules['conv_last%d' % (i)] ) ) (ll)
ll = TorchBatchNorm2D(axis=1, momentum=0.1, epsilon=1e-05, weights=t2kw_bn2d( fa._modules['bn_end%d' % (i)] ) )(ll)
ll = KL.Activation( keras.backend.relu ) (ll)
tmp_out = KL.convolutional.Conv2D(68, kernel_size=1, strides=1, data_format='channels_first', padding='valid', weights=t2kw_conv2d( fa._modules['l%d' % (i)] ) ) (ll)
outputs.append(tmp_out)
if i < 4 - 1:
ll = KL.convolutional.Conv2D(256, kernel_size=1, strides=1, data_format='channels_first', padding='valid', weights=t2kw_conv2d( fa._modules['bl%d' % (i)] ) ) (ll)
previous = KL.add ( [previous, ll, KL.convolutional.Conv2D(256, kernel_size=1, strides=1, data_format='channels_first', padding='valid', weights=t2kw_conv2d( fa._modules['al%d' % (i)] ) ) (tmp_out) ] )
model = keras.models.Model (_input, outputs)
model.compile ( loss='mse', optimizer='adam' )
model.save (model_path)
model.save_weights ( os.path.join( os.path.dirname(__file__) , 'weights.h5') )
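# Map a point between heatmap and original-image coordinates using the face
# center and scale (invert=True goes from heatmap space back to the image).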
def transform(point, center, scale, resolution, invert=False):
_pt = torch.ones(3)
_pt[0] = point[0]
_pt[1] = point[1]
h = 200.0 * scale
t = torch.eye(3)
t[0, 0] = resolution / h
t[1, 1] = resolution / h
t[0, 2] = resolution * (-center[0] / h + 0.5)
t[1, 2] = resolution * (-center[1] / h + 0.5)
if invert:
t = torch.inverse(t)
new_point = (torch.matmul(t, _pt))[0:2]
return new_point.int()
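# Decode landmarks from heatmaps (torch reference): per-channel argmax, a
# quarter-pixel shift toward the higher-valued neighbour, then map to image space.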
def get_preds_fromhm(hm, center=None, scale=None):
max, idx = torch.max( hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2)
idx += 1
preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float()
preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1)
preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1)
for i in range(preds.size(0)):
for j in range(preds.size(1)):
hm_ = hm[i, j, :]
pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1
if pX > 0 and pX < 63 and pY > 0 and pY < 63:
diff = torch.FloatTensor(
[hm_[pY, pX + 1] - hm_[pY, pX - 1],
hm_[pY + 1, pX] - hm_[pY - 1, pX]])
preds[i, j].add_(diff.sign_().mul_(.25))
preds.add_(-.5)
preds_orig = torch.zeros(preds.size())
if center is not None and scale is not None:
for i in range(hm.size(0)):
for j in range(hm.size(1)):
preds_orig[i, j] = transform(
preds[i, j], center, scale, hm.size(2), True)
return preds, preds_orig
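# The same heatmap decoding re-implemented in pure numpy, used below to
# validate the Keras model's output against the torch pipeline.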
def get_preds_fromhm2(a, center=None, scale=None):
b = a.reshape ( (a.shape[0], a.shape[1]*a.shape[2]) )
c = b.argmax(1).reshape ( (a.shape[0], 1) ).repeat(2, axis=1).astype(np.float)
c[:,0] %= a.shape[2]
c[:,1] = np.apply_along_axis ( lambda x: np.floor(x / a.shape[2]), 0, c[:,1] )
for i in range(a.shape[0]):
pX, pY = int(c[i,0]), int(c[i,1])
if pX > 0 and pX < 63 and pY > 0 and pY < 63:
diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] )
c[i] += np.sign(diff)*0.25
c += 0.5
result = np.empty ( (a.shape[0],2), dtype=np.int )
if center is not None and scale is not None:
for i in range(a.shape[0]):
pt = np.array ( [c[i][0], c[i][1], 1.0] )
h = 200.0 * scale
m = np.eye(3)
m[0,0] = a.shape[2] / h
m[1,1] = a.shape[2] / h
m[0,2] = a.shape[2] * ( -center[0] / h + 0.5 )
m[1,2] = a.shape[2] * ( -center[1] / h + 0.5 )
m = np.linalg.inv(m)
result[i] = np.matmul (m, pt)[0:2].astype( np.int )
return result
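# Numerical cross-check: run one random input through both networks and compare
# the raw heatmaps and the decoded landmark coordinates.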
rnd_data = np.random.rand (3, 256,256).astype(np.float32)
#rnd_data = np.random.random_integers (2, size=(3, 256,256)).astype(np.float32)
#rnd_data = np.array ( [[[1]*256]*256]*3 , dtype=np.float32 )
input_data = np.array ([rnd_data])
fa_out_tensor = fa( torch.autograd.Variable( torch.from_numpy(input_data), volatile=True) )[-1].data.cpu()
fa_out = fa_out_tensor.numpy()
t = time.time()
m_out = model.predict ( input_data )[-1]
print ('predict takes = %f' %( time.time() - t ) )
t = time.time()
#fa_base_out = fa_base(torch.autograd.Variable( torch.from_numpy(input_data), volatile=True))[0].data.cpu().numpy()
print ( 'shapes = %s , %s , equal == %s ' % (fa_out.shape, m_out.shape, (fa_out.shape == m_out.shape) ) )
print ( 'allclose == %s' % ( np.allclose(fa_out, m_out) ) )
print ( 'total abs diff outputs = %f' % ( np.sum ( np.abs(np.ndarray.flatten(fa_out-m_out))) ))
###
d = dlib.rectangle(156,364,424,765)
center = torch.FloatTensor(
[d.right() - (d.right() - d.left()) / 2.0, d.bottom() -
(d.bottom() - d.top()) / 2.0])
center[1] = center[1] - (d.bottom() - d.top()) * 0.12
scale = (d.right() - d.left() + d.bottom() - d.top()) / 195.0
pts, pts_img = get_preds_fromhm (fa_out_tensor, center, scale)
pts_img = pts_img.view(68, 2).numpy()
###
m_pts_img = get_preds_fromhm2 (m_out[0], center, scale)
print ('pts1 == pts2 == %s' % ( np.array_equal(pts_img, m_pts_img) ) )
code.interact(local=dict(globals(), **locals()))
#print ( np.array_equal (fa_out, m_out) ) #>>> False
#code.interact(local=dict(globals(), **locals()))
#code.interact(local=locals())
#code.interact(local=locals())
###
#fa.conv1.weight = torch.nn.Parameter( torch.from_numpy ( np.array( [[[[1.0]*7]*7]*3]*64, dtype=np.float32) ) )
#fa.conv1.bias = torch.nn.Parameter( torch.from_numpy ( np.array( [1.0]*64, dtype=np.float32 ) ) )
#model.layers[2].set_weights( [ np.array( [[[[1.0]*64]*3]*7]*7, dtype=np.float32), np.array( [1.0]*64, dtype=np.float32 ) ] )
#b = np.array( [1.0]*64, dtype=np.float32 )
#b = np.random.rand (64).astype(np.float32)
#w = np.array( [[[[1.0]*7]*7]*3]*64, dtype=np.float32)
#w = np.random.rand (64, 3, 7, 7).astype(np.float32)
#s = w #fa_base.conv1.weight.data.cpu().numpy() #64x3x7x7
#d = np.moveaxis(s, [0,1,2,3], [3,2,0,1] )
#fa.conv1.weight = torch.nn.Parameter( torch.from_numpy ( w ) )
#fa.conv1.bias = torch.nn.Parameter( torch.from_numpy ( b ) )
#model.layers[2].set_weights( [np.transpose(w), b] )
#model.layers[2].set_weights( [d, b] )
'''
for i in range(0,64):
for j in range(0,128):
b = np.array_equal (fa_out[i,j], m_out[i,j])
if b == False:
print ( '%d %d == False' %(i,j) ) #>>> False
'''
'''
input = -2.7966828
gamma = 0.7640695571899414
beta = 0.22801123559474945
moving_mean = 0.12693816423416138
moving_variance = 0.10409101098775864
epsilon = 0.0 #0.00001
print ( gamma * (input - moving_mean) / math.sqrt(moving_variance + epsilon) + beta )
print ( (input - moving_mean) * (1.0 / math.sqrt(moving_variance) + epsilon)*gamma + beta )
'''
#code.interact(local=dict(globals(), **locals()))
'''
conv_64_128 = x
conv_64_128 = TorchBatchNorm2D(axis=1, momentum=0.1, epsilon=1e-05, weights=t2kw_bn2d(fa.conv2.bn1) )(conv_64_128)
conv_64_128 = KL.Activation( keras.backend.relu ) (conv_64_128)
conv_64_128 = KL.ZeroPadding2D(padding=(1, 1), data_format='channels_first')(conv_64_128)
conv_64_128 = KL.convolutional.Conv2D( 64, kernel_size=3, strides=1, data_format='channels_first', padding='valid', use_bias = False, weights=t2kw_conv2d(fa.conv2.conv1) ) (conv_64_128)
conv_64_128 = TorchBatchNorm2D(axis=1, momentum=0.1, epsilon=1e-05, weights=t2kw_bn2d(fa.conv2.bn2) )(conv_64_128)
conv_64_128 = KL.Activation( keras.backend.relu ) (conv_64_128)
'''
#
#
#keras result = gamma * (input - moving_mean) / sqrt(moving_variance + epsilon) + beta
#
# (input - mean / scale_factor) / sqrt(var / scale_factor + eps)
#
#input = -3.0322433
#
#gamma = 0.1859646
#beta = -0.17041835
#moving_mean = -3.0345056
#moving_variance = 8.773307
#epsilon = 0.00001
#
#result = - 0.17027631
#
# fa result = 1.930317

__dev/test.py (new file, 1282 lines; diff suppressed because it is too large)

doc/doc_build_and_repository_info.md

@@ -2,6 +2,7 @@
DeepFaceLab officially supports Windows-only. If you want to support Mac/Linux/Docker - create a fork, it will be referenced here.
[Linux fork](https://github.com/lbfs/DeepFaceLab_Linux) by @lbfs
#### **Installing dlib on Windows**

facelib/MTCExtractor.py

@@ -3,15 +3,11 @@ import os
import cv2
from pathlib import Path
from .mtcnn import *
from nnlib import nnlib
class MTCExtractor(object):
def __init__(self, keras, tf, tf_session):
def __init__(self):
self.scale_to = 1920
self.keras = keras
self.tf = tf
self.tf_session = tf_session
self.min_face_size = self.scale_to * 0.042
self.thresh1 = 0.7
@@ -19,25 +15,72 @@ class MTCExtractor(object):
self.thresh3 = 0.6
self.scale_factor = 0.95
exec( nnlib.import_all(), locals(), globals() )
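# P-Net, R-Net and O-Net rebuilt as pure Keras models, loading weights that were
# pre-converted to mtcnn_*.h5; this replaces the previous TensorFlow graph code.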
PNet_Input = Input ( (None, None,3) )
x = PNet_Input
x = Conv2D (10, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x)
x = PReLU (shared_axes=[1,2], name="PReLU1" )(x)
x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x)
x = Conv2D (16, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x)
x = PReLU (shared_axes=[1,2], name="PReLU2" )(x)
x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x)
x = PReLU (shared_axes=[1,2], name="PReLU3" )(x)
prob = Conv2D (2, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv41")(x)
prob = Softmax()(prob)
x = Conv2D (4, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv42")(x)
PNet_model = Model(PNet_Input, [x,prob] )
PNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_pnet.h5').__str__() )
RNet_Input = Input ( (24, 24, 3) )
x = RNet_Input
x = Conv2D (28, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x)
x = PReLU (shared_axes=[1,2], name="prelu1" )(x)
x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x)
x = Conv2D (48, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x)
x = PReLU (shared_axes=[1,2], name="prelu2" )(x)
x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x)
x = Conv2D (64, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv3")(x)
x = PReLU (shared_axes=[1,2], name="prelu3" )(x)
x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x)
x = Dense (128, name='conv4')(x)
x = PReLU (name="prelu4" )(x)
prob = Dense (2, name='conv51')(x)
prob = Softmax()(prob)
x = Dense (4, name='conv52')(x)
RNet_model = Model(RNet_Input, [x,prob] )
RNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_rnet.h5').__str__() )
ONet_Input = Input ( (48, 48, 3) )
x = ONet_Input
x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x)
x = PReLU (shared_axes=[1,2], name="prelu1" )(x)
x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x)
x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x)
x = PReLU (shared_axes=[1,2], name="prelu2" )(x)
x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x)
x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x)
x = PReLU (shared_axes=[1,2], name="prelu3" )(x)
x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x)
x = Conv2D (128, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv4")(x)
x = PReLU (shared_axes=[1,2], name="prelu4" )(x)
x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x)
x = Dense (256, name='conv5')(x)
x = PReLU (name="prelu5" )(x)
prob = Dense (2, name='conv61')(x)
prob = Softmax()(prob)
x1 = Dense (4, name='conv62')(x)
x2 = Dense (10, name='conv63')(x)
ONet_model = Model(ONet_Input, [x1,x2,prob] )
ONet_model.load_weights ( (Path(__file__).parent / 'mtcnn_onet.h5').__str__() )
self.pnet_fun = K.function ( PNet_model.inputs, PNet_model.outputs )
self.rnet_fun = K.function ( RNet_model.inputs, RNet_model.outputs )
self.onet_fun = K.function ( ONet_model.inputs, ONet_model.outputs )
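# Plain K.function predictors keep detect_face() backend-agnostic, so the same
# detection code can run under either the TF or the plaidML Keras backend.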
def __enter__(self):
with self.tf.variable_scope('pnet2'):
data = self.tf.placeholder(self.tf.float32, (None,None,None,3), 'input')
pnet2 = PNet(self.tf, {'data':data})
pnet2.load(str(Path(__file__).parent/'det1.npy'), self.tf_session)
with self.tf.variable_scope('rnet2'):
data = self.tf.placeholder(self.tf.float32, (None,24,24,3), 'input')
rnet2 = RNet(self.tf, {'data':data})
rnet2.load(str(Path(__file__).parent/'det2.npy'), self.tf_session)
with self.tf.variable_scope('onet2'):
data = self.tf.placeholder(self.tf.float32, (None,48,48,3), 'input')
onet2 = ONet(self.tf, {'data':data})
onet2.load(str(Path(__file__).parent/'det3.npy'), self.tf_session)
self.pnet_fun = self.keras.backend.function([pnet2.layers['data']],[pnet2.layers['conv4-2'], pnet2.layers['prob1']])
self.rnet_fun = self.keras.backend.function([rnet2.layers['data']],[rnet2.layers['conv5-2'], rnet2.layers['prob1']])
self.onet_fun = self.keras.backend.function([onet2.layers['data']],[onet2.layers['conv6-2'], onet2.layers['conv6-3'], onet2.layers['prob1']])
faces, pnts = detect_face ( np.zeros ( (self.scale_to, self.scale_to, 3)), self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor )
return self
def __exit__(self, exc_type=None, exc_value=None, traceback=None):
@@ -47,7 +90,6 @@ class MTCExtractor(object):
input_image = input_image[:,:,::-1].copy()
(h, w, ch) = input_image.shape
input_scale = self.scale_to / (w if w > h else h)
input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR)
@@ -56,3 +98,249 @@ class MTCExtractor(object):
return detected_faces
def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
"""Detects faces in an image, and returns bounding boxes and points for them.
img: input image
minsize: minimum face size
pnet, rnet, onet: the three cascade networks (caffemodel weights)
threshold: threshold=[th1, th2, th3]; th1-3 are the thresholds of the three stages
factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
"""
factor_count=0
total_boxes=np.empty((0,9))
points=np.empty(0)
h=img.shape[0]
w=img.shape[1]
minl=np.amin([h, w])
m=12.0/minsize
minl=minl*m
# create scale pyramid
scales=[]
while minl>=12:
scales += [m*np.power(factor, factor_count)]
minl = minl*factor
factor_count += 1
# first stage
for scale in scales:
hs=int(np.ceil(h*scale))
ws=int(np.ceil(w*scale))
#print ('scale %f %d %d' % (scale, ws,hs))
im_data = imresample(img, (hs, ws))
im_data = (im_data-127.5)*0.0078125
img_x = np.expand_dims(im_data, 0)
img_y = np.transpose(img_x, (0,2,1,3))
out = pnet([img_y])
out0 = np.transpose(out[0], (0,2,1,3))
out1 = np.transpose(out[1], (0,2,1,3))
boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])
# inter-scale nms
pick = nms(boxes.copy(), 0.5, 'Union')
if boxes.size>0 and pick.size>0:
boxes = boxes[pick,:]
total_boxes = np.append(total_boxes, boxes, axis=0)
numbox = total_boxes.shape[0]
if numbox>0:
pick = nms(total_boxes.copy(), 0.7, 'Union')
total_boxes = total_boxes[pick,:]
regw = total_boxes[:,2]-total_boxes[:,0]
regh = total_boxes[:,3]-total_boxes[:,1]
qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
total_boxes = rerec(total_boxes.copy())
total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
numbox = total_boxes.shape[0]
if numbox>0:
# second stage
tempimg = np.zeros((24,24,3,numbox))
for k in range(0,numbox):
tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
tempimg[:,:,:,k] = imresample(tmp, (24, 24))
else:
return np.empty( (0,5) ), np.empty(0) # np.empty() with no shape raises TypeError; return empty boxes and points
tempimg = (tempimg-127.5)*0.0078125
tempimg1 = np.transpose(tempimg, (3,1,0,2))
out = rnet([tempimg1])
out0 = np.transpose(out[0])
out1 = np.transpose(out[1])
score = out1[1,:]
ipass = np.where(score>threshold[1])
total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
mv = out0[:,ipass[0]]
if total_boxes.shape[0]>0:
pick = nms(total_boxes, 0.7, 'Union')
total_boxes = total_boxes[pick,:]
total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
total_boxes = rerec(total_boxes.copy())
numbox = total_boxes.shape[0]
if numbox>0:
# third stage
total_boxes = np.fix(total_boxes).astype(np.int32)
dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
tempimg = np.zeros((48,48,3,numbox))
for k in range(0,numbox):
tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
tempimg[:,:,:,k] = imresample(tmp, (48, 48))
else:
return np.empty( (0,5) ), np.empty(0) # np.empty() with no shape raises TypeError; return empty boxes and points
tempimg = (tempimg-127.5)*0.0078125
tempimg1 = np.transpose(tempimg, (3,1,0,2))
out = onet([tempimg1])
out0 = np.transpose(out[0])
out1 = np.transpose(out[1])
out2 = np.transpose(out[2])
score = out2[1,:]
points = out1
ipass = np.where(score>threshold[2])
points = points[:,ipass[0]]
total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
mv = out0[:,ipass[0]]
w = total_boxes[:,2]-total_boxes[:,0]+1
h = total_boxes[:,3]-total_boxes[:,1]+1
points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
if total_boxes.shape[0]>0:
total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
pick = nms(total_boxes.copy(), 0.7, 'Min')
total_boxes = total_boxes[pick,:]
points = points[:,pick]
return total_boxes, points
# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox,reg):
"""Calibrate bounding boxes"""
if reg.shape[1]==1:
reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
w = boundingbox[:,2]-boundingbox[:,0]+1
h = boundingbox[:,3]-boundingbox[:,1]+1
b1 = boundingbox[:,0]+reg[:,0]*w
b2 = boundingbox[:,1]+reg[:,1]*h
b3 = boundingbox[:,2]+reg[:,2]*w
b4 = boundingbox[:,3]+reg[:,3]*h
boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
return boundingbox
def generateBoundingBox(imap, reg, scale, t):
"""Use heatmap to generate bounding boxes"""
stride=2
cellsize=12
imap = np.transpose(imap)
dx1 = np.transpose(reg[:,:,0])
dy1 = np.transpose(reg[:,:,1])
dx2 = np.transpose(reg[:,:,2])
dy2 = np.transpose(reg[:,:,3])
y, x = np.where(imap >= t)
if y.shape[0]==1:
dx1 = np.flipud(dx1)
dy1 = np.flipud(dy1)
dx2 = np.flipud(dx2)
dy2 = np.flipud(dy2)
score = imap[(y,x)]
reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
if reg.size==0:
reg = np.empty((0,3))
bb = np.transpose(np.vstack([y,x]))
q1 = np.fix((stride*bb+1)/scale)
q2 = np.fix((stride*bb+cellsize-1+1)/scale)
boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
return boundingbox, reg
# function pick = nms(boxes,threshold,type)
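# Greedy non-maximum suppression: repeatedly keep the highest-scoring box and drop
# boxes that overlap it too much; 'Min' divides the intersection by the smaller
# box's area, any other method uses IoU ('Union').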
def nms(boxes, threshold, method):
if boxes.size==0:
return np.empty((0,3))
x1 = boxes[:,0]
y1 = boxes[:,1]
x2 = boxes[:,2]
y2 = boxes[:,3]
s = boxes[:,4]
area = (x2-x1+1) * (y2-y1+1)
I = np.argsort(s)
pick = np.zeros_like(s, dtype=np.int16)
counter = 0
while I.size>0:
i = I[-1]
pick[counter] = i
counter += 1
idx = I[0:-1]
xx1 = np.maximum(x1[i], x1[idx])
yy1 = np.maximum(y1[i], y1[idx])
xx2 = np.minimum(x2[i], x2[idx])
yy2 = np.minimum(y2[i], y2[idx])
w = np.maximum(0.0, xx2-xx1+1)
h = np.maximum(0.0, yy2-yy1+1)
inter = w * h
if method == 'Min': # 'is' compared object identity on strings, not equality
o = inter / np.minimum(area[i], area[idx])
else:
o = inter / (area[i] + area[idx] - inter)
I = I[np.where(o<=threshold)]
pick = pick[0:counter]
return pick
# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
"""Compute the padding coordinates (pad the bounding boxes to square)"""
tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
numbox = total_boxes.shape[0]
dx = np.ones((numbox), dtype=np.int32)
dy = np.ones((numbox), dtype=np.int32)
edx = tmpw.copy().astype(np.int32)
edy = tmph.copy().astype(np.int32)
x = total_boxes[:,0].copy().astype(np.int32)
y = total_boxes[:,1].copy().astype(np.int32)
ex = total_boxes[:,2].copy().astype(np.int32)
ey = total_boxes[:,3].copy().astype(np.int32)
tmp = np.where(ex>w)
edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
ex[tmp] = w
tmp = np.where(ey>h)
edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
ey[tmp] = h
tmp = np.where(x<1)
dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
x[tmp] = 1
tmp = np.where(y<1)
dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
y[tmp] = 1
return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
"""Convert bboxA to square."""
h = bboxA[:,3]-bboxA[:,1]
w = bboxA[:,2]-bboxA[:,0]
l = np.maximum(w, h)
bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
return bboxA
def imresample(img, sz):
im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_LINEAR) #@UndefinedVariable
return im_data

Binary file not shown.

facelib/mtcnn.py (deleted)

@@ -1,761 +0,0 @@
# Source: https://github.com/davidsandberg/facenet/blob/master/src/align/
""" Tensorflow implementation of the face detection / alignment algorithm found at
https://github.com/kpzhang93/MTCNN_face_detection_alignment
"""
# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from six import string_types, iteritems
import numpy as np
#from math import floor
import cv2
import os
def layer(op):
"""Decorator for composable network layers."""
def layer_decorated(self, *args, **kwargs):
# Automatically set a name if not provided.
name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
# Figure out the layer inputs.
if len(self.terminals) == 0:
raise RuntimeError('No input variables found for layer %s.' % name)
elif len(self.terminals) == 1:
layer_input = self.terminals[0]
else:
layer_input = list(self.terminals)
# Perform the operation and get the output.
layer_output = op(self, layer_input, *args, **kwargs)
# Add to layer LUT.
self.layers[name] = layer_output
# This output is now the input for the next layer.
self.feed(layer_output)
# Return self for chained calls.
return self
return layer_decorated
class Network(object):
def __init__(self, tf, inputs, trainable=True):
# The input nodes for this network
self.tf = tf
self.inputs = inputs
# The current list of terminal nodes
self.terminals = []
# Mapping from layer names to layers
self.layers = dict(inputs)
# If true, the resulting variables are set as trainable
self.trainable = trainable
self.setup()
def setup(self):
"""Construct the network. """
raise NotImplementedError('Must be implemented by the subclass.')
def load(self, data_path, session, ignore_missing=False):
"""Load network weights.
data_path: The path to the numpy-serialized network weights
session: The current TensorFlow session
ignore_missing: If true, serialized weights for missing layers are ignored.
"""
data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member
for op_name in data_dict:
with self.tf.variable_scope(op_name, reuse=True):
for param_name, data in iteritems(data_dict[op_name]):
try:
var = self.tf.get_variable(param_name)
session.run(var.assign(data))
except ValueError:
if not ignore_missing:
raise
def feed(self, *args):
"""Set the input(s) for the next operation by replacing the terminal nodes.
The arguments can be either layer names or the actual layers.
"""
assert len(args) != 0
self.terminals = []
for fed_layer in args:
if isinstance(fed_layer, string_types):
try:
fed_layer = self.layers[fed_layer]
except KeyError:
raise KeyError('Unknown layer name fed: %s' % fed_layer)
self.terminals.append(fed_layer)
return self
def get_output(self):
"""Returns the current network output."""
return self.terminals[-1]
def get_unique_name(self, prefix):
"""Returns an index-suffixed unique name for the given prefix.
This is used for auto-generating layer names based on the type-prefix.
"""
ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1
return '%s_%d' % (prefix, ident)
def make_var(self, name, shape):
"""Creates a new TensorFlow variable."""
return self.tf.get_variable(name, shape, trainable=self.trainable)
def validate_padding(self, padding):
"""Verifies that the padding is one of the supported ones."""
assert padding in ('SAME', 'VALID')
@layer
def conv(self,
inp,
k_h,
k_w,
c_o,
s_h,
s_w,
name,
relu=True,
padding='SAME',
group=1,
biased=True):
# Verify that the padding is acceptable
self.validate_padding(padding)
# Get the number of channels in the input
c_i = int(inp.get_shape()[-1])
# Verify that the grouping parameter is valid
assert c_i % group == 0
assert c_o % group == 0
# Convolution for a given input and kernel
convolve = lambda i, k: self.tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)
with self.tf.variable_scope(name) as scope:
kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o])
# This is the common-case. Convolve the input without any further complications.
output = convolve(inp, kernel)
# Add the biases
if biased:
biases = self.make_var('biases', [c_o])
output = self.tf.nn.bias_add(output, biases)
if relu:
# ReLU non-linearity
output = self.tf.nn.relu(output, name=scope.name)
return output
@layer
def prelu(self, inp, name):
with self.tf.variable_scope(name):
i = int(inp.get_shape()[-1])
alpha = self.make_var('alpha', shape=(i,))
output = self.tf.nn.relu(inp) + self.tf.multiply(alpha, -self.tf.nn.relu(-inp))
return output
@layer
def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'):
self.validate_padding(padding)
return self.tf.nn.max_pool(inp,
ksize=[1, k_h, k_w, 1],
strides=[1, s_h, s_w, 1],
padding=padding,
name=name)
@layer
def fc(self, inp, num_out, name, relu=True):
with self.tf.variable_scope(name):
input_shape = inp.get_shape()
if input_shape.ndims == 4:
# The input is spatial. Vectorize it first.
dim = 1
for d in input_shape[1:].as_list():
dim *= int(d)
feed_in = self.tf.reshape(inp, [-1, dim])
else:
feed_in, dim = (inp, input_shape[-1].value)
weights = self.make_var('weights', shape=[dim, num_out])
biases = self.make_var('biases', [num_out])
op = self.tf.nn.relu_layer if relu else self.tf.nn.xw_plus_b
fc = op(feed_in, weights, biases, name=name)
return fc
"""
Multi dimensional softmax,
refer to https://github.com/tensorflow/tensorflow/issues/210
compute softmax along the dimension of target
the native softmax only supports batch_size x dimension
"""
@layer
def softmax(self, target, axis, name=None):
max_axis = self.tf.reduce_max(target, axis, keepdims=True)
target_exp = self.tf.exp(target-max_axis)
normalize = self.tf.reduce_sum(target_exp, axis, keepdims=True)
softmax = self.tf.div(target_exp, normalize, name)
return softmax
class PNet(Network):
def setup(self):
(self.feed('data') #pylint: disable=no-value-for-parameter, no-member
.conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1')
.prelu(name='PReLU1')
.max_pool(2, 2, 2, 2, name='pool1')
.conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2')
.prelu(name='PReLU2')
.conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3')
.prelu(name='PReLU3')
.conv(1, 1, 2, 1, 1, relu=False, name='conv4-1')
.softmax(3,name='prob1'))
(self.feed('PReLU3') #pylint: disable=no-value-for-parameter
.conv(1, 1, 4, 1, 1, relu=False, name='conv4-2'))
class RNet(Network):
def setup(self):
(self.feed('data') #pylint: disable=no-value-for-parameter, no-member
.conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1')
.prelu(name='prelu1')
.max_pool(3, 3, 2, 2, name='pool1')
.conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2')
.prelu(name='prelu2')
.max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
.conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3')
.prelu(name='prelu3')
.fc(128, relu=False, name='conv4')
.prelu(name='prelu4')
.fc(2, relu=False, name='conv5-1')
.softmax(1,name='prob1'))
(self.feed('prelu4') #pylint: disable=no-value-for-parameter
.fc(4, relu=False, name='conv5-2'))
class ONet(Network):
def setup(self):
(self.feed('data') #pylint: disable=no-value-for-parameter, no-member
.conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1')
.prelu(name='prelu1')
.max_pool(3, 3, 2, 2, name='pool1')
.conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2')
.prelu(name='prelu2')
.max_pool(3, 3, 2, 2, padding='VALID', name='pool2')
.conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3')
.prelu(name='prelu3')
.max_pool(2, 2, 2, 2, name='pool3')
.conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4')
.prelu(name='prelu4')
.fc(256, relu=False, name='conv5')
.prelu(name='prelu5')
.fc(2, relu=False, name='conv6-1')
.softmax(1, name='prob1'))
(self.feed('prelu5') #pylint: disable=no-value-for-parameter
.fc(4, relu=False, name='conv6-2'))
(self.feed('prelu5') #pylint: disable=no-value-for-parameter
.fc(10, relu=False, name='conv6-3'))
def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
"""Detects faces in an image, and returns bounding boxes and points for them.
img: input image
minsize: minimum faces' size
pnet, rnet, onet: caffemodel
threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold
factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
"""
factor_count=0
total_boxes=np.empty((0,9))
points=np.empty(0)
h=img.shape[0]
w=img.shape[1]
minl=np.amin([h, w])
m=12.0/minsize
minl=minl*m
# create scale pyramid
scales=[]
while minl>=12:
scales += [m*np.power(factor, factor_count)]
minl = minl*factor
factor_count += 1
# first stage
for scale in scales:
hs=int(np.ceil(h*scale))
ws=int(np.ceil(w*scale))
#print ('scale %f %d %d' % (scale, ws,hs))
im_data = imresample(img, (hs, ws))
im_data = (im_data-127.5)*0.0078125
img_x = np.expand_dims(im_data, 0)
img_y = np.transpose(img_x, (0,2,1,3))
out = pnet([img_y])
out0 = np.transpose(out[0], (0,2,1,3))
out1 = np.transpose(out[1], (0,2,1,3))
boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0])
# inter-scale nms
pick = nms(boxes.copy(), 0.5, 'Union')
if boxes.size>0 and pick.size>0:
boxes = boxes[pick,:]
total_boxes = np.append(total_boxes, boxes, axis=0)
numbox = total_boxes.shape[0]
if numbox>0:
pick = nms(total_boxes.copy(), 0.7, 'Union')
total_boxes = total_boxes[pick,:]
regw = total_boxes[:,2]-total_boxes[:,0]
regh = total_boxes[:,3]-total_boxes[:,1]
qq1 = total_boxes[:,0]+total_boxes[:,5]*regw
qq2 = total_boxes[:,1]+total_boxes[:,6]*regh
qq3 = total_boxes[:,2]+total_boxes[:,7]*regw
qq4 = total_boxes[:,3]+total_boxes[:,8]*regh
total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]]))
total_boxes = rerec(total_boxes.copy())
total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32)
dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
numbox = total_boxes.shape[0]
if numbox>0:
# second stage
tempimg = np.zeros((24,24,3,numbox))
for k in range(0,numbox):
tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
tempimg[:,:,:,k] = imresample(tmp, (24, 24))
else:
return np.empty()
tempimg = (tempimg-127.5)*0.0078125
tempimg1 = np.transpose(tempimg, (3,1,0,2))
out = rnet([tempimg1])
out0 = np.transpose(out[0])
out1 = np.transpose(out[1])
score = out1[1,:]
ipass = np.where(score>threshold[1])
total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
mv = out0[:,ipass[0]]
if total_boxes.shape[0]>0:
pick = nms(total_boxes, 0.7, 'Union')
total_boxes = total_boxes[pick,:]
total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick]))
total_boxes = rerec(total_boxes.copy())
numbox = total_boxes.shape[0]
if numbox>0:
# third stage
total_boxes = np.fix(total_boxes).astype(np.int32)
dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
tempimg = np.zeros((48,48,3,numbox))
for k in range(0,numbox):
tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3))
tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:]
if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0:
tempimg[:,:,:,k] = imresample(tmp, (48, 48))
else:
return np.empty()
tempimg = (tempimg-127.5)*0.0078125
tempimg1 = np.transpose(tempimg, (3,1,0,2))
out = onet([tempimg1])
out0 = np.transpose(out[0])
out1 = np.transpose(out[1])
out2 = np.transpose(out[2])
score = out2[1,:]
points = out1
ipass = np.where(score>threshold[2])
points = points[:,ipass[0]]
total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)])
mv = out0[:,ipass[0]]
w = total_boxes[:,2]-total_boxes[:,0]+1
h = total_boxes[:,3]-total_boxes[:,1]+1
points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1
points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1
if total_boxes.shape[0]>0:
total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
pick = nms(total_boxes.copy(), 0.7, 'Min')
total_boxes = total_boxes[pick,:]
points = points[:,pick]
return total_boxes, points
def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
"""Detects faces in a list of images
images: list containing input images
detection_window_size_ratio: ratio of minimum face size to smallest image dimension
pnet, rnet, onet: caffemodel
threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold [0-1]
factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
"""
all_scales = [None] * len(images)
images_with_boxes = [None] * len(images)
for i in range(len(images)):
images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}
# create scale pyramid
for index, img in enumerate(images):
all_scales[index] = []
h = img.shape[0]
w = img.shape[1]
minsize = int(detection_window_size_ratio * np.minimum(w, h))
factor_count = 0
minl = np.amin([h, w])
if minsize <= 12:
minsize = 12
m = 12.0 / minsize
minl = minl * m
while minl >= 12:
all_scales[index].append(m * np.power(factor, factor_count))
minl = minl * factor
factor_count += 1
# # # # # # # # # # # # #
# first stage - fast proposal network (pnet) to obtain face candidates
# # # # # # # # # # # # #
images_obj_per_resolution = {}
# TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images
for index, scales in enumerate(all_scales):
h = images[index].shape[0]
w = images[index].shape[1]
for scale in scales:
hs = int(np.ceil(h * scale))
ws = int(np.ceil(w * scale))
if (ws, hs) not in images_obj_per_resolution:
images_obj_per_resolution[(ws, hs)] = []
im_data = imresample(images[index], (hs, ws))
im_data = (im_data - 127.5) * 0.0078125
img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering
images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})
for resolution in images_obj_per_resolution:
images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
outs = pnet(images_per_resolution)
for index in range(len(outs[0])):
scale = images_obj_per_resolution[resolution][index]['scale']
image_index = images_obj_per_resolution[resolution][index]['index']
out0 = np.transpose(outs[0][index], (1, 0, 2))
out1 = np.transpose(outs[1][index], (1, 0, 2))
boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
# inter-scale nms
pick = nms(boxes.copy(), 0.5, 'Union')
if boxes.size > 0 and pick.size > 0:
boxes = boxes[pick, :]
images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
boxes,
axis=0)
for index, image_obj in enumerate(images_with_boxes):
numbox = image_obj['total_boxes'].shape[0]
if numbox > 0:
h = images[index].shape[0]
w = images[index].shape[1]
pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
numbox = image_obj['total_boxes'].shape[0]
tempimg = np.zeros((24, 24, 3, numbox))
if numbox > 0:
for k in range(0, numbox):
tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
tempimg[:, :, :, k] = imresample(tmp, (24, 24))
else:
return np.empty()
tempimg = (tempimg - 127.5) * 0.0078125
image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
# # # # # # # # # # # # #
# second stage - refinement of face candidates with rnet
# # # # # # # # # # # # #
bulk_rnet_input = np.empty((0, 24, 24, 3))
for index, image_obj in enumerate(images_with_boxes):
if 'rnet_input' in image_obj:
bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
out = rnet(bulk_rnet_input)
out0 = np.transpose(out[0])
out1 = np.transpose(out[1])
score = out1[1, :]
i = 0
for index, image_obj in enumerate(images_with_boxes):
if 'rnet_input' not in image_obj:
continue
rnet_input_count = image_obj['rnet_input'].shape[0]
score_per_image = score[i:i + rnet_input_count]
out0_per_image = out0[:, i:i + rnet_input_count]
ipass = np.where(score_per_image > threshold[1])
image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
np.expand_dims(score_per_image[ipass].copy(), 1)])
mv = out0_per_image[:, ipass[0]]
if image_obj['total_boxes'].shape[0] > 0:
h = images[index].shape[0]
w = images[index].shape[1]
pick = nms(image_obj['total_boxes'], 0.7, 'Union')
image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
numbox = image_obj['total_boxes'].shape[0]
if numbox > 0:
tempimg = np.zeros((48, 48, 3, numbox))
image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
for k in range(0, numbox):
tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0:
tempimg[:, :, :, k] = imresample(tmp, (48, 48))
else:
return np.empty()
tempimg = (tempimg - 127.5) * 0.0078125
image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
i += rnet_input_count
# # # # # # # # # # # # #
# third stage - further refinement and facial landmarks positions with onet
# # # # # # # # # # # # #
bulk_onet_input = np.empty((0, 48, 48, 3))
for index, image_obj in enumerate(images_with_boxes):
if 'onet_input' in image_obj:
bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
out = onet(bulk_onet_input)
out0 = np.transpose(out[0])
out1 = np.transpose(out[1])
out2 = np.transpose(out[2])
score = out2[1, :]
points = out1
i = 0
ret = []
for index, image_obj in enumerate(images_with_boxes):
if 'onet_input' not in image_obj:
ret.append(None)
continue
onet_input_count = image_obj['onet_input'].shape[0]
out0_per_image = out0[:, i:i + onet_input_count]
score_per_image = score[i:i + onet_input_count]
points_per_image = points[:, i:i + onet_input_count]
ipass = np.where(score_per_image > threshold[2])
points_per_image = points_per_image[:, ipass[0]]
image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
np.expand_dims(score_per_image[ipass].copy(), 1)])
mv = out0_per_image[:, ipass[0]]
w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
image_obj['total_boxes'][:, 0], (5, 1)) - 1
points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
image_obj['total_boxes'][:, 1], (5, 1)) - 1
if image_obj['total_boxes'].shape[0] > 0:
image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
points_per_image = points_per_image[:, pick]
ret.append((image_obj['total_boxes'], points_per_image))
else:
ret.append(None)
i += onet_input_count
return ret
# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox,reg):
"""Calibrate bounding boxes"""
if reg.shape[1]==1:
reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
w = boundingbox[:,2]-boundingbox[:,0]+1
h = boundingbox[:,3]-boundingbox[:,1]+1
b1 = boundingbox[:,0]+reg[:,0]*w
b2 = boundingbox[:,1]+reg[:,1]*h
b3 = boundingbox[:,2]+reg[:,2]*w
b4 = boundingbox[:,3]+reg[:,3]*h
boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
return boundingbox
def generateBoundingBox(imap, reg, scale, t):
"""Use heatmap to generate bounding boxes"""
stride=2
cellsize=12
imap = np.transpose(imap)
dx1 = np.transpose(reg[:,:,0])
dy1 = np.transpose(reg[:,:,1])
dx2 = np.transpose(reg[:,:,2])
dy2 = np.transpose(reg[:,:,3])
y, x = np.where(imap >= t)
if y.shape[0]==1:
dx1 = np.flipud(dx1)
dy1 = np.flipud(dy1)
dx2 = np.flipud(dx2)
dy2 = np.flipud(dy2)
score = imap[(y,x)]
reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
if reg.size==0:
reg = np.empty((0,3))
bb = np.transpose(np.vstack([y,x]))
q1 = np.fix((stride*bb+1)/scale)
q2 = np.fix((stride*bb+cellsize-1+1)/scale)
boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
return boundingbox, reg
# function pick = nms(boxes,threshold,type)
def nms(boxes, threshold, method):
if boxes.size==0:
return np.empty((0,3))
x1 = boxes[:,0]
y1 = boxes[:,1]
x2 = boxes[:,2]
y2 = boxes[:,3]
s = boxes[:,4]
area = (x2-x1+1) * (y2-y1+1)
I = np.argsort(s)
pick = np.zeros_like(s, dtype=np.int16)
counter = 0
while I.size>0:
i = I[-1]
pick[counter] = i
counter += 1
idx = I[0:-1]
xx1 = np.maximum(x1[i], x1[idx])
yy1 = np.maximum(y1[i], y1[idx])
xx2 = np.minimum(x2[i], x2[idx])
yy2 = np.minimum(y2[i], y2[idx])
w = np.maximum(0.0, xx2-xx1+1)
h = np.maximum(0.0, yy2-yy1+1)
inter = w * h
if method is 'Min':
o = inter / np.minimum(area[i], area[idx])
else:
o = inter / (area[i] + area[idx] - inter)
I = I[np.where(o<=threshold)]
pick = pick[0:counter]
return pick
# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
"""Compute the padding coordinates (pad the bounding boxes to square)"""
tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
numbox = total_boxes.shape[0]
dx = np.ones((numbox), dtype=np.int32)
dy = np.ones((numbox), dtype=np.int32)
edx = tmpw.copy().astype(np.int32)
edy = tmph.copy().astype(np.int32)
x = total_boxes[:,0].copy().astype(np.int32)
y = total_boxes[:,1].copy().astype(np.int32)
ex = total_boxes[:,2].copy().astype(np.int32)
ey = total_boxes[:,3].copy().astype(np.int32)
tmp = np.where(ex>w)
edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
ex[tmp] = w
tmp = np.where(ey>h)
edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
ey[tmp] = h
tmp = np.where(x<1)
dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
x[tmp] = 1
tmp = np.where(y<1)
dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
y[tmp] = 1
return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
"""Convert bboxA to square."""
h = bboxA[:,3]-bboxA[:,1]
w = bboxA[:,2]-bboxA[:,0]
l = np.maximum(w, h)
bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
return bboxA
def imresample(img, sz):
im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_LINEAR) #@UndefinedVariable
return im_data
# This method is kept for debugging purpose
# h=img.shape[0]
# w=img.shape[1]
# hs, ws = sz
# dx = float(w) / ws
# dy = float(h) / hs
# im_data = np.zeros((hs,ws,3))
# for a1 in range(0,hs):
# for a2 in range(0,ws):
# for a3 in range(0,3):
# im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
# return im_data

facelib/mtcnn_pnet.h5 (new binary file, not shown)

main.py

@@ -137,6 +137,8 @@ if __name__ == "__main__":
if arguments.tf_suppress_std:
os.environ['TF_SUPPRESS_STD'] = '1'
#os.environ['force_plaidML'] = '1'
arguments.func(arguments)
print ("Done.")

mainscripts/Converter.py

@@ -149,7 +149,8 @@ class ConvertSubprocessor(SubprocessorBase):
files_processed = 1
faces_processed = 0
output_filename_path = self.output_path / filename_path.name
output_filename_path = self.output_path / (filename_path.stem + '.png')
if self.converter.get_mode() == ConverterBase.MODE_FACE and filename_path.stem not in self.alignments.keys():
if not self.debug:
print ( 'no faces found for %s, copying without faces' % (filename_path.name) )

mainscripts/Extractor.py

@@ -62,23 +62,35 @@ class ExtractSubprocessor(SubprocessorBase):
cv2.setMouseCallback(self.wnd_name, onMouse, self.param)
def get_devices_for_type (self, type, multi_gpu):
if (type == 'rects' or type == 'landmarks'):
def get_devices_for_type (self, type, multi_gpu, cpu_only):
if not cpu_only and (type == 'rects' or type == 'landmarks'):
if type == 'rects' and self.detector == 'mt' and nnlib.device.backend == "plaidML":
cpu_only = True
else:
if multi_gpu:
devices = nnlib.device.getDevicesWithAtLeastTotalMemoryGB(2)
devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2)
if not multi_gpu or len(devices) == 0:
devices = [nnlib.device.getBestDeviceIdx()]
devices = [nnlib.device.getBestValidDeviceIdx()]
if len(devices) == 0:
devices = [0]
devices = [ (idx, nnlib.device.getDeviceName(idx), nnlib.device.getDeviceVRAMTotalGb(idx) ) for idx in devices]
for idx in devices:
dev_name = nnlib.device.getDeviceName(idx)
dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx)
elif type == 'final':
devices = [ (i, 'CPU%d' % (i), 0 ) for i in range(0, multiprocessing.cpu_count()) ]
if not self.manual and self.type == 'rects' and self.detector == 'mt':
for i in range ( int (max (1, dev_vram / 2) ) ):
yield (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram)
else:
yield (idx, 'GPU', dev_name, dev_vram)
return devices
if cpu_only and (type == 'rects' or type == 'landmarks'):
for i in range( min(8, multiprocessing.cpu_count() // 2) ):
yield (i, 'CPU', 'CPU%d' % (i), 0 )
if type == 'final':
for i in range( min(8, multiprocessing.cpu_count()) ):
yield (i, 'CPU', 'CPU%d' % (i), 0 )
#override
def process_info_generator(self):
@@ -89,31 +101,13 @@ class ExtractSubprocessor(SubprocessorBase):
'output_dir': str(self.output_path),
'detector': self.detector}
if not self.cpu_only:
for (device_idx, device_name, device_total_vram_gb) in self.get_devices_for_type(self.type, self.multi_gpu):
num_processes = 1
if not self.manual and self.type == 'rects' and self.detector == 'mt':
num_processes = int ( max (1, device_total_vram_gb / 2) )
for i in range(0, num_processes ):
for (device_idx, device_type, device_name, device_total_vram_gb) in self.get_devices_for_type(self.type, self.multi_gpu, self.cpu_only):
client_dict = base_dict.copy()
client_dict['device_idx'] = device_idx
client_dict['device_name'] = device_name if num_processes == 1 else '%s #%d' % (device_name,i)
client_dict['device_type'] = 'GPU'
client_dict['device_name'] = device_name
client_dict['device_type'] = device_type
yield client_dict['device_name'], {}, client_dict
else:
num_processes = 1
if not self.manual and self.type == 'rects' and self.detector == 'mt':
num_processes = int ( max (1, multiprocessing.cpu_count() / 2 ) )
for i in range(0, num_processes ):
client_dict = base_dict.copy()
client_dict['device_idx'] = 0
client_dict['device_name'] = 'CPU' if num_processes == 1 else 'CPU #%d' % (i),
client_dict['device_type'] = 'CPU'
yield client_dict['device_name'], {}, client_dict
#override
def get_no_process_started_message(self):
@@ -265,13 +259,12 @@ class ExtractSubprocessor(SubprocessorBase):
self.detector = client_dict['detector']
self.e = None
device_config = nnlib.DeviceConfig ( cpu_only=self.cpu_only, force_gpu_idx=self.device_idx, allow_growth=True)
if self.type == 'rects':
if self.detector is not None:
if self.detector == 'mt':
nnlib.import_all (device_config)
self.e = facelib.MTCExtractor(nnlib.keras, nnlib.tf, nnlib.tf_sess)
self.e = facelib.MTCExtractor()
elif self.detector == 'dlib':
nnlib.import_dlib (device_config)
self.e = facelib.DLIBExtractor(nnlib.dlib)


@ -22,7 +22,7 @@ class ModelBase(object):
def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, debug = False, force_gpu_idx=-1, **in_options):
if force_gpu_idx == -1:
idxs_names_list = nnlib.device.getAllDevicesIdxsWithNamesList()
idxs_names_list = nnlib.device.getValidDevicesIdxsWithNamesList()
if len(idxs_names_list) > 1:
print ("You have multi GPUs in a system: ")
for idx, name in idxs_names_list:


@ -16,14 +16,14 @@ class Model(ModelBase):
def onInitializeOptions(self, is_first_run, ask_override):
if is_first_run or ask_override:
def_pixel_loss = self.options.get('pixel_loss', False)
self.options['pixel_loss'] = input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="The default DSSIM loss is good for initially learning the structure of faces. Use pixel loss after 20k epochs to enhance fine details and remove face jitter.")
self.options['pixel_loss'] = input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="The default DSSIM loss is good for initially learning the structure of faces. Use pixel loss after 20k epochs to enhance fine details and decrease face jitter.")
else:
self.options['pixel_loss'] = self.options.get('pixel_loss', False)
#override
def onInitialize(self, **in_options):
exec(nnlib.import_all(), locals(), globals())
self.set_vram_batch_requirements( {4.5:4,5:6,6:8,7:16,8:24,9:24,10:32,11:32,12:32,13:48} )
self.set_vram_batch_requirements( {4.5:4} )
ae_input_layer = Input(shape=(128, 128, 3))
mask_layer = Input(shape=(128, 128, 1)) #same as output
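
Note: this H128 hunk, and the matching hunks for the other models below, replace the old graded VRAM table with a single entry, matching the new flat default batch size of 4. A hypothetical resolver illustrating the assumed semantics of the {min_vram_gb: batch_size} dict (the real lookup lives in ModelBase):

# Assumed semantics: the largest VRAM threshold not exceeding the available
# VRAM selects the batch size; below all thresholds, fall back to 1.
def resolve_batch_size(table, vram_gb, fallback=1):
    keys = sorted(k for k in table if k <= vram_gb)
    return table[keys[-1]] if keys else fallback

print(resolve_batch_size({4.5: 4}, 8))   # -> 4, the new flat default
print(resolve_batch_size({4.5: 4}, 3))   # -> 1, card below the threshold
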


@ -24,14 +24,14 @@ class Model(ModelBase):
if is_first_run or ask_override:
def_pixel_loss = self.options.get('pixel_loss', False)
self.options['pixel_loss'] = input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="The default DSSIM loss is good for initially learning the structure of faces. Use pixel loss after 20k epochs to enhance fine details and remove face jitter.")
self.options['pixel_loss'] = input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="The default DSSIM loss is good for initially learning the structure of faces. Use pixel loss after 20k epochs to enhance fine details and decrease face jitter.")
else:
self.options['pixel_loss'] = self.options.get('pixel_loss', False)
#override
def onInitialize(self, **in_options):
exec(nnlib.import_all(), locals(), globals())
self.set_vram_batch_requirements( {2.5:2,3:2,4:2,4:4,5:8,6:12,7:16,8:16,9:24,10:24,11:32,12:32,13:48} )
self.set_vram_batch_requirements( {2.5:4} )
bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build( self.options['lighter_ae'] )
if not self.is_first_run():


@ -24,14 +24,14 @@ class Model(ModelBase):
if is_first_run or ask_override:
def_pixel_loss = self.options.get('pixel_loss', False)
self.options['pixel_loss'] = input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="The default DSSIM loss is good for initially learning the structure of faces. Use pixel loss after 20k epochs to enhance fine details and remove face jitter.")
self.options['pixel_loss'] = input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="The default DSSIM loss is good for initially learning the structure of faces. Use pixel loss after 20k epochs to enhance fine details and decrease face jitter.")
else:
self.options['pixel_loss'] = self.options.get('pixel_loss', False)
#override
def onInitialize(self, **in_options):
exec(nnlib.import_all(), locals(), globals())
self.set_vram_batch_requirements( {1.5:2,2:2,3:8,4:16,5:24,6:32,7:40,8:48} )
self.set_vram_batch_requirements( {1.5:4} )
bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.options['lighter_ae'])


@ -17,14 +17,14 @@ class Model(ModelBase):
def onInitializeOptions(self, is_first_run, ask_override):
if is_first_run or ask_override:
def_pixel_loss = self.options.get('pixel_loss', False)
self.options['pixel_loss'] = input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="The default DSSIM loss is good for initially learning the structure of faces. Use pixel loss after 20k epochs to enhance fine details and remove face jitter.")
self.options['pixel_loss'] = input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="The default DSSIM loss is good for initially learning the structure of faces. Use pixel loss after 20k epochs to enhance fine details and decrease face jitter.")
else:
self.options['pixel_loss'] = self.options.get('pixel_loss', False)
#override
def onInitialize(self, **in_options):
exec(nnlib.import_all(), locals(), globals())
self.set_vram_batch_requirements( {4.5:4,5:4,6:8,7:12,8:16,9:20,10:24,11:24,12:32,13:48} )
self.set_vram_batch_requirements( {4.5:4} )
ae_input_layer = Input(shape=(128, 128, 3))
mask_layer = Input(shape=(128, 128, 1)) #same as output


@ -29,29 +29,13 @@ class SAEModel(ModelBase):
if is_first_run:
self.options['resolution'] = input_int("Resolution (64,128 ?:help skip:128) : ", default_resolution, [64,128], help_message="More resolution requires more VRAM.")
self.options['face_type'] = input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower()
self.options['learn_mask'] = input_bool ("Learn mask? (y/n, ?:help skip:y) : ", True, help_message="Learning the mask can help the model to recognize face directions. Learning without a mask reduces model size, but then the converter is forced to use a 'not predicted mask' that is not as smooth as a predicted one. A model with style values can be trained without a mask and produce the same quality result.")
self.options['archi'] = input_str ("AE architecture (df, liae, ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae'], help_message="DF keeps faces more natural, while LIAE can fix overly different face shapes.").lower()
self.options['lighter_encoder'] = input_bool ("Use lightweight encoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight encoder is 35% faster, requires less VRAM, sacrificing overall quality.")
self.options['learn_mask'] = input_bool ("Learn mask? (y/n, ?:help skip:y) : ", True, help_message="Choose NO to reduce model size. In that case the converter is forced to use a 'not predicted mask' that is not as smooth as a predicted one. A styled SAE can learn without a mask and produce the same quality fake.")
else:
self.options['resolution'] = self.options.get('resolution', default_resolution)
self.options['face_type'] = self.options.get('face_type', default_face_type)
self.options['archi'] = self.options.get('archi', default_archi)
self.options['lighter_encoder'] = self.options.get('lighter_encoder', False)
self.options['learn_mask'] = self.options.get('learn_mask', True)
default_face_style_power = 10.0
if is_first_run or ask_override:
default_face_style_power = default_face_style_power if is_first_run else self.options.get('face_style_power', default_face_style_power)
self.options['face_style_power'] = np.clip ( input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power, help_message="How fast the NN will learn the dst face style during generalization of src and dst faces. Once the style is learned well enough, set this value to 0.01 to prevent artifacts from appearing."), 0.0, 100.0 )
else:
self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power)
default_bg_style_power = 10.0
if is_first_run or ask_override:
default_bg_style_power = default_bg_style_power if is_first_run else self.options.get('bg_style_power', default_bg_style_power)
self.options['bg_style_power'] = np.clip ( input_number("Background style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power, help_message="How fast the NN will learn the dst background style during generalization of src and dst faces. Once the style is learned well enough, set this value to 0.1-0.3 to prevent artifacts from appearing."), 0.0, 100.0 )
else:
self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power)
self.options['archi'] = self.options.get('archi', default_archi)
default_ae_dims = 256 if self.options['archi'] == 'liae' else 512
default_ed_ch_dims = 42
@ -62,13 +46,36 @@ class SAEModel(ModelBase):
self.options['ae_dims'] = self.options.get('ae_dims', default_ae_dims)
self.options['ed_ch_dims'] = self.options.get('ed_ch_dims', default_ed_ch_dims)
if is_first_run:
self.options['lighter_encoder'] = input_bool ("Use lightweight encoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight encoder is 35% faster, requires less VRAM, but sacrificing overall quality.")
self.options['multiscale_decoder'] = input_bool ("Use multiscale decoder? (y/n, ?:help skip:y) : ", True, help_message="Multiscale decoder helps to get better details.")
else:
self.options['lighter_encoder'] = self.options.get('lighter_encoder', False)
self.options['multiscale_decoder'] = self.options.get('multiscale_decoder', True)
default_face_style_power = 0.0
default_bg_style_power = 0.0
if is_first_run or ask_override:
def_pixel_loss = self.options.get('pixel_loss', False)
self.options['pixel_loss'] = input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="The default DSSIM loss is good for initially learning the structure of faces. Use pixel loss after 15-25k epochs to enhance fine details and decrease face jitter.")
default_face_style_power = default_face_style_power if is_first_run else self.options.get('face_style_power', default_face_style_power)
self.options['face_style_power'] = np.clip ( input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power,
help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k epochs, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes."), 0.0, 100.0 )
default_bg_style_power = default_bg_style_power if is_first_run else self.options.get('bg_style_power', default_bg_style_power)
self.options['bg_style_power'] = np.clip ( input_number("Background style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power,
help_message="Learn to transfer image around face. This can make face more like dst."), 0.0, 100.0 )
else:
self.options['pixel_loss'] = self.options.get('pixel_loss', False)
self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power)
self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power)
#override
def onInitialize(self, **in_options):
exec(nnlib.import_all(), locals(), globals())
self.set_vram_batch_requirements({2:1,3:2,4:3,5:6,6:8,7:12,8:16})
self.set_vram_batch_requirements({1.5:4})
resolution = self.options['resolution']
ae_dims = self.options['ae_dims']
@ -77,7 +84,9 @@ class SAEModel(ModelBase):
bgr_shape = (resolution, resolution, 3)
mask_shape = (resolution, resolution, 1)
dssim_pixel_alpha = Input( (1,) )
self.ms_count = ms_count = 3 if self.options['multiscale_decoder'] else 1
epoch_alpha = Input( (1,) )
warped_src = Input(bgr_shape)
target_src = Input(bgr_shape)
target_srcm = Input(mask_shape)
@ -86,6 +95,11 @@ class SAEModel(ModelBase):
target_dst = Input(bgr_shape)
target_dstm = Input(mask_shape)
target_src_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)]
target_srcm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)]
target_dst_ar = [ Input ( ( bgr_shape[0] // (2**i) ,)*2 + (bgr_shape[-1],) ) for i in range(ms_count-1, -1, -1)]
target_dstm_ar = [ Input ( ( mask_shape[0] // (2**i) ,)*2 + (mask_shape[-1],) ) for i in range(ms_count-1, -1, -1)]
if self.options['archi'] == 'liae':
self.encoder = modelify(SAEModel.LIAEEncFlow(resolution, adapt_k_size, self.options['lighter_encoder'], ed_ch_dims=ed_ch_dims) ) (Input(bgr_shape))
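
Note: the target_*_ar lists above build one Input per decoder scale, ordered coarse to fine. A standalone sketch of the shapes for the default settings:

# Coarse-to-fine target pyramid for resolution=128, ms_count=3.
resolution, ms_count = 128, 3
shapes = [(resolution // (2**i),) * 2 + (3,) for i in range(ms_count - 1, -1, -1)]
print(shapes)  # [(32, 32, 3), (64, 64, 3), (128, 128, 3)]
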
@ -96,10 +110,10 @@ class SAEModel(ModelBase):
inter_output_Inputs = [ Input( np.array(K.int_shape(x)[1:])*(1,1,2) ) for x in self.inter_B.outputs ]
self.decoder = modelify(SAEModel.LIAEDecFlow (bgr_shape[2],ed_ch_dims=ed_ch_dims//2, multiscale_decoder=True)) (inter_output_Inputs)
self.decoder = modelify(SAEModel.LIAEDecFlow (bgr_shape[2],ed_ch_dims=ed_ch_dims//2, multiscale_count=self.ms_count )) (inter_output_Inputs)
if self.options['learn_mask']:
self.decoderm = modelify(SAEModel.LIAEDecFlow (mask_shape[2],ed_ch_dims=int(ed_ch_dims/1.5), multiscale_decoder=False )) (inter_output_Inputs)
self.decoderm = modelify(SAEModel.LIAEDecFlow (mask_shape[2],ed_ch_dims=int(ed_ch_dims/1.5) )) (inter_output_Inputs)
if not self.is_first_run():
self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5))
@ -129,19 +143,17 @@ class SAEModel(ModelBase):
pred_dst_dstm = self.decoderm(warped_dst_inter_code)
pred_src_dstm = self.decoderm(warped_src_dst_inter_code)
else:
self.encoder = modelify(SAEModel.DFEncFlow(resolution, adapt_k_size, self.options['lighter_encoder'], ae_dims=ae_dims, ed_ch_dims=ed_ch_dims) ) (Input(bgr_shape))
dec_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ]
self.decoder_src = modelify(SAEModel.DFDecFlow (bgr_shape[2],ed_ch_dims=ed_ch_dims//2, multiscale_decoder=True)) (dec_Inputs)
self.decoder_dst = modelify(SAEModel.DFDecFlow (bgr_shape[2],ed_ch_dims=ed_ch_dims//2, multiscale_decoder=True)) (dec_Inputs)
self.decoder_src = modelify(SAEModel.DFDecFlow (bgr_shape[2],ed_ch_dims=ed_ch_dims//2, multiscale_count=self.ms_count )) (dec_Inputs)
self.decoder_dst = modelify(SAEModel.DFDecFlow (bgr_shape[2],ed_ch_dims=ed_ch_dims//2, multiscale_count=self.ms_count )) (dec_Inputs)
if self.options['learn_mask']:
self.decoder_srcm = modelify(SAEModel.DFDecFlow (mask_shape[2],ed_ch_dims=int(ed_ch_dims/1.5), multiscale_decoder=False)) (dec_Inputs)
self.decoder_dstm = modelify(SAEModel.DFDecFlow (mask_shape[2],ed_ch_dims=int(ed_ch_dims/1.5), multiscale_decoder=False)) (dec_Inputs)
self.decoder_srcm = modelify(SAEModel.DFDecFlow (mask_shape[2],ed_ch_dims=int(ed_ch_dims/1.5) )) (dec_Inputs)
self.decoder_dstm = modelify(SAEModel.DFDecFlow (mask_shape[2],ed_ch_dims=int(ed_ch_dims/1.5) )) (dec_Inputs)
if not self.is_first_run():
self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5))
@ -167,18 +179,11 @@ class SAEModel(ModelBase):
if self.options['learn_mask']:
pred_src_srcm, pred_dst_dstm, pred_src_dstm = [ [x] if type(x) != list else x for x in [pred_src_srcm, pred_dst_dstm, pred_src_dstm] ]
ms_count = len(pred_src_src)
target_src_ar = [ target_src if i == 0 else tf.image.resize_bicubic( target_src, (resolution // (2**i) ,)*2 ) for i in range(ms_count-1, -1, -1)]
target_srcm_ar = [ target_srcm if i == 0 else tf.image.resize_bicubic( target_srcm, (resolution // (2**i) ,)*2 ) for i in range(ms_count-1, -1, -1)]
target_dst_ar = [ target_dst if i == 0 else tf.image.resize_bicubic( target_dst, (resolution // (2**i) ,)*2 ) for i in range(ms_count-1, -1, -1)]
target_dstm_ar = [ target_dstm if i == 0 else tf.image.resize_bicubic( target_dstm, (resolution // (2**i) ,)*2 ) for i in range(ms_count-1, -1, -1)]
target_srcm_blurred_ar = [ tf_gaussian_blur( max(1, x.get_shape().as_list()[1] // 32) )(x) for x in target_srcm_ar]
target_srcm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_srcm_ar]
target_srcm_sigm_ar = [ x / 2.0 + 0.5 for x in target_srcm_blurred_ar]
target_srcm_anti_sigm_ar = [ 1.0 - x for x in target_srcm_sigm_ar]
target_dstm_blurred_ar = [ tf_gaussian_blur( max(1, x.get_shape().as_list()[1] // 32) )(x) for x in target_dstm_ar]
target_dstm_blurred_ar = [ gaussian_blur( max(1, K.int_shape(x)[1] // 32) )(x) for x in target_dstm_ar]
target_dstm_sigm_ar = [ x / 2.0 + 0.5 for x in target_dstm_blurred_ar]
target_dstm_anti_sigm_ar = [ 1.0 - x for x in target_dstm_sigm_ar]
@ -200,8 +205,6 @@ class SAEModel(ModelBase):
def optimizer():
return Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
dssim_pixel_alpha_value = dssim_pixel_alpha[0][0]
if self.options['archi'] == 'liae':
src_dst_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights
if self.options['learn_mask']:
@ -211,34 +214,50 @@ class SAEModel(ModelBase):
if self.options['learn_mask']:
src_dst_mask_loss_train_weights = self.encoder.trainable_weights + self.decoder_srcm.trainable_weights + self.decoder_dstm.trainable_weights
src_dssim_loss_batch = sum([ ( 100*K.square(tf_dssim(2.0)( target_src_masked_ar[i], pred_src_src_sigm_ar[i] * target_srcm_sigm_ar[i] ) )) for i in range(len(target_src_masked_ar)) ])
src_pixel_loss_batch = sum([ tf_reduce_mean ( 100*K.square( target_src_masked_ar[i] - pred_src_src_sigm_ar[i] * target_srcm_sigm_ar[i] ), axis=[1,2,3]) for i in range(len(target_src_masked_ar)) ])
if not self.options['pixel_loss']:
src_loss_batch = sum([ ( 100*K.square( dssim(max_value=2.0)( target_src_masked_ar[i], pred_src_src_sigm_ar[i] * target_srcm_sigm_ar[i] ) )) for i in range(len(target_src_masked_ar)) ])
else:
src_loss_batch = sum([ K.mean ( 100*K.square( target_src_masked_ar[i] - pred_src_src_sigm_ar[i] * target_srcm_sigm_ar[i] ), axis=[1,2,3]) for i in range(len(target_src_masked_ar)) ])
src_loss_batch = src_dssim_loss_batch*(1.0-dssim_pixel_alpha_value) + src_pixel_loss_batch*dssim_pixel_alpha_value
src_loss = K.mean(src_loss_batch)
if self.options['face_style_power'] != 0:
face_style_power = self.options['face_style_power'] / 100.0
src_loss += tf_style_loss(gaussian_blur_radius=resolution // 8, loss_weight=0.2*face_style_power)( psd_target_dst_masked_ar[-1], target_dst_masked_ar[-1] )
if self.options['bg_style_power'] != 0:
if face_style_power != 0:
src_loss += style_loss(gaussian_blur_radius=resolution//16, loss_weight=face_style_power, wnd_size=0)( psd_target_dst_masked_ar[-1], target_dst_masked_ar[-1] )
bg_style_power = self.options['bg_style_power'] / 100.0
bg_dssim_loss = K.mean( (100*bg_style_power)*K.square(tf_dssim(2.0)( psd_target_dst_anti_masked_ar[-1], target_dst_anti_masked_ar[-1] )))
bg_pixel_loss = K.mean( (100*bg_style_power)*K.square( psd_target_dst_anti_masked_ar[-1] - target_dst_anti_masked_ar[-1] ))
src_loss += bg_dssim_loss*(1.0-dssim_pixel_alpha_value) + bg_pixel_loss*dssim_pixel_alpha_value
if bg_style_power != 0:
if not self.options['pixel_loss']:
bg_loss = K.mean( (100*bg_style_power)*K.square(dssim(max_value=2.0)( psd_target_dst_anti_masked_ar[-1], target_dst_anti_masked_ar[-1] )))
else:
bg_loss = K.mean( (100*bg_style_power)*K.square( psd_target_dst_anti_masked_ar[-1] - target_dst_anti_masked_ar[-1] ))
src_loss += bg_loss
if not self.options['pixel_loss']:
dst_loss_batch = sum([ ( 100*K.square(dssim(max_value=2.0)( target_dst_masked_ar[i], pred_dst_dst_sigm_ar[i] * target_dstm_sigm_ar[i] ) )) for i in range(len(target_dst_masked_ar)) ])
else:
dst_loss_batch = sum([ K.mean ( 100*K.square( target_dst_masked_ar[i] - pred_dst_dst_sigm_ar[i] * target_dstm_sigm_ar[i] ), axis=[1,2,3]) for i in range(len(target_dst_masked_ar)) ])
dst_dssim_loss_batch = sum([ ( 100*K.square(tf_dssim(2.0)( target_dst_masked_ar[i], pred_dst_dst_sigm_ar[i] * target_dstm_sigm_ar[i] ) )) for i in range(len(target_dst_masked_ar)) ])
dst_pixel_loss_batch = sum([ tf_reduce_mean ( 100*K.square( target_dst_masked_ar[i] - pred_dst_dst_sigm_ar[i] * target_dstm_sigm_ar[i] ), axis=[1,2,3]) for i in range(len(target_dst_masked_ar)) ])
dst_loss_batch = dst_dssim_loss_batch*(1.0-dssim_pixel_alpha_value) + dst_pixel_loss_batch*dssim_pixel_alpha_value
dst_loss = K.mean(dst_loss_batch)
self.src_dst_train = K.function ([dssim_pixel_alpha, warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm ],[src_loss,dst_loss,src_loss_batch,dst_loss_batch], optimizer().get_updates(src_loss+dst_loss, src_dst_loss_train_weights) )
feed = [warped_src, warped_dst]
feed += target_src_ar[::-1]
feed += target_srcm_ar[::-1]
feed += target_dst_ar[::-1]
feed += target_dstm_ar[::-1]
self.src_dst_train = K.function (feed,[src_loss,dst_loss], optimizer().get_updates(src_loss+dst_loss, src_dst_loss_train_weights) )
if self.options['learn_mask']:
src_mask_loss = sum([ K.mean(K.square(target_srcm_ar[-1]-pred_src_srcm[-1])) for i in range(len(target_srcm_ar)) ])
dst_mask_loss = sum([ K.mean(K.square(target_dstm_ar[-1]-pred_dst_dstm[-1])) for i in range(len(target_dstm_ar)) ])
self.src_dst_mask_train = K.function ([warped_src, target_srcm, warped_dst, target_dstm],[src_mask_loss, dst_mask_loss], optimizer().get_updates(src_mask_loss+dst_mask_loss, src_dst_mask_loss_train_weights) )
feed = [ warped_src, warped_dst]
feed += target_srcm_ar[::-1]
feed += target_dstm_ar[::-1]
self.src_dst_mask_train = K.function (feed,[src_mask_loss, dst_mask_loss], optimizer().get_updates(src_mask_loss+dst_mask_loss, src_dst_mask_loss_train_weights) )
if self.options['learn_mask']:
self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src[-1], pred_dst_dst[-1], pred_src_dst[-1], pred_src_dstm[-1]])
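
Note: the epoch-driven DSSIM-to-MSE alpha blend is gone; the pixel_loss option now selects one loss or the other outright. A sketch of that selection, assuming keras is installed and dssim_fn stands for nnlib's dssim(max_value=2.0) (normally brought in via nnlib.import_all()):

from keras import backend as K

# Either/or reconstruction loss over already-masked tensors, mirroring the
# src/dst loss terms above.
def masked_recon_loss(pixel_loss, target_masked, pred_masked, dssim_fn):
    if not pixel_loss:
        return 100 * K.square(dssim_fn(target_masked, pred_masked))
    return K.mean(100 * K.square(target_masked - pred_masked), axis=[1, 2, 3])
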
@ -257,21 +276,20 @@ class SAEModel(ModelBase):
f = SampleProcessor.TypeFlags
face_type = f.FACE_ALIGN_FULL if self.options['face_type'] == 'f' else f.FACE_ALIGN_HALF
output_sample_types=[ [f.WARPED_TRANSFORMED | face_type | f.MODE_BGR, resolution] ]
output_sample_types += [ [f.TRANSFORMED | face_type | f.MODE_BGR, resolution // (2**i) ] for i in range(ms_count)]
output_sample_types += [ [f.TRANSFORMED | face_type | f.MODE_M | f.FACE_MASK_FULL, resolution // (2**i) ] for i in range(ms_count)]
self.set_training_data_generators ([
SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None,
debug=self.is_debug(), batch_size=self.batch_size,
sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, normalize_tanh = True, scale_range=np.array([-0.05, 0.05])+self.src_scale_mod / 100.0 ),
output_sample_types=[ [f.WARPED_TRANSFORMED | face_type | f.MODE_BGR, resolution],
[f.TRANSFORMED | face_type | f.MODE_BGR, resolution],
[f.TRANSFORMED | face_type | f.MODE_M | f.FACE_MASK_FULL, resolution]
], add_sample_idx=True ),
output_sample_types=output_sample_types ),
SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size,
sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, normalize_tanh = True),
output_sample_types=[ [f.WARPED_TRANSFORMED | face_type | f.MODE_BGR, resolution],
[f.TRANSFORMED | face_type | f.MODE_BGR, resolution],
[f.TRANSFORMED | face_type | f.MODE_M | f.FACE_MASK_FULL, resolution]
], add_sample_idx=True )
output_sample_types=output_sample_types )
])
#override
def onSave(self):
@ -297,17 +315,20 @@ class SAEModel(ModelBase):
#override
def onTrainOneEpoch(self, generators_samples, generators_list):
warped_src, target_src, target_src_mask, src_sample_idxs = generators_samples[0]
warped_dst, target_dst, target_dst_mask, dst_sample_idxs = generators_samples[1]
src_samples = generators_samples[0]
dst_samples = generators_samples[1]
dssim_pixel_alpha = np.clip ( (self.epoch - 5000) / 15000.0, 0.0, 1.0 ) #smooth transition between DSSIM and MSE in 5-20k epochs
dssim_pixel_alpha = np.repeat( dssim_pixel_alpha, (self.batch_size,) )
dssim_pixel_alpha = np.expand_dims(dssim_pixel_alpha,-1)
feed = [src_samples[0], dst_samples[0] ] + \
src_samples[1:1+self.ms_count*2] + \
dst_samples[1:1+self.ms_count*2]
src_loss, dst_loss, src_sample_losses, dst_sample_losses = self.src_dst_train ([dssim_pixel_alpha, warped_src, target_src, target_src_mask, warped_dst, target_dst, target_dst_mask])
src_loss, dst_loss, = self.src_dst_train (feed)
if self.options['learn_mask']:
src_mask_loss, dst_mask_loss, = self.src_dst_mask_train ([warped_src, target_src_mask, warped_dst, target_dst_mask])
feed = [ src_samples[0], dst_samples[0] ] + \
src_samples[1+self.ms_count:1+self.ms_count*2] + \
dst_samples[1+self.ms_count:1+self.ms_count*2]
src_mask_loss, dst_mask_loss, = self.src_dst_mask_train (feed)
return ( ('src_loss', src_loss), ('dst_loss', dst_loss) )
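
Note: each generator sample is now laid out as [warped, ms_count targets fine-to-coarse, ms_count masks fine-to-coarse], which is what the two feed slices above index into. A standalone sketch for the defaults:

# Layout and slices for ms_count=3 at resolution 128.
ms_count = 3
sample = (['warped']
          + ['bgr@%d' % (128 // 2**i) for i in range(ms_count)]
          + ['mask@%d' % (128 // 2**i) for i in range(ms_count)])
print(sample[1:1 + ms_count*2])             # targets+masks fed to src_dst_train
print(sample[1 + ms_count:1 + ms_count*2])  # masks only, for the mask trainer
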
@ -430,7 +451,7 @@ class SAEModel(ModelBase):
return func
@staticmethod
def LIAEDecFlow(output_nc,ed_ch_dims=21, multiscale_decoder=True):
def LIAEDecFlow(output_nc,ed_ch_dims=21, multiscale_count=1):
exec (nnlib.import_all(), locals(), globals())
ed_dims = output_nc * ed_ch_dims
@ -449,12 +470,12 @@ class SAEModel(ModelBase):
outputs = []
x1 = upscale(ed_dims*8)( x )
if multiscale_decoder:
if multiscale_count >= 3:
outputs += [ to_bgr() ( x1 ) ]
x2 = upscale(ed_dims*4)( x1 )
if multiscale_decoder:
if multiscale_count >= 2:
outputs += [ to_bgr() ( x2 ) ]
x3 = upscale(ed_dims*2)( x2 )
@ -513,7 +534,7 @@ class SAEModel(ModelBase):
return func
@staticmethod
def DFDecFlow(output_nc, ed_ch_dims=21, multiscale_decoder=True):
def DFDecFlow(output_nc, ed_ch_dims=21, multiscale_count=1):
exec (nnlib.import_all(), locals(), globals())
ed_dims = output_nc * ed_ch_dims
@ -535,12 +556,12 @@ class SAEModel(ModelBase):
outputs = []
x1 = upscale(ed_dims*8)( x )
if multiscale_decoder:
if multiscale_count >= 3:
outputs += [ to_bgr() ( x1 ) ]
x2 = upscale(ed_dims*4)( x1 )
if multiscale_decoder:
if multiscale_count >= 2:
outputs += [ to_bgr() ( x2 ) ]
x3 = upscale(ed_dims*2)( x2 )
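
Note: the boolean multiscale_decoder flag is replaced by an integer multiscale_count, so the mask decoders simply take the default of 1. A sketch of the contract shared by LIAEDecFlow and DFDecFlow:

# A decoder built with count N emits N outputs, coarsest first; count=1
# reproduces the old single-scale (H/DF/LIAEF-style) behaviour.
def decoder_output_scales(multiscale_count=1):
    outputs = []
    if multiscale_count >= 3:
        outputs.append('bgr @ 1/4 resolution')
    if multiscale_count >= 2:
        outputs.append('bgr @ 1/2 resolution')
    outputs.append('bgr @ full resolution')
    return outputs

print(decoder_output_scales(3))  # three taps for the multiscale decoder
print(decoder_output_scales(1))  # single tap, legacy behaviour
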

nnlib/device.py Normal file

@ -0,0 +1,333 @@
import os
import json
import numpy as np
from .pynvml import *
tf_min_req_cap = 37 #min req compute capability for tensorflow-gpu==1.11.0
class device:
backend = None
class Config():
force_gpu_idx = -1
multi_gpu = False
force_gpu_idxs = None
choose_worst_gpu = False
gpu_idxs = []
gpu_names = []
gpu_compute_caps = []
gpu_vram_gb = []
allow_growth = True
use_fp16 = False
cpu_only = False
backend = None
def __init__ (self, force_gpu_idx = -1,
multi_gpu = False,
force_gpu_idxs = None,
choose_worst_gpu = False,
allow_growth = True,
use_fp16 = False,
cpu_only = False,
**in_options):
self.backend = device.backend
self.use_fp16 = use_fp16
self.cpu_only = cpu_only
if not self.cpu_only:
self.cpu_only = (self.backend == "tensorflow-cpu")
if not self.cpu_only:
self.force_gpu_idx = force_gpu_idx
self.multi_gpu = multi_gpu
self.force_gpu_idxs = force_gpu_idxs
self.choose_worst_gpu = choose_worst_gpu
self.allow_growth = allow_growth
self.gpu_idxs = []
if force_gpu_idxs is not None:
for idx in force_gpu_idxs.split(','):
idx = int(idx)
if device.isValidDeviceIdx(idx):
self.gpu_idxs.append(idx)
else:
gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and device.isValidDeviceIdx(force_gpu_idx)) else device.getBestValidDeviceIdx() if not choose_worst_gpu else device.getWorstValidDeviceIdx()
if gpu_idx != -1:
if self.multi_gpu:
self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx )
if len(self.gpu_idxs) <= 1:
self.multi_gpu = False
else:
self.gpu_idxs = [gpu_idx]
self.cpu_only = (len(self.gpu_idxs) == 0)
if not self.cpu_only:
self.gpu_names = []
self.gpu_compute_caps = []
self.gpu_vram_gb = []
for gpu_idx in self.gpu_idxs:
self.gpu_names += [device.getDeviceName(gpu_idx)]
self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ]
self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ]
self.cpu_only = (len(self.gpu_idxs) == 0)
if self.cpu_only:
self.backend = "tensorflow-cpu"
@staticmethod
def getValidDeviceIdxsEnumerator():
if device.backend == "plaidML":
for i in range(plaidML_devices_count):
yield i
elif device.backend == "tensorflow":
for gpu_idx in range(nvmlDeviceGetCount()):
cap = device.getDeviceComputeCapability (gpu_idx)
if cap >= tf_min_req_cap:
yield gpu_idx
elif device.backend == "tensorflow-generic":
yield 0
@staticmethod
def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb):
result = []
if device.backend == "plaidML":
for i in device.getValidDeviceIdxsEnumerator():
if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024:
result.append (i)
elif device.backend == "tensorflow":
for i in device.getValidDeviceIdxsEnumerator():
handle = nvmlDeviceGetHandleByIndex(i)
memInfo = nvmlDeviceGetMemoryInfo( handle )
if (memInfo.total) >= totalmemsize_gb*1024*1024*1024:
result.append (i)
elif device.backend == "tensorflow-generic":
return [0]
return result
@staticmethod
def getAllDevicesIdxsList():
if device.backend == "plaidML":
return [ *range(plaidML_devices_count) ]
elif device.backend == "tensorflow":
return [ *range(nvmlDeviceGetCount() ) ]
elif device.backend == "tensorflow-generic":
return [0]
@staticmethod
def getValidDevicesIdxsWithNamesList():
if device.backend == "plaidML":
return [ (i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ]
elif device.backend == "tensorflow":
return [ (i, nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() ) for i in device.getValidDeviceIdxsEnumerator() ]
elif device.backend == "tensorflow-cpu":
return [ (0, 'CPU') ]
elif device.backend == "tensorflow-generic":
return [ (0, device.getDeviceName(0) ) ]
@staticmethod
def getDeviceVRAMTotalGb (idx):
if device.backend == "plaidML":
if idx < plaidML_devices_count:
return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024)
elif device.backend == "tensorflow":
if idx < nvmlDeviceGetCount():
memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(idx) )
return round ( memInfo.total / (1024*1024*1024) )
return 0
elif device.backend == "tensorflow-generic":
return 2
@staticmethod
def getBestValidDeviceIdx():
if device.backend == "plaidML":
idx = -1
idx_mem = 0
for i in device.getValidDeviceIdxsEnumerator():
total = plaidML_devices[i]['globalMemSize']
if total > idx_mem:
idx = i
idx_mem = total
return idx
elif device.backend == "tensorflow":
idx = -1
idx_mem = 0
for i in device.getValidDeviceIdxsEnumerator():
memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) )
if memInfo.total > idx_mem:
idx = i
idx_mem = memInfo.total
return idx
elif device.backend == "tensorflow-generic":
return 0
@staticmethod
def getWorstValidDeviceIdx():
if device.backend == "plaidML":
idx = -1
idx_mem = sys.maxsize
for i in device.getValidDeviceIdxsEnumerator():
total = plaidML_devices[i]['globalMemSize']
if total < idx_mem:
idx = i
idx_mem = total
return idx
elif device.backend == "tensorflow":
idx = -1
idx_mem = sys.maxsize
for i in device.getValidDeviceIdxsEnumerator():
memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) )
if memInfo.total < idx_mem:
idx = i
idx_mem = memInfo.total
return idx
elif device.backend == "tensorflow-generic":
return 0
@staticmethod
def isValidDeviceIdx(idx):
if device.backend == "plaidML":
return idx in [*device.getValidDeviceIdxsEnumerator()]
elif device.backend == "tensorflow":
return idx in [*device.getValidDeviceIdxsEnumerator()]
elif device.backend == "tensorflow-generic":
return (idx == 0)
@staticmethod
def getDeviceIdxsEqualModel(idx):
if device.backend == "plaidML":
result = []
idx_name = plaidML_devices[idx]['description']
for i in device.getValidDeviceIdxsEnumerator():
if plaidML_devices[i]['description'] == idx_name:
result.append (i)
return result
elif device.backend == "tensorflow":
result = []
idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
for i in device.getValidDeviceIdxsEnumerator():
if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name:
result.append (i)
return result
elif device.backend == "tensorflow-generic":
return [0] if idx == 0 else []
@staticmethod
def getDeviceName (idx):
if device.backend == "plaidML":
if idx < plaidML_devices_count:
return plaidML_devices[idx]['description']
elif device.backend == "tensorflow":
if idx < nvmlDeviceGetCount():
return nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
elif device.backend == "tensorflow-generic":
if idx == 0:
return "Generic GeForce GPU"
return None
@staticmethod
def getDeviceID (idx):
if device.backend == "plaidML":
if idx < plaidML_devices_count:
return plaidML_devices[idx]['id'].decode()
return None
@staticmethod
def getDeviceComputeCapability(idx):
result = 0
if device.backend == "plaidML":
return 99
elif device.backend == "tensorflow":
if idx < nvmlDeviceGetCount():
result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
elif device.backend == "tensorflow-generic":
return 99 if idx == 0 else 0
return result[0] * 10 + result[1]
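
Note: the capability value is packed as major*10 + minor, so tf_min_req_cap == 37 corresponds to CUDA compute capability 3.7, the stated minimum for tensorflow-gpu 1.11.0. A quick check with a hypothetical CC 6.1 card:

major, minor = 6, 1
print(major * 10 + minor >= 37)  # True -> the card is valid for the tf backend
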
force_plaidML = os.environ.get("force_plaidML", "0") == "1"
has_nvml = False
has_nvml_cap = False
has_nvidia_device = False
plaidML_devices = []
# Using plaidML OpenCL backend to determine system devices and has_nvidia_device
try:
os.environ['PLAIDML_EXPERIMENTAL'] = 'false' #lets plaidML work without running 'plaidml-setup' first
import plaidml
ctx = plaidml.Context()
for d in plaidml.devices(ctx, return_all=True)[0]:
details = json.loads(d.details)
if 'nvidia' in details['vendor'].lower():
has_nvidia_device = True
plaidML_devices += [ {'id':d.id,
'globalMemSize' : int(details['globalMemSize']),
'description' : d.description.decode()
}]
ctx.shutdown()
except:
pass
plaidML_devices_count = len(plaidML_devices)
#choosing backend
if device.backend is None:
#first try to load NVSMI and detect CUDA devices for the tensorflow backend,
#even when force_plaidML is chosen, so that tensorflow stays available if plaidML fails
try:
nvmlInit()
has_nvml = True
device.backend = "tensorflow" #set tensorflow backend in order to use device.*device() functions
gpu_idxs = device.getAllDevicesIdxsList()
gpu_caps = np.array ( [ device.getDeviceComputeCapability(gpu_idx) for gpu_idx in gpu_idxs ] )
if len ( np.ndarray.flatten ( np.argwhere (gpu_caps >= tf_min_req_cap) ) ) == 0:
if not force_plaidML:
print ("No CUDA devices found with minimum required compute capability: %d.%d. Falling back to OpenCL mode." % (tf_min_req_cap // 10, tf_min_req_cap % 10) )
device.backend = None
nvmlShutdown()
else:
has_nvml_cap = True
except:
#an exception here means NVSMI is not installed
device.backend = None
has_nvml = False
if device.backend is None or force_plaidML:
#the tensorflow backend failed or plaidML is forced, so try the plaidML backend
if plaidML_devices_count == 0:
print ("plaidML: No capable OpenCL devices found. Falling back to tensorflow backend.")
device.backend = None
else:
device.backend = "plaidML"
if device.backend is None:
if not has_nvml:
if has_nvidia_device:
#some notebook systems have an NVIDIA card but no NVSMI in the official drivers
#in that case assume one capable GPU and let tensorflow choose the best device
device.backend = "tensorflow-generic"
else:
#no NVSMI, no NVIDIA cards, and plaidML failed too, so CPU only
device.backend = "tensorflow-cpu"
else:
if has_nvml_cap:
#NVSMI and capable CUDA devices are present but force_plaidML failed, so choose tensorflow
device.backend = "tensorflow"
else:
#NVSMI is present but there are no capable CUDA devices and plaidML failed, so CPU only
device.backend = "tensorflow-cpu"


nnlib/devicelib.py Deleted file

@ -1,186 +0,0 @@
from .pynvml import *
try:
nvmlInit()
hasNVML = True
except:
hasNVML = False
class devicelib:
class Config():
force_gpu_idx = -1
multi_gpu = False
force_gpu_idxs = None
choose_worst_gpu = False
gpu_idxs = []
gpu_names = []
gpu_compute_caps = []
gpu_vram_gb = []
allow_growth = True
use_fp16 = False
cpu_only = False
def __init__ (self, force_gpu_idx = -1,
multi_gpu = False,
force_gpu_idxs = None,
choose_worst_gpu = False,
allow_growth = True,
use_fp16 = False,
cpu_only = False,
**in_options):
self.use_fp16 = use_fp16
if cpu_only:
self.cpu_only = True
else:
self.force_gpu_idx = force_gpu_idx
self.multi_gpu = multi_gpu
self.force_gpu_idxs = force_gpu_idxs
self.choose_worst_gpu = choose_worst_gpu
self.allow_growth = allow_growth
self.gpu_idxs = []
if force_gpu_idxs is not None:
for idx in force_gpu_idxs.split(','):
idx = int(idx)
if devicelib.isValidDeviceIdx(idx):
self.gpu_idxs.append(idx)
else:
gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and devicelib.isValidDeviceIdx(force_gpu_idx)) else devicelib.getBestDeviceIdx() if not choose_worst_gpu else devicelib.getWorstDeviceIdx()
if gpu_idx != -1:
if self.multi_gpu:
self.gpu_idxs = devicelib.getDeviceIdxsEqualModel( gpu_idx )
if len(self.gpu_idxs) <= 1:
self.multi_gpu = False
else:
self.gpu_idxs = [gpu_idx]
self.cpu_only = (len(self.gpu_idxs) == 0)
if not self.cpu_only:
self.gpu_names = []
self.gpu_compute_caps = []
for gpu_idx in self.gpu_idxs:
self.gpu_names += [devicelib.getDeviceName(gpu_idx)]
self.gpu_compute_caps += [ devicelib.getDeviceComputeCapability ( gpu_idx ) ]
self.gpu_vram_gb += [ devicelib.getDeviceVRAMTotalGb ( gpu_idx ) ]
@staticmethod
def getDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb):
if not hasNVML:
return [0]
result = []
for i in range(nvmlDeviceGetCount()):
handle = nvmlDeviceGetHandleByIndex(i)
memInfo = nvmlDeviceGetMemoryInfo( handle )
if (memInfo.total) >= totalmemsize_gb*1024*1024*1024:
result.append (i)
return result
@staticmethod
def getAllDevicesIdxsList():
if not hasNVML:
return [0]
return [ i for i in range(0, nvmlDeviceGetCount() ) ]
@staticmethod
def getAllDevicesIdxsWithNamesList():
if not hasNVML:
return [ (0, devicelib.getDeviceName(0) ) ]
return [ (i, nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() ) for i in range(nvmlDeviceGetCount() ) ]
@staticmethod
def getDeviceVRAMFree (idx):
if not hasNVML:
return 2
if idx < nvmlDeviceGetCount():
memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(idx) )
return memInfo.total - memInfo.used
return 0
@staticmethod
def getDeviceVRAMTotalGb (idx):
if not hasNVML:
return 2
if idx < nvmlDeviceGetCount():
memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(idx) )
return round ( memInfo.total / (1024*1024*1024) )
return 0
@staticmethod
def getBestDeviceIdx():
if not hasNVML:
return 0
idx = -1
idx_mem = 0
for i in range( nvmlDeviceGetCount() ):
memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) )
if memInfo.total > idx_mem:
idx = i
idx_mem = memInfo.total
return idx
@staticmethod
def getWorstDeviceIdx():
if not hasNVML:
return 0
idx = -1
idx_mem = sys.maxsize
for i in range( nvmlDeviceGetCount() ):
memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) )
if memInfo.total < idx_mem:
idx = i
idx_mem = memInfo.total
return idx
@staticmethod
def isValidDeviceIdx(idx):
if not hasNVML:
return (idx == 0)
return (idx < nvmlDeviceGetCount())
@staticmethod
def getDeviceIdxsEqualModel(idx):
if not hasNVML:
return [0] if idx == 0 else []
result = []
idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
for i in range( nvmlDeviceGetCount() ):
if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name:
result.append (i)
return result
@staticmethod
def getDeviceName (idx):
if not hasNVML:
return 'Generic GeForce GPU'
if idx < nvmlDeviceGetCount():
return nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
return None
@staticmethod
def getDeviceComputeCapability(idx):
if not hasNVML:
return 99 if idx == 0 else 0
result = 0
if idx < nvmlDeviceGetCount():
result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
return result[0] * 10 + result[1]


@ -4,66 +4,37 @@ import contextlib
import numpy as np
from utils import std_utils
from .devicelib import devicelib
from .device import device
class nnlib(object):
device = devicelib #forwards nnlib.devicelib to device in order to use nnlib as standalone lib
DeviceConfig = devicelib.Config
device = device #forwards the device module so nnlib can be used as a standalone lib
DeviceConfig = device.Config
active_DeviceConfig = DeviceConfig() #default is one best GPU
dlib = None
keras = None
keras_contrib = None
tf = None
tf_sess = None
code_import_tf = None
PML = None
PMLK = None
PMLTile= None
code_import_keras = None
code_import_keras_contrib = None
code_import_all = None
code_import_dlib = None
tf_dssim = None
tf_ssim = None
tf_resize_like = None
tf_image_histogram = None
tf_rgb_to_lab = None
tf_lab_to_rgb = None
tf_adain = None
tf_gaussian_blur = None
tf_style_loss = None
modelify = None
ReflectionPadding2D = None
DSSIMLoss = None
DSSIMMSEMaskLoss = None
PixelShuffler = None
SubpixelUpscaler = None
AddUniformNoise = None
ResNet = None
UNet = None
UNetTemporalPredictor = None
NLayerDiscriminator = None
code_import_tf_string = \
"""
tf = nnlib.tf
tf_sess = nnlib.tf_sess
tf_reduce_mean = tf.reduce_mean # todo tf 12+ = tf.math.reduce_mean
tf_total_variation = tf.image.total_variation
tf_dssim = nnlib.tf_dssim
tf_ssim = nnlib.tf_ssim
tf_resize_like = nnlib.tf_resize_like
tf_image_histogram = nnlib.tf_image_histogram
tf_rgb_to_lab = nnlib.tf_rgb_to_lab
tf_lab_to_rgb = nnlib.tf_lab_to_rgb
tf_adain = nnlib.tf_adain
tf_gaussian_blur = nnlib.tf_gaussian_blur
tf_style_loss = nnlib.tf_style_loss
"""
code_import_keras_string = \
"""
keras = nnlib.keras
@ -81,9 +52,11 @@ BatchNormalization = keras.layers.BatchNormalization
LeakyReLU = keras.layers.LeakyReLU
ReLU = keras.layers.ReLU
PReLU = keras.layers.PReLU
tanh = keras.layers.Activation('tanh')
sigmoid = keras.layers.Activation('sigmoid')
Dropout = keras.layers.Dropout
Softmax = keras.layers.Softmax
Lambda = keras.layers.Lambda
Add = keras.layers.Add
@ -100,12 +73,14 @@ Model = keras.models.Model
Adam = keras.optimizers.Adam
modelify = nnlib.modelify
ReflectionPadding2D = nnlib.ReflectionPadding2D
DSSIMLoss = nnlib.DSSIMLoss
DSSIMMSEMaskLoss = nnlib.DSSIMMSEMaskLoss
gaussian_blur = nnlib.gaussian_blur
style_loss = nnlib.style_loss
dssim = nnlib.dssim
#ReflectionPadding2D = nnlib.ReflectionPadding2D
PixelShuffler = nnlib.PixelShuffler
SubpixelUpscaler = nnlib.SubpixelUpscaler
AddUniformNoise = nnlib.AddUniformNoise
#AddUniformNoise = nnlib.AddUniformNoise
"""
code_import_keras_contrib_string = \
"""
@ -113,7 +88,6 @@ keras_contrib = nnlib.keras_contrib
GroupNormalization = keras_contrib.layers.GroupNormalization
InstanceNormalization = keras_contrib.layers.InstanceNormalization
Padam = keras_contrib.optimizers.Padam
PELU = keras_contrib.layers.advanced_activations.PELU
"""
code_import_dlib_string = \
"""
@ -122,6 +96,7 @@ dlib = nnlib.dlib
code_import_all_string = \
"""
DSSIMMSEMaskLoss = nnlib.DSSIMMSEMaskLoss
ResNet = nnlib.ResNet
UNet = nnlib.UNet
UNetTemporalPredictor = nnlib.UNetTemporalPredictor
@ -130,7 +105,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
@staticmethod
def import_tf(device_config = None):
def _import_tf(device_config):
if nnlib.tf is not None:
return nnlib.code_import_tf
@ -147,30 +122,14 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
import tensorflow as tf
nnlib.tf = tf
if device_config is None:
device_config = nnlib.active_DeviceConfig
tf_ver = [int(x) for x in tf.VERSION.split('.')]
req_cap = 35
if tf_ver[0] > 1 or (tf_ver[0] == 1 and tf_ver[1] >= 11):
req_cap = 37
if not device_config.cpu_only and device_config.gpu_compute_caps[0] < req_cap:
if suppressor is not None:
suppressor.__exit__()
print ("%s does not meet minimum required compute capability: %d.%d. Falling back to CPU mode." % ( device_config.gpu_names[0], req_cap // 10, req_cap % 10 ) )
device_config = nnlib.DeviceConfig(cpu_only=True)
if suppressor is not None:
suppressor.__enter__()
nnlib.active_DeviceConfig = device_config
if device_config.cpu_only:
config = tf.ConfigProto( device_count = {'GPU': 0} )
config = tf.ConfigProto(device_count={'GPU': 0})
else:
config = tf.ConfigProto()
if device_config.backend != "tensorflow-generic":
#tensorflow-generic means a system with an NVIDIA card but without NVSMI,
#so don't hide devices and let tensorflow choose the best card
visible_device_list = ''
for idx in device_config.gpu_idxs:
visible_device_list += str(idx) + ','
@ -184,226 +143,42 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
if suppressor is not None:
suppressor.__exit__()
nnlib.__initialize_tf_functions()
nnlib.code_import_tf = compile (nnlib.code_import_tf_string,'','exec')
return nnlib.code_import_tf
@staticmethod
def __initialize_tf_functions():
tf = nnlib.tf
def tf_dssim_(max_value=1.0):
def func(t1,t2):
return (1.0 - tf.image.ssim (t1, t2, max_value)) / 2.0
return func
nnlib.tf_dssim = tf_dssim_
def tf_ssim_(max_value=1.0):
def func(t1,t2):
return tf.image.ssim (t1, t2, max_value)
return func
nnlib.tf_ssim = tf_ssim_
def tf_resize_like_(ref_tensor):
def func(input_tensor):
H, W = ref_tensor.get_shape()[1], ref_tensor.get_shape()[2]
return tf.image.resize_bilinear(input_tensor, [H.value, W.value])
return func
nnlib.tf_resize_like = tf_resize_like_
def tf_rgb_to_lab():
def func(rgb_input):
with tf.name_scope("rgb_to_lab"):
srgb_pixels = tf.reshape(rgb_input, [-1, 3])
with tf.name_scope("srgb_to_xyz"):
linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32)
exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32)
rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask
rgb_to_xyz = tf.constant([
# X Y Z
[0.412453, 0.212671, 0.019334], # R
[0.357580, 0.715160, 0.119193], # G
[0.180423, 0.072169, 0.950227], # B
])
xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz)
# https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
with tf.name_scope("xyz_to_cielab"):
# convert to fx = f(X/Xn), fy = f(Y/Yn), fz = f(Z/Zn)
# normalize for D65 white point
xyz_normalized_pixels = tf.multiply(xyz_pixels, [1/0.950456, 1.0, 1/1.088754])
epsilon = 6/29
linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32)
exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32)
fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4/29) * linear_mask + (xyz_normalized_pixels ** (1/3)) * exponential_mask
# convert to lab
fxfyfz_to_lab = tf.constant([
# l a b
[ 0.0, 500.0, 0.0], # fx
[116.0, -500.0, 200.0], # fy
[ 0.0, 0.0, -200.0], # fz
])
lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0])
return tf.reshape(lab_pixels, tf.shape(rgb_input))
return func
nnlib.tf_rgb_to_lab = tf_rgb_to_lab
def tf_lab_to_rgb():
def func(lab):
with tf.name_scope("lab_to_rgb"):
lab_pixels = tf.reshape(lab, [-1, 3])
# https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
with tf.name_scope("cielab_to_xyz"):
# convert to fxfyfz
lab_to_fxfyfz = tf.constant([
# fx fy fz
[1/116.0, 1/116.0, 1/116.0], # l
[1/500.0, 0.0, 0.0], # a
[ 0.0, 0.0, -1/200.0], # b
])
fxfyfz_pixels = tf.matmul(lab_pixels + tf.constant([16.0, 0.0, 0.0]), lab_to_fxfyfz)
# convert to xyz
epsilon = 6/29
linear_mask = tf.cast(fxfyfz_pixels <= epsilon, dtype=tf.float32)
exponential_mask = tf.cast(fxfyfz_pixels > epsilon, dtype=tf.float32)
xyz_pixels = (3 * epsilon**2 * (fxfyfz_pixels - 4/29)) * linear_mask + (fxfyfz_pixels ** 3) * exponential_mask
# denormalize for D65 white point
xyz_pixels = tf.multiply(xyz_pixels, [0.950456, 1.0, 1.088754])
with tf.name_scope("xyz_to_srgb"):
xyz_to_rgb = tf.constant([
# r g b
[ 3.2404542, -0.9692660, 0.0556434], # x
[-1.5371385, 1.8760108, -0.2040259], # y
[-0.4985314, 0.0415560, 1.0572252], # z
])
rgb_pixels = tf.matmul(xyz_pixels, xyz_to_rgb)
# avoid a slightly negative number messing up the conversion
rgb_pixels = tf.clip_by_value(rgb_pixels, 0.0, 1.0)
linear_mask = tf.cast(rgb_pixels <= 0.0031308, dtype=tf.float32)
exponential_mask = tf.cast(rgb_pixels > 0.0031308, dtype=tf.float32)
srgb_pixels = (rgb_pixels * 12.92 * linear_mask) + ((rgb_pixels ** (1/2.4) * 1.055) - 0.055) * exponential_mask
return tf.reshape(srgb_pixels, tf.shape(lab))
return func
nnlib.tf_lab_to_rgb = tf_lab_to_rgb
def tf_image_histogram():
def func(input):
x = input
x += 1 / 255.0
output = []
for i in range(256, 0, -1):
v = i / 255.0
y = (x - v) * 1000
y = tf.clip_by_value (y, -1.0, 0.0) + 1
output.append ( tf.reduce_sum (y) )
x -= y*v
return tf.stack ( output[::-1] )
return func
nnlib.tf_image_histogram = tf_image_histogram
def tf_adain(epsilon=1e-5):
def func(content, style):
axes = [1,2]
c_mean, c_var = tf.nn.moments(content, axes=axes, keep_dims=True)
s_mean, s_var = tf.nn.moments(style, axes=axes, keep_dims=True)
c_std, s_std = tf.sqrt(c_var + epsilon), tf.sqrt(s_var + epsilon)
return s_std * (content - c_mean) / c_std + s_mean
return func
nnlib.tf_adain = tf_adain
def tf_gaussian_blur(radius=2.0):
def gaussian_kernel(size,mean,std):
d = tf.distributions.Normal( float(mean), float(std) )
vals = d.prob(tf.range(start = -int(size), limit = int(size) + 1, dtype = tf.float32))
gauss_kernel = tf.einsum('i,j->ij',
vals,
vals)
return gauss_kernel / tf.reduce_sum(gauss_kernel)
gauss_kernel = gaussian_kernel(radius, 1.0, radius )
gauss_kernel = gauss_kernel[:, :, tf.newaxis, tf.newaxis]
def func(input):
input_nc = input.get_shape().as_list()[-1]
inputs = tf.split(input, input_nc, -1)
outputs = []
for i in range(len(inputs)):
outputs += [ tf.nn.conv2d( inputs[i] , gauss_kernel, strides=[1, 1, 1, 1], padding="SAME") ]
return tf.concat (outputs, axis=-1)
return func
nnlib.tf_gaussian_blur = tf_gaussian_blur
#any channel count style diff
#outputs 0.0 .. 1.0 style difference*loss_weight , 0.0 - no diff
def tf_style_loss(gaussian_blur_radius=0.0, loss_weight=1.0, batch_normalize=False, epsilon=1e-5):
gblur = tf_gaussian_blur(gaussian_blur_radius)
def sd(content, style):
content_nc = content.get_shape().as_list()[-1]
style_nc = style.get_shape().as_list()[-1]
if content_nc != style_nc:
raise Exception("tf_style_loss() content_nc != style_nc")
axes = [1,2]
c_mean, c_var = tf.nn.moments(content, axes=axes, keep_dims=True)
s_mean, s_var = tf.nn.moments(style, axes=axes, keep_dims=True)
c_std, s_std = tf.sqrt(c_var + epsilon), tf.sqrt(s_var + epsilon)
mean_loss = tf.reduce_sum(tf.squared_difference(c_mean, s_mean))
std_loss = tf.reduce_sum(tf.squared_difference(c_std, s_std))
if batch_normalize:
#normalize w.r.t batch size
n = tf.cast(tf.shape(content)[0], dtype=tf.float32)
mean_loss /= n
std_loss /= n
return (mean_loss + std_loss) * loss_weight
def func(target, style):
if gaussian_blur_radius > 0.0:
return sd( gblur(target), gblur(style))
else:
return sd( target, style )
return func
nnlib.tf_style_loss = tf_style_loss
@staticmethod
def import_keras(device_config = None):
if nnlib.keras is not None:
return nnlib.code_import_keras
nnlib.import_tf(device_config)
if device_config is None:
device_config = nnlib.active_DeviceConfig
nnlib.active_DeviceConfig = device_config
if "tensorflow" in device_config.backend:
nnlib._import_tf(device_config)
device_config = nnlib.active_DeviceConfig
elif device_config.backend == "plaidML":
os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
os.environ["PLAIDML_DEVICE_IDS"] = ",".join ( [ nnlib.device.getDeviceID(idx) for idx in device_config.gpu_idxs] )
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
suppressor = std_utils.suppress_stdout_stderr().__enter__()
import keras as keras_
nnlib.keras = keras_
if device_config.backend == "plaidML":
import plaidml
import plaidml.tile
nnlib.PML = plaidml
nnlib.PMLK = plaidml.keras.backend
nnlib.PMLTile = plaidml.tile
if device_config.use_fp16:
nnlib.keras.backend.set_floatx('float16')
if "tensorflow" in device_config.backend:
nnlib.keras.backend.set_session(nnlib.tf_sess)
nnlib.keras.backend.set_image_data_format('channels_last')
if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1':
@ -411,14 +186,12 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
nnlib.__initialize_keras_functions()
nnlib.code_import_keras = compile (nnlib.code_import_keras_string,'','exec')
return nnlib.code_import_keras
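
Note: selecting the plaidML Keras backend is purely environment-driven, as the code above shows. A standalone sketch of the same hand-off (the device id string here is hypothetical; the real one comes from device.getDeviceID()):

import os
os.environ['KERAS_BACKEND'] = 'plaidml.keras.backend'   # must precede 'import keras'
os.environ['PLAIDML_DEVICE_IDS'] = 'opencl_amd_ellesmere.0'
import keras  # keras now routes all ops through plaidML/OpenCL
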
@staticmethod
def __initialize_keras_functions():
tf = nnlib.tf
keras = nnlib.keras
K = keras.backend
exec (nnlib.code_import_tf, locals(), globals())
def modelify(model_functor):
def func(tensor):
@ -427,68 +200,172 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
nnlib.modelify = modelify
class ReflectionPadding2D(keras.layers.Layer):
def __init__(self, padding=(1, 1), **kwargs):
self.padding = tuple(padding)
self.input_spec = [keras.layers.InputSpec(ndim=4)]
super(ReflectionPadding2D, self).__init__(**kwargs)
def gaussian_blur(radius=2.0):
def gaussian(x, mu, sigma):
return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2))
def compute_output_shape(self, s):
""" If you are using "channels_last" configuration"""
return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])
def make_kernel(sigma):
kernel_size = max(3, int(2 * 2 * sigma + 1))
mean = np.floor(0.5 * kernel_size)
kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)])
np_kernel = np.outer(kernel_1d, kernel_1d).astype(dtype=K.floatx())
kernel = np_kernel / np.sum(np_kernel)
return kernel
def call(self, x, mask=None):
w_pad,h_pad = self.padding
return tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT')
nnlib.ReflectionPadding2D = ReflectionPadding2D
gauss_kernel = make_kernel(radius)
gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis]
class DSSIMLoss(object):
def __init__(self, is_tanh=False):
self.is_tanh = is_tanh
def func(input):
inputs = [ input[:,:,:,i:i+1] for i in range( K.int_shape( input )[-1] ) ]
def __call__(self,y_true, y_pred):
if not self.is_tanh:
return (1.0 - tf.image.ssim (y_true, y_pred, 1.0)) / 2.0
outputs = []
for i in range(len(inputs)):
outputs += [ K.conv2d( inputs[i] , K.constant(gauss_kernel) , strides=(1,1), padding="same") ]
return K.concatenate (outputs, axis=-1)
return func
nnlib.gaussian_blur = gaussian_blur
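
Note: unlike the removed tf_gaussian_blur, this blur is built from pure keras backend ops, so it runs on tensorflow and plaidML alike; the kernel size grows as max(3, int(2*2*radius + 1)) and each channel is convolved separately. A usage sketch, assuming the surrounding nnlib scope (e.g. after exec(nnlib.import_all(), ...)):

blur = gaussian_blur(radius=2.0)
# blurred = blur(image_tensor)  # any (batch, h, w, c) tensor; shape preserved
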
def style_loss(gaussian_blur_radius=0.0, loss_weight=1.0, wnd_size=0, step_size=1):
if gaussian_blur_radius > 0.0:
gblur = gaussian_blur(gaussian_blur_radius)
def sd(content, style, loss_weight):
content_nc = K.int_shape(content)[-1]
style_nc = K.int_shape(style)[-1]
if content_nc != style_nc:
raise Exception("style_loss() content_nc != style_nc")
axes = [1,2]
c_mean, c_var = K.mean(content, axis=axes, keepdims=True), K.var(content, axis=axes, keepdims=True)
s_mean, s_var = K.mean(style, axis=axes, keepdims=True), K.var(style, axis=axes, keepdims=True)
c_std, s_std = K.sqrt(c_var + 1e-5), K.sqrt(s_var + 1e-5)
mean_loss = K.sum(K.square(c_mean-s_mean))
std_loss = K.sum(K.square(c_std-s_std))
return (mean_loss + std_loss) * ( loss_weight / float(content_nc) )
def func(target, style):
if wnd_size == 0:
if gaussian_blur_radius > 0.0:
return sd( gblur(target), gblur(style), loss_weight=loss_weight)
else:
return (1.0 - tf.image.ssim ((y_true/2+0.5), (y_pred/2+0.5), 1.0)) / 2.0
nnlib.DSSIMLoss = DSSIMLoss
class DSSIMMSEMaskLoss(object):
def __init__(self, mask, is_mse=False):
self.mask = mask
self.is_mse = is_mse
def __call__(self,y_true, y_pred):
total_loss = None
mask = self.mask
if self.is_mse:
blur_mask = tf_gaussian_blur(max(1, mask.get_shape().as_list()[1] // 32))(mask)
return K.mean ( 100*K.square( y_true*blur_mask - y_pred*blur_mask ) )
return sd( target, style, loss_weight=loss_weight )
else:
return (1.0 - (tf.image.ssim (y_true*mask, y_pred*mask, 1.0))) / 2.0
nnlib.DSSIMMSEMaskLoss = DSSIMMSEMaskLoss
#currently unused
if nnlib.tf is not None:
sh = K.int_shape(target)[1]
k = (sh-wnd_size) // step_size + 1
if gaussian_blur_radius > 0.0:
target, style = gblur(target), gblur(style)
target = nnlib.tf.image.extract_image_patches(target, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID')
style = nnlib.tf.image.extract_image_patches(style, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID')
return sd( target, style, loss_weight )
if nnlib.PML is not None:
print ("Sorry, plaidML backend does not support style_loss")
return 0
return func
nnlib.style_loss = style_loss
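
Note: style_loss matches per-channel spatial mean/std statistics (AdaIN-style) rather than Gram matrices, which is why SAE calls it with loss_weight=face_style_power and wnd_size=0. A numpy rendering of the statistic it minimizes, mirroring sd() above:

import numpy as np

# Squared distance between per-channel spatial means and stds, scaled by
# loss_weight / channel_count.
def style_distance(content, style, loss_weight=1.0, eps=1e-5):
    axes = (1, 2)
    c_mean = content.mean(axes, keepdims=True)
    c_std = np.sqrt(content.var(axes, keepdims=True) + eps)
    s_mean = style.mean(axes, keepdims=True)
    s_std = np.sqrt(style.var(axes, keepdims=True) + eps)
    nc = content.shape[-1]
    return (np.sum((c_mean - s_mean)**2) + np.sum((c_std - s_std)**2)) * (loss_weight / nc)

a = np.random.rand(1, 16, 16, 3)
print(style_distance(a, a))  # 0.0 for identical inputs
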
def dssim(k1=0.01, k2=0.03, max_value=1.0):
    # port of tf.image.ssim to pure keras in order to work on plaidML backend.

    def func(y_true, y_pred):
        ch = K.int_shape(y_pred)[-1]

        def softmax(x, axis=-1):  # from the keras numpy backend
            y = np.exp(x - np.max(x, axis, keepdims=True))
            return y / np.sum(y, axis, keepdims=True)

        def gauss_kernel(size, sigma):
            coords = np.arange(0, size, dtype=K.floatx())
            coords -= (size - 1) / 2.0
            g = coords**2
            g *= ( -0.5 / (sigma**2) )
            g = np.reshape(g, (1, -1)) + np.reshape(g, (-1, 1))
            g = np.reshape(g, (1, -1))
            g = softmax(g)
            g = np.reshape(g, (size, size, 1, 1))
            g = np.tile(g, (1, 1, ch, 1))
            return K.constant(g, dtype=K.floatx())

        kernel = gauss_kernel(11, 1.5)

        def reducer(x):
            shape = K.shape(x)
            x = K.reshape(x, (-1, shape[-3], shape[-2], shape[-1]))
            y = K.depthwise_conv2d(x, kernel, strides=(1, 1), padding='valid')
            y_shape = K.shape(y)
            return K.reshape(y, (shape[0], y_shape[1], y_shape[2], y_shape[3]))

        def _ssim_helper(x, y, reducer, compensation=1.0):
            c1 = (k1 * max_value) ** 2
            c2 = (k2 * max_value) ** 2

            mean0 = reducer(x)
            mean1 = reducer(y)
            num0 = mean0 * mean1 * 2.0
            den0 = K.square(mean0) + K.square(mean1)
            luminance = (num0 + c1) / (den0 + c1)

            num1 = reducer(x * y) * 2.0
            den1 = reducer(K.square(x) + K.square(y))
            c2 *= compensation
            cs = (num1 - num0 + c2) / (den1 - den0 + c2)

            return luminance, cs

        luminance, cs = _ssim_helper(y_true, y_pred, reducer)
        ssim_val = K.mean(luminance * cs, axis=(-3, -2))
        return K.mean((1.0 - ssim_val) / 2.0)

    return func
nnlib.dssim = dssim
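Because dssim() returns a plain Keras-compatible loss callable with no tf session behind it, it can be passed straight to compile() on any backend. A minimal usage sketch, assuming an existing keras.Model named `model` and inputs scaled to [0,1]:

# hypothetical usage sketch -- `model` is an assumption, not from this commit
model.compile(optimizer='adam', loss=nnlib.dssim(max_value=1.0))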
class PixelShuffler(keras.layers.Layer):
    def __init__(self, size=(2, 2), data_format=None, **kwargs):
        super(PixelShuffler, self).__init__(**kwargs)
        self.data_format = K.normalize_data_format(data_format)
        self.size = keras.utils.conv_utils.normalize_tuple(size, 2, 'size')

    def call(self, inputs):
        input_shape = K.int_shape(inputs)
        if len(input_shape) != 4:
            raise ValueError('Inputs should have rank 4; Received input shape: ' + str(input_shape))

        if self.data_format == 'channels_first':
            batch_size, c, h, w = input_shape
            if batch_size is None:
                batch_size = -1
            rh, rw = self.size
            oh, ow = h * rh, w * rw
            oc = c // (rh * rw)

            out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w))
            out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2))
            out = K.reshape(out, (batch_size, oc, oh, ow))
            return out

        elif self.data_format == 'channels_last':
            batch_size, h, w, c = input_shape
            if batch_size is None:
                batch_size = -1
            rh, rw = self.size
            oh, ow = h * rh, w * rw
            oc = c // (rh * rw)

            out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc))
            out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5))
            out = K.reshape(out, (batch_size, oh, ow, oc))
            return out
    def compute_output_shape(self, input_shape):
        if len(input_shape) != 4:
            raise ValueError('Inputs should have rank 4; Received input shape: ' + str(input_shape))
@@ -529,6 +406,23 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
nnlib.PixelShuffler = PixelShuffler
nnlib.SubpixelUpscaler = PixelShuffler
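The reshape/permute pair above is the standard depth-to-space rearrangement that previously went through tf.depth_to_space. A small numpy check of the channels_last path, illustrative only:

import numpy as np
# depth-to-space with r=2: (1, 2, 2, 4) -> (1, 4, 4, 1), mirroring PixelShuffler.call
x = np.arange(16).reshape(1, 2, 2, 4)
b, h, w, c = x.shape
rh = rw = 2
oc = c // (rh * rw)
out = x.reshape(b, h, w, rh, rw, oc).transpose(0, 1, 3, 2, 4, 5).reshape(b, h * rh, w * rw, oc)
assert out.shape == (1, 4, 4, 1)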
'''
class ReflectionPadding2D(keras.layers.Layer):
    def __init__(self, padding=(1, 1), **kwargs):
        self.padding = tuple(padding)
        self.input_spec = [keras.layers.InputSpec(ndim=4)]
        super(ReflectionPadding2D, self).__init__(**kwargs)

    def compute_output_shape(self, s):
        """Assumes a "channels_last" data format."""
        return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3])

    def call(self, x, mask=None):
        w_pad, h_pad = self.padding
        return tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0]], 'REFLECT')
nnlib.ReflectionPadding2D = ReflectionPadding2D

class AddUniformNoise(keras.layers.Layer):
    def __init__(self, power=1.0, minval=-1.0, maxval=1.0, **kwargs):
@@ -548,7 +442,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
        base_config = super(AddUniformNoise, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
nnlib.AddUniformNoise = AddUniformNoise
'''
@staticmethod
def import_keras_contrib(device_config = None):
    if nnlib.keras_contrib is not None:
@@ -570,20 +464,17 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
        import dlib as dlib_
        nnlib.dlib = dlib_
        if not device_config.cpu_only and "tensorflow" in device_config.backend and len(device_config.gpu_idxs) > 0:
            nnlib.dlib.cuda.set_device(device_config.gpu_idxs[0])

        nnlib.code_import_dlib = compile (nnlib.code_import_dlib_string,'','exec')

@staticmethod
def import_all(device_config = None):
    if nnlib.code_import_all is None:
        nnlib.import_keras(device_config)
        nnlib.import_keras_contrib(device_config)
        nnlib.code_import_all = compile (nnlib.code_import_keras_string + '\n'
                                        + nnlib.code_import_keras_contrib_string
                                        + nnlib.code_import_all_string,'','exec')
        nnlib.__initialize_all_functions()
@@ -592,6 +483,24 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
@staticmethod
def __initialize_all_functions():
    exec (nnlib.import_keras(), locals(), globals())
    exec (nnlib.import_keras_contrib(), locals(), globals())

    class DSSIMMSEMaskLoss(object):
        def __init__(self, mask, is_mse=False):
            self.mask = mask
            self.is_mse = is_mse

        def __call__(self, y_true, y_pred):
            mask = self.mask
            if self.is_mse:
                blur_mask = gaussian_blur(max(1, K.int_shape(mask)[1] // 64))(mask)
                return K.mean ( 50*K.square( y_true*blur_mask - y_pred*blur_mask ) )
            else:
                return 10*dssim() (y_true*mask, y_pred*mask)
    nnlib.DSSIMMSEMaskLoss = DSSIMMSEMaskLoss
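A minimal sketch of how this backend-agnostic masked loss might be wired up, assuming `face_mask` is a 0..1 tensor with the prediction's spatial shape and `model` is an existing keras.Model (both names are illustrative, not from this commit):

# hypothetical usage sketch
masked_loss = nnlib.DSSIMMSEMaskLoss(face_mask, is_mse=False)  # 10*dssim on masked pixels
model.compile(optimizer='adam', loss=masked_loss)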
'''
def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False):
    exec (nnlib.import_all(), locals(), globals())
@@ -775,7 +684,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
            return Conv2D( 1, 4, 1, 'valid')(x)
        return func
nnlib.NLayerDiscriminator = NLayerDiscriminator
'''
@staticmethod
def finalize_all():
    if nnlib.keras_contrib is not None:
@@ -786,7 +695,6 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
        nnlib.keras = None

    if nnlib.tf is not None:
        nnlib.tf_sess.close()
        nnlib.tf_sess = None
        nnlib.tf = None

View file

@@ -5,6 +5,7 @@ h5py==2.7.1
Keras==2.2.4
opencv-python==4.0.0.21
tensorflow-gpu==1.11.0
plaidml-keras==0.5.0
scikit-image
dlib==19.10.0
tqdm
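For reference, the newly pinned plaidml-keras package is activated by installing its backend before keras is imported; this is the standard plaidML bootstrap, shown here as a sketch rather than a line from this commit:

import plaidml.keras
plaidml.keras.install_backend()  # must run before any `import keras`
import keras                     # keras now dispatches to plaidML/OpenCL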

View file

@@ -15,8 +15,7 @@ class SampleProcessor(object):
        LANDMARKS_ARRAY        = 0x00000010, #currently unused
        RANDOM_CLOSE           = 0x00000020,
        MORPH_TO_RANDOM_CLOSE  = 0x00000040,
        FACE_ALIGN_HALF        = 0x00000100,
        FACE_ALIGN_FULL        = 0x00000200,

View file

@@ -5,7 +5,6 @@ import cv2
import localization
from scipy.spatial import Delaunay
from PIL import Image, ImageDraw, ImageFont
from nnlib import nnlib
def reinhard_color_transfer(target, source, clip=False, preserve_paper=False, source_mask=None, target_mask=None):
"""
@@ -424,23 +423,3 @@ def reduce_colors (img_bgr, n_colors):
return img_bgr
class TFLabConverter():
    def __init__(self):
        exec (nnlib.import_tf(), locals(), globals())
        self.tf_sess = tf_sess

        self.bgr_input_tensor = tf.placeholder("float", [None, None, 3])
        self.lab_input_tensor = tf.placeholder("float", [None, None, 3])

        self.lab_output_tensor = tf_rgb_to_lab()(self.bgr_input_tensor)
        self.bgr_output_tensor = tf_lab_to_rgb()(self.lab_input_tensor)

    def bgr2lab(self, bgr):
        return self.tf_sess.run(self.lab_output_tensor, feed_dict={self.bgr_input_tensor: bgr})

    def lab2bgr(self, lab):
        return self.tf_sess.run(self.bgr_output_tensor, feed_dict={self.lab_input_tensor: lab})
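The TFLabConverter above ran BGR<->LAB conversion through a tf session. A session-free stand-in using OpenCV (already in the requirements) is sketched below, assuming float32 BGR images scaled to [0,1]; this is an illustrative replacement, not code from this commit:

import cv2
import numpy as np

def bgr2lab(bgr):
    # OpenCV's LAB conversion expects float32 in [0,1]
    return cv2.cvtColor(bgr.astype(np.float32), cv2.COLOR_BGR2LAB)

def lab2bgr(lab):
    return cv2.cvtColor(lab.astype(np.float32), cv2.COLOR_LAB2BGR)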