mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-07-06 13:02:15 -07:00
_
This commit is contained in:
parent
c0a63addd4
commit
0e088f6415
4 changed files with 272 additions and 123 deletions
|
@ -331,7 +331,7 @@ def calc_face_yaw(landmarks):
|
||||||
r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0
|
r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0
|
||||||
return float(r-l)
|
return float(r-l)
|
||||||
|
|
||||||
#returns pitch,yaw [-1...+1]
|
#returns pitch,yaw,roll [-1...+1]
|
||||||
def estimate_pitch_yaw_roll(aligned_256px_landmarks):
|
def estimate_pitch_yaw_roll(aligned_256px_landmarks):
|
||||||
shape = (256,256)
|
shape = (256,256)
|
||||||
focal_length = shape[1]
|
focal_length = shape[1]
|
||||||
|
@ -351,4 +351,4 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks):
|
||||||
pitch = np.clip ( pitch*1.25, -1.0, 1.0 )
|
pitch = np.clip ( pitch*1.25, -1.0, 1.0 )
|
||||||
yaw = np.clip ( yaw*1.25, -1.0, 1.0 )
|
yaw = np.clip ( yaw*1.25, -1.0, 1.0 )
|
||||||
roll = np.clip ( roll*1.25, -1.0, 1.0 )
|
roll = np.clip ( roll*1.25, -1.0, 1.0 )
|
||||||
return pitch, yaw, roll
|
return -pitch, yaw, roll
|
||||||
|
|
|
@ -19,71 +19,99 @@ class PoseEstimator(object):
|
||||||
VERSION = 1
|
VERSION = 1
|
||||||
def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False):
|
def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False):
|
||||||
exec( nnlib.import_all(), locals(), globals() )
|
exec( nnlib.import_all(), locals(), globals() )
|
||||||
|
self.resolution = resolution
|
||||||
self.angles = [90, 45, 30, 10, 2]
|
|
||||||
|
self.angles = [60, 45, 30, 10, 2]
|
||||||
self.alpha_cat_losses = [7,5,3,1,1]
|
self.alpha_cat_losses = [7,5,3,1,1]
|
||||||
self.class_nums = [ angle+1 for angle in self.angles ]
|
self.class_nums = [ angle+1 for angle in self.angles ]
|
||||||
self.model = PoseEstimator.BuildModel(resolution, class_nums=self.class_nums)
|
self.encoder, self.decoder, self.model_l = PoseEstimator.BuildModels(resolution, class_nums=self.class_nums)
|
||||||
|
|
||||||
|
|
||||||
if weights_file_root is not None:
|
if weights_file_root is not None:
|
||||||
weights_file_root = Path(weights_file_root)
|
weights_file_root = Path(weights_file_root)
|
||||||
else:
|
else:
|
||||||
weights_file_root = Path(__file__).parent
|
weights_file_root = Path(__file__).parent
|
||||||
|
|
||||||
self.weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) )
|
self.encoder_weights_path = weights_file_root / ('PoseEst_%d_%s_enc.h5' % (resolution, face_type_str) )
|
||||||
|
self.decoder_weights_path = weights_file_root / ('PoseEst_%d_%s_dec.h5' % (resolution, face_type_str) )
|
||||||
if load_weights:
|
self.l_weights_path = weights_file_root / ('PoseEst_%d_%s_l.h5' % (resolution, face_type_str) )
|
||||||
self.model.load_weights (str(self.weights_path))
|
|
||||||
else:
|
|
||||||
conv_weights_list = []
|
|
||||||
for layer in self.model.layers:
|
|
||||||
if type(layer) == keras.layers.Conv2D:
|
|
||||||
conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights
|
|
||||||
CAInitializerMP ( conv_weights_list )
|
|
||||||
|
|
||||||
inp_t, = self.model.inputs
|
|
||||||
bins_t = self.model.outputs
|
|
||||||
|
|
||||||
|
self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) )
|
||||||
|
|
||||||
|
self.input_bgr_shape = (resolution, resolution, 3)
|
||||||
|
inp_t = Input (self.input_bgr_shape)
|
||||||
|
inp_mask_t = Input ( (resolution, resolution, 1) )
|
||||||
|
inp_real_t = Input (self.input_bgr_shape)
|
||||||
inp_pitch_t = Input ( (1,) )
|
inp_pitch_t = Input ( (1,) )
|
||||||
inp_yaw_t = Input ( (1,) )
|
inp_yaw_t = Input ( (1,) )
|
||||||
inp_roll_t = Input ( (1,) )
|
inp_roll_t = Input ( (1,) )
|
||||||
|
|
||||||
inp_bins_t = []
|
if training:
|
||||||
for class_num in self.class_nums:
|
latent_t = self.encoder(inp_t)
|
||||||
inp_bins_t += [ Input ((class_num,)), Input ((class_num,)), Input ((class_num,)) ]
|
bgr_t = self.decoder (latent_t)
|
||||||
|
bins_t = self.model_l(latent_t)
|
||||||
|
else:
|
||||||
|
self.model = Model(inp_t, self.model_l(self.encoder(inp_t)) )
|
||||||
|
bins_t = self.model(inp_t)
|
||||||
|
|
||||||
loss_pitch = []
|
|
||||||
loss_yaw = []
|
if load_weights:
|
||||||
loss_roll = []
|
if training:
|
||||||
|
self.encoder.load_weights (str(self.encoder_weights_path))
|
||||||
for i,class_num in enumerate(self.class_nums):
|
self.decoder.load_weights (str(self.decoder_weights_path))
|
||||||
a = self.alpha_cat_losses[i]
|
self.model_l.load_weights (str(self.l_weights_path))
|
||||||
loss_pitch += [ a*K.categorical_crossentropy( inp_bins_t[i*3+0], bins_t[i*3+0] ) ]
|
else:
|
||||||
loss_yaw += [ a*K.categorical_crossentropy( inp_bins_t[i*3+1], bins_t[i*3+1] ) ]
|
self.model.load_weights (str(self.model_weights_path))
|
||||||
loss_roll += [ a*K.categorical_crossentropy( inp_bins_t[i*3+2], bins_t[i*3+2] ) ]
|
|
||||||
|
else:
|
||||||
|
def gather_Conv2D_layers(models_list):
|
||||||
|
conv_weights_list = []
|
||||||
|
for model in models_list:
|
||||||
|
for layer in model.layers:
|
||||||
|
layer_type = type(layer)
|
||||||
|
if layer_type == keras.layers.Conv2D:
|
||||||
|
conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights
|
||||||
|
elif layer_type == keras.engine.training.Model:
|
||||||
|
conv_weights_list += gather_Conv2D_layers ([layer])
|
||||||
|
return conv_weights_list
|
||||||
|
|
||||||
|
CAInitializerMP ( gather_Conv2D_layers( [self.encoder, self.decoder] ) )
|
||||||
|
|
||||||
idx_tensor = K.constant( np.array([idx for idx in range(self.class_nums[0])], dtype=K.floatx() ) )
|
idx_tensor = K.constant( np.array([idx for idx in range(self.class_nums[0])], dtype=K.floatx() ) )
|
||||||
#pitch_t, yaw_t, roll_t = K.sum ( bins_t[0] * idx_tensor, 1), K.sum ( bins_t[1] * idx_tensor, 1), K.sum ( bins_t[2] * idx_tensor, 1)
|
pitch_t, yaw_t, roll_t = K.sum ( bins_t[0] * idx_tensor, 1), K.sum (bins_t[1] * idx_tensor, 1), K.sum ( bins_t[2] * idx_tensor, 1)
|
||||||
|
|
||||||
pitch_t, yaw_t, roll_t = nnlib.tf.reduce_sum ( bins_t[0] * idx_tensor, 1), nnlib.tf.reduce_sum ( bins_t[1] * idx_tensor, 1), nnlib.tf.reduce_sum ( bins_t[2] * idx_tensor, 1)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
reg_alpha = 2
|
|
||||||
reg_pitch_loss = reg_alpha * K.mean(K.square( inp_pitch_t - pitch_t), -1)
|
|
||||||
reg_yaw_loss = reg_alpha * K.mean(K.square( inp_yaw_t - yaw_t), -1)
|
|
||||||
reg_roll_loss = reg_alpha * K.mean(K.square( inp_roll_t - roll_t), -1)
|
|
||||||
|
|
||||||
pitch_loss = reg_pitch_loss + sum(loss_pitch)
|
|
||||||
yaw_loss = reg_yaw_loss + sum(loss_yaw)
|
|
||||||
roll_loss = reg_roll_loss + sum(loss_roll)
|
|
||||||
opt = Adam(lr=0.000001)
|
|
||||||
|
|
||||||
if training:
|
if training:
|
||||||
self.train = K.function ([inp_t, inp_pitch_t, inp_yaw_t, inp_roll_t] + inp_bins_t,
|
inp_bins_t = []
|
||||||
[K.mean(pitch_loss),K.mean(yaw_loss),K.mean(roll_loss)], opt.get_updates( [pitch_loss,yaw_loss,roll_loss], self.model.trainable_weights) )
|
for class_num in self.class_nums:
|
||||||
|
inp_bins_t += [ Input ((class_num,)), Input ((class_num,)), Input ((class_num,)) ]
|
||||||
|
|
||||||
|
loss_pitch = []
|
||||||
|
loss_yaw = []
|
||||||
|
loss_roll = []
|
||||||
|
|
||||||
|
for i,class_num in enumerate(self.class_nums):
|
||||||
|
a = self.alpha_cat_losses[i]
|
||||||
|
loss_pitch += [ a*K.categorical_crossentropy( inp_bins_t[i*3+0], bins_t[i*3+0] ) ]
|
||||||
|
loss_yaw += [ a*K.categorical_crossentropy( inp_bins_t[i*3+1], bins_t[i*3+1] ) ]
|
||||||
|
loss_roll += [ a*K.categorical_crossentropy( inp_bins_t[i*3+2], bins_t[i*3+2] ) ]
|
||||||
|
|
||||||
|
bgr_loss = K.mean( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( inp_real_t*inp_mask_t, bgr_t*inp_mask_t) )
|
||||||
|
|
||||||
|
reg_alpha = 0.01
|
||||||
|
reg_pitch_loss = reg_alpha * K.mean(K.square( inp_pitch_t - pitch_t), -1)
|
||||||
|
reg_yaw_loss = reg_alpha * K.mean(K.square( inp_yaw_t - yaw_t), -1)
|
||||||
|
reg_roll_loss = reg_alpha * K.mean(K.square( inp_roll_t - roll_t), -1)
|
||||||
|
|
||||||
|
pitch_loss = reg_pitch_loss + sum(loss_pitch)
|
||||||
|
yaw_loss = reg_yaw_loss + sum(loss_yaw)
|
||||||
|
roll_loss = reg_roll_loss + sum(loss_roll)
|
||||||
|
|
||||||
|
|
||||||
|
self.train = K.function ([inp_t, inp_real_t, inp_mask_t],
|
||||||
|
[bgr_loss], Adam(lr=2e-4, beta_1=0.5, beta_2=0.999).get_updates( bgr_loss, self.encoder.trainable_weights+self.decoder.trainable_weights ) )
|
||||||
|
|
||||||
|
self.train_l = K.function ([inp_t, inp_pitch_t, inp_yaw_t, inp_roll_t] + inp_bins_t,
|
||||||
|
[K.mean(pitch_loss),K.mean(yaw_loss),K.mean(roll_loss)], Adam(lr=0.000001).get_updates( [pitch_loss,yaw_loss,roll_loss], self.model_l.trainable_weights) )
|
||||||
|
|
||||||
|
|
||||||
self.view = K.function ([inp_t], [pitch_t, yaw_t, roll_t] )
|
self.view = K.function ([inp_t], [pitch_t, yaw_t, roll_t] )
|
||||||
|
|
||||||
|
@ -94,13 +122,22 @@ class PoseEstimator(object):
|
||||||
return False #pass exception between __enter__ and __exit__ to outter level
|
return False #pass exception between __enter__ and __exit__ to outter level
|
||||||
|
|
||||||
def save_weights(self):
|
def save_weights(self):
|
||||||
self.model.save_weights (str(self.weights_path))
|
self.encoder.save_weights (str(self.encoder_weights_path))
|
||||||
|
self.decoder.save_weights (str(self.decoder_weights_path))
|
||||||
|
self.model_l.save_weights (str(self.l_weights_path))
|
||||||
|
|
||||||
|
inp_t = Input (self.input_bgr_shape)
|
||||||
|
Model(inp_t, self.model_l(self.encoder(inp_t)) ).save_weights (str(self.model_weights_path))
|
||||||
|
|
||||||
def train_on_batch(self, imgs, pitch_yaw_roll):
|
def train_on_batch(self, warps, imgs, masks, pitch_yaw_roll, skip_bgr_train=False):
|
||||||
pyr = pitch_yaw_roll+1
|
pyr = pitch_yaw_roll+1
|
||||||
|
|
||||||
feed = [imgs]
|
if not skip_bgr_train:
|
||||||
|
bgr_loss, = self.train( [warps, imgs, masks] )
|
||||||
|
else:
|
||||||
|
bgr_loss = 0
|
||||||
|
|
||||||
|
feed = [imgs]
|
||||||
for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)):
|
for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)):
|
||||||
c = np.round(pyr * (angle / 2) ).astype(K.floatx())
|
c = np.round(pyr * (angle / 2) ).astype(K.floatx())
|
||||||
inp_pitch = c[:,0:1]
|
inp_pitch = c[:,0:1]
|
||||||
|
@ -113,11 +150,9 @@ class PoseEstimator(object):
|
||||||
inp_yaw_bins = keras.utils.to_categorical(inp_yaw, class_num )
|
inp_yaw_bins = keras.utils.to_categorical(inp_yaw, class_num )
|
||||||
inp_roll_bins = keras.utils.to_categorical(inp_roll, class_num )
|
inp_roll_bins = keras.utils.to_categorical(inp_roll, class_num )
|
||||||
feed += [inp_pitch_bins, inp_yaw_bins, inp_roll_bins]
|
feed += [inp_pitch_bins, inp_yaw_bins, inp_roll_bins]
|
||||||
#import code
|
|
||||||
#code.interact(local=dict(globals(), **locals()))
|
|
||||||
|
|
||||||
pitch_loss,yaw_loss,roll_loss = self.train(feed)
|
pitch_loss,yaw_loss,roll_loss = self.train_l(feed)
|
||||||
return pitch_loss,yaw_loss,roll_loss
|
return bgr_loss, pitch_loss, yaw_loss, roll_loss
|
||||||
|
|
||||||
def extract (self, input_image, is_input_tanh=False):
|
def extract (self, input_image, is_input_tanh=False):
|
||||||
if is_input_tanh:
|
if is_input_tanh:
|
||||||
|
@ -137,47 +172,139 @@ class PoseEstimator(object):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def BuildModel ( resolution, class_nums):
|
def BuildModels ( resolution, class_nums):
|
||||||
exec( nnlib.import_all(), locals(), globals() )
|
exec( nnlib.import_all(), locals(), globals() )
|
||||||
inp = Input ( (resolution,resolution,3) )
|
|
||||||
x = inp
|
x = inp = Input ( (resolution,resolution,3) )
|
||||||
x = PoseEstimator.Flow(class_nums=class_nums)(x)
|
x = PoseEstimator.EncFlow()(x)
|
||||||
model = Model(inp,x)
|
encoder = Model(inp,x)
|
||||||
return model
|
|
||||||
|
x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) )
|
||||||
|
x = PoseEstimator.DecFlow(resolution)(x)
|
||||||
|
decoder = Model(inp,x)
|
||||||
|
|
||||||
|
x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) )
|
||||||
|
x = PoseEstimator.LatentFlow(class_nums=class_nums)(x)
|
||||||
|
model_l = Model(inp, x )
|
||||||
|
|
||||||
|
return encoder, decoder, model_l
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def Flow(class_nums):
|
def EncFlow():
|
||||||
exec( nnlib.import_all(), locals(), globals() )
|
exec( nnlib.import_all(), locals(), globals() )
|
||||||
|
|
||||||
|
XConv2D = partial(Conv2D, padding='zero')
|
||||||
|
|
||||||
|
def Act(lrelu_alpha=0.1):
|
||||||
|
return LeakyReLU(alpha=lrelu_alpha)
|
||||||
|
|
||||||
|
def downscale (dim, **kwargs):
|
||||||
|
def func(x):
|
||||||
|
return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
|
||||||
|
return func
|
||||||
|
|
||||||
|
def upscale (dim, **kwargs):
|
||||||
|
def func(x):
|
||||||
|
return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
|
||||||
|
return func
|
||||||
|
|
||||||
|
def to_bgr (output_nc, **kwargs):
|
||||||
|
def func(x):
|
||||||
|
return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x)
|
||||||
|
return func
|
||||||
|
|
||||||
|
upscale = partial(upscale)
|
||||||
|
downscale = partial(downscale)
|
||||||
|
ae_dims = 512
|
||||||
def func(input):
|
def func(input):
|
||||||
x = input
|
x = input
|
||||||
|
x = downscale(64)(x)
|
||||||
|
x = downscale(128)(x)
|
||||||
|
x = downscale(256)(x)
|
||||||
|
x = downscale(512)(x)
|
||||||
|
x = Dense(ae_dims, name="latent", use_bias=False)(Flatten()(x))
|
||||||
|
x = Lambda ( lambda x: x + 0.1*K.random_normal(K.shape(x), 0, 1) , output_shape=(None,ae_dims) ) (x)
|
||||||
|
return x
|
||||||
|
|
||||||
# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg')
|
return func
|
||||||
# x = resnet50(x)
|
|
||||||
# output = []
|
@staticmethod
|
||||||
# for class_num in class_nums:
|
def DecFlow(resolution):
|
||||||
# pitch = Dense(class_num, activation='softmax')(x)
|
exec( nnlib.import_all(), locals(), globals() )
|
||||||
# yaw = Dense(class_num, activation='softmax')(x)
|
|
||||||
# roll = Dense(class_num, activation='softmax')(x)
|
|
||||||
# output += [pitch,yaw,roll]
|
|
||||||
|
|
||||||
# return output
|
|
||||||
|
|
||||||
x = Conv2D(64, kernel_size=11, strides=4, padding='same', activation='relu')(x)
|
|
||||||
x = MaxPooling2D( (3,3), strides=2 )(x)
|
|
||||||
|
|
||||||
x = Conv2D(192, kernel_size=5, strides=1, padding='same', activation='relu')(x)
|
XConv2D = partial(Conv2D, padding='zero')
|
||||||
x = MaxPooling2D( (3,3), strides=2 )(x)
|
|
||||||
|
def Act(lrelu_alpha=0.1):
|
||||||
x = Conv2D(384, kernel_size=3, strides=1, padding='same', activation='relu')(x)
|
return LeakyReLU(alpha=lrelu_alpha)
|
||||||
x = Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu')(x)
|
|
||||||
x = Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu')(x)
|
|
||||||
x = MaxPooling2D( (3,3), strides=2 )(x)
|
|
||||||
|
|
||||||
x = Flatten()(x)
|
def downscale (dim, **kwargs):
|
||||||
|
def func(x):
|
||||||
|
return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
|
||||||
|
return func
|
||||||
|
|
||||||
|
def upscale (dim, **kwargs):
|
||||||
|
def func(x):
|
||||||
|
return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
|
||||||
|
return func
|
||||||
|
|
||||||
|
def to_bgr (output_nc, **kwargs):
|
||||||
|
def func(x):
|
||||||
|
return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x)
|
||||||
|
return func
|
||||||
|
|
||||||
|
upscale = partial(upscale)
|
||||||
|
downscale = partial(downscale)
|
||||||
|
lowest_dense_res = resolution // 16
|
||||||
|
|
||||||
|
def func(input):
|
||||||
|
x = input
|
||||||
|
|
||||||
|
x = Dense(lowest_dense_res * lowest_dense_res * 256, use_bias=False)(x)
|
||||||
|
x = Reshape((lowest_dense_res, lowest_dense_res, 256))(x)
|
||||||
|
|
||||||
|
x = upscale(512)(x)
|
||||||
|
x = upscale(256)(x)
|
||||||
|
x = upscale(128)(x)
|
||||||
|
x = upscale(64)(x)
|
||||||
|
bgr = to_bgr(3)(x)
|
||||||
|
return [bgr]
|
||||||
|
return func
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def LatentFlow(class_nums):
|
||||||
|
exec( nnlib.import_all(), locals(), globals() )
|
||||||
|
|
||||||
|
XConv2D = partial(Conv2D, padding='zero')
|
||||||
|
|
||||||
|
def Act(lrelu_alpha=0.1):
|
||||||
|
return LeakyReLU(alpha=lrelu_alpha)
|
||||||
|
|
||||||
|
def downscale (dim, **kwargs):
|
||||||
|
def func(x):
|
||||||
|
return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
|
||||||
|
return func
|
||||||
|
|
||||||
|
def upscale (dim, **kwargs):
|
||||||
|
def func(x):
|
||||||
|
return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
|
||||||
|
return func
|
||||||
|
|
||||||
|
def to_bgr (output_nc, **kwargs):
|
||||||
|
def func(x):
|
||||||
|
return XConv2D(output_nc, kernel_size=5, use_bias=True, activation='sigmoid')(x)
|
||||||
|
return func
|
||||||
|
|
||||||
|
upscale = partial(upscale)
|
||||||
|
downscale = partial(downscale)
|
||||||
|
|
||||||
|
def func(latent):
|
||||||
|
x = latent
|
||||||
|
|
||||||
x = Dense(1024, activation='relu')(x)
|
x = Dense(1024, activation='relu')(x)
|
||||||
x = Dropout(0.5)(x)
|
x = Dropout(0.5)(x)
|
||||||
x = Dense(1024, activation='relu')(x)
|
x = Dense(2048, activation='relu')(x)
|
||||||
|
x = Dropout(0.5)(x)
|
||||||
|
x = Dense(4096, activation='relu')(x)
|
||||||
|
|
||||||
output = []
|
output = []
|
||||||
for class_num in class_nums:
|
for class_num in class_nums:
|
||||||
|
@ -187,5 +314,19 @@ class PoseEstimator(object):
|
||||||
output += [pitch,yaw,roll]
|
output += [pitch,yaw,roll]
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
#y = Dropout(0.5)(y)
|
||||||
|
#y = Dense(1024, activation='relu')(y)
|
||||||
return func
|
return func
|
||||||
|
|
||||||
|
|
||||||
|
# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg')
|
||||||
|
# x = resnet50(x)
|
||||||
|
# output = []
|
||||||
|
# for class_num in class_nums:
|
||||||
|
# pitch = Dense(class_num)(x)
|
||||||
|
# yaw = Dense(class_num)(x)
|
||||||
|
# roll = Dense(class_num)(x)
|
||||||
|
# output += [pitch,yaw,roll]
|
||||||
|
|
||||||
|
# return output
|
||||||
|
|
|
@ -7,25 +7,33 @@ from facelib import PoseEstimator
|
||||||
from samplelib import *
|
from samplelib import *
|
||||||
from interact import interact as io
|
from interact import interact as io
|
||||||
import imagelib
|
import imagelib
|
||||||
|
|
||||||
class Model(ModelBase):
|
class Model(ModelBase):
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
super().__init__(*args, **kwargs,
|
super().__init__(*args, **kwargs,
|
||||||
ask_write_preview_history=False,
|
ask_write_preview_history=False,
|
||||||
ask_target_iter=False,
|
ask_target_iter=False,
|
||||||
ask_sort_by_yaw=False,
|
ask_sort_by_yaw=False,
|
||||||
ask_random_flip=False,
|
ask_random_flip=False,
|
||||||
ask_src_scale_mod=False)
|
ask_src_scale_mod=False)
|
||||||
|
|
||||||
#override
|
#override
|
||||||
def onInitializeOptions(self, is_first_run, ask_override):
|
def onInitializeOptions(self, is_first_run, ask_override):
|
||||||
|
yn_str = {True:'y',False:'n'}
|
||||||
|
|
||||||
default_face_type = 'f'
|
default_face_type = 'f'
|
||||||
if is_first_run:
|
if is_first_run:
|
||||||
self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower()
|
self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower()
|
||||||
else:
|
else:
|
||||||
self.options['face_type'] = self.options.get('face_type', default_face_type)
|
self.options['face_type'] = self.options.get('face_type', default_face_type)
|
||||||
|
|
||||||
|
def_train_bgr = self.options.get('train_bgr', True)
|
||||||
|
if is_first_run or ask_override:
|
||||||
|
self.options['train_bgr'] = io.input_bool ("Train bgr? (y/n, ?:help skip: %s) : " % (yn_str[def_train_bgr]), def_train_bgr)
|
||||||
|
else:
|
||||||
|
self.options['train_bgr'] = self.options.get('train_bgr', def_train_bgr)
|
||||||
|
|
||||||
#override
|
#override
|
||||||
def onInitialize(self):
|
def onInitialize(self):
|
||||||
exec(nnlib.import_all(), locals(), globals())
|
exec(nnlib.import_all(), locals(), globals())
|
||||||
|
@ -34,9 +42,9 @@ class Model(ModelBase):
|
||||||
self.resolution = 128
|
self.resolution = 128
|
||||||
self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF
|
self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF
|
||||||
|
|
||||||
|
|
||||||
self.pose_est = PoseEstimator(self.resolution,
|
self.pose_est = PoseEstimator(self.resolution,
|
||||||
FaceType.toString(self.face_type),
|
FaceType.toString(self.face_type),
|
||||||
load_weights=not self.is_first_run(),
|
load_weights=not self.is_first_run(),
|
||||||
weights_file_root=self.get_model_root_path(),
|
weights_file_root=self.get_model_root_path(),
|
||||||
training=True)
|
training=True)
|
||||||
|
@ -45,42 +53,44 @@ class Model(ModelBase):
|
||||||
t = SampleProcessor.Types
|
t = SampleProcessor.Types
|
||||||
face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF
|
face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF
|
||||||
|
|
||||||
self.set_training_data_generators ([
|
self.set_training_data_generators ([
|
||||||
SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4,
|
SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4,
|
||||||
sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
|
sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
|
||||||
output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) },
|
output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) },
|
||||||
|
{'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution },
|
||||||
|
{'types': (t.IMG_TRANSFORMED, face_type, t.MODE_M, t.FACE_MASK_FULL), 'resolution':self.resolution },
|
||||||
{'types': (t.IMG_PITCH_YAW_ROLL,)}
|
{'types': (t.IMG_PITCH_YAW_ROLL,)}
|
||||||
]),
|
]),
|
||||||
|
|
||||||
SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4,
|
SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4,
|
||||||
sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
|
sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
|
||||||
output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution },
|
output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution },
|
||||||
{'types': (t.IMG_PITCH_YAW_ROLL,)}
|
{'types': (t.IMG_PITCH_YAW_ROLL,)}
|
||||||
])
|
])
|
||||||
])
|
])
|
||||||
|
|
||||||
#override
|
#override
|
||||||
def onSave(self):
|
def onSave(self):
|
||||||
self.pose_est.save_weights()
|
self.pose_est.save_weights()
|
||||||
|
|
||||||
#override
|
#override
|
||||||
def onTrainOneIter(self, generators_samples, generators_list):
|
def onTrainOneIter(self, generators_samples, generators_list):
|
||||||
target_src, pitch_yaw_roll = generators_samples[0]
|
target_srcw, target_src, target_srcm, pitch_yaw_roll = generators_samples[0]
|
||||||
|
|
||||||
pitch_loss,yaw_loss,roll_loss = self.pose_est.train_on_batch( target_src, pitch_yaw_roll )
|
bgr_loss, pitch_loss,yaw_loss,roll_loss = self.pose_est.train_on_batch( target_srcw, target_src, target_srcm, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] )
|
||||||
|
|
||||||
|
return ( ('bgr_loss', bgr_loss), ('pitch_loss', pitch_loss), ('yaw_loss', yaw_loss), ('roll_loss', roll_loss) )
|
||||||
|
|
||||||
return ( ('pitch_loss', pitch_loss), ('yaw_loss', yaw_loss), ('roll_loss', roll_loss) )
|
|
||||||
|
|
||||||
#override
|
#override
|
||||||
def onGetPreview(self, generators_samples):
|
def onGetPreview(self, generators_samples):
|
||||||
test_src = generators_samples[0][0][0:4] #first 4 samples
|
test_src = generators_samples[0][1][0:4] #first 4 samples
|
||||||
test_pyr_src = generators_samples[0][1][0:4]
|
test_pyr_src = generators_samples[0][3][0:4]
|
||||||
test_dst = generators_samples[1][0][0:4]
|
test_dst = generators_samples[1][0][0:4]
|
||||||
test_pyr_dst = generators_samples[1][1][0:4]
|
test_pyr_dst = generators_samples[1][1][0:4]
|
||||||
|
|
||||||
h,w,c = self.resolution,self.resolution,3
|
h,w,c = self.resolution,self.resolution,3
|
||||||
h_line = 13
|
h_line = 13
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
for name, img, pyr in [ ['training data', test_src, test_pyr_src], \
|
for name, img, pyr in [ ['training data', test_src, test_pyr_src], \
|
||||||
['evaluating data',test_dst, test_pyr_dst] ]:
|
['evaluating data',test_dst, test_pyr_dst] ]:
|
||||||
|
@ -91,18 +101,18 @@ class Model(ModelBase):
|
||||||
img_info = np.ones ( (h,w,c) ) * 0.1
|
img_info = np.ones ( (h,w,c) ) * 0.1
|
||||||
lines = ["%s" % ( str(pyr[i]) ),
|
lines = ["%s" % ( str(pyr[i]) ),
|
||||||
"%s" % ( str(pyr_pred[i]) ) ]
|
"%s" % ( str(pyr_pred[i]) ) ]
|
||||||
|
|
||||||
lines_count = len(lines)
|
lines_count = len(lines)
|
||||||
for ln in range(lines_count):
|
for ln in range(lines_count):
|
||||||
img_info[ ln*h_line:(ln+1)*h_line, 0:w] += \
|
img_info[ ln*h_line:(ln+1)*h_line, 0:w] += \
|
||||||
imagelib.get_text_image ( (h_line,w,c), lines[ln], color=[0.8]*c )
|
imagelib.get_text_image ( (h_line,w,c), lines[ln], color=[0.8]*c )
|
||||||
|
|
||||||
hor_imgs.append ( np.concatenate ( (
|
hor_imgs.append ( np.concatenate ( (
|
||||||
img[i,:,:,0:3],
|
img[i,:,:,0:3],
|
||||||
img_info
|
img_info
|
||||||
), axis=1) )
|
), axis=1) )
|
||||||
|
|
||||||
|
|
||||||
result += [ (name, np.concatenate (hor_imgs, axis=0)) ]
|
result += [ (name, np.concatenate (hor_imgs, axis=0)) ]
|
||||||
|
|
||||||
return result
|
return result
|
|
@ -99,12 +99,6 @@ class SampleProcessor(object):
|
||||||
if debug and is_face_sample:
|
if debug and is_face_sample:
|
||||||
LandmarksProcessor.draw_landmarks (sample_bgr, sample.landmarks, (0, 1, 0))
|
LandmarksProcessor.draw_landmarks (sample_bgr, sample.landmarks, (0, 1, 0))
|
||||||
|
|
||||||
close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None
|
|
||||||
close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None
|
|
||||||
|
|
||||||
if debug and close_sample_bgr is not None:
|
|
||||||
LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0))
|
|
||||||
|
|
||||||
params = imagelib.gen_warp_params(sample_bgr, sample_process_options.random_flip, rotation_range=sample_process_options.rotation_range, scale_range=sample_process_options.scale_range, tx_range=sample_process_options.tx_range, ty_range=sample_process_options.ty_range )
|
params = imagelib.gen_warp_params(sample_bgr, sample_process_options.random_flip, rotation_range=sample_process_options.rotation_range, scale_range=sample_process_options.scale_range, tx_range=sample_process_options.tx_range, ty_range=sample_process_options.ty_range )
|
||||||
|
|
||||||
cached_images = collections.defaultdict(dict)
|
cached_images = collections.defaultdict(dict)
|
||||||
|
@ -265,7 +259,11 @@ class SampleProcessor(object):
|
||||||
return outputs
|
return outputs
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None
|
||||||
|
close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None
|
||||||
|
|
||||||
|
if debug and close_sample_bgr is not None:
|
||||||
|
LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0))
|
||||||
RANDOM_CLOSE = 0x00000040, #currently unused
|
RANDOM_CLOSE = 0x00000040, #currently unused
|
||||||
MORPH_TO_RANDOM_CLOSE = 0x00000080, #currently unused
|
MORPH_TO_RANDOM_CLOSE = 0x00000080, #currently unused
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue