This commit is contained in:
iperov 2019-04-25 14:46:39 +04:00
parent c0a63addd4
commit 0e088f6415
4 changed files with 272 additions and 123 deletions

View file

@ -331,7 +331,7 @@ def calc_face_yaw(landmarks):
r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0
return float(r-l)
#returns pitch,yaw [-1...+1]
#returns pitch,yaw,roll [-1...+1]
def estimate_pitch_yaw_roll(aligned_256px_landmarks):
shape = (256,256)
focal_length = shape[1]
@ -351,4 +351,4 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks):
pitch = np.clip ( pitch*1.25, -1.0, 1.0 )
yaw = np.clip ( yaw*1.25, -1.0, 1.0 )
roll = np.clip ( roll*1.25, -1.0, 1.0 )
return pitch, yaw, roll
return -pitch, yaw, roll

View file

@ -19,36 +19,67 @@ class PoseEstimator(object):
VERSION = 1
def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False):
exec( nnlib.import_all(), locals(), globals() )
self.resolution = resolution
self.angles = [90, 45, 30, 10, 2]
self.angles = [60, 45, 30, 10, 2]
self.alpha_cat_losses = [7,5,3,1,1]
self.class_nums = [ angle+1 for angle in self.angles ]
self.model = PoseEstimator.BuildModel(resolution, class_nums=self.class_nums)
self.encoder, self.decoder, self.model_l = PoseEstimator.BuildModels(resolution, class_nums=self.class_nums)
if weights_file_root is not None:
weights_file_root = Path(weights_file_root)
else:
weights_file_root = Path(__file__).parent
self.weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) )
self.encoder_weights_path = weights_file_root / ('PoseEst_%d_%s_enc.h5' % (resolution, face_type_str) )
self.decoder_weights_path = weights_file_root / ('PoseEst_%d_%s_dec.h5' % (resolution, face_type_str) )
self.l_weights_path = weights_file_root / ('PoseEst_%d_%s_l.h5' % (resolution, face_type_str) )
if load_weights:
self.model.load_weights (str(self.weights_path))
else:
conv_weights_list = []
for layer in self.model.layers:
if type(layer) == keras.layers.Conv2D:
conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights
CAInitializerMP ( conv_weights_list )
inp_t, = self.model.inputs
bins_t = self.model.outputs
self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) )
self.input_bgr_shape = (resolution, resolution, 3)
inp_t = Input (self.input_bgr_shape)
inp_mask_t = Input ( (resolution, resolution, 1) )
inp_real_t = Input (self.input_bgr_shape)
inp_pitch_t = Input ( (1,) )
inp_yaw_t = Input ( (1,) )
inp_roll_t = Input ( (1,) )
if training:
latent_t = self.encoder(inp_t)
bgr_t = self.decoder (latent_t)
bins_t = self.model_l(latent_t)
else:
self.model = Model(inp_t, self.model_l(self.encoder(inp_t)) )
bins_t = self.model(inp_t)
if load_weights:
if training:
self.encoder.load_weights (str(self.encoder_weights_path))
self.decoder.load_weights (str(self.decoder_weights_path))
self.model_l.load_weights (str(self.l_weights_path))
else:
self.model.load_weights (str(self.model_weights_path))
else:
def gather_Conv2D_layers(models_list):
conv_weights_list = []
for model in models_list:
for layer in model.layers:
layer_type = type(layer)
if layer_type == keras.layers.Conv2D:
conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights
elif layer_type == keras.engine.training.Model:
conv_weights_list += gather_Conv2D_layers ([layer])
return conv_weights_list
CAInitializerMP ( gather_Conv2D_layers( [self.encoder, self.decoder] ) )
idx_tensor = K.constant( np.array([idx for idx in range(self.class_nums[0])], dtype=K.floatx() ) )
pitch_t, yaw_t, roll_t = K.sum ( bins_t[0] * idx_tensor, 1), K.sum (bins_t[1] * idx_tensor, 1), K.sum ( bins_t[2] * idx_tensor, 1)
if training:
inp_bins_t = []
for class_num in self.class_nums:
inp_bins_t += [ Input ((class_num,)), Input ((class_num,)), Input ((class_num,)) ]
@ -63,15 +94,9 @@ class PoseEstimator(object):
loss_yaw += [ a*K.categorical_crossentropy( inp_bins_t[i*3+1], bins_t[i*3+1] ) ]
loss_roll += [ a*K.categorical_crossentropy( inp_bins_t[i*3+2], bins_t[i*3+2] ) ]
idx_tensor = K.constant( np.array([idx for idx in range(self.class_nums[0])], dtype=K.floatx() ) )
#pitch_t, yaw_t, roll_t = K.sum ( bins_t[0] * idx_tensor, 1), K.sum ( bins_t[1] * idx_tensor, 1), K.sum ( bins_t[2] * idx_tensor, 1)
bgr_loss = K.mean( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( inp_real_t*inp_mask_t, bgr_t*inp_mask_t) )
pitch_t, yaw_t, roll_t = nnlib.tf.reduce_sum ( bins_t[0] * idx_tensor, 1), nnlib.tf.reduce_sum ( bins_t[1] * idx_tensor, 1), nnlib.tf.reduce_sum ( bins_t[2] * idx_tensor, 1)
reg_alpha = 2
reg_alpha = 0.01
reg_pitch_loss = reg_alpha * K.mean(K.square( inp_pitch_t - pitch_t), -1)
reg_yaw_loss = reg_alpha * K.mean(K.square( inp_yaw_t - yaw_t), -1)
reg_roll_loss = reg_alpha * K.mean(K.square( inp_roll_t - roll_t), -1)
@ -79,11 +104,14 @@ class PoseEstimator(object):
pitch_loss = reg_pitch_loss + sum(loss_pitch)
yaw_loss = reg_yaw_loss + sum(loss_yaw)
roll_loss = reg_roll_loss + sum(loss_roll)
opt = Adam(lr=0.000001)
if training:
self.train = K.function ([inp_t, inp_pitch_t, inp_yaw_t, inp_roll_t] + inp_bins_t,
[K.mean(pitch_loss),K.mean(yaw_loss),K.mean(roll_loss)], opt.get_updates( [pitch_loss,yaw_loss,roll_loss], self.model.trainable_weights) )
self.train = K.function ([inp_t, inp_real_t, inp_mask_t],
[bgr_loss], Adam(lr=2e-4, beta_1=0.5, beta_2=0.999).get_updates( bgr_loss, self.encoder.trainable_weights+self.decoder.trainable_weights ) )
self.train_l = K.function ([inp_t, inp_pitch_t, inp_yaw_t, inp_roll_t] + inp_bins_t,
[K.mean(pitch_loss),K.mean(yaw_loss),K.mean(roll_loss)], Adam(lr=0.000001).get_updates( [pitch_loss,yaw_loss,roll_loss], self.model_l.trainable_weights) )
self.view = K.function ([inp_t], [pitch_t, yaw_t, roll_t] )
@ -94,13 +122,22 @@ class PoseEstimator(object):
return False #pass exception between __enter__ and __exit__ to outter level
def save_weights(self):
    """Write the weights of every sub-model, plus a combined inference model, to disk.

    The encoder, decoder and latent classifier are each saved to their own
    path. A model chaining encoder -> model_l is then assembled on a new
    Input of shape self.input_bgr_shape and saved to self.model_weights_path.
    """
    per_model_targets = (
        (self.encoder, self.encoder_weights_path),
        (self.decoder, self.decoder_weights_path),
        (self.model_l, self.l_weights_path),
    )
    for sub_model, target_path in per_model_targets:
        sub_model.save_weights (str(target_path))

    # NOTE(review): a new Input/Model pair is created on every call - confirm
    # this is acceptable for how often save_weights is invoked.
    image_in = Input (self.input_bgr_shape)
    combined = Model(image_in, self.model_l(self.encoder(image_in)) )
    combined.save_weights (str(self.model_weights_path))
def train_on_batch(self, warps, imgs, masks, pitch_yaw_roll, skip_bgr_train=False):
pyr = pitch_yaw_roll+1
feed = [imgs]
if not skip_bgr_train:
bgr_loss, = self.train( [warps, imgs, masks] )
else:
bgr_loss = 0
feed = [imgs]
for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)):
c = np.round(pyr * (angle / 2) ).astype(K.floatx())
inp_pitch = c[:,0:1]
@ -113,11 +150,9 @@ class PoseEstimator(object):
inp_yaw_bins = keras.utils.to_categorical(inp_yaw, class_num )
inp_roll_bins = keras.utils.to_categorical(inp_roll, class_num )
feed += [inp_pitch_bins, inp_yaw_bins, inp_roll_bins]
#import code
#code.interact(local=dict(globals(), **locals()))
pitch_loss,yaw_loss,roll_loss = self.train(feed)
return pitch_loss,yaw_loss,roll_loss
pitch_loss,yaw_loss,roll_loss = self.train_l(feed)
return bgr_loss, pitch_loss, yaw_loss, roll_loss
def extract (self, input_image, is_input_tanh=False):
if is_input_tanh:
@ -137,47 +172,139 @@ class PoseEstimator(object):
return result
@staticmethod
def BuildModels ( resolution, class_nums):
    """Build the three sub-networks of the pose estimator.

    resolution -- side length of the square, 3-channel input image.
    class_nums -- forwarded unchanged to LatentFlow as `class_nums`.

    Returns the tuple (encoder, decoder, model_l):
      encoder -- image Input -> output of EncFlow
      decoder -- latent Input -> output of DecFlow(resolution)
      model_l -- latent Input -> output of LatentFlow(class_nums)
    """
    # NOTE(review): presumably this brings Input / Model / K into scope - confirm against nnlib.import_all.
    exec( nnlib.import_all(), locals(), globals() )

    x = inp = Input ( (resolution,resolution,3) )
    x = PoseEstimator.EncFlow()(x)
    encoder = Model(inp,x)

    # Per-sample latent shape: read the full shape first, THEN drop the batch axis.
    # The previous form, K.int_shape(encoder.outputs[0][1:]), sliced the tensor
    # along the batch axis, so the result still carried the batch dimension and
    # was then used as if it were a per-sample shape.
    latent_shape = K.int_shape(encoder.outputs[0])[1:]

    x = inp = Input ( latent_shape )
    x = PoseEstimator.DecFlow(resolution)(x)
    decoder = Model(inp,x)

    x = inp = Input ( latent_shape )
    x = PoseEstimator.LatentFlow(class_nums=class_nums)(x)
    model_l = Model(inp, x )

    return encoder, decoder, model_l
@staticmethod
def EncFlow():
    """Return a function that builds the encoder graph on an input tensor.

    The returned function applies four downscale stages (5x5 conv,
    LeakyReLU(0.1), max-pooling) with 64, 128, 256 and 512 filters, flattens
    the result, projects it with a bias-free Dense layer named "latent" to
    512 units, and finally adds normal noise (mean 0, stddev 1) scaled by 0.1.
    """
    # NOTE(review): presumably this brings Conv2D / Dense / Lambda / K etc. into scope - confirm against nnlib.import_all.
    exec( nnlib.import_all(), locals(), globals() )

    # NOTE(review): padding='zero' is not a stock Keras value; presumably handled by nnlib's Conv2D - confirm.
    XConv2D = partial(Conv2D, padding='zero')

    def Act(lrelu_alpha=0.1):
        return LeakyReLU(alpha=lrelu_alpha)

    def downscale (dim, **kwargs):
        def func(x):
            return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
        return func

    ae_dims = 512

    def func(input):
        x = input
        x = downscale(64)(x)
        x = downscale(128)(x)
        x = downscale(256)(x)
        x = downscale(512)(x)

        x = Dense(ae_dims, name="latent", use_bias=False)(Flatten()(x))

        # output_shape must describe a single sample: Keras prepends the batch
        # axis itself, so the former (None, ae_dims) declared one axis too many.
        # NOTE(review): the noise is added unconditionally, i.e. also outside training - confirm this is intended.
        x = Lambda ( lambda x: x + 0.1*K.random_normal(K.shape(x), 0, 1) , output_shape=(ae_dims,) ) (x)
        return x

    return func
@staticmethod
def DecFlow(resolution):
    """Return a function that builds the decoder graph on a latent tensor.

    resolution -- target image side length; the spatial grid the latent is
                  reshaped to is resolution // 16 on each side.

    The returned function applies a bias-free Dense layer, reshapes to
    (resolution//16, resolution//16, 256), runs four upscale stages with
    512, 256, 128 and 64 filters, and ends with a 5x5 sigmoid convolution
    producing 3 channels. It returns a one-element list [bgr].
    """
    # NOTE(review): presumably this brings Conv2D / Dense / Reshape / SubpixelUpscaler etc. into scope - confirm against nnlib.import_all.
    exec( nnlib.import_all(), locals(), globals() )

    # NOTE(review): padding='zero' is not a stock Keras value; presumably handled by nnlib's Conv2D - confirm.
    XConv2D = partial(Conv2D, padding='zero')

    def Act(lrelu_alpha=0.1):
        return LeakyReLU(alpha=lrelu_alpha)

    def upscale (dim, **kwargs):
        def func(x):
            # dim * 4 filters feed the SubpixelUpscaler.
            # NOTE(review): presumably it folds them into a 2x larger map with dim channels - confirm.
            return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
        return func

    def to_bgr (output_nc, **kwargs):
        def func(x):
            return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x)
        return func

    lowest_dense_res = resolution // 16

    def func(input):
        x = input
        x = Dense(lowest_dense_res * lowest_dense_res * 256, use_bias=False)(x)
        x = Reshape((lowest_dense_res, lowest_dense_res, 256))(x)

        x = upscale(512)(x)
        x = upscale(256)(x)
        x = upscale(128)(x)
        x = upscale(64)(x)

        bgr = to_bgr(3)(x)
        return [bgr]

    return func
x = Conv2D(64, kernel_size=11, strides=4, padding='same', activation='relu')(x)
x = MaxPooling2D( (3,3), strides=2 )(x)
@staticmethod
def LatentFlow(class_nums):
exec( nnlib.import_all(), locals(), globals() )
x = Conv2D(192, kernel_size=5, strides=1, padding='same', activation='relu')(x)
x = MaxPooling2D( (3,3), strides=2 )(x)
XConv2D = partial(Conv2D, padding='zero')
x = Conv2D(384, kernel_size=3, strides=1, padding='same', activation='relu')(x)
x = Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu')(x)
x = Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu')(x)
x = MaxPooling2D( (3,3), strides=2 )(x)
def Act(lrelu_alpha=0.1):
return LeakyReLU(alpha=lrelu_alpha)
def downscale (dim, **kwargs):
def func(x):
return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
return func
def upscale (dim, **kwargs):
def func(x):
return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
return func
def to_bgr (output_nc, **kwargs):
def func(x):
return XConv2D(output_nc, kernel_size=5, use_bias=True, activation='sigmoid')(x)
return func
upscale = partial(upscale)
downscale = partial(downscale)
def func(latent):
x = latent
x = Flatten()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu')(x)
x = Dense(2048, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(4096, activation='relu')(x)
output = []
for class_num in class_nums:
@ -188,4 +315,18 @@ class PoseEstimator(object):
return output
#y = Dropout(0.5)(y)
#y = Dense(1024, activation='relu')(y)
return func
# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg')
# x = resnet50(x)
# output = []
# for class_num in class_nums:
# pitch = Dense(class_num)(x)
# yaw = Dense(class_num)(x)
# roll = Dense(class_num)(x)
# output += [pitch,yaw,roll]
# return output

View file

@ -20,12 +20,20 @@ class Model(ModelBase):
#override
def onInitializeOptions(self, is_first_run, ask_override):
    """Fill self.options['face_type'] and self.options['train_bgr'].

    is_first_run -- when true, both options are asked interactively.
    ask_override -- when true (and not a first run), only 'train_bgr' is asked again.

    Otherwise the stored values are kept, falling back to 'f' for the face
    type and True for 'train_bgr' when they are missing.
    """
    bool_to_yn = {True:'y',False:'n'}
    fallback_face_type = 'f'

    if not is_first_run:
        self.options['face_type'] = self.options.get('face_type', fallback_face_type)
    else:
        answer = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", fallback_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.")
        self.options['face_type'] = answer.lower()

    # Stored value if present, otherwise True; also used as the prompt default.
    train_bgr_default = self.options.get('train_bgr', True)
    if is_first_run or ask_override:
        prompt = "Train bgr? (y/n, ?:help skip: %s) : " % (bool_to_yn[train_bgr_default])
        self.options['train_bgr'] = io.input_bool (prompt, train_bgr_default)
    else:
        # Same value a second .get('train_bgr', default) lookup would return.
        self.options['train_bgr'] = train_bgr_default
#override
def onInitialize(self):
exec(nnlib.import_all(), locals(), globals())
@ -48,7 +56,9 @@ class Model(ModelBase):
self.set_training_data_generators ([
SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4,
sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) },
output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) },
{'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution },
{'types': (t.IMG_TRANSFORMED, face_type, t.MODE_M, t.FACE_MASK_FULL), 'resolution':self.resolution },
{'types': (t.IMG_PITCH_YAW_ROLL,)}
]),
@ -65,16 +75,16 @@ class Model(ModelBase):
#override
def onTrainOneIter(self, generators_samples, generators_list):
    """Run one training step on the first generator's batch and report the losses.

    generators_samples[0] must unpack into four items, passed in order to
    self.pose_est.train_on_batch together with
    skip_bgr_train = not self.options['train_bgr'].

    Returns a tuple of (name, value) pairs for bgr, pitch, yaw and roll loss.
    """
    warped, target, mask, pyr = generators_samples[0]
    skip_bgr = not self.options['train_bgr']

    losses = self.pose_est.train_on_batch( warped, target, mask, pyr, skip_bgr_train=skip_bgr )
    bgr_loss, pitch_loss, yaw_loss, roll_loss = losses

    loss_names = ('bgr_loss', 'pitch_loss', 'yaw_loss', 'roll_loss')
    return tuple( zip( loss_names, (bgr_loss, pitch_loss, yaw_loss, roll_loss) ) )
#override
def onGetPreview(self, generators_samples):
test_src = generators_samples[0][0][0:4] #first 4 samples
test_pyr_src = generators_samples[0][1][0:4]
test_src = generators_samples[0][1][0:4] #first 4 samples
test_pyr_src = generators_samples[0][3][0:4]
test_dst = generators_samples[1][0][0:4]
test_pyr_dst = generators_samples[1][1][0:4]

View file

@ -99,12 +99,6 @@ class SampleProcessor(object):
if debug and is_face_sample:
LandmarksProcessor.draw_landmarks (sample_bgr, sample.landmarks, (0, 1, 0))
close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None
close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None
if debug and close_sample_bgr is not None:
LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0))
params = imagelib.gen_warp_params(sample_bgr, sample_process_options.random_flip, rotation_range=sample_process_options.rotation_range, scale_range=sample_process_options.scale_range, tx_range=sample_process_options.tx_range, ty_range=sample_process_options.ty_range )
cached_images = collections.defaultdict(dict)
@ -265,7 +259,11 @@ class SampleProcessor(object):
return outputs
"""
close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None
close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None
if debug and close_sample_bgr is not None:
LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0))
RANDOM_CLOSE = 0x00000040, #currently unused
MORPH_TO_RANDOM_CLOSE = 0x00000080, #currently unused