From e1da9c56b4c60d2af0f6fc71843f7d42ebeaa29d Mon Sep 17 00:00:00 2001 From: iperov Date: Wed, 24 Apr 2019 09:38:26 +0400 Subject: [PATCH] SAE collapse fix (#245) * test * _ * _ * upd dev_poseest * SAE: finally collapses are fixed * fix batch size help --- facelib/PoseEstimator.py | 124 +++++++++++++++++------------- models/ModelBase.py | 2 +- models/Model_DEV_POSEEST/Model.py | 15 ++-- models/Model_SAE/Model.py | 16 ++-- 4 files changed, 87 insertions(+), 70 deletions(-) diff --git a/facelib/PoseEstimator.py b/facelib/PoseEstimator.py index 97abccb..64adbba 100644 --- a/facelib/PoseEstimator.py +++ b/facelib/PoseEstimator.py @@ -12,6 +12,7 @@ from nnlib import nnlib """ PoseEstimator estimates pitch, yaw, roll, from FAN aligned face. trained on https://www.umdfaces.io +based on https://arxiv.org/pdf/1901.06778.pdf HYBRID COARSE-FINE CLASSIFICATION FOR HEAD POSE ESTIMATION """ class PoseEstimator(object): @@ -19,9 +20,12 @@ class PoseEstimator(object): def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): exec( nnlib.import_all(), locals(), globals() ) - self.class_num = 91 + self.angles = [90, 45, 30, 10, 2] + self.alpha_cat_losses = [0.07,0.05,0.03,0.01,0.01] + self.class_nums = [ angle+1 for angle in self.angles ] + + self.model = PoseEstimator.BuildModel(resolution, class_nums=self.class_nums) - self.model = PoseEstimator.BuildModel(resolution, class_num=self.class_num) if weights_file_root is not None: weights_file_root = Path(weights_file_root) @@ -33,42 +37,44 @@ class PoseEstimator(object): if load_weights: self.model.load_weights (str(self.weights_path)) - idx_tensor = np.array([idx for idx in range(self.class_num)], dtype=K.floatx() ) - idx_tensor = K.constant(idx_tensor) - inp_t, = self.model.inputs - pitch_bins_t, yaw_bins_t, roll_bins_t = self.model.outputs + bins_t = self.model.outputs - pitch_t, yaw_t, roll_t = K.sum ( pitch_bins_t * idx_tensor, 1), K.sum ( yaw_bins_t * idx_tensor, 1), K.sum ( roll_bins_t * idx_tensor, 1) - - inp_pitch_bins_t = Input ( (self.class_num,) ) inp_pitch_t = Input ( (1,) ) - - inp_yaw_bins_t = Input ( (self.class_num,) ) inp_yaw_t = Input ( (1,) ) - - inp_roll_bins_t = Input ( (self.class_num,) ) inp_roll_t = Input ( (1,) ) - alpha = 0.001 - - pitch_loss = K.categorical_crossentropy(inp_pitch_bins_t, pitch_bins_t) \ - + alpha * K.mean(K.square( inp_pitch_t - pitch_t), -1) + inp_bins_t = [] + for class_num in self.class_nums: + inp_bins_t += [ Input ((class_num,)), Input ((class_num,)), Input ((class_num,)) ] - yaw_loss = K.categorical_crossentropy(inp_yaw_bins_t, yaw_bins_t) \ - + alpha * K.mean(K.square( inp_yaw_t - yaw_t), -1) - - roll_loss = K.categorical_crossentropy(inp_roll_bins_t, roll_bins_t) \ - + alpha * K.mean(K.square( inp_roll_t - roll_t), -1) + loss_pitch = [] + loss_yaw = [] + loss_roll = [] + for i,class_num in enumerate(self.class_nums): + a = self.alpha_cat_losses[i] + loss_pitch += [ a*K.categorical_crossentropy( inp_bins_t[i*3+0], bins_t[i*3+0] ) ] + loss_yaw += [ a*K.categorical_crossentropy( inp_bins_t[i*3+1], bins_t[i*3+1] ) ] + loss_roll += [ a*K.categorical_crossentropy( inp_bins_t[i*3+2], bins_t[i*3+2] ) ] + + idx_tensor = K.constant( np.array([idx for idx in range(self.class_nums[0])], dtype=K.floatx() ) ) + pitch_t, yaw_t, roll_t = K.sum ( bins_t[0] * idx_tensor, 1), K.sum ( bins_t[1] * idx_tensor, 1), K.sum ( bins_t[2] * idx_tensor, 1) - loss = K.mean( pitch_loss + yaw_loss + roll_loss ) + reg_alpha = 0.02 + reg_pitch_loss = reg_alpha * K.mean(K.square( inp_pitch_t - pitch_t), -1) + reg_yaw_loss = reg_alpha * K.mean(K.square( inp_yaw_t - yaw_t), -1) + reg_roll_loss = reg_alpha * K.mean(K.square( inp_roll_t - roll_t), -1) + + pitch_loss = reg_pitch_loss + sum(loss_pitch) + yaw_loss = reg_yaw_loss + sum(loss_yaw) + roll_loss = reg_roll_loss + sum(loss_roll) - opt = Adam(lr=0.001, tf_cpu_mode=2) + opt = Adam(lr=0.001, tf_cpu_mode=0) if training: - self.train = K.function ([inp_t, inp_pitch_bins_t, inp_pitch_t, inp_yaw_bins_t, inp_yaw_t, inp_roll_bins_t, inp_roll_t], - [loss], opt.get_updates(loss, self.model.trainable_weights) ) + self.train = K.function ([inp_t, inp_pitch_t, inp_yaw_t, inp_roll_t] + inp_bins_t, + [K.mean(pitch_loss),K.mean(yaw_loss),K.mean(roll_loss)], opt.get_updates( [pitch_loss,yaw_loss,roll_loss], self.model.trainable_weights) ) self.view = K.function ([inp_t], [pitch_t, yaw_t, roll_t] ) @@ -82,19 +88,27 @@ class PoseEstimator(object): self.model.save_weights (str(self.weights_path)) def train_on_batch(self, imgs, pitch_yaw_roll): - c = ( (pitch_yaw_roll+1) * 45.0 ).astype(np.int).astype(K.floatx()) + pyr = pitch_yaw_roll+1 + + feed = [imgs] - inp_pitch = c[:,0:1] - inp_yaw = c[:,1:2] - inp_roll = c[:,2:3] + for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)): + c = np.round(pyr * (angle / 2) ).astype(K.floatx()) + inp_pitch = c[:,0:1] + inp_yaw = c[:,1:2] + inp_roll = c[:,2:3] + if i == 0: + feed += [inp_pitch, inp_yaw, inp_roll] + + inp_pitch_bins = keras.utils.to_categorical(inp_pitch, class_num ) + inp_yaw_bins = keras.utils.to_categorical(inp_yaw, class_num ) + inp_roll_bins = keras.utils.to_categorical(inp_roll, class_num ) + feed += [inp_pitch_bins, inp_yaw_bins, inp_roll_bins] + #import code + #code.interact(local=dict(globals(), **locals())) - inp_pitch_bins = keras.utils.to_categorical(inp_pitch, self.class_num ) - inp_yaw_bins = keras.utils.to_categorical(inp_yaw, self.class_num ) - inp_roll_bins = keras.utils.to_categorical(inp_roll, self.class_num ) - - loss, = self.train( [imgs, inp_pitch_bins, inp_pitch, inp_yaw_bins, inp_yaw, inp_roll_bins, inp_roll] ) - - return loss + pitch_loss,yaw_loss,roll_loss = self.train(feed) + return pitch_loss,yaw_loss,roll_loss def extract (self, input_image, is_input_tanh=False): if is_input_tanh: @@ -106,7 +120,7 @@ class PoseEstimator(object): pitch, yaw, roll = self.view( [input_image] ) result = np.concatenate( (pitch[...,np.newaxis], yaw[...,np.newaxis], roll[...,np.newaxis]), -1 ) - result = np.clip ( result / 45.0 - 1, -1.0, 1.0 ) + result = np.clip ( result / (self.angles[0] / 2) - 1, -1.0, 1.0 ) if input_shape_len == 3: result = result[0] @@ -114,28 +128,31 @@ class PoseEstimator(object): return result @staticmethod - def BuildModel ( resolution, class_num): + def BuildModel ( resolution, class_nums): exec( nnlib.import_all(), locals(), globals() ) inp = Input ( (resolution,resolution,3) ) x = inp - x = PoseEstimator.Flow(class_num=class_num)(x) + x = PoseEstimator.Flow(class_nums=class_nums)(x) model = Model(inp,x) return model @staticmethod - def Flow(class_num): + def Flow(class_nums): exec( nnlib.import_all(), locals(), globals() ) def func(input): x = input - # resnet50 = keras.applications.ResNet50(include_top=False, weights='imagenet', input_shape=K.int_shape(x)[1:], pooling='avg') + # resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg') # x = resnet50(x) - # pitch = Dense(class_num, activation='softmax', name='pitch')(x) - # yaw = Dense(class_num, activation='softmax', name='yaw')(x) - # roll = Dense(class_num, activation='softmax', name='roll')(x) - - # return [pitch, yaw, roll] + # output = [] + # for class_num in class_nums: + # pitch = Dense(class_num, activation='softmax')(x) + # yaw = Dense(class_num, activation='softmax')(x) + # roll = Dense(class_num, activation='softmax')(x) + # output += [pitch,yaw,roll] + + # return output x = Conv2D(64, kernel_size=11, strides=4, padding='same', activation='relu')(x) x = MaxPooling2D( (3,3), strides=2 )(x) @@ -153,10 +170,13 @@ class PoseEstimator(object): x = Dropout(0.5)(x) x = Dense(1024, activation='relu')(x) - pitch = Dense(class_num, activation='softmax', name='pitch')(x) - yaw = Dense(class_num, activation='softmax', name='yaw')(x) - roll = Dense(class_num, activation='softmax', name='roll')(x) - - return [pitch, yaw, roll] + output = [] + for class_num in class_nums: + pitch = Dense(class_num, activation='softmax')(x) + yaw = Dense(class_num, activation='softmax')(x) + roll = Dense(class_num, activation='softmax')(x) + output += [pitch,yaw,roll] + + return output return func diff --git a/models/ModelBase.py b/models/ModelBase.py index 9e17aae..2c2dc69 100644 --- a/models/ModelBase.py +++ b/models/ModelBase.py @@ -95,7 +95,7 @@ class ModelBase(object): if ask_batch_size and (self.iter == 0 or ask_override): default_batch_size = 0 if self.iter == 0 else self.options.get('batch_size',0) - self.options['batch_size'] = max(0, io.input_int("Batch_size (?:help skip:%d) : " % (default_batch_size), default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error and increases risk of model collapse during training. Tune this value for your videocard manually.")) + self.options['batch_size'] = max(0, io.input_int("Batch_size (?:help skip:%d) : " % (default_batch_size), default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually.")) else: self.options['batch_size'] = self.options.get('batch_size', 0) diff --git a/models/Model_DEV_POSEEST/Model.py b/models/Model_DEV_POSEEST/Model.py index 1b793a4..6e097b1 100644 --- a/models/Model_DEV_POSEEST/Model.py +++ b/models/Model_DEV_POSEEST/Model.py @@ -44,18 +44,17 @@ class Model(ModelBase): if self.is_training_mode: f = SampleProcessor.TypeFlags face_type = f.FACE_TYPE_FULL if self.options['face_type'] == 'f' else f.FACE_TYPE_HALF - - normalize_vgg = False + self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, + SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, sample_process_options=SampleProcessor.Options( rotation_range=[0,0], motion_blur = [25, 1] ), #random_flip=True, - output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE | f.OPT_APPLY_MOTION_BLUR, self.resolution, {'normalize_vgg':normalize_vgg} ], + output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE | f.OPT_APPLY_MOTION_BLUR, self.resolution ], [f.PITCH_YAW_ROLL], ]), - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, - output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE, self.resolution, {'normalize_vgg':normalize_vgg} ], + output_sample_types=[ [f.TRANSFORMED | face_type | f.MODE_BGR_SHUFFLE, self.resolution ], [f.PITCH_YAW_ROLL], ]) ]) @@ -68,9 +67,9 @@ class Model(ModelBase): def onTrainOneIter(self, generators_samples, generators_list): target_src, pitch_yaw_roll = generators_samples[0] - loss = self.pose_est.train_on_batch( target_src, pitch_yaw_roll ) + pitch_loss,yaw_loss,roll_loss = self.pose_est.train_on_batch( target_src, pitch_yaw_roll ) - return ( ('loss', loss), ) + return ( ('pitch_loss', pitch_loss), ('yaw_loss', yaw_loss), ('roll_loss', roll_loss) ) #override def onGetPreview(self, generators_samples): diff --git a/models/Model_SAE/Model.py b/models/Model_SAE/Model.py index 15971aa..1eb2c07 100644 --- a/models/Model_SAE/Model.py +++ b/models/Model_SAE/Model.py @@ -248,9 +248,7 @@ class SAEModel(ModelBase): psd_target_dst_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] psd_target_dst_anti_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))] - - alpha_rec = 100 - + if self.is_training_mode: self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, tf_cpu_mode=self.options['optimizer_mode']-1) self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, tf_cpu_mode=self.options['optimizer_mode']-1) @@ -265,9 +263,9 @@ class SAEModel(ModelBase): src_dst_mask_loss_train_weights = self.encoder.trainable_weights + self.decoder_srcm.trainable_weights + self.decoder_dstm.trainable_weights if not self.options['pixel_loss']: - src_loss_batch = sum([ ( alpha_rec*K.square( dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_ar_opt[i], pred_src_src_masked_ar_opt[i] ) )) for i in range(len(target_src_masked_ar_opt)) ]) + src_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_ar_opt[i], pred_src_src_masked_ar_opt[i]) for i in range(len(target_src_masked_ar_opt)) ]) else: - src_loss_batch = sum([ K.mean ( alpha_rec*K.square( target_src_masked_ar_opt[i] - pred_src_src_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_src_masked_ar_opt)) ]) + src_loss_batch = sum([ K.mean ( 50*K.square( target_src_masked_ar_opt[i] - pred_src_src_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_src_masked_ar_opt)) ]) src_loss = K.mean(src_loss_batch) @@ -279,15 +277,15 @@ class SAEModel(ModelBase): bg_style_power = self.options['bg_style_power'] / 100.0 if bg_style_power != 0: if not self.options['pixel_loss']: - bg_loss = K.mean( (alpha_rec*bg_style_power)*K.square(dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked_ar[-1], target_dst_anti_masked_ar[-1] ))) + bg_loss = K.mean( (10*bg_style_power)*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked_ar[-1], target_dst_anti_masked_ar[-1] )) else: - bg_loss = K.mean( (alpha_rec*bg_style_power)*K.square( psd_target_dst_anti_masked_ar[-1] - target_dst_anti_masked_ar[-1] )) + bg_loss = K.mean( (50*bg_style_power)*K.square( psd_target_dst_anti_masked_ar[-1] - target_dst_anti_masked_ar[-1] )) src_loss += bg_loss if not self.options['pixel_loss']: - dst_loss_batch = sum([ ( alpha_rec*K.square(dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_dst_masked_ar_opt[i], pred_dst_dst_masked_ar_opt[i] ) )) for i in range(len(target_dst_masked_ar_opt)) ]) + dst_loss_batch = sum([ 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)(target_dst_masked_ar_opt[i], pred_dst_dst_masked_ar_opt[i]) for i in range(len(target_dst_masked_ar_opt)) ]) else: - dst_loss_batch = sum([ K.mean ( alpha_rec*K.square( target_dst_masked_ar_opt[i] - pred_dst_dst_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_dst_masked_ar_opt)) ]) + dst_loss_batch = sum([ K.mean ( 50*K.square( target_dst_masked_ar_opt[i] - pred_dst_dst_masked_ar_opt[i] ), axis=[1,2,3]) for i in range(len(target_dst_masked_ar_opt)) ]) dst_loss = K.mean(dst_loss_batch)