_

2025-07-06 13:02:15 -07:00 · 2019-04-25 14:46:39 +04:00 · 2019-04-25 14:46:39 +04:00 · 0e088f6415
commit 0e088f6415
parent c0a63addd4
4 changed files with 272 additions and 123 deletions
--- a/facelib/LandmarksProcessor.py
+++ b/facelib/LandmarksProcessor.py
@@ -331,7 +331,7 @@ def calc_face_yaw(landmarks):
    r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0
    return float(r-l)

-#returns pitch,yaw [-1...+1]
+#returns pitch,yaw,roll [-1...+1]
 def estimate_pitch_yaw_roll(aligned_256px_landmarks):
    shape = (256,256)
    focal_length = shape[1]
@@ -351,4 +351,4 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks):
    pitch = np.clip ( pitch*1.25, -1.0, 1.0 )
    yaw = np.clip ( yaw*1.25, -1.0, 1.0 )
    roll = np.clip ( roll*1.25, -1.0, 1.0 )
-    return pitch, yaw, roll
+    return -pitch, yaw, roll
--- a/facelib/PoseEstimator.py
+++ b/facelib/PoseEstimator.py
@@ -19,36 +19,67 @@ class PoseEstimator(object):
    VERSION = 1
    def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False):
        exec( nnlib.import_all(), locals(), globals() )
+        self.resolution = resolution
        
-        self.angles = [90, 45, 30, 10, 2]
+        self.angles = [60, 45, 30, 10, 2]
        self.alpha_cat_losses = [7,5,3,1,1]
        self.class_nums = [ angle+1 for angle in self.angles ]
-        self.model = PoseEstimator.BuildModel(resolution, class_nums=self.class_nums)
-        
+        self.encoder, self.decoder, self.model_l = PoseEstimator.BuildModels(resolution, class_nums=self.class_nums)

        if weights_file_root is not None:
            weights_file_root = Path(weights_file_root)
        else:
            weights_file_root = Path(__file__).parent

-        self.weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) )
+        self.encoder_weights_path = weights_file_root / ('PoseEst_%d_%s_enc.h5' % (resolution, face_type_str) )
+        self.decoder_weights_path = weights_file_root / ('PoseEst_%d_%s_dec.h5' % (resolution, face_type_str) )
+        self.l_weights_path = weights_file_root / ('PoseEst_%d_%s_l.h5' % (resolution, face_type_str) )
        
-        if load_weights:
-            self.model.load_weights (str(self.weights_path))
-        else:
-            conv_weights_list = []
-            for layer in self.model.layers:
-                if type(layer) == keras.layers.Conv2D:
-                    conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights            
-            CAInitializerMP ( conv_weights_list )
-            
-        inp_t, = self.model.inputs
-        bins_t = self.model.outputs
+        self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) )
  
+        self.input_bgr_shape = (resolution, resolution, 3)
+        inp_t = Input (self.input_bgr_shape)
+        inp_mask_t = Input ( (resolution, resolution, 1) )
+        inp_real_t = Input (self.input_bgr_shape)
        inp_pitch_t = Input ( (1,) )
        inp_yaw_t = Input ( (1,) )
        inp_roll_t = Input ( (1,) )
        
+        if training:
+            latent_t = self.encoder(inp_t)
+            bgr_t = self.decoder (latent_t)        
+            bins_t = self.model_l(latent_t)
+        else:
+            self.model = Model(inp_t, self.model_l(self.encoder(inp_t)) )
+            bins_t = self.model(inp_t)
+        
+        
+        if load_weights:
+            if training:
+                self.encoder.load_weights (str(self.encoder_weights_path))
+                self.decoder.load_weights (str(self.decoder_weights_path))
+                self.model_l.load_weights (str(self.l_weights_path))
+            else:
+                self.model.load_weights (str(self.model_weights_path))
+                
+        else:
+            def gather_Conv2D_layers(models_list):
+                conv_weights_list = []
+                for model in models_list:
+                    for layer in model.layers:
+                        layer_type = type(layer)
+                        if layer_type == keras.layers.Conv2D:
+                            conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights            
+                        elif layer_type == keras.engine.training.Model:
+                            conv_weights_list += gather_Conv2D_layers ([layer])
+                return conv_weights_list
+                        
+            CAInitializerMP ( gather_Conv2D_layers( [self.encoder, self.decoder] ) )
+            
+        idx_tensor =  K.constant( np.array([idx for idx in range(self.class_nums[0])], dtype=K.floatx() ) )
+        pitch_t, yaw_t, roll_t = K.sum ( bins_t[0] * idx_tensor, 1), K.sum (bins_t[1] * idx_tensor, 1), K.sum ( bins_t[2] * idx_tensor, 1)
+     
+        if training:
            inp_bins_t = []
            for class_num in self.class_nums:
                inp_bins_t += [ Input ((class_num,)), Input ((class_num,)), Input ((class_num,)) ]
@@ -63,15 +94,9 @@ class PoseEstimator(object):
                loss_yaw   += [ a*K.categorical_crossentropy( inp_bins_t[i*3+1], bins_t[i*3+1] ) ]
                loss_roll  += [ a*K.categorical_crossentropy( inp_bins_t[i*3+2], bins_t[i*3+2] ) ]
        
-        idx_tensor =  K.constant( np.array([idx for idx in range(self.class_nums[0])], dtype=K.floatx() ) )
-        #pitch_t, yaw_t, roll_t = K.sum ( bins_t[0] * idx_tensor, 1), K.sum ( bins_t[1] * idx_tensor, 1), K.sum ( bins_t[2] * idx_tensor, 1)
+            bgr_loss = K.mean( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( inp_real_t*inp_mask_t, bgr_t*inp_mask_t) )
            
-        pitch_t, yaw_t, roll_t = nnlib.tf.reduce_sum ( bins_t[0] * idx_tensor, 1), nnlib.tf.reduce_sum ( bins_t[1] * idx_tensor, 1), nnlib.tf.reduce_sum ( bins_t[2] * idx_tensor, 1)
-        
-        
-        
-        
-        reg_alpha = 2
+            reg_alpha = 0.01
            reg_pitch_loss = reg_alpha * K.mean(K.square( inp_pitch_t - pitch_t), -1)        
            reg_yaw_loss   = reg_alpha * K.mean(K.square( inp_yaw_t - yaw_t), -1)                        
            reg_roll_loss  = reg_alpha * K.mean(K.square( inp_roll_t - roll_t), -1)
@@ -79,11 +104,14 @@ class PoseEstimator(object):
            pitch_loss = reg_pitch_loss + sum(loss_pitch)
            yaw_loss   = reg_yaw_loss   + sum(loss_yaw)
            roll_loss  = reg_roll_loss  + sum(loss_roll)
-        opt = Adam(lr=0.000001)
            
-        if training:
-            self.train = K.function ([inp_t, inp_pitch_t, inp_yaw_t, inp_roll_t] + inp_bins_t,
-                                     [K.mean(pitch_loss),K.mean(yaw_loss),K.mean(roll_loss)], opt.get_updates( [pitch_loss,yaw_loss,roll_loss], self.model.trainable_weights) )
+            
+            self.train = K.function ([inp_t, inp_real_t, inp_mask_t],
+                                     [bgr_loss], Adam(lr=2e-4, beta_1=0.5, beta_2=0.999).get_updates( bgr_loss, self.encoder.trainable_weights+self.decoder.trainable_weights ) )
+            
+            self.train_l = K.function ([inp_t, inp_pitch_t, inp_yaw_t, inp_roll_t] + inp_bins_t,
+                                     [K.mean(pitch_loss),K.mean(yaw_loss),K.mean(roll_loss)], Adam(lr=0.000001).get_updates( [pitch_loss,yaw_loss,roll_loss], self.model_l.trainable_weights) )
+

        self.view = K.function ([inp_t], [pitch_t, yaw_t, roll_t] )
            
@@ -94,13 +122,22 @@ class PoseEstimator(object):
        return False #pass exception between __enter__ and __exit__ to outter level

    def save_weights(self):
-        self.model.save_weights (str(self.weights_path))
+        self.encoder.save_weights (str(self.encoder_weights_path))
+        self.decoder.save_weights (str(self.decoder_weights_path))
+        self.model_l.save_weights (str(self.l_weights_path))
        
-    def train_on_batch(self, imgs, pitch_yaw_roll):
+        inp_t = Input (self.input_bgr_shape)
+        Model(inp_t, self.model_l(self.encoder(inp_t)) ).save_weights (str(self.model_weights_path)) 
+
+    def train_on_batch(self, warps, imgs, masks, pitch_yaw_roll, skip_bgr_train=False):
        pyr = pitch_yaw_roll+1

-        feed = [imgs]
+        if not skip_bgr_train:
+            bgr_loss, = self.train( [warps, imgs, masks] )
+        else:
+            bgr_loss = 0
        
+        feed = [imgs]
        for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)):
            c = np.round(pyr * (angle / 2) ).astype(K.floatx())
            inp_pitch = c[:,0:1]
@@ -113,11 +150,9 @@ class PoseEstimator(object):
            inp_yaw_bins = keras.utils.to_categorical(inp_yaw, class_num )
            inp_roll_bins = keras.utils.to_categorical(inp_roll, class_num )
            feed += [inp_pitch_bins, inp_yaw_bins, inp_roll_bins] 
-            #import code
-            #code.interact(local=dict(globals(), **locals()))

-        pitch_loss,yaw_loss,roll_loss = self.train(feed)
-        return pitch_loss,yaw_loss,roll_loss
+        pitch_loss,yaw_loss,roll_loss = self.train_l(feed)
+        return bgr_loss, pitch_loss, yaw_loss, roll_loss

    def extract (self, input_image, is_input_tanh=False):
        if is_input_tanh:
@@ -137,47 +172,139 @@ class PoseEstimator(object):
        return result

    @staticmethod
-    def BuildModel ( resolution, class_nums):
+    def BuildModels ( resolution, class_nums):
        exec( nnlib.import_all(), locals(), globals() )
-        inp = Input ( (resolution,resolution,3) )
-        x = inp
-        x = PoseEstimator.Flow(class_nums=class_nums)(x)
-        model = Model(inp,x)
-        return model
+        
+        x = inp = Input ( (resolution,resolution,3) )
+        x = PoseEstimator.EncFlow()(x)
+        encoder = Model(inp,x)
+        
+        x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) )
+        x = PoseEstimator.DecFlow(resolution)(x)
+        decoder = Model(inp,x)
+        
+        x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) )
+        x = PoseEstimator.LatentFlow(class_nums=class_nums)(x)
+        model_l = Model(inp, x )
+        
+        return encoder, decoder, model_l

    @staticmethod
-    def Flow(class_nums):
+    def EncFlow():
        exec( nnlib.import_all(), locals(), globals() )

+        XConv2D = partial(Conv2D, padding='zero')
+        
+        def Act(lrelu_alpha=0.1):
+            return LeakyReLU(alpha=lrelu_alpha)
+            
+        def downscale (dim, **kwargs):
+            def func(x):
+                return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
+            return func
+            
+        def upscale (dim, **kwargs):
+            def func(x):
+                return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
+            return func
+            
+        def to_bgr (output_nc, **kwargs):
+            def func(x):
+                return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x)
+            return func
+            
+        upscale = partial(upscale)
+        downscale = partial(downscale)
+        ae_dims = 512
+        def func(input):
+            x = input
+            x = downscale(64)(x)
+            x = downscale(128)(x)
+            x = downscale(256)(x)
+            x = downscale(512)(x)            
+            x = Dense(ae_dims, name="latent", use_bias=False)(Flatten()(x))            
+            x = Lambda ( lambda x: x + 0.1*K.random_normal(K.shape(x), 0, 1) , output_shape=(None,ae_dims) ) (x)            
+            return x
+            
+        return func
+        
+    @staticmethod
+    def DecFlow(resolution):
+        exec( nnlib.import_all(), locals(), globals() )
+
+        XConv2D = partial(Conv2D, padding='zero')
+        
+        def Act(lrelu_alpha=0.1):
+            return LeakyReLU(alpha=lrelu_alpha)
+            
+        def downscale (dim, **kwargs):
+            def func(x):
+                return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
+            return func
+            
+        def upscale (dim, **kwargs):
+            def func(x):
+                return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
+            return func
+            
+        def to_bgr (output_nc, **kwargs):
+            def func(x):
+                return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x)
+            return func
+            
+        upscale = partial(upscale)
+        downscale = partial(downscale)
+        lowest_dense_res = resolution // 16
+        
        def func(input):
            x = input

-            # resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg')
-            # x = resnet50(x)
-            # output = []
-            # for class_num in class_nums:
-            #     pitch = Dense(class_num, activation='softmax')(x)
-            #     yaw = Dense(class_num, activation='softmax')(x)
-            #     roll = Dense(class_num, activation='softmax')(x)
-            #     output += [pitch,yaw,roll]
+            x = Dense(lowest_dense_res * lowest_dense_res * 256, use_bias=False)(x)
+            x = Reshape((lowest_dense_res, lowest_dense_res, 256))(x)
            
-            # return output
+            x = upscale(512)(x)
+            x = upscale(256)(x)
+            x = upscale(128)(x)
+            x = upscale(64)(x)
+            bgr = to_bgr(3)(x)                
+            return [bgr]
+        return func
        
-            x = Conv2D(64, kernel_size=11, strides=4, padding='same', activation='relu')(x)
-            x = MaxPooling2D( (3,3), strides=2 )(x)
+    @staticmethod
+    def LatentFlow(class_nums):
+        exec( nnlib.import_all(), locals(), globals() )

-            x = Conv2D(192, kernel_size=5, strides=1, padding='same', activation='relu')(x)
-            x = MaxPooling2D( (3,3), strides=2 )(x)
+        XConv2D = partial(Conv2D, padding='zero')
        
-            x = Conv2D(384, kernel_size=3, strides=1, padding='same', activation='relu')(x)
-            x = Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu')(x)
-            x = Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu')(x)
-            x = MaxPooling2D( (3,3), strides=2 )(x)
+        def Act(lrelu_alpha=0.1):
+            return LeakyReLU(alpha=lrelu_alpha)
+            
+        def downscale (dim, **kwargs):
+            def func(x):
+                return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
+            return func
+            
+        def upscale (dim, **kwargs):
+            def func(x):
+                return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
+            return func
+            
+        def to_bgr (output_nc, **kwargs):
+            def func(x):
+                return XConv2D(output_nc, kernel_size=5, use_bias=True, activation='sigmoid')(x)
+            return func
+            
+        upscale = partial(upscale)
+        downscale = partial(downscale)
+        
+        def func(latent):
+            x = latent

-            x = Flatten()(x)
            x = Dense(1024, activation='relu')(x)
            x = Dropout(0.5)(x)
-            x = Dense(1024, activation='relu')(x)
+            x = Dense(2048, activation='relu')(x)
+            x = Dropout(0.5)(x)
+            x = Dense(4096, activation='relu')(x)
            
            output = []
            for class_num in class_nums:
@@ -188,4 +315,18 @@ class PoseEstimator(object):
                
            return output
            
+            #y = Dropout(0.5)(y)
+            #y = Dense(1024, activation='relu')(y)
        return func
+        
+                
+# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg')
+# x = resnet50(x)
+# output = []
+# for class_num in class_nums:
+#     pitch = Dense(class_num)(x)
+#     yaw = Dense(class_num)(x)
+#     roll = Dense(class_num)(x)
+#     output += [pitch,yaw,roll]
+    
+# return output
--- a/models/Model_DEV_POSEEST/Model.py
+++ b/models/Model_DEV_POSEEST/Model.py
@@ -20,12 +20,20 @@ class Model(ModelBase):

    #override
    def onInitializeOptions(self, is_first_run, ask_override):
+        yn_str = {True:'y',False:'n'}
+
        default_face_type = 'f'
        if is_first_run:
            self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower()
        else:
            self.options['face_type'] = self.options.get('face_type', default_face_type)

+        def_train_bgr = self.options.get('train_bgr', True)
+        if is_first_run or ask_override:
+            self.options['train_bgr'] = io.input_bool ("Train bgr? (y/n, ?:help skip: %s) : " % (yn_str[def_train_bgr]), def_train_bgr)
+        else:
+            self.options['train_bgr'] = self.options.get('train_bgr', def_train_bgr)
+
    #override
    def onInitialize(self):
        exec(nnlib.import_all(), locals(), globals())
@@ -48,7 +56,9 @@ class Model(ModelBase):
            self.set_training_data_generators ([
                    SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4,
                            sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
-                            output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) },
+                            output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) },
+                                                  {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution },
+                                                  {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_M, t.FACE_MASK_FULL), 'resolution':self.resolution },
                                                  {'types': (t.IMG_PITCH_YAW_ROLL,)}
                                                ]),

@@ -65,16 +75,16 @@ class Model(ModelBase):

    #override
    def onTrainOneIter(self, generators_samples, generators_list):
-        target_src, pitch_yaw_roll = generators_samples[0]
+        target_srcw, target_src, target_srcm, pitch_yaw_roll = generators_samples[0]

-        pitch_loss,yaw_loss,roll_loss = self.pose_est.train_on_batch( target_src, pitch_yaw_roll )
+        bgr_loss, pitch_loss,yaw_loss,roll_loss = self.pose_est.train_on_batch( target_srcw, target_src, target_srcm, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] )

-        return ( ('pitch_loss', pitch_loss), ('yaw_loss', yaw_loss), ('roll_loss', roll_loss) )
+        return ( ('bgr_loss', bgr_loss), ('pitch_loss', pitch_loss), ('yaw_loss', yaw_loss), ('roll_loss', roll_loss) )

    #override
    def onGetPreview(self, generators_samples):
-        test_src     = generators_samples[0][0][0:4] #first 4 samples
-        test_pyr_src = generators_samples[0][1][0:4]
+        test_src     = generators_samples[0][1][0:4] #first 4 samples
+        test_pyr_src = generators_samples[0][3][0:4]
        test_dst     = generators_samples[1][0][0:4]
        test_pyr_dst = generators_samples[1][1][0:4]

--- a/samplelib/SampleProcessor.py
+++ b/samplelib/SampleProcessor.py
@@ -99,12 +99,6 @@ class SampleProcessor(object):
        if debug and is_face_sample:
            LandmarksProcessor.draw_landmarks (sample_bgr, sample.landmarks, (0, 1, 0))

-        close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None
-        close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None
-
-        if debug and close_sample_bgr is not None:
-            LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0))
-
        params = imagelib.gen_warp_params(sample_bgr, sample_process_options.random_flip, rotation_range=sample_process_options.rotation_range, scale_range=sample_process_options.scale_range, tx_range=sample_process_options.tx_range, ty_range=sample_process_options.ty_range )

        cached_images = collections.defaultdict(dict)
@@ -265,7 +259,11 @@ class SampleProcessor(object):
            return outputs

 """
+        close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None
+        close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None

+        if debug and close_sample_bgr is not None:
+            LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0))
        RANDOM_CLOSE               = 0x00000040, #currently unused
        MORPH_TO_RANDOM_CLOSE      = 0x00000080, #currently unused