_

2025-07-07 05:22:06 -07:00 · 2019-04-30 17:14:02 +04:00 · 2019-04-30 17:14:02 +04:00 · 659aa5705a
commit 659aa5705a
parent 1f1f94848b
2 changed files with 107 additions and 102 deletions
--- a/facelib/PoseEstimator.py
+++ b/facelib/PoseEstimator.py
@ -38,22 +38,32 @@ class PoseEstimator(object):
        self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) )
  
        self.input_bgr_shape = (resolution, resolution, 3)
+        
+        def ResamplerFunc(input):  # NOTE(review): appears unused in the visible hunks; the Lambda assigned to self.BVAEResampler inlines the same expression - confirm before removing
+            mean_t, logvar_t = input  # input is a (mean, log-variance) pair of tensors
+            return mean_t + K.exp(0.5*logvar_t)*K.random_normal(K.shape(mean_t))  # mean + exp(logvar/2) * noise drawn with the shape of mean_t
+
+        self.BVAEResampler = Lambda ( lambda x: x[0] + K.exp(0.5*x[1])*K.random_normal(K.shape(x[0])),        
+                                        output_shape=K.int_shape(self.encoder.outputs[0])[1:] )
+
        inp_t = Input (self.input_bgr_shape)
-        inp_mask_t = Input ( (resolution, resolution, 1) )
        inp_real_t = Input (self.input_bgr_shape)
        inp_pitch_t = Input ( (1,) )
        inp_yaw_t = Input ( (1,) )
        inp_roll_t = Input ( (1,) )
        
+
+        mean_t, logvar_t = self.encoder(inp_t)
+    
+        latent_t = self.BVAEResampler([mean_t, logvar_t])
+        
        if training:
-            latent_t = self.encoder(inp_t)
            bgr_t = self.decoder (latent_t)        
            pyrs_t = self.model_l(latent_t)
        else:
-            self.model = Model(inp_t, self.model_l(self.encoder(inp_t)) )
+            self.model = Model(inp_t, self.model_l(latent_t) )
            pyrs_t = self.model(inp_t)
        
-        
        if load_weights:
            if training:
                self.encoder.load_weights (str(self.encoder_weights_path))
@ -88,19 +98,31 @@ class PoseEstimator(object):
                a = self.alpha_cat_losses[i]
                pyr_loss += [ a*K.mean( K.square ( inp_pyrs_t[i] - pyrs_t[i]) ) ]
    
-            bgr_loss = K.mean( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( inp_real_t*inp_mask_t, bgr_t*inp_mask_t) )
+            def BVAELoss(beta=4):
+                #the -0.5*(1 + logvar - exp(logvar) - mean^2) term is summed over axis 1 for each sample, then averaged over axis 0 (presumably the minibatch axis) with keepdims, and scaled by beta
+                def func(input):
+                    mean_t, logvar_t = input
+                    return beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True )
+                return func
+                
+            BVAE_loss = BVAELoss(4)([mean_t, logvar_t])#beta * K.mean ( K.sum( -0.5*(1 + logvar_t - K.exp(logvar_t) - K.square(mean_t)), axis=1 ), axis=0, keepdims=True )
+
+
+            bgr_loss = K.mean(K.square(inp_real_t-bgr_t), axis=0, keepdims=True)
+
+            #train_loss = BVAE_loss + bgr_loss
            
            pyr_loss = sum(pyr_loss)

            
-            self.train = K.function ([inp_t, inp_real_t, inp_mask_t],
-                                     [bgr_loss], Adam(lr=2e-4, beta_1=0.5, beta_2=0.999).get_updates( bgr_loss, self.encoder.trainable_weights+self.decoder.trainable_weights ) )
+            self.train = K.function ([inp_t, inp_real_t],
+                                     [ K.mean (BVAE_loss)+K.mean(bgr_loss) ], Adam(lr=0.0005, beta_1=0.9, beta_2=0.999).get_updates( [BVAE_loss, bgr_loss], self.encoder.trainable_weights+self.decoder.trainable_weights ) )
            
            self.train_l = K.function ([inp_t] + inp_pyrs_t,
                                     [pyr_loss], Adam(lr=0.0001).get_updates( pyr_loss, self.model_l.trainable_weights) )


-        self.view = K.function ([inp_t], [ pyrs_t[0] ] )
+            self.view = K.function ([inp_t], [ bgr_t, pyrs_t[0] ] )
     
    def __enter__(self):
        return self
@ -114,21 +136,25 @@ class PoseEstimator(object):
        self.model_l.save_weights (str(self.l_weights_path))
        
        inp_t = Input (self.input_bgr_shape)
-        Model(inp_t, self.model_l(self.encoder(inp_t)) ).save_weights (str(self.model_weights_path)) 

-    def train_on_batch(self, warps, imgs, masks, pitch_yaw_roll, skip_bgr_train=False):
+        Model(inp_t, self.model_l(self.BVAEResampler(self.encoder(inp_t))) ).save_weights (str(self.model_weights_path)) 
+
+    def train_on_batch(self, warps, imgs, pyr_tanh, skip_bgr_train=False):

        if not skip_bgr_train:
-            bgr_loss, = self.train( [warps, imgs, masks] )
+            bgr_loss, = self.train( [warps, imgs] )
+            pyr_loss = 0
        else:
            bgr_loss = 0      
              
            feed = [imgs]        
            for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)):
-            c = np.round( np.round(pitch_yaw_roll * angle)  / angle ) #.astype(K.floatx())
+                a = angle / 2
+                c = np.round( (pyr_tanh+1) * a )  / a -1 #.astype(K.floatx())
                feed += [c] 

            pyr_loss, = self.train_l(feed)
+            
        return bgr_loss, pyr_loss

    def extract (self, input_image, is_input_tanh=False):
@ -139,26 +165,27 @@ class PoseEstimator(object):
        if input_shape_len == 3:
            input_image = input_image[np.newaxis,...]

-        result, = self.view( [input_image] )
+        bgr, result, = self.view( [input_image] )
        
        
        #result = np.clip ( result / (self.angles[0] / 2) - 1, 0.0, 1.0 )

        if input_shape_len == 3:
+            bgr = bgr[0]
            result = result[0]

-        return result
+        return bgr, result

    @staticmethod
-    def BuildModels ( resolution, class_nums):
+    def BuildModels ( resolution, class_nums, ae_dims=128):
        exec( nnlib.import_all(), locals(), globals() )
        
        x = inp = Input ( (resolution,resolution,3) )
-        x = PoseEstimator.EncFlow()(x)
+        x = PoseEstimator.EncFlow(ae_dims)(x)
        encoder = Model(inp,x)
        
        x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) )
-        x = PoseEstimator.DecFlow(resolution)(x)
+        x = PoseEstimator.DecFlow(resolution, ae_dims)(x)
        decoder = Model(inp,x)
        
        x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) )
@ -168,61 +195,52 @@ class PoseEstimator(object):
        return encoder, decoder, model_l

    @staticmethod
-    def EncFlow():
+    def EncFlow(ae_dims):
        exec( nnlib.import_all(), locals(), globals() )

        XConv2D = partial(Conv2D, padding='zero')
        
-        def Act(lrelu_alpha=0.1):
-            return LeakyReLU(alpha=lrelu_alpha)

        def downscale (dim, **kwargs):
            def func(x):
-                return Act() ( XConv2D(dim, kernel_size=5, strides=2)(x))
+                return ReLU() (  ( XConv2D(dim, kernel_size=4, strides=2)(x)) )
            return func
           
-        def upscale (dim, **kwargs):
-            def func(x):
-                return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
-            return func

-        def to_bgr (output_nc, **kwargs):
-            def func(x):
-                return XConv2D(output_nc, kernel_size=5, activation='sigmoid')(x)
-            return func
-            
-        upscale = partial(upscale)
        downscale = partial(downscale)
-        ae_dims = 512
+        
+        ed_ch_dims = 128
+
        def func(input):
            x = input
            x = downscale(64)(x)
            x = downscale(128)(x)
            x = downscale(256)(x)            
            x = downscale(512)(x)    
-            x = Dense(ae_dims, name="latent", use_bias=False)(Flatten()(x))            
-            x = Lambda ( lambda x: x + 0.1*K.random_normal(K.shape(x), 0, 1) , output_shape=(None,ae_dims) ) (x)            
-            return x
+            x = Flatten()(x)
+
+            x = Dense(256)(x)
+            x = ReLU()(x)
+            
+            x = Dense(256)(x)
+            x = ReLU()(x)
+
+            mean = Dense(ae_dims)(x)
+            logvar = Dense(ae_dims)(x)
+            
+            return mean, logvar
            
        return func
        
    @staticmethod
-    def DecFlow(resolution):
+    def DecFlow(resolution, ae_dims):
        exec( nnlib.import_all(), locals(), globals() )

        XConv2D = partial(Conv2D, padding='zero')
        
-        def Act(lrelu_alpha=0.1):
-            return LeakyReLU(alpha=lrelu_alpha)
-            
-        def downscale (dim, **kwargs):
+        def upscale (dim, strides=2, **kwargs):
            def func(x):
-                return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
-            return func
-            
-        def upscale (dim, **kwargs):
-            def func(x):
-                return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
+                return ReLU()(  ( Conv2DTranspose(dim, kernel_size=4, strides=strides, padding='same')(x)) )
            return func
            
        def to_bgr (output_nc, **kwargs):
@ -231,21 +249,29 @@ class PoseEstimator(object):
            return func
            
        upscale = partial(upscale)
-        downscale = partial(downscale)
        lowest_dense_res = resolution // 16

        def func(input):
            x = input
            
-            x = Dense(lowest_dense_res * lowest_dense_res * 256, use_bias=False)(x)
-            x = Reshape((lowest_dense_res, lowest_dense_res, 256))(x)
+            x = Dense(256)(x)
+            x = ReLU()(x)
+            
+            x = Dense(256)(x)
+            x = ReLU()(x)            
+            
+            x = Dense( (lowest_dense_res*lowest_dense_res*256) ) (x)      
+            x = ReLU()(x)   
+            
+            x = Reshape( (lowest_dense_res,lowest_dense_res,256) )(x)
            
            x = upscale(512)(x)            
            x = upscale(256)(x)
            x = upscale(128)(x)
            x = upscale(64)(x)
-            bgr = to_bgr(3)(x)                
-            return [bgr]
+            x = to_bgr(3)(x)           
+                 
+            return x
        return func
        
    @staticmethod
@ -254,39 +280,18 @@ class PoseEstimator(object):

        XConv2D = partial(Conv2D, padding='zero')

-        def Act(lrelu_alpha=0.1):
-            return LeakyReLU(alpha=lrelu_alpha)
-            
-        def downscale (dim, **kwargs):
-            def func(x):
-                return MaxPooling2D()( Act() ( XConv2D(dim, kernel_size=5, strides=1)(x)) )
-            return func
-            
-        def upscale (dim, **kwargs):
-            def func(x):
-                return SubpixelUpscaler()(Act()( XConv2D(dim * 4, kernel_size=3, strides=1)(x)))
-            return func
-            
-        def to_bgr (output_nc, **kwargs):
-            def func(x):
-                return XConv2D(output_nc, kernel_size=5, use_bias=True, activation='sigmoid')(x)
-            return func
-            
-        upscale = partial(upscale)
-        downscale = partial(downscale)
-        
        def func(latent):
            x = latent

            x = Dense(1024, activation='relu')(x)
            x = Dropout(0.5)(x)
-            x = Dense(2048, activation='relu')(x)
-            x = Dropout(0.5)(x)
-            x = Dense(4096, activation='relu')(x)
+            x = Dense(1024, activation='relu')(x)
+            # x = Dropout(0.5)(x)
+            # x = Dense(4096, activation='relu')(x)
            
            output = []
            for class_num in class_nums:
-                pyr = Dense(3, activation='sigmoid')(x)
+                pyr = Dense(3, activation='tanh')(x)
                output += [pyr]
                
            return output
--- a/models/Model_DEV_POSEEST/Model.py
+++ b/models/Model_DEV_POSEEST/Model.py
@ -37,7 +37,7 @@ class Model(ModelBase):
    #override
    def onInitialize(self):
        exec(nnlib.import_all(), locals(), globals())
-        self.set_vram_batch_requirements( {4:32} )
+        self.set_vram_batch_requirements( {4:64} )

        self.resolution = 128
        self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF
@ -58,14 +58,13 @@ class Model(ModelBase):
                            sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
                            output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) },
                                                  {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution },
-                                                  {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_M, t.FACE_MASK_FULL), 'resolution':self.resolution },
-                                                  {'types': (t.IMG_PITCH_YAW_ROLL_SIGMOID,)}
+                                                  {'types': (t.IMG_PITCH_YAW_ROLL,)}
                                                ]),

                    SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4,
                            sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True,
-                            output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution },
-                                                  {'types': (t.IMG_PITCH_YAW_ROLL_SIGMOID,)}
+                            output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':self.resolution },
+                                                  {'types': (t.IMG_PITCH_YAW_ROLL,)}
                                                ])
                                            ])

@ -75,16 +74,16 @@ class Model(ModelBase):

    #override
    def onTrainOneIter(self, generators_samples, generators_list):
-        target_srcw, target_src, target_srcm, pitch_yaw_roll = generators_samples[0]
+        target_srcw, target_src, pitch_yaw_roll = generators_samples[0]

-        bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, target_srcm, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] )
+        bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] )

        return ( ('bgr_loss', bgr_loss), ('pyr_loss', pyr_loss), )

    #override
    def onGetPreview(self, generators_samples):
        test_src     = generators_samples[0][1][0:4] #first 4 samples
-        test_pyr_src = generators_samples[0][3][0:4]
+        test_pyr_src = generators_samples[0][2][0:4]
        test_dst     = generators_samples[1][0][0:4]
        test_pyr_dst = generators_samples[1][1][0:4]

@ -94,7 +93,7 @@ class Model(ModelBase):
        result = []
        for name, img, pyr in [ ['training data', test_src, test_pyr_src],  \
                                ['evaluating data',test_dst, test_pyr_dst] ]:
-            pyr_pred = self.pose_est.extract(img)
+            bgr_pred, pyr_pred = self.pose_est.extract(img)
            
            hor_imgs = []
            for i in range(len(img)):
@ -112,6 +111,7 @@ class Model(ModelBase):

                hor_imgs.append ( np.concatenate ( (
                    img[i,:,:,0:3],
+                    bgr_pred[i],
                    img_info
                    ), axis=1) )