Mirror of https://github.com/iperov/DeepFaceLab.git, synced 2025-07-06 13:02:15 -07:00
Added interactive converter.
With the interactive converter you can change any parameter for any frame and see the result in real time.

Converter: added a motion_blur_power parameter. Motion blur is applied using precomputed motion vectors, so the moving face looks more realistic (a rough sketch of the idea follows below).

The RecycleGAN model is removed.

Added an experimental AVATAR model. Minimum required VRAM is 6GB (NVIDIA) or 12GB (AMD).

Usage:
1) Place data_src.mp4: a 10-20 minute square-resolution video of a news reporter sitting at a table with a static background; no other faces should appear in the frames.
2) Run "extract images from video data_src.bat" with FULL fps.
3) Place data_dst.mp4: a video of the face that will control the src face.
4) Run "extract images from video data_dst FULL FPS.bat".
5) Run "data_src mark faces S3FD best GPU.bat".
6) Run "data_dst extract unaligned faces S3FD best GPU.bat".
7) Run "train AVATAR.bat" stage 1; tune the batch size to the maximum for your card (32 for 6GB) and train to 50k+ iterations.
8) Run "train AVATAR.bat" stage 2; tune the batch size to the maximum for your card (4 for 6GB) and train until the output is decently sharp.
9) Run "convert AVATAR.bat".
10) Run "converted to mp4.bat".

Updated versions of modules.
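Below is a minimal sketch of the motion-blur-from-motion-vectors idea, not the repository's actual converter code: it assumes OpenCV's Farneback optical flow as the source of motion vectors, and apply_motion_blur and its arguments are illustrative names.

    import cv2
    import numpy as np

    def apply_motion_blur(prev_gray, cur_gray, frame, motion_blur_power=100):
        # Dense optical flow between consecutive grayscale frames yields a
        # per-pixel motion vector field.
        flow = cv2.calcOpticalFlowFarneback(prev_gray, cur_gray, None,
                                            0.5, 3, 15, 3, 5, 1.2, 0)
        # Collapse the field to one dominant motion vector for the face patch.
        dx, dy = float(flow[..., 0].mean()), float(flow[..., 1].mean())
        speed = float(np.hypot(dx, dy))
        length = int(speed * motion_blur_power / 100.0)
        if length < 2:
            return frame  # motion too small for visible blur
        # Build a line-shaped kernel oriented along the motion direction.
        kernel = np.zeros((length, length), np.float32)
        c = (length - 1) / 2.0
        ux, uy = dx / speed, dy / speed
        p0 = (int(round(c - ux * c)), int(round(c - uy * c)))
        p1 = (int(round(c + ux * c)), int(round(c + uy * c)))
        cv2.line(kernel, p0, p1, 1.0, thickness=1)
        kernel /= kernel.sum()
        # Convolving with the line kernel smears pixels along the motion path.
        return cv2.filter2D(frame, -1, kernel)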
Parent: 3f0bf2e994
Commit: 407ce3b1ca
46 changed files with 2394 additions and 1659 deletions
@@ -24,7 +24,7 @@ class SAEModel(ModelBase):
    #override
    def onInitializeOptions(self, is_first_run, ask_override):
        yn_str = {True:'y',False:'n'}

        default_resolution = 128
        default_archi = 'df'
        default_face_type = 'f'
@@ -90,20 +90,20 @@ class SAEModel(ModelBase):
            default_apply_random_ct = False if is_first_run else self.options.get('apply_random_ct', False)
            self.options['apply_random_ct'] = io.input_bool ("Apply random color transfer to src faceset? (y/n, ?:help skip:%s) : " % (yn_str[default_apply_random_ct]), default_apply_random_ct, help_message="Increase variativity of src samples by apply LCT color transfer from random dst samples. It is like 'face_style' learning, but more precise color transfer and without risk of model collapse, also it does not require additional GPU resources, but the training time may be longer, due to the src faceset is becoming more diverse.")

            if nnlib.device.backend != 'plaidML': # todo https://github.com/plaidml/plaidml/issues/301
                default_clipgrad = False if is_first_run else self.options.get('clipgrad', False)
                self.options['clipgrad'] = io.input_bool ("Enable gradient clipping? (y/n, ?:help skip:%s) : " % (yn_str[default_clipgrad]), default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.")
            else:
                self.options['clipgrad'] = False

        else:
            self.options['pixel_loss'] = self.options.get('pixel_loss', False)
            self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power)
            self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power)
            self.options['apply_random_ct'] = self.options.get('apply_random_ct', False)
            self.options['clipgrad'] = self.options.get('clipgrad', False)

        if is_first_run:
            self.options['pretrain'] = io.input_bool ("Pretrain the model? (y/n, ?:help skip:n) : ", False, help_message="Pretrain the model with large amount of various faces. This technique may help to train the fake with overly different face shapes and light conditions of src/dst data. Face will be look more like a morphed. To reduce the morph effect, some model files will be initialized but not be updated after pretrain: LIAE: inter_AB.h5 DF: encoder.h5. The longer you pretrain the model the more morphed face will look. After that, save and run the training again.")
        else:
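The apply_random_ct help text above refers to LCT, a linear color transfer from a randomly picked dst sample. As a rough illustration of the idea only (the repository's actual implementation may differ, for example by working in a different color space), matching per-channel statistics looks like this:

    import numpy as np

    def linear_color_transfer(src, dst, eps=1e-5):
        # src, dst: float32 BGR face images scaled to [0, 1].
        # Shift src's per-channel mean/std to match dst's: the simplest
        # "linear" transfer of dst's color statistics onto the src sample.
        src_mean, src_std = src.mean(axis=(0, 1)), src.std(axis=(0, 1))
        dst_mean, dst_std = dst.mean(axis=(0, 1)), dst.std(axis=(0, 1))
        out = (src - src_mean) / (src_std + eps) * dst_std + dst_mean
        return np.clip(out, 0.0, 1.0)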
@@ -383,7 +383,7 @@ class SAEModel(ModelBase):
                                [ {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution // (2**i)} for i in range(ms_count)] + \
                                [ {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution // (2**i) } for i in range(ms_count)])
            ])

    #override
    def get_model_filename_list(self):
        ar = []
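For context on the resolution // (2**i) expressions above: the generator supplies the transformed target image and its mask at ms_count progressively halved scales. A quick check with illustrative values (not taken from the diff):

    resolution, ms_count = 128, 3
    print([resolution // (2**i) for i in range(ms_count)])  # -> [128, 64, 32]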
@@ -413,7 +413,7 @@ class SAEModel(ModelBase):
        ar += [ [self.decoder_srcm, 'decoder_srcm.h5'],
                [self.decoder_dstm, 'decoder_dstm.h5'] ]
        return ar

    #override
    def onSave(self):
        self.save_weights_safe( self.get_model_filename_list() )
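onSave now hands the full model/filename list to save_weights_safe in one call. The diff does not show that helper's body; a minimal sketch of what a "safe" save commonly means (write to a temporary file, then rename into place so an interrupted save cannot corrupt existing weights) could look like:

    from pathlib import Path

    def save_weights_safe(model_filename_list):
        # Write each weight file under a temp name first; the rename is
        # atomic on most filesystems, so a crash mid-save leaves the
        # previous weights intact.
        for model, filename in model_filename_list:
            tmp_path = Path(filename + '.tmp')
            model.save_weights(str(tmp_path))
            tmp_path.replace(filename)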
@@ -469,17 +469,20 @@ class SAEModel(ModelBase):
        return result

-    def predictor_func (self, face):
-        if self.options['learn_mask']:
-            bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]])
-            mask = mask_dst_dstm[0] * mask_src_dstm[0]
-            return bgr[0], mask[...,0]
-        else:
-            bgr, = self.AE_convert ([face[np.newaxis,...]])
-            return bgr[0]
+    def predictor_func (self, face=None, dummy_predict=False):
+        if dummy_predict:
+            self.AE_convert ([ np.zeros ( (1, self.options['resolution'], self.options['resolution'], 3), dtype=np.float32 ) ])
+        else:
+            if self.options['learn_mask']:
+                bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]])
+                mask = mask_dst_dstm[0] * mask_src_dstm[0]
+                return bgr[0], mask[...,0]
+            else:
+                bgr, = self.AE_convert ([face[np.newaxis,...]])
+                return bgr[0]

    #override
-    def get_converter(self):
+    def get_ConverterConfig(self):
        base_erode_mask_modifier = 30 if self.options['face_type'] == 'f' else 100
        base_blur_mask_modifier = 0 if self.options['face_type'] == 'f' else 100
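The new dummy_predict path runs one forward pass on zeros without returning anything. Frameworks typically allocate GPU memory and build the compute graph on the first call, so the interactive converter can warm the predictor up before the first real frame. A hypothetical caller on the converter side (names are illustrative, not the repository's API):

    # Warm-up, then steady-state prediction:
    predictor = model.predictor_func
    predictor(dummy_predict=True)   # first call: graph build + GPU allocation happen here
    bgr = predictor(real_face)      # later calls run at steady-state speed, keeping
                                    # the interactive preview responsive from frame one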
@@ -489,17 +492,18 @@ class SAEModel(ModelBase):
        face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF

-        from converters import ConverterMasked
-        return ConverterMasked(self.predictor_func,
-                               predictor_input_size=self.options['resolution'],
-                               predictor_masked=self.options['learn_mask'],
-                               face_type=face_type,
-                               default_mode = 1 if self.options['apply_random_ct'] or self.options['face_style_power'] or self.options['bg_style_power'] else 4,
-                               base_erode_mask_modifier=base_erode_mask_modifier,
-                               base_blur_mask_modifier=base_blur_mask_modifier,
-                               default_erode_mask_modifier=default_erode_mask_modifier,
-                               default_blur_mask_modifier=default_blur_mask_modifier,
-                               clip_hborder_mask_per=0.0625 if (self.options['face_type'] == 'f') else 0)
+        import converters
+        return converters.ConverterConfigMasked(predictor_func=self.predictor_func,
+                                                predictor_input_shape=(self.options['resolution'], self.options['resolution'], 3),
+                                                predictor_masked=self.options['learn_mask'],
+                                                face_type=face_type,
+                                                default_mode = 1 if self.options['apply_random_ct'] or self.options['face_style_power'] or self.options['bg_style_power'] else 4,
+                                                base_erode_mask_modifier=base_erode_mask_modifier,
+                                                base_blur_mask_modifier=base_blur_mask_modifier,
+                                                default_erode_mask_modifier=default_erode_mask_modifier,
+                                                default_blur_mask_modifier=default_blur_mask_modifier,
+                                                clip_hborder_mask_per=0.0625 if (self.options['face_type'] == 'f') else 0,
+                                                )

    @staticmethod
    def initialize_nn_functions():
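The switch from get_converter(), which returned a ready ConverterMasked, to get_ConverterConfig(), which returns a plain converters.ConverterConfigMasked, separates model-specific settings from the conversion loop: the interactive converter can rebuild or re-tune a converter per frame from the same config without calling back into the model. A minimal sketch of the shape of such a config object (illustrative; not the repository's definition, hence the Sketch suffix):

    from dataclasses import dataclass
    from typing import Callable, Tuple

    @dataclass
    class ConverterConfigMaskedSketch:
        # Everything the conversion loop needs, captured as plain data; the
        # interactive converter can tweak per-frame settings derived from the
        # defaults below without another round-trip to the model.
        predictor_func: Callable
        predictor_input_shape: Tuple[int, int, int]
        predictor_masked: bool = True
        face_type: object = None
        default_mode: int = 4
        base_erode_mask_modifier: int = 0
        base_blur_mask_modifier: int = 0
        default_erode_mask_modifier: int = 0
        default_blur_mask_modifier: int = 0
        clip_hborder_mask_per: float = 0.0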
@@ -545,7 +549,7 @@ class SAEModel(ModelBase):
                return Norm(norm)( Act(act) (Conv2D(dim, kernel_size=5, strides=2, padding=padding)(x)) )
            return func
        SAEModel.downscale = downscale

        #def downscale (dim, padding='zero', norm='', act='', **kwargs):
        #    def func(x):
        #        return BlurPool()( Norm(norm)( Act(act) (Conv2D(dim, kernel_size=5, strides=1, padding=padding)(x)) ) )
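The commented-out downscale variant trades the strided convolution for a stride-1 convolution followed by BlurPool, i.e. anti-aliased downsampling. A minimal Keras sketch of the two variants, using plain Keras layers instead of nnlib's Norm/Act/Conv2D wrappers and AveragePooling2D standing in for BlurPool (both substitutions are assumptions for illustration):

    from tensorflow.keras import layers

    def downscale(dim):
        # Active code path: a learnable 5x5 filter that downsamples via stride 2.
        def func(x):
            return layers.LeakyReLU(0.1)(layers.Conv2D(dim, kernel_size=5, strides=2, padding='same')(x))
        return func

    def downscale_blurpool(dim):
        # Commented-out alternative: convolve at full resolution, then downsample
        # with a fixed low-pass filter, which reduces aliasing at extra compute cost.
        def func(x):
            x = layers.LeakyReLU(0.1)(layers.Conv2D(dim, kernel_size=5, strides=1, padding='same')(x))
            return layers.AveragePooling2D(pool_size=2)(x)
        return func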