added experimental face type 'whole_face'

Basic usage instructions: https://i.imgur.com/w7LkId2.jpg — 'whole_face' requires skill in Adobe After Effects. To use whole_face, you have to extract whole_face faces by using '4) data_src extract whole_face' and '5) data_dst extract whole_face'. Images will be extracted at 512 resolution, so they can also be used for regular full_face and half_face training. 'whole_face' covers the whole area of the face, including the forehead, in the training square, but the training mask is still 'full_face'; therefore it requires manual final masking and compositing in Adobe After Effects. Added option 'masked_training'. This option is available only for the 'whole_face' type. Default is ON. Masked training clips the training area to the full_face mask, so the network will train the faces properly. When the face is trained enough, disable this option to train the whole area of the frame. Merge with 'raw-rgb' mode, then use Adobe After Effects to manually mask, tune color, and composite the whole face, including the forehead.
2025-08-20 13:33:24 -07:00 · 2020-02-21 16:21:04 +04:00 · 2020-02-21 16:21:04 +04:00 · f1d115b63b
commit f1d115b63b
parent 778fb94246
10 changed files with 74 additions and 58 deletions
--- a/core/leras/models.py
+++ b/core/leras/models.py
@ -302,4 +302,8 @@ patch_discriminator_kernels = \
      13 : [ [3,2], [4,2], [2,1] ],
      14 : [ [4,2], [4,2], [2,1] ],
      15 : [ [3,2], [3,2], [3,1] ],
-      16 : [ [4,2], [3,2], [3,1] ] }
+      16 : [ [4,2], [3,2], [3,1] ],
+      
+      
+      28 : [ [4,2], [3,2], [4,2], [2,1] ]
+       }
--- a/facelib/FaceType.py
+++ b/facelib/FaceType.py
@ -6,8 +6,9 @@ class FaceType(IntEnum):
    MID_FULL = 1
    FULL = 2
    FULL_NO_ALIGN = 3
-    HEAD = 4
-    HEAD_NO_ALIGN = 5
+    WHOLE_FACE = 4
+    HEAD = 5
+    HEAD_NO_ALIGN = 6

    MARK_ONLY = 10, #no align at all, just embedded faceinfo

@ -25,6 +26,7 @@ class FaceType(IntEnum):
 from_string_dict = {'half_face': FaceType.HALF,
                    'midfull_face': FaceType.MID_FULL,
                    'full_face': FaceType.FULL,
+                    'whole_face': FaceType.WHOLE_FACE,
                    'head' : FaceType.HEAD,
                    'mark_only' : FaceType.MARK_ONLY,
                    'full_face_no_align' : FaceType.FULL_NO_ALIGN,
@ -33,6 +35,7 @@ from_string_dict = {'half_face': FaceType.HALF,
 to_string_dict = { FaceType.HALF : 'half_face',
                   FaceType.MID_FULL : 'midfull_face',
                   FaceType.FULL : 'full_face',
+                   FaceType.WHOLE_FACE : 'whole_face',
                   FaceType.HEAD : 'head',
                   FaceType.MARK_ONLY :'mark_only',
                   FaceType.FULL_NO_ALIGN : 'full_face_no_align',
--- a/facelib/LandmarksProcessor.py
+++ b/facelib/LandmarksProcessor.py
@ -188,8 +188,9 @@ FaceType_to_padding_remove_align = {
    FaceType.MID_FULL: (0.0675, False),
    FaceType.FULL: (0.2109375, False),
    FaceType.FULL_NO_ALIGN: (0.2109375, True),
-    FaceType.HEAD: (0.369140625, False),
-    FaceType.HEAD_NO_ALIGN: (0.369140625, True),
+    FaceType.WHOLE_FACE: (0.40, False),
+    FaceType.HEAD: (1.0, False),
+    FaceType.HEAD_NO_ALIGN: (1.0, True),
 }

 def convert_98_to_68(lmrks):
@ -257,30 +258,37 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
    mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2]
    
    # get corner points in global space
-    l_p = transform_points (  np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5)]) , mat, True)
-    l_c = l_p[4]
+    g_p = transform_points (  np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5) ]) , mat, True)
+    g_c = g_p[4]

    # calc diagonal vectors between corners in global space
-    tb_diag_vec = (l_p[2]-l_p[0]).astype(np.float32)
+    tb_diag_vec = (g_p[2]-g_p[0]).astype(np.float32)
    tb_diag_vec /= npla.norm(tb_diag_vec)
-    bt_diag_vec = (l_p[1]-l_p[3]).astype(np.float32)
+    bt_diag_vec = (g_p[1]-g_p[3]).astype(np.float32)
    bt_diag_vec /= npla.norm(bt_diag_vec)

    # calc modifier of diagonal vectors for scale and padding value
    padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0)
-    mod = (1.0 / scale)* ( npla.norm(l_p[0]-l_p[2])*(padding*np.sqrt(2.0) + 0.5) )
+    mod = (1.0 / scale)* ( npla.norm(g_p[0]-g_p[2])*(padding*np.sqrt(2.0) + 0.5) )
+    
+    if face_type == FaceType.WHOLE_FACE:
+        vec = (g_p[0]-g_p[3]).astype(np.float32)
+        vec_len = npla.norm(vec)
+        vec /= vec_len
+        
+        g_c += vec*vec_len*0.07
    
    # calc 3 points in global space to estimate 2d affine transform 
    if not remove_align:
-        l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
-                          np.round( l_c + bt_diag_vec*mod ),
-                          np.round( l_c + tb_diag_vec*mod ) ] )
+        l_t = np.array( [ np.round( g_c - tb_diag_vec*mod ),
+                          np.round( g_c + bt_diag_vec*mod ),
+                          np.round( g_c + tb_diag_vec*mod ) ] )
    else:
        # remove_align - face will be centered in the frame but not aligned
-        l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
-                          np.round( l_c + bt_diag_vec*mod ),
-                          np.round( l_c + tb_diag_vec*mod ),
-                          np.round( l_c - bt_diag_vec*mod ),
+        l_t = np.array( [ np.round( g_c - tb_diag_vec*mod ),
+                          np.round( g_c + bt_diag_vec*mod ),
+                          np.round( g_c + tb_diag_vec*mod ),
+                          np.round( g_c - bt_diag_vec*mod ),
                         ] )

        # get area of face square in global space
@ -290,9 +298,9 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
        side = np.float32(math.sqrt(area) / 2)
        
        # calc 3 points with unrotated square
-        l_t = np.array( [ np.round( l_c + [-side,-side] ),
-                          np.round( l_c + [ side,-side] ),
-                          np.round( l_c + [ side, side] ) ] )
+        l_t = np.array( [ np.round( g_c + [-side,-side] ),
+                          np.round( g_c + [ side,-side] ),
+                          np.round( g_c + [ side, side] ) ] )

    # calc affine transform from 3 global space points to 3 local space points size of 'output_size'
    pts2 = np.float32(( (0,0),(output_size,0),(output_size,output_size) ))
@ -658,18 +666,11 @@ def calc_face_pitch(landmarks):
    b = landmarks[8][1]
    return float(b-t)
    
-def calc_face_yaw(landmarks):
-    if not isinstance(landmarks, np.ndarray):
-        landmarks = np.array (landmarks)
-    l = ( (landmarks[27][0]-landmarks[0][0]) + (landmarks[28][0]-landmarks[1][0]) + (landmarks[29][0]-landmarks[2][0]) ) / 3.0
-    r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0
-    return float(r-l)
-
-def estimate_pitch_yaw_roll(aligned_256px_landmarks):
+def estimate_pitch_yaw_roll(aligned_landmarks, size=256):
    """
    returns pitch,yaw,roll [-pi...+pi]
    """
-    shape = (256,256)
+    shape = (size,size)
    focal_length = shape[1]
    camera_center = (shape[1] / 2, shape[0] / 2)
    camera_matrix = np.array(
@ -679,7 +680,7 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks):

    (_, rotation_vector, translation_vector) = cv2.solvePnP(
        landmarks_68_3D,
-        aligned_256px_landmarks.astype(np.float32),
+        aligned_landmarks.astype(np.float32),
        camera_matrix,
        np.zeros((4, 1)) )

@ -690,7 +691,6 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks):

    return -pitch, yaw, roll
    
-
 #if remove_align:
 #    bbox = transform_points ( [ (0,0), (0,output_size), (output_size, output_size), (output_size,0) ], mat, True)
 #    #import code
--- a/main.py
+++ b/main.py
@ -47,7 +47,7 @@ if __name__ == "__main__":
    p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.")
    p.add_argument('--output-debug', action="store_true", dest="output_debug", default=None, help="Writes debug images to <output-dir>_debug\ directory.")
    p.add_argument('--no-output-debug', action="store_false", dest="output_debug", default=None, help="Don't writes debug images to <output-dir>_debug\ directory.")
-    p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'full_face_no_align', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'")
+    p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'whole_face', 'head', 'full_face_no_align', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'")
    p.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.")
    p.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.")
    p.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=1368, help="Manual fix window size. Default: 1368.")
--- a/mainscripts/Extractor.py
+++ b/mainscripts/Extractor.py
@ -680,7 +680,6 @@ def main(detector=None,
         manual_fix=False,
         manual_output_debug_fix=False,
         manual_window_size=1368,
-         image_size=256,
         face_type='full_face',
         max_faces_from_image=0,
         cpu_only = False,
@ -688,6 +687,8 @@ def main(detector=None,
         ):
    face_type = FaceType.fromString(face_type)

+    image_size = 512 if face_type == FaceType.WHOLE_FACE else 256
+    
    if not input_path.exists():
        io.log_err ('Input directory not found. Please ensure it exists.')
        return
@ -710,7 +711,7 @@ def main(detector=None,
        if not manual_output_debug_fix and input_path != output_path:
            output_images_paths = pathex.get_image_paths(output_path)
            if len(output_images_paths) > 0:
-                io.input(f"WARNING !!! \n {output_path} contains files! \n They will be deleted. \n Press enter to continue.")
+                io.input(f"\n WARNING !!! \n {output_path} contains files! \n They will be deleted. \n Press enter to continue.\n")
                for filename in output_images_paths:
                    Path(filename).unlink()
    else:
--- a/mainscripts/dev_misc.py
+++ b/mainscripts/dev_misc.py
@ -394,7 +394,6 @@ def extract_fanseg(input_dir, device_args={} ):

 #unused in end user workflow
 def extract_umd_csv(input_file_csv,
-                    image_size=256,
                    face_type='full_face',
                    device_args={} ):

@ -456,7 +455,7 @@ def extract_umd_csv(input_file_csv,
        data = ExtractSubprocessor (data, 'landmarks', multi_gpu=multi_gpu, cpu_only=cpu_only).run()

        io.log_info ('Performing 3rd pass...')
-        data = ExtractSubprocessor (data, 'final', image_size, face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+        data = ExtractSubprocessor (data, 'final', face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
        faces_detected += sum([d.faces_detected for d in data])


--- a/merger/MergerConfig.py
+++ b/merger/MergerConfig.py
@ -107,8 +107,6 @@ class MergerConfigMasked(MergerConfig):

    def __init__(self, face_type=FaceType.FULL,
                       default_mode = 'overlay',
-                       clip_hborder_mask_per = 0,
-
                       mode='overlay',
                       masked_hist_match=True,
                       hist_match_threshold = 238,
@ -128,11 +126,10 @@ class MergerConfigMasked(MergerConfig):
        super().__init__(type=MergerConfig.TYPE_MASKED, **kwargs)

        self.face_type = face_type
-        if self.face_type not in [FaceType.HALF, FaceType.MID_FULL, FaceType.FULL ]:
+        if self.face_type not in [FaceType.HALF, FaceType.MID_FULL, FaceType.FULL, FaceType.WHOLE_FACE ]:
            raise ValueError("MergerConfigMasked does not support this type of face.")

        self.default_mode = default_mode
-        self.clip_hborder_mask_per = clip_hborder_mask_per

        #default changeable params
        if mode not in mode_str_dict:
--- a/models/Model_Quick96/Model.py
+++ b/models/Model_Quick96/Model.py
@ -454,7 +454,6 @@ class QModel(ModelBase):
        import merger
        return self.predictor_func, (self.resolution, self.resolution, 3), merger.MergerConfigMasked(face_type=face_type,
                                     default_mode = 'overlay',
-                                     clip_hborder_mask_per=0.0625 if (face_type != FaceType.HALF) else 0,
                                    )

 Model = QModel
--- a/models/Model_SAEHD/Model.py
+++ b/models/Model_SAEHD/Model.py
@ -35,6 +35,7 @@ class SAEHDModel(ModelBase):
        default_e_dims             = self.options['e_dims']             = self.load_or_def_option('e_dims', 64)
        default_d_dims             = self.options['d_dims']             = self.options.get('d_dims', None)
        default_d_mask_dims        = self.options['d_mask_dims']        = self.options.get('d_mask_dims', None)
+        default_masked_training    = self.options['masked_training']    = self.load_or_def_option('masked_training', True)
        default_learn_mask         = self.options['learn_mask']         = self.load_or_def_option('learn_mask', True)
        default_eyes_prio          = self.options['eyes_prio']          = self.load_or_def_option('eyes_prio', False)
        default_lr_dropout         = self.options['lr_dropout']         = self.load_or_def_option('lr_dropout', False)
@ -59,7 +60,7 @@ class SAEHDModel(ModelBase):
            resolution = io.input_int("Resolution", default_resolution, add_info="64-256", help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.")
            resolution = np.clip ( (resolution // 16) * 16, 64, 256)
            self.options['resolution'] = resolution
-            self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f'], help_message="Half / mid face / full face. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face.").lower()
+            self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf'], help_message="Half / mid face / full face / whole face. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face. 'Whole face' covers full area of face include forehead, but requires manual merge in Adobe After Effects.").lower()
            self.options['archi'] = io.input_str ("AE architecture", default_archi, ['dfhd','liaehd','df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'hd' is heavyweight version for the best quality.").lower()

        default_d_dims             = 48 if self.options['archi'] == 'dfhd' else 64
@ -84,7 +85,10 @@ class SAEHDModel(ModelBase):
            self.options['d_mask_dims'] = d_mask_dims + d_mask_dims % 2

        if self.is_first_run() or ask_override:
-            self.options['learn_mask']  = io.input_bool ("Learn mask", default_learn_mask, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case merger forced to use 'not predicted mask' that is not smooth as predicted.")
+            if self.options['face_type'] == 'wf':
+                self.options['masked_training']  = io.input_bool ("Masked training", default_masked_training, help_message="This option is available only for 'whole_face' type. Masked training clips training area to full_face mask, thus network will train the faces properly.  When the face is trained enough, disable this option to train all area of the frame. Merge with 'raw-rgb' mode, then use Adobe After Effects to manually mask and compose whole face include forehead.")
+            
+            self.options['learn_mask']  = io.input_bool ("Learn mask", default_learn_mask, help_message="Learning mask will produce a smooth mask in the merger. Also it works as guide for neural network to recognize face directions.")
            self.options['eyes_prio']  = io.input_bool ("Eyes priority", default_eyes_prio, help_message='Helps to fix eye problems during training like "alien eyes" and wrong eyes direction ( especially on HD architectures ) by forcing the neural network to train eyes with higher priority. before/after https://i.imgur.com/YQHOuSR.jpg ')
      
        if self.is_first_run() or ask_override:
@ -101,11 +105,15 @@ class SAEHDModel(ModelBase):
            else:
                self.options['true_face_power'] = 0.0

-            self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.001 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
-            self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse. Typical value is 2.0"), 0.0, 100.0 )
+            if self.options['face_type'] != 'wf':
+                self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.001 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
+                self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse. Typical value is 2.0"), 0.0, 100.0 )
+                
            self.options['ct_mode'] = io.input_str (f"Color transfer for src faceset", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.")
            self.options['clipgrad'] = io.input_bool ("Enable gradient clipping", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.")
-            self.options['pretrain'] = io.input_bool ("Enable pretraining mode", default_pretrain, help_message="Pretrain the model with large amount of various faces. After that, model can be used to train the fakes more quickly.")
+            
+            if self.options['face_type'] != 'wf':
+                self.options['pretrain'] = io.input_bool ("Enable pretraining mode", default_pretrain, help_message="Pretrain the model with large amount of various faces. After that, model can be used to train the fakes more quickly.")

        if self.options['pretrain'] and self.get_pretraining_data_path() is None:
            raise Exception("pretraining_data_path is not defined")
@ -348,7 +356,7 @@ class SAEHDModel(ModelBase):

        self.gan_power = gan_power = self.options['gan_power'] if not self.pretrain else 0.0

-        masked_training = True
+        masked_training = self.options['masked_training']

        models_opt_on_gpu = False if len(devices) == 0 else True if len(devices) > 1 else self.options['models_opt_on_gpu']
        models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
@ -721,6 +729,8 @@ class SAEHDModel(ModelBase):
                face_type = t.FACE_TYPE_MID_FULL
            elif self.options['face_type'] == 'f':
                face_type = t.FACE_TYPE_FULL
+            elif self.options['face_type'] == 'wf':
+                face_type = t.FACE_TYPE_WHOLE_FACE
                
            training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path()
            training_data_dst_path = self.training_data_dst_path if not self.pretrain else self.get_pretraining_data_path()
@ -838,11 +848,12 @@ class SAEHDModel(ModelBase):
            face_type = FaceType.MID_FULL
        elif self.options['face_type'] == 'f':
            face_type = FaceType.FULL
+        elif self.options['face_type'] == 'wf':
+            face_type = FaceType.WHOLE_FACE
            
        import merger
        return self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), merger.MergerConfigMasked(face_type=face_type,
                                     default_mode = 'overlay' if self.options['ct_mode'] != 'none' or self.options['face_style_power'] or self.options['bg_style_power'] else 'seamless',
-                                     clip_hborder_mask_per=0.0625 if (face_type != FaceType.HALF) else 0,
                                    )

 Model = SAEHDModel
--- a/samplelib/SampleProcessor.py
+++ b/samplelib/SampleProcessor.py
@ -25,10 +25,11 @@ class SampleProcessor(object):
        FACE_TYPE_HALF             = 10
        FACE_TYPE_MID_FULL         = 11
        FACE_TYPE_FULL             = 12
-        FACE_TYPE_HEAD             = 13  #currently unused
-        FACE_TYPE_AVATAR           = 14  #currently unused
-        FACE_TYPE_FULL_NO_ALIGN    = 15
-        FACE_TYPE_HEAD_NO_ALIGN    = 16
+        FACE_TYPE_WHOLE_FACE       = 13
+        FACE_TYPE_HEAD             = 14  #currently unused
+        FACE_TYPE_AVATAR           = 15  #currently unused
+        FACE_TYPE_FULL_NO_ALIGN    = 16
+        FACE_TYPE_HEAD_NO_ALIGN    = 17
        FACE_TYPE_END = 20

        MODE_BEGIN = 40
@ -55,6 +56,7 @@ class SampleProcessor(object):
    SPTF_FACETYPE_TO_FACETYPE =  {  Types.FACE_TYPE_HALF : FaceType.HALF,
                                    Types.FACE_TYPE_MID_FULL : FaceType.MID_FULL,
                                    Types.FACE_TYPE_FULL : FaceType.FULL,
+                                    Types.FACE_TYPE_WHOLE_FACE : FaceType.WHOLE_FACE,
                                    Types.FACE_TYPE_HEAD : FaceType.HEAD,
                                    Types.FACE_TYPE_FULL_NO_ALIGN : FaceType.FULL_NO_ALIGN,
                                    Types.FACE_TYPE_HEAD_NO_ALIGN : FaceType.HEAD_NO_ALIGN,