Merger:

Added the smooth_rect option (default: ON). It decreases jitter of the predicted rect by using temporal interpolation across neighboring frames. You can disable this option if you have problems with dynamic scenes.
2025-07-06 04:52:13 -07:00 · 2020-02-17 18:27:09 +04:00 · 2020-02-17 18:27:09 +04:00 · 814da70577
commit 814da70577
parent e0a55ff1c3
6 changed files with 236 additions and 66 deletions
--- a/facelib/LandmarksProcessor.py
+++ b/facelib/LandmarksProcessor.py
@ -249,77 +249,162 @@ def transform_points(points, mat, invert=False):
    points = np.squeeze(points)
    return points

-def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0, full_face_align_top=True):
+
+
+def get_transform_mat_data (image_landmarks, face_type, scale=1.0):
    if not isinstance(image_landmarks, np.ndarray):
        image_landmarks = np.array (image_landmarks)

-    padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0)
-
+    # estimate landmarks transform from global space to local aligned space with bounds [0..1]
    mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2]
+    
+    # get corner points in global space
    l_p = transform_points (  np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5)]) , mat, True)
    l_c = l_p[4]

+    # calc diagonal vectors between corners in global space
    tb_diag_vec = (l_p[2]-l_p[0]).astype(np.float32)
    tb_diag_vec /= npla.norm(tb_diag_vec)
    bt_diag_vec = (l_p[1]-l_p[3]).astype(np.float32)
    bt_diag_vec /= npla.norm(bt_diag_vec)

+    # calc modifier of diagonal vectors for scale and padding value
+    padding, _ = FaceType_to_padding_remove_align.get(face_type, 0.0)
    mod = (1.0 / scale)* ( npla.norm(l_p[0]-l_p[2])*(padding*np.sqrt(2.0) + 0.5) )
-
+    return l_c, tb_diag_vec, bt_diag_vec, mod
+    
+def get_transform_mat_by_data (l_c, tb_diag_vec, bt_diag_vec, mod, output_size, face_type):
+    _, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0)
+    
+    # calc 3 points in global space to estimate 2d affine transform 
    if not remove_align:
        l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
                          np.round( l_c + bt_diag_vec*mod ),
                          np.round( l_c + tb_diag_vec*mod ) ] )
    else:
+        # remove_align - face will be centered in the frame but not aligned
        l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
                          np.round( l_c + bt_diag_vec*mod ),
                          np.round( l_c + tb_diag_vec*mod ),
                          np.round( l_c - bt_diag_vec*mod ),
                         ] )

+        # get area of face square in global space
        area = mathlib.polygon_area(l_t[:,0], l_t[:,1] )
+        
+        # calc side of square
        side = np.float32(math.sqrt(area) / 2)
+        
+        # calc 3 points with unrotated square
        l_t = np.array( [ np.round( l_c + [-side,-side] ),
                          np.round( l_c + [ side,-side] ),
                          np.round( l_c + [ side, side] ) ] )

+    # calc affine transform from 3 global space points to 3 local space points size of 'output_size'
    pts2 = np.float32(( (0,0),(output_size,0),(output_size,output_size) ))
    mat = cv2.getAffineTransform(l_t,pts2)
-
-
-    #if remove_align:
-    #    bbox = transform_points ( [ (0,0), (0,output_size), (output_size, output_size), (output_size,0) ], mat, True)
-    #    #import code
-    #    #code.interact(local=dict(globals(), **locals()))
-    #    area = mathlib.polygon_area(bbox[:,0], bbox[:,1] )
-    #    side = math.sqrt(area) / 2
-    #    center = transform_points ( [(output_size/2,output_size/2)], mat, True)
-    #    pts1 = np.float32(( center+[-side,-side], center+[side,-side], center+[side,-side] ))
-    #    pts2 = np.float32([[0,0],[output_size,0],[0,output_size]])
-    #    mat = cv2.getAffineTransform(pts1,pts2)
-
+    
    return mat
+     
+def get_averaged_transform_mat (img_landmarks, 
+                                img_landmarks_prev, 
+                                img_landmarks_next, 
+                                average_frame_count, 
+                                average_center_frame_count,
+                                output_size, face_type, scale=1.0):
+    
+    l_c_list = []
+    tb_diag_vec_list = []
+    bt_diag_vec_list = []
+    mod_list = []
+    
+    count = max(average_frame_count,average_center_frame_count)
+    for i in range ( -count, count+1, 1 ):        
+        if i < 0:
+            lmrks = img_landmarks_prev[i] if -i < len(img_landmarks_prev) else None
+        elif i > 0:
+            lmrks = img_landmarks_next[i] if i < len(img_landmarks_next) else None
+        else:
+            lmrks = img_landmarks
+        
+        if lmrks is None:
+            continue
+        
+        l_c, tb_diag_vec, bt_diag_vec, mod = get_transform_mat_data (lmrks, face_type, scale=scale)
+        
+        if i >= -average_frame_count and i <= average_frame_count:
+            tb_diag_vec_list.append(tb_diag_vec)
+            bt_diag_vec_list.append(bt_diag_vec)
+            mod_list.append(mod)
+            
+        if i >= -average_center_frame_count and i <= average_center_frame_count:
+            l_c_list.append(l_c)
+    
+    tb_diag_vec = np.mean( np.array(tb_diag_vec_list), axis=0 )
+    bt_diag_vec = np.mean( np.array(bt_diag_vec_list), axis=0 )
+    mod         = np.mean( np.array(mod_list), axis=0 )    
+    l_c         = np.mean( np.array(l_c_list), axis=0 )

-#if full_face_align_top and (face_type == FaceType.FULL or face_type == FaceType.FULL_NO_ALIGN):
-#    #lmrks2 = expand_eyebrows(image_landmarks)
-#    #lmrks2_ = transform_points( [ lmrks2[19], lmrks2[24] ], mat, False )
-#    #y_diff = np.float32( (0,np.min(lmrks2_[:,1])) )
-#    #y_diff = transform_points( [ np.float32( (0,0) ), y_diff], mat, True)
-#    #y_diff = y_diff[1]-y_diff[0]
-#
-#    x_diff = np.float32((0,0))
-#
-#    lmrks2_ = transform_points( [ image_landmarks[0], image_landmarks[16] ], mat, False )
-#    if lmrks2_[0,0] < 0:
-#        x_diff = lmrks2_[0,0]
-#        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
-#        x_diff = x_diff[1]-x_diff[0]
-#    elif lmrks2_[1,0] >= output_size:
-#        x_diff = lmrks2_[1,0]-(output_size-1)
-#        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
-#        x_diff = x_diff[1]-x_diff[0]
-#
-#    mat = cv2.getAffineTransform( l_t+y_diff+x_diff ,pts2)
+    return get_transform_mat_by_data (l_c, tb_diag_vec, bt_diag_vec, mod, output_size, face_type)
+    
+def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
+    l_c, tb_diag_vec, bt_diag_vec, mod = get_transform_mat_data (image_landmarks, face_type, scale=scale)
+    return get_transform_mat_by_data (l_c, tb_diag_vec, bt_diag_vec, mod, output_size, face_type)
+  
+"""
+def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
+    if not isinstance(image_landmarks, np.ndarray):
+        image_landmarks = np.array (image_landmarks)
+
+    # get face padding value for FaceType
+    padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0)
+
+    # estimate landmarks transform from global space to local aligned space with bounds [0..1]
+    mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2]
+    
+    # get corner points in global space
+    l_p = transform_points (  np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5)]) , mat, True)
+    l_c = l_p[4]
+
+    # calc diagonal vectors between corners in global space
+    tb_diag_vec = (l_p[2]-l_p[0]).astype(np.float32)
+    tb_diag_vec /= npla.norm(tb_diag_vec)
+    bt_diag_vec = (l_p[1]-l_p[3]).astype(np.float32)
+    bt_diag_vec /= npla.norm(bt_diag_vec)
+
+    # calc modifier of diagonal vectors for scale and padding value
+    mod = (1.0 / scale)* ( npla.norm(l_p[0]-l_p[2])*(padding*np.sqrt(2.0) + 0.5) )
+
+    # calc 3 points in global space to estimate 2d affine transform 
+    if not remove_align:
+        l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
+                          np.round( l_c + bt_diag_vec*mod ),
+                          np.round( l_c + tb_diag_vec*mod ) ] )
+    else:
+        # remove_align - face will be centered in the frame but not aligned
+        l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
+                          np.round( l_c + bt_diag_vec*mod ),
+                          np.round( l_c + tb_diag_vec*mod ),
+                          np.round( l_c - bt_diag_vec*mod ),
+                         ] )
+
+        # get area of face square in global space
+        area = mathlib.polygon_area(l_t[:,0], l_t[:,1] )
+        
+        # calc side of square
+        side = np.float32(math.sqrt(area) / 2)
+        
+        # calc 3 points with unrotated square
+        l_t = np.array( [ np.round( l_c + [-side,-side] ),
+                          np.round( l_c + [ side,-side] ),
+                          np.round( l_c + [ side, side] ) ] )
+
+    # calc affine transform from 3 global space points to 3 local space points size of 'output_size'
+    pts2 = np.float32(( (0,0),(output_size,0),(output_size,output_size) ))
+    mat = cv2.getAffineTransform(l_t,pts2)
+    
+    return mat
+"""
 def expand_eyebrows(lmrks, eyebrows_expand_mod=1.0):
    if len(lmrks) != 68:
        raise Exception('works only with 68 landmarks')
@ -710,3 +795,35 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks):
    roll = np.clip ( roll, -math.pi, math.pi )

    return -pitch, yaw, roll
+
+
+#if remove_align:
+#    bbox = transform_points ( [ (0,0), (0,output_size), (output_size, output_size), (output_size,0) ], mat, True)
+#    #import code
+#    #code.interact(local=dict(globals(), **locals()))
+#    area = mathlib.polygon_area(bbox[:,0], bbox[:,1] )
+#    side = math.sqrt(area) / 2
+#    center = transform_points ( [(output_size/2,output_size/2)], mat, True)
+#    pts1 = np.float32(( center+[-side,-side], center+[side,-side], center+[side,-side] ))
+#    pts2 = np.float32([[0,0],[output_size,0],[0,output_size]])
+#    mat = cv2.getAffineTransform(pts1,pts2)
+#if full_face_align_top and (face_type == FaceType.FULL or face_type == FaceType.FULL_NO_ALIGN):
+#    #lmrks2 = expand_eyebrows(image_landmarks)
+#    #lmrks2_ = transform_points( [ lmrks2[19], lmrks2[24] ], mat, False )
+#    #y_diff = np.float32( (0,np.min(lmrks2_[:,1])) )
+#    #y_diff = transform_points( [ np.float32( (0,0) ), y_diff], mat, True)
+#    #y_diff = y_diff[1]-y_diff[0]
+#
+#    x_diff = np.float32((0,0))
+#
+#    lmrks2_ = transform_points( [ image_landmarks[0], image_landmarks[16] ], mat, False )
+#    if lmrks2_[0,0] < 0:
+#        x_diff = lmrks2_[0,0]
+#        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
+#        x_diff = x_diff[1]-x_diff[0]
+#    elif lmrks2_[1,0] >= output_size:
+#        x_diff = lmrks2_[1,0]-(output_size-1)
+#        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
+#        x_diff = x_diff[1]-x_diff[0]
+#
+#    mat = cv2.getAffineTransform( l_t+y_diff+x_diff ,pts2)
--- a/mainscripts/Merger.py
+++ b/mainscripts/Merger.py
@ -148,7 +148,7 @@ class MergeSubprocessor(Subprocessor):
                    cfg.fanseg_extract_func = self.fanseg_extract_func

                    try:
-                        final_img = MergeMasked (self.predictor_func, self.predictor_input_shape, cfg, frame_info)
+                        final_img = MergeMasked (self.predictor_func, self.predictor_input_shape, cfg, frame_info, pf.prev_temporal_frame_infos, pf.next_temporal_frame_infos)
                    except Exception as e:
                        e_str = traceback.format_exc()
                        if 'MemoryError' in e_str:
@ -387,6 +387,7 @@ class MergeSubprocessor(Subprocessor):
                    'z' : lambda cfg,shift_pressed: cfg.toggle_masked_hist_match(),
                    'x' : lambda cfg,shift_pressed: cfg.toggle_mask_mode(),
                    'c' : lambda cfg,shift_pressed: cfg.toggle_color_transfer_mode(),
+                    'b' : lambda cfg,shift_pressed: cfg.toggle_smooth_rect(),
                    'n' : lambda cfg,shift_pressed: cfg.toggle_sharpen_mode(),
                    }
            self.masked_keys = list(self.masked_keys_funcs.keys())
@ -673,7 +674,8 @@ def main (model_class_name=None,
            cfg.ask_settings()

        input_path_image_paths = pathex.get_image_paths(input_path)
-
+        
+ 
        if cfg.type == MergerConfig.TYPE_MASKED:
            if not aligned_path.exists():
                io.log_err('Aligned directory not found. Please ensure it exists.')
@ -741,11 +743,34 @@ def main (model_class_name=None,
                io.log_info ("Use 'recover original filename' to determine the exact duplicates.")
                io.log_info ("")

-            frames = [ MergeSubprocessor.Frame( frame_info=FrameInfo(filepath=Path(p), 
-                                                                     landmarks_list=alignments.get(Path(p).stem, None)
-                                                                    )
-                                              )
-                       for p in input_path_image_paths ]
+            filesdata = []
+            for filepath in io.progress_bar_generator(input_path_image_paths, "Collecting info"):
+                filepath=Path(filepath)
+                filesdata += [ FrameInfo(filepath=filepath, landmarks_list=alignments.get(filepath.stem, None)) ]
+    
+            frames = []
+            filesdata_len = len(filesdata)
+            for i in range(len(filesdata)):
+                frame_info = filesdata[i]
+
+                if multiple_faces_detected:
+                    prev_temporal_frame_infos = None
+                    next_temporal_frame_infos = None
+                else:
+                    prev_temporal_frame_infos = []
+                    next_temporal_frame_infos = []
+
+                    for t in range (1,6):
+                        prev_frame_info = filesdata[ max(i -t, 0) ]
+                        next_frame_info = filesdata[ min(i +t, filesdata_len-1 )]
+
+                        prev_temporal_frame_infos.insert (0, prev_frame_info )
+                        next_temporal_frame_infos.append (   next_frame_info )
+
+                frames.append ( MergeSubprocessor.Frame(prev_temporal_frame_infos=prev_temporal_frame_infos,
+                                                        frame_info=frame_info,
+                                                        next_temporal_frame_infos=next_temporal_frame_infos) )
+        

            if multiple_faces_detected:
                io.log_info ("Warning: multiple faces detected. Motion blur will not be used.")
@ -783,6 +808,8 @@ def main (model_class_name=None,


        elif cfg.type == MergerConfig.TYPE_FACE_AVATAR:
+            pass
+            """
            filesdata = []
            for filepath in io.progress_bar_generator(input_path_image_paths, "Collecting info"):
                filepath = Path(filepath)
@ -812,7 +839,7 @@ def main (model_class_name=None,
                frames.append ( MergeSubprocessor.Frame(prev_temporal_frame_infos=prev_temporal_frame_infos,
                                                          frame_info=frame_info,
                                                          next_temporal_frame_infos=next_temporal_frame_infos) )
-
+            """
        if len(frames) == 0:
            io.log_info ("No frames to merge in input_dir.")
        else:
--- a/mainscripts/gfx/help_merger_masked.jpg
+++ b/mainscripts/gfx/help_merger_masked.jpg
--- a/mainscripts/gfx/help_merger_masked_source.psd
+++ b/mainscripts/gfx/help_merger_masked_source.psd
--- a/merger/MergeMasked.py
+++ b/merger/MergeMasked.py
@ -8,7 +8,7 @@ from facelib import FaceType, LandmarksProcessor
 from core.interact import interact as io
 from core.cv2ex import *

-def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_face_landmarks):
+def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_face_landmarks, img_landmarks_prev, img_landmarks_next):
    img_size = img_bgr.shape[1], img_bgr.shape[0]
    img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks)

@ -18,20 +18,29 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
    out_img = img_bgr.copy()
    out_merging_mask_a = None

-    mask_subres = 4
    input_size = predictor_input_shape[0]
    mask_subres_size = input_size*4
    output_size = input_size
-    if cfg.super_resolution_power != 0:
+    if cfg.super_resolution_power != 0 or cfg.smooth_rect:
        output_size *= 4

-    face_mat        = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type)
-    face_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale   )
+    if cfg.smooth_rect:
+        average_frame_count=5
+        average_center_frame_count=1
+    else:
+        average_frame_count=0
+        average_center_frame_count=0
+
+    def get_transform_mat(*args, **kwargs):
+        return LandmarksProcessor.get_averaged_transform_mat (img_face_landmarks, img_landmarks_prev, img_landmarks_next, average_frame_count, average_center_frame_count, *args, **kwargs)
+
+    face_mat        = get_transform_mat (output_size, face_type=cfg.face_type)
+    face_output_mat = get_transform_mat (output_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale)

    if mask_subres_size == output_size:
        face_mask_output_mat = face_output_mat
    else:
-        face_mask_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, mask_subres_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale   )
+        face_mask_output_mat = get_transform_mat (mask_subres_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale)

    dst_face_bgr      = cv2.warpAffine( img_bgr        , face_mat, (output_size, output_size), flags=cv2.INTER_CUBIC )
    dst_face_bgr      = np.clip(dst_face_bgr, 0, 1)
@ -56,11 +65,13 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
    if cfg.super_resolution_power != 0:
        prd_face_bgr_enhanced = cfg.superres_func(prd_face_bgr)
        mod = cfg.super_resolution_power / 100.0
-
-        prd_face_bgr = cv2.resize(prd_face_bgr, (output_size,output_size))*(1.0-mod) + \
-                       prd_face_bgr_enhanced*mod
+        prd_face_bgr = cv2.resize(prd_face_bgr, (output_size,output_size))*(1.0-mod) + prd_face_bgr_enhanced*mod
+        prd_face_bgr = np.clip(prd_face_bgr, 0, 1)
+    elif cfg.smooth_rect:
+        prd_face_bgr = cv2.resize(prd_face_bgr, (output_size,output_size), cv2.INTER_CUBIC)
        prd_face_bgr = np.clip(prd_face_bgr, 0, 1)

+    if cfg.super_resolution_power != 0 or cfg.smooth_rect:
        if predictor_masked:
            prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0,  (output_size, output_size), cv2.INTER_CUBIC)
        else:
@ -77,14 +88,14 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img

        if cfg.mask_mode >= 4 and cfg.mask_mode <= 7:

-            full_face_fanseg_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, cfg.fanseg_input_size, face_type=FaceType.FULL)
+            full_face_fanseg_mat = get_transform_mat (cfg.fanseg_input_size, face_type=FaceType.FULL)
            dst_face_fanseg_bgr = cv2.warpAffine(img_bgr, full_face_fanseg_mat, (cfg.fanseg_input_size,)*2, flags=cv2.INTER_CUBIC )
            dst_face_fanseg_mask = cfg.fanseg_extract_func( FaceType.FULL, dst_face_fanseg_bgr )

            if cfg.face_type == FaceType.FULL:
                FAN_dst_face_mask_a_0 = cv2.resize (dst_face_fanseg_mask, (output_size,output_size), cv2.INTER_CUBIC)
            else:
-                face_fanseg_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, cfg.fanseg_input_size, face_type=cfg.face_type)
+                face_fanseg_mat = get_transform_mat (cfg.fanseg_input_size, face_type=cfg.face_type)

                fanseg_rect_corner_pts = np.array ( [ [0,0], [cfg.fanseg_input_size-1,0], [0,cfg.fanseg_input_size-1] ], dtype=np.float32 )
                a = LandmarksProcessor.transform_points (fanseg_rect_corner_pts, face_fanseg_mat, invert=True )
@ -106,13 +117,12 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img

    prd_face_mask_a_0[ prd_face_mask_a_0 < (1.0/255.0) ] = 0.0 # get rid of noise

+    # resize to mask_subres_size
+    if prd_face_mask_a_0.shape[0] != mask_subres_size:
+        prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (mask_subres_size, mask_subres_size), cv2.INTER_CUBIC)

    # process mask in local predicted space
    if 'raw' not in cfg.mode:
-        # resize to mask_subres_size
-        if prd_face_mask_a_0.shape[0] != mask_subres_size:
-            prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (mask_subres_size, mask_subres_size), cv2.INTER_CUBIC)
-
        # add zero pad
        prd_face_mask_a_0 = np.pad (prd_face_mask_a_0, input_size)

@ -281,7 +291,7 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
                k_size = int(frame_info.motion_power*cfg_mp)
                if k_size >= 1:
                    k_size = np.clip (k_size+1, 2, 50)
-                    if cfg.super_resolution_power != 0:
+                    if cfg.super_resolution_power != 0 or cfg.smooth_rect:
                        k_size *= 2
                    out_face_bgr = imagelib.LinearMotionBlur (out_face_bgr, k_size , frame_info.motion_deg)

@ -321,14 +331,20 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
    return out_img, out_merging_mask_a


-def MergeMasked (predictor_func, predictor_input_shape, cfg, frame_info):
+def MergeMasked (predictor_func, predictor_input_shape, cfg, frame_info, prev_temporal_frame_infos=None, next_temporal_frame_infos=None):
    img_bgr_uint8 = cv2_imread(frame_info.filepath)
    img_bgr_uint8 = imagelib.normalize_channels (img_bgr_uint8, 3)
    img_bgr = img_bgr_uint8.astype(np.float32) / 255.0

+
    outs = []
    for face_num, img_landmarks in enumerate( frame_info.landmarks_list ):
-        out_img, out_img_merging_mask = MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_landmarks)
+        img_landmarks_prev = [ x.landmarks_list[0] for x in prev_temporal_frame_infos if len(x.landmarks_list) != 0] \
+                                                         if prev_temporal_frame_infos is not None else []
+        img_landmarks_next = [ x.landmarks_list[0] for x in next_temporal_frame_infos if len(x.landmarks_list) != 0] \
+                                                         if next_temporal_frame_infos is not None else []
+
+        out_img, out_img_merging_mask = MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_landmarks, img_landmarks_prev, img_landmarks_next)
        outs += [ (out_img, out_img_merging_mask) ]

    #Combining multiple face outputs
--- a/merger/MergerConfig.py
+++ b/merger/MergerConfig.py
@ -119,6 +119,7 @@ class MergerConfigMasked(MergerConfig):
                       output_face_scale = 0,
                       super_resolution_power = 0,
                       color_transfer_mode = ctm_str_dict['rct'],
+                       smooth_rect = True,
                       image_denoise_power = 0,
                       bicubic_degrade_power = 0,
                       color_degrade_power = 0,
@ -148,6 +149,7 @@ class MergerConfigMasked(MergerConfig):
        self.output_face_scale = output_face_scale
        self.super_resolution_power = super_resolution_power
        self.color_transfer_mode = color_transfer_mode
+        self.smooth_rect = smooth_rect
        self.image_denoise_power = image_denoise_power
        self.bicubic_degrade_power = bicubic_degrade_power
        self.color_degrade_power = color_degrade_power
@ -188,6 +190,9 @@ class MergerConfigMasked(MergerConfig):
    def toggle_color_transfer_mode(self):
        self.color_transfer_mode = (self.color_transfer_mode+1) % ( max(ctm_dict.keys())+1 )

+    def toggle_smooth_rect(self):
+        self.smooth_rect = not self.smooth_rect
+
    def add_super_resolution_power(self, diff):
        self.super_resolution_power = np.clip ( self.super_resolution_power+diff , 0, 100)

@ -241,8 +246,10 @@ class MergerConfigMasked(MergerConfig):
            self.color_transfer_mode = io.input_str ( "Color transfer to predicted face", None, valid_list=list(ctm_str_dict.keys())[1:] )
            self.color_transfer_mode = ctm_str_dict[self.color_transfer_mode]

+        self.smooth_rect = io.input_bool("Smooth rect?", True, help_message="Decreases jitter of predicting rect by using temporal interpolation. You can disable this option if you have problems with dynamic scenes.")
+
        super().ask_settings()
- 
+
        self.super_resolution_power = np.clip ( io.input_int ("Choose super resolution power", 0, add_info="0..100", help_message="Enhance details by applying superresolution network."), 0, 100)

        if 'raw' not in self.mode:
@ -266,6 +273,7 @@ class MergerConfigMasked(MergerConfig):
                   self.motion_blur_power == other.motion_blur_power and \
                   self.output_face_scale == other.output_face_scale and \
                   self.color_transfer_mode == other.color_transfer_mode and \
+                   self.smooth_rect == other.smooth_rect and \
                   self.super_resolution_power == other.super_resolution_power and \
                   self.image_denoise_power == other.image_denoise_power and \
                   self.bicubic_degrade_power == other.bicubic_degrade_power and \
@ -298,11 +306,13 @@ class MergerConfigMasked(MergerConfig):
        r += f"""output_face_scale: {self.output_face_scale}\n"""

        if 'raw' not in self.mode:
-            r += f"""color_transfer_mode: { ctm_dict[self.color_transfer_mode]}\n"""
+            r += f"""color_transfer_mode: {ctm_dict[self.color_transfer_mode]}\n"""
+
+        r += f"""smooth_rect: {self.smooth_rect}\n"""

        r += super().to_string(filename)
        r += f"""super_resolution_power: {self.super_resolution_power}\n"""
-        
+
        if 'raw' not in self.mode:
            r += (f"""image_denoise_power: {self.image_denoise_power}\n"""
                  f"""bicubic_degrade_power: {self.bicubic_degrade_power}\n"""