diff --git a/DFLIMG/DFLJPG.py b/DFLIMG/DFLJPG.py
index bf39663..4e2663f 100644
--- a/DFLIMG/DFLJPG.py
+++ b/DFLIMG/DFLJPG.py
@@ -1,13 +1,16 @@
 import pickle
 import struct
+import traceback
 import cv2
 import numpy as np
+from core import imagelib
+from core.imagelib import SegIEPolys
 from core.interact import interact as io
 from core.structex import *
 from facelib import FaceType
-from core.imagelib import SegIEPolys
+
 class DFLJPG(object):
     def __init__(self, filename):
@@ -148,7 +151,7 @@ class DFLJPG(object):
             return inst
         except Exception as e:
-            print (e)
+            io.log_err (f'Exception occurred while DFLJPG.load : {traceback.format_exc()}')
             return None

     def has_data(self):
@@ -165,10 +168,10 @@ class DFLJPG(object):
         data = b""
         dict_data = self.dfl_dict
-
+
        # Remove None keys
        for key in list(dict_data.keys()):
-           if dict_data[key] is None:
+           if dict_data[key] is None:
                dict_data.pop(key)

        for chunk in self.chunks:
@@ -242,52 +245,58 @@ class DFLJPG(object):
             return None

     def set_image_to_face_mat(self, image_to_face_mat):
         self.dfl_dict['image_to_face_mat'] = image_to_face_mat

-    def get_seg_ie_polys(self):
+    def get_seg_ie_polys(self):
         d = self.dfl_dict.get('seg_ie_polys',None)
         if d is not None:
             d = SegIEPolys.load(d)
         else:
             d = SegIEPolys()
-
+
         return d
-
+
     def set_seg_ie_polys(self, seg_ie_polys):
-        if seg_ie_polys is not None:
+        if seg_ie_polys is not None:
             if not isinstance(seg_ie_polys, SegIEPolys):
                 raise ValueError('seg_ie_polys should be instance of SegIEPolys')
-
+
             if seg_ie_polys.has_polys():
                 seg_ie_polys = seg_ie_polys.dump()
             else:
                 seg_ie_polys = None
-
+
         self.dfl_dict['seg_ie_polys'] = seg_ie_polys

-    def get_xseg_mask(self):
+    def get_xseg_mask(self):
         mask_buf = self.dfl_dict.get('xseg_mask',None)
         if mask_buf is None:
             return None
-
+
         img = cv2.imdecode(mask_buf, cv2.IMREAD_UNCHANGED)
         if len(img.shape) == 2:
             img = img[...,None]
-
-
+
         return img.astype(np.float32) / 255.0
-
-
+
+
     def set_xseg_mask(self, mask_a):
         if mask_a is None:
             self.dfl_dict['xseg_mask'] = None
             return
-
-        ret, buf = cv2.imencode( '.png', np.clip( mask_a*255, 0, 255 ).astype(np.uint8) )
+
+        mask_a = imagelib.normalize_channels(mask_a, 1)
+        img_data = np.clip( mask_a*255, 0, 255 ).astype(np.uint8)
+
+        data_max_len = 4096
+
+        ret, buf = cv2.imencode('.png', img_data)
+
+        if not ret or len(buf) > data_max_len:
+            for jpeg_quality in range(100,-1,-1):
+                ret, buf = cv2.imencode( '.jpg', img_data, [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_quality] )
+                if ret and len(buf) <= data_max_len:
+                    break
+
         if not ret:
-            raise Exception("unable to generate PNG data for set_xseg_mask")
-
+            raise Exception("set_xseg_mask: unable to generate image data")
+
         self.dfl_dict['xseg_mask'] = buf
-
-
-
-
-
diff --git a/README.md b/README.md
index bea3dc3..cc71253 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,13 @@ DeepFaceLab is used by such popular youtube channels as
+
+
+
+## Replace the head
+
+
+
diff --git a/XSegEditor/QIconDB.py b/XSegEditor/QIconDB.py
index 7a48cf4..064efad 100644
--- a/XSegEditor/QIconDB.py
+++ b/XSegEditor/QIconDB.py
@@ -19,3 +19,4 @@ class QIconDB():
         QIconDB.right = QIcon ( str(icon_path / 'right.png') )
         QIconDB.pt_edit_mode = QIcon ( str(icon_path / 'pt_edit_mode.png') )
         QIconDB.view_baked = QIcon ( str(icon_path / 'view_baked.png') )
+        QIconDB.view_xseg = QIcon ( str(icon_path / 'view_xseg.png') )
\ No newline at end of file
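The set_xseg_mask hunk above caps the embedded mask at 4096 bytes: it tries lossless PNG first and, if the buffer is too large, walks JPEG quality down from 100 until the result fits. A standalone sketch of that encode-under-a-byte-budget pattern (the function name is ours; the cv2 calls and the 4096-byte cap come straight from the diff):

```python
import cv2
import numpy as np

def encode_mask_under_budget(mask_a, data_max_len=4096):
    # mask_a: float mask in [0..1]; quantize to uint8 as the diff does
    img_data = np.clip(mask_a * 255, 0, 255).astype(np.uint8)

    # try lossless PNG first
    ret, buf = cv2.imencode('.png', img_data)
    if ret and len(buf) <= data_max_len:
        return buf

    # fall back to JPEG, stepping quality down until the buffer fits
    for jpeg_quality in range(100, -1, -1):
        ret, buf = cv2.imencode('.jpg', img_data,
                                [int(cv2.IMWRITE_JPEG_QUALITY), jpeg_quality])
        if ret and len(buf) <= data_max_len:
            return buf

    raise Exception('unable to encode mask within the byte budget')
```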
diff --git a/XSegEditor/QStringDB.py b/XSegEditor/QStringDB.py
index b15fce0..02617d0 100644
--- a/XSegEditor/QStringDB.py
+++ b/XSegEditor/QStringDB.py
@@ -30,6 +30,11 @@ class QStringDB():
                                           'zh' : '查看遮罩通道',
                                         }[lang]

+        QStringDB.btn_view_xseg_mask_tip = { 'en' : 'View trained XSeg mask',
+                                             'ru' : 'Посмотреть тренированную XSeg маску',
+                                             'zh' : '查看导入后的XSeg遮罩',
+                                           }[lang]
+
         QStringDB.btn_poly_type_include_tip = { 'en' : 'Poly include mode',
                                                 'ru' : 'Режим полигонов - включение',
                                                 'zh' : '包含选区模式',
                                               }[lang]

diff --git a/XSegEditor/XSegEditor.py b/XSegEditor/XSegEditor.py
index 44ca9b8..293f10e 100644
--- a/XSegEditor/XSegEditor.py
+++ b/XSegEditor/XSegEditor.py
@@ -18,6 +18,7 @@ from PyQt5.QtWidgets import *
 from core import pathex
 from core.cv2ex import *
+from core import imagelib
 from core.imagelib import SegIEPoly, SegIEPolys, SegIEPolyType, sd
 from core.qtex import *
 from DFLIMG import *
@@ -33,6 +34,7 @@ class OpMode(IntEnum):
     DRAW_PTS = 1
     EDIT_PTS = 2
     VIEW_BAKED = 3
+    VIEW_XSEG_MASK = 4

 class PTEditMode(IntEnum):
     MOVE = 0
@@ -244,11 +246,17 @@ class QCanvasControlsRightBar(QFrame):
         btn_view_baked_mask.setDefaultAction(self.btn_view_baked_mask_act)
         btn_view_baked_mask.setIconSize(QUIConfig.icon_q_size)

+        btn_view_xseg_mask = QToolButton()
+        self.btn_view_xseg_mask_act = QActionEx( QIconDB.view_xseg, QStringDB.btn_view_xseg_mask_tip, shortcut='5', shortcut_in_tooltip=True, is_checkable=True)
+        btn_view_xseg_mask.setDefaultAction(self.btn_view_xseg_mask_act)
+        btn_view_xseg_mask.setIconSize(QUIConfig.icon_q_size)
+
         self.btn_poly_color_act_grp = QActionGroup (self)
         self.btn_poly_color_act_grp.addAction(self.btn_poly_color_red_act)
         self.btn_poly_color_act_grp.addAction(self.btn_poly_color_green_act)
         self.btn_poly_color_act_grp.addAction(self.btn_poly_color_blue_act)
         self.btn_poly_color_act_grp.addAction(self.btn_view_baked_mask_act)
+        self.btn_poly_color_act_grp.addAction(self.btn_view_xseg_mask_act)
         self.btn_poly_color_act_grp.setExclusive(True)

         #==============================================
@@ -257,6 +265,7 @@ class QCanvasControlsRightBar(QFrame):
         controls_bar_frame1_l.addWidget ( btn_poly_color_green )
         controls_bar_frame1_l.addWidget ( btn_poly_color_blue )
         controls_bar_frame1_l.addWidget ( btn_view_baked_mask )
+        controls_bar_frame1_l.addWidget ( btn_view_xseg_mask )
         controls_bar_frame1 = QFrame()
         controls_bar_frame1.setFrameShape(QFrame.StyledPanel)
         controls_bar_frame1.setSizePolicy (QSizePolicy.Fixed, QSizePolicy.Fixed)
@@ -274,12 +283,13 @@ class QCanvasOperator(QWidget):
         super().__init__()
         self.cbar = cbar

-        self.set_cbar_disabled(initialize=False)
+        self.set_cbar_disabled()

         self.cbar.btn_poly_color_red_act.triggered.connect ( lambda : self.set_color_scheme_id(0) )
         self.cbar.btn_poly_color_green_act.triggered.connect ( lambda : self.set_color_scheme_id(1) )
         self.cbar.btn_poly_color_blue_act.triggered.connect ( lambda : self.set_color_scheme_id(2) )
-        self.cbar.btn_view_baked_mask_act.toggled.connect ( self.set_view_baked_mask )
+        self.cbar.btn_view_baked_mask_act.toggled.connect ( lambda : self.set_op_mode(OpMode.VIEW_BAKED) )
+        self.cbar.btn_view_xseg_mask_act.toggled.connect ( self.set_view_xseg_mask )

         self.cbar.btn_poly_type_include_act.triggered.connect ( lambda : self.set_poly_include_type(SegIEPolyType.INCLUDE) )
         self.cbar.btn_poly_type_exclude_act.triggered.connect ( lambda : self.set_poly_include_type(SegIEPolyType.EXCLUDE) )
@@ -298,10 +308,19 @@ class QCanvasOperator(QWidget):
         self.qp = QPainter()
         self.initialized = False
+        self.last_state = None

-    def initialize(self, q_img, img_look_pt=None, view_scale=None, ie_polys=None, canvas_config=None ):
+    def initialize(self, q_img, img_look_pt=None, view_scale=None, ie_polys=None, xseg_mask=None, canvas_config=None
): self.q_img = q_img self.img_pixmap = QPixmap.fromImage(q_img) + + self.xseg_mask_pixmap = None + if xseg_mask is not None: + w,h = QSize_to_np ( q_img.size() ) + xseg_mask = cv2.resize(xseg_mask, (w,h), cv2.INTER_CUBIC) + xseg_mask = (imagelib.normalize_channels(xseg_mask, 1) * 255).astype(np.uint8) + self.xseg_mask_pixmap = QPixmap.fromImage(QImage_from_np(xseg_mask)) + self.img_size = QSize_to_np (self.img_pixmap.size()) self.img_look_pt = img_look_pt @@ -314,45 +333,49 @@ class QCanvasOperator(QWidget): if canvas_config is None: canvas_config = CanvasConfig() self.canvas_config = canvas_config - + + # UI init + self.set_cbar_disabled() + self.cbar.btn_poly_color_act_grp.setDisabled(False) + self.cbar.btn_poly_type_act_grp.setDisabled(False) + + # Initial vars self.current_cursor = None - - self.mouse_hull_poly = None self.mouse_wire_poly = None - self.drag_type = DragType.NONE - self.op_mode = None - self.pt_edit_mode = None - - if not hasattr(self, 'color_scheme_id' ): - self.color_scheme_id = 1 - self.set_color_scheme_id(self.color_scheme_id) - - self.set_op_mode(OpMode.NONE) - + + # Initial state + self.set_op_mode(OpMode.NONE) + self.set_color_scheme_id(1) + self.set_poly_include_type(SegIEPolyType.INCLUDE) self.set_pt_edit_mode(PTEditMode.MOVE) - self.set_view_baked_mask(False) - - self.set_cbar_disabled(initialize=True) - - if not hasattr(self, 'poly_include_type' ): - self.poly_include_type = SegIEPolyType.INCLUDE - self.set_poly_include_type(self.poly_include_type) - + # Apply last state + if self.last_state is not None: + self.set_color_scheme_id(self.last_state.color_scheme_id) + if self.last_state.op_mode is not None: + self.set_op_mode(self.last_state.op_mode) + + self.initialized = True + self.setMouseTracking(True) self.update_cursor() self.update() - self.initialized = True + def finalize(self): if self.initialized: + + self.last_state = sn(op_mode = self.op_mode if self.op_mode in [OpMode.VIEW_BAKED, OpMode.VIEW_XSEG_MASK] else None, + color_scheme_id = self.color_scheme_id, + ) + self.img_pixmap = None self.update_cursor(is_finalize=True) self.setMouseTracking(False) self.setFocusPolicy(Qt.NoFocus) - self.set_cbar_disabled(initialize=False) + self.set_cbar_disabled() self.initialized = False self.update() @@ -445,16 +468,18 @@ class QCanvasOperator(QWidget): # ====================================== SETTERS ===================================== # ==================================================================================== # ==================================================================================== - def set_op_mode(self, op_mode, op_poly=None): - if op_mode != self.op_mode: - + if not hasattr(self,'op_mode'): + self.op_mode = None + self.op_poly = None + + if self.op_mode != op_mode: + # Finalize prev mode if self.op_mode == OpMode.NONE: self.cbar.btn_poly_type_act_grp.setDisabled(True) elif self.op_mode == OpMode.DRAW_PTS: self.cbar.btn_undo_pt_act.setDisabled(True) self.cbar.btn_redo_pt_act.setDisabled(True) - if self.op_poly.get_pts_count() < 3: # Remove unfinished poly self.ie_polys.remove_poly(self.op_poly) @@ -463,59 +488,69 @@ class QCanvasOperator(QWidget): self.cbar.btn_delete_poly_act.setDisabled(True) # Reset pt_edit_move when exit from EDIT_PTS self.set_pt_edit_mode(PTEditMode.MOVE) + elif self.op_mode == OpMode.VIEW_BAKED: + self.cbar.btn_view_baked_mask_act.setChecked(False) + elif self.op_mode == OpMode.VIEW_XSEG_MASK: + self.cbar.btn_view_xseg_mask_act.setChecked(False) self.op_mode = op_mode - - if self.op_mode == OpMode.NONE: + + # 
Initialize new mode + if op_mode == OpMode.NONE: self.cbar.btn_poly_type_act_grp.setDisabled(False) - elif self.op_mode == OpMode.DRAW_PTS: + elif op_mode == OpMode.DRAW_PTS: self.cbar.btn_undo_pt_act.setDisabled(False) self.cbar.btn_redo_pt_act.setDisabled(False) - elif self.op_mode == OpMode.EDIT_PTS: + elif op_mode == OpMode.EDIT_PTS: self.cbar.btn_pt_edit_mode_act.setDisabled(False) self.cbar.btn_delete_poly_act.setDisabled(False) - - if self.op_mode in [OpMode.DRAW_PTS, OpMode.EDIT_PTS]: + elif op_mode == OpMode.VIEW_BAKED: + self.cbar.btn_view_baked_mask_act.setChecked(True ) + n = QImage_to_np ( self.q_img ).astype(np.float32) / 255.0 + h,w,c = n.shape + mask = np.zeros( (h,w,1), dtype=np.float32 ) + self.ie_polys.overlay_mask(mask) + n = (mask*255).astype(np.uint8) + self.img_baked_pixmap = QPixmap.fromImage(QImage_from_np(n)) + elif op_mode == OpMode.VIEW_XSEG_MASK: + self.cbar.btn_view_xseg_mask_act.setChecked(True) + if op_mode in [OpMode.DRAW_PTS, OpMode.EDIT_PTS]: self.mouse_op_poly_pt_id = None self.mouse_op_poly_edge_id = None self.mouse_op_poly_edge_id_pt = None + # + self.op_poly = op_poly + if op_poly is not None: + self.update_mouse_info() - self.set_op_poly(op_poly) self.update_cursor() self.update() - def set_op_poly(self, op_poly): - self.op_poly = op_poly - if op_poly is not None: - self.update_mouse_info() - self.update() - def set_pt_edit_mode(self, pt_edit_mode): - if self.pt_edit_mode != pt_edit_mode: + if not hasattr(self, 'pt_edit_mode') or self.pt_edit_mode != pt_edit_mode: self.pt_edit_mode = pt_edit_mode self.update_cursor() self.update() self.cbar.btn_pt_edit_mode_act.setChecked( self.pt_edit_mode == PTEditMode.ADD_DEL ) - def set_cbar_disabled(self, initialize): + def set_cbar_disabled(self): self.cbar.btn_delete_poly_act.setDisabled(True) self.cbar.btn_undo_pt_act.setDisabled(True) self.cbar.btn_redo_pt_act.setDisabled(True) self.cbar.btn_pt_edit_mode_act.setDisabled(True) - - if initialize: - self.cbar.btn_poly_color_act_grp.setDisabled(False) - self.cbar.btn_poly_type_act_grp.setDisabled(False) - else: - self.cbar.btn_poly_color_act_grp.setDisabled(True) - self.cbar.btn_poly_type_act_grp.setDisabled(True) + self.cbar.btn_poly_color_act_grp.setDisabled(True) + self.cbar.btn_poly_type_act_grp.setDisabled(True) def set_color_scheme_id(self, id): - if self.color_scheme_id != id: + if self.op_mode == OpMode.VIEW_BAKED: + self.set_op_mode(OpMode.NONE) + + if not hasattr(self, 'color_scheme_id') or self.color_scheme_id != id: self.color_scheme_id = id self.update_cursor() self.update() + if self.color_scheme_id == 0: self.cbar.btn_poly_color_red_act.setChecked( True ) elif self.color_scheme_id == 1: @@ -524,33 +559,33 @@ class QCanvasOperator(QWidget): self.cbar.btn_poly_color_blue_act.setChecked( True ) def set_poly_include_type(self, poly_include_type): - if self.op_mode in [OpMode.NONE, OpMode.EDIT_PTS]: - if self.poly_include_type != poly_include_type: - self.poly_include_type = poly_include_type - self.update() + if not hasattr(self, 'poly_include_type' ) or \ + ( self.poly_include_type != poly_include_type and \ + self.op_mode in [OpMode.NONE, OpMode.EDIT_PTS] ): + self.poly_include_type = poly_include_type + self.update() self.cbar.btn_poly_type_include_act.setChecked(self.poly_include_type == SegIEPolyType.INCLUDE) self.cbar.btn_poly_type_exclude_act.setChecked(self.poly_include_type == SegIEPolyType.EXCLUDE) - - - def set_view_baked_mask(self, is_checked): + def set_view_xseg_mask(self, is_checked): if is_checked: - self.set_op_mode(OpMode.VIEW_BAKED) 
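The VIEW_BAKED branch of set_op_mode above bakes the editor's include/exclude polygons into a grayscale mask through ie_polys.overlay_mask before wrapping it in a QPixmap. SegIEPolys itself is not part of this diff, so the following is only a plausible minimal equivalent of that baking step, assuming include polys paint 1.0 and exclude polys paint 0.0 in application order:

```python
import cv2
import numpy as np

def overlay_polys_mask(mask, polys):
    # polys: iterable of (points, is_include) pairs; points is an (N, 2) array.
    # Assumed semantics: include polys paint 1.0, exclude polys punch back to 0.0,
    # applied in order, mirroring how the editor bakes ie_polys into a view mask.
    for points, is_include in polys:
        pts = np.asarray(points, dtype=np.int32)
        if len(pts) >= 3:
            cv2.fillPoly(mask, [pts], (1.0,) if is_include else (0.0,))
    return mask

# e.g. bake into an (h, w, 1) float32 canvas, as the VIEW_BAKED branch does:
# mask = np.zeros((h, w, 1), np.float32); overlay_polys_mask(mask, polys)
```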
+ self.set_op_mode(OpMode.VIEW_XSEG_MASK) - n = QImage_to_np ( self.q_img ).astype(np.float32) / 255.0 - h,w,c = n.shape + #n = QImage_to_np ( self.q_img ).astype(np.float32) / 255.0 + #h,w,c = n.shape - mask = np.zeros( (h,w,1), dtype=np.float32 ) - self.ie_polys.overlay_mask(mask) + #mask = np.zeros( (h,w,1), dtype=np.float32 ) + #self.ie_polys.overlay_mask(mask) - n = (mask*255).astype(np.uint8) + #n = (mask*255).astype(np.uint8) - self.img_baked_pixmap = QPixmap.fromImage(QImage_from_np(n)) + #self.img_baked_pixmap = QPixmap.fromImage(QImage_from_np(n)) else: self.set_op_mode(OpMode.NONE) - self.cbar.btn_view_baked_mask_act.setChecked(is_checked ) + self.cbar.btn_view_xseg_mask_act.setChecked(is_checked ) + # ==================================================================================== # ==================================================================================== @@ -764,7 +799,6 @@ class QCanvasOperator(QWidget): # other cases -> unselect poly self.set_op_mode(OpMode.NONE) - elif btn == Qt.MiddleButton: if self.drag_type == DragType.NONE: # Start image drag @@ -773,6 +807,7 @@ class QCanvasOperator(QWidget): self.drag_img_look_pt = self.get_img_look_pt() self.update_cursor() + def mouseReleaseEvent(self, ev): super().mouseReleaseEvent(ev) if not self.initialized: @@ -855,6 +890,11 @@ class QCanvasOperator(QWidget): src_rect = QRect(0, 0, *self.img_size) dst_rect = self.img_to_cli_rect( src_rect ) qp.drawPixmap(dst_rect, self.img_baked_pixmap, src_rect) + elif self.op_mode == OpMode.VIEW_XSEG_MASK: + if self.xseg_mask_pixmap is not None: + src_rect = QRect(0, 0, *self.img_size) + dst_rect = self.img_to_cli_rect( src_rect ) + qp.drawPixmap(dst_rect, self.xseg_mask_pixmap, src_rect) else: if self.img_pixmap is not None: src_rect = QRect(0, 0, *self.img_size) @@ -980,6 +1020,7 @@ class QCanvas(QFrame): btn_poly_color_green_act = self.canvas_control_right_bar.btn_poly_color_green_act, btn_poly_color_blue_act = self.canvas_control_right_bar.btn_poly_color_blue_act, btn_view_baked_mask_act = self.canvas_control_right_bar.btn_view_baked_mask_act, + btn_view_xseg_mask_act = self.canvas_control_right_bar.btn_view_xseg_mask_act, btn_poly_color_act_grp = self.canvas_control_right_bar.btn_poly_color_act_grp, btn_poly_type_include_act = self.canvas_control_left_bar.btn_poly_type_include_act, @@ -1124,9 +1165,9 @@ class MainWindow(QXMainWindow): if img is None: img = QImage_from_np(cv2_imread(image_path)) if img is None: - raise Exception(f'Unable to load {image_path}') + io.log_err(f'Unable to load {image_path}') except: - io.log_err(f"{traceback.format_exc()}") + img = None return img @@ -1143,25 +1184,32 @@ class MainWindow(QXMainWindow): return False dflimg = DFLIMG.load(image_path) + if not dflimg or not dflimg.has_data(): + return False + ie_polys = dflimg.get_seg_ie_polys() + xseg_mask = dflimg.get_xseg_mask() q_img = self.load_QImage(image_path) - - self.canvas.op.initialize ( q_img, ie_polys=ie_polys ) + if q_img is None: + return False + + self.canvas.op.initialize ( q_img, ie_polys=ie_polys, xseg_mask=xseg_mask ) self.filename_label.setText(str(image_path.name)) return True def canvas_finalize(self, image_path): - dflimg = DFLIMG.load(image_path) + + if image_path.exists(): + dflimg = DFLIMG.load(image_path) + ie_polys = dflimg.get_seg_ie_polys() + new_ie_polys = self.canvas.op.get_ie_polys() - ie_polys = dflimg.get_seg_ie_polys() - new_ie_polys = self.canvas.op.get_ie_polys() - - if not new_ie_polys.identical(ie_polys): - self.image_paths_has_ie_polys[image_path] = 
new_ie_polys.has_polys() - dflimg.set_seg_ie_polys( new_ie_polys ) - dflimg.save() + if not new_ie_polys.identical(ie_polys): + self.image_paths_has_ie_polys[image_path] = new_ie_polys.has_polys() + dflimg.set_seg_ie_polys( new_ie_polys ) + dflimg.save() self.canvas.op.finalize() self.filename_label.setText("") @@ -1182,9 +1230,10 @@ class MainWindow(QXMainWindow): break if len(self.image_paths) == 0: break + + ret = self.canvas_initialize(self.image_paths[0], len(self.image_paths_done) != 0 and only_has_polys) - - if self.canvas_initialize(self.image_paths[0], len(self.image_paths_done) != 0 and only_has_polys): + if ret or len(self.image_paths_done) == 0: break self.update_cached_images() diff --git a/XSegEditor/gfx/icons/view_xseg.png b/XSegEditor/gfx/icons/view_xseg.png new file mode 100644 index 0000000..7328d2c Binary files /dev/null and b/XSegEditor/gfx/icons/view_xseg.png differ diff --git a/facelib/FAN.npy b/facelib/2DFAN.npy similarity index 100% rename from facelib/FAN.npy rename to facelib/2DFAN.npy diff --git a/facelib/3DFAN.npy b/facelib/3DFAN.npy new file mode 100644 index 0000000..b96bcd2 Binary files /dev/null and b/facelib/3DFAN.npy differ diff --git a/facelib/FANExtractor.py b/facelib/FANExtractor.py index 3e6c9ad..5676fcc 100644 --- a/facelib/FANExtractor.py +++ b/facelib/FANExtractor.py @@ -13,8 +13,9 @@ from core.leras import nn ported from https://github.com/1adrianb/face-alignment """ class FANExtractor(object): - def __init__ (self, place_model_on_cpu=False): - model_path = Path(__file__).parent / "FAN.npy" + def __init__ (self, landmarks_3D=False, place_model_on_cpu=False): + + model_path = Path(__file__).parent / ( "2DFAN.npy" if not landmarks_3D else "3DFAN.npy") if not model_path.exists(): raise Exception("Unable to load FANExtractor model") diff --git a/facelib/FaceType.py b/facelib/FaceType.py index 044c701..745cff3 100644 --- a/facelib/FaceType.py +++ b/facelib/FaceType.py @@ -2,16 +2,15 @@ from enum import IntEnum class FaceType(IntEnum): #enumerating in order "next contains prev" - MOUTH = -1 HALF = 0 MID_FULL = 1 FULL = 2 FULL_NO_ALIGN = 3 WHOLE_FACE = 4 - HEAD = 5 - HEAD_NO_ALIGN = 6 + HEAD = 10 + HEAD_NO_ALIGN = 20 - MARK_ONLY = 10, #no align at all, just embedded faceinfo + MARK_ONLY = 100, #no align at all, just embedded faceinfo @staticmethod def fromString (s): @@ -24,23 +23,15 @@ class FaceType(IntEnum): def toString (face_type): return to_string_dict[face_type] -from_string_dict = {'mouth': FaceType.MOUTH, - 'half_face': FaceType.HALF, - 'midfull_face': FaceType.MID_FULL, - 'full_face': FaceType.FULL, - 'whole_face': FaceType.WHOLE_FACE, - 'head' : FaceType.HEAD, - 'mark_only' : FaceType.MARK_ONLY, - 'full_face_no_align' : FaceType.FULL_NO_ALIGN, - 'head_no_align' : FaceType.HEAD_NO_ALIGN, - } -to_string_dict = { FaceType.MOUTH : 'mouth', - FaceType.HALF : 'half_face', +to_string_dict = { FaceType.HALF : 'half_face', FaceType.MID_FULL : 'midfull_face', FaceType.FULL : 'full_face', + FaceType.FULL_NO_ALIGN : 'full_face_no_align', FaceType.WHOLE_FACE : 'whole_face', FaceType.HEAD : 'head', - FaceType.MARK_ONLY :'mark_only', - FaceType.FULL_NO_ALIGN : 'full_face_no_align', - FaceType.HEAD_NO_ALIGN : 'head_no_align' + FaceType.HEAD_NO_ALIGN : 'head_no_align', + + FaceType.MARK_ONLY :'mark_only', } + +from_string_dict = { to_string_dict[x] : x for x in to_string_dict.keys() } \ No newline at end of file diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py index 93cdd9f..8d3a327 100644 --- a/facelib/LandmarksProcessor.py +++ 
b/facelib/LandmarksProcessor.py @@ -134,86 +134,85 @@ landmarks_68_pt = { "mouth": (48,68), "nose": (27, 36), # missed one point "jaw": (0, 17) } - landmarks_68_3D = np.array( [ -[-73.393523 , -29.801432 , 47.667532 ], -[-72.775014 , -10.949766 , 45.909403 ], -[-70.533638 , 7.929818 , 44.842580 ], -[-66.850058 , 26.074280 , 43.141114 ], -[-59.790187 , 42.564390 , 38.635298 ], -[-48.368973 , 56.481080 , 30.750622 ], -[-34.121101 , 67.246992 , 18.456453 ], -[-17.875411 , 75.056892 , 3.609035 ], -[0.098749 , 77.061286 , -0.881698 ], -[17.477031 , 74.758448 , 5.181201 ], -[32.648966 , 66.929021 , 19.176563 ], -[46.372358 , 56.311389 , 30.770570 ], -[57.343480 , 42.419126 , 37.628629 ], -[64.388482 , 25.455880 , 40.886309 ], -[68.212038 , 6.990805 , 42.281449 ], -[70.486405 , -11.666193 , 44.142567 ], -[71.375822 , -30.365191 , 47.140426 ], -[-61.119406 , -49.361602 , 14.254422 ], -[-51.287588 , -58.769795 , 7.268147 ], -[-37.804800 , -61.996155 , 0.442051 ], -[-24.022754 , -61.033399 , -6.606501 ], -[-11.635713 , -56.686759 , -11.967398 ], -[12.056636 , -57.391033 , -12.051204 ], -[25.106256 , -61.902186 , -7.315098 ], -[38.338588 , -62.777713 , -1.022953 ], -[51.191007 , -59.302347 , 5.349435 ], -[60.053851 , -50.190255 , 11.615746 ], -[0.653940 , -42.193790 , -13.380835 ], -[0.804809 , -30.993721 , -21.150853 ], -[0.992204 , -19.944596 , -29.284036 ], -[1.226783 , -8.414541 , -36.948060 ], -[-14.772472 , 2.598255 , -20.132003 ], -[-7.180239 , 4.751589 , -23.536684 ], -[0.555920 , 6.562900 , -25.944448 ], -[8.272499 , 4.661005 , -23.695741 ], -[15.214351 , 2.643046 , -20.858157 ], -[-46.047290 , -37.471411 , 7.037989 ], -[-37.674688 , -42.730510 , 3.021217 ], -[-27.883856 , -42.711517 , 1.353629 ], -[-19.648268 , -36.754742 , -0.111088 ], -[-28.272965 , -35.134493 , -0.147273 ], -[-38.082418 , -34.919043 , 1.476612 ], -[19.265868 , -37.032306 , -0.665746 ], -[27.894191 , -43.342445 , 0.247660 ], -[37.437529 , -43.110822 , 1.696435 ], -[45.170805 , -38.086515 , 4.894163 ], -[38.196454 , -35.532024 , 0.282961 ], -[28.764989 , -35.484289 , -1.172675 ], -[-28.916267 , 28.612716 , -2.240310 ], -[-17.533194 , 22.172187 , -15.934335 ], -[-6.684590 , 19.029051 , -22.611355 ], -[0.381001 , 20.721118 , -23.748437 ], -[8.375443 , 19.035460 , -22.721995 ], -[18.876618 , 22.394109 , -15.610679 ], -[28.794412 , 28.079924 , -3.217393 ], -[19.057574 , 36.298248 , -14.987997 ], -[8.956375 , 39.634575 , -22.554245 ], -[0.381549 , 40.395647 , -23.591626 ], -[-7.428895 , 39.836405 , -22.406106 ], -[-18.160634 , 36.677899 , -15.121907 ], -[-24.377490 , 28.677771 , -4.785684 ], -[-6.897633 , 25.475976 , -20.893742 ], -[0.340663 , 26.014269 , -22.220479 ], -[8.444722 , 25.326198 , -21.025520 ], -[24.474473 , 28.323008 , -5.712776 ], -[8.449166 , 30.596216 , -20.671489 ], -[0.205322 , 31.408738 , -21.903670 ], -[-7.198266 , 30.844876 , -20.328022 ] ], dtype=np.float32) +[-73.393523 , -29.801432 , 47.667532 ], #00 +[-72.775014 , -10.949766 , 45.909403 ], #01 +[-70.533638 , 7.929818 , 44.842580 ], #02 +[-66.850058 , 26.074280 , 43.141114 ], #03 +[-59.790187 , 42.564390 , 38.635298 ], #04 +[-48.368973 , 56.481080 , 30.750622 ], #05 +[-34.121101 , 67.246992 , 18.456453 ], #06 +[-17.875411 , 75.056892 , 3.609035 ], #07 +[0.098749 , 77.061286 , -0.881698 ], #08 +[17.477031 , 74.758448 , 5.181201 ], #09 +[32.648966 , 66.929021 , 19.176563 ], #10 +[46.372358 , 56.311389 , 30.770570 ], #11 +[57.343480 , 42.419126 , 37.628629 ], #12 +[64.388482 , 25.455880 , 40.886309 ], #13 +[68.212038 , 6.990805 , 42.281449 ], #14 
+[70.486405 , -11.666193 , 44.142567 ], #15 +[71.375822 , -30.365191 , 47.140426 ], #16 +[-61.119406 , -49.361602 , 14.254422 ], #17 +[-51.287588 , -58.769795 , 7.268147 ], #18 +[-37.804800 , -61.996155 , 0.442051 ], #19 +[-24.022754 , -61.033399 , -6.606501 ], #20 +[-11.635713 , -56.686759 , -11.967398 ], #21 +[12.056636 , -57.391033 , -12.051204 ], #22 +[25.106256 , -61.902186 , -7.315098 ], #23 +[38.338588 , -62.777713 , -1.022953 ], #24 +[51.191007 , -59.302347 , 5.349435 ], #25 +[60.053851 , -50.190255 , 11.615746 ], #26 +[0.653940 , -42.193790 , -13.380835 ], #27 +[0.804809 , -30.993721 , -21.150853 ], #28 +[0.992204 , -19.944596 , -29.284036 ], #29 +[1.226783 , -8.414541 , -36.948060 ], #00 +[-14.772472 , 2.598255 , -20.132003 ], #01 +[-7.180239 , 4.751589 , -23.536684 ], #02 +[0.555920 , 6.562900 , -25.944448 ], #03 +[8.272499 , 4.661005 , -23.695741 ], #04 +[15.214351 , 2.643046 , -20.858157 ], #05 +[-46.047290 , -37.471411 , 7.037989 ], #06 +[-37.674688 , -42.730510 , 3.021217 ], #07 +[-27.883856 , -42.711517 , 1.353629 ], #08 +[-19.648268 , -36.754742 , -0.111088 ], #09 +[-28.272965 , -35.134493 , -0.147273 ], #10 +[-38.082418 , -34.919043 , 1.476612 ], #11 +[19.265868 , -37.032306 , -0.665746 ], #12 +[27.894191 , -43.342445 , 0.247660 ], #13 +[37.437529 , -43.110822 , 1.696435 ], #14 +[45.170805 , -38.086515 , 4.894163 ], #15 +[38.196454 , -35.532024 , 0.282961 ], #16 +[28.764989 , -35.484289 , -1.172675 ], #17 +[-28.916267 , 28.612716 , -2.240310 ], #18 +[-17.533194 , 22.172187 , -15.934335 ], #19 +[-6.684590 , 19.029051 , -22.611355 ], #20 +[0.381001 , 20.721118 , -23.748437 ], #21 +[8.375443 , 19.035460 , -22.721995 ], #22 +[18.876618 , 22.394109 , -15.610679 ], #23 +[28.794412 , 28.079924 , -3.217393 ], #24 +[19.057574 , 36.298248 , -14.987997 ], #25 +[8.956375 , 39.634575 , -22.554245 ], #26 +[0.381549 , 40.395647 , -23.591626 ], #27 +[-7.428895 , 39.836405 , -22.406106 ], #28 +[-18.160634 , 36.677899 , -15.121907 ], #29 +[-24.377490 , 28.677771 , -4.785684 ], #30 +[-6.897633 , 25.475976 , -20.893742 ], #31 +[0.340663 , 26.014269 , -22.220479 ], #32 +[8.444722 , 25.326198 , -21.025520 ], #33 +[24.474473 , 28.323008 , -5.712776 ], #34 +[8.449166 , 30.596216 , -20.671489 ], #35 +[0.205322 , 31.408738 , -21.903670 ], #36 +[-7.198266 , 30.844876 , -20.328022 ] #37 +], dtype=np.float32) FaceType_to_padding_remove_align = { - FaceType.MOUTH: (0.25, False), FaceType.HALF: (0.0, False), FaceType.MID_FULL: (0.0675, False), FaceType.FULL: (0.2109375, False), FaceType.FULL_NO_ALIGN: (0.2109375, True), FaceType.WHOLE_FACE: (0.40, False), - FaceType.HEAD: (1.0, False), - FaceType.HEAD_NO_ALIGN: (1.0, True), + FaceType.HEAD: (0.70, False), + FaceType.HEAD_NO_ALIGN: (0.70, True), } def convert_98_to_68(lmrks): @@ -279,11 +278,8 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): # estimate landmarks transform from global space to local aligned space with bounds [0..1] - if face_type == FaceType.MOUTH: - mat = umeyama(image_landmarks[48:68], mouth_center_landmarks_2D, True)[0:2] - else: - mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2] - + mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2] + # get corner points in global space g_p = transform_points ( np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5) ]) , mat, True) g_c = g_p[4] @@ -297,16 +293,36 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): # calc 
modifier of diagonal vectors for scale and padding value padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0) mod = (1.0 / scale)* ( npla.norm(g_p[0]-g_p[2])*(padding*np.sqrt(2.0) + 0.5) ) - + if face_type == FaceType.WHOLE_FACE: - # adjust center for WHOLE_FACE, 7% below in order to cover more forehead + # adjust vertical offset for WHOLE_FACE, 7% below in order to cover more forehead vec = (g_p[0]-g_p[3]).astype(np.float32) vec_len = npla.norm(vec) vec /= vec_len - g_c += vec*vec_len*0.07 - - # calc 3 points in global space to estimate 2d affine transform + + elif face_type == FaceType.HEAD: + mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2] + + # assuming image_landmarks are 3D_Landmarks extracted for HEAD, + # adjust horizontal offset according to estimated yaw + yaw = estimate_averaged_yaw(transform_points (image_landmarks, mat, False)) + + hvec = (g_p[0]-g_p[1]).astype(np.float32) + hvec_len = npla.norm(hvec) + hvec /= hvec_len + + yaw *= np.abs(math.tanh(yaw*2)) # Damp near zero + + g_c -= hvec * (yaw * hvec_len / 2.0) + + # adjust vertical offset for HEAD, 50% below + vvec = (g_p[0]-g_p[3]).astype(np.float32) + vvec_len = npla.norm(vvec) + vvec /= vvec_len + g_c += vvec*vvec_len*0.50 + + # calc 3 points in global space to estimate 2d affine transform if not remove_align: l_t = np.array( [ g_c - tb_diag_vec*mod, g_c + bt_diag_vec*mod, @@ -321,10 +337,10 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): # get area of face square in global space area = mathlib.polygon_area(l_t[:,0], l_t[:,1] ) - + # calc side of square side = np.float32(math.sqrt(area) / 2) - + # calc 3 points with unrotated square l_t = np.array( [ g_c + [-side,-side], g_c + [ side,-side], @@ -334,14 +350,14 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): pts2 = np.float32(( (0,0),(output_size,0),(output_size,output_size) )) mat = cv2.getAffineTransform(l_t,pts2) return mat - + def get_rect_from_landmarks(image_landmarks): mat = get_transform_mat(image_landmarks, 256, FaceType.FULL_NO_ALIGN) - + g_p = transform_points ( np.float32([(0,0),(255,255) ]) , mat, True) - + (l,t,r,b) = g_p[0][0], g_p[0][1], g_p[1][0], g_p[1][1] - + return (l,t,r,b) def expand_eyebrows(lmrks, eyebrows_expand_mod=1.0): @@ -393,15 +409,15 @@ def get_image_hull_mask (image_shape, image_landmarks, eyebrows_expand_mod=1.0 ) cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), (1,) ) return hull_mask - + def get_image_eye_mask (image_shape, image_landmarks): if len(image_landmarks) != 68: raise Exception('get_image_eye_mask works only with 68 landmarks') - + h,w,c = image_shape hull_mask = np.zeros( (h,w,1),dtype=np.float32) - + image_landmarks = image_landmarks.astype(np.int) cv2.fillConvexPoly( hull_mask, cv2.convexHull( image_landmarks[36:42]), (1,) ) @@ -409,7 +425,7 @@ def get_image_eye_mask (image_shape, image_landmarks): dilate = h // 32 hull_mask = cv2.dilate(hull_mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(dilate,dilate)), iterations = 1 ) - + blur = h // 16 blur = blur + (1-blur % 2) hull_mask = cv2.GaussianBlur(hull_mask, (blur, blur) , 0) @@ -646,9 +662,9 @@ def mirror_landmarks (landmarks, val): def get_face_struct_mask (image_shape, image_landmarks, eyebrows_expand_mod=1.0, color=(1,) ): mask = np.zeros(image_shape[0:2]+( len(color),),dtype=np.float32) lmrks = expand_eyebrows(image_landmarks, eyebrows_expand_mod) - draw_landmarks (mask, image_landmarks, color=color, 
draw_circles=False, thickness=2) + draw_landmarks (mask, image_landmarks, color=color, draw_circles=False, thickness=2) return mask - + def draw_landmarks (image, image_landmarks, color=(0,255,0), draw_circles=True, thickness=1, transparent_mask=False): if len(image_landmarks) != 68: raise Exception('get_image_eye_mask works only with 68 landmarks') @@ -669,7 +685,7 @@ def draw_landmarks (image, image_landmarks, color=(0,255,0), draw_circles=True, # closed shapes cv2.polylines(image, tuple(np.array([v]) for v in (right_eye, left_eye, mouth)), True, color, thickness=thickness, lineType=cv2.LINE_AA) - + if draw_circles: # the rest of the cicles for x, y in np.concatenate((right_eyebrow, left_eyebrow, mouth, right_eye, left_eye, nose), axis=0): @@ -692,17 +708,25 @@ def draw_rect_landmarks (image, rect, image_landmarks, face_type, face_size=256, points = transform_points ( [ ( int(face_size*0.05), 0), ( int(face_size*0.1), int(face_size*0.1) ), ( 0, int(face_size*0.1) ) ], image_to_face_mat, True) imagelib.draw_polygon (image, points, (0,0,255), 2) - + def calc_face_pitch(landmarks): if not isinstance(landmarks, np.ndarray): landmarks = np.array (landmarks) t = ( (landmarks[6][1]-landmarks[8][1]) + (landmarks[10][1]-landmarks[8][1]) ) / 2.0 b = landmarks[8][1] return float(b-t) + +def estimate_averaged_yaw(landmarks): + # Works much better than solvePnP if landmarks from "3DFAN" + if not isinstance(landmarks, np.ndarray): + landmarks = np.array (landmarks) + l = ( (landmarks[27][0]-landmarks[0][0]) + (landmarks[28][0]-landmarks[1][0]) + (landmarks[29][0]-landmarks[2][0]) ) / 3.0 + r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0 + return float(r-l) def estimate_pitch_yaw_roll(aligned_landmarks, size=256): """ - returns pitch,yaw,roll [-pi...+pi] + returns pitch,yaw,roll [-pi/2...+pi/2] """ shape = (size,size) focal_length = shape[1] @@ -712,19 +736,21 @@ def estimate_pitch_yaw_roll(aligned_landmarks, size=256): [0, focal_length, camera_center[1]], [0, 0, 1]], dtype=np.float32) - (_, rotation_vector, translation_vector) = cv2.solvePnP( - landmarks_68_3D, - aligned_landmarks.astype(np.float32), + (_, rotation_vector, _) = cv2.solvePnP( + np.concatenate( (landmarks_68_3D[:27], landmarks_68_3D[30:36]) , axis=0) , + np.concatenate( (aligned_landmarks[:27], aligned_landmarks[30:36]) , axis=0).astype(np.float32), camera_matrix, np.zeros((4, 1)) ) pitch, yaw, roll = mathlib.rotationMatrixToEulerAngles( cv2.Rodrigues(rotation_vector)[0] ) - pitch = np.clip ( pitch, -math.pi, math.pi ) - yaw = np.clip ( yaw , -math.pi, math.pi ) - roll = np.clip ( roll, -math.pi, math.pi ) + + half_pi = math.pi / 2.0 + pitch = np.clip ( pitch, -half_pi, half_pi ) + yaw = np.clip ( yaw , -half_pi, half_pi ) + roll = np.clip ( roll, -half_pi, half_pi ) return -pitch, yaw, roll - + #if remove_align: # bbox = transform_points ( [ (0,0), (0,output_size), (output_size, output_size), (output_size,0) ], mat, True) # #import code @@ -758,48 +784,48 @@ def estimate_pitch_yaw_roll(aligned_landmarks, size=256): """ -def get_averaged_transform_mat (img_landmarks, - img_landmarks_prev, - img_landmarks_next, - average_frame_count, +def get_averaged_transform_mat (img_landmarks, + img_landmarks_prev, + img_landmarks_next, + average_frame_count, average_center_frame_count, output_size, face_type, scale=1.0): - + l_c_list = [] tb_diag_vec_list = [] bt_diag_vec_list = [] mod_list = [] - + count = max(average_frame_count,average_center_frame_count) - for i in 
range ( -count, count+1, 1 ): + for i in range ( -count, count+1, 1 ): if i < 0: lmrks = img_landmarks_prev[i] if -i < len(img_landmarks_prev) else None elif i > 0: lmrks = img_landmarks_next[i] if i < len(img_landmarks_next) else None else: lmrks = img_landmarks - + if lmrks is None: continue - + l_c, tb_diag_vec, bt_diag_vec, mod = get_transform_mat_data (lmrks, face_type, scale=scale) - + if i >= -average_frame_count and i <= average_frame_count: tb_diag_vec_list.append(tb_diag_vec) bt_diag_vec_list.append(bt_diag_vec) mod_list.append(mod) - + if i >= -average_center_frame_count and i <= average_center_frame_count: l_c_list.append(l_c) - + tb_diag_vec = np.mean( np.array(tb_diag_vec_list), axis=0 ) bt_diag_vec = np.mean( np.array(bt_diag_vec_list), axis=0 ) - mod = np.mean( np.array(mod_list), axis=0 ) + mod = np.mean( np.array(mod_list), axis=0 ) l_c = np.mean( np.array(l_c_list), axis=0 ) return get_transform_mat_by_data (l_c, tb_diag_vec, bt_diag_vec, mod, output_size, face_type) - - + + def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): if not isinstance(image_landmarks, np.ndarray): image_landmarks = np.array (image_landmarks) @@ -809,7 +835,7 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): # estimate landmarks transform from global space to local aligned space with bounds [0..1] mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2] - + # get corner points in global space l_p = transform_points ( np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5)]) , mat, True) l_c = l_p[4] @@ -823,7 +849,7 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): # calc modifier of diagonal vectors for scale and padding value mod = (1.0 / scale)* ( npla.norm(l_p[0]-l_p[2])*(padding*np.sqrt(2.0) + 0.5) ) - # calc 3 points in global space to estimate 2d affine transform + # calc 3 points in global space to estimate 2d affine transform if not remove_align: l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ), np.round( l_c + bt_diag_vec*mod ), @@ -838,10 +864,10 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): # get area of face square in global space area = mathlib.polygon_area(l_t[:,0], l_t[:,1] ) - + # calc side of square side = np.float32(math.sqrt(area) / 2) - + # calc 3 points with unrotated square l_t = np.array( [ np.round( l_c + [-side,-side] ), np.round( l_c + [ side,-side] ), @@ -850,6 +876,6 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0): # calc affine transform from 3 global space points to 3 local space points size of 'output_size' pts2 = np.float32(( (0,0),(output_size,0),(output_size,output_size) )) mat = cv2.getAffineTransform(l_t,pts2) - + return mat """ \ No newline at end of file diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py index e101c46..2a14453 100644 --- a/mainscripts/Extractor.py +++ b/mainscripts/Extractor.py @@ -71,7 +71,9 @@ class ExtractSubprocessor(Subprocessor): self.rects_extractor = facelib.S3FDExtractor(place_model_on_cpu=place_model_on_cpu) if self.type == 'all' or 'landmarks' in self.type: - self.landmarks_extractor = facelib.FANExtractor(place_model_on_cpu=place_model_on_cpu) + # for head type, extract "3D landmarks" + self.landmarks_extractor = facelib.FANExtractor(landmarks_3D=self.face_type >= FaceType.HEAD, + place_model_on_cpu=place_model_on_cpu) self.cached_image = (None, None) @@ -690,7 +692,7 @@ def main(detector=None, ): face_type = 
FaceType.fromString(face_type) - image_size = 512 + image_size = 512 if face_type < FaceType.HEAD else 768 if not input_path.exists(): io.log_err ('Input directory not found. Please ensure it exists.') diff --git a/merger/InteractiveMergerSubprocessor.py b/merger/InteractiveMergerSubprocessor.py index 5552bce..bbec108 100644 --- a/merger/InteractiveMergerSubprocessor.py +++ b/merger/InteractiveMergerSubprocessor.py @@ -84,14 +84,19 @@ class InteractiveMergerSubprocessor(Subprocessor): filepath = frame_info.filepath if len(frame_info.landmarks_list) == 0: - self.log_info (f'no faces found for {filepath.name}, copying without faces') - - img_bgr = cv2_imread(filepath) - imagelib.normalize_channels(img_bgr, 3) + + if cfg.mode == 'raw-predict': + h,w,c = self.predictor_input_shape + img_bgr = np.zeros( (h,w,3), dtype=np.uint8) + img_mask = np.zeros( (h,w,1), dtype=np.uint8) + else: + self.log_info (f'no faces found for {filepath.name}, copying without faces') + img_bgr = cv2_imread(filepath) + imagelib.normalize_channels(img_bgr, 3) + h,w,c = img_bgr.shape + img_mask = np.zeros( (h,w,1), dtype=img_bgr.dtype) + cv2_imwrite (pf.output_filepath, img_bgr) - h,w,c = img_bgr.shape - - img_mask = np.zeros( (h,w,1), dtype=img_bgr.dtype) cv2_imwrite (pf.output_mask_filepath, img_mask) if pf.need_return_image: @@ -300,6 +305,7 @@ class InteractiveMergerSubprocessor(Subprocessor): '3' : lambda cfg,shift_pressed: cfg.set_mode(3), '4' : lambda cfg,shift_pressed: cfg.set_mode(4), '5' : lambda cfg,shift_pressed: cfg.set_mode(5), + '6' : lambda cfg,shift_pressed: cfg.set_mode(6), 'q' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(1 if not shift_pressed else 5), 'a' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(-1 if not shift_pressed else -5), 'w' : lambda cfg,shift_pressed: cfg.add_erode_mask_modifier(1 if not shift_pressed else 5), diff --git a/merger/MergeMasked.py b/merger/MergeMasked.py index 023718d..74c81da 100644 --- a/merger/MergeMasked.py +++ b/merger/MergeMasked.py @@ -17,8 +17,6 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, img_size = img_bgr.shape[1], img_bgr.shape[0] img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) - if cfg.mode == 'original': - return img_bgr, img_face_mask_a out_img = img_bgr.copy() out_merging_mask_a = None @@ -45,17 +43,10 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, predictor_input_bgr = cv2.resize (dst_face_bgr, (input_size,input_size) ) - predicted = predictor_func (predictor_input_bgr) - if isinstance(predicted, tuple): - #merger return bgr,mask - prd_face_bgr = np.clip (predicted[0], 0, 1.0) - prd_face_mask_a_0 = np.clip (predicted[1], 0, 1.0) - predictor_masked = True - else: - #merger return bgr only, using dst mask - prd_face_bgr = np.clip (predicted, 0, 1.0 ) - prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (input_size,input_size) ) - predictor_masked = False + predicted = predictor_func (predictor_input_bgr) + prd_face_bgr = np.clip (predicted[0], 0, 1.0) + prd_face_mask_a_0 = np.clip (predicted[1], 0, 1.0) + prd_face_dst_mask_a_0 = np.clip (predicted[2], 0, 1.0) if cfg.super_resolution_power != 0: prd_face_bgr_enhanced = face_enhancer_func(prd_face_bgr, is_tanh=True, preserve_size=False) @@ -64,89 +55,100 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, prd_face_bgr = np.clip(prd_face_bgr, 0, 1) if cfg.super_resolution_power != 0: - if predictor_masked: - prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (output_size, output_size), 
cv2.INTER_CUBIC) - else: - prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) + prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) + prd_face_dst_mask_a_0 = cv2.resize (prd_face_dst_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC) - if cfg.mask_mode == 2: #dst - prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) - elif cfg.mask_mode >= 3 and cfg.mask_mode <= 6: #XSeg modes - if cfg.mask_mode == 3 or cfg.mask_mode == 5 or cfg.mask_mode == 6: + if cfg.mask_mode == 1: #dst + wrk_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) + elif cfg.mask_mode == 2: #learned-prd + wrk_face_mask_a_0 = prd_face_mask_a_0 + elif cfg.mask_mode == 3: #learned-dst + wrk_face_mask_a_0 = prd_face_dst_mask_a_0 + elif cfg.mask_mode == 4: #learned-prd*learned-dst + wrk_face_mask_a_0 = prd_face_mask_a_0*prd_face_dst_mask_a_0 + elif cfg.mask_mode >= 5 and cfg.mask_mode <= 8: #XSeg modes + if cfg.mask_mode == 5 or cfg.mask_mode == 7 or cfg.mask_mode == 8: # obtain XSeg-prd prd_face_xseg_bgr = cv2.resize (prd_face_bgr, (xseg_input_size,)*2, cv2.INTER_CUBIC) prd_face_xseg_mask = xseg_256_extract_func(prd_face_xseg_bgr) X_prd_face_mask_a_0 = cv2.resize ( prd_face_xseg_mask, (output_size, output_size), cv2.INTER_CUBIC) - if cfg.mask_mode >= 4 and cfg.mask_mode <= 6: + if cfg.mask_mode >= 6 and cfg.mask_mode <= 8: # obtain XSeg-dst xseg_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, xseg_input_size, face_type=cfg.face_type) dst_face_xseg_bgr = cv2.warpAffine(img_bgr, xseg_mat, (xseg_input_size,)*2, flags=cv2.INTER_CUBIC ) dst_face_xseg_mask = xseg_256_extract_func(dst_face_xseg_bgr) X_dst_face_mask_a_0 = cv2.resize (dst_face_xseg_mask, (output_size,output_size), cv2.INTER_CUBIC) - if cfg.mask_mode == 3: #'XSeg-prd', - prd_face_mask_a_0 = X_prd_face_mask_a_0 - elif cfg.mask_mode == 4: #'XSeg-dst', - prd_face_mask_a_0 = X_dst_face_mask_a_0 - elif cfg.mask_mode == 5: #'XSeg-prd*XSeg-dst', - prd_face_mask_a_0 = X_prd_face_mask_a_0 * X_dst_face_mask_a_0 - elif cfg.mask_mode == 6: #learned*XSeg-prd*XSeg-dst' - prd_face_mask_a_0 = prd_face_mask_a_0 * X_prd_face_mask_a_0 * X_dst_face_mask_a_0 + if cfg.mask_mode == 5: #'XSeg-prd' + wrk_face_mask_a_0 = X_prd_face_mask_a_0 + elif cfg.mask_mode == 6: #'XSeg-dst' + wrk_face_mask_a_0 = X_dst_face_mask_a_0 + elif cfg.mask_mode == 7: #'XSeg-prd*XSeg-dst' + wrk_face_mask_a_0 = X_prd_face_mask_a_0 * X_dst_face_mask_a_0 + elif cfg.mask_mode == 8: #learned-prd*learned-dst*XSeg-prd*XSeg-dst + wrk_face_mask_a_0 = prd_face_mask_a_0 * prd_face_dst_mask_a_0 * X_prd_face_mask_a_0 * X_dst_face_mask_a_0 - prd_face_mask_a_0[ prd_face_mask_a_0 < (1.0/255.0) ] = 0.0 # get rid of noise + wrk_face_mask_a_0[ wrk_face_mask_a_0 < (1.0/255.0) ] = 0.0 # get rid of noise # resize to mask_subres_size - if prd_face_mask_a_0.shape[0] != mask_subres_size: - prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (mask_subres_size, mask_subres_size), cv2.INTER_CUBIC) + if wrk_face_mask_a_0.shape[0] != mask_subres_size: + wrk_face_mask_a_0 = cv2.resize (wrk_face_mask_a_0, (mask_subres_size, mask_subres_size), cv2.INTER_CUBIC) # process mask in local predicted space if 'raw' not in cfg.mode: # add zero pad - prd_face_mask_a_0 = np.pad (prd_face_mask_a_0, input_size) + wrk_face_mask_a_0 = np.pad (wrk_face_mask_a_0, input_size) ero = cfg.erode_mask_modifier blur = cfg.blur_mask_modifier if ero > 0: - prd_face_mask_a_0 = 
cv2.erode(prd_face_mask_a_0, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) + wrk_face_mask_a_0 = cv2.erode(wrk_face_mask_a_0, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(ero,ero)), iterations = 1 ) elif ero < 0: - prd_face_mask_a_0 = cv2.dilate(prd_face_mask_a_0, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(-ero,-ero)), iterations = 1 ) + wrk_face_mask_a_0 = cv2.dilate(wrk_face_mask_a_0, cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(-ero,-ero)), iterations = 1 ) # clip eroded/dilated mask in actual predict area # pad with half blur size in order to accuratelly fade to zero at the boundary clip_size = input_size + blur // 2 - prd_face_mask_a_0[:clip_size,:] = 0 - prd_face_mask_a_0[-clip_size:,:] = 0 - prd_face_mask_a_0[:,:clip_size] = 0 - prd_face_mask_a_0[:,-clip_size:] = 0 + wrk_face_mask_a_0[:clip_size,:] = 0 + wrk_face_mask_a_0[-clip_size:,:] = 0 + wrk_face_mask_a_0[:,:clip_size] = 0 + wrk_face_mask_a_0[:,-clip_size:] = 0 if blur > 0: blur = blur + (1-blur % 2) - prd_face_mask_a_0 = cv2.GaussianBlur(prd_face_mask_a_0, (blur, blur) , 0) + wrk_face_mask_a_0 = cv2.GaussianBlur(wrk_face_mask_a_0, (blur, blur) , 0) - prd_face_mask_a_0 = prd_face_mask_a_0[input_size:-input_size,input_size:-input_size] + wrk_face_mask_a_0 = wrk_face_mask_a_0[input_size:-input_size,input_size:-input_size] - prd_face_mask_a_0 = np.clip(prd_face_mask_a_0, 0, 1) + wrk_face_mask_a_0 = np.clip(wrk_face_mask_a_0, 0, 1) - img_face_mask_a = cv2.warpAffine( prd_face_mask_a_0, face_mask_output_mat, img_size, np.zeros(img_bgr.shape[0:2], dtype=np.float32), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC )[...,None] + img_face_mask_a = cv2.warpAffine( wrk_face_mask_a_0, face_mask_output_mat, img_size, np.zeros(img_bgr.shape[0:2], dtype=np.float32), flags=cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC )[...,None] img_face_mask_a = np.clip (img_face_mask_a, 0.0, 1.0) img_face_mask_a [ img_face_mask_a < (1.0/255.0) ] = 0.0 # get rid of noise - if prd_face_mask_a_0.shape[0] != output_size: - prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) + if wrk_face_mask_a_0.shape[0] != output_size: + wrk_face_mask_a_0 = cv2.resize (wrk_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) - prd_face_mask_a = prd_face_mask_a_0[...,None] - prd_face_mask_area_a = prd_face_mask_a.copy() - prd_face_mask_area_a[prd_face_mask_area_a>0] = 1.0 + wrk_face_mask_a = wrk_face_mask_a_0[...,None] + wrk_face_mask_area_a = wrk_face_mask_a.copy() + wrk_face_mask_area_a[wrk_face_mask_area_a>0] = 1.0 - if 'raw' in cfg.mode: + if cfg.mode == 'original': + return img_bgr, img_face_mask_a + + elif 'raw' in cfg.mode: if cfg.mode == 'raw-rgb': out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC, cv2.BORDER_TRANSPARENT ) out_merging_mask_a = img_face_mask_a - + + elif cfg.mode == 'raw-predict': + out_img = prd_face_bgr + out_merging_mask_a = wrk_face_mask_a + out_img = np.clip (out_img, 0.0, 1.0 ) else: #averaging [lenx, leny, maskx, masky] by grayscale gradients of upscaled mask @@ -165,8 +167,8 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, if 'seamless' not in cfg.mode and cfg.color_transfer_mode != 0: if cfg.color_transfer_mode == 1: #rct - prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( prd_face_bgr*prd_face_mask_area_a*255, 0, 255).astype(np.uint8), - np.clip( dst_face_bgr*prd_face_mask_area_a*255, 0, 255).astype(np.uint8), ) + prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( 
prd_face_bgr*wrk_face_mask_area_a*255, 0, 255).astype(np.uint8), + np.clip( dst_face_bgr*wrk_face_mask_area_a*255, 0, 255).astype(np.uint8), ) prd_face_bgr = np.clip( prd_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) elif cfg.color_transfer_mode == 2: #lct @@ -174,22 +176,22 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, elif cfg.color_transfer_mode == 3: #mkl prd_face_bgr = imagelib.color_transfer_mkl (prd_face_bgr, dst_face_bgr) elif cfg.color_transfer_mode == 4: #mkl-m - prd_face_bgr = imagelib.color_transfer_mkl (prd_face_bgr*prd_face_mask_area_a, dst_face_bgr*prd_face_mask_area_a) + prd_face_bgr = imagelib.color_transfer_mkl (prd_face_bgr*wrk_face_mask_area_a, dst_face_bgr*wrk_face_mask_area_a) elif cfg.color_transfer_mode == 5: #idt prd_face_bgr = imagelib.color_transfer_idt (prd_face_bgr, dst_face_bgr) elif cfg.color_transfer_mode == 6: #idt-m - prd_face_bgr = imagelib.color_transfer_idt (prd_face_bgr*prd_face_mask_area_a, dst_face_bgr*prd_face_mask_area_a) + prd_face_bgr = imagelib.color_transfer_idt (prd_face_bgr*wrk_face_mask_area_a, dst_face_bgr*wrk_face_mask_area_a) elif cfg.color_transfer_mode == 7: #sot-m - prd_face_bgr = imagelib.color_transfer_sot (prd_face_bgr*prd_face_mask_area_a, dst_face_bgr*prd_face_mask_area_a) + prd_face_bgr = imagelib.color_transfer_sot (prd_face_bgr*wrk_face_mask_area_a, dst_face_bgr*wrk_face_mask_area_a) prd_face_bgr = np.clip (prd_face_bgr, 0.0, 1.0) elif cfg.color_transfer_mode == 8: #mix-m - prd_face_bgr = imagelib.color_transfer_mix (prd_face_bgr*prd_face_mask_area_a, dst_face_bgr*prd_face_mask_area_a) + prd_face_bgr = imagelib.color_transfer_mix (prd_face_bgr*wrk_face_mask_area_a, dst_face_bgr*wrk_face_mask_area_a) if cfg.mode == 'hist-match': hist_mask_a = np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32) if cfg.masked_hist_match: - hist_mask_a *= prd_face_mask_area_a + hist_mask_a *= wrk_face_mask_area_a white = (1.0-hist_mask_a)* np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=np.float32) @@ -240,24 +242,24 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, if 'seamless' in cfg.mode and cfg.color_transfer_mode != 0: if cfg.color_transfer_mode == 1: - out_face_bgr = imagelib.reinhard_color_transfer ( np.clip(out_face_bgr*prd_face_mask_area_a*255, 0, 255).astype(np.uint8), - np.clip(dst_face_bgr*prd_face_mask_area_a*255, 0, 255).astype(np.uint8) ) + out_face_bgr = imagelib.reinhard_color_transfer ( np.clip(out_face_bgr*wrk_face_mask_area_a*255, 0, 255).astype(np.uint8), + np.clip(dst_face_bgr*wrk_face_mask_area_a*255, 0, 255).astype(np.uint8) ) out_face_bgr = np.clip( out_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) elif cfg.color_transfer_mode == 2: #lct out_face_bgr = imagelib.linear_color_transfer (out_face_bgr, dst_face_bgr) elif cfg.color_transfer_mode == 3: #mkl out_face_bgr = imagelib.color_transfer_mkl (out_face_bgr, dst_face_bgr) elif cfg.color_transfer_mode == 4: #mkl-m - out_face_bgr = imagelib.color_transfer_mkl (out_face_bgr*prd_face_mask_area_a, dst_face_bgr*prd_face_mask_area_a) + out_face_bgr = imagelib.color_transfer_mkl (out_face_bgr*wrk_face_mask_area_a, dst_face_bgr*wrk_face_mask_area_a) elif cfg.color_transfer_mode == 5: #idt out_face_bgr = imagelib.color_transfer_idt (out_face_bgr, dst_face_bgr) elif cfg.color_transfer_mode == 6: #idt-m - out_face_bgr = imagelib.color_transfer_idt (out_face_bgr*prd_face_mask_area_a, dst_face_bgr*prd_face_mask_area_a) + out_face_bgr = imagelib.color_transfer_idt (out_face_bgr*wrk_face_mask_area_a, dst_face_bgr*wrk_face_mask_area_a) elif 
cfg.color_transfer_mode == 7: #sot-m
-                out_face_bgr = imagelib.color_transfer_sot (out_face_bgr*prd_face_mask_area_a, dst_face_bgr*prd_face_mask_area_a)
+                out_face_bgr = imagelib.color_transfer_sot (out_face_bgr*wrk_face_mask_area_a, dst_face_bgr*wrk_face_mask_area_a)
                 out_face_bgr = np.clip (out_face_bgr, 0.0, 1.0)
             elif cfg.color_transfer_mode == 8: #mix-m
-                out_face_bgr = imagelib.color_transfer_mix (out_face_bgr*prd_face_mask_area_a, dst_face_bgr*prd_face_mask_area_a)
+                out_face_bgr = imagelib.color_transfer_mix (out_face_bgr*wrk_face_mask_area_a, dst_face_bgr*wrk_face_mask_area_a)

             if cfg.mode == 'seamless-hist-match':
                 out_face_bgr = imagelib.color_hist_match(out_face_bgr, dst_face_bgr, cfg.hist_match_threshold)
diff --git a/merger/MergerConfig.py b/merger/MergerConfig.py
index c9eb868..8f59861 100644
--- a/merger/MergerConfig.py
+++ b/merger/MergerConfig.py
@@ -76,21 +76,21 @@ mode_dict = {0:'original',
              2:'hist-match',
              3:'seamless',
              4:'seamless-hist-match',
-             5:'raw-rgb',}
+             5:'raw-rgb',
+             6:'raw-predict'}

-mode_str_dict = {}
+mode_str_dict = { mode_dict[key] : key for key in mode_dict.keys() }

-for key in mode_dict.keys():
-    mode_str_dict[ mode_dict[key] ] = key
-
-mask_mode_dict = {1:'learned',
-                  2:'dst',
-                  3:'XSeg-prd',
-                  4:'XSeg-dst',
-                  5:'XSeg-prd*XSeg-dst',
-                  6:'learned*XSeg-prd*XSeg-dst'
+mask_mode_dict = {1:'dst',
+                  2:'learned-prd',
+                  3:'learned-dst',
+                  4:'learned-prd*learned-dst',
+                  5:'XSeg-prd',
+                  6:'XSeg-dst',
+                  7:'XSeg-prd*XSeg-dst',
+                  8:'learned-prd*learned-dst*XSeg-prd*XSeg-dst'
                   }
-
+
 ctm_dict = { 0: "None", 1:"rct", 2:"lct", 3:"mkl", 4:"mkl-m", 5:"idt", 6:"idt-m", 7:"sot-m", 8:"mix-m" }
 ctm_str_dict = {None:0, "rct":1, "lct":2, "mkl":3, "mkl-m":4, "idt":5, "idt-m":6, "sot-m":7, "mix-m":8 }

@@ -102,7 +102,7 @@ class MergerConfigMasked(MergerConfig):
                        mode='overlay',
                        masked_hist_match=True,
                        hist_match_threshold = 238,
-                       mask_mode = 1,
+                       mask_mode = 4,
                        erode_mask_modifier = 0,
                        blur_mask_modifier = 0,
                        motion_blur_power = 0,
@@ -118,7 +118,7 @@ class MergerConfigMasked(MergerConfig):
         super().__init__(type=MergerConfig.TYPE_MASKED, **kwargs)

         self.face_type = face_type
-        if self.face_type not in [FaceType.HALF, FaceType.MID_FULL, FaceType.FULL, FaceType.WHOLE_FACE ]:
+        if self.face_type not in [FaceType.HALF, FaceType.MID_FULL, FaceType.FULL, FaceType.WHOLE_FACE, FaceType.HEAD ]:
             raise ValueError("MergerConfigMasked does not support this type of face.")

         self.default_mode = default_mode
@@ -262,9 +262,9 @@ class MergerConfigMasked(MergerConfig):

         if self.mode == 'hist-match' or self.mode == 'seamless-hist-match':
             r += f"""hist_match_threshold: {self.hist_match_threshold}\n"""
-
+
         r += f"""mask_mode: { mask_mode_dict[self.mask_mode] }\n"""
-
+
         if 'raw' not in self.mode:
             r += (f"""erode_mask_modifier: {self.erode_mask_modifier}\n"""
                   f"""blur_mask_modifier: {self.blur_mask_modifier}\n"""
@@ -274,8 +274,8 @@ class MergerConfigMasked(MergerConfig):

         if 'raw' not in self.mode:
             r += f"""color_transfer_mode: {ctm_dict[self.color_transfer_mode]}\n"""
+        r += super().to_string(filename)

-        r += super().to_string(filename)
         r += f"""super_resolution_power: {self.super_resolution_power}\n"""

         if 'raw' not in self.mode:
diff --git a/merger/gfx/help_merger_masked.jpg b/merger/gfx/help_merger_masked.jpg
index f3c31e0..df22ffb 100644
Binary files a/merger/gfx/help_merger_masked.jpg and b/merger/gfx/help_merger_masked.jpg differ
diff --git a/merger/gfx/help_merger_masked_source.psd b/merger/gfx/help_merger_masked_source.psd
index 25a440f..437a410 100644
Binary files a/merger/gfx/help_merger_masked_source.psd and b/merger/gfx/help_merger_masked_source.psd differ
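The renumbered mask_mode_dict above pairs with the MergeMaskedFace rewrite earlier in this diff: every mode now reduces to a choice of one mask or an elementwise product of several, followed by the same noise-floor cut. A minimal numpy sketch of that selection step, assuming float masks in [0..1] (the function and argument names are illustrative, not DFL API):

```python
import numpy as np

def select_wrk_mask(mask_mode, dst_mask, learned_prd, learned_dst,
                    xseg_prd=None, xseg_dst=None):
    # Mode numbering follows the new mask_mode_dict in merger/MergerConfig.py.
    if mask_mode == 1:    # dst
        wrk = dst_mask
    elif mask_mode == 2:  # learned-prd
        wrk = learned_prd
    elif mask_mode == 3:  # learned-dst
        wrk = learned_dst
    elif mask_mode == 4:  # learned-prd*learned-dst
        wrk = learned_prd * learned_dst
    elif mask_mode == 5:  # XSeg-prd
        wrk = xseg_prd
    elif mask_mode == 6:  # XSeg-dst
        wrk = xseg_dst
    elif mask_mode == 7:  # XSeg-prd*XSeg-dst
        wrk = xseg_prd * xseg_dst
    else:                 # 8: learned-prd*learned-dst*XSeg-prd*XSeg-dst
        wrk = learned_prd * learned_dst * xseg_prd * xseg_dst

    wrk = wrk.copy()
    wrk[wrk < (1.0 / 255.0)] = 0.0  # the same noise floor MergeMaskedFace applies
    return wrk
```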
diff --git a/models/Model_Quick96/Model.py b/models/Model_Quick96/Model.py
index eccdbc7..967b9a3 100644
--- a/models/Model_Quick96/Model.py
+++ b/models/Model_Quick96/Model.py
@@ -308,8 +308,7 @@ class QModel(ModelBase):
         face = nn.to_data_format(face[None,...], self.model_data_format, "NHWC")
         bgr, mask_dst_dstm, mask_src_dstm = [ nn.to_data_format(x, "NHWC", self.model_data_format).astype(np.float32) for x in self.AE_merge (face) ]
-        mask = mask_dst_dstm[0] * mask_src_dstm[0]
-        return bgr[0], mask[...,0]
+        return bgr[0], mask_src_dstm[0][...,0], mask_dst_dstm[0][...,0]

     #override
     def get_MergerConfig(self):
diff --git a/models/Model_SAEHD/Model.py b/models/Model_SAEHD/Model.py
index f90891d..e934a63 100644
--- a/models/Model_SAEHD/Model.py
+++ b/models/Model_SAEHD/Model.py
@@ -60,7 +60,7 @@ class SAEHDModel(ModelBase):
         resolution = io.input_int("Resolution", default_resolution, add_info="64-512", help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.")
         resolution = np.clip ( (resolution // 16) * 16, 64, 512)
         self.options['resolution'] = resolution
-        self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf'], help_message="Half / mid face / full face / whole face. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face. 'Whole face' covers full area of face include forehead, but requires manual merge in Adobe After Effects.").lower()
+        self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf','head'], help_message="Half / mid face / full face / whole face / head. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face. 'Whole face' covers the full area of the face including the forehead. 'head' covers the full head, but requires XSeg for the src and dst facesets.").lower()
         self.options['archi'] = io.input_str ("AE architecture", default_archi, ['df','liae','dfhd','liaehd','dfuhd','liaeuhd'], help_message="'df' keeps faces more natural.\n'liae' can fix overly different face shapes.\n'hd' are experimental versions.").lower()

         default_d_dims = 48 if self.options['archi'] == 'dfhd' else 64
@@ -84,11 +84,11 @@ class SAEHDModel(ModelBase):
         self.options['d_mask_dims'] = d_mask_dims + d_mask_dims % 2

         if self.is_first_run() or ask_override:
-            if self.options['face_type'] == 'wf':
+            if self.options['face_type'] == 'wf' or self.options['face_type'] == 'head':
                 self.options['masked_training'] = io.input_bool ("Masked training", default_masked_training, help_message="This option is available only for 'whole_face' type. Masked training clips training area to full_face mask, thus network will train the faces properly. When the face is trained enough, disable this option to train all area of the frame. Merge with 'raw-rgb' mode, then use Adobe After Effects to manually mask and compose whole face include forehead.")
-
+
            self.options['eyes_prio'] = io.input_bool ("Eyes priority", default_eyes_prio, help_message='Helps to fix eye problems during training like "alien eyes" and wrong eyes direction ( especially on HD architectures ) by forcing the neural network to train eyes with higher priority.
diff --git a/models/Model_SAEHD/Model.py b/models/Model_SAEHD/Model.py
index f90891d..e934a63 100644
--- a/models/Model_SAEHD/Model.py
+++ b/models/Model_SAEHD/Model.py
@@ -60,7 +60,7 @@ class SAEHDModel(ModelBase):
             resolution = io.input_int("Resolution", default_resolution, add_info="64-512", help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.")
             resolution = np.clip ( (resolution // 16) * 16, 64, 512)
             self.options['resolution'] = resolution
-            self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf'], help_message="Half / mid face / full face / whole face. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face. 'Whole face' covers full area of face include forehead, but requires manual merge in Adobe After Effects.").lower()
+            self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf','head'], help_message="Half / mid face / full face / whole face / head. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face. 'Whole face' covers the full area of the face, including the forehead. 'head' covers the full head, but requires XSeg for the src and dst facesets.").lower()

             self.options['archi'] = io.input_str ("AE architecture", default_archi, ['df','liae','dfhd','liaehd','dfuhd','liaeuhd'], help_message="'df' keeps faces more natural.\n'liae' can fix overly different face shapes.\n'hd' are experimental versions.").lower()

         default_d_dims             = 48 if self.options['archi'] == 'dfhd' else 64
@@ -84,11 +84,11 @@ class SAEHDModel(ModelBase):
             self.options['d_mask_dims'] = d_mask_dims + d_mask_dims % 2

         if self.is_first_run() or ask_override:
-            if self.options['face_type'] == 'wf':
+            if self.options['face_type'] == 'wf' or self.options['face_type'] == 'head':
                 self.options['masked_training']  = io.input_bool ("Masked training", default_masked_training, help_message="This option is available only for 'whole_face' type. Masked training clips training area to full_face mask, thus network will train the faces properly. When the face is trained enough, disable this option to train all area of the frame. Merge with 'raw-rgb' mode, then use Adobe After Effects to manually mask and compose whole face include forehead.")
-
+
             self.options['eyes_prio'] = io.input_bool ("Eyes priority", default_eyes_prio, help_message='Helps to fix eye problems during training like "alien eyes" and wrong eyes direction ( especially on HD architectures ) by forcing the neural network to train eyes with higher priority. before/after https://i.imgur.com/YQHOuSR.jpg ')
-
+
         if self.is_first_run() or ask_override:
             self.options['models_opt_on_gpu'] = io.input_bool ("Place models and optimizer on GPU", default_models_opt_on_gpu, help_message="When you train on one GPU, by default model and optimizer weights are placed on GPU to accelerate the process. You can place them on CPU to free up extra VRAM, thus set bigger dimensions.")
@@ -104,10 +104,10 @@ class SAEHDModel(ModelBase):
             self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn the color of the predicted face to be the same as dst inside mask. If you want to use this option with 'whole_face' you have to use XSeg trained mask. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.001 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
             self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn the area outside mask of the predicted face to be the same as dst. If you want to use this option with 'whole_face' you have to use XSeg trained mask. This can make face more like dst. Enabling this option increases the chance of model collapse. Typical value is 2.0"), 0.0, 100.0 )
-
+
             self.options['ct_mode'] = io.input_str (f"Color transfer for src faceset", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.")
             self.options['clipgrad'] = io.input_bool ("Enable gradient clipping", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.")
-
+
             self.options['pretrain'] = io.input_bool ("Enable pretraining mode", default_pretrain, help_message="Pretrain the model with large amount of various faces. After that, model can be used to train the fakes more quickly.")

         if self.options['pretrain'] and self.get_pretraining_data_path() is None:
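
For reference, the Resolution prompt near the top of these hunks adjusts the entered value to a multiple of 16 within 64..512. A quick standalone worked example of that rule, mirroring the np.clip line above:

```python
import numpy as np

# Worked examples of the resolution adjustment rule.
for requested in (63, 250, 256, 600):
    adjusted = int(np.clip((requested // 16) * 16, 64, 512))
    print(f'{requested} -> {adjusted}')   # 63 -> 64, 250 -> 240, 256 -> 256, 600 -> 512
```
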
@@ -127,7 +127,8 @@ class SAEHDModel(ModelBase):
         self.face_type = {'h'  : FaceType.HALF,
                           'mf' : FaceType.MID_FULL,
                           'f'  : FaceType.FULL,
-                          'wf' : FaceType.WHOLE_FACE}[ self.options['face_type'] ]
+                          'wf' : FaceType.WHOLE_FACE,
+                          'head' : FaceType.HEAD}[ self.options['face_type'] ]

         eyes_prio = self.options['eyes_prio']
         archi = self.options['archi']
@@ -166,10 +167,10 @@ class SAEHDModel(ModelBase):

             self.target_srcm_all = tf.placeholder (nn.floatx, mask_shape)
             self.target_dstm_all = tf.placeholder (nn.floatx, mask_shape)
-
+
         # Initializing model classes
-        model_archi = nn.DeepFakeArchi(resolution, mod='uhd' if 'uhd' in archi else None)
-
+        model_archi = nn.DeepFakeArchi(resolution, mod='uhd' if 'uhd' in archi else None)
+
         with tf.device (models_opt_device):
             if 'df' in archi:
                 self.encoder = model_archi.Encoder(in_ch=input_ch, e_ch=e_dims, is_hd=is_hd, name='encoder')
@@ -270,7 +271,7 @@ class SAEHDModel(ModelBase):
                         gpu_target_dst      = self.target_dst [batch_slice,:,:,:]
                         gpu_target_srcm_all = self.target_srcm_all[batch_slice,:,:,:]
                         gpu_target_dstm_all = self.target_dstm_all[batch_slice,:,:,:]
-
+
                         # process model tensors
                         if 'df' in archi:
                             gpu_src_code     = self.inter(self.encoder(gpu_warped_src))
@@ -300,11 +301,11 @@ class SAEHDModel(ModelBase):
                         gpu_pred_src_srcm_list.append(gpu_pred_src_srcm)
                         gpu_pred_dst_dstm_list.append(gpu_pred_dst_dstm)
                         gpu_pred_src_dstm_list.append(gpu_pred_src_dstm)
-
+
                         # unpack masks from one combined mask
-                        gpu_target_srcm      = tf.clip_by_value (gpu_target_srcm_all, 0, 1)
-                        gpu_target_dstm      = tf.clip_by_value (gpu_target_dstm_all, 0, 1)
-                        gpu_target_srcm_eyes = tf.clip_by_value (gpu_target_srcm_all-1, 0, 1)
+                        gpu_target_srcm      = tf.clip_by_value (gpu_target_srcm_all, 0, 1)
+                        gpu_target_dstm      = tf.clip_by_value (gpu_target_dstm_all, 0, 1)
+                        gpu_target_srcm_eyes = tf.clip_by_value (gpu_target_srcm_all-1, 0, 1)
                         gpu_target_dstm_eyes = tf.clip_by_value (gpu_target_dstm_all-1, 0, 1)

                         gpu_target_srcm_blur = nn.gaussian_blur(gpu_target_srcm, max(1, resolution // 32) )
@@ -315,7 +316,7 @@ class SAEHDModel(ModelBase):

                         gpu_target_src_masked_opt  = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
                         gpu_target_dst_masked_opt  = gpu_target_dst_masked if masked_training else gpu_target_dst
-
+
                         gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src
                         gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst
@@ -324,10 +325,10 @@ class SAEHDModel(ModelBase):

                         gpu_src_loss =  tf.reduce_mean ( 10*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                         gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
-
+
                         if eyes_prio:
                             gpu_src_loss += tf.reduce_mean ( 300*tf.abs ( gpu_target_src*gpu_target_srcm_eyes - gpu_pred_src_src*gpu_target_srcm_eyes ), axis=[1,2,3])
-
+
                         gpu_src_loss += tf.reduce_mean ( 10*tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] )

                         face_style_power = self.options['face_style_power'] / 100.0
@@ -341,10 +342,10 @@ class SAEHDModel(ModelBase):

                         gpu_dst_loss  = tf.reduce_mean ( 10*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
                         gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3])
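
The "unpack masks from one combined mask" block above relies on an encoding where, in target_srcm_all / target_dstm_all, the background is 0, the face region is 1 and the eye regions are 2, so two clips recover the face mask and the eyes-only mask. A standalone NumPy illustration of that decoding (the toy array is mine; the encoding is inferred from the clip arithmetic above):

```python
import numpy as np

# Toy combined mask: 0 = background, 1 = face, 2 = eyes (assumed encoding).
mask_all = np.array([[0., 1., 2.],
                     [0., 1., 1.]], dtype=np.float32)

face_mask = np.clip(mask_all,     0, 1)   # 1 over the whole face, eyes included
eyes_mask = np.clip(mask_all - 1, 0, 1)   # 1 only where eyes were marked

assert face_mask.tolist() == [[0., 1., 1.], [0., 1., 1.]]
assert eyes_mask.tolist() == [[0., 0., 1.], [0., 0., 0.]]
```
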
-
+
                         if eyes_prio:
                             gpu_dst_loss += tf.reduce_mean ( 300*tf.abs ( gpu_target_dst*gpu_target_dstm_eyes - gpu_pred_dst_dst*gpu_target_dstm_eyes ), axis=[1,2,3])
-
+
                         gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] )

                         gpu_src_losses += [gpu_src_loss]
@@ -444,7 +445,7 @@ class SAEHDModel(ModelBase):
                                                            self.target_dstm_all:target_dstm_all})
             self.D_src_dst_train = D_src_dst_train
-
+
             def AE_view(warped_src, warped_dst):
                 return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm],
                                         feed_dict={self.warped_src:warped_src,
@@ -468,7 +469,7 @@ class SAEHDModel(ModelBase):
                 gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder(gpu_src_dst_code)
                 _, gpu_pred_dst_dstm = self.decoder(gpu_dst_code)
-
+
             def AE_merge( warped_dst):
                 return nn.tf_sess.run ( [gpu_pred_src_dst, gpu_pred_dst_dstm, gpu_pred_src_dstm], feed_dict={self.warped_dst:warped_dst})

@@ -523,10 +524,10 @@ class SAEHDModel(ModelBase):
                               ], generators_count=dst_generators_count )
                              ])
-
+
         self.last_src_samples_loss = []
         self.last_dst_samples_loss = []
-
+
         if self.pretrain_just_disabled:
             self.update_sample_for_preview(force_new=True)
@@ -543,25 +544,25 @@ class SAEHDModel(ModelBase):
     #override
     def onTrainOneIter(self):
         bs = self.get_batch_size()
-
+
         ( (warped_src, target_src, target_srcm_all), \
           (warped_dst, target_dst, target_dstm_all) ) = self.generate_next_samples()

         src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm_all, warped_dst, target_dst, target_dstm_all)
-
-        for i in range(bs):
+
+        for i in range(bs):
            self.last_src_samples_loss.append (  (target_src[i], target_srcm_all[i], src_loss[i] )  )
            self.last_dst_samples_loss.append (  (target_dst[i], target_dstm_all[i], dst_loss[i] )  )
-
+
        if len(self.last_src_samples_loss) >= bs*16:
            src_samples_loss = sorted(self.last_src_samples_loss, key=operator.itemgetter(2), reverse=True)
            dst_samples_loss = sorted(self.last_dst_samples_loss, key=operator.itemgetter(2), reverse=True)
-
+
            target_src      = np.stack( [ x[0] for x in src_samples_loss[:bs] ] )
            target_srcm_all = np.stack( [ x[1] for x in src_samples_loss[:bs] ] )
-
+
            target_dst      = np.stack( [ x[0] for x in dst_samples_loss[:bs] ] )
-           target_dstm_all = np.stack( [ x[1] for x in dst_samples_loss[:bs] ] )
+           target_dstm_all = np.stack( [ x[1] for x in dst_samples_loss[:bs] ] )

           src_loss, dst_loss = self.src_dst_train (target_src, target_src, target_srcm_all, target_dst, target_dst, target_dstm_all)

           self.last_src_samples_loss = []
@@ -584,68 +585,71 @@ class SAEHDModel(ModelBase):
         DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ]

         target_srcm_all, target_dstm_all = [ nn.to_data_format(x,"NHWC", self.model_data_format) for x in ([target_srcm_all, target_dstm_all] )]
-
+
         target_srcm = np.clip(target_srcm_all, 0, 1)
         target_dstm = np.clip(target_dstm_all, 0, 1)
-
+
         n_samples = min(4, self.get_batch_size(), 800 // self.resolution )

         if self.resolution <= 256:
             result = []
-
+
             st = []
             for i in range(n_samples):
                 ar = S[i], SS[i], D[i], DD[i], SD[i]
                 st.append ( np.concatenate ( ar, axis=1) )
             result += [ ('SAEHD', np.concatenate (st, axis=0 )), ]
-
+
             st_m = []
             for i in range(n_samples):
-                ar = S[i]*target_srcm[i], SS[i], D[i]*target_dstm[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i])
+                SD_mask = DDM[i]*SDM[i] if self.face_type < FaceType.HEAD else SDM[i]
+
+                ar = S[i]*target_srcm[i], SS[i], D[i]*target_dstm[i], DD[i]*DDM[i], SD[i]*SD_mask
                 st_m.append ( np.concatenate ( ar, axis=1) )
             result += [ ('SAEHD masked', np.concatenate (st_m, axis=0 )), ]
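
The onTrainOneIter hunk above accumulates per-sample losses and, once 16 batches' worth is buffered, re-trains on the highest-loss samples before resetting the buffer. The same idea in isolation (schematic sketch with a stand-in train_step; names are mine, not the patch's):

```python
import numpy as np

def hard_example_pass(buffer, batch_size, train_step):
    """buffer: list of (sample, mask, loss) tuples from recent batches.
    When 16 batches' worth has accumulated, retrain once on the worst
    batch_size samples and reset, mirroring onTrainOneIter's logic."""
    if len(buffer) < batch_size * 16:
        return buffer
    worst = sorted(buffer, key=lambda t: t[2], reverse=True)[:batch_size]
    samples = np.stack([t[0] for t in worst])
    masks   = np.stack([t[1] for t in worst])
    train_step(samples, masks)
    return []
```
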
         else:
             result = []
-
+
             st = []
             for i in range(n_samples):
                 ar = S[i], SS[i]
                 st.append ( np.concatenate ( ar, axis=1) )
             result += [ ('SAEHD src-src', np.concatenate (st, axis=0 )), ]
-
+
             st = []
             for i in range(n_samples):
                 ar = D[i], DD[i]
                 st.append ( np.concatenate ( ar, axis=1) )
             result += [ ('SAEHD dst-dst', np.concatenate (st, axis=0 )), ]
-
+
             st = []
             for i in range(n_samples):
                 ar = D[i], SD[i]
                 st.append ( np.concatenate ( ar, axis=1) )
             result += [ ('SAEHD pred', np.concatenate (st, axis=0 )), ]
-
+
             st_m = []
             for i in range(n_samples):
                 ar = S[i]*target_srcm[i], SS[i]
-                st_m.append ( np.concatenate ( ar, axis=1) )
+                st_m.append ( np.concatenate ( ar, axis=1) )
             result += [ ('SAEHD masked src-src', np.concatenate (st_m, axis=0 )), ]
-
+
             st_m = []
             for i in range(n_samples):
                 ar = D[i]*target_dstm[i], DD[i]*DDM[i]
-                st_m.append ( np.concatenate ( ar, axis=1) )
+                st_m.append ( np.concatenate ( ar, axis=1) )
             result += [ ('SAEHD masked dst-dst', np.concatenate (st_m, axis=0 )), ]
-
+
             st_m = []
             for i in range(n_samples):
-                ar = D[i]*target_dstm[i], SD[i]*(DDM[i]*SDM[i])
-                st_m.append ( np.concatenate ( ar, axis=1) )
+                SD_mask = DDM[i]*SDM[i] if self.face_type < FaceType.HEAD else SDM[i]
+                ar = D[i]*target_dstm[i], SD[i]*SD_mask
+                st_m.append ( np.concatenate ( ar, axis=1) )
             result += [ ('SAEHD masked pred', np.concatenate (st_m, axis=0 )), ]
-
+
         return result

     def predictor_func (self, face=None):
@@ -653,8 +657,7 @@ class SAEHDModel(ModelBase):
         bgr, mask_dst_dstm, mask_src_dstm = [ nn.to_data_format(x,"NHWC", self.model_data_format).astype(np.float32) for x in self.AE_merge (face) ]

-        mask = mask_dst_dstm[0] * mask_src_dstm[0]
-        return bgr[0], mask[...,0]
+        return bgr[0], mask_src_dstm[0][...,0], mask_dst_dstm[0][...,0]

     #override
     def get_MergerConfig(self):
diff --git a/models/Model_XSeg/Model.py b/models/Model_XSeg/Model.py
index 6f9bb2f..fb99530 100644
--- a/models/Model_XSeg/Model.py
+++ b/models/Model_XSeg/Model.py
@@ -21,16 +21,15 @@ class XSegModel(ModelBase):
         self.set_batch_size(4)

         ask_override = self.ask_override()
-
-        default_face_type = self.options['face_type'] = self.load_or_def_option('face_type', 'wf')
-
-        if not self.is_first_run() and ask_override:
-            self.restart_training = io.input_bool(f"Restart training?", False, help_message="Reset model weights and start training from scratch.")
-        else:
-            self.restart_training = False
+        if not self.is_first_run() and ask_override:
+            if io.input_bool(f"Restart training?", False, help_message="Reset model weights and start training from scratch."):
+                self.set_iter(0)
+
+        default_face_type = self.options['face_type'] = self.load_or_def_option('face_type', 'wf')
+
         if self.is_first_run():
-            self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf'], help_message="Half / mid face / full face / whole face. Choose the same as your deepfake model.").lower()
+            self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf','head'], help_message="Half / mid face / full face / whole face / head. Choose the same as your deepfake model.").lower()
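
In the SAEHD preview hunks above, the src-dst prediction is masked with DDM*SDM for face-only types but with SDM alone for HEAD; FaceType is an IntEnum in facelib, so the '<' comparison works on the underlying integer values. A schematic restatement of that choice (the helper name is mine, not the patch's):

```python
from facelib import FaceType  # repo module; FaceType values are ordered ints

def preview_sd_mask(ddm, sdm, face_type):
    # Mirrors onGetPreview: intersect dst-dst and src-dst masks for
    # face-only types; for HEAD use the src-dst mask alone.
    return ddm * sdm if face_type < FaceType.HEAD else sdm
```
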
     #override
@@ -44,14 +43,13 @@ class XSegModel(ModelBase):
         devices = device_config.devices

         self.resolution = resolution = 256
-
-        if self.restart_training:
-            self.set_iter(0)
+

         self.face_type = {'h'  : FaceType.HALF,
                           'mf' : FaceType.MID_FULL,
                           'f'  : FaceType.FULL,
-                          'wf' : FaceType.WHOLE_FACE}[ self.options['face_type'] ]
+                          'wf' : FaceType.WHOLE_FACE,
+                          'head' : FaceType.HEAD}[ self.options['face_type'] ]

         place_model_on_cpu = len(devices) == 0
         models_opt_device = '/CPU:0' if place_model_on_cpu else '/GPU:0'
diff --git a/samplelib/SampleProcessor.py b/samplelib/SampleProcessor.py
index effcd97..f5faf1d 100644
--- a/samplelib/SampleProcessor.py
+++ b/samplelib/SampleProcessor.py
@@ -127,13 +127,7 @@ class SampleProcessor(object):
                 if face_type is None:
                     raise ValueError("face_type must be defined for face samples")

-                if face_type > sample.face_type:
-                    raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.' % (sample.filename, sample.face_type, face_type) )
-
-
-                if sample_type == SPST.FACE_MASK:
-
-
+                if sample_type == SPST.FACE_MASK:
                     if face_mask_type == SPFMT.FULL_FACE:
                         img = get_full_face_mask()
                     elif face_mask_type == SPFMT.EYES: