Merge pull request #27 from faceshiftlabs/feat/consistent_dpi

Feat/consistent dpi
2025-08-22 06:23:20 -07:00 · 2019-08-23 14:04:17 -07:00 · 2019-08-23 14:04:17 -07:00 · 2add4e2bad
commit 2add4e2bad
parent 1110bc33c8 953cc81db3
6 changed files with 58 additions and 45 deletions
--- a/facelib/LandmarksProcessor.py
+++ b/facelib/LandmarksProcessor.py
@ -149,6 +149,7 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
    else:
        raise ValueError ('wrong face_type: ', face_type)

+
    mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2]
    mat = mat * (output_size - 2 * padding)
    mat[:,2] += padding
--- a/facelib/S3FDExtractor.py
+++ b/facelib/S3FDExtractor.py
@ -3,26 +3,39 @@ from pathlib import Path
 import cv2
 from nnlib import nnlib

+
 class S3FDExtractor(object):
+    """
+    S3FD: Single Shot Scale-invariant Face Detector
+    https://arxiv.org/pdf/1708.05237.pdf
+    """
    def __init__(self):
-        exec( nnlib.import_all(), locals(), globals() )
+        exec(nnlib.import_all(), locals(), globals())

        model_path = Path(__file__).parent / "S3FD.h5"
        if not model_path.exists():
-            return None
+            raise Exception(f'Could not find S3DF model at path {model_path}')

-        self.model = nnlib.keras.models.load_model ( str(model_path) )
+        self.model = nnlib.keras.models.load_model(str(model_path))

    def __enter__(self):
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
-        return False #pass exception between __enter__ and __exit__ to outter level
+        return False  # propagate any exception raised inside the with-block to the outer level

-    def extract (self, input_image, is_bgr=True):
+    def extract(self, input_image, is_bgr=True, nms_thresh=0.3):
+        """
+        Extracts the bounding boxes for all faces found in image
+        :param input_image: The image to look for faces in
+        :param is_bgr: Is this image in OpenCV's BGR color mode, if not, assume RGB color mode
+        :param nms_thresh: The NMS (non-maximum suppression) threshold. Of all bounding boxes found, only return
+        bounding boxes with an overlap ratio less than or equal to the threshold
+        :return: A list of [left, top, right, bottom] integer bounding boxes, one per detected face
+        """

        if is_bgr:
-            input_image = input_image[:,:,::-1]
+            input_image = input_image[:, :, ::-1]
            is_bgr = False

        (h, w, ch) = input_image.shape
@ -32,35 +45,36 @@ class S3FDExtractor(object):
        scale_to = max(64, scale_to)

        input_scale = d / scale_to
-        input_image = cv2.resize (input_image, ( int(w/input_scale), int(h/input_scale) ), interpolation=cv2.INTER_LINEAR)
+        input_image = cv2.resize(input_image, (int(w / input_scale), int(h / input_scale)),
+                                 interpolation=cv2.INTER_LINEAR)

-        olist = self.model.predict( np.expand_dims(input_image,0) )
+        olist = self.model.predict(np.expand_dims(input_image, 0))

        detected_faces = []
-        for ltrb in self.refine (olist):
-            l,t,r,b = [ x*input_scale for x in ltrb]
-            bt = b-t
-            if min(r-l,bt) < 40: #filtering faces < 40pix by any side
+        for ltrb in self._refine(olist, nms_thresh):
+            l, t, r, b = [x * input_scale for x in ltrb]
+            bt = b - t
+            if min(r - l, bt) < 40:  # skip faces smaller than 40 px in width or height
                continue
-            b += bt*0.1 #enlarging bottom line a bit for 2DFAN-4, because default is not enough covering a chin
-            detected_faces.append ( [int(x) for x in (l,t,r,b) ] )
+            b += bt * 0.1  # extend the bottom edge by 10% for 2DFAN-4, because the default box does not fully cover the chin
+            detected_faces.append([int(x) for x in (l, t, r, b)])

        return detected_faces

-    def refine(self, olist):
+    def _refine(self, olist, thresh):
        bboxlist = []
-        for i, ((ocls,), (oreg,)) in enumerate ( zip ( olist[::2], olist[1::2] ) ):
-            stride = 2**(i + 2)    # 4,8,16,32,64,128
+        for i, ((ocls,), (oreg,)) in enumerate(zip(olist[::2], olist[1::2])):
+            stride = 2 ** (i + 2)  # 4,8,16,32,64,128
            s_d2 = stride / 2
            s_m4 = stride * 4

            for hindex, windex in zip(*np.where(ocls > 0.05)):
                score = ocls[hindex, windex]
-                loc   = oreg[hindex, windex, :]
+                loc = oreg[hindex, windex, :]
                priors = np.array([windex * stride + s_d2, hindex * stride + s_d2, s_m4, s_m4])
                priors_2p = priors[2:]
                box = np.concatenate((priors[:2] + loc[:2] * 0.1 * priors_2p,
-                                      priors_2p * np.exp(loc[2:] * 0.2)) )
+                                      priors_2p * np.exp(loc[2:] * 0.2)))
                box[:2] -= box[2:] / 2
                box[2:] += box[:2]

@ -69,12 +83,11 @@ class S3FDExtractor(object):
        bboxlist = np.array(bboxlist)
        if len(bboxlist) == 0:
            bboxlist = np.zeros((1, 5))
-
-        bboxlist = bboxlist[self.refine_nms(bboxlist, 0.3), :]
-        bboxlist = [ x[:-1].astype(np.int) for x in bboxlist if x[-1] >= 0.5]
+        bboxlist = bboxlist[self._refine_nms(bboxlist, thresh), :]
+        bboxlist = [x[:-1].astype(np.int) for x in bboxlist if x[-1] >= 0.5]
        return bboxlist

-    def refine_nms(self, dets, thresh):
+    def _refine_nms(self, dets, nms_thresh):
        keep = list()
        if len(dets) == 0:
            return keep
@ -93,6 +106,6 @@ class S3FDExtractor(object):
            width, height = np.maximum(0.0, xx_2 - xx_1 + 1), np.maximum(0.0, yy_2 - yy_1 + 1)
            ovr = width * height / (areas[i] + areas[order[1:]] - width * height)

-            inds = np.where(ovr <= thresh)[0]
+            inds = np.where(ovr <= nms_thresh)[0]
            order = order[inds + 1]
        return keep
--- a/guilib/main.py
+++ b/guilib/main.py
--- a/imagelib/warp.py
+++ b/imagelib/warp.py
@ -4,8 +4,8 @@ from utils import random_utils

 def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05]  ):
    h,w,c = source.shape
-    if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024):
-        raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.')
+    #if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024):
+    #    raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.')

    rotation = np.random.uniform( rotation_range[0], rotation_range[1] )
    scale = np.random.uniform(1 +scale_range[0], 1 +scale_range[1])
--- a/mainscripts/Extractor.py
+++ b/mainscripts/Extractor.py
@ -41,7 +41,6 @@ class ExtractSubprocessor(Subprocessor):
        #override
        def on_initialize(self, client_dict):
            self.type         = client_dict['type']
-            self.image_size   = client_dict['image_size']
            self.face_type    = client_dict['face_type']
            self.device_idx   = client_dict['device_idx']
            self.cpu_only     = client_dict['device_type'] == 'CPU'
@ -126,6 +125,8 @@ class ExtractSubprocessor(Subprocessor):
                else:
                    h, w, ch = image.shape

+                #self.image_size = h
+
                if ch == 1:
                    image = np.repeat (image, 3, -1)
                elif ch == 4:
@ -230,6 +231,8 @@ class ExtractSubprocessor(Subprocessor):
                            continue

                        rect = np.array(rect)
+                        rect_area = mathlib.polygon_area(np.array(rect[[0, 2, 2, 0]]), np.array(rect[[1, 1, 3, 3]]))
+                        self.image_size = int(math.sqrt(rect_area))

                        if self.face_type == FaceType.MARK_ONLY:
                            image_to_face_mat = None
@ -243,7 +246,7 @@ class ExtractSubprocessor(Subprocessor):

                            landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], image_to_face_mat, True)

-                            rect_area      = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]]))
+
                            landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] )

                            if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area
@ -305,10 +308,9 @@ class ExtractSubprocessor(Subprocessor):
            return data.filename

    #override
-    def __init__(self, input_data, type, image_size=None, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, final_output_path=None):
+    def __init__(self, input_data, type, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, final_output_path=None):
        self.input_data = input_data
        self.type = type
-        self.image_size = image_size
        self.face_type = face_type
        self.debug_dir = debug_dir
        self.final_output_path = final_output_path
@ -361,7 +363,6 @@ class ExtractSubprocessor(Subprocessor):
    #override
    def process_info_generator(self):
        base_dict = {'type' : self.type,
-                     'image_size': self.image_size,
                     'face_type': self.face_type,
                     'debug_dir': self.debug_dir,
                     'final_output_dir': str(self.final_output_path),
@ -560,6 +561,7 @@ class ExtractSubprocessor(Subprocessor):
                self.landmarks = landmarks[0]

            (h,w,c) = self.image.shape
+            self.image_size = h

            if not self.hide_help:
                image = cv2.addWeighted (self.image,1.0,self.text_lines_img,1.0,0)
@ -731,7 +733,6 @@ def extract_fanseg(input_dir, device_args={} ):
        data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in paths_to_extract ], 'fanseg', multi_gpu=multi_gpu, cpu_only=cpu_only).run()

 def extract_umd_csv(input_file_csv,
-                    image_size=256,
                    face_type='full_face',
                    device_args={} ):

@ -798,7 +799,7 @@ def extract_umd_csv(input_file_csv,
        data = ExtractSubprocessor (data, 'landmarks', multi_gpu=multi_gpu, cpu_only=cpu_only).run()

        io.log_info ('Performing 3rd pass...')
-        data = ExtractSubprocessor (data, 'final', image_size, face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+        data = ExtractSubprocessor (data, 'final', face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
        faces_detected += sum([d.faces_detected for d in data])


@ -814,7 +815,6 @@ def main(input_dir,
         manual_fix=False,
         manual_output_debug_fix=False,
         manual_window_size=1368,
-         image_size=256,
         face_type='full_face',
         device_args={}):

@ -867,16 +867,16 @@ def main(input_dir,
    if images_found != 0:
        if detector == 'manual':
            io.log_info ('Performing manual extract...')
-            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run()
+            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'landmarks', face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run()
        else:
            io.log_info ('Performing 1st pass...')
-            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()
+            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()

            io.log_info ('Performing 2nd pass...')
-            data = ExtractSubprocessor (data, 'landmarks', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()
+            data = ExtractSubprocessor (data, 'landmarks',  face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()

        io.log_info ('Performing 3rd pass...')
-        data = ExtractSubprocessor (data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+        data = ExtractSubprocessor (data, 'final',  face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
        faces_detected += sum([d.faces_detected for d in data])

        if manual_fix:
@ -885,8 +885,8 @@ def main(input_dir,
            else:
                fix_data = [ ExtractSubprocessor.Data(d.filename) for d in data if d.faces_detected == 0 ]
                io.log_info ('Performing manual fix for %d images...' % (len(fix_data)) )
-                fix_data = ExtractSubprocessor (fix_data, 'landmarks', image_size, face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run()
-                fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+                fix_data = ExtractSubprocessor (fix_data, 'landmarks',  face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run()
+                fix_data = ExtractSubprocessor (fix_data, 'final',  face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
                faces_detected += sum([d.faces_detected for d in fix_data])


--- a/mainscripts/MaskEditorTool.py
+++ b/mainscripts/MaskEditorTool.py
@ -25,8 +25,8 @@ class MaskEditor:
        self.img = imagelib.normalize_channels (img,3)
        h, w, c = img.shape

-        if h != w and w != 256:
-            #to support any square res, scale img,mask and ie_polys to 256, then scale ie_polys back on .get_ie_polys()
+        if h != w or w != 256:
+            self.img = cv2.resize(img, (256,256))
            raise Exception ("MaskEditor does not support image size != 256x256")

        ph, pw = h // 4, w // 4 #pad wh
@ -257,7 +257,7 @@ class MaskEditor:
                preview_images += [ np.concatenate (prev_images, axis=1) ]

            img = np.full ( (prh,prw, sc), (0,0,1), dtype=np.float )
-            img[border:-border,border:-border] = cv2.resize( self.img, max_wh_bordered )
+            img[border:-border,border:-border] = cv2.resize( self.img, 256 )

            preview_images += [ img ]

@ -276,7 +276,6 @@ class MaskEditor:
            self.preview_images = np.clip(preview_images * 255, 0, 255 ).astype(np.uint8)

        status_img = self.get_screen_status_block( screens.shape[1], screens.shape[2] )
-
        result = np.concatenate ( [self.preview_images, screens, status_img], axis=0  )

        return result