added new extractor: S3FD,

all extractors now produce fewer false-positive faces
2025-07-05 12:36:42 -07:00 · 2019-03-10 23:18:10 +04:00 · 2019-03-10 23:18:10 +04:00 · fbf39d2727
commit fbf39d2727
parent 9440224556
10 changed files with 83 additions and 112 deletions
--- a/.gitignore
+++ b/.gitignore
@ -4,14 +4,5 @@
 !*.txt
 !*.jpg
 !requirements*
 !doc
 !facelib
 !gpufmkmgr
 !localization
 !mainscripts
 !mathlib
 !models
 !nnlib
 !utils
 !Dockerfile*
 !*.sh
--- a/doc/manual_ru.pdf
+++ b/doc/manual_ru.pdf
--- a/doc/manual_ru_source.xml
+++ b/doc/manual_ru_source.xml
--- a/facelib/2DFAN-4.h5
+++ b/facelib/2DFAN-4.h5
--- a/facelib/LandmarksExtractor.py
+++ b/facelib/LandmarksExtractor.py
@ -3,101 +3,18 @@ import os
 import cv2
 from pathlib import Path
 def transform(point, center, scale, resolution):
    """Map a point from the crop frame back to the frame `center` is given in.

    A forward affine matrix is built that scales by ``resolution / h`` (with
    ``h = 200.0 * scale``) and shifts so that `center` lands on the middle of
    a ``resolution``-sized square. The inverse of that matrix is applied to
    `point`.

    point      -- indexable pair (x, y) in crop coordinates
    center     -- indexable pair (x, y), centre of the crop box
    scale      -- box scale; the box side is 200.0 * scale
    resolution -- side length of the crop frame

    Returns a length-2 numpy array (x, y).
    """
    box_size = 200.0 * scale
    zoom = resolution / box_size
    forward = np.array ( [ [zoom, 0.0,  resolution * ( -center[0] / box_size + 0.5 )],
                           [0.0,  zoom, resolution * ( -center[1] / box_size + 0.5 )],
                           [0.0,  0.0,  1.0] ] )
    # Homogeneous coordinates so the translation is part of the matrix product.
    homogeneous = np.array ( [point[0], point[1], 1.0] )
    backward = np.linalg.inv(forward)
    return np.matmul (backward, homogeneous)[0:2]
 def crop(image, center, scale, resolution=256.0):
    """Cut the box described by `center`/`scale` out of `image` and resize it.

    The box corners are obtained by mapping crop-frame points (1, 1) and
    (resolution, resolution) through `transform`. Parts of the box that fall
    outside `image` stay zero in the result.

    image      -- numpy array, (H, W) or (H, W, C)
    center     -- indexable pair (x, y), centre of the box
    scale      -- box scale forwarded to `transform`
    resolution -- side length of the returned square image

    Returns a uint8 array resized to (resolution, resolution) by cv2.resize.
    """
    # np.int was removed in NumPy 1.24; the builtin int is the type it aliased.
    ul = transform([1, 1], center, scale, resolution).astype( int )
    br = transform([resolution, resolution], center, scale, resolution).astype( int )

    # Zero canvas the size of the box; keeps the channel axis when there is one.
    newDim = (br[1] - ul[1], br[0] - ul[0]) + tuple(image.shape[2:])
    newImg = np.zeros(newDim, dtype=np.uint8)

    ht = image.shape[0]
    wd = image.shape[1]
    # 1-based [start, end] ranges: new* index the canvas, old* index the source.
    newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32)
    newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32)
    oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32)
    oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32)
    # Only the two spatial axes are sliced so that 2-D (grayscale) input works
    # too; a trailing ", :" would raise IndexError on a 2-D array.
    newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1] ]
    newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR)
    return newImg
 def get_pts_from_predict(a, center, scale):
    """Turn a stack of heatmaps into points via `transform`.

    For every heatmap the arg-max cell is located, nudged a quarter of a cell
    towards the larger neighbour on each axis (only when the peak is not on
    the border), moved to the cell centre (+0.5) and finally passed through
    `transform` with the heatmap width as resolution.

    a      -- numpy array of shape (N, H, W), one heatmap per point
    center -- indexable pair (x, y) forwarded to `transform`
    scale  -- scale forwarded to `transform`

    Returns a list of N length-2 arrays (x, y).
    """
    num_pts, height, width = a.shape[0], a.shape[1], a.shape[2]
    flat_peak = a.reshape ( (num_pts, height*width) ).argmax(1)

    # np.float was removed in NumPy 1.24; the builtin float is the type it aliased.
    c = np.empty ( (num_pts, 2), dtype=float )
    c[:,0] = flat_peak % width     # column of the peak
    c[:,1] = flat_peak // width    # row of the peak

    for i in range(num_pts):
        pX, pY = int(c[i,0]), int(c[i,1])
        # Bounds come from the heatmap itself instead of a fixed 63, so the
        # neighbour look-ups stay in range for any heatmap size.
        if 0 < pX < width - 1 and 0 < pY < height - 1:
            diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] )
            c[i] += np.sign(diff)*0.25

    c += 0.5
    return [ transform (c[i], center, scale, width) for i in range(num_pts) ]
 class LandmarksExtractor(object):
    def __init__ (self, keras):
        self.keras = keras
        K = self.keras.backend
-        class TorchBatchNorm2D(self.keras.layers.Layer):
+  
            def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, **kwargs):
                super(TorchBatchNorm2D, self).__init__(**kwargs)
                self.supports_masking = True
                self.axis = axis
                self.momentum = momentum
                self.epsilon = epsilon
            def build(self, input_shape):
                dim = input_shape[self.axis]
                if dim is None:
                    raise ValueError('Axis ' + str(self.axis) + ' of ' 'input tensor should have a defined dimension ' 'but the layer received an input with shape ' + str(input_shape) + '.')
                shape = (dim,)
                self.gamma = self.add_weight(shape=shape, name='gamma', initializer='ones', regularizer=None, constraint=None)
                self.beta = self.add_weight(shape=shape, name='beta', initializer='zeros', regularizer=None, constraint=None)
                self.moving_mean = self.add_weight(shape=shape, name='moving_mean', initializer='zeros', trainable=False)            
                self.moving_variance = self.add_weight(shape=shape, name='moving_variance', initializer='ones', trainable=False)            
                self.built = True
            def call(self, inputs, training=None):
                input_shape = K.int_shape(inputs)
                broadcast_shape = [1] * len(input_shape)
                broadcast_shape[self.axis] = input_shape[self.axis]
                broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape)
                broadcast_moving_variance = K.reshape(self.moving_variance, broadcast_shape)
                broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
                broadcast_beta = K.reshape(self.beta, broadcast_shape)        
                invstd = K.ones (shape=broadcast_shape, dtype='float32') / K.sqrt(broadcast_moving_variance + K.constant(self.epsilon, dtype='float32'))
                return (inputs - broadcast_moving_mean) * invstd * broadcast_gamma + broadcast_beta
            def get_config(self):
                config = { 'axis': self.axis, 'momentum': self.momentum, 'epsilon': self.epsilon }
                base_config = super(TorchBatchNorm2D, self).get_config()
                return dict(list(base_config.items()) + list(config.items()))
        self.TorchBatchNorm2D = TorchBatchNorm2D
    def __enter__(self):        
        keras_model_path = Path(__file__).parent / "2DFAN-4.h5"
        if not keras_model_path.exists():
            return None
-        self.keras_model = self.keras.models.load_model ( str(keras_model_path), custom_objects={'TorchBatchNorm2D': self.TorchBatchNorm2D} ) 
+        self.keras_model = self.keras.models.load_model (str(keras_model_path)) 
        return self
@ -116,13 +33,58 @@ class LandmarksExtractor(object):
            center[1] -= (bottom - top) * 0.12
            scale = (right - left + bottom - top) / 195.0
-            image = crop(input_image, center, scale).transpose ( (2,0,1) ).astype(np.float32) / 255.0
+            image = self.crop(input_image, center, scale).astype(np.float32)
            image = np.expand_dims(image, 0)
-            predicted = self.keras_model.predict (image)
+            predicted = self.keras_model.predict (image).transpose (0,3,1,2)
-                
+            
-            pts_img = get_pts_from_predict ( predicted[-1], center, scale)
+            pts_img = self.get_pts_from_predict ( predicted[-1], center, scale)
            pts_img = [ ( int(pt[0]), int(pt[1]) ) for pt in pts_img ]             
            landmarks.append ( ( (left, top, right, bottom),pts_img ) )
        return landmarks
    def transform(self, point, center, scale, resolution):
        """Map a point from the crop frame back to the frame `center` is given in.

        The forward affine matrix scales by ``resolution / h`` (with
        ``h = 200.0 * scale``) and shifts so that `center` lands on the middle
        of a ``resolution``-sized square; its inverse is applied to `point`.

        point      -- indexable pair (x, y) in crop coordinates
        center     -- indexable pair (x, y), centre of the crop box
        scale      -- box scale; the box side is 200.0 * scale
        resolution -- side length of the crop frame

        Returns a length-2 numpy array (x, y).
        """
        box_size = 200.0 * scale
        zoom = resolution / box_size
        shift_x = resolution * ( -center[0] / box_size + 0.5 )
        shift_y = resolution * ( -center[1] / box_size + 0.5 )
        forward = np.array ( [ [zoom, 0.0,  shift_x],
                               [0.0,  zoom, shift_y],
                               [0.0,  0.0,  1.0] ] )
        # Homogeneous coordinates so the translation is part of the product.
        homogeneous = np.array ( [point[0], point[1], 1.0] )
        return np.matmul (np.linalg.inv(forward), homogeneous)[0:2]
    def crop(self, image, center, scale, resolution=256.0):
        """Cut the box described by `center`/`scale` out of `image` and resize it.

        The box corners are obtained by mapping crop-frame points (1, 1) and
        (resolution, resolution) through `self.transform`. Parts of the box
        that fall outside `image` stay zero in the result.

        image      -- numpy array, (H, W) or (H, W, C)
        center     -- indexable pair (x, y), centre of the box
        scale      -- box scale forwarded to `self.transform`
        resolution -- side length of the returned square image

        Returns a uint8 array resized to (resolution, resolution) by cv2.resize.
        """
        # np.int was removed in NumPy 1.24; the builtin int is the type it aliased.
        ul = self.transform([1, 1], center, scale, resolution).astype( int )
        br = self.transform([resolution, resolution], center, scale, resolution).astype( int )

        # Zero canvas the size of the box; keeps the channel axis when present.
        newDim = (br[1] - ul[1], br[0] - ul[0]) + tuple(image.shape[2:])
        newImg = np.zeros(newDim, dtype=np.uint8)

        ht = image.shape[0]
        wd = image.shape[1]
        # 1-based [start, end] ranges: new* index the canvas, old* the source.
        newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32)
        newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32)
        oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32)
        oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32)
        # Only the two spatial axes are sliced so that 2-D (grayscale) input
        # works too; a trailing ", :" would raise IndexError on a 2-D array.
        newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1] ]
        newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR)
        return newImg
    def get_pts_from_predict(self, a, center, scale):
        b = a.reshape ( (a.shape[0], a.shape[1]*a.shape[2]) )    
        c = b.argmax(1).reshape ( (a.shape[0], 1) ).repeat(2, axis=1).astype(np.float)
        c[:,0] %= a.shape[2]    
        c[:,1] = np.apply_along_axis ( lambda x: np.floor(x / a.shape[2]), 0, c[:,1] )
        for i in range(a.shape[0]):
            pX, pY = int(c[i,0]), int(c[i,1])
            if pX > 0 and pX < 63 and pY > 0 and pY < 63:
                diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] )
                c[i] += np.sign(diff)*0.25
        c += 0.5
        return [ self.transform (c[i], center, scale, a.shape[2]) for i in range(a.shape[0]) ]
--- a/facelib/S3FD.h5
+++ b/facelib/S3FD.h5
--- a/facelib/init.py
+++ b/facelib/init.py
@ -1,4 +1,5 @@
 from .FaceType import FaceType
 from .DLIBExtractor import DLIBExtractor
 from .MTCExtractor import MTCExtractor
 from .S3FDExtractor import S3FDExtractor
 from .LandmarksExtractor import LandmarksExtractor
--- a/main.py
+++ b/main.py
@ -39,7 +39,7 @@ if __name__ == "__main__":
    extract_parser.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.")
    extract_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Writes debug images to [output_dir]_debug\ directory.")    
    extract_parser.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'")    
-    extract_parser.add_argument('--detector', dest="detector", choices=['dlib','mt','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.")
+    extract_parser.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.")
    extract_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
    extract_parser.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.")
    extract_parser.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.")
--- a/mainscripts/Extractor.py
+++ b/mainscripts/Extractor.py
@ -6,6 +6,7 @@ import multiprocessing
 import shutil
 from pathlib import Path
 import numpy as np
 import mathlib
 import cv2
 from utils import Path_utils
 from utils.DFLJPG import DFLJPG
@ -47,6 +48,9 @@ class ExtractSubprocessor(Subprocessor):
                    elif self.detector == 'dlib':
                        nnlib.import_dlib (device_config)
                        self.e = facelib.DLIBExtractor(nnlib.dlib)
                    elif self.detector == 's3fd':
                        nnlib.import_all (device_config)
                        self.e = facelib.S3FDExtractor()
                    else:
                        raise ValueError ("Wrond detector type.")
@ -104,15 +108,11 @@ class ExtractSubprocessor(Subprocessor):
                        debug_output_file = '{}{}'.format( str(Path(str(self.output_path) + '_debug') / filename_path.stem),  '.jpg')
                        debug_image = image.copy()
-                    for (face_idx, face) in enumerate(faces):         
+                    face_idx = 0
-                        output_file = '{}_{}{}'.format(str(self.output_path / filename_path.stem), str(face_idx), '.jpg')
+                    for face in faces:   
-                        
+                        rect = np.array(face[0])
                        rect = face[0]
                        image_landmarks = np.array(face[1])
                        if self.debug:
                            LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type)
                        if self.face_type == FaceType.MARK_ONLY:                        
                            face_image = image
                            face_image_landmarks = image_landmarks
@ -120,6 +120,20 @@ class ExtractSubprocessor(Subprocessor):
                            image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type)       
                            face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4)
                            face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat)
                            landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], image_to_face_mat, True)
                            rect_area      = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]]))
                            landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] )
                            if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area
                                continue
                        if self.debug:
                            LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type)
                        output_file = '{}_{}{}'.format(str(self.output_path / filename_path.stem), str(face_idx), '.jpg')
                        face_idx += 1
                        if src_dflimg is not None:
                            #if extracting from dflimg just copy it in order not to lose quality
@ -199,13 +213,13 @@ class ExtractSubprocessor(Subprocessor):
            cpu_only = True
        if not cpu_only and (type == 'rects' or type == 'landmarks'):
-            if type == 'rects' and self.detector == 'mt' and nnlib.device.backend == "plaidML":
+            if type == 'rects' and (self.detector == 'mt' or self.detector == 's3fd') and nnlib.device.backend == "plaidML":
                cpu_only = True
            else:
                if multi_gpu:
                    devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2)
                if not multi_gpu or len(devices) == 0:
-                    devices = [nnlib.device.getBestValidDeviceIdx()]                    
+                    devices = [nnlib.device.getBestValidDeviceIdx()]
                if len(devices) == 0:
                    devices = [0]
@ -213,7 +227,7 @@ class ExtractSubprocessor(Subprocessor):
                    dev_name = nnlib.device.getDeviceName(idx)
                    dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx)
-                    if not self.manual and ( (self.type == 'rects') ):
+                    if not self.manual and ( self.type == 'rects' and self.detector != 's3fd' ):
                        for i in range ( int (max (1, dev_vram / 2) ) ):
                            yield (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram)
                    else:
--- a/mathlib/init.py
+++ b/mathlib/init.py
@ -19,4 +19,7 @@ def rotationMatrixToEulerAngles(R) :
        x = math.atan2(-R[1,2], R[1,1])
        y = math.atan2(-R[2,0], sy)
        z = 0 
-    return np.array([x, y, z])
+    return np.array([x, y, z])
 def polygon_area(x,y):
    """Area of a polygon from its vertex coordinates (shoelace formula).

    x, y -- equal-length sequences with the x and y coordinate of every
            vertex, listed in order around the polygon.

    Returns half the absolute difference of the two cross sums, so the
    result does not depend on the winding direction.
    """
    x_times_prev_y = np.dot(x, np.roll(y, 1))
    y_times_prev_x = np.dot(y, np.roll(x, 1))
    return 0.5 * np.abs(x_times_prev_y - y_times_prev_x)