added new extractor: S3FD,

all extractors now produce fewer false-positive faces
This commit is contained in:
iperov 2019-03-10 23:18:10 +04:00
parent 9440224556
commit fbf39d2727
10 changed files with 83 additions and 112 deletions

9
.gitignore vendored
View file

@ -4,14 +4,5 @@
!*.txt !*.txt
!*.jpg !*.jpg
!requirements* !requirements*
!doc
!facelib
!gpufmkmgr
!localization
!mainscripts
!mathlib
!models
!nnlib
!utils
!Dockerfile* !Dockerfile*
!*.sh !*.sh

Binary file not shown.

File diff suppressed because one or more lines are too long

Binary file not shown.

View file

@ -3,101 +3,18 @@ import os
import cv2 import cv2
from pathlib import Path from pathlib import Path
def transform(point, center, scale, resolution):
    """Map a point from crop coordinates back to source-image coordinates.

    A forward affine matrix is built that scales by ``resolution / h``
    (with ``h = 200.0 * scale``) and translates so that ``center`` lands on
    ``resolution / 2``. The point is then pushed through the *inverse* of
    that matrix.

    Args:
        point: indexable with x at [0] and y at [1], in crop space.
        center: indexable with x at [0] and y at [1], in image space.
        scale: multiplier of the 200-unit reference box
            (presumably the face-box size convention of the FAN model -
            TODO confirm).
        resolution: side length of the crop space.

    Returns:
        numpy array with two elements, (x, y) in image space.
    """
    box_side = 200.0 * scale
    zoom = resolution / box_side

    # Forward mapping image -> crop; only its inverse is applied below.
    forward = np.eye(3)
    forward[0, 0] = zoom
    forward[1, 1] = zoom
    forward[0, 2] = resolution * ( -center[0] / box_side + 0.5 )
    forward[1, 2] = resolution * ( -center[1] / box_side + 0.5 )

    homogeneous = np.array ( [point[0], point[1], 1.0] )
    backward = np.linalg.inv(forward)
    return np.matmul (backward, homogeneous)[0:2]
def crop(image, center, scale, resolution=256.0):
    """Cut a square region around ``center`` and resize it to ``resolution``.

    The region corners are found by mapping crop-space points (1, 1) and
    (resolution, resolution) back into image space with ``transform``.
    Parts of the region that fall outside ``image`` stay zero (black).

    Args:
        image: 2-D (grayscale) or 3-D (H, W, C) array.
        center: (x, y) of the region centre in image coordinates.
        scale: region size multiplier passed through to ``transform``.
        resolution: side length of the returned square image.

    Returns:
        uint8 array of size int(resolution) x int(resolution), with the
        channel axis preserved when ``image`` has one.
    """
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24, so the builtin is used directly.
    ul = transform([1, 1], center, scale, resolution).astype(int)
    br = transform([resolution, resolution], center, scale, resolution).astype(int)

    if image.ndim > 2:
        newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32)
    else:
        newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int32)
    newImg = np.zeros(newDim, dtype=np.uint8)

    ht = image.shape[0]
    wd = image.shape[1]

    # Offsets are kept 1-based and converted with "- 1" when slicing
    # (presumably inherited from the reference implementation - confirm).
    newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32)
    newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32)
    oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32)
    oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32)

    # No explicit channel index: the same slice then works for 2-D images
    # too (the former trailing ", :" raised IndexError on grayscale input).
    newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1]] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1]]

    newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR)
    return newImg
def get_pts_from_predict(a, center, scale):
    """Convert per-landmark heatmaps into image-space landmark points.

    For every heatmap the arg-max cell is located, nudged a quarter of a
    cell towards the larger horizontal / vertical neighbour, offset by 0.5,
    and mapped to image coordinates with ``transform``.

    Args:
        a: array of shape (num_points, height, width), one heatmap per
            landmark.
        center: (x, y) crop centre that was used to produce the heatmaps.
        scale: crop scale that was used to produce the heatmaps.

    Returns:
        list of ``num_points`` two-element arrays (x, y) in image space.
    """
    num_pts, hm_h, hm_w = a.shape

    flat_peak = a.reshape ( (num_pts, hm_h * hm_w) ).argmax(1)

    # ``np.float`` was removed in NumPy 1.24; float64 is what it aliased.
    c = np.empty ( (num_pts, 2), dtype=np.float64 )
    c[:,0] = flat_peak % hm_w     # column of the peak
    c[:,1] = flat_peak // hm_w    # row of the peak

    for i in range(num_pts):
        pX, pY = int(c[i,0]), int(c[i,1])
        # Refine only when all four neighbours exist. Bounds come from the
        # heatmap itself instead of a hard-coded 63, so maps that are not
        # 64x64 neither index out of range nor silently skip refinement.
        if 0 < pX < hm_w - 1 and 0 < pY < hm_h - 1:
            diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] )
            c[i] += np.sign(diff)*0.25

    c += 0.5
    return [ transform (c[i], center, scale, hm_w) for i in range(num_pts) ]
class LandmarksExtractor(object): class LandmarksExtractor(object):
def __init__ (self, keras): def __init__ (self, keras):
self.keras = keras self.keras = keras
K = self.keras.backend K = self.keras.backend
class TorchBatchNorm2D(self.keras.layers.Layer):
    """Batch-normalisation layer that always applies the stored statistics.

    ``call`` computes
    ``(x - moving_mean) / sqrt(moving_variance + epsilon) * gamma + beta``
    and never updates the moving statistics; ``momentum`` is stored and
    serialised but not used in the computation.
    """

    def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, **kwargs):
        super(TorchBatchNorm2D, self).__init__(**kwargs)
        self.supports_masking = True
        self.axis = axis
        self.momentum = momentum
        self.epsilon = epsilon

    def build(self, input_shape):
        """Create one weight vector per parameter, sized by the normalised axis."""
        dim = input_shape[self.axis]
        if dim is None:
            message = ('Axis ' + str(self.axis) + ' of ' 'input tensor should have a defined dimension ' 'but the layer received an input with shape ' + str(input_shape) + '.')
            raise ValueError(message)

        weight_shape = (dim,)
        # Creation order is kept: gamma, beta, moving_mean, moving_variance.
        self.gamma = self.add_weight(shape=weight_shape, name='gamma', initializer='ones', regularizer=None, constraint=None)
        self.beta = self.add_weight(shape=weight_shape, name='beta', initializer='zeros', regularizer=None, constraint=None)
        self.moving_mean = self.add_weight(shape=weight_shape, name='moving_mean', initializer='zeros', trainable=False)
        self.moving_variance = self.add_weight(shape=weight_shape, name='moving_variance', initializer='ones', trainable=False)
        self.built = True

    def call(self, inputs, training=None):
        """Normalise ``inputs`` with the stored mean / variance and apply gamma / beta."""
        input_shape = K.int_shape(inputs)

        # All ones except on the normalised axis, so the vectors broadcast.
        broadcast_shape = [1] * len(input_shape)
        broadcast_shape[self.axis] = input_shape[self.axis]

        mean_b = K.reshape(self.moving_mean, broadcast_shape)
        variance_b = K.reshape(self.moving_variance, broadcast_shape)
        gamma_b = K.reshape(self.gamma, broadcast_shape)
        beta_b = K.reshape(self.beta, broadcast_shape)

        eps = K.constant(self.epsilon, dtype='float32')
        invstd = K.ones (shape=broadcast_shape, dtype='float32') / K.sqrt(variance_b + eps)

        return (inputs - mean_b) * invstd * gamma_b + beta_b

    def get_config(self):
        """Return the base layer config extended with this layer's own arguments."""
        base_config = super(TorchBatchNorm2D, self).get_config()
        own_config = { 'axis': self.axis, 'momentum': self.momentum, 'epsilon': self.epsilon }
        return dict(list(base_config.items()) + list(own_config.items()))
self.TorchBatchNorm2D = TorchBatchNorm2D
def __enter__(self): def __enter__(self):
keras_model_path = Path(__file__).parent / "2DFAN-4.h5" keras_model_path = Path(__file__).parent / "2DFAN-4.h5"
if not keras_model_path.exists(): if not keras_model_path.exists():
return None return None
self.keras_model = self.keras.models.load_model ( str(keras_model_path), custom_objects={'TorchBatchNorm2D': self.TorchBatchNorm2D} ) self.keras_model = self.keras.models.load_model (str(keras_model_path))
return self return self
@ -116,13 +33,58 @@ class LandmarksExtractor(object):
center[1] -= (bottom - top) * 0.12 center[1] -= (bottom - top) * 0.12
scale = (right - left + bottom - top) / 195.0 scale = (right - left + bottom - top) / 195.0
image = crop(input_image, center, scale).transpose ( (2,0,1) ).astype(np.float32) / 255.0 image = self.crop(input_image, center, scale).astype(np.float32)
image = np.expand_dims(image, 0) image = np.expand_dims(image, 0)
predicted = self.keras_model.predict (image) predicted = self.keras_model.predict (image).transpose (0,3,1,2)
pts_img = get_pts_from_predict ( predicted[-1], center, scale) pts_img = self.get_pts_from_predict ( predicted[-1], center, scale)
pts_img = [ ( int(pt[0]), int(pt[1]) ) for pt in pts_img ] pts_img = [ ( int(pt[0]), int(pt[1]) ) for pt in pts_img ]
landmarks.append ( ( (left, top, right, bottom),pts_img ) ) landmarks.append ( ( (left, top, right, bottom),pts_img ) )
return landmarks return landmarks
def transform(self, point, center, scale, resolution):
    """Map a point from crop coordinates back to source-image coordinates.

    Builds the forward affine matrix (scale by ``resolution / h`` where
    ``h = 200.0 * scale``, translate so ``center`` maps to
    ``resolution / 2``) and applies its inverse to ``point``.

    Args:
        point: indexable with x at [0] and y at [1], in crop space.
        center: indexable with x at [0] and y at [1], in image space.
        scale: multiplier of the 200-unit reference box
            (presumably the face-box convention of the landmark model -
            TODO confirm).
        resolution: side length of the crop space.

    Returns:
        numpy array with two elements, (x, y) in image space.
    """
    box_side = 200.0 * scale
    zoom = resolution / box_side

    # Forward mapping image -> crop; only its inverse is applied below.
    forward = np.eye(3)
    forward[0, 0] = zoom
    forward[1, 1] = zoom
    forward[0, 2] = resolution * ( -center[0] / box_side + 0.5 )
    forward[1, 2] = resolution * ( -center[1] / box_side + 0.5 )

    homogeneous = np.array ( [point[0], point[1], 1.0] )
    backward = np.linalg.inv(forward)
    return np.matmul (backward, homogeneous)[0:2]
def crop(self, image, center, scale, resolution=256.0):
    """Cut a square region around ``center`` and resize it to ``resolution``.

    The region corners are found by mapping crop-space points (1, 1) and
    (resolution, resolution) back into image space with ``self.transform``.
    Parts of the region that fall outside ``image`` stay zero (black).

    Args:
        image: 2-D (grayscale) or 3-D (H, W, C) array.
        center: (x, y) of the region centre in image coordinates.
        scale: region size multiplier passed through to ``self.transform``.
        resolution: side length of the returned square image.

    Returns:
        uint8 array of size int(resolution) x int(resolution), with the
        channel axis preserved when ``image`` has one.
    """
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24, so the builtin is used directly.
    ul = self.transform([1, 1], center, scale, resolution).astype(int)
    br = self.transform([resolution, resolution], center, scale, resolution).astype(int)

    if image.ndim > 2:
        newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32)
    else:
        newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int32)
    newImg = np.zeros(newDim, dtype=np.uint8)

    ht = image.shape[0]
    wd = image.shape[1]

    # Offsets are kept 1-based and converted with "- 1" when slicing
    # (presumably inherited from the reference implementation - confirm).
    newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32)
    newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32)
    oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32)
    oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32)

    # No explicit channel index: the same slice then works for 2-D images
    # too (the former trailing ", :" raised IndexError on grayscale input).
    newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1]] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1]]

    newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR)
    return newImg
def get_pts_from_predict(self, a, center, scale):
b = a.reshape ( (a.shape[0], a.shape[1]*a.shape[2]) )
c = b.argmax(1).reshape ( (a.shape[0], 1) ).repeat(2, axis=1).astype(np.float)
c[:,0] %= a.shape[2]
c[:,1] = np.apply_along_axis ( lambda x: np.floor(x / a.shape[2]), 0, c[:,1] )
for i in range(a.shape[0]):
pX, pY = int(c[i,0]), int(c[i,1])
if pX > 0 and pX < 63 and pY > 0 and pY < 63:
diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] )
c[i] += np.sign(diff)*0.25
c += 0.5
return [ self.transform (c[i], center, scale, a.shape[2]) for i in range(a.shape[0]) ]

BIN
facelib/S3FD.h5 Normal file

Binary file not shown.

View file

@ -1,4 +1,5 @@
from .FaceType import FaceType from .FaceType import FaceType
from .DLIBExtractor import DLIBExtractor from .DLIBExtractor import DLIBExtractor
from .MTCExtractor import MTCExtractor from .MTCExtractor import MTCExtractor
from .S3FDExtractor import S3FDExtractor
from .LandmarksExtractor import LandmarksExtractor from .LandmarksExtractor import LandmarksExtractor

View file

@ -39,7 +39,7 @@ if __name__ == "__main__":
extract_parser.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.") extract_parser.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.")
extract_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Writes debug images to [output_dir]_debug\ directory.") extract_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Writes debug images to [output_dir]_debug\ directory.")
extract_parser.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'") extract_parser.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'")
extract_parser.add_argument('--detector', dest="detector", choices=['dlib','mt','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.") extract_parser.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.")
extract_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") extract_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
extract_parser.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.") extract_parser.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.")
extract_parser.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.") extract_parser.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.")

View file

@ -6,6 +6,7 @@ import multiprocessing
import shutil import shutil
from pathlib import Path from pathlib import Path
import numpy as np import numpy as np
import mathlib
import cv2 import cv2
from utils import Path_utils from utils import Path_utils
from utils.DFLJPG import DFLJPG from utils.DFLJPG import DFLJPG
@ -47,6 +48,9 @@ class ExtractSubprocessor(Subprocessor):
elif self.detector == 'dlib': elif self.detector == 'dlib':
nnlib.import_dlib (device_config) nnlib.import_dlib (device_config)
self.e = facelib.DLIBExtractor(nnlib.dlib) self.e = facelib.DLIBExtractor(nnlib.dlib)
elif self.detector == 's3fd':
nnlib.import_all (device_config)
self.e = facelib.S3FDExtractor()
else: else:
raise ValueError ("Wrond detector type.") raise ValueError ("Wrond detector type.")
@ -104,15 +108,11 @@ class ExtractSubprocessor(Subprocessor):
debug_output_file = '{}{}'.format( str(Path(str(self.output_path) + '_debug') / filename_path.stem), '.jpg') debug_output_file = '{}{}'.format( str(Path(str(self.output_path) + '_debug') / filename_path.stem), '.jpg')
debug_image = image.copy() debug_image = image.copy()
for (face_idx, face) in enumerate(faces): face_idx = 0
output_file = '{}_{}{}'.format(str(self.output_path / filename_path.stem), str(face_idx), '.jpg') for face in faces:
rect = np.array(face[0])
rect = face[0]
image_landmarks = np.array(face[1]) image_landmarks = np.array(face[1])
if self.debug:
LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type)
if self.face_type == FaceType.MARK_ONLY: if self.face_type == FaceType.MARK_ONLY:
face_image = image face_image = image
face_image_landmarks = image_landmarks face_image_landmarks = image_landmarks
@ -120,6 +120,20 @@ class ExtractSubprocessor(Subprocessor):
image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type) image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type)
face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4) face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4)
face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat) face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat)
landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], image_to_face_mat, True)
rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]]))
landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] )
if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area
continue
if self.debug:
LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type)
output_file = '{}_{}{}'.format(str(self.output_path / filename_path.stem), str(face_idx), '.jpg')
face_idx += 1
if src_dflimg is not None: if src_dflimg is not None:
#if extracting from dflimg just copy it in order not to lose quality #if extracting from dflimg just copy it in order not to lose quality
@ -199,13 +213,13 @@ class ExtractSubprocessor(Subprocessor):
cpu_only = True cpu_only = True
if not cpu_only and (type == 'rects' or type == 'landmarks'): if not cpu_only and (type == 'rects' or type == 'landmarks'):
if type == 'rects' and self.detector == 'mt' and nnlib.device.backend == "plaidML": if type == 'rects' and (self.detector == 'mt' or self.detector == 's3fd') and nnlib.device.backend == "plaidML":
cpu_only = True cpu_only = True
else: else:
if multi_gpu: if multi_gpu:
devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2) devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2)
if not multi_gpu or len(devices) == 0: if not multi_gpu or len(devices) == 0:
devices = [nnlib.device.getBestValidDeviceIdx()] devices = [nnlib.device.getBestValidDeviceIdx()]
if len(devices) == 0: if len(devices) == 0:
devices = [0] devices = [0]
@ -213,7 +227,7 @@ class ExtractSubprocessor(Subprocessor):
dev_name = nnlib.device.getDeviceName(idx) dev_name = nnlib.device.getDeviceName(idx)
dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx) dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx)
if not self.manual and ( (self.type == 'rects') ): if not self.manual and ( self.type == 'rects' and self.detector != 's3fd' ):
for i in range ( int (max (1, dev_vram / 2) ) ): for i in range ( int (max (1, dev_vram / 2) ) ):
yield (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram) yield (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram)
else: else:

View file

@ -19,4 +19,7 @@ def rotationMatrixToEulerAngles(R) :
x = math.atan2(-R[1,2], R[1,1]) x = math.atan2(-R[1,2], R[1,1])
y = math.atan2(-R[2,0], sy) y = math.atan2(-R[2,0], sy)
z = 0 z = 0
return np.array([x, y, z]) return np.array([x, y, z])
def polygon_area(x,y):
    """Return the unsigned area of a polygon via the shoelace formula.

    Args:
        x: sequence of vertex x coordinates, in order around the polygon.
        y: sequence of vertex y coordinates, same order and length as ``x``.

    Returns:
        Non-negative area; the absolute value makes it independent of the
        winding direction.
    """
    # np.roll(..., 1) pairs each vertex with the preceding one (wrapping).
    x_times_prev_y = np.dot(x, np.roll(y, 1))
    y_times_prev_x = np.dot(y, np.roll(x, 1))
    return 0.5*np.abs(x_times_prev_y - y_times_prev_x)