diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py
index 2d45721..976ff15 100644
--- a/facelib/LandmarksProcessor.py
+++ b/facelib/LandmarksProcessor.py
@@ -149,6 +149,7 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
     else:
         raise ValueError ('wrong face_type: ', face_type)
 
+
     mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2]
     mat = mat * (output_size - 2 * padding)
     mat[:,2] += padding
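
Note: `get_transform_mat` estimates a similarity transform (via `umeyama`) from the detected landmarks onto the canonical `landmarks_2D` template, skipping the first 17 jawline points, then rescales it to the crop size. A minimal usage sketch, assuming a loaded `image` and a 68-point `image_landmarks` array (both hypothetical names here); this mirrors how the matrix is consumed in mainscripts/Extractor.py:

    import cv2
    from facelib import FaceType, LandmarksProcessor

    # `image` and `image_landmarks` are assumed to come from the detector
    # and landmark passes; output_size is the desired crop resolution.
    output_size = 256
    mat = LandmarksProcessor.get_transform_mat(image_landmarks, output_size, FaceType.FULL)
    face_image = cv2.warpAffine(image, mat, (output_size, output_size), cv2.INTER_LANCZOS4)

    # The same matrix maps landmark coordinates into the crop's space.
    face_landmarks = LandmarksProcessor.transform_points(image_landmarks, mat)
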
diff --git a/facelib/S3FDExtractor.py b/facelib/S3FDExtractor.py
index 59e42f1..1372720 100644
--- a/facelib/S3FDExtractor.py
+++ b/facelib/S3FDExtractor.py
@@ -3,26 +3,39 @@ from pathlib import Path
 import cv2
 
 from nnlib import nnlib
 
+
 class S3FDExtractor(object):
+    """
+    S3FD: Single Shot Scale-invariant Face Detector
+    https://arxiv.org/pdf/1708.05237.pdf
+    """
     def __init__(self):
-        exec( nnlib.import_all(), locals(), globals() )
+        exec(nnlib.import_all(), locals(), globals())
 
         model_path = Path(__file__).parent / "S3FD.h5"
         if not model_path.exists():
-            return None
+            raise Exception(f'Could not find S3FD model at path {model_path}')
 
-        self.model = nnlib.keras.models.load_model ( str(model_path) )
+        self.model = nnlib.keras.models.load_model(str(model_path))
 
     def __enter__(self):
         return self
 
     def __exit__(self, exc_type=None, exc_value=None, traceback=None):
-        return False #pass exception between __enter__ and __exit__ to outter level
+        return False  # pass exception between __enter__ and __exit__ to outer level
 
-    def extract (self, input_image, is_bgr=True):
+    def extract(self, input_image, is_bgr=True, nms_thresh=0.3):
+        """
+        Extracts the bounding boxes of all faces found in the image
+        :param input_image: The image to look for faces in
+        :param is_bgr: Whether the image is in OpenCV's BGR color order; if not, RGB is assumed
+        :param nms_thresh: The NMS (non-maximum suppression) threshold. Of all bounding boxes found, only return
+            bounding boxes with an overlap ratio less than the threshold
+        :return: A list of [left, top, right, bottom] bounding boxes, one per detected face
+        """
         if is_bgr:
-            input_image = input_image[:,:,::-1]
+            input_image = input_image[:, :, ::-1]
             is_bgr = False
 
         (h, w, ch) = input_image.shape
@@ -32,35 +45,36 @@ class S3FDExtractor(object):
         scale_to = max(64, scale_to)
 
         input_scale = d / scale_to
-        input_image = cv2.resize (input_image, ( int(w/input_scale), int(h/input_scale) ), interpolation=cv2.INTER_LINEAR)
+        input_image = cv2.resize(input_image, (int(w / input_scale), int(h / input_scale)),
+                                 interpolation=cv2.INTER_LINEAR)
 
-        olist = self.model.predict( np.expand_dims(input_image,0) )
+        olist = self.model.predict(np.expand_dims(input_image, 0))
 
         detected_faces = []
-        for ltrb in self.refine (olist):
-            l,t,r,b = [ x*input_scale for x in ltrb]
-            bt = b-t
-            if min(r-l,bt) < 40: #filtering faces < 40pix by any side
+        for ltrb in self._refine(olist, nms_thresh):
+            l, t, r, b = [x * input_scale for x in ltrb]
+            bt = b - t
+            if min(r - l, bt) < 40:  # filter out faces smaller than 40px on any side
                 continue
-            b += bt*0.1 #enlarging bottom line a bit for 2DFAN-4, because default is not enough covering a chin
-            detected_faces.append ( [int(x) for x in (l,t,r,b) ] )
+            b += bt * 0.1  # enlarge the bottom edge a bit for 2DFAN-4, since the default box does not fully cover the chin
+            detected_faces.append([int(x) for x in (l, t, r, b)])
 
         return detected_faces
 
-    def refine(self, olist):
+    def _refine(self, olist, thresh):
         bboxlist = []
-        for i, ((ocls,), (oreg,)) in enumerate ( zip ( olist[::2], olist[1::2] ) ):
-            stride = 2**(i + 2)    # 4,8,16,32,64,128
+        for i, ((ocls,), (oreg,)) in enumerate(zip(olist[::2], olist[1::2])):
+            stride = 2 ** (i + 2)  # 4,8,16,32,64,128
             s_d2 = stride / 2
             s_m4 = stride * 4
 
             for hindex, windex in zip(*np.where(ocls > 0.05)):
                 score = ocls[hindex, windex]
-                loc = oreg[hindex, windex, :]
+                loc = oreg[hindex, windex, :]
                 priors = np.array([windex * stride + s_d2, hindex * stride + s_d2, s_m4, s_m4])
                 priors_2p = priors[2:]
                 box = np.concatenate((priors[:2] + loc[:2] * 0.1 * priors_2p,
-                                      priors_2p * np.exp(loc[2:] * 0.2)) )
+                                      priors_2p * np.exp(loc[2:] * 0.2)))
                 box[:2] -= box[2:] / 2
                 box[2:] += box[:2]
@@ -69,12 +83,11 @@ class S3FDExtractor(object):
         bboxlist = np.array(bboxlist)
         if len(bboxlist) == 0:
             bboxlist = np.zeros((1, 5))
-
-        bboxlist = bboxlist[self.refine_nms(bboxlist, 0.3), :]
-        bboxlist = [ x[:-1].astype(np.int) for x in bboxlist if x[-1] >= 0.5]
+        bboxlist = bboxlist[self._refine_nms(bboxlist, thresh), :]
+        bboxlist = [x[:-1].astype(np.int) for x in bboxlist if x[-1] >= 0.5]
         return bboxlist
 
-    def refine_nms(self, dets, thresh):
+    def _refine_nms(self, dets, nms_thresh):
         keep = list()
         if len(dets) == 0:
             return keep
@@ -93,6 +106,6 @@ class S3FDExtractor(object):
             width, height = np.maximum(0.0, xx_2 - xx_1 + 1), np.maximum(0.0, yy_2 - yy_1 + 1)
             ovr = width * height / (areas[i] + areas[order[1:]] - width * height)
 
-            inds = np.where(ovr <= thresh)[0]
+            inds = np.where(ovr <= nms_thresh)[0]
             order = order[inds + 1]
         return keep
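
Note: `nms_thresh` now threads through `extract` -> `_refine` -> `_refine_nms` instead of the hard-coded 0.3. `_refine_nms` is standard greedy non-maximum suppression: keep the highest-scoring box, drop every remaining box whose overlap ratio with it exceeds the threshold, repeat. A self-contained sketch of the same algorithm (not the repo's exact code):

    import numpy as np

    def greedy_nms(boxes, scores, iou_thresh=0.3):
        # boxes: Nx4 array of [l, t, r, b]; scores: length-N array.
        order = np.argsort(scores)[::-1]  # highest score first
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            # Intersection of the top box with all remaining boxes.
            l = np.maximum(boxes[i, 0], boxes[order[1:], 0])
            t = np.maximum(boxes[i, 1], boxes[order[1:], 1])
            r = np.minimum(boxes[i, 2], boxes[order[1:], 2])
            b = np.minimum(boxes[i, 3], boxes[order[1:], 3])
            inter = np.maximum(0.0, r - l) * np.maximum(0.0, b - t)
            area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
            rest = boxes[order[1:]]
            area_rest = (rest[:, 2] - rest[:, 0]) * (rest[:, 3] - rest[:, 1])
            iou = inter / (area_i + area_rest - inter)
            order = order[1:][iou <= iou_thresh]
        return keep

Raising `nms_thresh` keeps more overlapping detections; lowering it merges them more aggressively.
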
diff --git a/guilib/main.py b/guilib/main.py
new file mode 100644
index 0000000..e69de29
diff --git a/imagelib/warp.py b/imagelib/warp.py
index aa0f602..27e4f98 100644
--- a/imagelib/warp.py
+++ b/imagelib/warp.py
@@ -4,8 +4,8 @@ from utils import random_utils
 
 def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ):
     h,w,c = source.shape
-    if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024):
-        raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.')
+    #if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024):
+    #    raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.')
 
     rotation = np.random.uniform( rotation_range[0], rotation_range[1] )
     scale = np.random.uniform(1 +scale_range[0], 1 +scale_range[1])
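
Note: the square/power-of-2 guard is commented out rather than deleted, presumably because the extractor below now produces crops whose size is derived from the detector rect rather than a fixed 256. For reference, parameters sampled this way are usually folded into a single affine matrix and applied with `cv2.warpAffine`; a minimal sketch under that assumption (not the repo's exact implementation):

    import cv2
    import numpy as np

    def random_affine_warp(source, rotation_range=(-10, 10), scale_range=(-0.5, 0.5),
                           tx_range=(-0.05, 0.05), ty_range=(-0.05, 0.05)):
        # Sample the same parameter families as gen_warp_params ...
        h, w, c = source.shape
        rotation = np.random.uniform(*rotation_range)
        scale = np.random.uniform(1 + scale_range[0], 1 + scale_range[1])
        tx = np.random.uniform(*tx_range) * w
        ty = np.random.uniform(*ty_range) * h

        # ... then rotate and scale about the center, and translate.
        mat = cv2.getRotationMatrix2D((w // 2, h // 2), rotation, scale)
        mat[:, 2] += (tx, ty)
        return cv2.warpAffine(source, mat, (w, h), borderMode=cv2.BORDER_REPLICATE)
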
diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py
index da013af..1a022ae 100644
--- a/mainscripts/Extractor.py
+++ b/mainscripts/Extractor.py
@@ -41,7 +41,6 @@ class ExtractSubprocessor(Subprocessor):
         #override
         def on_initialize(self, client_dict):
             self.type = client_dict['type']
-            self.image_size = client_dict['image_size']
             self.face_type = client_dict['face_type']
             self.device_idx = client_dict['device_idx']
             self.cpu_only = client_dict['device_type'] == 'CPU'
@@ -126,6 +125,8 @@ class ExtractSubprocessor(Subprocessor):
             else:
                 h, w, ch = image.shape
 
+                #self.image_size = h
+
                 if ch == 1:
                     image = np.repeat (image, 3, -1)
                 elif ch == 4:
@@ -230,6 +231,8 @@ class ExtractSubprocessor(Subprocessor):
                         continue
 
                     rect = np.array(rect)
+                    rect_area = mathlib.polygon_area(np.array(rect[[0, 2, 2, 0]]), np.array(rect[[1, 1, 3, 3]]))
+                    self.image_size = int(math.sqrt(rect_area))
 
                     if self.face_type == FaceType.MARK_ONLY:
                         image_to_face_mat = None
@@ -237,13 +240,13 @@ class ExtractSubprocessor(Subprocessor):
                         face_image_landmarks = image_landmarks
                     else:
                         image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type)
-
+
                         face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4)
                         face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat)
 
                         landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], image_to_face_mat, True)
 
-                        rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]]))
+
                         landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] )
 
                         if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area
@@ -305,10 +308,9 @@ class ExtractSubprocessor(Subprocessor):
             return data.filename
 
     #override
-    def __init__(self, input_data, type, image_size=None, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, final_output_path=None):
+    def __init__(self, input_data, type, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, final_output_path=None):
         self.input_data = input_data
         self.type = type
-        self.image_size = image_size
         self.face_type = face_type
         self.debug_dir = debug_dir
         self.final_output_path = final_output_path
@@ -361,7 +363,6 @@ class ExtractSubprocessor(Subprocessor):
     #override
     def process_info_generator(self):
         base_dict = {'type' : self.type,
-                     'image_size': self.image_size,
                      'face_type': self.face_type,
                      'debug_dir': self.debug_dir,
                      'final_output_dir': str(self.final_output_path),
@@ -560,6 +561,7 @@ class ExtractSubprocessor(Subprocessor):
             self.landmarks = landmarks[0]
 
         (h,w,c) = self.image.shape
+        self.image_size = h
 
         if not self.hide_help:
             image = cv2.addWeighted (self.image,1.0,self.text_lines_img,1.0,0)
@@ -731,7 +733,6 @@ def extract_fanseg(input_dir, device_args={} ):
         data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in paths_to_extract ], 'fanseg', multi_gpu=multi_gpu, cpu_only=cpu_only).run()
 
 def extract_umd_csv(input_file_csv,
-                    image_size=256,
                     face_type='full_face',
                     device_args={} ):
@@ -798,7 +799,7 @@ def extract_umd_csv(input_file_csv,
     data = ExtractSubprocessor (data, 'landmarks', multi_gpu=multi_gpu, cpu_only=cpu_only).run()
 
     io.log_info ('Performing 3rd pass...')
-    data = ExtractSubprocessor (data, 'final', image_size, face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+    data = ExtractSubprocessor (data, 'final', face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
 
     faces_detected += sum([d.faces_detected for d in data])
 
@@ -814,7 +815,6 @@ def main(input_dir,
          manual_fix=False,
          manual_output_debug_fix=False,
          manual_window_size=1368,
-         image_size=256,
          face_type='full_face',
         device_args={}):
@@ -867,16 +867,16 @@ def main(input_dir,
    if images_found != 0:
        if detector == 'manual':
            io.log_info ('Performing manual extract...')
-            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run()
+            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'landmarks', face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run()
        else:
            io.log_info ('Performing 1st pass...')
-            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()
+            data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()
 
            io.log_info ('Performing 2nd pass...')
-            data = ExtractSubprocessor (data, 'landmarks', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()
+            data = ExtractSubprocessor (data, 'landmarks', face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run()
 
        io.log_info ('Performing 3rd pass...')
-        data = ExtractSubprocessor (data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+        data = ExtractSubprocessor (data, 'final', face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
 
        faces_detected += sum([d.faces_detected for d in data])
 
        if manual_fix:
@@ -885,8 +885,8 @@ def main(input_dir,
            else:
                fix_data = [ ExtractSubprocessor.Data(d.filename) for d in data if d.faces_detected == 0 ]
                io.log_info ('Performing manual fix for %d images...' % (len(fix_data)) )
-                fix_data = ExtractSubprocessor (fix_data, 'landmarks', image_size, face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run()
-                fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+                fix_data = ExtractSubprocessor (fix_data, 'landmarks', face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run()
+                fix_data = ExtractSubprocessor (fix_data, 'final', face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
 
                faces_detected += sum([d.faces_detected for d in fix_data])
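
Note: the substantive change in this file is that `image_size` is no longer a fixed argument (default 256) passed through every pass; the final pass now derives the crop size per face from the detector rectangle's area, and the manual-extract path takes it from the loaded image height. Assuming `mathlib.polygon_area` is the usual shoelace formula, the new sizing logic amounts to this (standalone sketch, with a hypothetical `polygon_area`):

    import math
    import numpy as np

    def polygon_area(xs, ys):
        # Shoelace formula for the area of a simple polygon.
        return 0.5 * abs(np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1)))

    # Detector rect as (l, t, r, b); its four corners span the face box,
    # and the square root of the area becomes the crop resolution.
    rect = np.array([100, 120, 340, 360])
    rect_area = polygon_area(rect[[0, 2, 2, 0]], rect[[1, 1, 3, 3]])
    image_size = int(math.sqrt(rect_area))  # 240 for this 240x240 rect

One consequence worth noting in review: `self.image_size` is now set inside the per-face loop, so different faces from the same run are saved at different resolutions.
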
diff --git a/mainscripts/MaskEditorTool.py b/mainscripts/MaskEditorTool.py
index 1d9750f..dd30760 100644
--- a/mainscripts/MaskEditorTool.py
+++ b/mainscripts/MaskEditorTool.py
@@ -25,8 +25,8 @@ class MaskEditor:
         self.img = imagelib.normalize_channels (img,3)
         h, w, c = img.shape
 
-        if h != w and w != 256:
-            #to support any square res, scale img,mask and ie_polys to 256, then scale ie_polys back on .get_ie_polys()
+        if h != w or w != 256:
+            self.img = cv2.resize(img, (256,256))
             raise Exception ("MaskEditor does not support image size != 256x256")
 
         ph, pw = h // 4, w // 4 #pad wh
@@ -257,7 +257,7 @@ class MaskEditor:
                 preview_images += [ np.concatenate (prev_images, axis=1) ]
 
             img = np.full ( (prh,prw, sc), (0,0,1), dtype=np.float )
-            img[border:-border,border:-border] = cv2.resize( self.img, max_wh_bordered )
+            img[border:-border,border:-border] = cv2.resize( self.img, (256, 256) )
 
             preview_images += [ img ]
 
@@ -276,7 +276,6 @@ class MaskEditor:
         self.preview_images = np.clip(preview_images * 255, 0, 255 ).astype(np.uint8)
 
         status_img = self.get_screen_status_block( screens.shape[1], screens.shape[2] )
-
         result = np.concatenate ( [self.preview_images, screens, status_img], axis=0 )
         return result
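
Note: the removed comment in MaskEditor.__init__ described the eventual goal (scale img, mask, and ie_polys to 256, then scale ie_polys back on .get_ie_polys()), and the new `cv2.resize` is a first step toward it, though the exception is still raised unconditionally afterwards. A sketch of the remaining coordinate bookkeeping (hypothetical helper, not the repo's code):

    import cv2
    import numpy as np

    def to_editor_space(img, polys, editor_size=256):
        # Scale a square image and its polygon points to the editor
        # resolution; the caller multiplies edited polys by the returned
        # inverse factor to map them back to the original resolution.
        h, w = img.shape[:2]
        assert h == w, 'expects a square image'
        scale = editor_size / w
        resized = cv2.resize(img, (editor_size, editor_size))
        scaled_polys = [np.asarray(p, dtype=np.float32) * scale for p in polys]
        return resized, scaled_polys, 1.0 / scale
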