diff --git a/converters/ConverterAvatar.py b/converters/ConverterAvatar.py
index 4a9e9b5..ca26c63 100644
--- a/converters/ConverterAvatar.py
+++ b/converters/ConverterAvatar.py
@@ -20,8 +20,9 @@ class ConverterAvatar(Converter):
         self.predictor_input_size = predictor_input_size

         #dummy predict and sleep, tensorflow caching kernels. If remove it, conversion speed will be x2 slower
-        predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ),
-                         np.zeros ( (predictor_input_size,predictor_input_size,1), dtype=np.float32 ) )
+        predictor_func ( np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ),
+                         np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ),
+                         np.zeros ( (predictor_input_size,predictor_input_size,3), dtype=np.float32 ) )
         time.sleep(2)

         predictor_func_host, predictor_func = SubprocessFunctionCaller.make_pair(predictor_func)
@@ -33,38 +34,28 @@ class ConverterAvatar(Converter):
         self.predictor_func_host.obj.process_messages()

     #override
-    def cli_convert_face (self, img_bgr, img_face_landmarks, debug, avaperator_face_bgr=None, **kwargs):
+    def cli_convert_face (self, f0, f0_lmrk, f1, f1_lmrk, f2, f2_lmrk, debug, **kwargs):
         if debug:
-            debugs = [img_bgr.copy()]
-
-        img_size = img_bgr.shape[1], img_bgr.shape[0]
-
-        img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks)
-        img_face_mask_aaa = np.repeat(img_face_mask_a, 3, -1)
+            debugs = []
+
+        inp_size = self.predictor_input_size
+
+        f0_mat = LandmarksProcessor.get_transform_mat (f0_lmrk, inp_size, face_type=FaceType.FULL_NO_ALIGN)
+        f1_mat = LandmarksProcessor.get_transform_mat (f1_lmrk, inp_size, face_type=FaceType.FULL_NO_ALIGN)
+        f2_mat = LandmarksProcessor.get_transform_mat (f2_lmrk, inp_size, face_type=FaceType.FULL_NO_ALIGN)

-        output_size = self.predictor_input_size
-        face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=FaceType.FULL)
+        inp_f0 = cv2.warpAffine( f0, f0_mat, (inp_size, inp_size), flags=cv2.INTER_CUBIC )
+        inp_f1 = cv2.warpAffine( f1, f1_mat, (inp_size, inp_size), flags=cv2.INTER_CUBIC )
+        inp_f2 = cv2.warpAffine( f2, f2_mat, (inp_size, inp_size), flags=cv2.INTER_CUBIC )

-        dst_face_mask_a_0 = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size), flags=cv2.INTER_CUBIC )
+        prd_f = self.predictor_func ( inp_f0, inp_f1, inp_f2 )

-        predictor_input_dst_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC )
-        prd_inp_dst_face_mask_a = predictor_input_dst_face_mask_a_0[...,np.newaxis]
+        out_img = np.clip(prd_f, 0.0, 1.0)

-        prd_inp_avaperator_face_bgr = cv2.resize (avaperator_face_bgr, (self.predictor_input_size,self.predictor_input_size), cv2.INTER_CUBIC )
-
-        prd_face_bgr = self.predictor_func ( prd_inp_avaperator_face_bgr, prd_inp_dst_face_mask_a )
-
-        out_img = img_bgr.copy()
-        out_img = cv2.warpAffine( prd_face_bgr, face_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT )
-        out_img = np.clip(out_img, 0.0, 1.0)
+        out_img = np.concatenate ( [cv2.resize ( inp_f1, (prd_f.shape[1], prd_f.shape[0]) ),
+                                    out_img], axis=1 )

         if debug:
             debugs += [out_img.copy()]

-        out_img = np.clip( img_bgr*(1-img_face_mask_aaa) + (out_img*img_face_mask_aaa) , 0, 1.0 )
-
-        if debug:
-            debugs += [out_img.copy()]
-
         return debugs if debug else out_img
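Note: the converter no longer blends a single predicted face back into the frame; it now feeds three consecutive frames (previous / current / next), each cropped with the new FULL_NO_ALIGN type, and emits the middle input next to the prediction. A minimal sketch of that flow, assuming cv2/numpy and this repo's LandmarksProcessor/FaceType; convert_triplet is a hypothetical wrapper, not part of the patch:

    import cv2
    import numpy as np

    def convert_triplet(predictor_func, frames_and_landmarks, inp_size):
        inps = []
        for frame, lmrks in frames_and_landmarks:        # (prev, cur, next)
            # FULL_NO_ALIGN keeps FULL's scale/coverage but strips the rotation
            mat = LandmarksProcessor.get_transform_mat(lmrks, inp_size,
                                                       face_type=FaceType.FULL_NO_ALIGN)
            inps.append(cv2.warpAffine(frame, mat, (inp_size, inp_size),
                                       flags=cv2.INTER_CUBIC))
        prd = np.clip(predictor_func(*inps), 0.0, 1.0)
        # output: middle input frame and prediction, side by side
        middle = cv2.resize(inps[1], (prd.shape[1], prd.shape[0]))
        return np.concatenate([middle, prd], axis=1)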
diff --git a/facelib/FaceType.py b/facelib/FaceType.py
index f0d5530..dba999d 100644
--- a/facelib/FaceType.py
+++ b/facelib/FaceType.py
@@ -4,10 +4,10 @@ class FaceType(IntEnum):
     HALF = 0,
     FULL = 1,
     HEAD = 2,
-    AVATAR = 3, #centered nose only
-    MARK_ONLY = 4, #no align at all, just embedded faceinfo
-    QTY = 5
-
+
+    FULL_NO_ALIGN = 5,
+    MARK_ONLY = 10, #no align at all, just embedded faceinfo
+
     @staticmethod
     def fromString (s):
         r = from_string_dict.get (s.lower())
@@ -17,17 +17,17 @@ class FaceType(IntEnum):

     @staticmethod
     def toString (face_type):
-        return to_string_list[face_type]
+        return to_string_dict[face_type]

 from_string_dict = {'half_face': FaceType.HALF,
                     'full_face': FaceType.FULL,
                     'head' : FaceType.HEAD,
-                    'avatar' : FaceType.AVATAR,
                     'mark_only' : FaceType.MARK_ONLY,
+                    'full_face_no_align' : FaceType.FULL_NO_ALIGN,
                    }
-to_string_list = [ 'half_face',
-                   'full_face',
-                   'head',
-                   'avatar',
-                   'mark_only'
-                 ]
+to_string_dict = { FaceType.HALF : 'half_face',
+                   FaceType.FULL : 'full_face',
+                   FaceType.HEAD : 'head',
+                   FaceType.MARK_ONLY : 'mark_only',
+                   FaceType.FULL_NO_ALIGN : 'full_face_no_align'
+                 }
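Note: toString had to change from a list to a dict because the enum values are no longer contiguous (FULL_NO_ALIGN = 5, MARK_ONLY = 10), so indexing a list by enum value would either raise IndexError or return the wrong name. A quick round-trip check under the new mapping:

    assert FaceType.fromString('full_face_no_align') == FaceType.FULL_NO_ALIGN
    assert FaceType.toString(FaceType.FULL_NO_ALIGN) == 'full_face_no_align'
    assert FaceType.toString(FaceType.MARK_ONLY) == 'mark_only'  # value 10; to_string_list[10] would fail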
diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py
index a49076b..7c0b697 100644
--- a/facelib/LandmarksProcessor.py
+++ b/facelib/LandmarksProcessor.py
@@ -109,10 +109,19 @@ landmarks_68_3D = np.array( [
 [0.205322  , 31.408738  , -21.903670 ],
 [-7.198266  , 30.844876  , -20.328022 ] ], dtype=np.float32)

+def transform_points(points, mat, invert=False):
+    if invert:
+        mat = cv2.invertAffineTransform (mat)
+    points = np.expand_dims(points, axis=1)
+    points = cv2.transform(points, mat, points.shape)
+    points = np.squeeze(points)
+    return points
+
 def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
     if not isinstance(image_landmarks, np.ndarray):
         image_landmarks = np.array (image_landmarks)

+    """
     if face_type == FaceType.AVATAR:
         centroid = np.mean (image_landmarks, axis=0)
@@ -128,76 +137,79 @@ def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
         mat = mat * scale * (output_size / 3)
         mat[:,2] += output_size / 2
     else:
-        if face_type == FaceType.HALF:
-            padding = 0
-        elif face_type == FaceType.FULL:
-            padding = (output_size / 64) * 12
-        elif face_type == FaceType.HEAD:
-            padding = (output_size / 64) * 24
-        else:
-            raise ValueError ('wrong face_type: ', face_type)
+    """
+    remove_align = False
+    if face_type == FaceType.FULL_NO_ALIGN:
+        face_type = FaceType.FULL
+        remove_align = True
+
+    if face_type == FaceType.HALF:
+        padding = 0
+    elif face_type == FaceType.FULL:
+        padding = (output_size / 64) * 12
+    elif face_type == FaceType.HEAD:
+        padding = (output_size / 64) * 24
+    else:
+        raise ValueError ('wrong face_type: ', face_type)

-        mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2]
-        mat = mat * (output_size - 2 * padding)
-        mat[:,2] += padding
-        mat *= (1 / scale)
-        mat[:,2] += -output_size*( ( (1 / scale) - 1.0 ) / 2 )
+    mat = umeyama(image_landmarks[17:], landmarks_2D, True)[0:2]
+    mat = mat * (output_size - 2 * padding)
+    mat[:,2] += padding
+    mat *= (1 / scale)
+    mat[:,2] += -output_size*( ( (1 / scale) - 1.0 ) / 2 )
+
+    if remove_align:
+        bbox = transform_points ( [ (0,0), (0,output_size-1), (output_size-1, output_size-1), (output_size-1,0) ], mat, True)
+        area = mathlib.polygon_area(bbox[:,0], bbox[:,1] )
+        side = math.sqrt(area) / 2
+        center = transform_points ( [(output_size/2,output_size/2)], mat, True)
+
+        pts1 = np.float32([ center+[-side,-side], center+[side,-side], center+[-side,side] ])
+        pts2 = np.float32([[0,0],[output_size-1,0],[0,output_size-1]])
+        mat = cv2.getAffineTransform(pts1,pts2)

     return mat

-def transform_points(points, mat, invert=False):
-    if invert:
-        mat = cv2.invertAffineTransform (mat)
-    points = np.expand_dims(points, axis=1)
-    points = cv2.transform(points, mat, points.shape)
-    points = np.squeeze(points)
-    return points
-
-
 def get_image_hull_mask (image_shape, image_landmarks, ie_polys=None):
     if len(image_landmarks) != 68:
         raise Exception('get_image_hull_mask works only with 68 landmarks')
-    int_lmrks = np.array(image_landmarks, dtype=np.int)
+    int_lmrks = np.array(image_landmarks.copy(), dtype=np.int)

     hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32)

-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[0:9],
-                          int_lmrks[17:18]))) , (1,) )
+    # #nose
+    ml_pnt = (int_lmrks[36] + int_lmrks[0]) // 2
+    mr_pnt = (int_lmrks[16] + int_lmrks[45]) // 2

-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[8:17],
-                          int_lmrks[26:27]))) , (1,) )
+    # mid points between the mid points and eye
+    ql_pnt = (int_lmrks[36] + ml_pnt) // 2
+    qr_pnt = (int_lmrks[45] + mr_pnt) // 2

-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[17:20],
-                          int_lmrks[8:9]))) , (1,) )
+    # Top of the eye arrays
+    bot_l = np.array((ql_pnt, int_lmrks[36], int_lmrks[37], int_lmrks[38], int_lmrks[39]))
+    bot_r = np.array((int_lmrks[42], int_lmrks[43], int_lmrks[44], int_lmrks[45], qr_pnt))

-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[24:27],
-                          int_lmrks[8:9]))) , (1,) )
+    # Eyebrow arrays
+    top_l = int_lmrks[17:22]
+    top_r = int_lmrks[22:27]

-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[19:25],
-                          int_lmrks[8:9],
-                          ))) , (1,) )
+    # Adjust eyebrow arrays
+    int_lmrks[17:22] = top_l + ((top_l - bot_l) // 2)
+    int_lmrks[22:27] = top_r + ((top_r - bot_r) // 2)

-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[17:22],
-                          int_lmrks[27:28],
-                          int_lmrks[31:36],
-                          int_lmrks[8:9]
-                          ))) , (1,) )
+    r_jaw = (int_lmrks[0:9], int_lmrks[17:18])
+    l_jaw = (int_lmrks[8:17], int_lmrks[26:27])
+    r_cheek = (int_lmrks[17:20], int_lmrks[8:9])
+    l_cheek = (int_lmrks[24:27], int_lmrks[8:9])
+    nose_ridge = (int_lmrks[19:25], int_lmrks[8:9],)
+    r_eye = (int_lmrks[17:22], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9])
+    l_eye = (int_lmrks[22:27], int_lmrks[27:28], int_lmrks[31:36], int_lmrks[8:9])
+    nose = (int_lmrks[27:31], int_lmrks[31:36])
+    parts = [r_jaw, l_jaw, r_cheek, l_cheek, nose_ridge, r_eye, l_eye, nose]

-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(
-        np.concatenate ( (int_lmrks[22:27],
-                          int_lmrks[27:28],
-                          int_lmrks[31:36],
-                          int_lmrks[8:9]
-                          ))) , (1,) )
-
-    #nose
-    cv2.fillConvexPoly( hull_mask, cv2.convexHull(int_lmrks[27:36]), (1,) )
+    for item in parts:
+        merged = np.concatenate(item)
+        cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), 1)

     if ie_polys is not None:
         ie_polys.overlay_mask(hull_mask)
@@ -309,7 +321,7 @@ def draw_landmarks (image, image_landmarks, color=(0,255,0), transparent_mask=Fa
         mask = get_image_hull_mask (image.shape, image_landmarks, ie_polys)
         image[...] = ( image * (1-mask) + image * mask / 2 )[...]

-def draw_rect_landmarks (image, rect, image_landmarks, face_size, face_type, transparent_mask=False, ie_polys=None, landmarks_color=(0,255,0) ):
+def draw_rect_landmarks (image, rect, image_landmarks, face_size, face_type, transparent_mask=False, ie_polys=None, landmarks_color=(0,255,0)):
     draw_landmarks(image, image_landmarks, color=landmarks_color, transparent_mask=transparent_mask, ie_polys=ie_polys)
     imagelib.draw_rect (image, rect, (255,0,0), 2 )
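Note: FULL_NO_ALIGN first builds the ordinary FULL umeyama matrix, then maps the crop square's corners back into image space with the relocated transform_points, and replaces the rotated crop with an axis-aligned square of the same area around the same center. A hedged sanity check of that forward/inverse pair (mat is any matrix returned by get_transform_mat; values are illustrative):

    import numpy as np

    pts  = np.float32([[10, 20], [30, 40]])
    fwd  = LandmarksProcessor.transform_points(pts, mat)               # image -> crop space
    back = LandmarksProcessor.transform_points(fwd, mat, invert=True)  # crop -> image space
    assert np.allclose(back, pts, atol=1e-3)   # round-trip up to float error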
diff --git a/main.py b/main.py
index 8093c77..240c044 100644
--- a/main.py
+++ b/main.py
@@ -40,7 +40,7 @@ if __name__ == "__main__":
         p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
         p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.")
         p.add_argument('--debug-dir', action=fixPathAction, dest="debug_dir", help="Writes debug images to this directory.")
-        p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'")
+        p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'full_face_no_align', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option; currently all models use 'full_face'.")
         p.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.")
         p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
         p.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.")
@@ -151,7 +151,6 @@ if __name__ == "__main__":
         args = {'input_dir'  : arguments.input_dir,
                 'output_dir' : arguments.output_dir,
                 'aligned_dir' : arguments.aligned_dir,
-                'avaperator_aligned_dir' : arguments.avaperator_aligned_dir,
                 'model_dir' : arguments.model_dir,
                 'model_name' : arguments.model_name,
                 'debug' : arguments.debug,
@@ -166,7 +165,6 @@ if __name__ == "__main__":
         p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
         p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the converted files will be stored.")
         p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", help="Aligned directory. This is where the extracted of dst faces stored.")
-        p.add_argument('--avaperator-aligned-dir', action=fixPathAction, dest="avaperator_aligned_dir", help="Only for AVATAR model. Directory of aligned avatar operator faces.")
         p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.")
         p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model")
         p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.")
diff --git a/mainscripts/Converter.py b/mainscripts/Converter.py
index 5ce9b7a..53c3e4f 100644
--- a/mainscripts/Converter.py
+++ b/mainscripts/Converter.py
@@ -28,9 +28,9 @@ class ConvertSubprocessor(Subprocessor):
             self.device_idx = client_dict['device_idx']
             self.device_name = client_dict['device_name']
             self.converter = client_dict['converter']
+            self.input_data = client_dict['input_data']
             self.output_path = Path(client_dict['output_dir']) if 'output_dir' in client_dict.keys() else None
             self.alignments = client_dict['alignments']
-            self.avatar_image_paths = client_dict['avatar_image_paths']
             self.debug = client_dict['debug']

             #transfer and set stdin in order to work code.interact in debug subprocess
@@ -50,65 +50,30 @@ class ConvertSubprocessor(Subprocessor):
         #override
         def process_data(self, data):
-            idx, filename = data
-            filename_path = Path(filename)

             files_processed = 1
             faces_processed = 0
+
+            idx, = data
+            filename = self.input_data[idx][0]
+            filename_path = Path(filename)

             output_filename_path = self.output_path / (filename_path.stem + '.png')
+            image = None
+
+            if self.converter.type == Converter.TYPE_FACE:
+                if filename_path.stem not in self.alignments.keys():
+                    if not self.debug:
+                        self.log_info ( 'no faces found for %s, copying without faces' % (filename_path.name) )

-            if (self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR ) \
-                   and filename_path.stem not in self.alignments.keys():
-                if not self.debug:
-                    self.log_info ( 'no faces found for %s, copying without faces' % (filename_path.name) )
-
-                if filename_path.suffix == '.png':
-                    shutil.copy ( str(filename_path), str(output_filename_path) )
-                else:
-                    image = cv2_imread(str(filename_path))
-                    cv2_imwrite ( str(output_filename_path), image )
-            else:
-                image = (cv2_imread(str(filename_path)) / 255.0).astype(np.float32)
-                image = normalize_channels (image, 3)
+                    if filename_path.suffix == '.png':
+                        shutil.copy ( str(filename_path), str(output_filename_path) )
+                    else:
+                        image = cv2_imread(str(filename_path))
+                        cv2_imwrite ( str(output_filename_path), image )
+                else:
+                    image = (cv2_imread(str(filename_path)) / 255.0).astype(np.float32)
+                    image = normalize_channels (image, 3)

-                if self.converter.type == Converter.TYPE_IMAGE:
-                    image = self.converter.cli_convert_image(image, None, self.debug)
-
-                    if self.debug:
-                        return (1, image)
-
-                    faces_processed = 1
-
-                elif self.converter.type == Converter.TYPE_IMAGE_WITH_LANDMARKS:
-                    #currently unused
-                    if filename_path.suffix == '.png':
-                        dflimg = DFLPNG.load( str(filename_path) )
-                    elif filename_path.suffix == '.jpg':
-                        dflimg = DFLJPG.load ( str(filename_path) )
-                    else:
-                        dflimg = None
-
-                    if dflimg is not None:
-                        image_landmarks = dflimg.get_landmarks()
-
-                        image = self.converter.convert_image(image, image_landmarks, self.debug)
-
-                        if self.debug:
-                            raise NotImplementedError
-                            #for img in image:
-                            #    io.show_image ('Debug convert', img )
-                            #    cv2.waitKey(0)
-                        faces_processed = 1
-                    else:
-                        self.log_err ("%s is not a dfl image file" % (filename_path.name) )
-
-                elif self.converter.type == Converter.TYPE_FACE or self.converter.type == Converter.TYPE_FACE_AVATAR:
-
-                    ava_face = None
-                    if self.converter.type == Converter.TYPE_FACE_AVATAR:
-                        ava_filename_path = self.avatar_image_paths[idx]
-                        ava_face = (cv2_imread(str(ava_filename_path)) / 255.0).astype(np.float32)
-                        ava_face = normalize_channels (ava_face, 3)

                 faces = self.alignments[filename_path.stem]

                 if self.debug:
@@ -120,9 +85,9 @@ class ConvertSubprocessor(Subprocessor):
                             self.log_info ( '\nConverting face_num [%d] in file [%s]' % (face_num, filename_path) )

                         if self.debug:
-                            debug_images += self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face)
+                            debug_images += self.converter.cli_convert_face(image, image_landmarks, self.debug)
                         else:
-                            image = self.converter.cli_convert_face(image, image_landmarks, self.debug, avaperator_face_bgr=ava_face)
+                            image = self.converter.cli_convert_face(image, image_landmarks, self.debug)

                     except Exception as e:
                         e_str = traceback.format_exc()
@@ -135,29 +100,60 @@ class ConvertSubprocessor(Subprocessor):
                     return (1, debug_images)

                 faces_processed = len(faces)
+            elif self.converter.type == Converter.TYPE_IMAGE:
+                image = (cv2_imread(str(filename_path)) / 255.0).astype(np.float32)
+                image = normalize_channels (image, 3)
+                image = self.converter.cli_convert_image(image, None, self.debug)

-            if not self.debug:
-                cv2_imwrite (str(output_filename_path), (image*255).astype(np.uint8) )
+                if self.debug:
+                    return (1, image)
+
+                faces_processed = 1
+            elif self.converter.type == Converter.TYPE_FACE_AVATAR:
+                max_idx = len(self.input_data)-1
+
+                i0 = max (idx-1, 0)
+                i1 = idx
+                i2 = min (max_idx, idx+1)
+
+                f0 = (cv2_imread( self.input_data[i0][0] ) / 255.0).astype(np.float32)
+                f0_lmrk = self.input_data[i0][1]
+                f1 = (cv2_imread( self.input_data[i1][0] ) / 255.0).astype(np.float32)
+                f1_lmrk = self.input_data[i1][1]
+                f2 = (cv2_imread( self.input_data[i2][0] ) / 255.0).astype(np.float32)
+                f2_lmrk = self.input_data[i2][1]
+
+                f0, f1, f2 = [ normalize_channels (f, 3) for f in [f0,f1,f2] ]
+
+                image = self.converter.cli_convert_face(f0, f0_lmrk, f1, f1_lmrk, f2, f2_lmrk, self.debug)
+
+                output_filename_path = self.output_path / self.input_data[idx][2]
+
+                if self.debug:
+                    return (1, image)
+
+                faces_processed = 1
+
+            if image is not None and not self.debug:
+                cv2_imwrite (str(output_filename_path), (image*255).astype(np.uint8) )

             return (0, files_processed, faces_processed)

         #overridable
         def get_data_name (self, data):
             #return string identificator of your data
-            idx, filename = data
-            return filename
+            idx, = data
+            return self.input_data[idx][0]

     #override
-    def __init__(self, converter, input_path_image_paths, output_path, alignments, avatar_image_paths=None, debug = False):
+    def __init__(self, converter, input_data, output_path, alignments, debug = False):
         super().__init__('Converter', ConvertSubprocessor.Cli, 86400 if debug == True else 60)

         self.converter = converter
-        self.input_data = self.input_path_image_paths = input_path_image_paths
+        self.input_data = input_data
         self.input_data_idxs = [ *range(len(self.input_data)) ]
         self.output_path = output_path
         self.alignments = alignments
-        self.avatar_image_paths = avatar_image_paths
         self.debug = debug

         self.files_processed = 0
@@ -171,9 +167,9 @@ class ConvertSubprocessor(Subprocessor):
             yield 'CPU%d' % (i), {}, {'device_idx': i,
                                       'device_name': 'CPU%d' % (i),
                                       'converter' : self.converter,
+                                      'input_data' : self.input_data,
                                       'output_dir' : str(self.output_path),
                                       'alignments' : self.alignments,
-                                      'avatar_image_paths' : self.avatar_image_paths,
                                       'debug': self.debug,
                                       'stdin_fd': sys.stdin.fileno() if self.debug else None
                                       }
@@ -196,12 +192,12 @@ class ConvertSubprocessor(Subprocessor):
     def get_data(self, host_dict):
         if len (self.input_data_idxs) > 0:
             idx = self.input_data_idxs.pop(0)
-            return (idx, self.input_data[idx])
+            return (idx, )
         return None

     #override
     def on_data_return (self, host_dict, data):
-        idx, filename = data
+        idx, = data
         self.input_data_idxs.insert(0, idx)

     #override
@@ -253,9 +249,9 @@ def main (args, device_args):
     converter = model.get_converter()

     input_path_image_paths = Path_utils.get_image_paths(input_path)
+    alignments = None

-    avatar_image_paths = None
-    if converter.type == Converter.TYPE_FACE or converter.type == Converter.TYPE_FACE_AVATAR:
+    if converter.type == Converter.TYPE_FACE:
         if aligned_dir is None:
             io.log_err('Aligned directory not found. Please ensure it exists.')
             return
@@ -287,21 +283,15 @@ def main (args, device_args):
                 alignments[ source_filename_stem ] = []

             alignments[ source_filename_stem ].append (dflimg.get_source_landmarks())
-
-
-    if converter.type == Converter.TYPE_FACE_AVATAR:
-        if avaperator_aligned_dir is None:
-            io.log_err('Avatar operator aligned directory not found. Please ensure it exists.')
-            return
+            #avatar_alignments += [ ( str(filepath), dflimg.get_source_landmarks(), dflimg.get_source_filename() ) ]
+
+        input_data = [ (p,) for p in input_path_image_paths ]
+    elif converter.type == Converter.TYPE_FACE_AVATAR:
+
+        input_data = []
+        for filepath in io.progress_bar_generator(input_path_image_paths, "Collecting info"):
+            filepath = Path(filepath)

-        avaperator_aligned_path = Path(avaperator_aligned_dir)
-        if not avaperator_aligned_path.exists():
-            io.log_err('Avatar operator aligned directory not found. Please ensure it exists.')
-            return
-
-        avatar_image_paths = []
-        for filename in io.progress_bar_generator( Path_utils.get_image_paths(avaperator_aligned_path) , "Sorting avaperator faces"):
-            filepath = Path(filename)
             if filepath.suffix == '.png':
                 dflimg = DFLPNG.load( str(filepath) )
             elif filepath.suffix == '.jpg':
@@ -310,22 +300,19 @@ def main (args, device_args):
                 dflimg = None

             if dflimg is None:
-                io.log_err ("Fatal error: %s is not a dfl image file" % (filepath.name) )
-                return
-
-            avatar_image_paths += [ (filename, dflimg.get_source_filename() ) ]
-        avatar_image_paths = [ p[0] for p in sorted(avatar_image_paths, key=operator.itemgetter(1)) ]
+                io.log_err ("%s is not a dfl image file" % (filepath.name) )
+                continue
+            input_data += [ ( str(filepath), dflimg.get_landmarks(), dflimg.get_source_filename() ) ]

-        if len(input_path_image_paths) < len(avatar_image_paths):
-            io.log_err("Input faces count must be >= avatar operator faces count.")
-            return
-
+        input_data = sorted(input_data, key=operator.itemgetter(2))
+    else:
+        input_data = [ (p,) for p in input_path_image_paths ]
+
     files_processed, faces_processed = ConvertSubprocessor (
                 converter              = converter,
-                input_path_image_paths = input_path_image_paths,
+                input_data             = input_data,
                 output_path            = output_path,
                 alignments             = alignments,
-                avatar_image_paths     = avatar_image_paths,
                 debug                  = args.get('debug',False)
             ).run()
@@ -389,3 +376,29 @@ if model_name == 'AVATAR':
#                new_points = np.concatenate( [np.expand_dims(p1,-1),np.expand_dims(p2,-1)], -1 )
#
#                alignments[ a[i] ][0] = LandmarksProcessor.transform_points (new_points, m0, True).astype(np.int32)
+
+"""
+            elif self.converter.type == Converter.TYPE_IMAGE_WITH_LANDMARKS:
+                #currently unused
+                if filename_path.suffix == '.png':
+                    dflimg = DFLPNG.load( str(filename_path) )
+                elif filename_path.suffix == '.jpg':
+                    dflimg = DFLJPG.load ( str(filename_path) )
+                else:
+                    dflimg = None
+
+                if dflimg is not None:
+                    image_landmarks = dflimg.get_landmarks()
+
+                    image = self.converter.convert_image(image, image_landmarks, self.debug)
+
+                    if self.debug:
+                        raise NotImplementedError
+                        #for img in image:
+                        #    io.show_image ('Debug convert', img )
+                        #    cv2.waitKey(0)
+                    faces_processed = 1
+                else:
+                    self.log_err ("%s is not a dfl image file" % (filename_path.name) )
+
+"""
\ No newline at end of file
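Note on the new TYPE_FACE_AVATAR branch above: each output frame is produced from a (previous, current, next) triplet, and the indices are clamped at the sequence edges so the first and last frames reuse themselves as the missing neighbour. A small check mirroring i0/i1/i2 in process_data:

    def triplet_indices(idx, n):
        return max(idx - 1, 0), idx, min(idx + 1, n - 1)

    assert triplet_indices(0, 5) == (0, 0, 1)   # first frame: f0 is f1
    assert triplet_indices(2, 5) == (1, 2, 3)
    assert triplet_indices(4, 5) == (3, 4, 4)   # last frame: f2 is f1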
diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py
index bb9873a..a45d59b 100644
--- a/mainscripts/Extractor.py
+++ b/mainscripts/Extractor.py
@@ -6,6 +6,7 @@ import multiprocessing
 import shutil
 from pathlib import Path
 import numpy as np
+import math
 import mathlib
 import imagelib
 import cv2
@@ -21,6 +22,8 @@ from nnlib import nnlib
 from joblib import Subprocessor
 from interact import interact as io

+DEBUG = False
+
 class ExtractSubprocessor(Subprocessor):
     class Data(object):
         def __init__(self, filename=None, rects=None, landmarks = None, landmarks_accurate=True, pitch_yaw_roll=None, final_output_files = None):
@@ -44,6 +47,11 @@ class ExtractSubprocessor(Subprocessor):
             self.cpu_only = client_dict['device_type'] == 'CPU'
             self.final_output_path = Path(client_dict['final_output_dir']) if 'final_output_dir' in client_dict.keys() else None
             self.debug_dir = client_dict['debug_dir']
+
+            #transfer and set stdin in order to work code.interact in debug subprocess
+            stdin_fd = client_dict['stdin_fd']
+            if stdin_fd is not None and DEBUG:
+                sys.stdin = os.fdopen(stdin_fd)

             self.cached_image = (None, None)
@@ -224,10 +232,12 @@ class ExtractSubprocessor(Subprocessor):
                         rect = np.array(rect)

                         if self.face_type == FaceType.MARK_ONLY:
+                            image_to_face_mat = None
                             face_image = image
                             face_image_landmarks = image_landmarks
                         else:
                             image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type)
+
                             face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4)
                             face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat)
@@ -239,8 +249,8 @@ class ExtractSubprocessor(Subprocessor):
                             if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area
                                 continue

-                        if self.debug_dir is not None:
-                            LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type, transparent_mask=True)
+                            if self.debug_dir is not None:
+                                LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type, transparent_mask=True)

                         if src_dflimg is not None and filename_path.suffix == '.jpg':
                             #if extracting from dflimg and jpg copy it in order not to lose quality
@@ -296,7 +306,7 @@ class ExtractSubprocessor(Subprocessor):
         self.devices = ExtractSubprocessor.get_devices_for_config(self.manual, self.type, multi_gpu, cpu_only)

-        no_response_time_sec = 60 if not self.manual else 999999
+        no_response_time_sec = 60 if not self.manual and not DEBUG else 999999

         super().__init__('Extractor', ExtractSubprocessor.Cli, no_response_time_sec)
@@ -342,7 +352,8 @@ class ExtractSubprocessor(Subprocessor):
                       'image_size': self.image_size,
                       'face_type': self.face_type,
                       'debug_dir': self.debug_dir,
-                      'final_output_dir': str(self.final_output_path)}
+                      'final_output_dir': str(self.final_output_path),
+                      'stdin_fd': sys.stdin.fileno() }

         for (device_idx, device_type, device_name, device_total_vram_gb) in self.devices:
@@ -620,7 +631,7 @@ class ExtractSubprocessor(Subprocessor):
             return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ]

         elif type == 'final':
-            return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range(min(8, multiprocessing.cpu_count())) ]
+            return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in (range(min(8, multiprocessing.cpu_count())) if not DEBUG else [0]) ]

 class DeletedFilesSearcherSubprocessor(Subprocessor):
     class Cli(Subprocessor.Cli):
diff --git a/mainscripts/Sorter.py b/mainscripts/Sorter.py
index 9ff2588..8ff0ee9 100644
--- a/mainscripts/Sorter.py
+++ b/mainscripts/Sorter.py
@@ -423,14 +423,11 @@ def sort_by_hist_dissim(input_path):
         else:
             dflimg = None

-        if dflimg is None:
-            io.log_err ("%s is not a dfl image file" % (filepath.name) )
-            trash_img_list.append ([str(filepath)])
-            continue
-
         image = cv2_imread(str(filepath))
-        face_mask = LandmarksProcessor.get_image_hull_mask (image.shape, dflimg.get_landmarks())
-        image = (image*face_mask).astype(np.uint8)
+
+        if dflimg is not None:
+            face_mask = LandmarksProcessor.get_image_hull_mask (image.shape, dflimg.get_landmarks())
+            image = (image*face_mask).astype(np.uint8)

         img_list.append ([str(filepath), cv2.calcHist([cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)], [0], None, [256], [0, 256]), 0 ])
diff --git a/models/ModelBase.py b/models/ModelBase.py
index b823a74..0897dd2 100644
--- a/models/ModelBase.py
+++ b/models/ModelBase.py
@@ -466,7 +466,7 @@ class ModelBase(object):
         return imagelib.equalize_and_stack_square (images)

     def generate_next_sample(self):
-        return [next(generator) for generator in self.generator_list]
+        return [ generator.generate_next() for generator in self.generator_list]

     def train_one_iter(self):
         sample = self.generate_next_sample()
diff --git a/models/Model_RecycleGAN/Model.py b/models/Model_RecycleGAN/Model.py
index 6f9d1c8..9533ab1 100644
--- a/models/Model_RecycleGAN/Model.py
+++ b/models/Model_RecycleGAN/Model.py
@@ -378,7 +378,6 @@ class RecycleGANModel(ModelBase):
                 return x
             return func

-        nnlib.UNet = UNet

     @staticmethod
     def UNetTemporalPredictor(output_nc, use_batch_norm, ngf=64, use_dropout=False):
diff --git a/models/Model_SAE/Model.py b/models/Model_SAE/Model.py
index f4fd2b6..dc2f0a2 100644
--- a/models/Model_SAE/Model.py
+++ b/models/Model_SAE/Model.py
@@ -545,6 +545,12 @@ class SAEModel(ModelBase):
                 return Norm(norm)( Act(act) (Conv2D(dim, kernel_size=5, strides=2, padding=padding)(x)) )
             return func
         SAEModel.downscale = downscale
+
+        #def downscale (dim, padding='zero', norm='', act='', **kwargs):
+        #    def func(x):
+        #        return BlurPool()( Norm(norm)( Act(act) (Conv2D(dim, kernel_size=5, strides=1, padding=padding)(x)) ) )
+        #    return func
+        #SAEModel.downscale = downscale

         def upscale (dim, padding='zero', norm='', act='', **kwargs):
             def func(x):
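Note: ModelBase.generate_next_sample now goes through generator.generate_next() instead of bare next() so a generator can be paused via set_active(False) (added to SampleGeneratorBase further down) and keep replaying its last batch. A hedged sketch of the intended use, with hypothetical generator names:

    # gen_a keeps producing fresh batches; gen_b replays its last_generation
    gen_a.set_active(True)
    gen_b.set_active(False)
    sample = [g.generate_next() for g in (gen_a, gen_b)]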
diff --git a/nnlib/nnlib.py b/nnlib/nnlib.py
index edcb201..97e98b9 100644
--- a/nnlib/nnlib.py
+++ b/nnlib/nnlib.py
@@ -89,6 +89,8 @@ dssim = nnlib.dssim
 PixelShuffler = nnlib.PixelShuffler
 SubpixelUpscaler = nnlib.SubpixelUpscaler
 Scale = nnlib.Scale
+BlurPool = nnlib.BlurPool
+SelfAttention = nnlib.SelfAttention

 CAInitializerMP = nnlib.CAInitializerMP
@@ -455,6 +457,51 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
         nnlib.PixelShuffler = PixelShuffler
         nnlib.SubpixelUpscaler = PixelShuffler

+        class BlurPool(KL.Layer):
+            """
+            https://arxiv.org/abs/1904.11486 https://github.com/adobe/antialiased-cnns
+            """
+            def __init__(self, filt_size=3, stride=2, **kwargs):
+                self.strides = (stride,stride)
+                self.filt_size = filt_size
+                self.padding = ( (int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ), (int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ) )
+                if(self.filt_size==1):
+                    self.a = np.array([1.,])
+                elif(self.filt_size==2):
+                    self.a = np.array([1., 1.])
+                elif(self.filt_size==3):
+                    self.a = np.array([1., 2., 1.])
+                elif(self.filt_size==4):
+                    self.a = np.array([1., 3., 3., 1.])
+                elif(self.filt_size==5):
+                    self.a = np.array([1., 4., 6., 4., 1.])
+                elif(self.filt_size==6):
+                    self.a = np.array([1., 5., 10., 10., 5., 1.])
+                elif(self.filt_size==7):
+                    self.a = np.array([1., 6., 15., 20., 15., 6., 1.])
+
+                super(BlurPool, self).__init__(**kwargs)
+
+            def compute_output_shape(self, input_shape):
+                height = input_shape[1] // self.strides[0]
+                width = input_shape[2] // self.strides[1]
+                channels = input_shape[3]
+                return (input_shape[0], height, width, channels)
+
+            def call(self, x):
+                k = self.a
+                k = k[:,None]*k[None,:]
+                k = k / np.sum(k)
+                k = np.tile (k[:,:,None,None], (1,1,K.int_shape(x)[-1],1) )
+                k = K.constant (k, dtype=K.floatx() )
+
+                x = K.spatial_2d_padding(x, padding=self.padding)
+                x = K.depthwise_conv2d(x, k, strides=self.strides, padding='valid')
+                return x
+
+        nnlib.BlurPool = BlurPool

         class Scale(KL.Layer):
             """
@@ -487,6 +534,43 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
                 return dict(list(base_config.items()) + list(config.items()))
         nnlib.Scale = Scale

+        class SelfAttention(KL.Layer):
+            def __init__(self, nc, squeeze_factor=8, **kwargs):
+                assert nc//squeeze_factor > 0, f"Input channels must be >= {squeeze_factor}, received nc={nc}"
+
+                self.nc = nc
+                self.squeeze_factor = squeeze_factor
+                super(SelfAttention, self).__init__(**kwargs)
+
+            def compute_output_shape(self, input_shape):
+                return (input_shape[0], input_shape[1], input_shape[2], self.nc)
+
+            def call(self, inp):
+                x = inp
+                shape_x = x.get_shape().as_list()
+
+                f = Conv2D(self.nc//self.squeeze_factor, 1, kernel_regularizer=keras.regularizers.l2(1e-4))(x)
+                g = Conv2D(self.nc//self.squeeze_factor, 1, kernel_regularizer=keras.regularizers.l2(1e-4))(x)
+                h = Conv2D(self.nc, 1, kernel_regularizer=keras.regularizers.l2(1e-4))(x)
+
+                shape_f = f.get_shape().as_list()
+                shape_g = g.get_shape().as_list()
+                shape_h = h.get_shape().as_list()
+                flat_f = Reshape( (-1, shape_f[-1]) )(f)
+                flat_g = Reshape( (-1, shape_g[-1]) )(g)
+                flat_h = Reshape( (-1, shape_h[-1]) )(h)
+
+                s = Lambda(lambda x: K.batch_dot(x[0], keras.layers.Permute((2,1))(x[1]) ))([flat_g, flat_f])
+                beta = keras.layers.Softmax(axis=-1)(s)
+                o = Lambda(lambda x: K.batch_dot(x[0], x[1]))([beta, flat_h])
+
+                o = Reshape(shape_x[1:])(o)
+                o = Scale()(o)
+
+                out = Add()([o, inp])
+                return out
+        nnlib.SelfAttention = SelfAttention

         class Adam(keras.optimizers.Optimizer):
             """Adam optimizer.
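Note: BlurPool (Zhang 2019, linked in the docstring) is a normalized binomial blur applied depthwise before subsampling, which is what makes the downsampling approximately shift-invariant. What call() computes for the default filt_size=3, in plain NumPy:

    import numpy as np

    a = np.array([1., 2., 1.])        # binomial row for filt_size=3
    k = a[:, None] * a[None, :]       # outer product -> 3x3 kernel
    k = k / k.sum()                   # [[1,2,1],[2,4,2],[1,2,1]] / 16, DC gain 1
    # k is then tiled over the input channels and run as a stride-2
    # depthwise convolution after symmetric padding.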
diff --git a/samplelib/SampleGeneratorBase.py b/samplelib/SampleGeneratorBase.py
index dec741e..b89c506 100644
--- a/samplelib/SampleGeneratorBase.py
+++ b/samplelib/SampleGeneratorBase.py
@@ -13,7 +13,18 @@ class SampleGeneratorBase(object):
         self.samples_path = Path(samples_path)
         self.debug = debug
         self.batch_size = 1 if self.debug else batch_size
-
+        self.last_generation = None
+        self.active = True
+
+    def set_active(self, is_active):
+        self.active = is_active
+
+    def generate_next(self):
+        if not self.active and self.last_generation is not None:
+            return self.last_generation
+        self.last_generation = next(self)
+        return self.last_generation
+
     #overridable
     def __iter__(self):
         #implement your own iterator
diff --git a/samplelib/SampleGeneratorFaceTemporal.py b/samplelib/SampleGeneratorFaceTemporal.py
index cf670d2..add15ae 100644
--- a/samplelib/SampleGeneratorFaceTemporal.py
+++ b/samplelib/SampleGeneratorFaceTemporal.py
@@ -8,7 +8,7 @@ from samplelib import SampleType, SampleProcessor, SampleLoader, SampleGenerator

 '''
 output_sample_types = [
-                        [SampleProcessor.TypeFlags, size, (optional)random_sub_size] ,
+                        [SampleProcessor.TypeFlags, size, (optional) {} opts ] ,
                         ...
                       ]
 '''
@@ -46,9 +46,9 @@ class SampleGeneratorFaceTemporal(SampleGeneratorBase):
             raise ValueError('No training data provided.')

         mult_max = 1
-        l = samples_len - (self.temporal_image_count-1)*mult_max + 1
+        l = samples_len - ( (self.temporal_image_count)*mult_max - (mult_max-1) )

-        samples_idxs = [ *range(l) ] [generator_id::self.generators_count]
+        samples_idxs = [ *range(l+1) ] [generator_id::self.generators_count]

         if len(samples_idxs) - self.temporal_image_count < 0:
             raise ValueError('Not enough samples to fit temporal line.')
@@ -67,7 +67,7 @@ class SampleGeneratorFaceTemporal(SampleGeneratorBase):
                 idx = shuffle_idxs.pop()

                 temporal_samples = []
-                mult = np.random.randint(mult_max)
+                mult = np.random.randint(mult_max)+1
                 for i in range( self.temporal_image_count ):
                     sample = samples[ idx+i*mult ]
                     try:
diff --git a/samplelib/SampleGeneratorImageTemporal.py b/samplelib/SampleGeneratorImageTemporal.py
index 190f98d..227c09e 100644
--- a/samplelib/SampleGeneratorImageTemporal.py
+++ b/samplelib/SampleGeneratorImageTemporal.py
@@ -43,7 +43,8 @@ class SampleGeneratorImageTemporal(SampleGeneratorBase):
             raise ValueError('No training data provided.')

         mult_max = 4
-        samples_sub_len = samples_len - (self.temporal_image_count-1)*mult_max
+        samples_sub_len = samples_len - ( (self.temporal_image_count)*mult_max - (mult_max-1) )
+
         if samples_sub_len <= 0:
             raise ValueError('Not enough samples to fit temporal line.')
@@ -61,7 +62,7 @@ class SampleGeneratorImageTemporal(SampleGeneratorBase):
                 idx = shuffle_idxs.pop()

                 temporal_samples = []
-                mult = np.random.randint(mult_max)
+                mult = np.random.randint(mult_max)+1
                 for i in range( self.temporal_image_count ):
                     sample = samples[ idx+i*mult ]
                     try:
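Note: the bound fix in both temporal generators can be checked by hand. With mult now drawn from [1, mult_max], the furthest sample a window touches is idx + (temporal_image_count-1)*mult_max, which must stay below samples_len. Using ImageTemporal's mult_max=4 as an example:

    samples_len, T, mult_max = 100, 3, 4
    l = samples_len - (T * mult_max - (mult_max - 1))   # 100 - (12 - 3) = 91
    assert l + (T - 1) * mult_max == samples_len - 1    # 91 + 8 == 99: last index fits
    # range(l+1) now includes every valid start index, and
    # mult = np.random.randint(mult_max) + 1 can no longer be 0
    # (which previously produced T copies of the same frame).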
diff --git a/samplelib/SampleProcessor.py b/samplelib/SampleProcessor.py
index 0921e52..338dc65 100644
--- a/samplelib/SampleProcessor.py
+++ b/samplelib/SampleProcessor.py
@@ -61,6 +61,7 @@ class SampleProcessor(object):
         FACE_TYPE_FULL = 11
         FACE_TYPE_HEAD = 12 #currently unused
         FACE_TYPE_AVATAR = 13 #currently unused
+        FACE_TYPE_FULL_NO_ALIGN = 14
         FACE_TYPE_END = 20

         MODE_BEGIN = 40
@@ -103,7 +104,7 @@ class SampleProcessor(object):
         SPTF_FACETYPE_TO_FACETYPE = { SPTF.FACE_TYPE_HALF : FaceType.HALF,
                                       SPTF.FACE_TYPE_FULL : FaceType.FULL,
                                       SPTF.FACE_TYPE_HEAD : FaceType.HEAD,
-                                      SPTF.FACE_TYPE_AVATAR : FaceType.AVATAR }
+                                      SPTF.FACE_TYPE_FULL_NO_ALIGN : FaceType.FULL_NO_ALIGN }

         outputs = []
         for opts in output_sample_types:
@@ -157,6 +158,20 @@ class SampleProcessor(object):
             if mode_type == SPTF.NONE:
                 raise ValueError ('expected MODE_ type')

+            def do_transform(img, mask):
+                warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
+                transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED)
+                flip = img_type != SPTF.IMG_WARPED
+
+                img = imagelib.warp_by_params (params, img, warp, transform, flip, True)
+                if mask is not None:
+                    mask = imagelib.warp_by_params (params, mask, warp, transform, flip, False)
+                    if len(mask.shape) == 2:
+                        mask = mask[...,np.newaxis]
+
+                    img = np.concatenate( (img, mask ), -1 )
+                return img
+
             img = cached_images.get(img_type, None)
             if img is None:
@@ -181,15 +196,12 @@ class SampleProcessor(object):
                         if cur_sample.ie_polys is not None:
                             cur_sample.ie_polys.overlay_mask(mask)

-                    warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
-                    transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED)
-                    flip = img_type != SPTF.IMG_WARPED
-
-                    img = imagelib.warp_by_params (params, img, warp, transform, flip, True)
-                    if mask is not None:
-                        mask = imagelib.warp_by_params (params, mask, warp, transform, flip, False)[...,np.newaxis]
-                        img = np.concatenate( (img, mask ), -1 )
+
+                if sample.face_type == FaceType.MARK_ONLY:
+                    if mask is not None:
+                        img = np.concatenate( (img, mask), -1 )
+                else:
+                    img = do_transform (img, mask)

                 cached_images[img_type] = img

@@ -197,7 +209,17 @@ class SampleProcessor(object):
                 ft = SPTF_FACETYPE_TO_FACETYPE[target_face_type]
                 if ft > sample.face_type:
                     raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.' % (sample.filename, sample.face_type, ft) )
-                img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, ft), (resolution,resolution), flags=cv2.INTER_CUBIC )
+
+                if sample.face_type == FaceType.MARK_ONLY:
+                    img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], ft), (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
+
+                    mask = img[...,3:4] if img.shape[2] > 3 else None
+                    img = img[...,0:3]
+                    img = do_transform (img, mask)
+                    img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
+                else:
+                    img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, ft), (resolution,resolution), flags=cv2.INTER_CUBIC )
+
             else:
                 img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
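Note on the MARK_ONLY path added to SampleProcessor: the random warp/flip (do_transform) is deferred until after the face-type alignment, at the sample's native resolution, and only then is the result resized to the model resolution. Condensed, the branch does roughly the following (names as in the patch; a sketch, not a drop-in):

    def mark_only_face(img_with_mask, landmarks, sample_size, ft, resolution):
        mat = LandmarksProcessor.get_transform_mat(landmarks, sample_size, ft)
        img = cv2.warpAffine(img_with_mask, mat, (sample_size, sample_size),
                             flags=cv2.INTER_CUBIC)
        mask = img[..., 3:4] if img.shape[2] > 3 else None
        img = do_transform(img[..., 0:3], mask)   # random warp *after* alignment
        return cv2.resize(img, (resolution, resolution), cv2.INTER_CUBIC)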