diff --git a/README.md b/README.md index 008d145..ab972cd 100644 --- a/README.md +++ b/README.md @@ -96,19 +96,19 @@ SAE model Cage-Trump video: https://www.youtube.com/watch?v=2R_aqHBClUQ `black` Places images which contains black area at end of folder. Useful to get rid of src faces which cutted by screen. -Best practice for gather src faceset: +`final` sorts by yaw, blur, and hist, and leaves best 1500-1700 images. -1) delete first unsorted aligned groups of images what you can to delete. Dont touch target face mixed with others. -2) `black` -> delete faces cutted by black area at end of folder -3) `blur` -> delete 30-50% at end of folder -4) `hist` -> delete groups of similar and leave only target face -5) `hist-dissim` -> leave only first **1500 faces** -6) `face-yaw` -> just for finalize faceset +Best practice for gather src faceset from tens of thousands images: + +1) `black` -> then delete faces cutted by black area at end of folder +2) `blur` -> then delete blurred faces at end of folder +3) `hist` -> then delete groups of similar unwanted faces and leave only target face +4) `final` -> then delete faces occluded by obstructions Best practice for dst faces: 1) delete first unsorted aligned groups of images what you can to delete. Dont touch target face mixed with others. -2) `hist` -> delete groups of similar and leave only target face +2) `hist` -> then delete groups of similar and leave only target face ### **Ready to work facesets**: diff --git a/main.py b/main.py index f875d21..2b7a0c2 100644 --- a/main.py +++ b/main.py @@ -61,7 +61,7 @@ if __name__ == "__main__": sort_parser = subparsers.add_parser( "sort", help="Sort faces in a directory.") sort_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. 
class FinalLoaderSubprocessor(SubprocessorBase):
    """Collect the per-image metrics needed by ``sort_final``.

    Every work item is a one-element list holding an image path. A client
    answers with ``[0, [path, sharpness, hist, yaw]]`` for a usable image or
    ``[1, [path]]`` for a file that has to be trashed. The host gathers the
    two kinds of payload in ``self.result`` and ``self.result_trash``.
    """

    #override
    def __init__(self, img_list):
        self.img_list = img_list    # paths still waiting to be handed out

        self.result = []            # [path, sharpness, hist, yaw] records
        self.result_trash = []      # [path] records of rejected files

        super().__init__('FinalLoader', 60)

    #override
    def onHostClientsInitialized(self):
        pass

    #override
    def process_info_generator(self):
        # One worker description per CPU core, capped at 8.
        for i in range(min(multiprocessing.cpu_count(), 8)):
            yield 'CPU%d' % (i), {}, {'device_idx': i,
                                      'device_name': 'CPU%d' % (i)
                                      }

    #override
    def get_no_process_started_message(self):
        # NOTE(review): the name suggests the base class may expect the
        # message to be returned rather than printed - confirm against
        # SubprocessorBase before changing.
        print('Unable to start CPU processes.')

    #override
    def onHostGetProgressBarDesc(self):
        return "Loading"

    #override
    def onHostGetProgressBarLen(self):
        return len(self.img_list)

    #override
    def onHostGetData(self, host_dict):
        # Hand out the next pending path, or None when nothing is left.
        if len(self.img_list) > 0:
            return [self.img_list.pop(0)]

        return None

    #override
    def onHostDataReturn(self, host_dict, data):
        # Put a handed-out path back at the front of the pending list.
        self.img_list.insert(0, data[0])

    #override
    def onClientInitialize(self, client_dict):
        self.safe_print('Running on %s.' % (client_dict['device_name']))
        return None

    #override
    def onClientFinalize(self):
        pass

    #override
    def onClientProcessData(self, data):
        """Measure one image.

        Returns ``[1, [path]]`` when the file is not a ``.png``, carries no
        embedded DFL data, or cannot be decoded; otherwise returns
        ``[0, [path, sharpness, hist, yaw]]``.
        """
        filepath = Path(data[0])
        if filepath.suffix != '.png':
            # safe_print keeps client-side output consistent with
            # onClientInitialize.
            self.safe_print("%s is not a png file required for sort_final" % (filepath.name))
            return [1, [str(filepath)]]

        dflpng = DFLPNG.load(str(filepath), print_on_no_embedded_data=True)
        if dflpng is None:
            return [1, [str(filepath)]]

        bgr = cv2.imread(str(filepath))
        if bgr is None:
            # cv2.imread signals an unreadable file by returning None;
            # without this guard cvtColor would raise inside the worker.
            return [1, [str(filepath)]]

        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        # Sharpness is estimated on the grayscale image multiplied by the
        # first channel of the landmark hull mask (presumably this limits
        # the estimate to the face area - verify against LandmarksProcessor).
        hull_mask = LandmarksProcessor.get_image_hull_mask(bgr, dflpng.get_landmarks())
        gray_masked = (gray * hull_mask[:, :, 0]).astype(np.uint8)
        sharpness = estimate_sharpness(gray_masked)
        # 256-bin histogram of the whole (unmasked) grayscale image.
        hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
        return [0, [str(filepath), sharpness, hist, dflpng.get_yaw_value()]]

    #override
    def onClientGetDataName(self, data):
        # Return a string identifying the work item.
        return data[0]

    #override
    def onHostResult(self, host_dict, data, result):
        # result[0] == 0 marks a usable image; anything else is trash.
        if result[0] == 0:
            self.result.append(result[1])
        else:
            self.result_trash.append(result[1])
        return 1

    #override
    def onFinalizeAndGetResult(self):
        return self.result, self.result_trash


def sort_final(input_path):
    """Pick a yaw-balanced, sharp and varied subset of the images.

    Steps:
      1. Load ``[path, sharpness, hist, yaw]`` for every image.
      2. Distribute the images over 128 buckets by negated yaw. Each bucket
         is 2 units wide; the first and last bucket are open-ended.
      3. Per bucket, order by sharpness (highest first). Buckets holding
         more than ``2 * imgs_per_grad`` images lose their lower half, and
         no bucket keeps more than ``10 * imgs_per_grad`` images.
      4. Per bucket, score every image by the sum of its Bhattacharyya
         histogram distances to the other images of the bucket, and order
         by that score (highest first). The score overwrites slot 3 of the
         record, which held the yaw.
      5. Keep the first ``imgs_per_grad`` images of every bucket.

    Returns:
        ``(final_img_list, trash_img_list)``. Every entry is a list whose
        first element is the image path. Trash entries are either bare
        ``[path]`` records from the loader or full records dropped in
        steps 3-5.
    """
    import bisect  # local import: only needed by this function

    print("Performing final sort.")

    img_list, trash_img_list = FinalLoaderSubprocessor(Path_utils.get_image_paths(input_path)).process()
    final_img_list = []

    grads = 128
    imgs_per_grad = 15
    sharpened_imgs_per_grad = imgs_per_grad * 10

    # Lower edges of buckets 1..grads-1 (-125, -123, ..., 127). The number
    # of edges that are <= s_yaw is the bucket index, so one bisect per
    # image replaces a full scan of the image list for every bucket.
    bucket_edges = [-grads + 1 + g * 2 for g in range(1, grads)]
    yaw_buckets = [[] for _ in range(grads)]
    for img in tqdm(img_list, desc="Sort by yaw"):
        s_yaw = -img[3]
        if s_yaw != s_yaw:
            # NaN compares false against every edge, so such an image never
            # matched any bucket; keep leaving it out of both result lists.
            continue
        yaw_buckets[bisect.bisect_right(bucket_edges, s_yaw)].append(img)

    for g in tqdm(range(grads), desc="Sort by blur"):
        bucket = yaw_buckets[g]
        if not bucket:
            continue

        bucket = sorted(bucket, key=operator.itemgetter(1), reverse=True)

        if len(bucket) > imgs_per_grad * 2:
            half = len(bucket) // 2
            trash_img_list += bucket[half:]
            bucket = bucket[:half]

        if len(bucket) > sharpened_imgs_per_grad:
            trash_img_list += bucket[sharpened_imgs_per_grad:]
            bucket = bucket[:sharpened_imgs_per_grad]

        yaw_buckets[g] = bucket

    for g in tqdm(range(grads), desc="Sort by hist"):
        bucket = yaw_buckets[g]
        if not bucket:
            continue

        for i, img in enumerate(bucket):
            # The yaw in slot 3 is no longer needed once the buckets are
            # built, so the slot is reused for the dissimilarity score.
            img[3] = sum(
                cv2.compareHist(img[2], other[2], cv2.HISTCMP_BHATTACHARYYA)
                for j, other in enumerate(bucket)
                if j != i
            )

        yaw_buckets[g] = sorted(bucket, key=operator.itemgetter(3), reverse=True)

    for g in tqdm(range(grads), desc="Fetching best"):
        bucket = yaw_buckets[g]
        final_img_list += bucket[:imgs_per_grad]
        trash_img_list += bucket[imgs_per_grad:]

    return final_img_list, trash_img_list
Path_utils.get_image_paths(trash_path): + Path(filename).unlink() + + for i in tqdm( range(len(trash_img_list)), desc="Moving trash" , leave=False): + src = Path (trash_img_list[i][0]) + dst = trash_path / src.name + try: + src.rename (dst) + except: + print ('fail to trashing %s' % (src.name) ) + + print ("") + + for i in tqdm( range(len(img_list)), desc="Renaming" , leave=False): src = Path (img_list[i][0]) dst = input_path / ('%.5d_%s' % (i, src.name )) try: src.rename (dst) except: - print ('fail to rename %s' % (src.name) ) + print ('fail to rename %s' % (src.name) ) - for i in tqdm( range(0,len(img_list)) , desc="Renaming" ): + for i in tqdm( range(len(img_list)) , desc="Renaming" ): src = Path (img_list[i][0]) src = input_path / ('%.5d_%s' % (i, src.name)) @@ -483,8 +660,8 @@ def final_rename(input_path, img_list): try: src.rename (dst) except: - print ('fail to rename %s' % (src.name) ) - + print ('fail to rename %s' % (src.name) ) + def sort_by_origname(input_path): print ("Sort by original filename...") @@ -513,7 +690,7 @@ def main (input_path, sort_by_method): print ("Running sort tool.\r\n") img_list = [] - + trash_img_list = [] if sort_by_method == 'blur': img_list = sort_by_blur (input_path) elif sort_by_method == 'face': img_list = sort_by_face (input_path) elif sort_by_method == 'face-dissim': img_list = sort_by_face_dissim (input_path) @@ -524,5 +701,6 @@ def main (input_path, sort_by_method): elif sort_by_method == 'hue': img_list = sort_by_hue (input_path) elif sort_by_method == 'black': img_list = sort_by_black (input_path) elif sort_by_method == 'origname': img_list = sort_by_origname (input_path) + elif sort_by_method == 'final': img_list, trash_img_list = sort_final (input_path) - final_rename (input_path, img_list) + final_process (input_path, img_list, trash_img_list)