added sort by 'final'

This commit is contained in:
iperov 2019-01-07 14:20:44 +04:00
parent 6faabcb94e
commit abdbe7fd8d
3 changed files with 197 additions and 18 deletions

View file

@ -96,19 +96,19 @@ SAE model Cage-Trump video: https://www.youtube.com/watch?v=2R_aqHBClUQ
`black` Places images that contain black areas at the end of the folder. Useful for getting rid of src faces that are cut off by the screen edge. `black` Places images that contain black areas at the end of the folder. Useful for getting rid of src faces that are cut off by the screen edge.
Best practice for gather src faceset: `final` sorts by yaw, blur, and hist, and leaves best 1500-1700 images.
1) delete first unsorted aligned groups of images what you can to delete. Dont touch target face mixed with others. Best practice for gather src faceset from tens of thousands images:
2) `black` -> delete faces cutted by black area at end of folder
3) `blur` -> delete 30-50% at end of folder 1) `black` -> then delete faces cutted by black area at end of folder
4) `hist` -> delete groups of similar and leave only target face 2) `blur` -> then delete blurred faces at end of folder
5) `hist-dissim` -> leave only first **1500 faces** 3) `hist` -> then delete groups of similar unwanted faces and leave only target face
6) `face-yaw` -> just for finalize faceset 4) `final` -> then delete faces occluded by obstructions
Best practice for dst faces: Best practice for dst faces:
1) First, delete every group of unsorted aligned images that you can safely delete. Don't touch the target face where it is mixed with others. 1) First, delete every group of unsorted aligned images that you can safely delete. Don't touch the target face where it is mixed with others.
2) `hist` -> delete groups of similar and leave only target face 2) `hist` -> then delete groups of similar and leave only target face
### **Ready to work facesets**: ### **Ready to work facesets**:

View file

@ -61,7 +61,7 @@ if __name__ == "__main__":
sort_parser = subparsers.add_parser( "sort", help="Sort faces in a directory.") sort_parser = subparsers.add_parser( "sort", help="Sort faces in a directory.")
sort_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") sort_parser.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
sort_parser.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "hist", "hist-dissim", "brightness", "hue", "black", "origname"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." ) sort_parser.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "final"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." )
sort_parser.set_defaults (func=process_sort) sort_parser.set_defaults (func=process_sort)
def process_train(arguments): def process_train(arguments):
@ -151,7 +151,8 @@ if __name__ == "__main__":
arguments.func(arguments) arguments.func(arguments)
print ("Done.") print ("Done.")
''' '''
import code import code
code.interact(local=dict(globals(), **locals())) code.interact(local=dict(globals(), **locals()))
''' '''

View file

@ -452,6 +452,163 @@ def sort_by_hist_dissim(input_path):
img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True)
return img_list return img_list
class FinalLoaderSubprocessor(SubprocessorBase):
    """Loads every image and gathers the per-image data sort_final ranks on.

    For each path the client side produces either
      [0, [path, sharpness, hist, yaw]]  - image usable for the final sort, or
      [1, [path]]                        - image to be trashed (not a .png, or
                                           DFLPNG.load returned None).
    The host side collects the two kinds of payload into separate lists, which
    onFinalizeAndGetResult hands back as a (result, result_trash) pair.

    NOTE(review): when and on which side each "onHost*" / "onClient*" hook is
    invoked is defined by SubprocessorBase, which is not visible here.
    """

    #override
    def __init__(self, img_list ):
        # img_list doubles as the queue of pending work: items are popped from
        # its front in onHostGetData and pushed back in onHostDataReturn.
        self.img_list = img_list
        self.result, self.result_trash = [], []
        # NOTE(review): the meaning of the second argument (60) is defined by
        # SubprocessorBase - confirm there.
        super().__init__('FinalLoader', 60)

    #override
    def onHostClientsInitialized(self):
        # Nothing to do for this subprocessor.
        pass

    #override
    def process_info_generator(self):
        # One entry per CPU, capped at 8 entries.
        worker_count = min(multiprocessing.cpu_count(), 8)
        for idx in range(worker_count):
            name = 'CPU%d' % (idx)
            yield name, {}, {'device_idx': idx, 'device_name': name}

    #override
    def get_no_process_started_message(self):
        # NOTE(review): this prints the message and implicitly returns None;
        # confirm whether the base class expects the text to be returned.
        print ( 'Unable to start CPU processes.')

    #override
    def onHostGetProgressBarDesc(self):
        return "Loading"

    #override
    def onHostGetProgressBarLen(self):
        # Number of images still queued at the moment of the call.
        return len (self.img_list)

    #override
    def onHostGetData(self, host_dict):
        # Hand out the next pending path wrapped in a one-element list,
        # or None once the queue is empty.
        if not self.img_list:
            return None
        return [self.img_list.pop(0)]

    #override
    def onHostDataReturn (self, host_dict, data):
        # Put a returned work item back at the front of the queue.
        self.img_list.insert(0, data[0])

    #override
    def onClientInitialize(self, client_dict):
        self.safe_print ('Running on %s.' % (client_dict['device_name']) )
        return None

    #override
    def onClientFinalize(self):
        # Nothing to release.
        pass

    #override
    def onClientProcessData(self, data):
        # data is the one-element list produced by onHostGetData.
        filepath = Path(data[0])
        path_str = str(filepath)

        # Only files with a '.png' suffix are accepted; others are trashed.
        if filepath.suffix != '.png':
            print ("%s is not a png file required for sort_final" % (filepath.name) )
            return [ 1, [path_str] ]

        dflpng = DFLPNG.load (path_str, print_on_no_embedded_data=True)
        if dflpng is None:
            return [ 1, [path_str] ]

        bgr = cv2.imread(path_str)
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

        # Sharpness is estimated on the grayscale image multiplied by the
        # first channel of the landmark hull mask.
        hull_mask = LandmarksProcessor.get_image_hull_mask (bgr, dflpng.get_landmarks() )
        gray_masked = ( gray * hull_mask[:,:,0] ).astype(np.uint8)
        sharpness = estimate_sharpness(gray_masked)

        # 256-bin histogram of the unmasked grayscale image.
        hist = cv2.calcHist([gray], [0], None, [256], [0, 256])

        return [ 0, [path_str, sharpness, hist, dflpng.get_yaw_value() ] ]

    #override
    def onClientGetDataName (self, data):
        #return string identificator of your data
        return data[0]

    #override
    def onHostResult (self, host_dict, data, result):
        # result is [status, payload]; status 0 means usable, anything else trash.
        status, payload = result[0], result[1]
        bucket = self.result if status == 0 else self.result_trash
        bucket.append (payload)
        # NOTE(review): presumably the progress increment - confirm in SubprocessorBase.
        return 1

    #override
    def onFinalizeAndGetResult(self):
        return self.result, self.result_trash
def sort_final(input_path):
    """Pick the best images per yaw bin and trash the rest.

    Every image under input_path is loaded through FinalLoaderSubprocessor,
    which yields records of the form [path, sharpness, hist, yaw]. The
    records are then processed in four passes:

      1. "Sort by yaw"   - distribute records over 128 bins of width 2 on the
                           negated yaw value; the first and last bins are
                           open-ended.
      2. "Sort by blur"  - inside each bin order by sharpness (highest first);
                           if the bin holds more than 30 records drop its
                           lower half, then keep at most 150 records.
      3. "Sort by hist"  - replace element [3] of every record with the sum of
                           its Bhattacharyya histogram distances to all other
                           records in the bin, and order by that sum (highest
                           first).
      4. "Fetching best" - keep the first 15 records of each bin.

    Returns:
        (final_img_list, trash_img_list): the kept records and everything
        that was rejected, including the entries the loader already trashed.
    """
    print ("Performing final sort.")

    loaded, trash_img_list = FinalLoaderSubprocessor( Path_utils.get_image_paths(input_path) ).process()

    grads = 128
    imgs_per_grad = 15
    sharpned_imgs_per_grad = imgs_per_grad*10
    last = grads-1

    # One slot per yaw bin; a slot stays None when no record falls into it.
    buckets = [None]*grads

    for g in tqdm ( range (grads), desc="Sort by yaw" ):
        low = -grads+1 + g*2
        high = -grads+1 + (g+1)*2

        picked = []
        for rec in loaded:
            s_yaw = -rec[3]
            in_first = g == 0 and s_yaw < high
            in_middle = g < last and low <= s_yaw < high
            in_last = g == last and s_yaw >= low
            if in_first or in_middle or in_last:
                picked.append (rec)

        if picked:
            buckets[g] = picked

    for g in tqdm ( range (grads), desc="Sort by blur" ):
        bucket = buckets[g]
        if bucket is None:
            continue

        # Index 1 of a record is its sharpness.
        bucket = sorted(bucket, key=operator.itemgetter(1), reverse=True)

        if len(bucket) > imgs_per_grad*2:
            half = len(bucket) // 2
            trash_img_list.extend (bucket[half:])
            bucket = bucket[:half]

        if len(bucket) > sharpned_imgs_per_grad:
            trash_img_list.extend (bucket[sharpned_imgs_per_grad:])
            bucket = bucket[:sharpned_imgs_per_grad]

        buckets[g] = bucket

    for g in tqdm ( range (grads), desc="Sort by hist" ):
        bucket = buckets[g]
        if bucket is None:
            continue

        for i, rec in enumerate(bucket):
            # The yaw stored at index 3 is no longer needed after binning, so
            # the slot is reused for the histogram-dissimilarity score.
            rec[3] = sum( cv2.compareHist(rec[2], other[2], cv2.HISTCMP_BHATTACHARYYA)
                          for j, other in enumerate(bucket) if j != i )

        buckets[g] = sorted(bucket, key=operator.itemgetter(3), reverse=True)

    final_img_list = []
    for g in tqdm ( range (grads), desc="Fetching best" ):
        bucket = buckets[g]
        if bucket is None:
            continue

        final_img_list.extend (bucket[:imgs_per_grad])
        trash_img_list.extend (bucket[imgs_per_grad:])

    return final_img_list, trash_img_list
def sort_by_black(input_path): def sort_by_black(input_path):
print ("Sorting by amount of black pixels...") print ("Sorting by amount of black pixels...")
@ -466,16 +623,36 @@ def sort_by_black(input_path):
return img_list return img_list
def final_rename(input_path, img_list): def final_process(input_path, img_list, trash_img_list):
for i in tqdm( range(0,len(img_list)), desc="Renaming" , leave=False): if len(trash_img_list) != 0:
parent_input_path = input_path.parent
trash_path = parent_input_path / (input_path.stem + '_trash')
trash_path.mkdir (exist_ok=True)
print ("Trashing %d items to %s" % ( len(trash_img_list), str(trash_path) ) )
for filename in Path_utils.get_image_paths(trash_path):
Path(filename).unlink()
for i in tqdm( range(len(trash_img_list)), desc="Moving trash" , leave=False):
src = Path (trash_img_list[i][0])
dst = trash_path / src.name
try:
src.rename (dst)
except:
print ('fail to trashing %s' % (src.name) )
print ("")
for i in tqdm( range(len(img_list)), desc="Renaming" , leave=False):
src = Path (img_list[i][0]) src = Path (img_list[i][0])
dst = input_path / ('%.5d_%s' % (i, src.name )) dst = input_path / ('%.5d_%s' % (i, src.name ))
try: try:
src.rename (dst) src.rename (dst)
except: except:
print ('fail to rename %s' % (src.name) ) print ('fail to rename %s' % (src.name) )
for i in tqdm( range(0,len(img_list)) , desc="Renaming" ): for i in tqdm( range(len(img_list)) , desc="Renaming" ):
src = Path (img_list[i][0]) src = Path (img_list[i][0])
src = input_path / ('%.5d_%s' % (i, src.name)) src = input_path / ('%.5d_%s' % (i, src.name))
@ -483,8 +660,8 @@ def final_rename(input_path, img_list):
try: try:
src.rename (dst) src.rename (dst)
except: except:
print ('fail to rename %s' % (src.name) ) print ('fail to rename %s' % (src.name) )
def sort_by_origname(input_path): def sort_by_origname(input_path):
print ("Sort by original filename...") print ("Sort by original filename...")
@ -513,7 +690,7 @@ def main (input_path, sort_by_method):
print ("Running sort tool.\r\n") print ("Running sort tool.\r\n")
img_list = [] img_list = []
trash_img_list = []
if sort_by_method == 'blur': img_list = sort_by_blur (input_path) if sort_by_method == 'blur': img_list = sort_by_blur (input_path)
elif sort_by_method == 'face': img_list = sort_by_face (input_path) elif sort_by_method == 'face': img_list = sort_by_face (input_path)
elif sort_by_method == 'face-dissim': img_list = sort_by_face_dissim (input_path) elif sort_by_method == 'face-dissim': img_list = sort_by_face_dissim (input_path)
@ -524,5 +701,6 @@ def main (input_path, sort_by_method):
elif sort_by_method == 'hue': img_list = sort_by_hue (input_path) elif sort_by_method == 'hue': img_list = sort_by_hue (input_path)
elif sort_by_method == 'black': img_list = sort_by_black (input_path) elif sort_by_method == 'black': img_list = sort_by_black (input_path)
elif sort_by_method == 'origname': img_list = sort_by_origname (input_path) elif sort_by_method == 'origname': img_list = sort_by_origname (input_path)
elif sort_by_method == 'final': img_list, trash_img_list = sort_final (input_path)
final_rename (input_path, img_list) final_process (input_path, img_list, trash_img_list)