diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py index 3a653e3..1b146bf 100644 --- a/mainscripts/Extractor.py +++ b/mainscripts/Extractor.py @@ -27,13 +27,14 @@ DEBUG = False class ExtractSubprocessor(Subprocessor): class Data(object): - def __init__(self, filename=None, rects=None, landmarks = None, landmarks_accurate=True, pitch_yaw_roll=None, final_output_files = None): + def __init__(self, filename=None, rects=None, landmarks = None, landmarks_accurate=True, pitch_yaw_roll=None, force_output_path=None, final_output_files = None): self.filename = filename self.rects = rects or [] self.rects_rotation = 0 self.landmarks_accurate = landmarks_accurate self.landmarks = landmarks or [] self.pitch_yaw_roll = pitch_yaw_roll + self.force_output_path = force_output_path self.final_output_files = final_output_files or [] self.faces_detected = 0 @@ -248,13 +249,18 @@ class ExtractSubprocessor(Subprocessor): if self.debug_dir is not None: LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type, transparent_mask=True) + final_output_path = self.final_output_path + if data.force_output_path is not None: + final_output_path = data.force_output_path + if src_dflimg is not None and filename_path.suffix == '.jpg': #if extracting from dflimg and jpg copy it in order not to lose quality - output_file = str(self.final_output_path / filename_path.name) + output_file = str(final_output_path / filename_path.name) if str(filename_path) != str(output_file): shutil.copy ( str(filename_path), str(output_file) ) else: - output_file = '{}_{}{}'.format(str(self.final_output_path / filename_path.stem), str(face_idx), '.jpg') + + output_file = '{}_{}{}'.format(str(final_output_path / filename_path.stem), str(face_idx), '.jpg') cv2_imwrite(output_file, face_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100] ) DFLJPG.embed_data(output_file, face_type=FaceType.toString(self.face_type), @@ -303,7 +309,8 @@ class ExtractSubprocessor(Subprocessor): self.devices = ExtractSubprocessor.get_devices_for_config(self.manual, self.type, multi_gpu, cpu_only) no_response_time_sec = 60 if not self.manual and not DEBUG else 999999 - super().__init__('Extractor', ExtractSubprocessor.Cli, no_response_time_sec) + + super().__init__('Extractor', ExtractSubprocessor.Cli, no_response_time_sec, initialize_subprocesses_in_serial=(type != 'final')) #override def on_check_run(self): diff --git a/mainscripts/dev_misc.py b/mainscripts/dev_misc.py index b6e1d4d..5718416 100644 --- a/mainscripts/dev_misc.py +++ b/mainscripts/dev_misc.py @@ -5,7 +5,7 @@ from pathlib import Path import cv2 import numpy as np -from facelib import LandmarksProcessor +from facelib import FaceType, LandmarksProcessor from interact import interact as io from joblib import Subprocessor from utils import Path_utils @@ -14,8 +14,159 @@ from utils.DFLJPG import DFLJPG from utils.DFLPNG import DFLPNG from . import Extractor, Sorter +from .Extractor import ExtractSubprocessor +def extract_vggface2_dataset(input_dir, device_args={} ): + multi_gpu = device_args.get('multi_gpu', False) + cpu_only = device_args.get('cpu_only', False) + + input_path = Path(input_dir) + if not input_path.exists(): + raise ValueError('Input directory not found. Please ensure it exists.') + + bb_csv = input_path / 'loose_bb_train.csv' + if not bb_csv.exists(): + raise ValueError('loose_bb_train.csv found. Please ensure it exists.') + + bb_lines = bb_csv.read_text().split('\n') + bb_lines.pop(0) + + bb_dict = {} + for line in bb_lines: + name, l, t, w, h = line.split(',') + name = name[1:-1] + l, t, w, h = [ int(x) for x in (l, t, w, h) ] + bb_dict[name] = (l,t,w, h) + + + output_path = input_path.parent / (input_path.name + '_out') + + dir_names = Path_utils.get_all_dir_names(input_path) + + if not output_path.exists(): + output_path.mkdir(parents=True, exist_ok=True) + + data = [] + for dir_name in io.progress_bar_generator(dir_names, "Collecting"): + cur_input_path = input_path / dir_name + cur_output_path = output_path / dir_name + + if not cur_output_path.exists(): + cur_output_path.mkdir(parents=True, exist_ok=True) + + input_path_image_paths = Path_utils.get_image_paths(cur_input_path) + + for filename in input_path_image_paths: + filename_path = Path(filename) + + name = filename_path.parent.name + '/' + filename_path.stem + if name not in bb_dict: + continue + + l,t,w,h = bb_dict[name] + if min(w,h) < 128: + continue + + data += [ ExtractSubprocessor.Data(filename=filename,rects=[ (l,t,l+w,t+h) ], landmarks_accurate=False, force_output_path=cur_output_path ) ] + + face_type = FaceType.fromString('full_face') + + io.log_info ('Performing 2nd pass...') + data = ExtractSubprocessor (data, 'landmarks', 256, face_type, debug_dir=None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run() + + io.log_info ('Performing 3rd pass...') + ExtractSubprocessor (data, 'final', 256, face_type, debug_dir=None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=None).run() + + +""" + import code + code.interact(local=dict(globals(), **locals())) + + data_len = len(data) + i = 0 + while i < data_len-1: + i_name = Path(data[i].filename).parent.name + + sub_data = [] + + for j in range (i, data_len): + j_name = Path(data[j].filename).parent.name + if i_name == j_name: + sub_data += [ data[j] ] + else: + break + i = j + + cur_output_path = output_path / i_name + + io.log_info (f"Processing: {str(cur_output_path)}, {i}/{data_len} ") + + if not cur_output_path.exists(): + cur_output_path.mkdir(parents=True, exist_ok=True) + + + + + + + + + for dir_name in dir_names: + + cur_input_path = input_path / dir_name + cur_output_path = output_path / dir_name + + input_path_image_paths = Path_utils.get_image_paths(cur_input_path) + l = len(input_path_image_paths) + #if l < 250 or l > 350: + # continue + + io.log_info (f"Processing: {str(cur_input_path)} ") + + if not cur_output_path.exists(): + cur_output_path.mkdir(parents=True, exist_ok=True) + + + data = [] + for filename in input_path_image_paths: + filename_path = Path(filename) + + name = filename_path.parent.name + '/' + filename_path.stem + if name not in bb_dict: + continue + + bb = bb_dict[name] + l,t,w,h = bb + if min(w,h) < 128: + continue + + data += [ ExtractSubprocessor.Data(filename=filename,rects=[ (l,t,l+w,t+h) ], landmarks_accurate=False ) ] + + + + io.log_info ('Performing 2nd pass...') + data = ExtractSubprocessor (data, 'landmarks', 256, face_type, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False).run() + + io.log_info ('Performing 3rd pass...') + data = ExtractSubprocessor (data, 'final', 256, face_type, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, final_output_path=cur_output_path).run() + + + io.log_info (f"Sorting: {str(cur_output_path)} ") + Sorter.main (input_path=str(cur_output_path), sort_by_method='hist') + + import code + code.interact(local=dict(globals(), **locals())) + + #try: + # io.log_info (f"Removing: {str(cur_input_path)} ") + # shutil.rmtree(cur_input_path) + #except: + # io.log_info (f"unable to remove: {str(cur_input_path)} ") + + + + def extract_vggface2_dataset(input_dir, device_args={} ): multi_gpu = device_args.get('multi_gpu', False) cpu_only = device_args.get('cpu_only', False) @@ -64,7 +215,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): except: io.log_info (f"unable to remove: {str(cur_input_path)} ") - +""" class CelebAMASKHQSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): diff --git a/nnlib/VGGFace.py b/nnlib/VGGFace.py index 06babf0..60a1de4 100644 --- a/nnlib/VGGFace.py +++ b/nnlib/VGGFace.py @@ -6,43 +6,31 @@ def VGGFace(): img_input = Input(shape=(224,224,3) ) # Block 1 - x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_1')( - img_input) + x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_1')(img_input) x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x) # Block 2 - x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')( - x) - x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')( - x) + x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')(x) + x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x) # Block 3 - x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')( - x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')( - x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')( - x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')(x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')(x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3')(x) # Block 4 - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')( - x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')( - x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')( - x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='pool4')(x) # Block 5 - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')( - x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')( - x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')( - x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='pool5')(x) @@ -58,7 +46,6 @@ def VGGFace(): model = Model(img_input, x, name='vggface_vgg16') weights_path = keras.utils.data_utils.get_file('rcmalli_vggface_tf_vgg16.h5', 'https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_tf_vgg16.h5') - model.load_weights(weights_path, by_name=True) return model \ No newline at end of file diff --git a/samplelib/SampleGeneratorFacePerson.py b/samplelib/SampleGeneratorFacePerson.py new file mode 100644 index 0000000..2b038dd --- /dev/null +++ b/samplelib/SampleGeneratorFacePerson.py @@ -0,0 +1,198 @@ +import multiprocessing +import traceback + +import cv2 +import numpy as np + +from facelib import LandmarksProcessor +from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor, + SampleType) +from utils import iter_utils + + +''' +arg +output_sample_types = [ + [SampleProcessor.TypeFlags, size, (optional) {} opts ] , + ... + ] +''' +class SampleGeneratorFacePerson(SampleGeneratorBase): + def __init__ (self, samples_path, debug=False, batch_size=1, + sample_process_options=SampleProcessor.Options(), + output_sample_types=[], + person_id_mode=1, + generators_count=2, + generators_random_seed=None, + **kwargs): + + super().__init__(samples_path, debug, batch_size) + self.sample_process_options = sample_process_options + self.output_sample_types = output_sample_types + self.person_id_mode = person_id_mode + + if generators_random_seed is not None and len(generators_random_seed) != generators_count: + raise ValueError("len(generators_random_seed) != generators_count") + self.generators_random_seed = generators_random_seed + + samples = SampleLoader.load (SampleType.FACE, self.samples_path, person_id_mode=True) + + if person_id_mode==1: + new_samples = [] + for s in samples: + new_samples += s + samples = new_samples + np.random.shuffle(samples) + + self.samples_len = len(samples) + + if self.samples_len == 0: + raise ValueError('No training data provided.') + + if self.debug: + self.generators_count = 1 + self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (0, samples) )] + else: + self.generators_count = min ( generators_count, self.samples_len ) + + if person_id_mode==1: + self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (i, samples[i::self.generators_count]) ) for i in range(self.generators_count) ] + else: + self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (i, samples) ) for i in range(self.generators_count) ] + + self.generator_counter = -1 + + #overridable + def get_total_sample_count(self): + return self.samples_len + + def __iter__(self): + return self + + def __next__(self): + self.generator_counter += 1 + generator = self.generators[self.generator_counter % len(self.generators) ] + return next(generator) + + def batch_func(self, param ): + generator_id, samples = param + + if self.generators_random_seed is not None: + np.random.seed ( self.generators_random_seed[generator_id] ) + + if self.person_id_mode==1: + samples_len = len(samples) + samples_idxs = [*range(samples_len)] + shuffle_idxs = [] + elif self.person_id_mode==2: + persons_count = len(samples) + + person_idxs = [] + for j in range(persons_count): + for i in range(j+1,persons_count): + person_idxs += [ [i,j] ] + + shuffle_person_idxs = [] + + samples_idxs = [None]*persons_count + shuffle_idxs = [None]*persons_count + + for i in range(persons_count): + samples_idxs[i] = [*range(len(samples[i]))] + shuffle_idxs[i] = [] + + while True: + + if self.person_id_mode==2: + if len(shuffle_person_idxs) == 0: + shuffle_person_idxs = person_idxs.copy() + np.random.shuffle(shuffle_person_idxs) + person_ids = shuffle_person_idxs.pop() + + + batches = None + for n_batch in range(self.batch_size): + + if self.person_id_mode==1: + if len(shuffle_idxs) == 0: + shuffle_idxs = samples_idxs.copy() + np.random.shuffle(shuffle_idxs) + + idx = shuffle_idxs.pop() + sample = samples[ idx ] + + try: + x = SampleProcessor.process (sample, self.sample_process_options, self.output_sample_types, self.debug) + except: + raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) + + if type(x) != tuple and type(x) != list: + raise Exception('SampleProcessor.process returns NOT tuple/list') + + if batches is None: + batches = [ [] for _ in range(len(x)) ] + + batches += [ [] ] + i_person_id = len(batches)-1 + + for i in range(len(x)): + batches[i].append ( x[i] ) + + batches[i_person_id].append ( np.array([sample.person_id]) ) + + + else: + person_id1, person_id2 = person_ids + + if len(shuffle_idxs[person_id1]) == 0: + shuffle_idxs[person_id1] = samples_idxs[person_id1].copy() + np.random.shuffle(shuffle_idxs[person_id1]) + + idx = shuffle_idxs[person_id1].pop() + sample1 = samples[person_id1][idx] + + if len(shuffle_idxs[person_id2]) == 0: + shuffle_idxs[person_id2] = samples_idxs[person_id2].copy() + np.random.shuffle(shuffle_idxs[person_id2]) + + idx = shuffle_idxs[person_id2].pop() + sample2 = samples[person_id2][idx] + + if sample1 is not None and sample2 is not None: + try: + x1 = SampleProcessor.process (sample1, self.sample_process_options, self.output_sample_types, self.debug) + except: + raise Exception ("Exception occured in sample %s. Error: %s" % (sample1.filename, traceback.format_exc() ) ) + + try: + x2 = SampleProcessor.process (sample2, self.sample_process_options, self.output_sample_types, self.debug) + except: + raise Exception ("Exception occured in sample %s. Error: %s" % (sample2.filename, traceback.format_exc() ) ) + + x1_len = len(x1) + if batches is None: + batches = [ [] for _ in range(x1_len) ] + batches += [ [] ] + i_person_id1 = len(batches)-1 + + batches += [ [] for _ in range(len(x2)) ] + batches += [ [] ] + i_person_id2 = len(batches)-1 + + for i in range(x1_len): + batches[i].append ( x1[i] ) + + for i in range(len(x2)): + batches[x1_len+1+i].append ( x2[i] ) + + batches[i_person_id1].append ( np.array([sample1.person_id]) ) + + batches[i_person_id2].append ( np.array([sample2.person_id]) ) + + + + yield [ np.array(batch) for batch in batches] + + @staticmethod + def get_person_id_max_count(samples_path): + return SampleLoader.get_person_id_max_count(samples_path) \ No newline at end of file diff --git a/samplelib/__init__.py b/samplelib/__init__.py index d865394..1731a74 100644 --- a/samplelib/__init__.py +++ b/samplelib/__init__.py @@ -4,5 +4,6 @@ from .SampleLoader import SampleLoader from .SampleProcessor import SampleProcessor from .SampleGeneratorBase import SampleGeneratorBase from .SampleGeneratorFace import SampleGeneratorFace +from .SampleGeneratorFacePerson import SampleGeneratorFacePerson from .SampleGeneratorFaceTemporal import SampleGeneratorFaceTemporal from .SampleGeneratorImageTemporal import SampleGeneratorImageTemporal