diff --git a/core/cv2ex.py b/core/cv2ex.py
index 0fda461..ea6ca84 100644
--- a/core/cv2ex.py
+++ b/core/cv2ex.py
@@ -4,8 +4,10 @@ from pathlib import Path
 from core.interact import interact as io
 import traceback
 
-#allows to open non-english characters path
 def cv2_imread(filename, flags=cv2.IMREAD_UNCHANGED, loader_func=None):
+    """
+    allows to open non-english characters path
+    """
     try:
         if loader_func is not None:
             bytes = bytearray(loader_func(filename))
diff --git a/core/imagelib/__init__.py b/core/imagelib/__init__.py
index 6c0236f..993bcdb 100644
--- a/core/imagelib/__init__.py
+++ b/core/imagelib/__init__.py
@@ -11,7 +11,7 @@ from .warp import gen_warp_params, warp_by_params
 
 from .reduce_colors import reduce_colors
 
-from .color_transfer import color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer, seamless_clone
+from .color_transfer import color_transfer, color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer, seamless_clone
 
 from .common import normalize_channels, cut_odd_image, overlay_alpha_image
diff --git a/core/imagelib/color_transfer.py b/core/imagelib/color_transfer.py
index 1451af2..175920b 100644
--- a/core/imagelib/color_transfer.py
+++ b/core/imagelib/color_transfer.py
@@ -299,7 +299,7 @@ def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5):
     matched_img += mu_s
     matched_img[matched_img>1] = 1
     matched_img[matched_img<0] = 0
-    return matched_img.astype(source_img.dtype)
+    return np.clip(matched_img.astype(source_img.dtype), 0, 1)
 
 def lab_image_stats(image):
     # compute the mean and standard deviation of each channel
@@ -390,4 +390,25 @@ def color_transfer_mix(img_src,img_trg):
     img_rct = cv2.cvtColor(img_rct, cv2.COLOR_LAB2BGR)
 
-    return (img_rct / 255.0).astype(np.float32)
\ No newline at end of file
+    return (img_rct / 255.0).astype(np.float32)
+
+def color_transfer(ct_mode, img_src, img_trg):
+    """
+    color transfer for [0,1] float inputs
+    """
+    if ct_mode == 'lct':
+        out = linear_color_transfer (img_src, img_trg)
+    elif ct_mode == 'rct':
+        out = reinhard_color_transfer ( np.clip( img_src*255, 0, 255 ).astype(np.uint8),
+                                        np.clip( img_trg*255, 0, 255 ).astype(np.uint8) )
+        out = np.clip( out.astype(np.float32) / 255.0, 0.0, 1.0)
+    elif ct_mode == 'mkl':
+        out = color_transfer_mkl (img_src, img_trg)
+    elif ct_mode == 'idt':
+        out = color_transfer_idt (img_src, img_trg)
+    elif ct_mode == 'sot':
+        out = color_transfer_sot (img_src, img_trg)
+        out = np.clip( out, 0.0, 1.0)
+    else:
+        raise ValueError(f"unknown ct_mode {ct_mode}")
+    return out
\ No newline at end of file
diff --git a/core/imagelib/warp.py b/core/imagelib/warp.py
index 3610cec..4fd06cf 100644
--- a/core/imagelib/warp.py
+++ b/core/imagelib/warp.py
@@ -47,11 +47,13 @@ def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0
 
     return params
 
-def warp_by_params (params, img, warp, transform, flip, is_border_replicate):
-    if warp:
+def warp_by_params (params, img, can_warp, can_transform, can_flip, border_replicate):
+    if can_warp:
         img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC )
-    if transform:
-        img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if is_border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
-    if flip and params['flip']:
+    if can_transform:
+        img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
+        if len(img.shape) == 2:
+            img = img[...,None]
+    if can_flip and params['flip']:
         img = img[:,::-1,...]
     return img
\ No newline at end of file
diff --git a/core/leras/layers.py b/core/leras/layers.py
index 90d8027..b45ccb9 100644
--- a/core/leras/layers.py
+++ b/core/leras/layers.py
@@ -78,26 +78,7 @@ def initialize_layers(nn):
             return True
 
         def init_weights(self):
-            ops = []
-
-            ca_tuples_w = []
-            ca_tuples = []
-            for w in self.get_weights():
-                initializer = w.initializer
-                for input in initializer.inputs:
-                    if "_cai_" in input.name:
-                        ca_tuples_w.append (w)
-                        ca_tuples.append ( (w.shape.as_list(), w.dtype.as_numpy_dtype) )
-                        break
-                else:
-                    ops.append (initializer)
-
-            if len(ops) != 0:
-                nn.tf_sess.run (ops)
-
-            if len(ca_tuples) != 0:
-                nn.tf_batch_set_value( [*zip(ca_tuples_w, nn.initializers.ca.generate_batch (ca_tuples))] )
-
+            nn.tf_init_weights(self.get_weights())
    nn.Saveable = Saveable

    class LayerBase():
@@ -302,7 +283,8 @@ def initialize_layers(nn):
                 raise ValueError ("strides must be an int type")
             if not isinstance(dilations, int):
                 raise ValueError ("dilations must be an int type")
-
+            kernel_size = int(kernel_size)
+
             if dtype is None:
                 dtype = nn.tf_floatx
@@ -405,7 +387,8 @@ def initialize_layers(nn):
         def __init__(self, in_ch, out_ch, kernel_size, strides=2, padding='SAME', use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ):
             if not isinstance(strides, int):
                 raise ValueError ("strides must be an int type")
-
+            kernel_size = int(kernel_size)
+
             if dtype is None:
                 dtype = nn.tf_floatx
diff --git a/core/leras/models.py b/core/leras/models.py
new file mode 100644
index 0000000..bac6735
--- /dev/null
+++ b/core/leras/models.py
@@ -0,0 +1,41 @@
+def initialize_models(nn):
+    tf = nn.tf
+
+
+
+    class PatchDiscriminator(nn.ModelBase):
+        def on_build(self, patch_size, in_ch, base_ch=256, kernel_initializer=None):
+            prev_ch = in_ch
+            self.convs = []
+            for i, (kernel_size, strides) in enumerate(patch_discriminator_kernels[patch_size]):
+                cur_ch = base_ch * min( (2**i), 8 )
+                self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=kernel_size, strides=strides, padding='SAME', kernel_initializer=kernel_initializer) )
+                prev_ch = cur_ch
+
+            self.out_conv = nn.Conv2D( prev_ch, 1, kernel_size=1, padding='VALID', kernel_initializer=kernel_initializer)
+
+        def forward(self, x):
+            for conv in self.convs:
+                x = tf.nn.leaky_relu( conv(x), 0.1 )
+            return self.out_conv(x)
+
+    nn.PatchDiscriminator = PatchDiscriminator
+
+
+patch_discriminator_kernels = \
+    { 1 : [ [1,1] ],
+      2 : [ [2,1] ],
+      3 : [ [2,1], [2,1] ],
+      4 : [ [2,2], [2,2] ],
+      5 : [ [3,2], [2,2] ],
+      6 : [ [4,2], [2,2] ],
+      7 : [ [3,2], [3,2] ],
+      8 : [ [4,2], [3,2] ],
+      9 : [ [3,2], [4,2] ],
+      10 : [ [4,2], [4,2] ],
+      11 : [ [3,2], [3,2], [2,1] ],
+      12 : [ [4,2], [3,2], [2,1] ],
+      13 : [ [3,2], [4,2], [2,1] ],
+      14 : [ [4,2], [4,2], [2,1] ],
+      15 : [ [3,2], [3,2], [3,1] ],
+      16 : [ [4,2], [3,2], [3,1] ] }
\ No newline at end of file
diff --git a/core/leras/nn.py b/core/leras/nn.py
index c1dfc93..9199b6d 100644
--- a/core/leras/nn.py
+++ b/core/leras/nn.py
@@ -46,6 +46,7 @@ class nn():
     # Tensor ops
     tf_get_value = None
     tf_batch_set_value = None
+    tf_init_weights = None
     tf_gradients = None
     tf_average_gv_list = None
     tf_average_tensor_list = None
@@ -78,6 +79,9 @@ class nn():
     # Optimizers
     TFBaseOptimizer = None
     TFRMSpropOptimizer = None
+
+    # Models
+
PatchDiscriminator = None @staticmethod def initialize(device_config=None, floatx="float32", data_format="NHWC"): @@ -138,11 +142,13 @@ class nn(): from .layers import initialize_layers from .initializers import initialize_initializers from .optimizers import initialize_optimizers + from .models import initialize_models initialize_tensor_ops(nn) initialize_layers(nn) initialize_initializers(nn) initialize_optimizers(nn) + initialize_models(nn) if nn.tf_sess is None: nn.tf_sess = tf.Session(config=nn.tf_sess_config) diff --git a/core/leras/tensor_ops.py b/core/leras/tensor_ops.py index 895071d..db99f0e 100644 --- a/core/leras/tensor_ops.py +++ b/core/leras/tensor_ops.py @@ -29,7 +29,28 @@ def initialize_tensor_ops(nn): nn.tf_sess.run(assign_ops, feed_dict=feed_dict) nn.tf_batch_set_value = tf_batch_set_value + def tf_init_weights(weights): + ops = [] + ca_tuples_w = [] + ca_tuples = [] + for w in weights: + initializer = w.initializer + for input in initializer.inputs: + if "_cai_" in input.name: + ca_tuples_w.append (w) + ca_tuples.append ( (w.shape.as_list(), w.dtype.as_numpy_dtype) ) + break + else: + ops.append (initializer) + + if len(ops) != 0: + nn.tf_sess.run (ops) + + if len(ca_tuples) != 0: + nn.tf_batch_set_value( [*zip(ca_tuples_w, nn.initializers.ca.generate_batch (ca_tuples))] ) + nn.tf_init_weights = tf_init_weights + def tf_gradients ( loss, vars ): grads = gradients.gradients(loss, vars, colocate_gradients_with_ops=True ) gv = [*zip(grads,vars)] diff --git a/main.py b/main.py index 71af4c7..b09c2fe 100644 --- a/main.py +++ b/main.py @@ -201,23 +201,23 @@ if __name__ == "__main__": def process_merge(arguments): osex.set_process_lowest_prio() - kwargs = {'model_class_name' : arguments.model_name, - 'saved_models_path' : Path(arguments.model_dir), - 'training_data_src_path' : Path(arguments.training_data_src_dir) if arguments.training_data_src_dir is not None else None, - 'force_model_name' : arguments.force_model_name, - 'input_path' : Path(arguments.input_dir), - 'output_path' : Path(arguments.output_dir), - 'aligned_path' : Path(arguments.aligned_dir) if arguments.aligned_dir is not None else None, - 'cpu_only' : arguments.cpu_only, - 'force_gpu_idxs' : arguments.force_gpu_idxs, - } from mainscripts import Merger - Merger.main (**kwargs) + Merger.main ( model_class_name = arguments.model_name, + saved_models_path = Path(arguments.model_dir), + training_data_src_path = Path(arguments.training_data_src_dir) if arguments.training_data_src_dir is not None else None, + force_model_name = arguments.force_model_name, + input_path = Path(arguments.input_dir), + output_path = Path(arguments.output_dir), + output_mask_path = Path(arguments.output_mask_dir), + aligned_path = Path(arguments.aligned_dir) if arguments.aligned_dir is not None else None, + force_gpu_idxs = arguments.force_gpu_idxs, + cpu_only = arguments.cpu_only) p = subparsers.add_parser( "merge", help="Merger") p.add_argument('--training-data-src-dir', action=fixPathAction, dest="training_data_src_dir", default=None, help="(optional, may be required by some models) Dir of extracted SRC faceset.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. 
This is where the merged files will be stored.") + p.add_argument('--output-mask-dir', required=True, action=fixPathAction, dest="output_mask_dir", help="Output mask directory. This is where the mask files will be stored.") p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", default=None, help="Aligned directory. This is where the extracted of dst faces stored.") p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") p.add_argument('--model', required=True, dest="model_name", choices=pathex.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Model class name.") @@ -268,13 +268,14 @@ if __name__ == "__main__": def process_videoed_video_from_sequence(arguments): osex.set_process_lowest_prio() from mainscripts import VideoEd - VideoEd.video_from_sequence (arguments.input_dir, - arguments.output_file, - arguments.reference_file, - arguments.ext, - arguments.fps, - arguments.bitrate, - arguments.lossless) + VideoEd.video_from_sequence (input_dir = arguments.input_dir, + output_file = arguments.output_file, + reference_file = arguments.reference_file, + ext = arguments.ext, + fps = arguments.fps, + bitrate = arguments.bitrate, + include_audio = arguments.include_audio, + lossless = arguments.lossless) p = videoed_parser.add_parser( "video-from-sequence", help="Make video from image sequence.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input file to be processed. Specify .*-extension to find first file.") @@ -283,7 +284,9 @@ if __name__ == "__main__": p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.") p.add_argument('--fps', type=int, dest="fps", default=None, help="FPS of output file. 
Overwritten by reference-file.") p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.") + p.add_argument('--include-audio', action="store_true", dest="include_audio", default=False, help="Include audio from reference file.") p.add_argument('--lossless', action="store_true", dest="lossless", default=False, help="PNG codec.") + p.set_defaults(func=process_videoed_video_from_sequence) def process_labelingtool_edit_mask(arguments): diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py index c9e014b..2786156 100644 --- a/mainscripts/Extractor.py +++ b/mainscripts/Extractor.py @@ -297,7 +297,18 @@ class ExtractSubprocessor(Subprocessor): if not cpu_only: if type == 'landmarks-manual': devices = [devices.get_best_device()] - result = [ (device.index, 'GPU', device.name, device.total_mem_gb) for device in devices ] + + result = [] + + for device in devices: + count = 1 + + if count == 1: + result += [ (device.index, 'GPU', device.name, device.total_mem_gb) ] + else: + for i in range(count): + result += [ (device.index, 'GPU', f"{device.name} #{i}", device.total_mem_gb) ] + return result else: if type == 'landmarks-manual': diff --git a/mainscripts/Merger.py b/mainscripts/Merger.py index f036b50..4c9eb56 100644 --- a/mainscripts/Merger.py +++ b/mainscripts/Merger.py @@ -39,6 +39,7 @@ class MergeSubprocessor(Subprocessor): self.frame_info = frame_info self.next_temporal_frame_infos = next_temporal_frame_infos self.output_filepath = None + self.output_mask_filepath = None self.idx = None self.cfg = None @@ -54,6 +55,7 @@ class MergeSubprocessor(Subprocessor): frame_info=None, next_temporal_frame_infos=None, output_filepath=None, + output_mask_filepath=None, need_return_image = False): self.idx = idx self.cfg = cfg @@ -61,6 +63,7 @@ class MergeSubprocessor(Subprocessor): self.frame_info = frame_info self.next_temporal_frame_infos = next_temporal_frame_infos self.output_filepath = output_filepath + self.output_mask_filepath = output_mask_filepath self.need_return_image = need_return_image if self.need_return_image: @@ -123,35 +126,22 @@ class MergeSubprocessor(Subprocessor): cfg.superres_func = self.superres_func frame_info = pf.frame_info - filepath = frame_info.filepath - landmarks_list = frame_info.landmarks_list - output_filepath = pf.output_filepath - need_return_image = pf.need_return_image + if len(frame_info.landmarks_list) == 0: + self.log_info (f'no faces found for {filepath.name}, copying without faces') - if len(landmarks_list) == 0: - self.log_info ( 'no faces found for %s, copying without faces' % (filepath.name) ) + img_bgr = cv2_imread(filepath) + imagelib.normalize_channels(img_bgr, 3) + cv2_imwrite (pf.output_filepath, img_bgr) + h,w,c = img_bgr.shape - if cfg.export_mask_alpha: - img_bgr = cv2_imread(filepath) - h,w,c = img_bgr.shape - if c == 1: - img_bgr = np.repeat(img_bgr, 3, -1) - if c == 3: - img_bgr = np.concatenate ([img_bgr, np.zeros((h,w,1), dtype=img_bgr.dtype) ], axis=-1) + img_mask = np.zeros( (h,w,1), dtype=img_bgr.dtype) + cv2_imwrite (pf.output_mask_filepath, img_mask) - cv2_imwrite (output_filepath, img_bgr) - else: - if filepath.suffix == '.png': - shutil.copy ( str(filepath), str(output_filepath) ) - else: - img_bgr = cv2_imread(filepath) - cv2_imwrite (output_filepath, img_bgr) + if pf.need_return_image: + pf.image = np.concatenate ([img_bgr, img_mask], axis=-1) - if need_return_image: - img_bgr = cv2_imread(filepath) - pf.image = img_bgr else: if cfg.type == MergerConfig.TYPE_MASKED: 
cfg.fanseg_input_size = self.fanseg_input_size @@ -172,10 +162,10 @@ class MergeSubprocessor(Subprocessor): pf.frame_info, pf.next_temporal_frame_infos ) - if output_filepath is not None and final_img is not None: - cv2_imwrite (output_filepath, final_img ) + cv2_imwrite (pf.output_filepath, final_img[...,0:3] ) + cv2_imwrite (pf.output_mask_filepath, final_img[...,3:4] ) - if need_return_image: + if pf.need_return_image: pf.image = final_img return pf @@ -186,7 +176,7 @@ class MergeSubprocessor(Subprocessor): return pf.frame_info.filepath #override - def __init__(self, is_interactive, merger_session_filepath, predictor_func, predictor_input_shape, merger_config, frames, frames_root_path, output_path, model_iter): + def __init__(self, is_interactive, merger_session_filepath, predictor_func, predictor_input_shape, merger_config, frames, frames_root_path, output_path, output_mask_path, model_iter): if len (frames) == 0: raise ValueError ("len (frames) == 0") @@ -226,6 +216,7 @@ class MergeSubprocessor(Subprocessor): self.frames_root_path = frames_root_path self.output_path = output_path + self.output_mask_path = output_mask_path self.model_iter = model_iter self.prefetch_frame_count = self.process_count = min(6,multiprocessing.cpu_count()) @@ -305,12 +296,17 @@ class MergeSubprocessor(Subprocessor): for filename in pathex.get_image_paths(self.output_path): #remove all images in output_path Path(filename).unlink() + for filename in pathex.get_image_paths(self.output_mask_path): #remove all images in output_mask_path + Path(filename).unlink() + + frames[0].cfg = self.merger_config.copy() for i in range( len(self.frames) ): frame = self.frames[i] frame.idx = i - frame.output_filepath = self.output_path / ( frame.frame_info.filepath.stem + '.png' ) + frame.output_filepath = self.output_path / ( frame.frame_info.filepath.stem + '.png' ) + frame.output_mask_filepath = self.output_mask_path / ( frame.frame_info.filepath.stem + '.png' ) #override def process_info_generator(self): @@ -353,9 +349,6 @@ class MergeSubprocessor(Subprocessor): '3' : lambda cfg,shift_pressed: cfg.set_mode(3), '4' : lambda cfg,shift_pressed: cfg.set_mode(4), '5' : lambda cfg,shift_pressed: cfg.set_mode(5), - '6' : lambda cfg,shift_pressed: cfg.set_mode(6), - '7' : lambda cfg,shift_pressed: cfg.set_mode(7), - '8' : lambda cfg,shift_pressed: cfg.set_mode(8), 'q' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(1 if not shift_pressed else 5), 'a' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(-1 if not shift_pressed else -5), 'w' : lambda cfg,shift_pressed: cfg.add_erode_mask_modifier(1 if not shift_pressed else 5), @@ -379,7 +372,6 @@ class MergeSubprocessor(Subprocessor): 'x' : lambda cfg,shift_pressed: cfg.toggle_mask_mode(), 'c' : lambda cfg,shift_pressed: cfg.toggle_color_transfer_mode(), 'v' : lambda cfg,shift_pressed: cfg.toggle_super_resolution_mode(), - 'b' : lambda cfg,shift_pressed: cfg.toggle_export_mask_alpha(), 'n' : lambda cfg,shift_pressed: cfg.toggle_sharpen_mode(), } self.masked_keys = list(self.masked_keys_funcs.keys()) @@ -393,6 +385,7 @@ class MergeSubprocessor(Subprocessor): for frame in self.frames: frame.output_filepath = None + frame.output_mask_filepath = None frame.image = None session_data = { @@ -435,12 +428,19 @@ class MergeSubprocessor(Subprocessor): io.log_info (cur_frame.cfg.to_string( cur_frame.frame_info.filepath.name) ) if cur_frame.image is None: - cur_frame.image = cv2_imread ( cur_frame.output_filepath) - if cur_frame.image is None: + image = cv2_imread 
(cur_frame.output_filepath) + image_mask = cv2_imread (cur_frame.output_mask_filepath) + if image is None or image_mask is None: # unable to read? recompute then cur_frame.is_done = False cur_frame.is_shown = False - self.main_screen.set_image(cur_frame.image) + else: + image_mask = imagelib.normalize_channels(image_mask, 1) + cur_frame.image = np.concatenate([image, image_mask], -1) + + if cur_frame.is_done: + self.main_screen.set_image(cur_frame.image) + else: self.main_screen.set_waiting_icon(True) @@ -510,6 +510,8 @@ class MergeSubprocessor(Subprocessor): self.screen_manager.get_current().diff_scale(-0.1) elif chr_key == '=': self.screen_manager.get_current().diff_scale(0.1) + elif chr_key == 'b': + self.screen_manager.get_current().toggle_show_checker_board() if go_prev_frame: if cur_frame is None or cur_frame.is_done: @@ -607,6 +609,7 @@ class MergeSubprocessor(Subprocessor): frame_info=frame.frame_info, next_temporal_frame_infos=frame.next_temporal_frame_infos, output_filepath=frame.output_filepath, + output_mask_filepath=frame.output_mask_filepath, need_return_image=True ) return None @@ -621,6 +624,7 @@ def main (model_class_name=None, force_model_name=None, input_path=None, output_path=None, + output_mask_path=None, aligned_path=None, force_gpu_idxs=None, cpu_only=None): @@ -634,6 +638,9 @@ def main (model_class_name=None, if not output_path.exists(): output_path.mkdir(parents=True, exist_ok=True) + if not output_mask_path.exists(): + output_mask_path.mkdir(parents=True, exist_ok=True) + if not saved_models_path.exists(): io.log_err('Model directory not found. Please ensure it exists.') return @@ -783,6 +790,7 @@ def main (model_class_name=None, frames = frames, frames_root_path = input_path, output_path = output_path, + output_mask_path = output_mask_path, model_iter = model.get_iter() ).run() diff --git a/mainscripts/MergerScreen/MergerScreen.py b/mainscripts/MergerScreen/MergerScreen.py index d105a86..6773cbd 100644 --- a/mainscripts/MergerScreen/MergerScreen.py +++ b/mainscripts/MergerScreen/MergerScreen.py @@ -30,6 +30,7 @@ class Screen(object): self.scale = 1 self.force_update = True self.is_first_appear = True + self.show_checker_board = False self.last_screen_shape = (480,640,3) self.checkerboard_image = None @@ -39,6 +40,10 @@ class Screen(object): def set_waiting_icon(self, b): self.waiting_icon = b + def toggle_show_checker_board(self): + self.show_checker_board = not self.show_checker_board + self.force_update = True + def set_image(self, img): if not img is self.image: self.force_update = True @@ -85,11 +90,14 @@ class Screen(object): screen = cv2.resize ( screen, ( int(w*self.scale), int(h*self.scale) ) ) if c == 4: - if self.checkerboard_image is None or self.checkerboard_image.shape[0:2] != screen.shape[0:2]: - self.checkerboard_image = ScreenAssets.build_checkerboard_a(screen.shape) + if not self.show_checker_board: + screen = screen[...,0:3] + else: + if self.checkerboard_image is None or self.checkerboard_image.shape[0:2] != screen.shape[0:2]: + self.checkerboard_image = ScreenAssets.build_checkerboard_a(screen.shape) - screen = screen[...,0:3]*0.75 + 64*self.checkerboard_image*(1- (screen[...,3:4].astype(np.float32)/255.0) ) - screen = screen.astype(np.uint8) + screen = screen[...,0:3]*0.75 + 64*self.checkerboard_image*(1- (screen[...,3:4].astype(np.float32)/255.0) ) + screen = screen.astype(np.uint8) io.show_image(self.scrn_manager.wnd_name, screen) diff --git a/mainscripts/VideoEd.py b/mainscripts/VideoEd.py index 7ac7b48..5120d2a 100644 --- 
a/mainscripts/VideoEd.py +++ b/mainscripts/VideoEd.py @@ -68,7 +68,7 @@ def cut_video ( input_file, from_time=None, to_time=None, audio_track_id=None, b if bitrate is None: bitrate = max (1, io.input_int ("Bitrate of output file in MB/s", 25) ) - kwargs = {"c:v": "libx265", + kwargs = {"c:v": "libx264", "b:v": "%dM" %(bitrate), "pix_fmt": "yuv420p", } @@ -113,7 +113,7 @@ def denoise_image_sequence( input_dir, ext=None, factor=None ): except: io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) ) -def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, lossless=None ): +def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, include_audio=False, lossless=None ): input_path = Path(input_dir) output_file_path = Path(output_file) reference_file_path = Path(reference_file) if reference_file is not None else None @@ -177,7 +177,7 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, output_args = [i_in] - if ref_in_a is not None: + if include_audio and ref_in_a is not None: output_args += [ref_in_a] output_args += [str (output_file_path)] @@ -185,18 +185,21 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, output_kwargs = {} if lossless: - output_kwargs.update ({"c:v": "png" + output_kwargs.update ({"c:v": "libx264", + "crf": "0", + "pix_fmt": "yuv420p", }) else: - output_kwargs.update ({"c:v": "libx265", + output_kwargs.update ({"c:v": "libx264", "b:v": "%dM" %(bitrate), "pix_fmt": "yuv420p", }) - - output_kwargs.update ({"c:a": "aac", - "b:a": "192k", - "ar" : "48000" - }) + + if include_audio and ref_in_a is not None: + output_kwargs.update ({"c:a": "aac", + "b:a": "192k", + "ar" : "48000" + }) job = ( ffmpeg.output(*output_args, **output_kwargs).overwrite_output() ) diff --git a/mainscripts/gfx/help_merger_masked.jpg b/mainscripts/gfx/help_merger_masked.jpg index d7598d2..926cf28 100644 Binary files a/mainscripts/gfx/help_merger_masked.jpg and b/mainscripts/gfx/help_merger_masked.jpg differ diff --git a/mainscripts/gfx/help_merger_masked_source.psd b/mainscripts/gfx/help_merger_masked_source.psd index 6b1e387..800cb71 100644 Binary files a/mainscripts/gfx/help_merger_masked_source.psd and b/mainscripts/gfx/help_merger_masked_source.psd differ diff --git a/merger/MergeMasked.py b/merger/MergeMasked.py index 56cb7ca..50c2a78 100644 --- a/merger/MergeMasked.py +++ b/merger/MergeMasked.py @@ -13,8 +13,6 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) if cfg.mode == 'original': - if cfg.export_mask_alpha: - img_bgr = np.concatenate ( [img_bgr, img_face_mask_a], -1 ) return img_bgr, img_face_mask_a out_img = img_bgr.copy() @@ -106,29 +104,10 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img img_face_mask_aaa [ img_face_mask_aaa <= 0.1 ] = 0.0 #get rid of noise if 'raw' in cfg.mode: - face_corner_pts = np.array ([ [0,0], [output_size-1,0], [output_size-1,output_size-1], [0,output_size-1] ], dtype=np.float32) - square_mask = np.zeros(img_bgr.shape, dtype=np.float32) - cv2.fillConvexPoly(square_mask, \ - LandmarksProcessor.transform_points (face_corner_pts, face_output_mat, invert=True ).astype(np.int), \ - (1,1,1) ) - if cfg.mode == 'raw-rgb': - out_merging_mask = square_mask - - if cfg.mode == 'raw-rgb' or cfg.mode == 'raw-rgb-mask': out_img = cv2.warpAffine( 
prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC, cv2.BORDER_TRANSPARENT ) - - if cfg.mode == 'raw-rgb-mask': - out_img = np.concatenate ( [out_img, np.expand_dims (img_face_mask_aaa[:,:,0],-1)], -1 ) - out_merging_mask = square_mask - - elif cfg.mode == 'raw-mask-only': - out_img = img_face_mask_aaa out_merging_mask = img_face_mask_aaa - elif cfg.mode == 'raw-predicted-only': - out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC, cv2.BORDER_TRANSPARENT ) - out_merging_mask = square_mask - + out_img = np.clip (out_img, 0.0, 1.0 ) else: #averaging [lenx, leny, maskx, masky] by grayscale gradients of upscaled mask @@ -176,14 +155,12 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img if 'seamless' not in cfg.mode and cfg.color_transfer_mode != 0: if cfg.color_transfer_mode == 1: #rct - prd_face_bgr = imagelib.reinhard_color_transfer ( (prd_face_bgr*255).astype(np.uint8), - (dst_face_bgr*255).astype(np.uint8), + prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( prd_face_bgr*255, 0, 255).astype(np.uint8), + np.clip( dst_face_bgr*255, 0, 255).astype(np.uint8), source_mask=prd_face_mask_a, target_mask=prd_face_mask_a) prd_face_bgr = np.clip( prd_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) - elif cfg.color_transfer_mode == 2: #lct prd_face_bgr = imagelib.linear_color_transfer (prd_face_bgr, dst_face_bgr) - prd_face_bgr = np.clip( prd_face_bgr, 0.0, 1.0) elif cfg.color_transfer_mode == 3: #mkl prd_face_bgr = imagelib.color_transfer_mkl (prd_face_bgr, dst_face_bgr) elif cfg.color_transfer_mode == 4: #mkl-m @@ -270,7 +247,6 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img out_face_bgr = np.clip( out_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0) elif cfg.color_transfer_mode == 2: #lct out_face_bgr = imagelib.linear_color_transfer (out_face_bgr, dst_face_bgr) - out_face_bgr = np.clip( out_face_bgr, 0.0, 1.0) elif cfg.color_transfer_mode == 3: #mkl out_face_bgr = imagelib.color_transfer_mkl (out_face_bgr, dst_face_bgr) elif cfg.color_transfer_mode == 4: #mkl-m @@ -356,7 +332,6 @@ def MergeMasked (predictor_func, predictor_input_shape, cfg, frame_info): final_img = final_img*(1-merging_mask) + img*merging_mask final_mask = np.clip (final_mask + merging_mask, 0, 1 ) - if cfg.export_mask_alpha: - final_img = np.concatenate ( [final_img, final_mask], -1) + final_img = np.concatenate ( [final_img, final_mask], -1) return (final_img*255).astype(np.uint8) \ No newline at end of file diff --git a/merger/MergerConfig.py b/merger/MergerConfig.py index c638e65..cff0439 100644 --- a/merger/MergerConfig.py +++ b/merger/MergerConfig.py @@ -101,10 +101,7 @@ mode_dict = {0:'original', 2:'hist-match', 3:'seamless', 4:'seamless-hist-match', - 5:'raw-rgb', - 6:'raw-rgb-mask', - 7:'raw-mask-only', - 8:'raw-predicted-only'} + 5:'raw-rgb',} mode_str_dict = {} @@ -144,7 +141,6 @@ class MergerConfigMasked(MergerConfig): image_denoise_power = 0, bicubic_degrade_power = 0, color_degrade_power = 0, - export_mask_alpha = False, **kwargs ): @@ -158,6 +154,9 @@ class MergerConfigMasked(MergerConfig): self.clip_hborder_mask_per = clip_hborder_mask_per #default changeable params + if mode not in mode_str_dict: + mode = mode_dict[1] + self.mode = mode self.masked_hist_match = masked_hist_match self.hist_match_threshold = hist_match_threshold @@ -170,7 +169,6 @@ class MergerConfigMasked(MergerConfig): 
self.image_denoise_power = image_denoise_power self.bicubic_degrade_power = bicubic_degrade_power self.color_degrade_power = color_degrade_power - self.export_mask_alpha = export_mask_alpha def copy(self): return copy.copy(self) @@ -217,9 +215,6 @@ class MergerConfigMasked(MergerConfig): def add_bicubic_degrade_power(self, diff): self.bicubic_degrade_power = np.clip ( self.bicubic_degrade_power+diff, 0, 100) - def toggle_export_mask_alpha(self): - self.export_mask_alpha = not self.export_mask_alpha - def ask_settings(self): s = """Choose mode: \n""" for key in mode_dict.keys(): @@ -267,7 +262,6 @@ class MergerConfigMasked(MergerConfig): self.image_denoise_power = np.clip ( io.input_int ("Choose image degrade by denoise power", 0, add_info="0..500"), 0, 500) self.bicubic_degrade_power = np.clip ( io.input_int ("Choose image degrade by bicubic rescale power", 0, add_info="0..100"), 0, 100) self.color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image", 0, add_info="0..100"), 0, 100) - self.export_mask_alpha = io.input_bool("Export png with alpha channel of the mask?", False) io.log_info ("") @@ -287,8 +281,7 @@ class MergerConfigMasked(MergerConfig): self.color_transfer_mode == other.color_transfer_mode and \ self.image_denoise_power == other.image_denoise_power and \ self.bicubic_degrade_power == other.bicubic_degrade_power and \ - self.color_degrade_power == other.color_degrade_power and \ - self.export_mask_alpha == other.export_mask_alpha + self.color_degrade_power == other.color_degrade_power return False @@ -324,8 +317,7 @@ class MergerConfigMasked(MergerConfig): if 'raw' not in self.mode: r += (f"""image_denoise_power: {self.image_denoise_power}\n""" f"""bicubic_degrade_power: {self.bicubic_degrade_power}\n""" - f"""color_degrade_power: {self.color_degrade_power}\n""" - f"""export_mask_alpha: {self.export_mask_alpha}\n""") + f"""color_degrade_power: {self.color_degrade_power}\n""") r += "================" diff --git a/models/ModelBase.py b/models/ModelBase.py index 1102d64..855f823 100644 --- a/models/ModelBase.py +++ b/models/ModelBase.py @@ -113,8 +113,8 @@ class ModelBase(object): self.model_name = saved_models_names[model_idx] else: - self.model_name = io.input_str(f"No saved models found. Enter a name of a new model", "noname") - + self.model_name = io.input_str(f"No saved models found. Enter a name of a new model", "new") + self.model_name = self.model_name.replace('_', ' ') break self.model_name = self.model_name + '_' + self.model_class_name @@ -159,8 +159,8 @@ class ModelBase(object): ##### io.input_skip_pending() - self.on_initialize_options() + if self.is_first_run(): # save as default options only for first run model initialize self.default_options_path.write_bytes( pickle.dumps (self.options) ) @@ -173,6 +173,8 @@ class ModelBase(object): self.on_initialize() self.options['batch_size'] = self.batch_size + + if self.is_training: self.preview_history_path = self.saved_models_path / ( f'{self.get_model_name()}_history' ) self.autobackups_path = self.saved_models_path / ( f'{self.get_model_name()}_autobackups' ) @@ -275,7 +277,7 @@ class ModelBase(object): def ask_batch_size(self, suggest_batch_size=None): default_batch_size = self.load_or_def_option('batch_size', suggest_batch_size or self.batch_size) - self.batch_size = max(0, io.input_int("Batch_size", default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. 
Tune this value for your videocard manually.")) + self.options['batch_size'] = self.batch_size = max(0, io.input_int("Batch_size", default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually.")) #overridable diff --git a/models/Model_Quick96/Model.py b/models/Model_Quick96/Model.py index a373f79..91d874b 100644 --- a/models/Model_Quick96/Model.py +++ b/models/Model_Quick96/Model.py @@ -14,7 +14,7 @@ class QModel(ModelBase): #override def on_initialize(self): device_config = nn.getCurrentDeviceConfig() - self.model_data_format = "NCHW" if len(device_config.devices) != 0 else "NHWC" + self.model_data_format = "NCHW" if len(device_config.devices) != 0 and not self.is_debug() else "NHWC" nn.initialize(data_format=self.model_data_format) tf = nn.tf @@ -167,9 +167,9 @@ class QModel(ModelBase): models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0' optimizer_vars_on_cpu = models_opt_device=='/CPU:0' - input_nc = 3 - output_nc = 3 - bgr_shape = nn.get4Dshape(resolution,resolution,input_nc) + input_ch = 3 + output_ch = 3 + bgr_shape = nn.get4Dshape(resolution,resolution,input_ch) mask_shape = nn.get4Dshape(resolution,resolution,1) lowest_dense_res = resolution // 16 @@ -189,7 +189,7 @@ class QModel(ModelBase): # Initializing model classes with tf.device (models_opt_device): - self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, name='encoder') + self.encoder = Encoder(in_ch=input_ch, e_ch=e_dims, name='encoder') encoder_out_ch = self.encoder.compute_output_channels ( (nn.tf_floatx, bgr_shape)) self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, d_ch=d_dims, name='inter') @@ -228,7 +228,7 @@ class QModel(ModelBase): gpu_src_losses = [] gpu_dst_losses = [] gpu_src_dst_loss_gvs = [] - + for gpu_id in range(gpu_count): with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ): batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu ) @@ -262,7 +262,7 @@ class QModel(ModelBase): gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur) - gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src + gpu_target_src_masked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src @@ -271,8 +271,8 @@ class QModel(ModelBase): gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur) - gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) - gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3]) + gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) + gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3]) gpu_src_loss += tf.reduce_mean ( 10*tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] ) gpu_dst_loss = tf.reduce_mean ( 
10*nn.tf_dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1]) @@ -282,8 +282,8 @@ class QModel(ModelBase): gpu_src_losses += [gpu_src_loss] gpu_dst_losses += [gpu_dst_loss] - gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss - gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ] + gpu_G_loss = gpu_src_loss + gpu_dst_loss + gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_G_loss, self.src_dst_trainable_weights ) ] # Average losses and gradients, and create optimizer update ops @@ -362,10 +362,9 @@ class QModel(ModelBase): training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path() training_data_dst_path = self.training_data_dst_path if not self.pretrain else self.get_pretraining_data_path() - cpu_count = multiprocessing.cpu_count() - + cpu_count = min(multiprocessing.cpu_count(), 8) src_generators_count = cpu_count // 2 - dst_generators_count = cpu_count - src_generators_count + dst_generators_count = cpu_count // 2 self.set_training_data_generators ([ SampleGeneratorFace(training_data_src_path, debug=self.is_debug(), batch_size=self.get_batch_size(), @@ -396,18 +395,19 @@ class QModel(ModelBase): #override def onTrainOneIter(self): + if self.get_iter() % 3 == 0 and self.last_samples is not None: ( (warped_src, target_src, target_srcm), \ (warped_dst, target_dst, target_dstm) ) = self.last_samples - src_loss, dst_loss = self.src_dst_train (target_src, target_src, target_srcm, - target_dst, target_dst, target_dstm) + warped_src = target_src + warped_dst = target_dst else: samples = self.last_samples = self.generate_next_samples() ( (warped_src, target_src, target_srcm), \ (warped_dst, target_dst, target_dstm) ) = samples - src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm, - warped_dst, target_dst, target_dstm) + src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm, + warped_dst, target_dst, target_dstm) return ( ('src_loss', src_loss), ('dst_loss', dst_loss), ) @@ -440,8 +440,7 @@ class QModel(ModelBase): return result def predictor_func (self, face=None): - face = face[None,...] 
- face = nn.to_data_format(face, self.model_data_format, "NHWC") + face = nn.to_data_format(face[None,...], self.model_data_format, "NHWC") bgr, mask_dst_dstm, mask_src_dstm = [ nn.to_data_format(x, "NHWC", self.model_data_format).astype(np.float32) for x in self.AE_merge (face) ] mask = mask_dst_dstm[0] * mask_src_dstm[0] diff --git a/models/Model_SAEHD/Model.py b/models/Model_SAEHD/Model.py index aa01172..412588b 100644 --- a/models/Model_SAEHD/Model.py +++ b/models/Model_SAEHD/Model.py @@ -33,7 +33,9 @@ class SAEHDModel(ModelBase): default_archi = self.options['archi'] = self.load_or_def_option('archi', 'dfhd') default_ae_dims = self.options['ae_dims'] = self.load_or_def_option('ae_dims', 256) default_e_dims = self.options['e_dims'] = self.load_or_def_option('e_dims', 64) - default_d_dims = self.options['d_dims'] = self.load_or_def_option('d_dims', 64) + + default_d_dims = 48 if self.options['archi'] == 'dfhd' else 64 + default_d_dims = self.options['d_dims'] = self.load_or_def_option('d_dims', default_d_dims) default_d_mask_dims = default_d_dims // 3 default_d_mask_dims += default_d_mask_dims % 2 @@ -43,6 +45,7 @@ class SAEHDModel(ModelBase): default_learn_mask = self.options['learn_mask'] = self.load_or_def_option('learn_mask', True) default_lr_dropout = self.options['lr_dropout'] = self.load_or_def_option('lr_dropout', False) default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True) + default_gan_power = self.options['gan_power'] = self.load_or_def_option('gan_power', 0.0) default_true_face_power = self.options['true_face_power'] = self.load_or_def_option('true_face_power', 0.0) default_face_style_power = self.options['face_style_power'] = self.load_or_def_option('face_style_power', 0.0) default_bg_style_power = self.options['bg_style_power'] = self.load_or_def_option('bg_style_power', 0.0) @@ -87,13 +90,15 @@ class SAEHDModel(ModelBase): self.options['lr_dropout'] = io.input_bool ("Use learning rate dropout", default_lr_dropout, help_message="When the face is trained enough, you can enable this option to get extra sharpness for less amount of iterations.") self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness for less amount of iterations.") + self.options['gan_power'] = np.clip ( io.input_number ("GAN power", default_gan_power, add_info="0.0 .. 10.0", help_message="Train the network in Generative Adversarial manner. Accelerates the speed of training. Forces the neural network to learn small details of the face. You can enable/disable this option at any time. Typical value is 1.0"), 0.0, 10.0 ) + if 'df' in self.options['archi']: - self.options['true_face_power'] = np.clip ( io.input_number (" 'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 ) + self.options['true_face_power'] = np.clip ( io.input_number ("'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Typical value is 0.01 . 
Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 ) else: self.options['true_face_power'] = 0.0 self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) + self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse. Typical value is 2.0"), 0.0, 100.0 ) self.options['ct_mode'] = io.input_str (f"Color transfer for src faceset", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.") self.options['clipgrad'] = io.input_bool ("Enable gradient clipping", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.") self.options['pretrain'] = io.input_bool ("Enable pretraining mode", default_pretrain, help_message="Pretrain the model with large amount of various faces. After that, model can be used to train the fakes more quickly.") @@ -110,7 +115,7 @@ class SAEHDModel(ModelBase): #override def on_initialize(self): device_config = nn.getCurrentDeviceConfig() - self.model_data_format = "NCHW" if len(device_config.devices) != 0 else "NHWC" + self.model_data_format = "NCHW" if len(device_config.devices) != 0 and not self.is_debug() else "NHWC" nn.initialize(floatx="float16" if self.options['use_float16'] else "float32", data_format=self.model_data_format) tf = nn.tf @@ -129,17 +134,15 @@ class SAEHDModel(ModelBase): def on_build(self, *args, **kwargs ): self.conv1 = nn.Conv2D( self.in_ch, - self.out_ch // (4 if self.subpixel else 1), - kernel_size=self.kernel_size, - strides=1 if self.subpixel else 2, - padding='SAME', dilations=self.dilations, kernel_initializer=conv_kernel_initializer) + self.out_ch // (4 if self.subpixel else 1), + kernel_size=self.kernel_size, + strides=1 if self.subpixel else 2, + padding='SAME', dilations=self.dilations, kernel_initializer=conv_kernel_initializer) def forward(self, x): x = self.conv1(x) - if self.subpixel: x = nn.tf_space_to_depth(x, 2) - if self.use_activator: x = tf.nn.leaky_relu(x, 0.1) return x @@ -332,7 +335,7 @@ class SAEHDModel(ModelBase): device_config = nn.getCurrentDeviceConfig() devices = device_config.devices - resolution = self.options['resolution'] + self.resolution = resolution = self.options['resolution'] learn_mask = self.options['learn_mask'] archi = self.options['archi'] ae_dims = self.options['ae_dims'] @@ -341,15 +344,17 @@ class SAEHDModel(ModelBase): d_mask_dims = self.options['d_mask_dims'] self.pretrain = self.options['pretrain'] + self.gan_power = gan_power = self.options['gan_power'] if not self.pretrain else 0.0 + masked_training = True models_opt_on_gpu = False if 
len(devices) != 1 else self.options['models_opt_on_gpu'] models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0' optimizer_vars_on_cpu = models_opt_device=='/CPU:0' - input_nc = 3 - output_nc = 3 - bgr_shape = nn.get4Dshape(resolution,resolution,input_nc) + input_ch = 3 + output_ch = 3 + bgr_shape = nn.get4Dshape(resolution,resolution,input_ch) mask_shape = nn.get4Dshape(resolution,resolution,1) lowest_dense_res = resolution // 16 @@ -370,7 +375,7 @@ class SAEHDModel(ModelBase): # Initializing model classes with tf.device (models_opt_device): if 'df' in archi: - self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder') + self.encoder = Encoder(in_ch=input_ch, e_ch=e_dims, is_hd='hd' in archi, name='encoder') encoder_out_ch = self.encoder.compute_output_channels ( (nn.tf_floatx, bgr_shape)) self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, name='inter') @@ -386,11 +391,11 @@ class SAEHDModel(ModelBase): if self.is_training: if self.options['true_face_power'] != 0: - self.dis = CodeDiscriminator(ae_dims, code_res=lowest_dense_res*2, name='dis' ) - self.model_filename_list += [ [self.dis, 'dis.npy'] ] + self.code_discriminator = CodeDiscriminator(ae_dims, code_res=lowest_dense_res*2, name='dis' ) + self.model_filename_list += [ [self.code_discriminator, 'code_discriminator.npy'] ] elif 'liae' in archi: - self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder') + self.encoder = Encoder(in_ch=input_ch, e_ch=e_dims, is_hd='hd' in archi, name='encoder') encoder_out_ch = self.encoder.compute_output_channels ( (nn.tf_floatx, bgr_shape)) self.inter_AB = Inter(in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims*2, name='inter_AB') @@ -407,6 +412,12 @@ class SAEHDModel(ModelBase): [self.decoder , 'decoder.npy'] ] if self.is_training: + if gan_power != 0: + self.D_src = nn.PatchDiscriminator(patch_size=resolution//16, in_ch=output_ch, base_ch=512, name="D_src") + self.D_dst = nn.PatchDiscriminator(patch_size=resolution//16, in_ch=output_ch, base_ch=512, name="D_dst") + self.model_filename_list += [ [self.D_src, 'D_src.npy'] ] + self.model_filename_list += [ [self.D_dst, 'D_dst.npy'] ] + # Initialize optimizers lr=5e-5 lr_dropout = 0.3 if self.options['lr_dropout'] else 1.0 @@ -424,9 +435,14 @@ class SAEHDModel(ModelBase): self.src_dst_opt.initialize_variables (self.src_dst_all_trainable_weights, vars_on_cpu=optimizer_vars_on_cpu) if self.options['true_face_power'] != 0: - self.D_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_opt') - self.D_opt.initialize_variables ( self.dis.get_weights(), vars_on_cpu=optimizer_vars_on_cpu) - self.model_filename_list += [ (self.D_opt, 'D_opt.npy') ] + self.D_code_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_code_opt') + self.D_code_opt.initialize_variables ( self.code_discriminator.get_weights(), vars_on_cpu=optimizer_vars_on_cpu) + self.model_filename_list += [ (self.D_code_opt, 'D_code_opt.npy') ] + + if gan_power != 0: + self.D_src_dst_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_src_dst_opt') + self.D_src_dst_opt.initialize_variables ( self.D_src.get_weights()+self.D_dst.get_weights(), vars_on_cpu=optimizer_vars_on_cpu) + self.model_filename_list += [ (self.D_src_dst_opt, 'D_src_dst_opt.npy') ] if self.is_training: # Adjust batch size for multiple GPU @@ -445,9 
+461,9 @@ class SAEHDModel(ModelBase): gpu_src_losses = [] gpu_dst_losses = [] - gpu_src_dst_loss_gvs = [] - gpu_D_loss_gvs = [] - + gpu_G_loss_gvs = [] + gpu_D_code_loss_gvs = [] + gpu_D_src_dst_loss_gvs = [] for gpu_id in range(gpu_count): with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ): @@ -497,8 +513,8 @@ class SAEHDModel(ModelBase): gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur) - gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src - gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst + gpu_target_src_masked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src + gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst @@ -506,8 +522,8 @@ class SAEHDModel(ModelBase): gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur) - gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) - gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3]) + gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) + gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3]) if learn_mask: gpu_src_loss += tf.reduce_mean ( 10*tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] ) @@ -528,26 +544,48 @@ class SAEHDModel(ModelBase): gpu_src_losses += [gpu_src_loss] gpu_dst_losses += [gpu_dst_loss] - gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss + gpu_G_loss = gpu_src_loss + gpu_dst_loss + + def DLoss(labels,logits): + return tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits), axis=[1,2,3]) if self.options['true_face_power'] != 0: - def DLoss(labels,logits): - return tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits), axis=[1,2,3]) - - gpu_src_code_d = self.dis( gpu_src_code ) - gpu_src_code_d_ones = tf.ones_like(gpu_src_code_d) + gpu_src_code_d = self.code_discriminator( gpu_src_code ) + gpu_src_code_d_ones = tf.ones_like (gpu_src_code_d) gpu_src_code_d_zeros = tf.zeros_like(gpu_src_code_d) - gpu_dst_code_d = self.dis( gpu_dst_code ) + gpu_dst_code_d = self.code_discriminator( gpu_dst_code ) gpu_dst_code_d_ones = tf.ones_like(gpu_dst_code_d) - gpu_src_dst_loss += self.options['true_face_power']*DLoss(gpu_src_code_d_ones, gpu_src_code_d) + gpu_G_loss += self.options['true_face_power']*DLoss(gpu_src_code_d_ones, gpu_src_code_d) - gpu_D_loss = (DLoss(gpu_src_code_d_ones , gpu_dst_code_d) + \ - DLoss(gpu_src_code_d_zeros, gpu_src_code_d) ) * 0.5 + gpu_D_code_loss = (DLoss(gpu_src_code_d_ones , gpu_dst_code_d) + \ + DLoss(gpu_src_code_d_zeros, gpu_src_code_d) ) * 0.5 - gpu_D_loss_gvs += [ nn.tf_gradients (gpu_D_loss, self.dis.get_weights() ) ] + gpu_D_code_loss_gvs += [ nn.tf_gradients (gpu_D_code_loss, self.code_discriminator.get_weights() ) ] - 
gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ]
+                        if gan_power != 0:
+                            gpu_pred_src_src_d       = self.D_src(gpu_pred_src_src_masked_opt)
+                            gpu_pred_src_src_d_ones  = tf.ones_like (gpu_pred_src_src_d)
+                            gpu_pred_src_src_d_zeros = tf.zeros_like(gpu_pred_src_src_d)
+                            gpu_target_src_d         = self.D_src(gpu_target_src_masked_opt)
+                            gpu_target_src_d_ones    = tf.ones_like(gpu_target_src_d)
+                            gpu_pred_dst_dst_d       = self.D_dst(gpu_pred_dst_dst_masked_opt)
+                            gpu_pred_dst_dst_d_ones  = tf.ones_like (gpu_pred_dst_dst_d)
+                            gpu_pred_dst_dst_d_zeros = tf.zeros_like(gpu_pred_dst_dst_d)
+                            gpu_target_dst_d         = self.D_dst(gpu_target_dst_masked_opt)
+                            gpu_target_dst_d_ones    = tf.ones_like(gpu_target_dst_d)
+
+                            gpu_D_src_dst_loss = (DLoss(gpu_target_src_d_ones   , gpu_target_src_d) + \
+                                                  DLoss(gpu_pred_src_src_d_zeros, gpu_pred_src_src_d) ) * 0.5 + \
+                                                 (DLoss(gpu_target_dst_d_ones   , gpu_target_dst_d) + \
+                                                  DLoss(gpu_pred_dst_dst_d_zeros, gpu_pred_dst_dst_d) ) * 0.5
+
+                            gpu_D_src_dst_loss_gvs += [ nn.tf_gradients (gpu_D_src_dst_loss, self.D_src.get_weights()+self.D_dst.get_weights() ) ]
+
+                            gpu_G_loss += gan_power*(DLoss(gpu_pred_src_src_d_ones, gpu_pred_src_src_d) + DLoss(gpu_pred_dst_dst_d_ones, gpu_pred_dst_dst_d))
+
+
+                        gpu_G_loss_gvs += [ nn.tf_gradients ( gpu_G_loss, self.src_dst_trainable_weights ) ]
 
             # Average losses and gradients, and create optimizer update ops
@@ -558,15 +596,15 @@ class SAEHDModel(ModelBase):
             pred_src_srcm = nn.tf_concat(gpu_pred_src_srcm_list, 0)
             pred_dst_dstm = nn.tf_concat(gpu_pred_dst_dstm_list, 0)
             pred_src_dstm = nn.tf_concat(gpu_pred_src_dstm_list, 0)
-
             src_loss = nn.tf_average_tensor_list(gpu_src_losses)
             dst_loss = nn.tf_average_tensor_list(gpu_dst_losses)
-            src_dst_loss_gv = nn.tf_average_gv_list (gpu_src_dst_loss_gvs)
-            src_dst_loss_gv_op = self.src_dst_opt.get_update_op (src_dst_loss_gv )
+            src_dst_loss_gv_op = self.src_dst_opt.get_update_op (nn.tf_average_gv_list (gpu_G_loss_gvs))
 
             if self.options['true_face_power'] != 0:
-                D_loss_gv = nn.tf_average_gv_list(gpu_D_loss_gvs)
-                D_loss_gv_op = self.D_opt.get_update_op (D_loss_gv )
+                D_loss_gv_op = self.D_code_opt.get_update_op (nn.tf_average_gv_list(gpu_D_code_loss_gvs))
+
+            if gan_power != 0:
+                src_D_src_dst_loss_gv_op = self.D_src_dst_opt.get_update_op (nn.tf_average_gv_list(gpu_D_src_dst_loss_gvs) )
 
 
         # Initializing training and view functions
@@ -590,6 +628,17 @@ class SAEHDModel(ModelBase):
                 nn.tf_sess.run ([D_loss_gv_op], feed_dict={self.warped_src: warped_src, self.warped_dst: warped_dst})
             self.D_train = D_train
 
+        if gan_power != 0:
+            def D_src_dst_train(warped_src, target_src, target_srcm, \
+                                warped_dst, target_dst, target_dstm):
+                nn.tf_sess.run ([src_D_src_dst_loss_gv_op], feed_dict={self.warped_src :warped_src,
+                                                                       self.target_src :target_src,
+                                                                       self.target_srcm:target_srcm,
+                                                                       self.warped_dst :warped_dst,
+                                                                       self.target_dst :target_dst,
+                                                                       self.target_dstm:target_dstm})
+            self.D_src_dst_train = D_src_dst_train
+
         if learn_mask:
             def AE_view(warped_src, warped_dst):
                 return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm],
@@ -663,12 +712,11 @@ class SAEHDModel(ModelBase):
 
             t_img_warped = t.IMG_WARPED_TRANSFORMED if self.options['random_warp'] else t.IMG_TRANSFORMED
 
-            cpu_count = multiprocessing.cpu_count()
-
+            cpu_count = min(multiprocessing.cpu_count(), 8)
             src_generators_count = cpu_count // 2
+            dst_generators_count = cpu_count // 2
             if self.options['ct_mode'] != 'none':
                 src_generators_count = int(src_generators_count * 1.5)
-            dst_generators_count = cpu_count - src_generators_count
 
             self.set_training_data_generators ([
                     SampleGeneratorFace(training_data_src_path, random_ct_samples_path=random_ct_samples_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
@@ -706,6 +754,9 @@ class SAEHDModel(ModelBase):
         if self.options['true_face_power'] != 0 and not self.pretrain:
             self.D_train (warped_src, warped_dst)
 
+        if self.gan_power != 0:
+            self.D_src_dst_train (warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm)
+
         return ( ('src_loss', src_loss), ('dst_loss', dst_loss), )
 
     #override
@@ -721,7 +772,8 @@ class SAEHDModel(ModelBase):
 
         target_srcm, target_dstm = [ nn.to_data_format(x,"NHWC", self.model_data_format) for x in ([target_srcm, target_dstm] )]
 
-        n_samples = min(4, self.get_batch_size() )
+        n_samples = min(4, self.get_batch_size(), 800 // self.resolution )
+
         result = []
         st = []
         for i in range(n_samples):
@@ -742,8 +794,7 @@ class SAEHDModel(ModelBase):
         return result
 
     def predictor_func (self, face=None):
-        face = face[None,...]
-        face = nn.to_data_format(face, self.model_data_format, "NHWC")
+        face = nn.to_data_format(face[None,...], self.model_data_format, "NHWC")
 
         if self.options['learn_mask']:
             bgr, mask_dst_dstm, mask_src_dstm = [ nn.to_data_format(x,"NHWC", self.model_data_format).astype(np.float32) for x in self.AE_merge (face) ]
diff --git a/samplelib/PackedFaceset.py b/samplelib/PackedFaceset.py
index 986574b..867fcd9 100644
--- a/samplelib/PackedFaceset.py
+++ b/samplelib/PackedFaceset.py
@@ -3,7 +3,7 @@
 import shutil
 import struct
 from pathlib import Path
-import samplelib.SampleHost
+import samplelib.SampleLoader
 from core.interact import interact as io
 from samplelib import Sample
 from core import pathex
@@ -34,7 +34,7 @@ class PackedFaceset():
         else:
             image_paths = pathex.get_image_paths(samples_path)
 
-        samples = samplelib.SampleHost.load_face_samples(image_paths)
+        samples = samplelib.SampleLoader.load_face_samples(image_paths)
         samples_len = len(samples)
 
         samples_configs = []
diff --git a/samplelib/SampleGeneratorFace.py b/samplelib/SampleGeneratorFace.py
index 195e45a..696a4f2 100644
--- a/samplelib/SampleGeneratorFace.py
+++ b/samplelib/SampleGeneratorFace.py
@@ -9,7 +9,7 @@ import numpy as np
 from core import mplib
 from core.joblib import SubprocessGenerator, ThisThreadGenerator
 from facelib import LandmarksProcessor
-from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
+from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
                        SampleType)
 
@@ -39,7 +39,7 @@ class SampleGeneratorFace(SampleGeneratorBase):
         else:
             self.generators_count = max(1, generators_count)
 
-        samples = SampleHost.load (SampleType.FACE, self.samples_path)
+        samples = SampleLoader.load (SampleType.FACE, self.samples_path)
         self.samples_len = len(samples)
 
         if self.samples_len == 0:
@@ -48,7 +48,7 @@ class SampleGeneratorFace(SampleGeneratorBase):
         index_host = mplib.IndexHost(self.samples_len)
 
         if random_ct_samples_path is not None:
-            ct_samples = SampleHost.load (SampleType.FACE, random_ct_samples_path)
+            ct_samples = SampleLoader.load (SampleType.FACE, random_ct_samples_path)
             ct_index_host = mplib.IndexHost( len(ct_samples) )
         else:
             ct_samples = None
diff --git a/samplelib/SampleGeneratorFacePerson.py b/samplelib/SampleGeneratorFacePerson.py
index d691341..d6be2d8 100644
--- a/samplelib/SampleGeneratorFacePerson.py
+++ b/samplelib/SampleGeneratorFacePerson.py
@@ -8,7 +8,7 @@ import numpy as np
 from core import mplib
 from core.joblib import SubprocessGenerator, ThisThreadGenerator
 from facelib import LandmarksProcessor
-from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
+from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
                        SampleType)
 
@@ -33,7 +33,7 @@ class SampleGeneratorFacePerson(SampleGeneratorBase):
 
         raise NotImplementedError("Currently SampleGeneratorFacePerson is not implemented.")
 
-        samples_host = SampleHost.mp_host (SampleType.FACE, self.samples_path)
+        samples_host = SampleLoader.mp_host (SampleType.FACE, self.samples_path)
         samples = samples_host.get_list()
         self.samples_len = len(samples)
 
@@ -98,7 +98,7 @@ class SampleGeneratorFacePerson(SampleGeneratorBase):
 
     @staticmethod
     def get_person_id_max_count(samples_path):
-        return SampleHost.get_person_id_max_count(samples_path)
+        return SampleLoader.get_person_id_max_count(samples_path)
 
 """
 if self.person_id_mode==1:
diff --git a/samplelib/SampleGeneratorFaceTemporal.py b/samplelib/SampleGeneratorFaceTemporal.py
index d5eb754..7a9215f 100644
--- a/samplelib/SampleGeneratorFaceTemporal.py
+++ b/samplelib/SampleGeneratorFaceTemporal.py
@@ -9,7 +9,7 @@ import numpy as np
 from core import mplib
 from core.joblib import SubprocessGenerator, ThisThreadGenerator
 from facelib import LandmarksProcessor
-from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
+from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
                        SampleType)
 
@@ -31,7 +31,7 @@ class SampleGeneratorFaceTemporal(SampleGeneratorBase):
         else:
             self.generators_count = generators_count
 
-        samples = SampleHost.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path)
+        samples = SampleLoader.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path)
         samples_len = len(samples)
         if samples_len == 0:
             raise ValueError('No training data provided.')
diff --git a/samplelib/SampleGeneratorImageTemporal.py b/samplelib/SampleGeneratorImageTemporal.py
index 62dbdfc..4f86e43 100644
--- a/samplelib/SampleGeneratorImageTemporal.py
+++ b/samplelib/SampleGeneratorImageTemporal.py
@@ -4,7 +4,7 @@ import cv2
 import numpy as np
 
 from core.joblib import SubprocessGenerator, ThisThreadGenerator
-from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
+from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
                        SampleType)
 
@@ -22,7 +22,7 @@ class SampleGeneratorImageTemporal(SampleGeneratorBase):
         self.sample_process_options = sample_process_options
         self.output_sample_types = output_sample_types
 
-        self.samples = SampleHost.load (SampleType.IMAGE, self.samples_path)
+        self.samples = SampleLoader.load (SampleType.IMAGE, self.samples_path)
 
         self.generator_samples = [ self.samples ]
         self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] if self.debug else \
diff --git a/samplelib/SampleHost.py b/samplelib/SampleLoader.py
similarity index 95%
rename from samplelib/SampleHost.py
rename to samplelib/SampleLoader.py
index ef399f8..80dbe67 100644
--- a/samplelib/SampleHost.py
+++ b/samplelib/SampleLoader.py
@@ -14,7 +14,7 @@ from facelib import FaceType, LandmarksProcessor
 
 from .Sample import Sample, SampleType
 
-class SampleHost:
+class SampleLoader:
     samples_cache = dict()
     @staticmethod
     def get_person_id_max_count(samples_path):
@@ -33,7 +33,7 @@ class SampleHost:
 
     @staticmethod
     def load(sample_type, samples_path):
-        samples_cache = SampleHost.samples_cache
+        samples_cache = SampleLoader.samples_cache
         if str(samples_path) not in samples_cache.keys():
             samples_cache[str(samples_path)] = [None]*SampleType.QTY
 
@@ -55,12 +55,12 @@ class SampleHost:
                     io.log_info (f"Loaded {len(result)} packed faces from {samples_path}")
 
                 if result is None:
-                    result = SampleHost.load_face_samples( pathex.get_image_paths(samples_path) )
+                    result = SampleLoader.load_face_samples( pathex.get_image_paths(samples_path) )
 
                 samples[sample_type] = result
 
         elif sample_type == SampleType.FACE_TEMPORAL_SORTED:
-            result = SampleHost.load (SampleType.FACE, samples_path)
-            result = SampleHost.upgradeToFaceTemporalSortedSamples(result)
+            result = SampleLoader.load (SampleType.FACE, samples_path)
+            result = SampleLoader.upgradeToFaceTemporalSortedSamples(result)
             samples[sample_type] = result
 
         return samples[sample_type]
diff --git a/samplelib/SampleProcessor.py b/samplelib/SampleProcessor.py
index e42a205..b6d1afe 100644
--- a/samplelib/SampleProcessor.py
+++ b/samplelib/SampleProcessor.py
@@ -101,7 +101,6 @@ class SampleProcessor(object):
         for sample in samples:
             sample_bgr = sample.load_bgr()
             ct_sample_bgr = None
-            ct_sample_mask = None
             h,w,c = sample_bgr.shape
 
             is_face_sample = sample.landmarks is not None
@@ -117,10 +116,6 @@ class SampleProcessor(object):
                 resolution = opts.get('resolution', 0)
                 types = opts.get('types', [] )
 
-                border_replicate = opts.get('border_replicate', True)
-                random_sub_res = opts.get('random_sub_res', 0)
-                normalize_std_dev = opts.get('normalize_std_dev', False)
-                normalize_vgg = opts.get('normalize_vgg', False)
                 motion_blur = opts.get('motion_blur', None)
                 gaussian_blur = opts.get('gaussian_blur', None)
 
@@ -131,7 +126,6 @@ class SampleProcessor(object):
                 img_type = SPTF.NONE
                 target_face_type = SPTF.NONE
-                face_mask_type = SPTF.NONE
                 mode_type = SPTF.NONE
                 for t in types:
                     if t >= SPTF.IMG_TYPE_BEGIN and t < SPTF.IMG_TYPE_END:
@@ -140,6 +134,12 @@ class SampleProcessor(object):
                         target_face_type = t
                     elif t >= SPTF.MODE_BEGIN and t < SPTF.MODE_END:
                         mode_type = t
+
+                if mode_type == SPTF.MODE_M and not is_face_sample:
+                    raise ValueError("MODE_M applicable only for face samples")
+
+                can_warp      = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
+                can_transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED)
 
                 if img_type == SPTF.NONE:
                     raise ValueError ('expected IMG_ type')
@@ -148,7 +148,7 @@ class SampleProcessor(object):
                     l = sample.landmarks
                     l = np.concatenate ( [ np.expand_dims(l[:,0] / w,-1), np.expand_dims(l[:,1] / h,-1) ], -1 )
                     l = np.clip(l, 0.0, 1.0)
-                    img = l
+                    out_sample = l
                 elif img_type == SPTF.IMG_PITCH_YAW_ROLL or img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID:
                     pitch_yaw_roll = sample.get_pitch_yaw_roll()
@@ -156,57 +156,42 @@ class SampleProcessor(object):
                         yaw = -yaw
 
                     if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID:
-                        pitch = np.clip( (pitch / math.pi) / 2.0 + 1.0, 0, 1)
-                        yaw   = np.clip( (yaw / math.pi) / 2.0 + 1.0, 0, 1)
-                        roll  = np.clip( (roll / math.pi) / 2.0 + 1.0, 0, 1)
+                        pitch = np.clip( (pitch / math.pi) / 2.0 + 0.5, 0, 1)
+                        yaw   = np.clip( (yaw / math.pi) / 2.0 + 0.5, 0, 1)
+                        roll  = np.clip( (roll / math.pi) / 2.0 + 0.5, 0, 1)
 
-                    img = (pitch, yaw, roll)
+                    out_sample = (pitch, yaw, roll)
                 else:
                     if mode_type == SPTF.NONE:
                         raise ValueError ('expected MODE_ type')
-
-                    def do_transform(img, mask):
-                        warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
-                        transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED)
-                        flip = img_type != SPTF.IMG_WARPED
-
-                        img = imagelib.warp_by_params (params, img, warp, transform, flip, border_replicate)
-                        if mask is not None:
-                            mask = imagelib.warp_by_params (params, mask, warp, transform, flip, False)
-                            if len(mask.shape) == 2:
-                                mask = mask[...,np.newaxis]
-
-
-                        return img, mask
-
-                    img = sample_bgr
-
-                    ### Prepare a mask
-                    mask = None
-                    if is_face_sample:
+
+                    need_img  = mode_type != SPTF.MODE_M
+                    need_mask = mode_type == SPTF.MODE_M
+
+                    if need_mask:
                         if sample.eyebrows_expand_mod is not None:
-                            mask = LandmarksProcessor.get_image_hull_mask (img.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
+                            mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
                         else:
-                            mask = LandmarksProcessor.get_image_hull_mask (img.shape, sample.landmarks)
+                            mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks)
 
                         if sample.ie_polys is not None:
                             sample.ie_polys.overlay_mask(mask)
 
-                    ##################
+                    if need_img:
+                        img = sample_bgr
+
+                        if motion_blur is not None:
+                            chance, mb_max_size = motion_blur
+                            chance = np.clip(chance, 0, 100)
 
-                    if motion_blur is not None:
-                        chance, mb_max_size = motion_blur
-                        chance = np.clip(chance, 0, 100)
+                            if np.random.randint(100) < chance:
+                                img = imagelib.LinearMotionBlur (img, np.random.randint( mb_max_size )+1, np.random.randint(360) )
 
-                        if np.random.randint(100) < chance:
-                            img = imagelib.LinearMotionBlur (img, np.random.randint( mb_max_size )+1, np.random.randint(360) )
+                        if gaussian_blur is not None:
+                            chance, kernel_max_size = gaussian_blur
+                            chance = np.clip(chance, 0, 100)
 
-                    if gaussian_blur is not None:
-                        chance, kernel_max_size = gaussian_blur
-                        chance = np.clip(chance, 0, 100)
-
-                        if np.random.randint(100) < chance:
-                            img = cv2.GaussianBlur(img, ( np.random.randint( kernel_max_size )*2+1 ,) *2 , 0)
+                            if np.random.randint(100) < chance:
+                                img = cv2.GaussianBlur(img, ( np.random.randint( kernel_max_size )*2+1 ,) *2 , 0)
 
                     if is_face_sample and target_face_type != SPTF.NONE:
                         target_ft = SampleProcessor.SPTF_FACETYPE_TO_FACETYPE[target_face_type]
@@ -214,99 +199,78 @@ class SampleProcessor(object):
                            raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.'
                                              % (sample.filename, sample.face_type, target_ft) )
 
                         if sample.face_type == FaceType.MARK_ONLY:
-                            #first warp to target facetype
-                            img  = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft), (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
-                            mask = cv2.warpAffine( mask, LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft), (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
-                            #then apply transforms
-                            img, mask = do_transform (img, mask)
-                            img = np.concatenate( (img, mask ), -1 )
-                            img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
+                            mat = LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft)
+
+                            if need_img:
+                                img = cv2.warpAffine( img, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
+                                img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
+                                img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
+
+                            if need_mask:
+                                mask = cv2.warpAffine( mask, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
+                                mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
+                                mask = cv2.resize( mask, (resolution,resolution), cv2.INTER_CUBIC )[...,None]
                         else:
-                            img, mask = do_transform (img, mask)
                             mat = LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, target_ft)
-                            img  = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
-                            mask = cv2.warpAffine( mask, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )
-                            img = np.concatenate( (img, mask[...,None] ), -1 )
+
+                            if need_img:
+                                img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
+                                img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC )
+
+                            if need_mask:
+                                mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
+                                mask = cv2.warpAffine( mask, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )[...,None]
                     else:
-                        img, mask = do_transform (img, mask)
-                        img = np.concatenate( (img, mask ), -1 )
-                        img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
+                        if need_img:
+                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
+                            img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
+
+                        if need_mask:
+                            mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
+                            mask = cv2.resize( mask, (resolution,resolution), cv2.INTER_CUBIC )[...,None]
 
-                    if random_sub_res != 0:
-                        sub_size = resolution - random_sub_res
-                        rnd_state = np.random.RandomState (sample_rnd_seed+random_sub_res)
-                        start_x = rnd_state.randint(sub_size+1)
-                        start_y = rnd_state.randint(sub_size+1)
-                        img = img[start_y:start_y+sub_size,start_x:start_x+sub_size,:]
-
-                    img = np.clip(img, 0, 1).astype(np.float32)
-                    img_bgr  = img[...,0:3]
-                    img_mask = img[...,3:4]
-
-                    if ct_mode is not None and ct_sample is not None:
-                        if ct_sample_bgr is None:
-                            ct_sample_bgr = ct_sample.load_bgr()
-
-                        ct_sample_bgr_resized = cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR )
-
-                        if ct_mode == 'lct':
-                            img_bgr = imagelib.linear_color_transfer (img_bgr, ct_sample_bgr_resized)
-                            img_bgr = np.clip( img_bgr, 0.0, 1.0)
-                        elif ct_mode == 'rct':
-                            img_bgr = imagelib.reinhard_color_transfer ( np.clip( (img_bgr*255).astype(np.uint8), 0, 255),
-                                                                         np.clip( (ct_sample_bgr_resized*255).astype(np.uint8), 0, 255) )
-                            img_bgr = np.clip( img_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
-                        elif ct_mode == 'mkl':
-                            img_bgr = imagelib.color_transfer_mkl (img_bgr, ct_sample_bgr_resized)
-                        elif ct_mode == 'idt':
-                            img_bgr = imagelib.color_transfer_idt (img_bgr, ct_sample_bgr_resized)
-                        elif ct_mode == 'sot':
-                            img_bgr = imagelib.color_transfer_sot (img_bgr, ct_sample_bgr_resized)
-                            img_bgr = np.clip( img_bgr, 0.0, 1.0)
-
-                    if normalize_std_dev:
-                        img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) )
-                    elif normalize_vgg:
-                        img_bgr = np.clip(img_bgr*255, 0, 255)
-                        img_bgr[:,:,0] -= 103.939
-                        img_bgr[:,:,1] -= 116.779
-                        img_bgr[:,:,2] -= 123.68
-
-                    if mode_type == SPTF.MODE_BGR:
-                        img = img_bgr
-                    elif mode_type == SPTF.MODE_BGR_SHUFFLE:
-                        rnd_state = np.random.RandomState (sample_rnd_seed)
-                        img = np.take (img_bgr, rnd_state.permutation(img_bgr.shape[-1]), axis=-1)
-
-                    elif mode_type == SPTF.MODE_BGR_RANDOM_HSV_SHIFT:
-                        rnd_state = np.random.RandomState (sample_rnd_seed)
-                        hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
-                        h, s, v = cv2.split(hsv)
-                        h = (h + rnd_state.randint(360) ) % 360
-                        s = np.clip ( s + rnd_state.random()-0.5, 0, 1 )
-                        v = np.clip ( v + rnd_state.random()-0.5, 0, 1 )
-                        hsv = cv2.merge([h, s, v])
-                        img = np.clip( cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) , 0, 1 )
-                    elif mode_type == SPTF.MODE_G:
-                        img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)[...,None]
-                    elif mode_type == SPTF.MODE_GGG:
-                        img = np.repeat ( np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1), (3,), -1)
-                    elif mode_type == SPTF.MODE_M and is_face_sample:
-                        img = img_mask
+
+                    if mode_type == SPTF.MODE_M:
+                        out_sample = np.clip(mask, 0, 1).astype(np.float32)
+                    else:
+                        img = np.clip(img, 0, 1).astype(np.float32)
+
+                        if ct_mode is not None and ct_sample is not None:
+                            if ct_sample_bgr is None:
+                                ct_sample_bgr = ct_sample.load_bgr()
+                            img = imagelib.color_transfer (ct_mode,
+                                                           img,
+                                                           cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR ) )
+
+                        if mode_type == SPTF.MODE_BGR:
+                            out_sample = img
+                        elif mode_type == SPTF.MODE_BGR_SHUFFLE:
+                            rnd_state = np.random.RandomState (sample_rnd_seed)
+                            out_sample = np.take (img, rnd_state.permutation(img.shape[-1]), axis=-1)
+                        elif mode_type == SPTF.MODE_BGR_RANDOM_HSV_SHIFT:
+                            rnd_state = np.random.RandomState (sample_rnd_seed)
+                            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+                            h, s, v = cv2.split(hsv)
+                            h = (h + rnd_state.randint(360) ) % 360
+                            s = np.clip ( s + rnd_state.random()-0.5, 0, 1 )
+                            v = np.clip ( v + rnd_state.random()-0.5, 0, 1 )
+                            hsv = cv2.merge([h, s, v])
+                            out_sample = np.clip( cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) , 0, 1 )
+                        elif mode_type == SPTF.MODE_G:
+                            out_sample = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)[...,None]
+                        elif mode_type == SPTF.MODE_GGG:
+                            out_sample = np.repeat ( np.expand_dims(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY),-1), (3,), -1)
+
                 if not debug:
                     if normalize_tanh:
-                        img = np.clip (img * 2.0 - 1.0, -1.0, 1.0)
-                    else:
-                        img = np.clip (img, 0.0, 1.0)
+                        out_sample = np.clip (out_sample * 2.0 - 1.0, -1.0, 1.0)
 
                 if data_format == "NCHW":
-                    img = np.transpose(img, (2,0,1) )
-
-
-                outputs_sample.append ( img )
+                    out_sample = np.transpose(out_sample, (2,0,1) )
+
+                outputs_sample.append ( out_sample )
 
             outputs += [outputs_sample]
 
         return outputs
diff --git a/samplelib/__init__.py b/samplelib/__init__.py
index 67630c5..e72ac11 100644
--- a/samplelib/__init__.py
+++ b/samplelib/__init__.py
@@ -1,6 +1,6 @@
 from .Sample import Sample
 from .Sample import SampleType
-from .SampleHost import SampleHost
+from .SampleLoader import SampleLoader
 from .SampleProcessor import SampleProcessor
 from .SampleGeneratorBase import SampleGeneratorBase
 from .SampleGeneratorFace import SampleGeneratorFace
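
A minimal standalone sketch of the GAN loss wiring introduced in Model_SAEHD above, for reference only and not part of the patch. It assumes DLoss is a sigmoid cross-entropy on raw discriminator logits (DLoss itself is defined elsewhere in Model_SAEHD, outside this excerpt), so the exact reduction may differ from the model's implementation:

    import numpy as np

    def sigmoid_ce(labels, logits):
        # numerically stable sigmoid cross-entropy, averaged over all patch outputs
        return np.mean(np.maximum(logits, 0) - logits * labels + np.log1p(np.exp(-np.abs(logits))))

    def gan_losses(d_real_logits, d_fake_logits, gan_power):
        # discriminator: push real patches toward 1 and generated patches toward 0
        d_loss = 0.5 * ( sigmoid_ce(np.ones_like (d_real_logits), d_real_logits)
                       + sigmoid_ce(np.zeros_like(d_fake_logits), d_fake_logits) )
        # generator: fool the discriminator, scaled by the user-chosen gan_power
        g_loss = gan_power * sigmoid_ce(np.ones_like(d_fake_logits), d_fake_logits)
        return d_loss, g_loss

    # toy usage with random patch logits (shape: batch x h x w x 1)
    rng = np.random.RandomState(0)
    d_loss, g_loss = gan_losses(rng.randn(4, 8, 8, 1), rng.randn(4, 8, 8, 1), gan_power=0.1)
    print(d_loss, g_loss)

In the patch this is computed per GPU and per direction (src and dst each have their own discriminator); the discriminator gradients are applied through self.D_src_dst_opt, while the generator term is folded into gpu_G_loss and trained through the existing src_dst optimizer.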