diff --git a/README.md b/README.md index 1794088..ba6b10e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,5 @@ ## **DeepFaceLab** is a tool that utilizes deep learning to recognize and swap faces in pictures and videos. -Based on original FaceSwap repo. **Facesets** of FaceSwap or FakeApp are **not compatible** with this repo. You should to run extract again. - ### **Features**: - new models @@ -34,6 +32,8 @@ MTCNN produces less jitter. - standalone zero dependencies ready to work prebuilt binary for all windows versions, see below +### Warning: **Facesets** of FaceSwap or FakeApp are **not compatible** with this repo. You should run extraction again. + ### **Model types**: - **H64 (2GB+)** - half face with 64 resolution. It is as original FakeApp or FaceSwap, but with new TensorFlow 1.8 DSSIM Loss func and separated mask decoder + better ConverterMasked. for 2GB and 3GB VRAM model works in reduced mode. @@ -60,7 +60,7 @@ H128 asian face on blurry target: ![](https://github.com/iperov/DeepFaceLab/blob/master/doc/DF_Cage_0.jpg) -- **LIAEF128 (5GB+)** - new model. Result of combining DF, IAE, + experiments. Model tries to morph src face to dst, while keeping facial features of src face, but less agressive morphing. Model has problems with closed eyes recognizing. +- **LIAEF128 (5GB+)** - Less aggressive Improved Autoencoder Fullface 128 model. Result of combining DF, IAE and experiments. The model tries to morph the src face to dst while keeping the facial features of the src face, with less aggressive morphing. The model has problems recognizing closed eyes. LIAEF128 Cage: @@ -72,47 +72,10 @@ LIAEF128 Cage video: [![Watch the video](https://img.youtube.com/vi/mRsexePEVco/0.jpg)](https://www.youtube.com/watch?v=mRsexePEVco) -- **LIAEF128YAW (5GB+)** - currently testing. Useful when your src faceset has too many side faces vs dst faceset. It feeds NN by sorted samples by yaw. - -- **MIAEF128 (5GB+)** - as LIAEF128, but also it tries to match brightness/color features. - -MIAEF128 model diagramm: - -![](https://github.com/iperov/DeepFaceLab/blob/master/doc/MIAEF128_diagramm.png) - -MIAEF128 Ford success case: - -![](https://github.com/iperov/DeepFaceLab/blob/master/doc/MIAEF128_Ford_0.jpg) - -MIAEF128 Cage fail case: - -![](https://github.com/iperov/DeepFaceLab/blob/master/doc/MIAEF128_Cage_fail.jpg) - -- **AVATAR (4GB+)** - non GAN, 256x256 face controlling model. - -![](https://github.com/iperov/DeepFaceLab/blob/master/doc/AVATAR_Navalniy_0.jpg) - -Video: - -[![](https://img.youtube.com/vi/3M0E4QnWMqA/0.jpg)](https://www.youtube.com/watch?v=3M0E4QnWMqA) - -Usage: - -src - controllable face (Cage) - -dst - controller face (your face) - -converter --input-dir must contains *extracted dst faces* in sequence to be converted, its mean you can train on for example 1500 dst faces, but use for example 100 faces for convert. +- **UFM (4GB+)** - U-net Face Morpher model. If the "match_style" option is chosen, this model tries to morph the src face to the target face and fill the area around the face with the same background. UFM is the result of combining a modified U-Net, a classic face autoencoder, DSSIM and style losses. ![](https://github.com/iperov/DeepFaceLab/blob/master/doc/DeepFaceLab_convertor_overview.png) -- Video comparison of different Cage facesets. Vertical: 1 - mix of various Cage face shape and light conditions. 2,3,4 - without mix. Horizontal: 1 - DF, 2 - LIAEF128. 
- -[![](https://img.youtube.com/vi/C1nFgrmtm_o/0.jpg)](https://youtu.be/C1nFgrmtm_o) - -Conclusion: **better not to mix and use only same shape faces with same light** ### **Sort tool**: @@ -164,6 +127,10 @@ CPU mode enabled by arg --cpu-only for all stages. Follow requirements-cpu.txt t Do not use DLIB extractor in CPU mode, its too slow. Only H64 model reasonable to train on home CPU. +### Mac/Linux/Docker script support + +This repo supports only the Windows build of scripts. If you want to support Mac/Linux/Docker, create such a fork and it will be referenced here. + ### Prebuilt windows app: Windows 7,8,8.1,10 zero dependency (just install/update your GeForce Drivers) prebuilt DeepFaceLab (include GPU and CPU versions) can be downloaded from diff --git a/doc/AVATAR_Navalniy_0.jpg b/doc/AVATAR_Navalniy_0.jpg deleted file mode 100644 index 24b2f2d..0000000 Binary files a/doc/AVATAR_Navalniy_0.jpg and /dev/null differ diff --git a/doc/MIAEF128_Cage_fail.jpg b/doc/MIAEF128_Cage_fail.jpg deleted file mode 100644 index b3c7923..0000000 Binary files a/doc/MIAEF128_Cage_fail.jpg and /dev/null differ diff --git a/doc/MIAEF128_Ford_0.jpg b/doc/MIAEF128_Ford_0.jpg deleted file mode 100644 index 51c7f9e..0000000 Binary files a/doc/MIAEF128_Ford_0.jpg and /dev/null differ diff --git a/doc/MIAEF128_diagramm.png b/doc/MIAEF128_diagramm.png deleted file mode 100644 index a940d52..0000000 Binary files a/doc/MIAEF128_diagramm.png and /dev/null differ diff --git a/doc/merged-face.jpg b/doc/merged-face.jpg deleted file mode 100644 index 27ed67f..0000000 Binary files a/doc/merged-face.jpg and /dev/null differ diff --git a/main.py b/main.py index 5422627..aae3acd 100644 --- a/main.py +++ b/main.py @@ -4,7 +4,6 @@ import argparse from utils import Path_utils from utils import os_utils from pathlib import Path -import numpy as np if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 2): raise Exception("This program requires at least Python 3.2") @@ -68,7 +67,7 @@ if __name__ == "__main__": def process_train(arguments): if 'DFL_TARGET_EPOCH' in os.environ.keys(): - arguments.target_epoch = int ( os.environ['DFL_TARGET_EPOCH'] ) + arguments.session_target_epoch = int ( os.environ['DFL_TARGET_EPOCH'] ) if 'DFL_BATCH_SIZE' in os.environ.keys(): - arguments.batch_size = int ( os.environ['DFL_BATCH_SIZE'] ) + arguments.session_batch_size = int ( os.environ['DFL_BATCH_SIZE'] ) @@ -79,11 +78,12 @@ if __name__ == "__main__": training_data_dst_dir=arguments.training_data_dst_dir, model_path=arguments.model_dir, model_name=arguments.model_name, + ask_for_session_options = arguments.ask_for_session_options, debug = arguments.debug, #**options - batch_size = arguments.batch_size, - write_preview_history = arguments.write_preview_history, - target_epoch = arguments.target_epoch, + session_write_preview_history = arguments.session_write_preview_history, + session_target_epoch = arguments.session_target_epoch, + session_batch_size = arguments.session_batch_size, save_interval_min = arguments.save_interval_min, choose_worst_gpu = arguments.choose_worst_gpu, force_best_gpu_idx = arguments.force_best_gpu_idx, @@ -97,94 +97,21 @@ if __name__ == "__main__": train_parser.add_argument('--training-data-dst-dir', required=True, action=fixPathAction, dest="training_data_dst_dir", help="Dir of dst-set.") train_parser.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") train_parser.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), 
help="Type of model") - train_parser.add_argument('--write-preview-history', action="store_true", dest="write_preview_history", default=False, help="Enable write preview history.") - train_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug training.") - train_parser.add_argument('--batch-size', type=int, dest="batch_size", default=0, help="Model batch size. Default - auto. Environment variable: ODFS_BATCH_SIZE.") - train_parser.add_argument('--target-epoch', type=int, dest="target_epoch", default=0, help="Train until target epoch. Default - unlimited. Environment variable: ODFS_TARGET_EPOCH.") + train_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.") + train_parser.add_argument('--ask-for-session-options', action="store_true", dest="ask_for_session_options", default=False, help="Ask to override session options.") + train_parser.add_argument('--session-write-preview-history', action="store_true", dest="session_write_preview_history", default=None, help="Enable write preview history for this session.") + train_parser.add_argument('--session-target-epoch', type=int, dest="session_target_epoch", default=0, help="Train until target epoch for this session. Default - unlimited. Environment variable to override: DFL_TARGET_EPOCH.") + train_parser.add_argument('--session-batch-size', type=int, dest="session_batch_size", default=0, help="Model batch size for this session. Default - auto. Environment variable to override: DFL_BATCH_SIZE.") train_parser.add_argument('--save-interval-min', type=int, dest="save_interval_min", default=10, help="Save interval in minutes. Default 10.") + train_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.") + train_parser.add_argument('--force-gpu-idxs', type=str, dest="force_gpu_idxs", default=None, help="Override final GPU idxs. Example: 0,1,2.") + train_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="MultiGPU option (if model supports it). It will select only same best(worst) GPU models.") train_parser.add_argument('--choose-worst-gpu', action="store_true", dest="choose_worst_gpu", default=False, help="Choose worst GPU instead of best.") train_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best(worst).") - train_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="MultiGPU option. It will select only same best(worst) GPU models.") - train_parser.add_argument('--force-gpu-idxs', type=str, dest="force_gpu_idxs", default=None, help="Override final GPU idxs. Example: 0,1,2.") - train_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.") train_parser.set_defaults (func=process_train) def process_convert(arguments): - if arguments.ask_for_params: - try: - mode = int ( input ("Choose mode: (1) hist match, (2) hist match bw, (3) seamless (default), (4) seamless hist match : ") ) - except: - mode = 3 - - if mode == 1: - arguments.mode = 'hist-match' - elif mode == 2: - arguments.mode = 'hist-match-bw' - elif mode == 3: - arguments.mode = 'seamless' - elif mode == 4: - arguments.mode = 'seamless-hist-match' - - if arguments.mode == 'hist-match' or arguments.mode == 'hist-match-bw': - try: - arguments.masked_hist_match = bool ( {"1":True,"0":False}[input("Masked hist match? 
[0 or 1] (default 1) : ").lower()] ) - except: - arguments.masked_hist_match = True - - if arguments.mode == 'hist-match' or arguments.mode == 'hist-match-bw' or arguments.mode == 'seamless-hist-match': - try: - hist_match_threshold = int ( input ("Hist match threshold. [0..255] (default - 255) : ") ) - arguments.hist_match_threshold = hist_match_threshold - except: - arguments.hist_match_threshold = 255 - - try: - arguments.use_predicted_mask = bool ( {"1":True,"0":False}[input("Use predicted mask? [0 or 1] (default 1) : ").lower()] ) - except: - arguments.use_predicted_mask = False - - try: - arguments.erode_mask_modifier = int ( input ("Choose erode mask modifier [-200..200] (default 0) : ") ) - except: - arguments.erode_mask_modifier = 0 - - try: - arguments.blur_mask_modifier = int ( input ("Choose blur mask modifier [-200..200] (default 0) : ") ) - except: - arguments.blur_mask_modifier = 0 - - if arguments.mode == 'seamless' or arguments.mode == 'seamless-hist-match': - try: - arguments.seamless_erode_mask_modifier = int ( input ("Choose seamless erode mask modifier [-100..100] (default 0) : ") ) - except: - arguments.seamless_erode_mask_modifier = 0 - - try: - arguments.output_face_scale_modifier = int ( input ("Choose output face scale modifier [-50..50] (default 0) : ") ) - except: - arguments.output_face_scale_modifier = 0 - - try: - arguments.transfercolor = bool ( {"1":True,"0":False}[input("Transfer color from dst face to converted final face? [0 or 1] (default 0) : ").lower()] ) - except: - arguments.transfercolor = False - - try: - arguments.final_image_color_degrade_power = int ( input ("Degrade color power of final image [0..100] (default 0) : ") ) - except: - arguments.final_image_color_degrade_power = 0 - - try: - arguments.alpha = bool ( {"1":True,"0":False}[input("Export png with alpha channel? [0..1] (default 0) : ").lower()] ) - except: - arguments.alpha = False - - arguments.erode_mask_modifier = np.clip ( int(arguments.erode_mask_modifier), -200, 200) - arguments.blur_mask_modifier = np.clip ( int(arguments.blur_mask_modifier), -200, 200) - arguments.seamless_erode_mask_modifier = np.clip ( int(arguments.seamless_erode_mask_modifier), -100, 100) - arguments.output_face_scale_modifier = np.clip ( int(arguments.output_face_scale_modifier), -50, 50) - from mainscripts import Converter Converter.main ( input_dir=arguments.input_dir, @@ -193,17 +120,6 @@ if __name__ == "__main__": model_dir=arguments.model_dir, model_name=arguments.model_name, debug = arguments.debug, - mode = arguments.mode, - masked_hist_match = arguments.masked_hist_match, - hist_match_threshold = arguments.hist_match_threshold, - use_predicted_mask = arguments.use_predicted_mask, - erode_mask_modifier = arguments.erode_mask_modifier, - blur_mask_modifier = arguments.blur_mask_modifier, - seamless_erode_mask_modifier = arguments.seamless_erode_mask_modifier, - output_face_scale_modifier = arguments.output_face_scale_modifier, - final_image_color_degrade_power = arguments.final_image_color_degrade_power, - transfercolor = arguments.transfercolor, - alpha = arguments.alpha, force_best_gpu_idx = arguments.force_best_gpu_idx, cpu_only = arguments.cpu_only ) @@ -214,18 +130,6 @@ if __name__ == "__main__": convert_parser.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", help="Aligned directory. This is where the extracted of dst faces stored. 
Not used in AVATAR model.") convert_parser.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") convert_parser.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") - convert_parser.add_argument('--ask-for-params', action="store_true", dest="ask_for_params", default=False, help="Ask for params.") - convert_parser.add_argument('--mode', dest="mode", choices=['seamless','hist-match', 'hist-match-bw','seamless-hist-match'], default='seamless', help="Face overlaying mode. Seriously affects result.") - convert_parser.add_argument('--masked-hist-match', type=str2bool, nargs='?', const=True, default=True, help="True or False. Excludes background for hist match. Default - True.") - convert_parser.add_argument('--hist-match-threshold', type=int, dest="hist_match_threshold", default=255, help="Hist match threshold. Decrease to hide artifacts of hist match. Valid range [0..255]. Default 255") - convert_parser.add_argument('--use-predicted-mask', action="store_true", dest="use_predicted_mask", default=True, help="Use predicted mask by model. Default - True.") - convert_parser.add_argument('--erode-mask-modifier', type=int, dest="erode_mask_modifier", default=0, help="Automatic erode mask modifier. Valid range [-200..200].") - convert_parser.add_argument('--blur-mask-modifier', type=int, dest="blur_mask_modifier", default=0, help="Automatic blur mask modifier. Valid range [-200..200].") - convert_parser.add_argument('--seamless-erode-mask-modifier', type=int, dest="seamless_erode_mask_modifier", default=0, help="Automatic seamless erode mask modifier. Valid range [-200..200].") - convert_parser.add_argument('--output-face-scale-modifier', type=int, dest="output_face_scale_modifier", default=0, help="Output face scale modifier. Valid range [-50..50].") - convert_parser.add_argument('--final-image-color-degrade-power', type=int, dest="final_image_color_degrade_power", default=0, help="Degrades colors of final image to hide face problems. Valid range [0..100].") - convert_parser.add_argument('--transfercolor', action="store_true", dest="transfercolor", default=False, help="Transfer color from dst face to converted final face.") - convert_parser.add_argument('--alpha', action="store_true", dest="alpha", default=False, help="Embeds alpha channel of face mask to final PNG. 
Used in manual composing video by editors such as Sony Vegas or After Effects.") convert_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.") convert_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best.") convert_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.") diff --git a/mainscripts/Converter.py b/mainscripts/Converter.py index 8ee530a..227083f 100644 --- a/mainscripts/Converter.py +++ b/mainscripts/Converter.py @@ -1,4 +1,6 @@ -import traceback +import sys +import os +import traceback from pathlib import Path from utils import Path_utils import cv2 @@ -30,7 +32,9 @@ class model_process_predictor(object): return obj['result'] time.sleep(0.005) -def model_process(model_name, model_dir, in_options, sq, cq): +def model_process(stdin_fd, model_name, model_dir, in_options, sq, cq): + sys.stdin = os.fdopen(stdin_fd) + try: model_path = Path(model_dir) @@ -152,7 +156,7 @@ class ConvertSubprocessor(SubprocessorBase): image = (cv2.imread(str(filename_path)) / 255.0).astype(np.float32) if self.converter.get_mode() == ConverterBase.MODE_IMAGE: - image = self.converter.convert_image(image, self.debug) + image = self.converter.convert_image(image, None, self.debug) if self.debug: for img in image: cv2.imshow ('Debug convert', img ) @@ -229,7 +233,7 @@ def main (input_dir, output_dir, model_dir, model_name, aligned_dir=None, **in_o model_sq = multiprocessing.Queue() model_cq = multiprocessing.Queue() model_lock = multiprocessing.Lock() - model_p = multiprocessing.Process(target=model_process, args=(model_name, model_dir, in_options, model_sq, model_cq)) + model_p = multiprocessing.Process(target=model_process, args=( sys.stdin.fileno(), model_name, model_dir, in_options, model_sq, model_cq)) model_p.start() while True: @@ -266,7 +270,39 @@ def main (input_dir, output_dir, model_dir, model_name, aligned_dir=None, **in_o alignments[ source_filename_stem ].append (dflpng.get_source_landmarks()) - + + #interpolate landmarks + #from facelib import LandmarksProcessor + #from facelib import FaceType + #a = sorted(alignments.keys()) + #a_len = len(a) + # + #box_pts = 3 + #box = np.ones(box_pts)/box_pts + #for i in range( a_len ): + # if i >= box_pts and i <= a_len-box_pts-1: + # af0 = alignments[ a[i] ][0] ##first face + # m0 = LandmarksProcessor.get_transform_mat (af0, 256, face_type=FaceType.FULL) + # + # points = [] + # + # for j in range(-box_pts, box_pts+1): + # af = alignments[ a[i+j] ][0] ##first face + # m = LandmarksProcessor.get_transform_mat (af, 256, face_type=FaceType.FULL) + # p = LandmarksProcessor.transform_points (af, m) + # points.append (p) + # + # points = np.array(points) + # points_len = len(points) + # t_points = np.transpose(points, [1,0,2]) + # + # p1 = np.array ( [ int(np.convolve(x[:,0], box, mode='same')[points_len//2]) for x in t_points ] ) + # p2 = np.array ( [ int(np.convolve(x[:,1], box, mode='same')[points_len//2]) for x in t_points ] ) + # + # new_points = np.concatenate( [np.expand_dims(p1,-1),np.expand_dims(p2,-1)], -1 ) + # + # alignments[ a[i] ][0] = LandmarksProcessor.transform_points (new_points, m0, True).astype(np.int32) + files_processed, faces_processed = ConvertSubprocessor ( converter = converter.copy_and_set_predictor( model_process_predictor(model_sq,model_cq,model_lock) ), input_path_image_paths = Path_utils.get_image_paths(input_path), diff --git 
a/mainscripts/Trainer.py b/mainscripts/Trainer.py index 6f7ccef..8b45dcd 100644 --- a/mainscripts/Trainer.py +++ b/mainscripts/Trainer.py @@ -11,7 +11,7 @@ from utils import Path_utils from utils import image_utils import cv2 -def trainerThread (input_queue, output_queue, training_data_src_dir, training_data_dst_dir, model_path, model_name, save_interval_min=10, debug=False, target_epoch=0, **in_options): +def trainerThread (input_queue, output_queue, training_data_src_dir, training_data_dst_dir, model_path, model_name, save_interval_min=10, debug=False, **in_options): while True: try: @@ -29,8 +29,6 @@ def trainerThread (input_queue, output_queue, training_data_src_dir, training_da if not model_path.exists(): model_path.mkdir(exist_ok=True) - - import models model = models.import_model(model_name)( @@ -40,7 +38,7 @@ def trainerThread (input_queue, output_queue, training_data_src_dir, training_da debug=debug, **in_options) - is_reached_goal = (target_epoch > 0 and model.get_epoch() >= target_epoch) + is_reached_goal = model.is_reached_epoch_goal() def model_save(): if not debug and not is_reached_goal: @@ -58,11 +56,11 @@ def trainerThread (input_queue, output_queue, training_data_src_dir, training_da if model.is_first_run(): model_save() - if target_epoch != 0: + if model.get_target_epoch() != 0: if is_reached_goal: print ('Model already trained to target epoch. You can use preview.') else: - print('Starting. Target epoch: %d. Press "Enter" to stop training and save model.' % (target_epoch) ) + print('Starting. Target epoch: %d. Press "Enter" to stop training and save model.' % ( model.get_target_epoch() ) ) else: print('Starting. Press "Enter" to stop training and save model.') @@ -73,7 +71,7 @@ def trainerThread (input_queue, output_queue, training_data_src_dir, training_da loss_string = model.train_one_epoch() print (loss_string, end='\r') - if target_epoch != 0 and model.get_epoch() >= target_epoch: + if model.get_target_epoch() != 0 and model.is_reached_epoch_goal(): print ('Reached target epoch.') model_save() is_reached_goal = True diff --git a/models/ConverterBase.py b/models/ConverterBase.py index 250c7d0..b03e5a8 100644 --- a/models/ConverterBase.py +++ b/models/ConverterBase.py @@ -11,11 +11,12 @@ class ConverterBase(object): #overridable def __init__(self, predictor): self.predictor = predictor - + #overridable def get_mode(self): #MODE_FACE calls convert_face - #MODE_IMAGE calls convert_image + #MODE_IMAGE calls convert_image without landmarks + #MODE_IMAGE_WITH_LANDMARKS calls convert_image with landmarks return ConverterBase.MODE_FACE #overridable diff --git a/models/ConverterImage.py b/models/ConverterImage.py index 2cbfac3..3155a2f 100644 --- a/models/ConverterImage.py +++ b/models/ConverterImage.py @@ -34,7 +34,7 @@ class ConverterImage(ConverterBase): self.predictor ( np.zeros ( (self.predictor_input_size, self.predictor_input_size,3), dtype=np.float32) ) #override - def convert_image (self, img_bgr, debug): + def convert_image (self, img_bgr, img_landmarks, debug): img_size = img_bgr.shape[1], img_bgr.shape[0] predictor_input_bgr = cv2.resize ( img_bgr, (self.predictor_input_size, self.predictor_input_size), cv2.INTER_LANCZOS4 ) diff --git a/models/ConverterMasked.py b/models/ConverterMasked.py index dcf6f62..e5689aa 100644 --- a/models/ConverterMasked.py +++ b/models/ConverterMasked.py @@ -4,47 +4,53 @@ from facelib import FaceType import cv2 import numpy as np from utils import image_utils - +from utils.console_utils import * + class ConverterMasked(ConverterBase): 
#override def __init__(self, predictor, predictor_input_size=0, output_size=0, - face_type=FaceType.FULL, - clip_border_mask_per = 0, - masked_hist_match = True, - hist_match_threshold = 255, - mode='seamless', - use_predicted_mask = True, - erode_mask_modifier=0, - blur_mask_modifier=0, - seamless_erode_mask_modifier=0, - output_face_scale_modifier=0.0, - transfercolor=False, - final_image_color_degrade_power=0, - alpha=False, + face_type=FaceType.FULL, + base_erode_mask_modifier = 0, + base_blur_mask_modifier = 0, + **in_options): super().__init__(predictor) - self.predictor_input_size = predictor_input_size self.output_size = output_size - self.face_type = face_type - self.use_predicted_mask = use_predicted_mask - self.clip_border_mask_per = clip_border_mask_per - self.masked_hist_match = masked_hist_match - self.hist_match_threshold = hist_match_threshold - self.mode = mode - self.erode_mask_modifier = erode_mask_modifier - self.blur_mask_modifier = blur_mask_modifier - self.seamless_erode_mask_modifier = seamless_erode_mask_modifier - self.output_face_scale = np.clip(1.0 + output_face_scale_modifier*0.01, 0.5, 1.5) - self.transfercolor = transfercolor + self.face_type = face_type self.TFLabConverter = None - self.final_image_color_degrade_power = np.clip (final_image_color_degrade_power, 0, 100) - self.alpha = alpha - + + mode = input_int ("Choose mode: (1) overlay, (2) hist match, (3) hist match bw, (4) seamless (default), (5) seamless hist match : ", 4) + self.mode = {1:'overlay', + 2:'hist-match', + 3:'hist-match-bw', + 4:'seamless', + 5:'seamless-hist-match'}.get (mode, 'seamless') + + if self.mode == 'hist-match' or self.mode == 'hist-match-bw': + self.masked_hist_match = input_bool("Masked hist match? (y/n skip:y) : ", True) + + if self.mode == 'hist-match' or self.mode == 'hist-match-bw' or self.mode == 'seamless-hist-match': + self.hist_match_threshold = np.clip ( input_int("Hist match threshold [0..255] (skip:255) : ", 255), 0, 255) + + self.use_predicted_mask = input_bool("Use predicted mask? (y/n skip:y) : ", True) + self.erode_mask_modifier = base_erode_mask_modifier + np.clip ( input_int ("Choose erode mask modifier [-200..200] (skip:0) : ", 0), -200, 200) + self.blur_mask_modifier = base_blur_mask_modifier + np.clip ( input_int ("Choose blur mask modifier [-200..200] (skip:0) : ", 0), -200, 200) + + self.seamless_erode_mask_modifier = 0 + if self.mode == 'seamless' or self.mode == 'seamless-hist-match': + self.seamless_erode_mask_modifier = np.clip ( input_int ("Choose seamless erode mask modifier [-100..100] (skip:0) : ", 0), -100, 100) + + self.output_face_scale = np.clip ( 1.0 + input_int ("Choose output face scale modifier [-50..50] (skip:0) : ", 0)*0.01, 0.5, 1.5) + self.transfercolor = input_bool("Transfer color from dst face to converted final face? (y/n skip:n) : ", False) + self.final_image_color_degrade_power = np.clip ( input_int ("Degrade color power of final image [0..100] (skip:0) : ", 0), 0, 100) + self.alpha = input_bool("Export png with alpha channel? 
(y/n skip:n) : ", False) + print ("") + #override def get_mode(self): return ConverterBase.MODE_FACE @@ -79,7 +85,7 @@ class ConverterMasked(ConverterBase): if not self.use_predicted_mask: prd_face_mask_a_0 = predictor_input_mask_a_0 - + prd_face_mask_a_0[ prd_face_mask_a_0 < 0.001 ] = 0.0 prd_face_mask_a = np.expand_dims (prd_face_mask_a_0, axis=-1) @@ -145,16 +151,6 @@ class ConverterMasked(ConverterBase): print ("blur_size = %d" % (blur) ) img_mask_blurry_aaa = np.clip( img_mask_blurry_aaa, 0, 1.0 ) - - #if self.clip_border_mask_per > 0: - # prd_border_rect_mask_a = np.ones ( prd_face_mask_a.shape, dtype=prd_face_mask_a.dtype) - # prd_border_size = int ( prd_border_rect_mask_a.shape[1] * self.clip_border_mask_per ) - # - # prd_border_rect_mask_a[0:prd_border_size,:,:] = 0 - # prd_border_rect_mask_a[-prd_border_size:,:,:] = 0 - # prd_border_rect_mask_a[:,0:prd_border_size,:] = 0 - # prd_border_rect_mask_a[:,-prd_border_size:,:] = 0 - # prd_border_rect_mask_a = np.expand_dims(cv2.blur(prd_border_rect_mask_a, (prd_border_size, prd_border_size) ),-1) if self.mode == 'hist-match-bw': prd_face_bgr = cv2.cvtColor(prd_face_bgr, cv2.COLOR_BGR2GRAY) @@ -174,22 +170,21 @@ class ConverterMasked(ConverterBase): hist_match_2 = dst_face_bgr*hist_mask_a + (1.0-hist_mask_a)* np.ones ( prd_face_bgr.shape[:2] + (1,) , dtype=prd_face_bgr.dtype) hist_match_2[ hist_match_1 > 1.0 ] = 1.0 - - new_prd_face_bgr = image_utils.color_hist_match(hist_match_1, hist_match_2, self.hist_match_threshold ) - prd_face_bgr = new_prd_face_bgr + prd_face_bgr = image_utils.color_hist_match(hist_match_1, hist_match_2, self.hist_match_threshold ) if self.mode == 'hist-match-bw': prd_face_bgr = prd_face_bgr.astype(np.float32) - - out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - + if debug: debugs += [out_img.copy()] debugs += [img_mask_blurry_aaa.copy()] + if self.mode == 'overlay': + pass + if self.mode == 'seamless' or self.mode == 'seamless-hist-match': out_img = np.clip( img_bgr*(1-img_face_mask_aaa) + (out_img*img_face_mask_aaa) , 0, 1.0 ) if debug: @@ -200,14 +195,7 @@ class ConverterMasked(ConverterBase): if debug: debugs += [out_img.copy()] - - #if self.clip_border_mask_per > 0: - # img_prd_border_rect_mask_a = cv2.warpAffine( prd_border_rect_mask_a, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_LANCZOS4, cv2.BORDER_TRANSPARENT ) - # img_prd_border_rect_mask_a = np.expand_dims (img_prd_border_rect_mask_a, -1) - # - # out_img = out_img * img_prd_border_rect_mask_a + img_bgr * (1.0 - img_prd_border_rect_mask_a) - # img_mask_blurry_aaa *= img_prd_border_rect_mask_a - + out_img = np.clip( img_bgr*(1-img_mask_blurry_aaa) + (out_img*img_mask_blurry_aaa) , 0, 1.0 ) if self.mode == 'seamless-hist-match': diff --git a/models/ModelBase.py b/models/ModelBase.py index 5403a48..06f44f5 100644 --- a/models/ModelBase.py +++ b/models/ModelBase.py @@ -7,6 +7,7 @@ from pathlib import Path from utils import Path_utils from utils import std_utils from utils import image_utils +from utils.console_utils import * import numpy as np import cv2 from samples import SampleGeneratorBase @@ -18,8 +19,11 @@ class ModelBase(object): #DONT OVERRIDE def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, - batch_size=0, - write_preview_history = False, + ask_for_session_options=False, + session_write_preview_history = None, + session_target_epoch=0, + 
session_batch_size=0, + debug = False, **in_options ): print ("Loading model...") @@ -35,56 +39,94 @@ class ModelBase(object): self.dst_yaw_images_paths = None self.src_data_generator = None self.dst_data_generator = None - self.is_training_mode = (training_data_src_path is not None and training_data_dst_path is not None) - self.batch_size = batch_size - self.write_preview_history = write_preview_history self.debug = debug + self.is_training_mode = (training_data_src_path is not None and training_data_dst_path is not None) + self.supress_std_once = ('TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1') + self.epoch = 0 + self.options = {} + self.loss_history = [] + self.sample_for_preview = None if self.model_data_path.exists(): model_data = pickle.loads ( self.model_data_path.read_bytes() ) self.epoch = model_data['epoch'] - self.options = model_data['options'] - self.loss_history = model_data['loss_history'] if 'loss_history' in model_data.keys() else [] - self.sample_for_preview = model_data['sample_for_preview'] if 'sample_for_preview' in model_data.keys() else None - else: - self.epoch = 0 - self.options = {} - self.loss_history = [] - self.sample_for_preview = None + if self.epoch != 0: + self.options = model_data['options'] + self.loss_history = model_data['loss_history'] if 'loss_history' in model_data.keys() else [] + self.sample_for_preview = model_data['sample_for_preview'] if 'sample_for_preview' in model_data.keys() else None - if self.write_preview_history: - self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) ) - - if not self.preview_history_path.exists(): - self.preview_history_path.mkdir(exist_ok=True) - else: - if self.epoch == 0: - for filename in Path_utils.get_image_paths(self.preview_history_path): - Path(filename).unlink() - - self.device_config = nnlib.DeviceConfig(allow_growth=False, **in_options) - if self.epoch == 0: - #first run - self.options['created_vram_gb'] = self.device_config.gpu_total_vram_gb - self.created_vram_gb = self.device_config.gpu_total_vram_gb + print ("\nModel first run. Enter model options as default for each run.") + self.options['write_preview_history'] = input_bool("Write preview history? (y/n skip:n) : ", False) + self.options['target_epoch'] = max(0, input_int("Target epoch (skip:unlimited) : ", 0)) + self.options['batch_size'] = max(0, input_int("Batch_size (skip:model choice) : ", 0)) + self.options['sort_by_yaw'] = input_bool("Feed faces to network sorted by yaw? (y/n skip:n) : ", False) + + #self.options['use_fp16'] = use_fp16 = input_bool("Use float16? (y/n skip:n) : ", False) else: - #not first run - if 'created_vram_gb' in self.options.keys(): - self.created_vram_gb = self.options['created_vram_gb'] - else: - self.options['created_vram_gb'] = self.device_config.gpu_total_vram_gb - self.created_vram_gb = self.device_config.gpu_total_vram_gb + self.options['write_preview_history'] = self.options.get('write_preview_history', False) + self.options['target_epoch'] = self.options.get('target_epoch', 0) + self.options['batch_size'] = self.options.get('batch_size', 0) + self.options['sort_by_yaw'] = self.options.get('sort_by_yaw', False) + #self.options['use_fp16'] = use_fp16 = self.options['use_fp16'] if 'use_fp16' in self.options.keys() else False + + use_fp16 = False #currently models fails with fp16 + + if ask_for_session_options: + print ("Override options for current session:") + session_write_preview_history = input_bool("Write preview history? 
(y/n skip:default) : ", None ) + session_target_epoch = input_int("Target epoch (skip:default) : ", 0) + session_batch_size = input_int("Batch_size (skip:default) : ", 0) + + if self.options['write_preview_history']: + if session_write_preview_history is None: + session_write_preview_history = self.options['write_preview_history'] + else: + self.options.pop('write_preview_history') + + if self.options['target_epoch'] != 0: + if session_target_epoch == 0: + session_target_epoch = self.options['target_epoch'] + else: + self.options.pop('target_epoch') + + if self.options['batch_size'] != 0: + if session_batch_size == 0: + session_batch_size = self.options['batch_size'] + else: + self.options.pop('batch_size') + + self.sort_by_yaw = self.options['sort_by_yaw'] + if not self.sort_by_yaw: + self.options.pop('sort_by_yaw') + + self.write_preview_history = session_write_preview_history + self.target_epoch = session_target_epoch + self.batch_size = session_batch_size + self.device_config = nnlib.DeviceConfig(allow_growth=False, use_fp16=use_fp16, **in_options) + + self.created_vram_gb = self.options['created_vram_gb'] if 'created_vram_gb' in self.options.keys() else self.device_config.gpu_total_vram_gb + + self.onInitializeOptions(self.epoch == 0, ask_for_session_options) nnlib.import_all (self.device_config) - self.onInitialize(**in_options) if self.debug or self.batch_size == 0: self.batch_size = 1 if self.is_training_mode: + if self.write_preview_history: + self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) ) + + if not self.preview_history_path.exists(): + self.preview_history_path.mkdir(exist_ok=True) + else: + if self.epoch == 0: + for filename in Path_utils.get_image_paths(self.preview_history_path): + Path(filename).unlink() + if self.generator_list is None: raise Exception( 'You didnt set_training_data_generators()') else: @@ -100,11 +142,18 @@ class ModelBase(object): print ("==") print ("== Current epoch: " + str(self.epoch) ) print ("==") - print ("== Options:") - print ("== |== batch_size : %s " % (self.batch_size) ) - print ("== |== multi_gpu : %s " % (self.device_config.multi_gpu) ) + print ("== Model options:") for key in self.options.keys(): print ("== |== %s : %s" % (key, self.options[key]) ) + print ("== Session options:") + if self.write_preview_history: + print ("== |== write_preview_history : True ") + if self.target_epoch != 0: + print ("== |== target_epoch : %s " % (self.target_epoch) ) + print ("== |== batch_size : %s " % (self.batch_size) ) + if self.device_config.multi_gpu: + print ("== |== multi_gpu : True ") + print ("== Running on:") if self.device_config.cpu_only: @@ -122,6 +171,10 @@ class ModelBase(object): print ("=========================") + #overridable + def onInitializeOptions(self, is_first_run, ask_for_session_options): + pass + #overridable def onInitialize(self, **in_options): ''' @@ -161,6 +214,12 @@ class ModelBase(object): from .ConverterBase import ConverterBase return ConverterBase(self, **in_options) + def get_target_epoch(self): + return self.target_epoch + + def is_reached_epoch_goal(self): + return self.target_epoch != 0 and self.epoch >= self.target_epoch + def to_multi_gpu_model_if_possible (self, models_list): if len(self.device_config.gpu_idxs) > 1: #make batch_size to divide on GPU count without remainder @@ -305,9 +364,6 @@ class ModelBase(object): if self.batch_size == 0: self.batch_size = 2 else: - if self.device_config.gpu_total_vram_gb < keys[0]: - raise Exception ('Sorry, this model works only on 
%dGB+ GPU' % ( keys[0] ) ) - if self.batch_size == 0: for x in keys: if self.device_config.gpu_total_vram_gb <= x: diff --git a/models/Model_AVATAR/Model.py b/models/Model_AVATAR/Model.py deleted file mode 100644 index 29b9f1b..0000000 --- a/models/Model_AVATAR/Model.py +++ /dev/null @@ -1,251 +0,0 @@ -import numpy as np -import cv2 -from models import ModelBase -from samples import * -from nnlib import nnlib - -class Model(ModelBase): - - encoder64H5 = 'encoder64.h5' - decoder64_srcH5 = 'decoder64_src.h5' - decoder64_dstH5 = 'decoder64_dst.h5' - encoder256H5 = 'encoder256.h5' - decoder256H5 = 'decoder256.h5' - - #override - def onInitialize(self, **in_options): - exec(nnlib.import_all(), locals(), globals()) - - self.set_vram_batch_requirements( {3.5:8,4:8,5:12,6:16,7:24,8:32,9:48} ) - if self.batch_size < 4: - self.batch_size = 4 - - img_shape64, img_shape256, self.encoder64, self.decoder64_src, self.decoder64_dst, self.encoder256, self.decoder256 = self.Build() - - if not self.is_first_run(): - self.encoder64.load_weights (self.get_strpath_storage_for_file(self.encoder64H5)) - self.decoder64_src.load_weights (self.get_strpath_storage_for_file(self.decoder64_srcH5)) - self.decoder64_dst.load_weights (self.get_strpath_storage_for_file(self.decoder64_dstH5)) - self.encoder256.load_weights (self.get_strpath_storage_for_file(self.encoder256H5)) - self.decoder256.load_weights (self.get_strpath_storage_for_file(self.decoder256H5)) - - #if self.is_training_mode: - # self.encoder64, self.decoder64_src, self.decoder64_dst, self.encoder256, self.decoder256 = self.to_multi_gpu_model_if_possible ( [self.encoder64, self.decoder64_src, self.decoder64_dst, self.encoder256, self.decoder256] ) - - input_A_warped64 = Input(img_shape64) - input_B_warped64 = Input(img_shape64) - A_rec64 = self.decoder64_src(self.encoder64(input_A_warped64)) - B_rec64 = self.decoder64_dst(self.encoder64(input_B_warped64)) - self.ae64 = Model([input_A_warped64, input_B_warped64], [A_rec64, B_rec64] ) - - if self.is_training_mode: - self.ae64, = self.to_multi_gpu_model_if_possible ( [self.ae64,] ) - - self.ae64.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), - loss=[DSSIMLoss(), DSSIMLoss()] ) - - self.A64_view = K.function ([input_A_warped64], [A_rec64]) - self.B64_view = K.function ([input_B_warped64], [B_rec64]) - - input_A_warped64 = Input(img_shape64) - input_A_target256 = Input(img_shape256) - A_rec256 = self.decoder256( self.encoder256(input_A_warped64) ) - - input_B_warped64 = Input(img_shape64) - BA_rec64 = self.decoder64_src( self.encoder64(input_B_warped64) ) - BA_rec256 = self.decoder256( self.encoder256(BA_rec64) ) - - self.ae256 = Model([input_A_warped64], [A_rec256] ) - - if self.is_training_mode: - self.ae256, = self.to_multi_gpu_model_if_possible ( [self.ae256,] ) - - self.ae256.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), - loss=[DSSIMLoss()]) - - self.A256_view = K.function ([input_A_warped64], [A_rec256]) - self.BA256_view = K.function ([input_B_warped64], [BA_rec256]) - - if self.is_training_mode: - f = SampleProcessor.TypeFlags - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ - [f.WARPED_TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 64], - [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 64], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 256], - [f.SOURCE | f.FACE_ALIGN_HALF | f.MODE_BGR, 64], - [f.SOURCE | f.FACE_ALIGN_HALF | f.MODE_BGR, 256] ] ), - - 
SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ - [f.WARPED_TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 64], - [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 64], - [f.SOURCE | f.FACE_ALIGN_HALF | f.MODE_BGR, 64], - [f.SOURCE | f.FACE_ALIGN_HALF | f.MODE_BGR, 256] ] ) - ]) - #override - def onSave(self): - self.save_weights_safe( [[self.encoder64, self.get_strpath_storage_for_file(self.encoder64H5)], - [self.decoder64_src, self.get_strpath_storage_for_file(self.decoder64_srcH5)], - [self.decoder64_dst, self.get_strpath_storage_for_file(self.decoder64_dstH5)], - [self.encoder256, self.get_strpath_storage_for_file(self.encoder256H5)], - [self.decoder256, self.get_strpath_storage_for_file(self.decoder256H5)], - ] ) - - #override - def onTrainOneEpoch(self, sample): - warped_src64, target_src64, target_src256, target_src_source64, target_src_source256 = sample[0] - warped_dst64, target_dst64, target_dst_source64, target_dst_source256 = sample[1] - - loss64, loss_src64, loss_dst64 = self.ae64.train_on_batch ([warped_src64, warped_dst64], [target_src64, target_dst64]) - - loss256 = self.ae256.train_on_batch ([warped_src64], [target_src256]) - - return ( ('loss64', loss64 ), ('loss256', loss256), ) - - #override - def onGetPreview(self, sample): - sample_src64_source = sample[0][3][0:4] - sample_src256_source = sample[0][4][0:4] - - sample_dst64_source = sample[1][2][0:4] - sample_dst256_source = sample[1][3][0:4] - - SRC64, = self.A64_view ([sample_src64_source]) - DST64, = self.B64_view ([sample_dst64_source]) - SRCDST64, = self.A64_view ([sample_dst64_source]) - DSTSRC64, = self.B64_view ([sample_src64_source]) - - SRC_x1_256, = self.A256_view ([sample_src64_source]) - DST_x2_256, = self.BA256_view ([sample_dst64_source]) - - b1 = np.concatenate ( ( - np.concatenate ( (sample_src64_source[0], SRC64[0], sample_src64_source[1], SRC64[1], ), axis=1), - np.concatenate ( (sample_src64_source[1], SRC64[1], sample_src64_source[3], SRC64[3], ), axis=1), - np.concatenate ( (sample_dst64_source[0], DST64[0], sample_dst64_source[1], DST64[1], ), axis=1), - np.concatenate ( (sample_dst64_source[2], DST64[2], sample_dst64_source[3], DST64[3], ), axis=1), - ), axis=0 ) - - b2 = np.concatenate ( ( - np.concatenate ( (sample_src64_source[0], DSTSRC64[0], sample_src64_source[1], DSTSRC64[1], ), axis=1), - np.concatenate ( (sample_src64_source[2], DSTSRC64[2], sample_src64_source[3], DSTSRC64[3], ), axis=1), - np.concatenate ( (sample_dst64_source[0], SRCDST64[0], sample_dst64_source[1], SRCDST64[1], ), axis=1), - np.concatenate ( (sample_dst64_source[2], SRCDST64[2], sample_dst64_source[3], SRCDST64[3], ), axis=1), - - ), axis=0 ) - - result = np.concatenate ( ( np.concatenate ( (b1, sample_src256_source[0], SRC_x1_256[0] ), axis=1 ), - np.concatenate ( (b2, sample_dst256_source[0], DST_x2_256[0] ), axis=1 ), - ), axis = 0 ) - - return [ ('AVATAR', result ) ] - - def predictor_func (self, img): - x, = self.BA256_view ([ np.expand_dims(img, 0) ])[0] - return x - - #override - def get_converter(self, **in_options): - return ConverterAvatar(self.predictor_func, predictor_input_size=64, output_size=256, **in_options) - - def Build(self): - exec(nnlib.code_import_all, locals(), globals()) - - img_shape64 = (64,64,3) - img_shape256 = (256,256,3) - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(_input): - x = _input - x = Conv2D(90, 
kernel_size=5, strides=1, padding='same')(x) - x = Conv2D(90, kernel_size=5, strides=1, padding='same')(x) - x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x) - - x = Conv2D(180, kernel_size=3, strides=1, padding='same')(x) - x = Conv2D(180, kernel_size=3, strides=1, padding='same')(x) - x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x) - - x = Conv2D(360, kernel_size=3, strides=1, padding='same')(x) - x = Conv2D(360, kernel_size=3, strides=1, padding='same')(x) - x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x) - - x = Dense (1024)(x) - x = LeakyReLU(0.1)(x) - x = Dropout(0.5)(x) - - x = Dense (1024)(x) - x = LeakyReLU(0.1)(x) - x = Dropout(0.5)(x) - x = Flatten()(x) - x = Dense (64)(x) - - return keras.models.Model (_input, x) - - encoder256 = Encoder( Input (img_shape64) ) - encoder64 = Encoder( Input (img_shape64) ) - - def decoder256(encoder): - decoder_input = Input ( K.int_shape(encoder.outputs[0])[1:] ) - x = decoder_input - x = Dense(16 * 16 * 720)(x) - x = Reshape ( (16, 16, 720) )(x) - x = upscale(720)(x) - x = upscale(360)(x) - x = upscale(180)(x) - x = upscale(90)(x) - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - return keras.models.Model(decoder_input, x) - - def decoder64(encoder): - decoder_input = Input ( K.int_shape(encoder.outputs[0])[1:] ) - x = decoder_input - x = Dense(8 * 8 * 720)(x) - x = Reshape ( (8, 8, 720) )(x) - x = upscale(360)(x) - x = upscale(180)(x) - x = upscale(90)(x) - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - return Model(decoder_input, x) - - return img_shape64, img_shape256, encoder64, decoder64(encoder64), decoder64(encoder64), encoder256, decoder256(encoder256) - -from models import ConverterBase -from facelib import FaceType -from facelib import LandmarksProcessor -class ConverterAvatar(ConverterBase): - - #override - def __init__(self, predictor, - predictor_input_size=0, - output_size=0, - **in_options): - - super().__init__(predictor) - - self.predictor_input_size = predictor_input_size - self.output_size = output_size - - #override - def get_mode(self): - return ConverterBase.MODE_IMAGE_WITH_LANDMARKS - - #override - def dummy_predict(self): - self.predictor ( np.zeros ( (self.predictor_input_size, self.predictor_input_size,3), dtype=np.float32) ) - - #override - def convert_image (self, img_bgr, img_face_landmarks, debug): - img_size = img_bgr.shape[1], img_bgr.shape[0] - - face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, self.predictor_input_size, face_type=FaceType.HALF ) - predictor_input_bgr = cv2.warpAffine( img_bgr, face_mat, (self.predictor_input_size, self.predictor_input_size), flags=cv2.INTER_LANCZOS4 ) - - predicted_bgr = self.predictor ( predictor_input_bgr ) - - output = cv2.resize ( predicted_bgr, (self.output_size, self.output_size), cv2.INTER_LANCZOS4 ) - if debug: - return (img_bgr,output,) - return output \ No newline at end of file diff --git a/models/Model_DF/Model.py b/models/Model_DF/Model.py index 8205318..1e2dcf7 100644 --- a/models/Model_DF/Model.py +++ b/models/Model_DF/Model.py @@ -38,7 +38,8 @@ class Model(ModelBase): if self.is_training_mode: f = SampleProcessor.TypeFlags self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), 
batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_M | f.FACE_MASK_FULL, 128] ] ), @@ -107,16 +108,14 @@ class Model(ModelBase): #override def get_converter(self, **in_options): - from models import ConverterMasked - - if 'erode_mask_modifier' not in in_options.keys(): - in_options['erode_mask_modifier'] = 0 - in_options['erode_mask_modifier'] += 30 - - if 'blur_mask_modifier' not in in_options.keys(): - in_options['blur_mask_modifier'] = 0 - - return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.FULL, clip_border_mask_per=0.046875, **in_options) + from models import ConverterMasked + return ConverterMasked(self.predictor_func, + predictor_input_size=128, + output_size=128, + face_type=FaceType.FULL, + base_erode_mask_modifier=30, + base_blur_mask_modifier=100, + **in_options) def Build(self, input_layer): exec(nnlib.code_import_all, locals(), globals()) diff --git a/models/Model_H128/Model.py b/models/Model_H128/Model.py index 805f8ea..48ffda3 100644 --- a/models/Model_H128/Model.py +++ b/models/Model_H128/Model.py @@ -44,7 +44,8 @@ class Model(ModelBase): if self.is_training_mode: f = SampleProcessor.TypeFlags self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 128], [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 128], [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_M | f.FACE_MASK_FULL, 128] ] ), @@ -112,16 +113,13 @@ class Model(ModelBase): #override def get_converter(self, **in_options): from models import ConverterMasked - - if 'erode_mask_modifier' not in in_options.keys(): - in_options['erode_mask_modifier'] = 0 - in_options['erode_mask_modifier'] += 100 - - if 'blur_mask_modifier' not in in_options.keys(): - in_options['blur_mask_modifier'] = 0 - in_options['blur_mask_modifier'] += 100 - - return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.HALF, **in_options) + return ConverterMasked(self.predictor_func, + predictor_input_size=128, + output_size=128, + face_type=FaceType.HALF, + base_erode_mask_modifier=100, + base_blur_mask_modifier=100, + **in_options) def Build(self, created_vram_gb): exec(nnlib.code_import_all, locals(), globals()) diff --git a/models/Model_H64/Model.py b/models/Model_H64/Model.py index 4dce4ff..c30c92a 100644 --- a/models/Model_H64/Model.py +++ b/models/Model_H64/Model.py @@ -4,6 +4,7 @@ from nnlib import nnlib from models import ModelBase from facelib import FaceType from samples import * +from utils.console_utils import * class Model(ModelBase): @@ -44,7 +45,8 @@ class Model(ModelBase): if self.is_training_mode: f = SampleProcessor.TypeFlags self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FACE_ALIGN_HALF | 
f.MODE_BGR, 64], [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_BGR, 64], [f.TRANSFORMED | f.FACE_ALIGN_HALF | f.MODE_M | f.FACE_MASK_FULL, 64] ] ), @@ -66,7 +68,6 @@ class Model(ModelBase): warped_src, target_src, target_src_full_mask = sample[0] warped_dst, target_dst, target_dst_full_mask = sample[1] - total, loss_src_bgr, loss_src_mask, loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_full_mask, warped_dst, target_dst_full_mask], [target_src, target_src_full_mask, target_dst, target_dst_full_mask] ) return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) @@ -114,16 +115,13 @@ class Model(ModelBase): #override def get_converter(self, **in_options): from models import ConverterMasked - - if 'erode_mask_modifier' not in in_options.keys(): - in_options['erode_mask_modifier'] = 0 - in_options['erode_mask_modifier'] += 100 - - if 'blur_mask_modifier' not in in_options.keys(): - in_options['blur_mask_modifier'] = 0 - in_options['blur_mask_modifier'] += 100 - - return ConverterMasked(self.predictor_func, predictor_input_size=64, output_size=64, face_type=FaceType.HALF, **in_options) + return ConverterMasked(self.predictor_func, + predictor_input_size=64, + output_size=64, + face_type=FaceType.HALF, + base_erode_mask_modifier=100, + base_blur_mask_modifier=100, + **in_options) def Build(self, created_vram_gb): exec(nnlib.code_import_all, locals(), globals()) diff --git a/models/Model_LIAEF128/Model.py b/models/Model_LIAEF128/Model.py index b269bd8..f5e8145 100644 --- a/models/Model_LIAEF128/Model.py +++ b/models/Model_LIAEF128/Model.py @@ -42,8 +42,11 @@ class Model(ModelBase): if self.is_training_mode: f = SampleProcessor.TypeFlags - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, + self.set_training_data_generators ([ + + + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, output_sample_types=[ [f.WARPED_TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_M | f.FACE_MASK_FULL, 128] ] ), @@ -115,15 +118,13 @@ class Model(ModelBase): #override def get_converter(self, **in_options): from models import ConverterMasked - - if 'erode_mask_modifier' not in in_options.keys(): - in_options['erode_mask_modifier'] = 0 - in_options['erode_mask_modifier'] += 30 - - if 'blur_mask_modifier' not in in_options.keys(): - in_options['blur_mask_modifier'] = 0 - - return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.FULL, clip_border_mask_per=0.046875, **in_options) + return ConverterMasked(self.predictor_func, + predictor_input_size=128, + output_size=128, + face_type=FaceType.FULL, + base_erode_mask_modifier=30, + base_blur_mask_modifier=0, + **in_options) def Build(self, input_layer): exec(nnlib.code_import_all, locals(), globals()) diff --git a/models/Model_LIAEF128YAW/Model.py b/models/Model_LIAEF128YAW/Model.py deleted file mode 100644 index defc42c..0000000 --- a/models/Model_LIAEF128YAW/Model.py +++ /dev/null @@ -1,175 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samples import * - -class Model(ModelBase): - - encoderH5 = 'encoder.h5' - decoderH5 = 'decoder.h5' - inter_BH5 = 'inter_B.h5' - inter_ABH5 = 'inter_AB.h5' 
- - #override - def onInitialize(self, **in_options): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4,5:4,6:8,7:12,8:16,9:20,10:24,11:24,12:32,13:48} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoder, self.inter_B, self.inter_AB = self.Build(ae_input_layer) - - if not self.is_first_run(): - self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5)) - self.decoder.load_weights (self.get_strpath_storage_for_file(self.decoderH5)) - self.inter_B.load_weights (self.get_strpath_storage_for_file(self.inter_BH5)) - self.inter_AB.load_weights (self.get_strpath_storage_for_file(self.inter_ABH5)) - - code = self.encoder(ae_input_layer) - AB = self.inter_AB(code) - B = self.inter_B(code) - self.autoencoder_src = Model([ae_input_layer,mask_layer], self.decoder(Concatenate()([AB, AB])) ) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], self.decoder(Concatenate()([B, AB])) ) - - if self.is_training_mode: - self.autoencoder_src, self.autoencoder_dst = self.to_multi_gpu_model_if_possible ( [self.autoencoder_src, self.autoencoder_dst] ) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMaskLoss([mask_layer]), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMaskLoss([mask_layer]), 'mse'] ) - - if self.is_training_mode: - f = SampleProcessor.TypeFlags - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - output_sample_types=[ [f.WARPED_TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_M | f.FACE_MASK_FULL, 128] ] ), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - output_sample_types=[ [f.WARPED_TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_M | f.FACE_MASK_FULL, 128] ] ) - ]) - - #override - def onSave(self): - self.save_weights_safe( [[self.encoder, self.get_strpath_storage_for_file(self.encoderH5)], - [self.decoder, self.get_strpath_storage_for_file(self.decoderH5)], - [self.inter_B, self.get_strpath_storage_for_file(self.inter_BH5)], - [self.inter_AB, self.get_strpath_storage_for_file(self.inter_ABH5)]] ) - - #override - def onTrainOneEpoch(self, sample): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) - loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) 
- - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('LIAEF128YAW', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - - face_128_bgr = face[...,0:3] - face_128_mask = np.expand_dims(face[...,3],-1) - - x, mx = self.autoencoder_src.predict ( [ np.expand_dims(face_128_bgr,0), np.expand_dims(face_128_mask,0) ] ) - x, mx = x[0], mx[0] - - return np.concatenate ( (x,mx), -1 ) - - #override - def get_converter(self, **in_options): - from models import ConverterMasked - - if 'erode_mask_modifier' not in in_options.keys(): - in_options['erode_mask_modifier'] = 0 - in_options['erode_mask_modifier'] += 30 - - if 'blur_mask_modifier' not in in_options.keys(): - in_options['blur_mask_modifier'] = 0 - - return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.FULL, clip_border_mask_per=0.046875, **in_options) - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Flatten()(x) - return Model(input_layer, x) - - def Intermediate(): - input_layer = Input(shape=(None, 8 * 8 * 1024)) - x = input_layer - x = Dense(256)(x) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - return Model(input_layer, x) - - def Decoder(): - input_ = Input(shape=(16, 16, 1024)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid' )(y) - - return Model(input_, [x,y]) - - return Encoder(), Decoder(), Intermediate(), Intermediate() diff --git a/models/Model_LIAEF128YAW/__init__.py b/models/Model_LIAEF128YAW/__init__.py deleted file mode 100644 index cdb3fe7..0000000 --- a/models/Model_LIAEF128YAW/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model \ No newline at end of file diff --git a/models/Model_MIAEF128/Model.py b/models/Model_MIAEF128/Model.py deleted file mode 100644 index d348fbf..0000000 --- a/models/Model_MIAEF128/Model.py +++ /dev/null @@ -1,225 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samples import * - -class Model(ModelBase): - - encoderH5 = 'encoder.h5' - decoderMaskH5 = 'decoderMask.h5' - decoderCommonAH5 = 'decoderCommonA.h5' - decoderCommonBH5 = 'decoderCommonB.h5' - decoderRGBH5 = 'decoderRGB.h5' - decoderBWH5 = 'decoderBW.h5' - inter_BH5 = 'inter_B.h5' - inter_AH5 = 'inter_A.h5' - - #override - def onInitialize(self, **in_options): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4,5:4,6:8,7:12,8:16,9:20,10:24,11:24,12:32,13:48} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoderMask, self.decoderCommonA, self.decoderCommonB, self.decoderRGB, \ - 
self.decoderBW, self.inter_A, self.inter_B = self.Build(ae_input_layer) - - if not self.is_first_run(): - self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5)) - self.decoderMask.load_weights (self.get_strpath_storage_for_file(self.decoderMaskH5)) - self.decoderCommonA.load_weights (self.get_strpath_storage_for_file(self.decoderCommonAH5)) - self.decoderCommonB.load_weights (self.get_strpath_storage_for_file(self.decoderCommonBH5)) - self.decoderRGB.load_weights (self.get_strpath_storage_for_file(self.decoderRGBH5)) - self.decoderBW.load_weights (self.get_strpath_storage_for_file(self.decoderBWH5)) - self.inter_A.load_weights (self.get_strpath_storage_for_file(self.inter_AH5)) - self.inter_B.load_weights (self.get_strpath_storage_for_file(self.inter_BH5)) - - code = self.encoder(ae_input_layer) - A = self.inter_A(code) - B = self.inter_B(code) - - inter_A_A = Concatenate()([A, A]) - inter_B_A = Concatenate()([B, A]) - - x1,m1 = self.decoderCommonA (inter_A_A) - x2,m2 = self.decoderCommonA (inter_A_A) - self.autoencoder_src = Model([ae_input_layer,mask_layer], - [ self.decoderBW (Concatenate()([x1,x2]) ), - self.decoderMask(Concatenate()([m1,m2]) ) - ]) - - x1,m1 = self.decoderCommonA (inter_A_A) - x2,m2 = self.decoderCommonB (inter_A_A) - self.autoencoder_src_RGB = Model([ae_input_layer,mask_layer], - [ self.decoderRGB (Concatenate()([x1,x2]) ), - self.decoderMask (Concatenate()([m1,m2]) ) - ]) - - x1,m1 = self.decoderCommonA (inter_B_A) - x2,m2 = self.decoderCommonB (inter_B_A) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], - [ self.decoderRGB (Concatenate()([x1,x2]) ), - self.decoderMask (Concatenate()([m1,m2]) ) - ]) - - if self.is_training_mode: - self.autoencoder_src, self.autoencoder_dst = self.to_multi_gpu_model_if_possible ( [self.autoencoder_src, self.autoencoder_dst] ) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMaskLoss([mask_layer]), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMaskLoss([mask_layer]), 'mse'] ) - - if self.is_training_mode: - f = SampleProcessor.TypeFlags - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - output_sample_types=[ [f.WARPED_TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_GGG, 128], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_G , 128], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_M | f.FACE_MASK_FULL, 128], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_GGG, 128] ] ), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - output_sample_types=[ [f.WARPED_TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_BGR, 128], - [f.TRANSFORMED | f.FACE_ALIGN_FULL | f.MODE_M | f.FACE_MASK_FULL, 128]] ) - ]) - #override - def onSave(self): - self.save_weights_safe( [[self.encoder, self.get_strpath_storage_for_file(self.encoderH5)], - [self.decoderMask, self.get_strpath_storage_for_file(self.decoderMaskH5)], - [self.decoderCommonA, self.get_strpath_storage_for_file(self.decoderCommonAH5)], - [self.decoderCommonB, self.get_strpath_storage_for_file(self.decoderCommonBH5)], - [self.decoderRGB, self.get_strpath_storage_for_file(self.decoderRGBH5)], - [self.decoderBW, self.get_strpath_storage_for_file(self.decoderBWH5)], - [self.inter_A, self.get_strpath_storage_for_file(self.inter_AH5)], - [self.inter_B, 
self.get_strpath_storage_for_file(self.inter_BH5)]] ) - - - #override - def onTrainOneEpoch(self, sample): - warped_src, target_src, target_src_mask, target_src_GGG = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [ warped_src, target_src_mask], [ target_src, target_src_mask] ) - loss_dst = self.autoencoder_dst.train_on_batch( [ warped_dst, target_dst_mask], [ target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][3][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = self.autoencoder_src_RGB.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - np.repeat (np.expand_dims (test_A[i,:,:,0],-1), (3,), -1) , - np.repeat (AA[i], (3,), -1), - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('MIAEF128', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face): - face_128_bgr = face[...,0:3] - face_128_mask = np.expand_dims(face[...,-1],-1) - - x, mx = self.autoencoder_src_RGB.predict ( [ np.expand_dims(face_128_bgr,0), np.expand_dims(face_128_mask,0) ] ) - x, mx = x[0], mx[0] - - return np.concatenate ( (x,mx), -1 ) - - #override - def get_converter(self, **in_options): - from models import ConverterMasked - - if 'erode_mask_modifier' not in in_options.keys(): - in_options['erode_mask_modifier'] = 0 - in_options['erode_mask_modifier'] += 30 - - if 'blur_mask_modifier' not in in_options.keys(): - in_options['blur_mask_modifier'] = 0 - - return ConverterMasked(self.predictor_func, predictor_input_size=128, output_size=128, face_type=FaceType.FULL, clip_border_mask_per=0.046875, **in_options) - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Flatten()(x) - return Model(input_layer, x) - - def Intermediate(): - input_layer = Input(shape=(None, 8 * 8 * 1024)) - x = input_layer - x = Dense(256)(x) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - return Model(input_layer, x) - - def DecoderCommon(): - input_ = Input(shape=(16, 16, 1024)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - y = input_ - y = upscale(256)(y) - y = upscale(128)(y) - y = upscale(64)(y) - - return Model(input_, [x,y]) - - def DecoderRGB(): - input_ = Input(shape=(128, 128, 256)) - x = input_ - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - return Model(input_, [x]) - - def DecoderBW(): - input_ = Input(shape=(128, 128, 256)) - x = input_ - x = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(x) - return Model(input_, [x]) - - def DecoderMask(): - input_ = Input(shape=(128, 128, 128)) 
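The `upscale(dim)` helper used throughout these decoders is a Conv2D producing `dim * 4` channels followed by PixelShuffler, which rearranges those extra channels into a feature map with twice the spatial resolution; it is the same depth-to-space trick that the nnlib changes further down in this patch reimplement with `tf.depth_to_space`. A small numpy-only sketch of that rearrangement, assuming channels-last layout:

```python
import numpy as np

def pixel_shuffle_nhwc(x, r=2):
    # Rearranges (N, H, W, r*r*C) -> (N, H*r, W*r, C): channel depth becomes
    # spatial resolution, which is what PixelShuffler / tf.depth_to_space do.
    n, h, w, c = x.shape
    oc = c // (r * r)
    out = x.reshape(n, h, w, r, r, oc)
    out = out.transpose(0, 1, 3, 2, 4, 5)   # interleave the r factors with H and W
    return out.reshape(n, h * r, w * r, oc)

# upscale(512) first convolves to 512*4 channels, then shuffles to double H and W:
feat = np.zeros((1, 8, 8, 512 * 4), dtype=np.float32)
print(pixel_shuffle_nhwc(feat).shape)   # (1, 16, 16, 512)
```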
- y = input_ - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - return Model(input_, [y]) - - return Encoder(), DecoderMask(), DecoderCommon(), DecoderCommon(), DecoderRGB(), DecoderBW(), Intermediate(), Intermediate() \ No newline at end of file diff --git a/models/Model_MIAEF128/__init__.py b/models/Model_MIAEF128/__init__.py deleted file mode 100644 index cdb3fe7..0000000 --- a/models/Model_MIAEF128/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model \ No newline at end of file diff --git a/models/Model_RecycleGAN/Model.py b/models/Model_RecycleGAN/Model.py index f0d1856..f8748f5 100644 --- a/models/Model_RecycleGAN/Model.py +++ b/models/Model_RecycleGAN/Model.py @@ -25,7 +25,7 @@ class Model(ModelBase): if self.epoch == 0: #first run - print ("\nModel first run. Enter options.") + try: created_resolution = int ( input ("Resolution (default:64, valid: 64,128,256) : ") ) @@ -68,9 +68,9 @@ class Model(ModelBase): self.set_batch_size(created_batch_size) - use_batch_norm = created_batch_size > 1 - self.GA = modelify(ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=False))(Input(bgr_shape)) - self.GB = modelify(ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=False))(Input(bgr_shape)) + use_batch_norm = False #created_batch_size > 1 + self.GA = modelify(ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) + self.GB = modelify(ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape)) #self.GA = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) #self.GB = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape)) @@ -211,7 +211,7 @@ class Model(ModelBase): loss_G, = self.G_train ( feed ) loss_DA, = self.DA_train( feed ) loss_DB, = self.DB_train( feed ) - + #return ( ('G', loss_G), ) return ( ('G', loss_G), ('DA', loss_DA), ('DB', loss_DB) ) #override @@ -242,7 +242,9 @@ class Model(ModelBase): #override def get_converter(self, **in_options): - from models import ConverterImage - - return ConverterImage(self.predictor_func, predictor_input_size=self.options['created_resolution'], output_size=self.options['created_resolution'], **in_options) + from models import ConverterImage + return ConverterImage(self.predictor_func, + predictor_input_size=self.options['created_resolution'], + output_size=self.options['created_resolution'], + **in_options) diff --git a/models/Model_UFM/Model.py b/models/Model_UFM/Model.py new file mode 100644 index 0000000..323d675 --- /dev/null +++ b/models/Model_UFM/Model.py @@ -0,0 +1,298 @@ +import numpy as np + +from nnlib import nnlib +from models import ModelBase +from facelib import FaceType +from samples import * +from utils.console_utils import * + +#U-net Face Morpher +class UFMModel(ModelBase): + + encoderH5 = 'encoder.h5' + decoder_srcH5 = 'decoder_src.h5' + decoder_dstH5 = 'decoder_dst.h5' + decoder_srcmH5 = 'decoder_srcm.h5' + decoder_dstmH5 = 'decoder_dstm.h5' + + #override + def onInitializeOptions(self, is_first_run, ask_for_session_options): + default_resolution = 128 + default_filters = 64 + default_match_style = True + default_face_type = 'f' + + if is_first_run: + #first run + self.options['resolution'] = input_int("Resolution (valid: 64,128,256, skip:128) : ", default_resolution, [64,128,256]) + self.options['filters'] = np.clip ( 
input_int("Number of U-net filters (valid: 32-128, skip:64) : ", default_filters), 32, 128 ) + self.options['match_style'] = input_bool ("Match style? (y/n skip:y) : ", default_match_style) + self.options['face_type'] = input_str ("Half or Full face? (h/f, skip:f) : ", default_face_type, ['h','f']) + + else: + #not first run + self.options['resolution'] = self.options.get('resolution', default_resolution) + self.options['filters'] = self.options.get('filters', default_filters) + self.options['match_style'] = self.options.get('match_style', default_match_style) + self.options['face_type'] = self.options.get('face_type', default_face_type) + + #override + def onInitialize(self, **in_options): + exec(nnlib.import_all(), locals(), globals()) + + self.set_vram_batch_requirements({2:1,3:2,4:6,5:8,6:16,7:24,8:32}) + + resolution = self.options['resolution'] + bgr_shape = (resolution, resolution, 3) + mask_shape = (resolution, resolution, 1) + + filters = self.options['filters'] + + if resolution == 64: + lowest_dense = 512 + elif resolution == 128: + lowest_dense = 512 + elif resolution == 256: + lowest_dense = 256 + + self.encoder = modelify(UFMModel.EncFlow (ngf=filters, lowest_dense=lowest_dense)) (Input(bgr_shape)) + + dec_Inputs = [ Input(K.int_shape(x)[1:]) for x in self.encoder.outputs ] + + self.decoder_src = modelify(UFMModel.DecFlow (bgr_shape[2], ngf=filters)) (dec_Inputs) + self.decoder_dst = modelify(UFMModel.DecFlow (bgr_shape[2], ngf=filters)) (dec_Inputs) + + self.decoder_srcm = modelify(UFMModel.DecFlow (mask_shape[2], ngf=filters//2)) (dec_Inputs) + self.decoder_dstm = modelify(UFMModel.DecFlow (mask_shape[2], ngf=filters//2)) (dec_Inputs) + + if not self.is_first_run(): + self.encoder.load_weights (self.get_strpath_storage_for_file(self.encoderH5)) + self.decoder_src.load_weights (self.get_strpath_storage_for_file(self.decoder_srcH5)) + self.decoder_dst.load_weights (self.get_strpath_storage_for_file(self.decoder_dstH5)) + self.decoder_srcm.load_weights (self.get_strpath_storage_for_file(self.decoder_srcmH5)) + self.decoder_dstm.load_weights (self.get_strpath_storage_for_file(self.decoder_dstmH5)) + + warped_src = Input(bgr_shape) + target_src = Input(bgr_shape) + target_srcm = Input(mask_shape) + + warped_src_code = self.encoder (warped_src) + pred_src_src = self.decoder_src(warped_src_code) + pred_src_srcm = self.decoder_srcm(warped_src_code) + + warped_dst = Input(bgr_shape) + target_dst = Input(bgr_shape) + target_dstm = Input(mask_shape) + + warped_dst_code = self.encoder (warped_dst) + pred_dst_dst = self.decoder_dst(warped_dst_code) + pred_dst_dstm = self.decoder_dstm(warped_dst_code) + + pred_src_dst = self.decoder_src(warped_dst_code) + pred_src_dstm = self.decoder_srcm(warped_dst_code) + + target_srcm_blurred = tf_gaussian_blur(4.0)(target_srcm) + target_srcm_sigm = target_srcm_blurred / 2.0 + 0.5 + target_srcm_anti_sigm = 1.0 - target_srcm_sigm + + target_dstm_blurred = tf_gaussian_blur(4.0)(target_dstm) + target_dstm_sigm = target_dstm_blurred / 2.0 + 0.5 + target_dstm_anti_sigm = 1.0 - target_dstm_sigm + + target_src_sigm = target_src+1 + target_dst_sigm = target_dst+1 + + pred_src_src_sigm = pred_src_src+1 + pred_dst_dst_sigm = pred_dst_dst+1 + pred_src_dst_sigm = pred_src_dst+1 + + target_src_masked = target_src_sigm*target_srcm_sigm + + target_dst_masked = target_dst_sigm * target_dstm_sigm + target_dst_anti_masked = target_dst_sigm * target_dstm_anti_sigm + + pred_src_src_masked = pred_src_src_sigm * target_srcm_sigm + pred_dst_dst_masked = pred_dst_dst_sigm * 
target_dstm_sigm + + pred_src_dst_target_dst_masked = pred_src_dst_sigm * target_dstm_sigm + pred_src_dst_target_dst_anti_masked = pred_src_dst_sigm * target_dstm_anti_sigm + + + src_loss = K.mean( 100*K.square(tf_dssim(2.0)( target_src_masked, pred_src_src_masked )) ) + if self.options['match_style']: + src_loss += tf_style_loss(gaussian_blur_radius=resolution // 8, loss_weight=0.015)(pred_src_dst_target_dst_masked, target_dst_masked) + src_loss += 0.05 * K.mean( tf_dssim(2.0)( pred_src_dst_target_dst_anti_masked, target_dst_anti_masked )) + + self.src_train = K.function ([warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm ],[src_loss], + Adam(lr=5e-5, beta_1=0.5, beta_2=0.999).get_updates(src_loss, self.encoder.trainable_weights + self.decoder_src.trainable_weights) ) + + dst_loss = K.mean( 100*K.square(tf_dssim(2.0)( target_dst_masked, pred_dst_dst_masked )) ) + self.dst_train = K.function ([warped_dst, target_dst, target_dstm],[dst_loss], + Adam(lr=5e-5, beta_1=0.5, beta_2=0.999).get_updates(dst_loss, self.encoder.trainable_weights + self.decoder_dst.trainable_weights) ) + + + src_mask_loss = K.mean(K.square(target_srcm-pred_src_srcm)) + self.src_mask_train = K.function ([warped_src, target_srcm],[src_mask_loss], + Adam(lr=5e-5, beta_1=0.5, beta_2=0.999).get_updates(src_mask_loss, self.encoder.trainable_weights + self.decoder_srcm.trainable_weights) ) + + dst_mask_loss = K.mean(K.square(target_dstm-pred_dst_dstm)) + self.dst_mask_train = K.function ([warped_dst, target_dstm],[dst_mask_loss], + Adam(lr=5e-5, beta_1=0.5, beta_2=0.999).get_updates(dst_mask_loss, self.encoder.trainable_weights + self.decoder_dstm.trainable_weights) ) + + self.AE_view = K.function ([warped_src, warped_dst],[pred_src_src, pred_src_srcm, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm]) + self.AE_convert = K.function ([warped_dst],[pred_src_dst, pred_src_dstm]) + + if self.is_training_mode: + f = SampleProcessor.TypeFlags + + face_type = f.FACE_ALIGN_FULL if self.options['face_type'] == 'f' else f.FACE_ALIGN_HALF + + self.set_training_data_generators ([ + SampleGeneratorFace(self.training_data_src_path, sort_by_yaw_target_samples_path=self.training_data_dst_path if self.sort_by_yaw else None, + debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(normalize_tanh = True), + output_sample_types=[ [f.WARPED_TRANSFORMED | face_type | f.MODE_BGR, resolution], + [f.TRANSFORMED | face_type | f.MODE_BGR, resolution], + [f.TRANSFORMED | face_type | f.MODE_M | f.FACE_MASK_FULL, resolution] ] ), + + SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, + sample_process_options=SampleProcessor.Options(normalize_tanh = True), + output_sample_types=[ [f.WARPED_TRANSFORMED | face_type | f.MODE_BGR, resolution], + [f.TRANSFORMED | face_type | f.MODE_BGR, resolution], + [f.TRANSFORMED | face_type | f.MODE_M | f.FACE_MASK_FULL, resolution] ] ) + ]) + #override + def onSave(self): + self.save_weights_safe( [[self.encoder, self.get_strpath_storage_for_file(self.encoderH5)], + [self.decoder_src, self.get_strpath_storage_for_file(self.decoder_srcH5)], + [self.decoder_dst, self.get_strpath_storage_for_file(self.decoder_dstH5)], + [self.decoder_srcm, self.get_strpath_storage_for_file(self.decoder_srcmH5)], + [self.decoder_dstm, self.get_strpath_storage_for_file(self.decoder_dstmH5)] + ] ) + + #override + def onTrainOneEpoch(self, sample): + warped_src, target_src, target_src_mask = sample[0] + warped_dst, 
target_dst, target_dst_mask = sample[1] + + src_loss, = self.src_train ([warped_src, target_src, target_src_mask, warped_dst, target_dst, target_dst_mask]) + dst_loss, = self.dst_train ([warped_dst, target_dst, target_dst_mask]) + + src_mask_loss, = self.src_mask_train ([warped_src, target_src_mask]) + dst_mask_loss, = self.dst_mask_train ([warped_dst, target_dst_mask]) + + return ( ('src_loss', src_loss), ('dst_loss', dst_loss) ) + + + #override + def onGetPreview(self, sample): + test_A = sample[0][1][0:4] #first 4 samples + test_A_m = sample[0][2][0:4] #first 4 samples + test_B = sample[1][1][0:4] + test_B_m = sample[1][2][0:4] + + S = test_A + D = test_B + + SS, SM, DD, DM, SD, SDM = self.AE_view ([test_A, test_B]) + S, D, SS, SM, DD, DM, SD, SDM = [ x / 2 + 0.5 for x in [S, D, SS, SM, DD, DM, SD, SDM] ] + + SM, DM, SDM = [ np.repeat (x, (3,), -1) for x in [SM, DM, SDM] ] + + st = [] + for i in range(0, len(test_A)): + st.append ( np.concatenate ( ( + S[i], SS[i], #SM[i], + D[i], DD[i], #DM[i], + SD[i], #SDM[i] + ), axis=1) ) + + return [ ('U-net Face Morpher', np.concatenate ( st, axis=0 ) ) ] + + def predictor_func (self, face): + + face = face * 2.0 - 1.0 + + face_128_bgr = face[...,0:3] + + x, mx = [ (x[0] + 1.0) / 2.0 for x in self.AE_convert ( [ np.expand_dims(face_128_bgr,0) ] ) ] + + if self.options['match_style']: + res = self.options['resolution'] + s = int( res * 0.96875 ) + mx = np.pad ( np.ones ( (s,s) ), (res-s) // 2 , mode='constant') + mx = np.expand_dims(mx, -1) + + return np.concatenate ( (x,mx), -1 ) + + #override + def get_converter(self, **in_options): + from models import ConverterMasked + + if self.options['match_style']: + base_erode_mask_modifier = 50 + base_blur_mask_modifier = 50 + else: + base_erode_mask_modifier = 30 if self.options['face_type'] == 'f' else 100 + base_blur_mask_modifier = 0 if self.options['face_type'] == 'f' else 100 + + face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF + + return ConverterMasked(self.predictor_func, + predictor_input_size=self.options['resolution'], + output_size=self.options['resolution'], + face_type=face_type, + base_erode_mask_modifier=base_erode_mask_modifier, + base_blur_mask_modifier=base_blur_mask_modifier, + **in_options) + + @staticmethod + def EncFlow(ngf=64, num_downs=4, lowest_dense=512): + exec (nnlib.import_all(), locals(), globals()) + + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) + + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + + def func(input): + x = input + + result = [] + for i in range(num_downs): + x = LeakyReLU(0.1)(XNormalization(Conv2D( min(ngf* (2**i), ngf*8) , 5, 2, 'same')(x))) + + if i == num_downs-1: + x_shape = K.int_shape(x)[1:] + x = 
Reshape(x_shape)(Dense( np.prod(x_shape) )(Dense(lowest_dense)(Flatten()(x)))) + result += [x] + + return result + return func + + @staticmethod + def DecFlow(output_nc, ngf=64, activation='tanh'): + exec (nnlib.import_all(), locals(), globals()) + + use_bias = True + def XNormalization(x): + return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) + + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + + def func(input): + input_len = len(input) + + x = input[input_len-1] + for i in range(input_len-1, -1, -1): + x = SubpixelUpscaler()( LeakyReLU(0.1)(XNormalization(Conv2D( min(ngf* (2**i) *4, ngf*8 *4 ), 3, 1, 'same')(x))) ) + if i != 0: + x = Concatenate(axis=3)([ input[i-1] , x]) + + return Conv2D(output_nc, 3, 1, 'same', activation=activation)(x) + return func + +Model = UFMModel \ No newline at end of file diff --git a/models/Model_AVATAR/__init__.py b/models/Model_UFM/__init__.py similarity index 100% rename from models/Model_AVATAR/__init__.py rename to models/Model_UFM/__init__.py diff --git a/nnlib/devicelib.py b/nnlib/devicelib.py index 37a9261..5f1bd00 100644 --- a/nnlib/devicelib.py +++ b/nnlib/devicelib.py @@ -9,7 +9,7 @@ class devicelib: gpu_idxs = [] gpu_total_vram_gb = 0 allow_growth = True - float16 = False + use_fp16 = False cpu_only = False def __init__ (self, force_best_gpu_idx = -1, @@ -17,11 +17,11 @@ class devicelib: force_gpu_idxs = None, choose_worst_gpu = False, allow_growth = True, - float16 = False, + use_fp16 = False, cpu_only = False, **in_options): - self.float16 = float16 + self.use_fp16 = use_fp16 if cpu_only or not devicelib.hasNVML(): self.cpu_only = True else: diff --git a/nnlib/nnlib.py b/nnlib/nnlib.py index 90a9038..f730b2a 100644 --- a/nnlib/nnlib.py +++ b/nnlib/nnlib.py @@ -1,6 +1,7 @@ import os import sys import contextlib +import numpy as np from utils import std_utils from .devicelib import devicelib @@ -26,16 +27,20 @@ class nnlib(object): tf_dssim = None tf_ssim = None tf_resize_like = None + tf_image_histogram = None tf_rgb_to_lab = None tf_lab_to_rgb = None - tf_image_histogram = None + tf_adain = None + tf_gaussian_blur = None + tf_style_loss = None modelify = None ReflectionPadding2D = None DSSIMLoss = None DSSIMMaskLoss = None PixelShuffler = None - + SubpixelUpscaler = None + ResNet = None UNet = None UNetTemporalPredictor = None @@ -53,6 +58,9 @@ tf_resize_like = nnlib.tf_resize_like tf_image_histogram = nnlib.tf_image_histogram tf_rgb_to_lab = nnlib.tf_rgb_to_lab tf_lab_to_rgb = nnlib.tf_lab_to_rgb +tf_adain = nnlib.tf_adain +tf_gaussian_blur = nnlib.tf_gaussian_blur +tf_style_loss = nnlib.tf_style_loss """ code_import_keras_string = \ """ @@ -62,12 +70,12 @@ K = keras.backend Input = keras.layers.Input Dense = keras.layers.Dense -Conv2D = keras.layers.convolutional.Conv2D 
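The `code_import_keras_string` being edited here feeds nnlib's `exec(nnlib.import_all(), locals(), globals())` pattern: model files execute a generated code string so that short names such as `Conv2D` or `PixelShuffler` resolve to whatever backend objects nnlib selected. A toy sketch of that mechanism, with hypothetical aliases standing in for nnlib's real strings:

```python
import numpy as np

# Hypothetical alias string; nnlib builds similar strings for TF / Keras names.
code_import_demo = """
zeros   = np.zeros
float32 = np.float32
"""

def import_all_demo():
    # Stand-in for nnlib.import_all(): return something exec() can run.
    return compile(code_import_demo, "<nnlib demo>", "exec")

# A model file would run this at the top of onInitialize():
exec(import_all_demo(), globals())
print(zeros((2, 2), dtype=float32))   # the short aliases are now usable directly
```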
-Conv2DTranspose = keras.layers.convolutional.Conv2DTranspose +Conv2D = keras.layers.Conv2D +Conv2DTranspose = keras.layers.Conv2DTranspose MaxPooling2D = keras.layers.MaxPooling2D BatchNormalization = keras.layers.BatchNormalization -LeakyReLU = keras.layers.advanced_activations.LeakyReLU +LeakyReLU = keras.layers.LeakyReLU ReLU = keras.layers.ReLU tanh = keras.layers.Activation('tanh') sigmoid = keras.layers.Activation('sigmoid') @@ -91,6 +99,7 @@ ReflectionPadding2D = nnlib.ReflectionPadding2D DSSIMLoss = nnlib.DSSIMLoss DSSIMMaskLoss = nnlib.DSSIMMaskLoss PixelShuffler = nnlib.PixelShuffler +SubpixelUpscaler = nnlib.SubpixelUpscaler """ code_import_keras_contrib_string = \ """ @@ -282,19 +291,93 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator return func nnlib.tf_image_histogram = tf_image_histogram + def tf_adain(epsilon=1e-5): + def func(content, style): + axes = [1,2] + c_mean, c_var = tf.nn.moments(content, axes=axes, keep_dims=True) + s_mean, s_var = tf.nn.moments(style, axes=axes, keep_dims=True) + c_std, s_std = tf.sqrt(c_var + epsilon), tf.sqrt(s_var + epsilon) + return s_std * (content - c_mean) / c_std + s_mean + return func + nnlib.tf_adain = tf_adain + + def tf_gaussian_blur(radius=2.0): + def gaussian_kernel(size,mean,std): + d = tf.distributions.Normal( float(mean), float(std) ) + + vals = d.prob(tf.range(start = -int(size), limit = int(size) + 1, dtype = tf.float32)) + + gauss_kernel = tf.einsum('i,j->ij', + vals, + vals) + + return gauss_kernel / tf.reduce_sum(gauss_kernel) + + gauss_kernel = gaussian_kernel(radius, 1.0, radius ) + gauss_kernel = gauss_kernel[:, :, tf.newaxis, tf.newaxis] + + def func(input): + return tf.nn.conv2d(input, gauss_kernel, strides=[1, 1, 1, 1], padding="SAME") + return func + nnlib.tf_gaussian_blur = tf_gaussian_blur + + def tf_style_loss(gaussian_blur_radius=0.0, loss_weight=1.0, batch_normalize=False, epsilon=1e-5): + def sl(content, style): + axes = [1,2] + c_mean, c_var = tf.nn.moments(content, axes=axes, keep_dims=True) + s_mean, s_var = tf.nn.moments(style, axes=axes, keep_dims=True) + c_std, s_std = tf.sqrt(c_var + epsilon), tf.sqrt(s_var + epsilon) + + mean_loss = tf.reduce_sum(tf.squared_difference(c_mean, s_mean)) + std_loss = tf.reduce_sum(tf.squared_difference(c_std, s_std)) + + if batch_normalize: + #normalize w.r.t batch size + n = tf.cast(tf.shape(content)[0], dtype=tf.float32) + mean_loss /= n + std_loss /= n + + return (mean_loss + std_loss) * loss_weight + + def func(target, style): + target_nc = target.get_shape().as_list()[-1] + style_nc = style.get_shape().as_list()[-1] + if target_nc != style_nc: + raise Exception("target_nc != style_nc") + + targets = tf.split(target, target_nc, -1) + styles = tf.split(style, style_nc, -1) + + style_loss = [] + for i in range(len(targets)): + if gaussian_blur_radius > 0.0: + style_loss += [ sl( tf_gaussian_blur(gaussian_blur_radius)(targets[i]), + tf_gaussian_blur(gaussian_blur_radius)(styles[i])) ] + else: + style_loss += [ sl( targets[i], + styles[i]) ] + return np.sum ( style_loss ) + return func + + nnlib.tf_style_loss = tf_style_loss + @staticmethod def import_keras(device_config = None): if nnlib.keras is not None: return nnlib.code_import_keras nnlib.import_tf(device_config) - + device_config = nnlib.prefer_DeviceConfig if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': suppressor = std_utils.suppress_stdout_stderr().__enter__() import keras as keras_ nnlib.keras = keras_ - nnlib.keras.backend.tensorflow_backend.set_session(nnlib.tf_sess) + + 
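The `tf_style_loss` and `tf_adain` helpers added above both reduce to matching per-channel mean and standard deviation over the spatial axes. A numpy paraphrase of that arithmetic, with the optional Gaussian blur and per-channel splitting omitted, just to make the quantities concrete; in the UFM model this is what the `match_style` option adds to `src_loss`:

```python
import numpy as np

def style_stats(x, eps=1e-5):
    # Per-channel mean and std over the spatial axes, as tf_style_loss / tf_adain use.
    mean = x.mean(axis=(1, 2), keepdims=True)
    std  = np.sqrt(x.var(axis=(1, 2), keepdims=True) + eps)
    return mean, std

def style_loss_np(target, style, loss_weight=1.0):
    # Summed squared difference of the channel statistics - the quantity
    # tf_style_loss computes (per blurred channel) in the patch above.
    t_mean, t_std = style_stats(target)
    s_mean, s_std = style_stats(style)
    return loss_weight * (np.sum((t_mean - s_mean) ** 2) + np.sum((t_std - s_std) ** 2))

def adain_np(content, style, eps=1e-5):
    # tf_adain: re-normalize content features to carry the style's statistics.
    c_mean, c_std = style_stats(content, eps)
    s_mean, s_std = style_stats(style, eps)
    return s_std * (content - c_mean) / c_std + s_mean

rng = np.random.default_rng(0)
a = rng.random((1, 128, 128, 3)).astype(np.float32)
b = rng.random((1, 128, 128, 3)).astype(np.float32)
print(style_loss_np(a, b))
print(adain_np(a, b).shape)   # (1, 128, 128, 3)
```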
if device_config.use_fp16: + nnlib.keras.backend.set_floatx('float16') + + nnlib.keras.backend.set_session(nnlib.tf_sess) if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': suppressor.__exit__() @@ -307,6 +390,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator def __initialize_keras_functions(): tf = nnlib.tf keras = nnlib.keras + K = keras.backend def modelify(model_functor): def func(tensor): @@ -365,10 +449,12 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator for mask in self.mask_list: if not self.is_tanh: - loss = (1.0 - tf.image.ssim (y_true*mask, y_pred*mask, 1.0)) / 2.0 + loss = (1.0 - (tf.image.ssim (y_true*mask, y_pred*mask, 1.0))) / 2.0 else: loss = (1.0 - tf.image.ssim ( (y_true/2+0.5)*(mask/2+0.5), (y_pred/2+0.5)*(mask/2+0.5), 1.0)) / 2.0 + loss = K.cast (loss, K.floatx()) + if total_loss is None: total_loss = loss else: @@ -376,7 +462,7 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator return total_loss nnlib.DSSIMMaskLoss = DSSIMMaskLoss - + class PixelShuffler(keras.layers.Layer): def __init__(self, size=(2, 2), data_format=None, **kwargs): super(PixelShuffler, self).__init__(**kwargs) @@ -391,33 +477,12 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator '; Received input shape:', str(input_shape)) if self.data_format == 'channels_first': - batch_size, c, h, w = input_shape - if batch_size is None: - batch_size = -1 - rh, rw = self.size - oh, ow = h * rh, w * rw - oc = c // (rh * rw) - - out = keras.backend.reshape(inputs, (batch_size, rh, rw, oc, h, w)) - out = keras.backend.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) - out = keras.backend.reshape(out, (batch_size, oc, oh, ow)) - return out + return tf.depth_to_space(inputs, self.size[0], 'NCHW') elif self.data_format == 'channels_last': - batch_size, h, w, c = input_shape - if batch_size is None: - batch_size = -1 - rh, rw = self.size - oh, ow = h * rh, w * rw - oc = c // (rh * rw) - - out = keras.backend.reshape(inputs, (batch_size, h, w, rh, rw, oc)) - out = keras.backend.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) - out = keras.backend.reshape(out, (batch_size, oh, ow, oc)) - return out + return tf.depth_to_space(inputs, self.size[0], 'NHWC') def compute_output_shape(self, input_shape): - if len(input_shape) != 4: raise ValueError('Inputs should have rank ' + str(4) + @@ -455,8 +520,10 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator base_config = super(PixelShuffler, self).get_config() return dict(list(base_config.items()) + list(config.items())) + nnlib.PixelShuffler = PixelShuffler - + nnlib.SubpixelUpscaler = PixelShuffler + @staticmethod def import_keras_contrib(device_config = None): if nnlib.keras_contrib is not None: @@ -512,10 +579,10 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator def XNormalization(x): return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.convolutional.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, 
activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) def func(input): @@ -580,10 +647,10 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator def XNormalization(x): return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.convolutional.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, 
padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) def UNetSkipConnection(outer_nc, inner_nc, sub_model=None, outermost=False, innermost=False, use_dropout=False): @@ -658,8 +725,8 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator def XNormalization(x): return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=True, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.convolutional.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) + def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): + return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) def func(input): x = input diff --git a/samples/SampleProcessor.py 
b/samples/SampleProcessor.py index f6ab534..89f052f 100644
--- a/samples/SampleProcessor.py
+++ b/samples/SampleProcessor.py
@@ -95,7 +95,7 @@ class SampleProcessor(object):
                         mask[mask > 0.0] = 1.0
                         img = np.concatenate( (img, mask ), -1 )
-                    images[img_type][face_mask_type] = image_utils.warp_by_params (params, img, (img_type==1 or img_type==2), (img_type==2 or img_type==3), img_type != 0)
+                    images[img_type][face_mask_type] = image_utils.warp_by_params (params, img, (img_type==1 or img_type==2), (img_type==2 or img_type==3), img_type != 0, face_mask_type == 0)
                 img = images[img_type][face_mask_type]
diff --git a/utils/image_utils.py b/utils/image_utils.py
index 7be6f7d..ae5deed 100644
--- a/utils/image_utils.py
+++ b/utils/image_utils.py
@@ -255,11 +255,11 @@ def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0
     return params

-def warp_by_params (params, img, warp, transform, flip):
+def warp_by_params (params, img, warp, transform, flip, is_border_replicate):
     if warp:
         img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_LANCZOS4 )
     if transform:
-        img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_LANCZOS4 )
+        img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if is_border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_LANCZOS4 )
     if flip and params['flip']:
         img = img[:,::-1,:]
     return img
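The `warp_by_params` change at the end of this patch rotates face crops with `BORDER_REPLICATE` while masks keep `BORDER_CONSTANT` (SampleProcessor passes `face_mask_type == 0` as `is_border_replicate`), so the aligned face no longer gets black corners but the mask stays strictly zero outside the face. A small self-contained OpenCV sketch of the difference, using an assumed 15-degree rotation instead of the generated warp parameters:

```python
import cv2
import numpy as np

# Constant grey "face" and all-ones "mask" stand in for real samples.
img  = np.full((128, 128, 3), 0.8, dtype=np.float32)
mask = np.ones((128, 128, 1), dtype=np.float32)

# Assumed rotation matrix; the real code takes params['rmat'] from gen_warp_params.
rmat = cv2.getRotationMatrix2D((64, 64), 15, 1.0)

face_rotated = cv2.warpAffine(img,  rmat, (128, 128),
                              borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_LANCZOS4)
mask_rotated = cv2.warpAffine(mask, rmat, (128, 128),
                              borderMode=cv2.BORDER_CONSTANT,  flags=cv2.INTER_LANCZOS4)

print(face_rotated.min())   # stays ~0.8: replicated border, no dark corners
print(mask_rotated.min())   # near 0: the constant border fills the rotated corners
```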