optimized face sample generator; CPU load is significantly reduced

SAEHD:

added new option
GAN power 0.0 .. 10.0
	Trains the network in a Generative Adversarial manner.
	Forces the neural network to learn small details of the face.
	You can enable or disable this option at any time,
	but it is better to enable it once the network is already trained well enough.
	Typical value is 1.0.
	GAN power has no effect in pretrain mode.

Example of enabling GAN at 81k iterations, then +5k more iterations:
https://i.imgur.com/OdXHLhU.jpg
https://i.imgur.com/CYAJmJx.jpg
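
For illustration, a minimal NumPy sketch of how the GAN power value acts on the losses (this mirrors the DLoss / gan_power wiring in the SAEHD diff below; the logits and the reconstruction loss value are placeholders, not real model outputs):

import numpy as np

def DLoss(labels, logits):
    # sigmoid cross-entropy averaged over all elements
    # (same formula tf.nn.sigmoid_cross_entropy_with_logits uses)
    return np.mean(np.maximum(logits, 0) - logits * labels + np.log1p(np.exp(-np.abs(logits))))

# placeholder patch-discriminator logits for a generated face and a real training face
pred_logits   = np.random.randn(1, 8, 8, 1).astype(np.float32)
target_logits = np.random.randn(1, 8, 8, 1).astype(np.float32)

gan_power  = 1.0    # typical value; 0.0 disables the GAN term entirely
recon_loss = 0.37   # placeholder DSSIM + MSE reconstruction loss

# discriminator: push real faces toward 1 and generated faces toward 0
D_loss = 0.5 * (DLoss(1.0, target_logits) + DLoss(0.0, pred_logits))
# generator: reconstruction loss plus a gan_power-weighted term pushing fakes toward "real"
G_loss = recon_loss + gan_power * DLoss(1.0, pred_logits)
print(D_loss, G_loss)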

dfhd: default decoder dimensions (d_dims) are now 48
the preview for 256 resolution is now displayed correctly

fixed model naming/renaming/removing

Improvements for those involved in post-processing in AfterEffects:

The codec is reverted back to x264 so the output can be used properly in AfterEffects and video players.

Merger now always outputs the mask to workspace\data_dst\merged_mask

removed all raw modes except raw-rgb
raw-rgb mode now outputs the selected face mask_mode (previously a square mask)

The 'export alpha mask' button is replaced by 'show alpha mask'.
You can view the alpha mask without recomputing the frames.

'merged *.bat' scripts now also output a 'result_mask' video file.
'merged lossless' now uses the x264 lossless codec (previously the PNG codec).
The result_mask video file is always lossless.

Thus you can use the result_mask video file as a mask layer in AfterEffects.
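
A rough ffmpeg-python sketch of the new lossless output settings (the input pattern, frame rate and paths are illustrative; the codec kwargs mirror the VideoEd diff below):

import ffmpeg  # ffmpeg-python, as used by mainscripts/VideoEd.py

# encode a merged PNG sequence with lossless x264 (previously the PNG codec was used)
stream = ffmpeg.input('workspace/data_dst/merged/%05d.png', framerate=30)
stream = ffmpeg.output(stream, 'workspace/result.mp4',
                       **{'c:v': 'libx264',   # x264 instead of PNG
                          'crf': '0',         # crf 0 = lossless
                          'pix_fmt': 'yuv420p'})
ffmpeg.run(stream)
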
Colombo 2020-01-28 12:24:45 +04:00
parent 80f285067a
commit 7386a9d6fd
28 changed files with 455 additions and 363 deletions

View file

@ -4,8 +4,10 @@ from pathlib import Path
from core.interact import interact as io
import traceback
#allows to open non-english characters path
def cv2_imread(filename, flags=cv2.IMREAD_UNCHANGED, loader_func=None):
"""
allows to open non-english characters path
"""
try:
if loader_func is not None:
bytes = bytearray(loader_func(filename))

View file

@ -11,7 +11,7 @@ from .warp import gen_warp_params, warp_by_params
from .reduce_colors import reduce_colors
from .color_transfer import color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer, seamless_clone
from .color_transfer import color_transfer, color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer, seamless_clone
from .common import normalize_channels, cut_odd_image, overlay_alpha_image

View file

@ -299,7 +299,7 @@ def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5):
matched_img += mu_s
matched_img[matched_img>1] = 1
matched_img[matched_img<0] = 0
return matched_img.astype(source_img.dtype)
return np.clip(matched_img.astype(source_img.dtype), 0, 1)
def lab_image_stats(image):
# compute the mean and standard deviation of each channel
@ -391,3 +391,24 @@ def color_transfer_mix(img_src,img_trg):
return (img_rct / 255.0).astype(np.float32)
def color_transfer(ct_mode, img_src, img_trg):
"""
color transfer for [0,1] float inputs
"""
if ct_mode == 'lct':
out = linear_color_transfer (img_src, img_trg)
elif ct_mode == 'rct':
out = reinhard_color_transfer ( np.clip( img_src*255, 0, 255 ).astype(np.uint8),
np.clip( img_trg*255, 0, 255 ).astype(np.uint8) )
out = np.clip( out.astype(np.float32) / 255.0, 0.0, 1.0)
elif ct_mode == 'mkl':
out = color_transfer_mkl (img_src, img_trg)
elif ct_mode == 'idt':
out = color_transfer_idt (img_src, img_trg)
elif ct_mode == 'sot':
out = color_transfer_sot (img_src, img_trg)
out = np.clip( out, 0.0, 1.0)
else:
raise ValueError(f"unknown ct_mode {ct_mode}")
return out
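
A hedged usage sketch of the new color_transfer dispatcher (the random arrays stand in for real float32 BGR face crops in [0,1]):

import numpy as np
from core import imagelib

img_src = np.random.rand(128, 128, 3).astype(np.float32)  # face being color-matched
img_trg = np.random.rand(128, 128, 3).astype(np.float32)  # face providing the target color statistics

# ct_mode is one of 'lct', 'rct', 'mkl', 'idt', 'sot'; anything else raises ValueError
out = imagelib.color_transfer('rct', img_src, img_trg)
print(out.shape, out.dtype)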

View file

@ -47,11 +47,13 @@ def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0
return params
def warp_by_params (params, img, warp, transform, flip, is_border_replicate):
if warp:
def warp_by_params (params, img, can_warp, can_transform, can_flip, border_replicate):
if can_warp:
img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC )
if transform:
img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if is_border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
if flip and params['flip']:
if can_transform:
img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
if len(img.shape) == 2:
img = img[...,None]
if can_flip and params['flip']:
img = img[:,::-1,...]
return img

View file

@ -78,26 +78,7 @@ def initialize_layers(nn):
return True
def init_weights(self):
ops = []
ca_tuples_w = []
ca_tuples = []
for w in self.get_weights():
initializer = w.initializer
for input in initializer.inputs:
if "_cai_" in input.name:
ca_tuples_w.append (w)
ca_tuples.append ( (w.shape.as_list(), w.dtype.as_numpy_dtype) )
break
else:
ops.append (initializer)
if len(ops) != 0:
nn.tf_sess.run (ops)
if len(ca_tuples) != 0:
nn.tf_batch_set_value( [*zip(ca_tuples_w, nn.initializers.ca.generate_batch (ca_tuples))] )
nn.tf_init_weights(self.get_weights())
nn.Saveable = Saveable
class LayerBase():
@ -302,6 +283,7 @@ def initialize_layers(nn):
raise ValueError ("strides must be an int type")
if not isinstance(dilations, int):
raise ValueError ("dilations must be an int type")
kernel_size = int(kernel_size)
if dtype is None:
dtype = nn.tf_floatx
@ -405,6 +387,7 @@ def initialize_layers(nn):
def __init__(self, in_ch, out_ch, kernel_size, strides=2, padding='SAME', use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ):
if not isinstance(strides, int):
raise ValueError ("strides must be an int type")
kernel_size = int(kernel_size)
if dtype is None:
dtype = nn.tf_floatx

41
core/leras/models.py Normal file
View file

@ -0,0 +1,41 @@
def initialize_models(nn):
tf = nn.tf
class PatchDiscriminator(nn.ModelBase):
def on_build(self, patch_size, in_ch, base_ch=256, kernel_initializer=None):
prev_ch = in_ch
self.convs = []
for i, (kernel_size, strides) in enumerate(patch_discriminator_kernels[patch_size]):
cur_ch = base_ch * min( (2**i), 8 )
self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=kernel_size, strides=strides, padding='SAME', kernel_initializer=kernel_initializer) )
prev_ch = cur_ch
self.out_conv = nn.Conv2D( prev_ch, 1, kernel_size=1, padding='VALID', kernel_initializer=kernel_initializer)
def forward(self, x):
for conv in self.convs:
x = tf.nn.leaky_relu( conv(x), 0.1 )
return self.out_conv(x)
nn.PatchDiscriminator = PatchDiscriminator
patch_discriminator_kernels = \
{ 1 : [ [1,1] ],
2 : [ [2,1] ],
3 : [ [2,1], [2,1] ],
4 : [ [2,2], [2,2] ],
5 : [ [3,2], [2,2] ],
6 : [ [4,2], [2,2] ],
7 : [ [3,2], [3,2] ],
8 : [ [4,2], [3,2] ],
9 : [ [3,2], [4,2] ],
10 : [ [4,2], [4,2] ],
11 : [ [3,2], [3,2], [2,1] ],
12 : [ [4,2], [3,2], [2,1] ],
13 : [ [3,2], [4,2], [2,1] ],
14 : [ [4,2], [4,2], [2,1] ],
15 : [ [3,2], [3,2], [3,1] ],
16 : [ [4,2], [3,2], [3,1] ] }
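
A rough sketch of how the new PatchDiscriminator is wired up, following the D_src construction in the SAEHD diff further down (assumes the usual nn.initialize() flow; resolution and name are illustrative):

from core.leras import nn

nn.initialize()   # registers nn.PatchDiscriminator along with the other leras classes
tf = nn.tf

resolution, output_ch = 128, 3
# same construction SAEHD uses for its D_src / D_dst discriminators
D_src = nn.PatchDiscriminator(patch_size=resolution // 16, in_ch=output_ch, base_ch=512, name="D_src")

x = tf.placeholder(nn.tf_floatx, nn.get4Dshape(resolution, resolution, output_ch))
logits = D_src(x)   # per-patch real/fake logits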

View file

@ -46,6 +46,7 @@ class nn():
# Tensor ops
tf_get_value = None
tf_batch_set_value = None
tf_init_weights = None
tf_gradients = None
tf_average_gv_list = None
tf_average_tensor_list = None
@ -79,6 +80,9 @@ class nn():
TFBaseOptimizer = None
TFRMSpropOptimizer = None
# Models
PatchDiscriminator = None
@staticmethod
def initialize(device_config=None, floatx="float32", data_format="NHWC"):
@ -138,11 +142,13 @@ class nn():
from .layers import initialize_layers
from .initializers import initialize_initializers
from .optimizers import initialize_optimizers
from .models import initialize_models
initialize_tensor_ops(nn)
initialize_layers(nn)
initialize_initializers(nn)
initialize_optimizers(nn)
initialize_models(nn)
if nn.tf_sess is None:
nn.tf_sess = tf.Session(config=nn.tf_sess_config)

View file

@ -29,6 +29,27 @@ def initialize_tensor_ops(nn):
nn.tf_sess.run(assign_ops, feed_dict=feed_dict)
nn.tf_batch_set_value = tf_batch_set_value
def tf_init_weights(weights):
ops = []
ca_tuples_w = []
ca_tuples = []
for w in weights:
initializer = w.initializer
for input in initializer.inputs:
if "_cai_" in input.name:
ca_tuples_w.append (w)
ca_tuples.append ( (w.shape.as_list(), w.dtype.as_numpy_dtype) )
break
else:
ops.append (initializer)
if len(ops) != 0:
nn.tf_sess.run (ops)
if len(ca_tuples) != 0:
nn.tf_batch_set_value( [*zip(ca_tuples_w, nn.initializers.ca.generate_batch (ca_tuples))] )
nn.tf_init_weights = tf_init_weights
def tf_gradients ( loss, vars ):
grads = gradients.gradients(loss, vars, colocate_gradients_with_ops=True )

39
main.py
View file

@ -201,23 +201,23 @@ if __name__ == "__main__":
def process_merge(arguments):
osex.set_process_lowest_prio()
kwargs = {'model_class_name' : arguments.model_name,
'saved_models_path' : Path(arguments.model_dir),
'training_data_src_path' : Path(arguments.training_data_src_dir) if arguments.training_data_src_dir is not None else None,
'force_model_name' : arguments.force_model_name,
'input_path' : Path(arguments.input_dir),
'output_path' : Path(arguments.output_dir),
'aligned_path' : Path(arguments.aligned_dir) if arguments.aligned_dir is not None else None,
'cpu_only' : arguments.cpu_only,
'force_gpu_idxs' : arguments.force_gpu_idxs,
}
from mainscripts import Merger
Merger.main (**kwargs)
Merger.main ( model_class_name = arguments.model_name,
saved_models_path = Path(arguments.model_dir),
training_data_src_path = Path(arguments.training_data_src_dir) if arguments.training_data_src_dir is not None else None,
force_model_name = arguments.force_model_name,
input_path = Path(arguments.input_dir),
output_path = Path(arguments.output_dir),
output_mask_path = Path(arguments.output_mask_dir),
aligned_path = Path(arguments.aligned_dir) if arguments.aligned_dir is not None else None,
force_gpu_idxs = arguments.force_gpu_idxs,
cpu_only = arguments.cpu_only)
p = subparsers.add_parser( "merge", help="Merger")
p.add_argument('--training-data-src-dir', action=fixPathAction, dest="training_data_src_dir", default=None, help="(optional, may be required by some models) Dir of extracted SRC faceset.")
p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the merged files will be stored.")
p.add_argument('--output-mask-dir', required=True, action=fixPathAction, dest="output_mask_dir", help="Output mask directory. This is where the mask files will be stored.")
p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", default=None, help="Aligned directory. This is where the extracted of dst faces stored.")
p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.")
p.add_argument('--model', required=True, dest="model_name", choices=pathex.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Model class name.")
@ -268,13 +268,14 @@ if __name__ == "__main__":
def process_videoed_video_from_sequence(arguments):
osex.set_process_lowest_prio()
from mainscripts import VideoEd
VideoEd.video_from_sequence (arguments.input_dir,
arguments.output_file,
arguments.reference_file,
arguments.ext,
arguments.fps,
arguments.bitrate,
arguments.lossless)
VideoEd.video_from_sequence (input_dir = arguments.input_dir,
output_file = arguments.output_file,
reference_file = arguments.reference_file,
ext = arguments.ext,
fps = arguments.fps,
bitrate = arguments.bitrate,
include_audio = arguments.include_audio,
lossless = arguments.lossless)
p = videoed_parser.add_parser( "video-from-sequence", help="Make video from image sequence.")
p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input file to be processed. Specify .*-extension to find first file.")
@ -283,7 +284,9 @@ if __name__ == "__main__":
p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.")
p.add_argument('--fps', type=int, dest="fps", default=None, help="FPS of output file. Overwritten by reference-file.")
p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.")
p.add_argument('--include-audio', action="store_true", dest="include_audio", default=False, help="Include audio from reference file.")
p.add_argument('--lossless', action="store_true", dest="lossless", default=False, help="PNG codec.")
p.set_defaults(func=process_videoed_video_from_sequence)
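
For reference, a hedged example of calling the updated function directly with the new include_audio flag (all paths are illustrative):

from mainscripts import VideoEd

VideoEd.video_from_sequence(input_dir='workspace/data_dst/merged',
                            output_file='workspace/result.mp4',
                            reference_file='workspace/data_dst.mp4',
                            ext='png',
                            fps=None,            # taken from the reference file when None
                            bitrate=16,
                            include_audio=True,  # new flag: mux audio from the reference file
                            lossless=False)
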
def process_labelingtool_edit_mask(arguments):

View file

@ -297,7 +297,18 @@ class ExtractSubprocessor(Subprocessor):
if not cpu_only:
if type == 'landmarks-manual':
devices = [devices.get_best_device()]
result = [ (device.index, 'GPU', device.name, device.total_mem_gb) for device in devices ]
result = []
for device in devices:
count = 1
if count == 1:
result += [ (device.index, 'GPU', device.name, device.total_mem_gb) ]
else:
for i in range(count):
result += [ (device.index, 'GPU', f"{device.name} #{i}", device.total_mem_gb) ]
return result
else:
if type == 'landmarks-manual':

View file

@ -39,6 +39,7 @@ class MergeSubprocessor(Subprocessor):
self.frame_info = frame_info
self.next_temporal_frame_infos = next_temporal_frame_infos
self.output_filepath = None
self.output_mask_filepath = None
self.idx = None
self.cfg = None
@ -54,6 +55,7 @@ class MergeSubprocessor(Subprocessor):
frame_info=None,
next_temporal_frame_infos=None,
output_filepath=None,
output_mask_filepath=None,
need_return_image = False):
self.idx = idx
self.cfg = cfg
@ -61,6 +63,7 @@ class MergeSubprocessor(Subprocessor):
self.frame_info = frame_info
self.next_temporal_frame_infos = next_temporal_frame_infos
self.output_filepath = output_filepath
self.output_mask_filepath = output_mask_filepath
self.need_return_image = need_return_image
if self.need_return_image:
@ -123,35 +126,22 @@ class MergeSubprocessor(Subprocessor):
cfg.superres_func = self.superres_func
frame_info = pf.frame_info
filepath = frame_info.filepath
landmarks_list = frame_info.landmarks_list
output_filepath = pf.output_filepath
need_return_image = pf.need_return_image
if len(frame_info.landmarks_list) == 0:
self.log_info (f'no faces found for {filepath.name}, copying without faces')
if len(landmarks_list) == 0:
self.log_info ( 'no faces found for %s, copying without faces' % (filepath.name) )
if cfg.export_mask_alpha:
img_bgr = cv2_imread(filepath)
imagelib.normalize_channels(img_bgr, 3)
cv2_imwrite (pf.output_filepath, img_bgr)
h,w,c = img_bgr.shape
if c == 1:
img_bgr = np.repeat(img_bgr, 3, -1)
if c == 3:
img_bgr = np.concatenate ([img_bgr, np.zeros((h,w,1), dtype=img_bgr.dtype) ], axis=-1)
cv2_imwrite (output_filepath, img_bgr)
else:
if filepath.suffix == '.png':
shutil.copy ( str(filepath), str(output_filepath) )
else:
img_bgr = cv2_imread(filepath)
cv2_imwrite (output_filepath, img_bgr)
img_mask = np.zeros( (h,w,1), dtype=img_bgr.dtype)
cv2_imwrite (pf.output_mask_filepath, img_mask)
if pf.need_return_image:
pf.image = np.concatenate ([img_bgr, img_mask], axis=-1)
if need_return_image:
img_bgr = cv2_imread(filepath)
pf.image = img_bgr
else:
if cfg.type == MergerConfig.TYPE_MASKED:
cfg.fanseg_input_size = self.fanseg_input_size
@ -172,10 +162,10 @@ class MergeSubprocessor(Subprocessor):
pf.frame_info,
pf.next_temporal_frame_infos )
if output_filepath is not None and final_img is not None:
cv2_imwrite (output_filepath, final_img )
cv2_imwrite (pf.output_filepath, final_img[...,0:3] )
cv2_imwrite (pf.output_mask_filepath, final_img[...,3:4] )
if need_return_image:
if pf.need_return_image:
pf.image = final_img
return pf
@ -186,7 +176,7 @@ class MergeSubprocessor(Subprocessor):
return pf.frame_info.filepath
#override
def __init__(self, is_interactive, merger_session_filepath, predictor_func, predictor_input_shape, merger_config, frames, frames_root_path, output_path, model_iter):
def __init__(self, is_interactive, merger_session_filepath, predictor_func, predictor_input_shape, merger_config, frames, frames_root_path, output_path, output_mask_path, model_iter):
if len (frames) == 0:
raise ValueError ("len (frames) == 0")
@ -226,6 +216,7 @@ class MergeSubprocessor(Subprocessor):
self.frames_root_path = frames_root_path
self.output_path = output_path
self.output_mask_path = output_mask_path
self.model_iter = model_iter
self.prefetch_frame_count = self.process_count = min(6,multiprocessing.cpu_count())
@ -305,12 +296,17 @@ class MergeSubprocessor(Subprocessor):
for filename in pathex.get_image_paths(self.output_path): #remove all images in output_path
Path(filename).unlink()
for filename in pathex.get_image_paths(self.output_mask_path): #remove all images in output_mask_path
Path(filename).unlink()
frames[0].cfg = self.merger_config.copy()
for i in range( len(self.frames) ):
frame = self.frames[i]
frame.idx = i
frame.output_filepath = self.output_path / ( frame.frame_info.filepath.stem + '.png' )
frame.output_mask_filepath = self.output_mask_path / ( frame.frame_info.filepath.stem + '.png' )
#override
def process_info_generator(self):
@ -353,9 +349,6 @@ class MergeSubprocessor(Subprocessor):
'3' : lambda cfg,shift_pressed: cfg.set_mode(3),
'4' : lambda cfg,shift_pressed: cfg.set_mode(4),
'5' : lambda cfg,shift_pressed: cfg.set_mode(5),
'6' : lambda cfg,shift_pressed: cfg.set_mode(6),
'7' : lambda cfg,shift_pressed: cfg.set_mode(7),
'8' : lambda cfg,shift_pressed: cfg.set_mode(8),
'q' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(1 if not shift_pressed else 5),
'a' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(-1 if not shift_pressed else -5),
'w' : lambda cfg,shift_pressed: cfg.add_erode_mask_modifier(1 if not shift_pressed else 5),
@ -379,7 +372,6 @@ class MergeSubprocessor(Subprocessor):
'x' : lambda cfg,shift_pressed: cfg.toggle_mask_mode(),
'c' : lambda cfg,shift_pressed: cfg.toggle_color_transfer_mode(),
'v' : lambda cfg,shift_pressed: cfg.toggle_super_resolution_mode(),
'b' : lambda cfg,shift_pressed: cfg.toggle_export_mask_alpha(),
'n' : lambda cfg,shift_pressed: cfg.toggle_sharpen_mode(),
}
self.masked_keys = list(self.masked_keys_funcs.keys())
@ -393,6 +385,7 @@ class MergeSubprocessor(Subprocessor):
for frame in self.frames:
frame.output_filepath = None
frame.output_mask_filepath = None
frame.image = None
session_data = {
@ -435,12 +428,19 @@ class MergeSubprocessor(Subprocessor):
io.log_info (cur_frame.cfg.to_string( cur_frame.frame_info.filepath.name) )
if cur_frame.image is None:
cur_frame.image = cv2_imread ( cur_frame.output_filepath)
if cur_frame.image is None:
image = cv2_imread (cur_frame.output_filepath)
image_mask = cv2_imread (cur_frame.output_mask_filepath)
if image is None or image_mask is None:
# unable to read? recompute then
cur_frame.is_done = False
cur_frame.is_shown = False
else:
image_mask = imagelib.normalize_channels(image_mask, 1)
cur_frame.image = np.concatenate([image, image_mask], -1)
if cur_frame.is_done:
self.main_screen.set_image(cur_frame.image)
else:
self.main_screen.set_waiting_icon(True)
@ -510,6 +510,8 @@ class MergeSubprocessor(Subprocessor):
self.screen_manager.get_current().diff_scale(-0.1)
elif chr_key == '=':
self.screen_manager.get_current().diff_scale(0.1)
elif chr_key == 'b':
self.screen_manager.get_current().toggle_show_checker_board()
if go_prev_frame:
if cur_frame is None or cur_frame.is_done:
@ -607,6 +609,7 @@ class MergeSubprocessor(Subprocessor):
frame_info=frame.frame_info,
next_temporal_frame_infos=frame.next_temporal_frame_infos,
output_filepath=frame.output_filepath,
output_mask_filepath=frame.output_mask_filepath,
need_return_image=True )
return None
@ -621,6 +624,7 @@ def main (model_class_name=None,
force_model_name=None,
input_path=None,
output_path=None,
output_mask_path=None,
aligned_path=None,
force_gpu_idxs=None,
cpu_only=None):
@ -634,6 +638,9 @@ def main (model_class_name=None,
if not output_path.exists():
output_path.mkdir(parents=True, exist_ok=True)
if not output_mask_path.exists():
output_mask_path.mkdir(parents=True, exist_ok=True)
if not saved_models_path.exists():
io.log_err('Model directory not found. Please ensure it exists.')
return
@ -783,6 +790,7 @@ def main (model_class_name=None,
frames = frames,
frames_root_path = input_path,
output_path = output_path,
output_mask_path = output_mask_path,
model_iter = model.get_iter()
).run()

View file

@ -30,6 +30,7 @@ class Screen(object):
self.scale = 1
self.force_update = True
self.is_first_appear = True
self.show_checker_board = False
self.last_screen_shape = (480,640,3)
self.checkerboard_image = None
@ -39,6 +40,10 @@ class Screen(object):
def set_waiting_icon(self, b):
self.waiting_icon = b
def toggle_show_checker_board(self):
self.show_checker_board = not self.show_checker_board
self.force_update = True
def set_image(self, img):
if not img is self.image:
self.force_update = True
@ -85,6 +90,9 @@ class Screen(object):
screen = cv2.resize ( screen, ( int(w*self.scale), int(h*self.scale) ) )
if c == 4:
if not self.show_checker_board:
screen = screen[...,0:3]
else:
if self.checkerboard_image is None or self.checkerboard_image.shape[0:2] != screen.shape[0:2]:
self.checkerboard_image = ScreenAssets.build_checkerboard_a(screen.shape)

View file

@ -68,7 +68,7 @@ def cut_video ( input_file, from_time=None, to_time=None, audio_track_id=None, b
if bitrate is None:
bitrate = max (1, io.input_int ("Bitrate of output file in MB/s", 25) )
kwargs = {"c:v": "libx265",
kwargs = {"c:v": "libx264",
"b:v": "%dM" %(bitrate),
"pix_fmt": "yuv420p",
}
@ -113,7 +113,7 @@ def denoise_image_sequence( input_dir, ext=None, factor=None ):
except:
io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) )
def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, lossless=None ):
def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, include_audio=False, lossless=None ):
input_path = Path(input_dir)
output_file_path = Path(output_file)
reference_file_path = Path(reference_file) if reference_file is not None else None
@ -177,7 +177,7 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None,
output_args = [i_in]
if ref_in_a is not None:
if include_audio and ref_in_a is not None:
output_args += [ref_in_a]
output_args += [str (output_file_path)]
@ -185,14 +185,17 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None,
output_kwargs = {}
if lossless:
output_kwargs.update ({"c:v": "png"
output_kwargs.update ({"c:v": "libx264",
"crf": "0",
"pix_fmt": "yuv420p",
})
else:
output_kwargs.update ({"c:v": "libx265",
output_kwargs.update ({"c:v": "libx264",
"b:v": "%dM" %(bitrate),
"pix_fmt": "yuv420p",
})
if include_audio and ref_in_a is not None:
output_kwargs.update ({"c:a": "aac",
"b:a": "192k",
"ar" : "48000"

Binary image file not shown (317 KiB before, 306 KiB after).

View file

@ -13,8 +13,6 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks)
if cfg.mode == 'original':
if cfg.export_mask_alpha:
img_bgr = np.concatenate ( [img_bgr, img_face_mask_a], -1 )
return img_bgr, img_face_mask_a
out_img = img_bgr.copy()
@ -106,28 +104,9 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
img_face_mask_aaa [ img_face_mask_aaa <= 0.1 ] = 0.0 #get rid of noise
if 'raw' in cfg.mode:
face_corner_pts = np.array ([ [0,0], [output_size-1,0], [output_size-1,output_size-1], [0,output_size-1] ], dtype=np.float32)
square_mask = np.zeros(img_bgr.shape, dtype=np.float32)
cv2.fillConvexPoly(square_mask, \
LandmarksProcessor.transform_points (face_corner_pts, face_output_mat, invert=True ).astype(np.int), \
(1,1,1) )
if cfg.mode == 'raw-rgb':
out_merging_mask = square_mask
if cfg.mode == 'raw-rgb' or cfg.mode == 'raw-rgb-mask':
out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC, cv2.BORDER_TRANSPARENT )
if cfg.mode == 'raw-rgb-mask':
out_img = np.concatenate ( [out_img, np.expand_dims (img_face_mask_aaa[:,:,0],-1)], -1 )
out_merging_mask = square_mask
elif cfg.mode == 'raw-mask-only':
out_img = img_face_mask_aaa
out_merging_mask = img_face_mask_aaa
elif cfg.mode == 'raw-predicted-only':
out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC, cv2.BORDER_TRANSPARENT )
out_merging_mask = square_mask
out_img = np.clip (out_img, 0.0, 1.0 )
else:
@ -176,14 +155,12 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
if 'seamless' not in cfg.mode and cfg.color_transfer_mode != 0:
if cfg.color_transfer_mode == 1: #rct
prd_face_bgr = imagelib.reinhard_color_transfer ( (prd_face_bgr*255).astype(np.uint8),
(dst_face_bgr*255).astype(np.uint8),
prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( prd_face_bgr*255, 0, 255).astype(np.uint8),
np.clip( dst_face_bgr*255, 0, 255).astype(np.uint8),
source_mask=prd_face_mask_a, target_mask=prd_face_mask_a)
prd_face_bgr = np.clip( prd_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
elif cfg.color_transfer_mode == 2: #lct
prd_face_bgr = imagelib.linear_color_transfer (prd_face_bgr, dst_face_bgr)
prd_face_bgr = np.clip( prd_face_bgr, 0.0, 1.0)
elif cfg.color_transfer_mode == 3: #mkl
prd_face_bgr = imagelib.color_transfer_mkl (prd_face_bgr, dst_face_bgr)
elif cfg.color_transfer_mode == 4: #mkl-m
@ -270,7 +247,6 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
out_face_bgr = np.clip( out_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
elif cfg.color_transfer_mode == 2: #lct
out_face_bgr = imagelib.linear_color_transfer (out_face_bgr, dst_face_bgr)
out_face_bgr = np.clip( out_face_bgr, 0.0, 1.0)
elif cfg.color_transfer_mode == 3: #mkl
out_face_bgr = imagelib.color_transfer_mkl (out_face_bgr, dst_face_bgr)
elif cfg.color_transfer_mode == 4: #mkl-m
@ -356,7 +332,6 @@ def MergeMasked (predictor_func, predictor_input_shape, cfg, frame_info):
final_img = final_img*(1-merging_mask) + img*merging_mask
final_mask = np.clip (final_mask + merging_mask, 0, 1 )
if cfg.export_mask_alpha:
final_img = np.concatenate ( [final_img, final_mask], -1)
return (final_img*255).astype(np.uint8)

View file

@ -101,10 +101,7 @@ mode_dict = {0:'original',
2:'hist-match',
3:'seamless',
4:'seamless-hist-match',
5:'raw-rgb',
6:'raw-rgb-mask',
7:'raw-mask-only',
8:'raw-predicted-only'}
5:'raw-rgb',}
mode_str_dict = {}
@ -144,7 +141,6 @@ class MergerConfigMasked(MergerConfig):
image_denoise_power = 0,
bicubic_degrade_power = 0,
color_degrade_power = 0,
export_mask_alpha = False,
**kwargs
):
@ -158,6 +154,9 @@ class MergerConfigMasked(MergerConfig):
self.clip_hborder_mask_per = clip_hborder_mask_per
#default changeable params
if mode not in mode_str_dict:
mode = mode_dict[1]
self.mode = mode
self.masked_hist_match = masked_hist_match
self.hist_match_threshold = hist_match_threshold
@ -170,7 +169,6 @@ class MergerConfigMasked(MergerConfig):
self.image_denoise_power = image_denoise_power
self.bicubic_degrade_power = bicubic_degrade_power
self.color_degrade_power = color_degrade_power
self.export_mask_alpha = export_mask_alpha
def copy(self):
return copy.copy(self)
@ -217,9 +215,6 @@ class MergerConfigMasked(MergerConfig):
def add_bicubic_degrade_power(self, diff):
self.bicubic_degrade_power = np.clip ( self.bicubic_degrade_power+diff, 0, 100)
def toggle_export_mask_alpha(self):
self.export_mask_alpha = not self.export_mask_alpha
def ask_settings(self):
s = """Choose mode: \n"""
for key in mode_dict.keys():
@ -267,7 +262,6 @@ class MergerConfigMasked(MergerConfig):
self.image_denoise_power = np.clip ( io.input_int ("Choose image degrade by denoise power", 0, add_info="0..500"), 0, 500)
self.bicubic_degrade_power = np.clip ( io.input_int ("Choose image degrade by bicubic rescale power", 0, add_info="0..100"), 0, 100)
self.color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image", 0, add_info="0..100"), 0, 100)
self.export_mask_alpha = io.input_bool("Export png with alpha channel of the mask?", False)
io.log_info ("")
@ -287,8 +281,7 @@ class MergerConfigMasked(MergerConfig):
self.color_transfer_mode == other.color_transfer_mode and \
self.image_denoise_power == other.image_denoise_power and \
self.bicubic_degrade_power == other.bicubic_degrade_power and \
self.color_degrade_power == other.color_degrade_power and \
self.export_mask_alpha == other.export_mask_alpha
self.color_degrade_power == other.color_degrade_power
return False
@ -324,8 +317,7 @@ class MergerConfigMasked(MergerConfig):
if 'raw' not in self.mode:
r += (f"""image_denoise_power: {self.image_denoise_power}\n"""
f"""bicubic_degrade_power: {self.bicubic_degrade_power}\n"""
f"""color_degrade_power: {self.color_degrade_power}\n"""
f"""export_mask_alpha: {self.export_mask_alpha}\n""")
f"""color_degrade_power: {self.color_degrade_power}\n""")
r += "================"

View file

@ -113,8 +113,8 @@ class ModelBase(object):
self.model_name = saved_models_names[model_idx]
else:
self.model_name = io.input_str(f"No saved models found. Enter a name of a new model", "noname")
self.model_name = io.input_str(f"No saved models found. Enter a name of a new model", "new")
self.model_name = self.model_name.replace('_', ' ')
break
self.model_name = self.model_name + '_' + self.model_class_name
@ -159,8 +159,8 @@ class ModelBase(object):
#####
io.input_skip_pending()
self.on_initialize_options()
if self.is_first_run():
# save as default options only for first run model initialize
self.default_options_path.write_bytes( pickle.dumps (self.options) )
@ -173,6 +173,8 @@ class ModelBase(object):
self.on_initialize()
self.options['batch_size'] = self.batch_size
if self.is_training:
self.preview_history_path = self.saved_models_path / ( f'{self.get_model_name()}_history' )
self.autobackups_path = self.saved_models_path / ( f'{self.get_model_name()}_autobackups' )
@ -275,7 +277,7 @@ class ModelBase(object):
def ask_batch_size(self, suggest_batch_size=None):
default_batch_size = self.load_or_def_option('batch_size', suggest_batch_size or self.batch_size)
self.batch_size = max(0, io.input_int("Batch_size", default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually."))
self.options['batch_size'] = self.batch_size = max(0, io.input_int("Batch_size", default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually."))
#overridable

View file

@ -14,7 +14,7 @@ class QModel(ModelBase):
#override
def on_initialize(self):
device_config = nn.getCurrentDeviceConfig()
self.model_data_format = "NCHW" if len(device_config.devices) != 0 else "NHWC"
self.model_data_format = "NCHW" if len(device_config.devices) != 0 and not self.is_debug() else "NHWC"
nn.initialize(data_format=self.model_data_format)
tf = nn.tf
@ -167,9 +167,9 @@ class QModel(ModelBase):
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
input_nc = 3
output_nc = 3
bgr_shape = nn.get4Dshape(resolution,resolution,input_nc)
input_ch = 3
output_ch = 3
bgr_shape = nn.get4Dshape(resolution,resolution,input_ch)
mask_shape = nn.get4Dshape(resolution,resolution,1)
lowest_dense_res = resolution // 16
@ -189,7 +189,7 @@ class QModel(ModelBase):
# Initializing model classes
with tf.device (models_opt_device):
self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, name='encoder')
self.encoder = Encoder(in_ch=input_ch, e_ch=e_dims, name='encoder')
encoder_out_ch = self.encoder.compute_output_channels ( (nn.tf_floatx, bgr_shape))
self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, d_ch=d_dims, name='inter')
@ -262,7 +262,7 @@ class QModel(ModelBase):
gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur
gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur)
gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
gpu_target_src_masked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst
gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src
@ -271,8 +271,8 @@ class QModel(ModelBase):
gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur
gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur)
gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
gpu_src_loss += tf.reduce_mean ( 10*tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] )
gpu_dst_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
@ -282,8 +282,8 @@ class QModel(ModelBase):
gpu_src_losses += [gpu_src_loss]
gpu_dst_losses += [gpu_dst_loss]
gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss
gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ]
gpu_G_loss = gpu_src_loss + gpu_dst_loss
gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_G_loss, self.src_dst_trainable_weights ) ]
# Average losses and gradients, and create optimizer update ops
@ -362,10 +362,9 @@ class QModel(ModelBase):
training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path()
training_data_dst_path = self.training_data_dst_path if not self.pretrain else self.get_pretraining_data_path()
cpu_count = multiprocessing.cpu_count()
cpu_count = min(multiprocessing.cpu_count(), 8)
src_generators_count = cpu_count // 2
dst_generators_count = cpu_count - src_generators_count
dst_generators_count = cpu_count // 2
self.set_training_data_generators ([
SampleGeneratorFace(training_data_src_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
@ -396,11 +395,12 @@ class QModel(ModelBase):
#override
def onTrainOneIter(self):
if self.get_iter() % 3 == 0 and self.last_samples is not None:
( (warped_src, target_src, target_srcm), \
(warped_dst, target_dst, target_dstm) ) = self.last_samples
src_loss, dst_loss = self.src_dst_train (target_src, target_src, target_srcm,
target_dst, target_dst, target_dstm)
warped_src = target_src
warped_dst = target_dst
else:
samples = self.last_samples = self.generate_next_samples()
( (warped_src, target_src, target_srcm), \
@ -440,8 +440,7 @@ class QModel(ModelBase):
return result
def predictor_func (self, face=None):
face = face[None,...]
face = nn.to_data_format(face, self.model_data_format, "NHWC")
face = nn.to_data_format(face[None,...], self.model_data_format, "NHWC")
bgr, mask_dst_dstm, mask_src_dstm = [ nn.to_data_format(x, "NHWC", self.model_data_format).astype(np.float32) for x in self.AE_merge (face) ]
mask = mask_dst_dstm[0] * mask_src_dstm[0]

View file

@ -33,7 +33,9 @@ class SAEHDModel(ModelBase):
default_archi = self.options['archi'] = self.load_or_def_option('archi', 'dfhd')
default_ae_dims = self.options['ae_dims'] = self.load_or_def_option('ae_dims', 256)
default_e_dims = self.options['e_dims'] = self.load_or_def_option('e_dims', 64)
default_d_dims = self.options['d_dims'] = self.load_or_def_option('d_dims', 64)
default_d_dims = 48 if self.options['archi'] == 'dfhd' else 64
default_d_dims = self.options['d_dims'] = self.load_or_def_option('d_dims', default_d_dims)
default_d_mask_dims = default_d_dims // 3
default_d_mask_dims += default_d_mask_dims % 2
@ -43,6 +45,7 @@ class SAEHDModel(ModelBase):
default_learn_mask = self.options['learn_mask'] = self.load_or_def_option('learn_mask', True)
default_lr_dropout = self.options['lr_dropout'] = self.load_or_def_option('lr_dropout', False)
default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True)
default_gan_power = self.options['gan_power'] = self.load_or_def_option('gan_power', 0.0)
default_true_face_power = self.options['true_face_power'] = self.load_or_def_option('true_face_power', 0.0)
default_face_style_power = self.options['face_style_power'] = self.load_or_def_option('face_style_power', 0.0)
default_bg_style_power = self.options['bg_style_power'] = self.load_or_def_option('bg_style_power', 0.0)
@ -87,13 +90,15 @@ class SAEHDModel(ModelBase):
self.options['lr_dropout'] = io.input_bool ("Use learning rate dropout", default_lr_dropout, help_message="When the face is trained enough, you can enable this option to get extra sharpness for less amount of iterations.")
self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness for less amount of iterations.")
self.options['gan_power'] = np.clip ( io.input_number ("GAN power", default_gan_power, add_info="0.0 .. 10.0", help_message="Train the network in Generative Adversarial manner. Accelerates the speed of training. Forces the neural network to learn small details of the face. You can enable/disable this option at any time. Typical value is 1.0"), 0.0, 10.0 )
if 'df' in self.options['archi']:
self.options['true_face_power'] = np.clip ( io.input_number (" 'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 )
self.options['true_face_power'] = np.clip ( io.input_number ("'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Typical value is 0.01 . Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 )
else:
self.options['true_face_power'] = 0.0
self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse. Typical value is 2.0"), 0.0, 100.0 )
self.options['ct_mode'] = io.input_str (f"Color transfer for src faceset", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.")
self.options['clipgrad'] = io.input_bool ("Enable gradient clipping", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.")
self.options['pretrain'] = io.input_bool ("Enable pretraining mode", default_pretrain, help_message="Pretrain the model with large amount of various faces. After that, model can be used to train the fakes more quickly.")
@ -110,7 +115,7 @@ class SAEHDModel(ModelBase):
#override
def on_initialize(self):
device_config = nn.getCurrentDeviceConfig()
self.model_data_format = "NCHW" if len(device_config.devices) != 0 else "NHWC"
self.model_data_format = "NCHW" if len(device_config.devices) != 0 and not self.is_debug() else "NHWC"
nn.initialize(floatx="float16" if self.options['use_float16'] else "float32",
data_format=self.model_data_format)
tf = nn.tf
@ -136,10 +141,8 @@ class SAEHDModel(ModelBase):
def forward(self, x):
x = self.conv1(x)
if self.subpixel:
x = nn.tf_space_to_depth(x, 2)
if self.use_activator:
x = tf.nn.leaky_relu(x, 0.1)
return x
@ -332,7 +335,7 @@ class SAEHDModel(ModelBase):
device_config = nn.getCurrentDeviceConfig()
devices = device_config.devices
resolution = self.options['resolution']
self.resolution = resolution = self.options['resolution']
learn_mask = self.options['learn_mask']
archi = self.options['archi']
ae_dims = self.options['ae_dims']
@ -341,15 +344,17 @@ class SAEHDModel(ModelBase):
d_mask_dims = self.options['d_mask_dims']
self.pretrain = self.options['pretrain']
self.gan_power = gan_power = self.options['gan_power'] if not self.pretrain else 0.0
masked_training = True
models_opt_on_gpu = False if len(devices) != 1 else self.options['models_opt_on_gpu']
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
input_nc = 3
output_nc = 3
bgr_shape = nn.get4Dshape(resolution,resolution,input_nc)
input_ch = 3
output_ch = 3
bgr_shape = nn.get4Dshape(resolution,resolution,input_ch)
mask_shape = nn.get4Dshape(resolution,resolution,1)
lowest_dense_res = resolution // 16
@ -370,7 +375,7 @@ class SAEHDModel(ModelBase):
# Initializing model classes
with tf.device (models_opt_device):
if 'df' in archi:
self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder')
self.encoder = Encoder(in_ch=input_ch, e_ch=e_dims, is_hd='hd' in archi, name='encoder')
encoder_out_ch = self.encoder.compute_output_channels ( (nn.tf_floatx, bgr_shape))
self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, name='inter')
@ -386,11 +391,11 @@ class SAEHDModel(ModelBase):
if self.is_training:
if self.options['true_face_power'] != 0:
self.dis = CodeDiscriminator(ae_dims, code_res=lowest_dense_res*2, name='dis' )
self.model_filename_list += [ [self.dis, 'dis.npy'] ]
self.code_discriminator = CodeDiscriminator(ae_dims, code_res=lowest_dense_res*2, name='dis' )
self.model_filename_list += [ [self.code_discriminator, 'code_discriminator.npy'] ]
elif 'liae' in archi:
self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder')
self.encoder = Encoder(in_ch=input_ch, e_ch=e_dims, is_hd='hd' in archi, name='encoder')
encoder_out_ch = self.encoder.compute_output_channels ( (nn.tf_floatx, bgr_shape))
self.inter_AB = Inter(in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims*2, name='inter_AB')
@ -407,6 +412,12 @@ class SAEHDModel(ModelBase):
[self.decoder , 'decoder.npy'] ]
if self.is_training:
if gan_power != 0:
self.D_src = nn.PatchDiscriminator(patch_size=resolution//16, in_ch=output_ch, base_ch=512, name="D_src")
self.D_dst = nn.PatchDiscriminator(patch_size=resolution//16, in_ch=output_ch, base_ch=512, name="D_dst")
self.model_filename_list += [ [self.D_src, 'D_src.npy'] ]
self.model_filename_list += [ [self.D_dst, 'D_dst.npy'] ]
# Initialize optimizers
lr=5e-5
lr_dropout = 0.3 if self.options['lr_dropout'] else 1.0
@ -424,9 +435,14 @@ class SAEHDModel(ModelBase):
self.src_dst_opt.initialize_variables (self.src_dst_all_trainable_weights, vars_on_cpu=optimizer_vars_on_cpu)
if self.options['true_face_power'] != 0:
self.D_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_opt')
self.D_opt.initialize_variables ( self.dis.get_weights(), vars_on_cpu=optimizer_vars_on_cpu)
self.model_filename_list += [ (self.D_opt, 'D_opt.npy') ]
self.D_code_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_code_opt')
self.D_code_opt.initialize_variables ( self.code_discriminator.get_weights(), vars_on_cpu=optimizer_vars_on_cpu)
self.model_filename_list += [ (self.D_code_opt, 'D_code_opt.npy') ]
if gan_power != 0:
self.D_src_dst_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_src_dst_opt')
self.D_src_dst_opt.initialize_variables ( self.D_src.get_weights()+self.D_dst.get_weights(), vars_on_cpu=optimizer_vars_on_cpu)
self.model_filename_list += [ (self.D_src_dst_opt, 'D_src_dst_opt.npy') ]
if self.is_training:
# Adjust batch size for multiple GPU
@ -445,9 +461,9 @@ class SAEHDModel(ModelBase):
gpu_src_losses = []
gpu_dst_losses = []
gpu_src_dst_loss_gvs = []
gpu_D_loss_gvs = []
gpu_G_loss_gvs = []
gpu_D_code_loss_gvs = []
gpu_D_src_dst_loss_gvs = []
for gpu_id in range(gpu_count):
with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ):
@ -497,7 +513,7 @@ class SAEHDModel(ModelBase):
gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur
gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur)
gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
gpu_target_src_masked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst
gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src
@ -506,8 +522,8 @@ class SAEHDModel(ModelBase):
gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur
gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur)
gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
if learn_mask:
gpu_src_loss += tf.reduce_mean ( 10*tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] )
@ -528,26 +544,48 @@ class SAEHDModel(ModelBase):
gpu_src_losses += [gpu_src_loss]
gpu_dst_losses += [gpu_dst_loss]
gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss
gpu_G_loss = gpu_src_loss + gpu_dst_loss
if self.options['true_face_power'] != 0:
def DLoss(labels,logits):
return tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits), axis=[1,2,3])
gpu_src_code_d = self.dis( gpu_src_code )
gpu_src_code_d_ones = tf.ones_like(gpu_src_code_d)
if self.options['true_face_power'] != 0:
gpu_src_code_d = self.code_discriminator( gpu_src_code )
gpu_src_code_d_ones = tf.ones_like (gpu_src_code_d)
gpu_src_code_d_zeros = tf.zeros_like(gpu_src_code_d)
gpu_dst_code_d = self.dis( gpu_dst_code )
gpu_dst_code_d = self.code_discriminator( gpu_dst_code )
gpu_dst_code_d_ones = tf.ones_like(gpu_dst_code_d)
gpu_src_dst_loss += self.options['true_face_power']*DLoss(gpu_src_code_d_ones, gpu_src_code_d)
gpu_G_loss += self.options['true_face_power']*DLoss(gpu_src_code_d_ones, gpu_src_code_d)
gpu_D_loss = (DLoss(gpu_src_code_d_ones , gpu_dst_code_d) + \
gpu_D_code_loss = (DLoss(gpu_src_code_d_ones , gpu_dst_code_d) + \
DLoss(gpu_src_code_d_zeros, gpu_src_code_d) ) * 0.5
gpu_D_loss_gvs += [ nn.tf_gradients (gpu_D_loss, self.dis.get_weights() ) ]
gpu_D_code_loss_gvs += [ nn.tf_gradients (gpu_D_code_loss, self.code_discriminator.get_weights() ) ]
gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ]
if gan_power != 0:
gpu_pred_src_src_d = self.D_src(gpu_pred_src_src_masked_opt)
gpu_pred_src_src_d_ones = tf.ones_like (gpu_pred_src_src_d)
gpu_pred_src_src_d_zeros = tf.zeros_like(gpu_pred_src_src_d)
gpu_target_src_d = self.D_src(gpu_target_src_masked_opt)
gpu_target_src_d_ones = tf.ones_like(gpu_target_src_d)
gpu_pred_dst_dst_d = self.D_dst(gpu_pred_dst_dst_masked_opt)
gpu_pred_dst_dst_d_ones = tf.ones_like (gpu_pred_dst_dst_d)
gpu_pred_dst_dst_d_zeros = tf.zeros_like(gpu_pred_dst_dst_d)
gpu_target_dst_d = self.D_dst(gpu_target_dst_masked_opt)
gpu_target_dst_d_ones = tf.ones_like(gpu_target_dst_d)
gpu_D_src_dst_loss = (DLoss(gpu_target_src_d_ones , gpu_target_src_d) + \
DLoss(gpu_pred_src_src_d_zeros, gpu_pred_src_src_d) ) * 0.5 + \
(DLoss(gpu_target_dst_d_ones , gpu_target_dst_d) + \
DLoss(gpu_pred_dst_dst_d_zeros, gpu_pred_dst_dst_d) ) * 0.5
gpu_D_src_dst_loss_gvs += [ nn.tf_gradients (gpu_D_src_dst_loss, self.D_src.get_weights()+self.D_dst.get_weights() ) ]
gpu_G_loss += gan_power*(DLoss(gpu_pred_src_src_d_ones, gpu_pred_src_src_d) + DLoss(gpu_pred_dst_dst_d_ones, gpu_pred_dst_dst_d))
gpu_G_loss_gvs += [ nn.tf_gradients ( gpu_G_loss, self.src_dst_trainable_weights ) ]
# Average losses and gradients, and create optimizer update ops
@ -558,15 +596,15 @@ class SAEHDModel(ModelBase):
pred_src_srcm = nn.tf_concat(gpu_pred_src_srcm_list, 0)
pred_dst_dstm = nn.tf_concat(gpu_pred_dst_dstm_list, 0)
pred_src_dstm = nn.tf_concat(gpu_pred_src_dstm_list, 0)
src_loss = nn.tf_average_tensor_list(gpu_src_losses)
dst_loss = nn.tf_average_tensor_list(gpu_dst_losses)
src_dst_loss_gv = nn.tf_average_gv_list (gpu_src_dst_loss_gvs)
src_dst_loss_gv_op = self.src_dst_opt.get_update_op (src_dst_loss_gv )
src_dst_loss_gv_op = self.src_dst_opt.get_update_op (nn.tf_average_gv_list (gpu_G_loss_gvs))
if self.options['true_face_power'] != 0:
D_loss_gv = nn.tf_average_gv_list(gpu_D_loss_gvs)
D_loss_gv_op = self.D_opt.get_update_op (D_loss_gv )
D_loss_gv_op = self.D_code_opt.get_update_op (nn.tf_average_gv_list(gpu_D_code_loss_gvs))
if gan_power != 0:
src_D_src_dst_loss_gv_op = self.D_src_dst_opt.get_update_op (nn.tf_average_gv_list(gpu_D_src_dst_loss_gvs) )
# Initializing training and view functions
@ -590,6 +628,17 @@ class SAEHDModel(ModelBase):
nn.tf_sess.run ([D_loss_gv_op], feed_dict={self.warped_src: warped_src, self.warped_dst: warped_dst})
self.D_train = D_train
if gan_power != 0:
def D_src_dst_train(warped_src, target_src, target_srcm, \
warped_dst, target_dst, target_dstm):
nn.tf_sess.run ([src_D_src_dst_loss_gv_op], feed_dict={self.warped_src :warped_src,
self.target_src :target_src,
self.target_srcm:target_srcm,
self.warped_dst :warped_dst,
self.target_dst :target_dst,
self.target_dstm:target_dstm})
self.D_src_dst_train = D_src_dst_train
if learn_mask:
def AE_view(warped_src, warped_dst):
return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm],
@ -663,12 +712,11 @@ class SAEHDModel(ModelBase):
t_img_warped = t.IMG_WARPED_TRANSFORMED if self.options['random_warp'] else t.IMG_TRANSFORMED
cpu_count = multiprocessing.cpu_count()
cpu_count = min(multiprocessing.cpu_count(), 8)
src_generators_count = cpu_count // 2
dst_generators_count = cpu_count // 2
if self.options['ct_mode'] != 'none':
src_generators_count = int(src_generators_count * 1.5)
dst_generators_count = cpu_count - src_generators_count
self.set_training_data_generators ([
SampleGeneratorFace(training_data_src_path, random_ct_samples_path=random_ct_samples_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
@ -706,6 +754,9 @@ class SAEHDModel(ModelBase):
if self.options['true_face_power'] != 0 and not self.pretrain:
self.D_train (warped_src, warped_dst)
if self.gan_power != 0:
self.D_src_dst_train (warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm)
return ( ('src_loss', src_loss), ('dst_loss', dst_loss), )
#override
@ -721,7 +772,8 @@ class SAEHDModel(ModelBase):
target_srcm, target_dstm = [ nn.to_data_format(x,"NHWC", self.model_data_format) for x in ([target_srcm, target_dstm] )]
n_samples = min(4, self.get_batch_size() )
n_samples = min(4, self.get_batch_size(), 800 // self.resolution )
result = []
st = []
for i in range(n_samples):
@ -742,8 +794,7 @@ class SAEHDModel(ModelBase):
return result
def predictor_func (self, face=None):
face = face[None,...]
face = nn.to_data_format(face, self.model_data_format, "NHWC")
face = nn.to_data_format(face[None,...], self.model_data_format, "NHWC")
if self.options['learn_mask']:
bgr, mask_dst_dstm, mask_src_dstm = [ nn.to_data_format(x,"NHWC", self.model_data_format).astype(np.float32) for x in self.AE_merge (face) ]

View file

@ -3,7 +3,7 @@ import shutil
import struct
from pathlib import Path
import samplelib.SampleHost
import samplelib.SampleLoader
from core.interact import interact as io
from samplelib import Sample
from core import pathex
@ -34,7 +34,7 @@ class PackedFaceset():
else:
image_paths = pathex.get_image_paths(samples_path)
samples = samplelib.SampleHost.load_face_samples(image_paths)
samples = samplelib.SampleLoader.load_face_samples(image_paths)
samples_len = len(samples)
samples_configs = []

View file

@ -9,7 +9,7 @@ import numpy as np
from core import mplib
from core.joblib import SubprocessGenerator, ThisThreadGenerator
from facelib import LandmarksProcessor
from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
SampleType)
@ -39,7 +39,7 @@ class SampleGeneratorFace(SampleGeneratorBase):
else:
self.generators_count = max(1, generators_count)
samples = SampleHost.load (SampleType.FACE, self.samples_path)
samples = SampleLoader.load (SampleType.FACE, self.samples_path)
self.samples_len = len(samples)
if self.samples_len == 0:
@ -48,7 +48,7 @@ class SampleGeneratorFace(SampleGeneratorBase):
index_host = mplib.IndexHost(self.samples_len)
if random_ct_samples_path is not None:
ct_samples = SampleHost.load (SampleType.FACE, random_ct_samples_path)
ct_samples = SampleLoader.load (SampleType.FACE, random_ct_samples_path)
ct_index_host = mplib.IndexHost( len(ct_samples) )
else:
ct_samples = None

View file

@ -8,7 +8,7 @@ import numpy as np
from core import mplib
from core.joblib import SubprocessGenerator, ThisThreadGenerator
from facelib import LandmarksProcessor
from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
SampleType)
@ -33,7 +33,7 @@ class SampleGeneratorFacePerson(SampleGeneratorBase):
raise NotImplementedError("Currently SampleGeneratorFacePerson is not implemented.")
samples_host = SampleHost.mp_host (SampleType.FACE, self.samples_path)
samples_host = SampleLoader.mp_host (SampleType.FACE, self.samples_path)
samples = samples_host.get_list()
self.samples_len = len(samples)
@ -98,7 +98,7 @@ class SampleGeneratorFacePerson(SampleGeneratorBase):
@staticmethod
def get_person_id_max_count(samples_path):
return SampleHost.get_person_id_max_count(samples_path)
return SampleLoader.get_person_id_max_count(samples_path)
"""
if self.person_id_mode==1:

View file

@ -9,7 +9,7 @@ import numpy as np
from core import mplib
from core.joblib import SubprocessGenerator, ThisThreadGenerator
from facelib import LandmarksProcessor
from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
SampleType)
@ -31,7 +31,7 @@ class SampleGeneratorFaceTemporal(SampleGeneratorBase):
else:
self.generators_count = generators_count
samples = SampleHost.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path)
samples = SampleLoader.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path)
samples_len = len(samples)
if samples_len == 0:
raise ValueError('No training data provided.')

View file

@ -4,7 +4,7 @@ import cv2
import numpy as np
from core.joblib import SubprocessGenerator, ThisThreadGenerator
from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
SampleType)
@ -22,7 +22,7 @@ class SampleGeneratorImageTemporal(SampleGeneratorBase):
self.sample_process_options = sample_process_options
self.output_sample_types = output_sample_types
self.samples = SampleHost.load (SampleType.IMAGE, self.samples_path)
self.samples = SampleLoader.load (SampleType.IMAGE, self.samples_path)
self.generator_samples = [ self.samples ]
self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] if self.debug else \

View file

@@ -14,7 +14,7 @@ from facelib import FaceType, LandmarksProcessor
from .Sample import Sample, SampleType
class SampleHost:
class SampleLoader:
samples_cache = dict()
@staticmethod
def get_person_id_max_count(samples_path):
@@ -33,7 +33,7 @@ class SampleHost:
@staticmethod
def load(sample_type, samples_path):
samples_cache = SampleHost.samples_cache
samples_cache = SampleLoader.samples_cache
if str(samples_path) not in samples_cache.keys():
samples_cache[str(samples_path)] = [None]*SampleType.QTY
@@ -55,12 +55,12 @@ class SampleHost:
io.log_info (f"Loaded {len(result)} packed faces from {samples_path}")
if result is None:
result = SampleHost.load_face_samples( pathex.get_image_paths(samples_path) )
result = SampleLoader.load_face_samples( pathex.get_image_paths(samples_path) )
samples[sample_type] = result
elif sample_type == SampleType.FACE_TEMPORAL_SORTED:
result = SampleHost.load (SampleType.FACE, samples_path)
result = SampleHost.upgradeToFaceTemporalSortedSamples(result)
result = SampleLoader.load (SampleType.FACE, samples_path)
result = SampleLoader.upgradeToFaceTemporalSortedSamples(result)
samples[sample_type] = result
return samples[sample_type]

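Loading is still cached per folder and per sample type, and the temporally sorted variant is derived from the plain FACE load; roughly, as a paraphrase of the code above (not a verbatim copy):

cache = SampleLoader.samples_cache            # dict: str(samples_path) -> [None] * SampleType.QTY
key = str(samples_path)
if key not in cache:
    cache[key] = [None] * SampleType.QTY

if cache[key][SampleType.FACE_TEMPORAL_SORTED] is None:
    faces = SampleLoader.load(SampleType.FACE, samples_path)    # hits the same cache
    cache[key][SampleType.FACE_TEMPORAL_SORTED] = SampleLoader.upgradeToFaceTemporalSortedSamples(faces)
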
View file

@@ -101,7 +101,6 @@ class SampleProcessor(object):
for sample in samples:
sample_bgr = sample.load_bgr()
ct_sample_bgr = None
ct_sample_mask = None
h,w,c = sample_bgr.shape
is_face_sample = sample.landmarks is not None
@@ -117,10 +116,6 @@ class SampleProcessor(object):
resolution = opts.get('resolution', 0)
types = opts.get('types', [] )
border_replicate = opts.get('border_replicate', True)
random_sub_res = opts.get('random_sub_res', 0)
normalize_std_dev = opts.get('normalize_std_dev', False)
normalize_vgg = opts.get('normalize_vgg', False)
motion_blur = opts.get('motion_blur', None)
gaussian_blur = opts.get('gaussian_blur', None)
@@ -131,7 +126,6 @@ class SampleProcessor(object):
img_type = SPTF.NONE
target_face_type = SPTF.NONE
face_mask_type = SPTF.NONE
mode_type = SPTF.NONE
for t in types:
if t >= SPTF.IMG_TYPE_BEGIN and t < SPTF.IMG_TYPE_END:
@@ -141,6 +135,12 @@ class SampleProcessor(object):
elif t >= SPTF.MODE_BEGIN and t < SPTF.MODE_END:
mode_type = t
if mode_type == SPTF.MODE_M and not is_face_sample:
raise ValueError("MODE_M applicable only for face samples")
can_warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
can_transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED)
if img_type == SPTF.NONE:
raise ValueError ('expected IMG_ type')
@@ -148,7 +148,7 @@ class SampleProcessor(object):
l = sample.landmarks
l = np.concatenate ( [ np.expand_dims(l[:,0] / w,-1), np.expand_dims(l[:,1] / h,-1) ], -1 )
l = np.clip(l, 0.0, 1.0)
img = l
out_sample = l
elif img_type == SPTF.IMG_PITCH_YAW_ROLL or img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID:
pitch_yaw_roll = sample.get_pitch_yaw_roll()
@@ -156,44 +156,29 @@ class SampleProcessor(object):
yaw = -yaw
if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID:
pitch = np.clip( (pitch / math.pi) / 2.0 + 1.0, 0, 1)
yaw = np.clip( (yaw / math.pi) / 2.0 + 1.0, 0, 1)
roll = np.clip( (roll / math.pi) / 2.0 + 1.0, 0, 1)
pitch = np.clip( (pitch / math.pi) / 2.0 + 0.5, 0, 1)
yaw = np.clip( (yaw / math.pi) / 2.0 + 0.5, 0, 1)
roll = np.clip( (roll / math.pi) / 2.0 + 0.5, 0, 1)
img = (pitch, yaw, roll)
out_sample = (pitch, yaw, roll)
else:
if mode_type == SPTF.NONE:
raise ValueError ('expected MODE_ type')
def do_transform(img, mask):
warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED)
flip = img_type != SPTF.IMG_WARPED
need_img = mode_type != SPTF.MODE_M
need_mask = mode_type == SPTF.MODE_M
img = imagelib.warp_by_params (params, img, warp, transform, flip, border_replicate)
if mask is not None:
mask = imagelib.warp_by_params (params, mask, warp, transform, flip, False)
if len(mask.shape) == 2:
mask = mask[...,np.newaxis]
return img, mask
img = sample_bgr
### Prepare a mask
mask = None
if is_face_sample:
if need_mask:
if sample.eyebrows_expand_mod is not None:
mask = LandmarksProcessor.get_image_hull_mask (img.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
else:
mask = LandmarksProcessor.get_image_hull_mask (img.shape, sample.landmarks)
mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks)
if sample.ie_polys is not None:
sample.ie_polys.overlay_mask(mask)
##################
if need_img:
img = sample_bgr
if motion_blur is not None:
chance, mb_max_size = motion_blur
chance = np.clip(chance, 0, 100)
@@ -214,99 +199,78 @@ class SampleProcessor(object):
raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.' % (sample.filename, sample.face_type, target_ft) )
if sample.face_type == FaceType.MARK_ONLY:
#first warp to target facetype
img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft), (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
mask = cv2.warpAffine( mask, LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft), (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
#then apply transforms
img, mask = do_transform (img, mask)
img = np.concatenate( (img, mask ), -1 )
img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
else:
img, mask = do_transform (img, mask)
mat = LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft)
if need_img:
img = cv2.warpAffine( img, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
if need_mask:
mask = cv2.warpAffine( mask, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
mask = cv2.resize( mask, (resolution,resolution), cv2.INTER_CUBIC )[...,None]
else:
mat = LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, target_ft)
img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
mask = cv2.warpAffine( mask, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )
img = np.concatenate( (img, mask[...,None] ), -1 )
if need_img:
img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC )
if need_mask:
mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
mask = cv2.warpAffine( mask, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )[...,None]
else:
img, mask = do_transform (img, mask)
img = np.concatenate( (img, mask ), -1 )
if need_img:
img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
if random_sub_res != 0:
sub_size = resolution - random_sub_res
rnd_state = np.random.RandomState (sample_rnd_seed+random_sub_res)
start_x = rnd_state.randint(sub_size+1)
start_y = rnd_state.randint(sub_size+1)
img = img[start_y:start_y+sub_size,start_x:start_x+sub_size,:]
if need_mask:
mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
mask = cv2.resize( mask, (resolution,resolution), cv2.INTER_CUBIC )[...,None]
if mode_type == SPTF.MODE_M:
out_sample = np.clip(mask, 0, 1).astype(np.float32)
else:
img = np.clip(img, 0, 1).astype(np.float32)
img_bgr = img[...,0:3]
img_mask = img[...,3:4]
if ct_mode is not None and ct_sample is not None:
if ct_sample_bgr is None:
ct_sample_bgr = ct_sample.load_bgr()
ct_sample_bgr_resized = cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR )
if ct_mode == 'lct':
img_bgr = imagelib.linear_color_transfer (img_bgr, ct_sample_bgr_resized)
img_bgr = np.clip( img_bgr, 0.0, 1.0)
elif ct_mode == 'rct':
img_bgr = imagelib.reinhard_color_transfer ( np.clip( (img_bgr*255).astype(np.uint8), 0, 255),
np.clip( (ct_sample_bgr_resized*255).astype(np.uint8), 0, 255) )
img_bgr = np.clip( img_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
elif ct_mode == 'mkl':
img_bgr = imagelib.color_transfer_mkl (img_bgr, ct_sample_bgr_resized)
elif ct_mode == 'idt':
img_bgr = imagelib.color_transfer_idt (img_bgr, ct_sample_bgr_resized)
elif ct_mode == 'sot':
img_bgr = imagelib.color_transfer_sot (img_bgr, ct_sample_bgr_resized)
img_bgr = np.clip( img_bgr, 0.0, 1.0)
if normalize_std_dev:
img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) )
elif normalize_vgg:
img_bgr = np.clip(img_bgr*255, 0, 255)
img_bgr[:,:,0] -= 103.939
img_bgr[:,:,1] -= 116.779
img_bgr[:,:,2] -= 123.68
img = imagelib.color_transfer (ct_mode,
img,
cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR ) )
if mode_type == SPTF.MODE_BGR:
img = img_bgr
out_sample = img
elif mode_type == SPTF.MODE_BGR_SHUFFLE:
rnd_state = np.random.RandomState (sample_rnd_seed)
img = np.take (img_bgr, rnd_state.permutation(img_bgr.shape[-1]), axis=-1)
out_sample = np.take (img, rnd_state.permutation(img.shape[-1]), axis=-1)
elif mode_type == SPTF.MODE_BGR_RANDOM_HSV_SHIFT:
rnd_state = np.random.RandomState (sample_rnd_seed)
hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
h = (h + rnd_state.randint(360) ) % 360
s = np.clip ( s + rnd_state.random()-0.5, 0, 1 )
v = np.clip ( v + rnd_state.random()-0.5, 0, 1 )
hsv = cv2.merge([h, s, v])
img = np.clip( cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) , 0, 1 )
out_sample = np.clip( cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) , 0, 1 )
elif mode_type == SPTF.MODE_G:
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)[...,None]
out_sample = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)[...,None]
elif mode_type == SPTF.MODE_GGG:
img = np.repeat ( np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1), (3,), -1)
elif mode_type == SPTF.MODE_M and is_face_sample:
img = img_mask
out_sample = np.repeat ( np.expand_dims(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY),-1), (3,), -1)
if not debug:
if normalize_tanh:
img = np.clip (img * 2.0 - 1.0, -1.0, 1.0)
else:
img = np.clip (img, 0.0, 1.0)
out_sample = np.clip (out_sample * 2.0 - 1.0, -1.0, 1.0)
if data_format == "NCHW":
img = np.transpose(img, (2,0,1) )
out_sample = np.transpose(out_sample, (2,0,1) )
outputs_sample.append ( img )
outputs_sample.append ( out_sample )
outputs += [outputs_sample]
return outputs

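The per-mode color transfer branching in the generator is now a single dispatch through imagelib.color_transfer, called with (ct_mode, source, target) on BGR float images in [0,1] as shown above. A minimal sketch of calling it directly (file names are illustrative; ct_mode is one of 'lct', 'rct', 'mkl', 'idt', 'sot' from the removed branches):

import cv2
import numpy as np
from core import imagelib

# illustrative inputs, converted to float BGR in [0,1]
src = cv2.imread('face_src.jpg').astype(np.float32) / 255.0
trg = cv2.imread('face_trg.jpg').astype(np.float32) / 255.0

# resize the color reference to the source size, then transfer its color statistics
trg = cv2.resize(trg, (src.shape[1], src.shape[0]), interpolation=cv2.INTER_LINEAR)
out = imagelib.color_transfer('rct', src, trg)

cv2.imwrite('face_src_rct.jpg', np.clip(out * 255.0, 0, 255).astype(np.uint8))

In the generator itself the reference image is the randomly picked ct_sample resized to the training resolution, exactly as in the new call above.
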
View file

@@ -1,6 +1,6 @@
from .Sample import Sample
from .Sample import SampleType
from .SampleHost import SampleHost
from .SampleLoader import SampleLoader
from .SampleProcessor import SampleProcessor
from .SampleGeneratorBase import SampleGeneratorBase
from .SampleGeneratorFace import SampleGeneratorFace