diff --git a/DFLIMG/DFLJPG.py b/DFLIMG/DFLJPG.py index 2d2ba56..1ed8692 100644 --- a/DFLIMG/DFLJPG.py +++ b/DFLIMG/DFLJPG.py @@ -4,9 +4,10 @@ import struct import cv2 import numpy as np +from core.interact import interact as io +from core.structex import * from facelib import FaceType -from utils.struct_utils import * -from interact import interact as io + class DFLJPG(object): def __init__(self): @@ -243,6 +244,7 @@ class DFLJPG(object): source_landmarks=source_landmarks, image_to_face_mat=image_to_face_mat, fanseg_mask=fanseg_mask, + eyebrows_expand_mod=eyebrows_expand_mod, relighted=relighted) def remove_ie_polys(self): @@ -322,4 +324,3 @@ class DFLJPG(object): return self.dfl_dict.get ('eyebrows_expand_mod', None) def get_relighted(self): return self.dfl_dict.get ('relighted', False) - diff --git a/README.md b/README.md index 1e54c71..e04188a 100644 --- a/README.md +++ b/README.md @@ -1,60 +1,77 @@ -![](doc/DFL_welcome.jpg) + + -[На русском](doc/manual_ru.pdf) -- ### [Windows Desktop App](doc/doc_windows_desktop_app.md) + -- ### [Build and repository info](doc/doc_build_and_repository_info.md) + -[Donate via Paypal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=lepersorium@gmail.com&lc=US&no_note=0&item_name=Support+DeepFaceLab&cn=&curency_code=USD&bn=PP-DonationsBF:btn_donateCC_LG.gif:NonHosted) - -bitcoin:31mPd6DxPCzbpCMZk4k1koWAbErSyqkAXr + +
+ +

+ +# DeepFaceLab +### the leading software for creating deep fakes -![](doc/logo_cuda.jpg)![](doc/logo_opencl.jpg)![](doc/logo_keras.jpg)![](doc/logo_tensorflow.jpg)![](doc/logo_plaidml.jpg) -#deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets +
-## **DeepFaceLab** is a tool that utilizes machine learning to replace faces in videos. +More than 95% of deepfake videos are created with DeepFaceLab. -- ### [Gallery](doc/gallery/doc_gallery.md) +DeepFaceLab is used by such popular YouTube channels as -- ### Manuals: +|[Ctrl Shift Face](https://www.youtube.com/channel/UCKpH0CKltc73e4wh0_pgL3g)|[Sham00k](https://www.youtube.com/channel/UCZXbWcv7fSZFTAZV4beckyw/videos)|[Collider videos](https://www.youtube.com/watch?v=A91P2qtPT54&list=PLayt6616lBclvOprvrC8qKGCO-mAhPRux)|[VFXChris Ume](https://www.youtube.com/channel/UCGf4OlX_aTt8DlrgiH3jN3g/videos)| +|---|---|---|---| -[English (google translated)](doc/manual_en_google_translated.pdf) +
-- ### Forks +## Releases -[Google Colab fork](https://github.com/chervonij/DFL-Colab) by @chervonij -[Linux fork](https://github.com/lbfs/DeepFaceLab_Linux) by @lbfs - may be outdated +|||| +|---|---|---| +|Windows|[Google Drive](https://drive.google.com/open?id=1BCFK_L7lPNwMbEQ_kFPqPpDdFEOd_Dci)|If the download quota is exceeded, add the file to your own Google Drive and download from it.| +||[Torrent page](https://rutracker.org/forum/viewtopic.php?t=5558863)|Use a BitTorrent client.| +|Google Colab|[github](https://github.com/chervonij/DFL-Colab)|by @chervonij. You can train fakes for free using Google Colab.| +|Linux|[github](https://github.com/lbfs/DeepFaceLab_Linux)|by @lbfs. May be outdated.| +|||| -- ### [Ready to work facesets](doc/doc_ready_to_work_facesets.md) +
-- ### How I can help the project? +## Links -If you like this software, please consider a donation. -GOAL: next DeepFacelab update. +|||| +|---|---|---| +|Guides and tutorials|[mrdeepfakes](https://mrdeepfakes.com/forums/forum-guides-and-tutorials)|| +|||| +|Ready to work facesets|[mrdeepfakes](https://mrdeepfakes.com/forums/forum-celebrity-facesets)|| +|||| +|Communication groups|[telegram (English / Russian)](https://t.me/DeepFaceLab_official)|Don't forget to hide your phone number.| +||[mrdeepfakes](https://mrdeepfakes.com/forums/)|the biggest (Not) Safe For Work English community| +||QQ 951138799|Chinese QQ group for ML/AI experts| +||[deepfaker.xyz](https://www.deepfaker.xyz)|Chinese community localizing DeepFaceLab| +||[reddit r/GifFakes/](https://www.reddit.com/r/GifFakes/new/)|Post your deepfakes there!| +||[reddit r/SFWdeepfakes/](https://www.reddit.com/r/SFWdeepfakes/new/)|Post your deepfakes there!| -[Donate via Yandex.Money](https://money.yandex.ru/to/41001142318065) +
+ +## How can I help the project? -![](doc/example_faceset.jpg) +|||| +|---|---|---| +|Donate|If you like this software, please consider a donation. Current **GOAL**: next DeepFaceLab update.|| +||[Donate via Paypal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=lepersorium@gmail.com&lc=US&no_note=0&item_name=Support+DeepFaceLab&cn=&curency_code=USD&bn=PP-DonationsBF:btn_donateCC_LG.gif:NonHosted)|| +||[Donate via Yandex.Money](https://money.yandex.ru/to/41001142318065)|| +||bitcoin:31mPd6DxPCzbpCMZk4k1koWAbErSyqkAXr|| +|||| +|Collect facesets|You can collect a faceset of any celebrity that can be used in DeepFaceLab and share it [in the community](https://mrdeepfakes.com/forums/forum-celebrity-facesets)| -You can collect faceset of any celebrities that can be used in DeepFaceLab (described in manual) +
+ +

-and share it here [mrdeepfakes celebrity-facesets](https://mrdeepfakes.com/forums/forum-celebrity-facesets) - -- ### Communication groups: - -[telegram (English / Russian)](https://t.me/DeepFaceLab_official) - -[mrdeepfakes (English)](https://mrdeepfakes.com/forums/) - the biggest SFW and NSFW community - -(Chinese) QQ group 951138799 for ML/AI experts - -[deepfakes (Chinese)](https://deepfakescn.com) - -[reddit r/GifFakes/ (English)](https://www.reddit.com/r/GifFakes/new/) - -[reddit r/SFWdeepfakes/ (English)](https://www.reddit.com/r/SFWdeepfakes/new/) +#deepfacelab #deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets +
diff --git a/converters/__init__.py b/converters/__init__.py deleted file mode 100644 index 8aa1057..0000000 --- a/converters/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .FrameInfo import FrameInfo -from .ConverterConfig import ConverterConfig, ConverterConfigMasked, ConverterConfigFaceAvatar -from .ConvertMasked import ConvertMasked -from .ConvertAvatar import ConvertFaceAvatar diff --git a/utils/cv2_utils.py b/core/cv2ex.py similarity index 100% rename from utils/cv2_utils.py rename to core/cv2ex.py diff --git a/imagelib/IEPolys.py b/core/imagelib/IEPolys.py similarity index 100% rename from imagelib/IEPolys.py rename to core/imagelib/IEPolys.py diff --git a/imagelib/__init__.py b/core/imagelib/__init__.py similarity index 86% rename from imagelib/__init__.py rename to core/imagelib/__init__.py index 7a1ed9b..6c0236f 100644 --- a/imagelib/__init__.py +++ b/core/imagelib/__init__.py @@ -13,9 +13,7 @@ from .reduce_colors import reduce_colors from .color_transfer import color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer, seamless_clone -from .RankSRGAN import RankSRGAN - -from .common import normalize_channels, overlay_alpha_image +from .common import normalize_channels, cut_odd_image, overlay_alpha_image from .IEPolys import IEPolys diff --git a/imagelib/blur.py b/core/imagelib/blur.py similarity index 100% rename from imagelib/blur.py rename to core/imagelib/blur.py diff --git a/imagelib/color_transfer.py b/core/imagelib/color_transfer.py similarity index 100% rename from imagelib/color_transfer.py rename to core/imagelib/color_transfer.py diff --git a/imagelib/common.py b/core/imagelib/common.py similarity index 88% rename from imagelib/common.py rename to core/imagelib/common.py index 2e66441..d73df8b 100644 --- a/imagelib/common.py +++ b/core/imagelib/common.py @@ -23,7 +23,14 @@ def normalize_channels(img, target_channels): c = target_channels return img - + +def cut_odd_image(img): + h, w, c = img.shape + wm, hm = w % 2, h % 2 + if wm + hm != 0: + img = img[0:h-hm,0:w-wm,:] + return img + def overlay_alpha_image(img_target, img_source, xy_offset=(0,0) ): (h,w,c) = img_source.shape if c != 4: diff --git a/imagelib/draw.py b/core/imagelib/draw.py similarity index 100% rename from imagelib/draw.py rename to core/imagelib/draw.py diff --git a/imagelib/equalize_and_stack_square.py b/core/imagelib/equalize_and_stack_square.py similarity index 100% rename from imagelib/equalize_and_stack_square.py rename to core/imagelib/equalize_and_stack_square.py diff --git a/imagelib/estimate_sharpness.py b/core/imagelib/estimate_sharpness.py similarity index 100% rename from imagelib/estimate_sharpness.py rename to core/imagelib/estimate_sharpness.py diff --git a/imagelib/morph.py b/core/imagelib/morph.py similarity index 100% rename from imagelib/morph.py rename to core/imagelib/morph.py diff --git a/imagelib/reduce_colors.py b/core/imagelib/reduce_colors.py similarity index 100% rename from imagelib/reduce_colors.py rename to core/imagelib/reduce_colors.py diff --git a/imagelib/text.py b/core/imagelib/text.py similarity index 100% rename from imagelib/text.py rename to core/imagelib/text.py diff --git a/imagelib/warp.py b/core/imagelib/warp.py similarity index 88% rename from imagelib/warp.py rename to core/imagelib/warp.py index a3c9490..d5d79b4 100644 --- a/imagelib/warp.py +++ b/core/imagelib/warp.py @@ -1,6 +1,6 @@ import numpy as np import cv2 -from utils import random_utils +from core import 
randomex def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05], rnd_seed=None ): h,w,c = source.shape @@ -26,8 +26,8 @@ def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0 mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy() mapy = mapx.T - mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) - mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + randomex.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + randomex.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) half_cell_size = cell_size // 2 diff --git a/interact/__init__.py b/core/interact/__init__.py similarity index 100% rename from interact/__init__.py rename to core/interact/__init__.py diff --git a/interact/interact.py b/core/interact/interact.py similarity index 74% rename from interact/interact.py rename to core/interact/interact.py index dce7c37..46fd450 100644 --- a/interact/interact.py +++ b/core/interact/interact.py @@ -4,6 +4,7 @@ import sys import time import types +import colorama import cv2 from tqdm import tqdm @@ -16,6 +17,8 @@ try: except: is_colab = False +yn_str = {True:'y',False:'n'} + class InteractBase(object): EVENT_LBUTTONDOWN = 1 EVENT_LBUTTONUP = 2 @@ -186,13 +189,35 @@ class InteractBase(object): ar = self.key_events.get(wnd_name, []) self.key_events[wnd_name] = [] return ar + + def input(self, s): + return input(s) - def input_number(self, s, default_value, valid_list=None, help_message=None): + def input_number(self, s, default_value, valid_list=None, show_default_value=True, add_info=None, help_message=None): + if show_default_value and default_value is not None: + s = f"[{default_value}] {s}" + + if add_info is not None or \ + help_message is not None: + s += " (" + + if add_info is not None: + s += f" {add_info}" + if help_message is not None: + s += " ?:help" + + if add_info is not None or \ + help_message is not None: + s += " )" + + s += " : " + while True: try: inp = input(s) if len(inp) == 0: - raise ValueError("") + result = default_value + break if help_message is not None and inp == '?': print (help_message) @@ -200,13 +225,39 @@ class InteractBase(object): i = float(inp) if (valid_list is not None) and (i not in valid_list): - return default_value - return i + result = default_value + break + result = i + break except: - print (default_value) - return default_value - - def input_int(self,s, default_value, valid_list=None, help_message=None): + result = default_value + break + + print(result) + return result + + def input_int(self, s, default_value, valid_list=None, add_info=None, show_default_value=True, help_message=None): + if show_default_value: + if len(s) != 0: + s = f"[{default_value}] {s}" + else: + s = f"[{default_value}]" + + if add_info is not None or \ + help_message is not None: + s += " (" + + if add_info is not None: + s += f" {add_info}" + if help_message is not None: + s += " ?:help" + + if add_info is not None or \ + help_message is not None: + s += " )" + + s += " : " + while True: try: inp = input(s) @@ -219,13 +270,23 @@ class InteractBase(object): i = int(inp) if (valid_list is not None) and (i not in valid_list): - return default_value - return i + result = default_value + break + result = i + break except: - print 
(default_value) - return default_value + result = default_value + break + print (result) + return result - def input_bool(self, s, default_value, help_message=None): + def input_bool(self, s, default_value, help_message=None): + s = f"[{yn_str[default_value]}] {s} ( y/n" + + if help_message is not None: + s += " ?:help" + s += " ) : " + while True: try: inp = input(s) @@ -236,35 +297,65 @@ class InteractBase(object): print (help_message) continue - return bool ( {"y":True,"n":False,"1":True,"0":False}.get(inp.lower(), default_value) ) + return bool ( {"y":True,"n":False}.get(inp.lower(), default_value) ) except: print ( "y" if default_value else "n" ) return default_value - def input_str(self, s, default_value, valid_list=None, help_message=None): + def input_str(self, s, default_value=None, valid_list=None, show_default_value=True, help_message=None): + if show_default_value and default_value is not None: + s = f"[{default_value}] {s}" + + if valid_list is not None or \ + help_message is not None: + s += " (" + + if valid_list is not None: + s += " " + "/".join(valid_list) + + if help_message is not None: + s += " ?:help" + + if valid_list is not None or \ + help_message is not None: + s += " )" + + s += " : " + + while True: try: inp = input(s) + if len(inp) == 0: - raise ValueError("") - + if default_value is None: + print("") + return None + result = default_value + break + if help_message is not None and inp == '?': - print (help_message) + print(help_message) continue - + if valid_list is not None: if inp.lower() in valid_list: - return inp.lower() + result = inp.lower() + break if inp in valid_list: - return inp - return default_value - - return inp + result = inp + break + continue + result = inp + break except: - print (default_value) - return default_value - + result = default_value + break + + print(result) + return result + def input_process(self, stdin_fd, sq, str): sys.stdin = os.fdopen(stdin_fd) try: @@ -292,7 +383,14 @@ class InteractBase(object): class InteractDesktop(InteractBase): - + def __init__(self): + colorama.init() + super().__init__() + + def color_red(self): + pass + + def is_support_windows(self): return True @@ -347,9 +445,20 @@ class InteractDesktop(InteractBase): ord_key = cv2.waitKey(wait_key_time) shift_pressed = False if ord_key != -1: - if chr(ord_key) >= 'A' and chr(ord_key) <= 'Z': + chr_key = chr(ord_key) + + if chr_key >= 'A' and chr_key <= 'Z': shift_pressed = True ord_key += 32 + elif chr_key == '?': + shift_pressed = True + ord_key = ord('/') + elif chr_key == '<': + shift_pressed = True + ord_key = ord(',') + elif chr_key == '>': + shift_pressed = True + ord_key = ord('.') else: if sleep_time != 0: time.sleep(sleep_time) diff --git a/joblib/SubprocessFunctionCaller.py b/core/joblib/SubprocessFunctionCaller.py similarity index 100% rename from joblib/SubprocessFunctionCaller.py rename to core/joblib/SubprocessFunctionCaller.py diff --git a/utils/iter_utils.py b/core/joblib/SubprocessGenerator.py similarity index 75% rename from utils/iter_utils.py rename to core/joblib/SubprocessGenerator.py index e690e3b..b0d893e 100644 --- a/utils/iter_utils.py +++ b/core/joblib/SubprocessGenerator.py @@ -1,28 +1,8 @@ -import threading import queue as Queue import multiprocessing -import time - - -class ThisThreadGenerator(object): - def __init__(self, generator_func, user_param=None): - super().__init__() - self.generator_func = generator_func - self.user_param = user_param - self.initialized = False - - def __iter__(self): - return self - - def __next__(self): 
- if not self.initialized: - self.initialized = True - self.generator_func = self.generator_func(self.user_param) - - return next(self.generator_func) class SubprocessGenerator(object): - def __init__(self, generator_func, user_param=None, prefetch=2, start_now=False): + def __init__(self, generator_func, user_param=None, prefetch=2, start_now=True): super().__init__() self.prefetch = prefetch self.generator_func = generator_func diff --git a/joblib/SubprocessorBase.py b/core/joblib/SubprocessorBase.py similarity index 99% rename from joblib/SubprocessorBase.py rename to core/joblib/SubprocessorBase.py index a9cbc36..993d5cb 100644 --- a/joblib/SubprocessorBase.py +++ b/core/joblib/SubprocessorBase.py @@ -2,7 +2,7 @@ import traceback import multiprocessing import time import sys -from interact import interact as io +from core.interact import interact as io class Subprocessor(object): @@ -87,7 +87,7 @@ class Subprocessor(object): c2s.put ( {'op': 'error', 'data' : data} ) #overridable - def __init__(self, name, SubprocessorCli_class, no_response_time_sec = 0, io_loop_sleep_time=0.005, initialize_subprocesses_in_serial=True): + def __init__(self, name, SubprocessorCli_class, no_response_time_sec = 0, io_loop_sleep_time=0.005, initialize_subprocesses_in_serial=False): if not issubclass(SubprocessorCli_class, Subprocessor.Cli): raise ValueError("SubprocessorCli_class must be subclass of Subprocessor.Cli") @@ -130,7 +130,7 @@ class Subprocessor(object): #overridable def get_result(self): #return result that will be returned in func run() - raise NotImplementedError + return None #overridable def on_tick(self): diff --git a/core/joblib/ThisThreadGenerator.py b/core/joblib/ThisThreadGenerator.py new file mode 100644 index 0000000..e6f77a4 --- /dev/null +++ b/core/joblib/ThisThreadGenerator.py @@ -0,0 +1,16 @@ +class ThisThreadGenerator(object): + def __init__(self, generator_func, user_param=None): + super().__init__() + self.generator_func = generator_func + self.user_param = user_param + self.initialized = False + + def __iter__(self): + return self + + def __next__(self): + if not self.initialized: + self.initialized = True + self.generator_func = self.generator_func(self.user_param) + + return next(self.generator_func) \ No newline at end of file diff --git a/joblib/__init__.py b/core/joblib/__init__.py similarity index 50% rename from joblib/__init__.py rename to core/joblib/__init__.py index fbbc20c..68b3101 100644 --- a/joblib/__init__.py +++ b/core/joblib/__init__.py @@ -1,2 +1,4 @@ from .SubprocessorBase import Subprocessor from .SubprocessFunctionCaller import SubprocessFunctionCaller +from .ThisThreadGenerator import ThisThreadGenerator +from .SubprocessGenerator import SubprocessGenerator \ No newline at end of file diff --git a/core/leras/__init__.py b/core/leras/__init__.py new file mode 100644 index 0000000..7d9fb2b --- /dev/null +++ b/core/leras/__init__.py @@ -0,0 +1 @@ +from .nn import nn \ No newline at end of file diff --git a/core/leras/device.py b/core/leras/device.py new file mode 100644 index 0000000..e18ea2a --- /dev/null +++ b/core/leras/device.py @@ -0,0 +1,205 @@ +import sys +import ctypes +import os + +class Device(object): + def __init__(self, index, name, total_mem, free_mem, cc=0): + self.index = index + self.name = name + self.cc = cc + self.total_mem = total_mem + self.total_mem_gb = total_mem / 1024**3 + self.free_mem = free_mem + self.free_mem_gb = free_mem / 1024**3 + + def __str__(self): + return 
f"[{self.index}]:[{self.name}][{self.free_mem_gb:.3}/{self.total_mem_gb :.3}]" + +class Devices(object): + all_devices = None + + def __init__(self, devices): + self.devices = devices + + def __len__(self): + return len(self.devices) + + def __getitem__(self, key): + result = self.devices[key] + if isinstance(key, slice): + return Devices(result) + return result + + def __iter__(self): + for device in self.devices: + yield device + + def get_best_device(self): + result = None + idx_mem = 0 + for device in self.devices: + mem = device.total_mem + if mem > idx_mem: + result = device + idx_mem = mem + return result + + def get_worst_device(self): + result = None + idx_mem = sys.maxsize + for device in self.devices: + mem = device.total_mem + if mem < idx_mem: + result = device + idx_mem = mem + return result + + def get_device_by_index(self, idx): + for device in self.devices: + if device.index == idx: + return device + return None + + def get_devices_from_index_list(self, idx_list): + result = [] + for device in self.devices: + if device.index in idx_list: + result += [device] + return Devices(result) + + def get_equal_devices(self, device): + device_name = device.name + result = [] + for device in self.devices: + if device.name == device_name: + result.append (device) + return Devices(result) + + def get_devices_at_least_mem(self, totalmemsize_gb): + result = [] + for device in self.devices: + if device.total_mem >= totalmemsize_gb*(1024**3): + result.append (device) + return Devices(result) + + @staticmethod + def initialize_main_env(): + min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35)) + libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll') + for libname in libnames: + try: + cuda = ctypes.CDLL(libname) + except: + continue + else: + break + else: + return Devices([]) + + nGpus = ctypes.c_int() + name = b' ' * 200 + cc_major = ctypes.c_int() + cc_minor = ctypes.c_int() + freeMem = ctypes.c_size_t() + totalMem = ctypes.c_size_t() + + result = ctypes.c_int() + device = ctypes.c_int() + context = ctypes.c_void_p() + error_str = ctypes.c_char_p() + + devices = [] + + if cuda.cuInit(0) == 0 and \ + cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0: + for i in range(nGpus.value): + if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \ + cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \ + cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0: + continue + + if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0: + if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0: + cc = cc_major.value * 10 + cc_minor.value + if cc >= min_cc: + devices.append ( {'name' : name.split(b'\0', 1)[0].decode(), + 'total_mem' : totalMem.value, + 'free_mem' : freeMem.value, + 'cc' : cc + }) + cuda.cuCtxDetach(context) + + os.environ['NN_DEVICES_INITIALIZED'] = '1' + os.environ['NN_DEVICES_COUNT'] = str(len(devices)) + for i, device in enumerate(devices): + os.environ[f'NN_DEVICE_{i}_NAME'] = device['name'] + os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem']) + os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem']) + os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc']) + + @staticmethod + def getDevices(): + if Devices.all_devices is None: + if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1: + raise Exception("nn devices are not initialized. 
Run initialize_main_env() in main process.") + devices = [] + for i in range ( int(os.environ['NN_DEVICES_COUNT']) ): + devices.append ( Device(index=i, + name=os.environ[f'NN_DEVICE_{i}_NAME'], + total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']), + free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']), + cc=int(os.environ[f'NN_DEVICE_{i}_CC']) )) + Devices.all_devices = Devices(devices) + + return Devices.all_devices + +""" +if Devices.all_devices is None: + min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35)) + + libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll') + for libname in libnames: + try: + cuda = ctypes.CDLL(libname) + except: + continue + else: + break + else: + return Devices([]) + + nGpus = ctypes.c_int() + name = b' ' * 200 + cc_major = ctypes.c_int() + cc_minor = ctypes.c_int() + freeMem = ctypes.c_size_t() + totalMem = ctypes.c_size_t() + + result = ctypes.c_int() + device = ctypes.c_int() + context = ctypes.c_void_p() + error_str = ctypes.c_char_p() + + devices = [] + + if cuda.cuInit(0) == 0 and \ + cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0: + for i in range(nGpus.value): + if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \ + cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \ + cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0: + continue + + if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0: + if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0: + cc = cc_major.value * 10 + cc_minor.value + if cc >= min_cc: + devices.append ( Device(index=i, + name=name.split(b'\0', 1)[0].decode(), + total_mem=totalMem.value, + free_mem=freeMem.value, + cc=cc) ) + cuda.cuCtxDetach(context) + Devices.all_devices = Devices(devices) + return Devices.all_devices +""" \ No newline at end of file diff --git a/core/leras/initializers.py b/core/leras/initializers.py new file mode 100644 index 0000000..a3294cb --- /dev/null +++ b/core/leras/initializers.py @@ -0,0 +1,52 @@ +import numpy as np + +def initialize_initializers(nn): + tf = nn.tf + from tensorflow.python.ops import init_ops + + class initializers(): + class ca (init_ops.Initializer): + def __init__(self, dtype=None): + pass + + def __call__(self, shape, dtype=None, partition_info=None): + return tf.zeros( shape, name="_cai_") + + @staticmethod + def generate(shape, eps_std=0.05, dtype=np.float32): + """ + Super fast implementation of Convolution Aware Initialization for 4D shapes + Convolution Aware Initialization https://arxiv.org/abs/1702.06295 + """ + if len(shape) != 4: + raise ValueError("only shape with rank 4 supported.") + + row, column, stack_size, filters_size = shape + + fan_in = stack_size * (row * column) + + kernel_shape = (row, column) + + kernel_fft_shape = np.fft.rfft2(np.zeros(kernel_shape)).shape + + basis_size = np.prod(kernel_fft_shape) + if basis_size == 1: + x = np.random.normal( 0.0, eps_std, (filters_size, stack_size, basis_size) ) + else: + nbb = stack_size // basis_size + 1 + x = np.random.normal(0.0, 1.0, (filters_size, nbb, basis_size, basis_size)) + x = x + np.transpose(x, (0,1,3,2) ) * (1-np.eye(basis_size)) + u, _, v = np.linalg.svd(x) + x = np.transpose(u, (0,1,3,2) ) + x = np.reshape(x, (filters_size, -1, basis_size) ) + x = x[:,:stack_size,:] + + x = np.reshape(x, ( (filters_size,stack_size,) + kernel_fft_shape ) ) + + x = np.fft.irfft2( x, kernel_shape ) \ + + np.random.normal(0, eps_std, (filters_size,stack_size,)+kernel_shape) + + x = x * np.sqrt( (2/fan_in) / np.var(x) ) + x = 
np.transpose( x, (2, 3, 1, 0) ) + return x.astype(dtype) + nn.initializers = initializers \ No newline at end of file diff --git a/core/leras/layers.py b/core/leras/layers.py new file mode 100644 index 0000000..7597ccf --- /dev/null +++ b/core/leras/layers.py @@ -0,0 +1,591 @@ +import pickle +import types +from pathlib import Path +from core import pathex +from core.interact import interact as io +import numpy as np + + +def initialize_layers(nn): + tf = nn.tf + + class Saveable(): + def __init__(self, name=None): + self.name = name + + #override + def get_weights(self): + #return tf tensors that should be initialized/loaded/saved + pass + + def save_weights(self, filename, force_dtype=None): + d = {} + weights = self.get_weights() + + if self.name is None: + raise Exception("name must be defined.") + + name = self.name + for w, w_val in zip(weights, nn.tf_sess.run (weights)): + w_name_split = w.name.split('/', 1) + if name != w_name_split[0]: + raise Exception("weight first name != Saveable.name") + + if force_dtype is not None: + w_val = w_val.astype(force_dtype) + + d[ w_name_split[1] ] = w_val + + d_dumped = pickle.dumps (d, 4) + pathex.write_bytes_safe ( Path(filename), d_dumped ) + + def load_weights(self, filename): + """ + returns True if file exists + """ + filepath = Path(filename) + if filepath.exists(): + result = True + d_dumped = filepath.read_bytes() + d = pickle.loads(d_dumped) + else: + return False + + weights = self.get_weights() + + if self.name is None: + raise Exception("name must be defined.") + + tuples = [] + for w in weights: + w_name_split = w.name.split('/') + if self.name != w_name_split[0]: + raise Exception("weight first name != Saveable.name") + + sub_w_name = "/".join(w_name_split[1:]) + + w_val = d.get(sub_w_name, None) + if w_val is None: + io.log_err(f"Weight {w.name} was not loaded from file {filename}") + tuples.append ( (w, w.initializer) ) + else: + tuples.append ( (w, w_val) ) + + nn.tf_batch_set_value(tuples) + + return True + + def init_weights(self): + ops = [] + tuples = [] + for w in self.get_weights(): + initializer = w.initializer + for input in initializer.inputs: + if "_cai_" in input.name: + tuples.append ( (w, nn.initializers.ca.generate(w.shape.as_list(), dtype= w.dtype.as_numpy_dtype) ) ) + break + else: + ops.append (initializer) + + nn.tf_sess.run (ops) + nn.tf_batch_set_value(tuples) + nn.Saveable = Saveable + + class LayerBase(): + def __init__(self, name=None, **kwargs): + self.name = name + + #override + def build_weights(self): + pass + + #override + def get_weights(self): + return [] + + def set_weights(self, new_weights): + weights = self.get_weights() + if len(weights) != len(new_weights): + raise ValueError ('len of lists mismatch') + + tuples = [] + for w, new_w in zip(weights, new_weights): + if len(w.shape) != new_w.shape: + new_w = new_w.reshape(w.shape) + + tuples.append ( (w, new_w) ) + + nn.tf_batch_set_value (tuples) + nn.LayerBase = LayerBase + + class ModelBase(Saveable): + def __init__(self, *args, name=None, **kwargs): + super().__init__(name=name) + self.layers = [] + self.built = False + self.args = args + self.kwargs = kwargs + self.run_placeholders = None + + def _build_sub(self, layer, name): + if isinstance (layer, list): + for i,sublayer in enumerate(layer): + self._build_sub(sublayer, f"{name}_{i}") + elif isinstance (layer, LayerBase) or \ + isinstance (layer, ModelBase): + + if layer.name is None: + layer.name = name + + if isinstance (layer, LayerBase): + with tf.variable_scope(layer.name): + 
layer.build_weights() + elif isinstance (layer, ModelBase): + layer.build() + + self.layers.append (layer) + + def xor_list(self, lst1, lst2): + return [value for value in lst1+lst2 if (value not in lst1) or (value not in lst2) ] + + def build(self): + with tf.variable_scope(self.name): + + current_vars = [] + generator = None + while True: + + if generator is None: + generator = self.on_build(*self.args, **self.kwargs) + if not isinstance(generator, types.GeneratorType): + generator = None + + if generator is not None: + try: + next(generator) + except StopIteration: + generator = None + + v = vars(self) + new_vars = self.xor_list (current_vars, list(v.keys()) ) + + for name in new_vars: + self._build_sub(v[name],name) + + current_vars += new_vars + + if generator is None: + break + + self.built = True + + #override + def get_weights(self): + if not self.built: + self.build() + + weights = [] + for layer in self.layers: + weights += layer.get_weights() + return weights + + def get_layers(self): + if not self.built: + self.build() + layers = [] + for layer in self.layers: + if isinstance (layer, LayerBase): + layers.append(layer) + else: + layers += layer.get_layers() + return layers + + #override + def on_build(self, *args, **kwargs): + """ + init model layers here + + return 'yield' if build is not finished + therefore dependency models will be initialized + """ + pass + + #override + def forward(self, *args, **kwargs): + #flow layers/models/tensors here + pass + + def __call__(self, *args, **kwargs): + if not self.built: + self.build() + + return self.forward(*args, **kwargs) + + def compute_output_shape(self, shapes): + if not self.built: + self.build() + + not_list = False + if not isinstance(shapes, list): + not_list = True + shapes = [shapes] + + with tf.device('/CPU:0'): + # CPU tensors will not impact any performance, only slightly RAM "leakage" + phs = [] + for dtype,sh in shapes: + phs += [ tf.placeholder(dtype, sh) ] + + result = self.__call__(phs[0] if not_list else phs) + + if not isinstance(result, list): + result = [result] + + result_shapes = [] + + for t in result: + result_shapes += [ t.shape.as_list() ] + + return result_shapes[0] if not_list else result_shapes + + def build_for_run(self, shapes_list): + if not isinstance(shapes_list, list): + raise ValueError("shapes_list must be a list.") + + self.run_placeholders = [] + for dtype,sh in shapes_list: + self.run_placeholders.append ( tf.placeholder(dtype, (None,)+sh) ) + + self.run_output = self.__call__(self.run_placeholders) + + def run (self, inputs): + if self.run_placeholders is None: + raise Exception ("Model didn't build for run.") + + if len(inputs) != len(self.run_placeholders): + raise ValueError("len(inputs) != self.run_placeholders") + + feed_dict = {} + for ph, inp in zip(self.run_placeholders, inputs): + feed_dict[ph] = inp + + return nn.tf_sess.run ( self.run_output, feed_dict=feed_dict) + + nn.ModelBase = ModelBase + + class Conv2D(LayerBase): + """ + use_wscale bool enables equalized learning rate, kernel_initializer will be forced to random_normal + + + """ + def __init__(self, in_ch, out_ch, kernel_size, strides=1, padding='SAME', dilations=1, use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + if not isinstance(strides, int): + raise ValueError ("strides must be an int type") + if not isinstance(dilations, int): + raise ValueError ("dilations must be an int type") + + if isinstance(padding, str): + if padding == "SAME": + padding 
= ( (kernel_size - 1) * dilations + 1 ) // 2 + elif padding == "VALID": + padding = 0 + else: + raise ValueError ("Wrong padding type. Should be VALID SAME or INT or 4x INTs") + + if isinstance(padding, int): + if padding != 0: + padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ] + else: + padding = None + + self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.strides = [1,strides,strides,1] + self.padding = padding + self.dilations = [1,dilations,dilations,1] + self.use_bias = use_bias + self.use_wscale = use_wscale + self.kernel_initializer = None if use_wscale else kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + if dtype is None: + dtype = nn.tf_floatx + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + kernel_initializer = self.kernel_initializer + if kernel_initializer is None: + if self.use_wscale: + gain = 1.0 if self.kernel_size == 1 else np.sqrt(2) + fan_in = self.kernel_size*self.kernel_size*self.in_ch + he_std = gain / np.sqrt(fan_in) # He init + self.wscale = tf.constant(he_std, dtype=self.dtype ) + kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + else: + kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype) + + self.weight = tf.get_variable("weight", (self.kernel_size,self.kernel_size,self.in_ch,self.out_ch), dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + + self.bias = tf.get_variable("bias", (1,1,1,self.out_ch), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def __call__(self, x): + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + if self.padding is not None: + x = tf.pad (x, self.padding, mode='CONSTANT') + + x = tf.nn.conv2d(x, weight, self.strides, 'VALID', dilations=self.dilations) + if self.use_bias: + x = x + self.bias + return x + + def __str__(self): + r = f"{self.__class__.__name__} : in_ch:{self.in_ch} out_ch:{self.out_ch} " + + return r + nn.Conv2D = Conv2D + + class Conv2DTranspose(LayerBase): + """ + use_wscale enables weight scale (equalized learning rate) + kernel_initializer will be forced to random_normal + """ + def __init__(self, in_ch, out_ch, kernel_size, strides=2, padding='SAME', use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + if not isinstance(strides, int): + raise ValueError ("strides must be an int type") + self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.strides = strides + self.padding = padding + self.use_bias = use_bias + self.use_wscale = use_wscale + self.kernel_initializer = None if use_wscale else kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + if dtype is None: + dtype = nn.tf_floatx + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + kernel_initializer = self.kernel_initializer + if kernel_initializer is None: + if self.use_wscale: + gain = 1.0 if self.kernel_size == 1 else np.sqrt(2) + fan_in = self.kernel_size*self.kernel_size*self.in_ch + he_std = gain / np.sqrt(fan_in) # He init + self.wscale = tf.constant(he_std, dtype=self.dtype ) + 
kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + else: + kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype) + + self.weight = tf.get_variable("weight", (self.kernel_size,self.kernel_size,self.out_ch,self.in_ch), dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + self.bias = tf.get_variable("bias", (1,1,1,self.out_ch), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def __call__(self, x): + shape = x.shape + + h,w,c = shape[1], shape[2], shape[3] + + output_shape = tf.stack ( (tf.shape(x)[0], + self.deconv_length(w, self.strides, self.kernel_size, self.padding), + self.deconv_length(h, self.strides, self.kernel_size, self.padding), + self.out_ch) ) + + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + x = tf.nn.conv2d_transpose(x, weight, output_shape, [1,self.strides,self.strides,1], padding=self.padding) + + if self.use_bias: + x = x + self.bias + return x + + def __str__(self): + r = f"{self.__class__.__name__} : in_ch:{self.in_ch} out_ch:{self.out_ch} " + + return r + + def deconv_length(self, dim_size, stride_size, kernel_size, padding): + assert padding in {'SAME', 'VALID', 'FULL'} + if dim_size is None: + return None + if padding == 'VALID': + dim_size = dim_size * stride_size + max(kernel_size - stride_size, 0) + elif padding == 'FULL': + dim_size = dim_size * stride_size - (stride_size + kernel_size - 2) + elif padding == 'SAME': + dim_size = dim_size * stride_size + return dim_size + nn.Conv2DTranspose = Conv2DTranspose + + class BlurPool(LayerBase): + def __init__(self, filt_size=3, stride=2, **kwargs ): + self.strides = [1,stride,stride,1] + self.filt_size = filt_size + self.padding = [ [0,0], + [ int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ], + [ int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ], + [0,0] ] + if(self.filt_size==1): + a = np.array([1.,]) + elif(self.filt_size==2): + a = np.array([1., 1.]) + elif(self.filt_size==3): + a = np.array([1., 2., 1.]) + elif(self.filt_size==4): + a = np.array([1., 3., 3., 1.]) + elif(self.filt_size==5): + a = np.array([1., 4., 6., 4., 1.]) + elif(self.filt_size==6): + a = np.array([1., 5., 10., 10., 5., 1.]) + elif(self.filt_size==7): + a = np.array([1., 6., 15., 20., 15., 6., 1.]) + + a = a[:,None]*a[None,:] + a = a / np.sum(a) + a = a[:,:,None,None] + self.a = a + super().__init__(**kwargs) + + def build_weights(self): + self.k = tf.constant (self.a, dtype=nn.tf_floatx ) + + def __call__(self, x): + k = tf.tile (self.k, (1,1,x.shape[-1],1) ) + x = tf.pad(x, self.padding ) + x = tf.nn.depthwise_conv2d(x, k, self.strides, 'VALID') + return x + nn.BlurPool = BlurPool + + class Dense(LayerBase): + def __init__(self, in_ch, out_ch, use_bias=True, use_wscale=False, maxout_ch=0, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + """ + use_wscale enables weight scale (equalized learning rate) + kernel_initializer will be forced to random_normal + + maxout_ch https://link.springer.com/article/10.1186/s40537-019-0233-0 + typical 2-4 if you want to enable DenseMaxout behaviour + """ + self.in_ch = in_ch + self.out_ch = out_ch + self.use_bias = use_bias + self.use_wscale = use_wscale 
+ self.maxout_ch = maxout_ch + self.kernel_initializer = kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + if dtype is None: + dtype = tf.float32 + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + if self.maxout_ch > 1: + weight_shape = (self.in_ch,self.out_ch*self.maxout_ch) + else: + weight_shape = (self.in_ch,self.out_ch) + + kernel_initializer = self.kernel_initializer + if kernel_initializer is None: + if self.use_wscale: + gain = 1.0 + fan_in = np.prod( weight_shape[:-1] ) + he_std = gain / np.sqrt(fan_in) # He init + self.wscale = tf.constant(he_std, dtype=self.dtype ) + kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + else: + kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype) + + self.weight = tf.get_variable("weight", weight_shape, dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + self.bias = tf.get_variable("bias", (1,self.out_ch), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def __call__(self, x): + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + x = tf.matmul(x, weight) + + if self.maxout_ch > 1: + x = tf.reshape (x, (-1, self.out_ch, self.maxout_ch) ) + x = tf.reduce_max(x, axis=-1) + + if self.use_bias: + x = x + self.bias + + return x + nn.Dense = Dense + + class BatchNorm2D(LayerBase): + """ + currently not for training + """ + def __init__(self, dim, eps=1e-05, momentum=0.1, dtype=None, **kwargs ): + self.dim = dim + self.eps = eps + self.momentum = momentum + if dtype is None: + dtype = nn.tf_floatx + self.dtype = dtype + + self.shape = (1,1,1,dim) + + super().__init__(**kwargs) + + def build_weights(self): + self.weight = tf.get_variable("weight", self.shape, dtype=self.dtype, initializer=tf.initializers.ones() ) + self.bias = tf.get_variable("bias", self.shape, dtype=self.dtype, initializer=tf.initializers.zeros() ) + self.running_mean = tf.get_variable("running_mean", self.shape, dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False ) + self.running_var = tf.get_variable("running_var", self.shape, dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False ) + + def get_weights(self): + return [self.weight, self.bias, self.running_mean, self.running_var] + + def __call__(self, x): + x = (x - self.running_mean) / tf.sqrt( self.running_var + self.eps ) + x *= self.weight + x += self.bias + return x + + nn.BatchNorm2D = BatchNorm2D \ No newline at end of file diff --git a/core/leras/nn.py b/core/leras/nn.py new file mode 100644 index 0000000..280bcdb --- /dev/null +++ b/core/leras/nn.py @@ -0,0 +1,256 @@ +""" +Leras. + +like lighter keras. +This is my lightweight neural network library written from scratch +based on pure tensorflow without keras. 
+ +Provides: ++ full freedom of tensorflow operations without keras model's restrictions ++ easy model operations like in PyTorch, but in graph mode (no eager execution) ++ convenient and understandable logic + +Reasons why we cannot import tensorflow or any tensorflow.sub modules right here: +1) change env variables based on DeviceConfig before import tensorflow +2) multiprocesses will import tensorflow every spawn +""" + +import os +import sys +from pathlib import Path +from core.interact import interact as io +from .device import Devices + +class nn(): + current_DeviceConfig = None + + tf = None + tf_sess = None + tf_sess_config = None + + # Tensor ops + tf_get_value = None + tf_batch_set_value = None + tf_gradients = None + tf_average_gv_list = None + tf_average_tensor_list = None + tf_dot = None + tf_gelu = None + tf_upsample2d = None + tf_upsample2d_bilinear = None + tf_flatten = None + tf_random_binomial = None + tf_gaussian_blur = None + tf_style_loss = None + tf_dssim = None + + # Layers + Saveable = None + LayerBase = None + ModelBase = None + Conv2D = None + Conv2DTranspose = None + BlurPool = None + Dense = None + BatchNorm2D = None + + # Initializers + initializers = None + + # Optimizers + TFBaseOptimizer = None + TFRMSpropOptimizer = None + + @staticmethod + def initialize(device_config=None): + if nn.tf is None: + if device_config is None: + device_config = nn.getCurrentDeviceConfig() + else: + nn.setCurrentDeviceConfig(device_config) + + if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): + os.environ.pop('CUDA_VISIBLE_DEVICES') + + os.environ['CUDA_​CACHE_​MAXSIZE'] = '536870912' #512Mb (32mb default) + + first_run = False + + if sys.platform[0:3] == 'win': + devices_str = "" + for device in device_config.devices: + devices_str += "_" + device.name.replace(' ','_') + + compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache' + devices_str) + if not compute_cache_path.exists(): + first_run = True + os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path) + + os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # tf log errors only + + import warnings + warnings.simplefilter(action='ignore', category=FutureWarning) + + if first_run: + io.log_info("Caching GPU kernels...") + + import tensorflow as tf + nn.tf = tf + + if device_config.cpu_only: + config = tf.ConfigProto(device_count={'GPU': 0}) + else: + config = tf.ConfigProto() + config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices]) + + config.gpu_options.force_gpu_compatible = True + config.gpu_options.allow_growth = True + nn.tf_sess_config = config + + nn.tf_floatx = nn.tf.float32 #nn.tf.float16 if device_config.use_fp16 else nn.tf.float32 + nn.np_floatx = nn.tf_floatx.as_numpy_dtype + + from .tensor_ops import initialize_tensor_ops + from .layers import initialize_layers + from .initializers import initialize_initializers + from .optimizers import initialize_optimizers + + initialize_tensor_ops(nn) + initialize_layers(nn) + initialize_initializers(nn) + initialize_optimizers(nn) + + if nn.tf_sess is None: + nn.tf_sess = tf.Session(config=nn.tf_sess_config) + + @staticmethod + def initialize_main_env(): + Devices.initialize_main_env() + + @staticmethod + def getCurrentDeviceConfig(): + if nn.current_DeviceConfig is None: + nn.current_DeviceConfig = DeviceConfig.BestGPU() + return nn.current_DeviceConfig + + @staticmethod + def setCurrentDeviceConfig(device_config): + nn.current_DeviceConfig = device_config + + 
@staticmethod + def tf_reset_session(): + if nn.tf is not None: + if nn.tf_sess is not None: + nn.tf.reset_default_graph() + nn.tf_sess.close() + nn.tf_sess = nn.tf.Session(config=nn.tf_sess_config) + + @staticmethod + def tf_close_session(): + if nn.tf_sess is not None: + nn.tf.reset_default_graph() + nn.tf_sess.close() + nn.tf_sess = None + + + @staticmethod + def ask_choose_device_idxs(choose_only_one=False, allow_cpu=True, suggest_best_multi_gpu=False, suggest_all_gpu=False, return_device_config=False): + devices = Devices.getDevices() + if len(devices) == 0: + return [] + + all_devices_indexes = [device.index for device in devices] + + if choose_only_one: + suggest_best_multi_gpu = False + suggest_all_gpu = False + + if suggest_all_gpu: + best_device_indexes = all_devices_indexes + elif suggest_best_multi_gpu: + best_device_indexes = [device.index for device in devices.get_equal_devices(devices.get_best_device()) ] + else: + best_device_indexes = [ devices.get_best_device().index ] + best_device_indexes = ",".join([str(x) for x in best_device_indexes]) + + io.log_info ("") + if choose_only_one: + io.log_info ("Choose one GPU idx.") + else: + io.log_info ("Choose one or several GPU idxs (separated by comma).") + io.log_info ("") + + if allow_cpu: + io.log_info ("[CPU] : CPU") + for device in devices: + io.log_info (f" [{device.index}] : {device.name}") + + io.log_info ("") + + while True: + try: + if choose_only_one: + choosed_idxs = io.input_str("Which GPU index to choose?", best_device_indexes) + else: + choosed_idxs = io.input_str("Which GPU indexes to choose?", best_device_indexes) + + if allow_cpu and choosed_idxs.lower() == "cpu": + choosed_idxs = [] + break + + choosed_idxs = [ int(x) for x in choosed_idxs.split(',') ] + + if choose_only_one: + if len(choosed_idxs) == 1: + break + else: + if all( [idx in all_devices_indexes for idx in choosed_idxs] ): + break + except: + pass + io.log_info ("") + + if return_device_config: + return nn.DeviceConfig.GPUIndexes(choosed_idxs) + else: + return choosed_idxs + + class DeviceConfig(): + def __init__ (self, devices=None): + devices = devices or [] + + if not isinstance(devices, Devices): + devices = Devices(devices) + + self.devices = devices + self.cpu_only = len(devices) == 0 + + @staticmethod + def BestGPU(): + devices = Devices.getDevices() + if len(devices) == 0: + return nn.DeviceConfig.CPU() + + return nn.DeviceConfig([devices.get_best_device()]) + + @staticmethod + def WorstGPU(): + devices = Devices.getDevices() + if len(devices) == 0: + return nn.DeviceConfig.CPU() + + return nn.DeviceConfig([devices.get_worst_device()]) + + @staticmethod + def GPUIndexes(indexes): + if len(indexes) != 0: + devices = Devices.getDevices().get_devices_from_index_list(indexes) + else: + devices = [] + + return nn.DeviceConfig(devices) + + @staticmethod + def CPU(): + return nn.DeviceConfig([]) diff --git a/core/leras/optimizers.py b/core/leras/optimizers.py new file mode 100644 index 0000000..0a6d477 --- /dev/null +++ b/core/leras/optimizers.py @@ -0,0 +1,108 @@ +def initialize_optimizers(nn): + tf = nn.tf + from tensorflow.python.ops import state_ops, control_flow_ops + + class TFBaseOptimizer(nn.Saveable): + def __init__(self, name=None): + super().__init__(name=name) + + def tf_clip_norm(self, g, c, n): + """Clip the gradient `g` if the L2 norm `n` exceeds `c`. + # Arguments + g: Tensor, the gradient tensor + c: float >= 0. Gradients will be clipped + when their L2 norm exceeds this value. + n: Tensor, actual norm of `g`. 
+ # Returns + Tensor, the gradient clipped if required. + """ + if c <= 0: # if clipnorm == 0 no need to add ops to the graph + return g + + condition = n >= c + then_expression = tf.scalar_mul(c / n, g) + else_expression = g + + # saving the shape to avoid converting sparse tensor to dense + if isinstance(then_expression, tf.Tensor): + g_shape = copy.copy(then_expression.get_shape()) + elif isinstance(then_expression, tf.IndexedSlices): + g_shape = copy.copy(then_expression.dense_shape) + if condition.dtype != tf.bool: + condition = tf.cast(condition, 'bool') + g = tf.cond(condition, + lambda: then_expression, + lambda: else_expression) + if isinstance(then_expression, tf.Tensor): + g.set_shape(g_shape) + elif isinstance(then_expression, tf.IndexedSlices): + g._dense_shape = g_shape + + return g + nn.TFBaseOptimizer = TFBaseOptimizer + + class TFRMSpropOptimizer(TFBaseOptimizer): + def __init__(self, lr=0.001, rho=0.9, lr_dropout=1.0, epsilon=1e-7, clipnorm=0.0, name=None): + super().__init__(name=name) + + if name is None: + raise ValueError('name must be defined.') + + self.lr_dropout = lr_dropout + self.clipnorm = clipnorm + + with tf.device('/CPU:0') : + with tf.variable_scope(self.name): + self.lr = tf.Variable (lr, name="lr") + self.rho = tf.Variable (rho, name="rho") + self.epsilon = tf.Variable (epsilon, name="epsilon") + self.iterations = tf.Variable(0, dtype=tf.int64, name='iters') + + self.accumulators = [] + self.accumulator_counter = 0 + self.accumulators_dict = {} + self.lr_rnds_dict = {} + + def get_weights(self): + return [self.lr, self.rho, self.epsilon, self.iterations] + self.accumulators + + def initialize_variables(self, trainable_weights, vars_on_cpu=True): + # Initialize here all trainable variables used in training + e = tf.device('/CPU:0') if vars_on_cpu else None + if e: e.__enter__() + with tf.variable_scope(self.name): + accumulators = [ tf.get_variable ( f'acc_{i+self.accumulator_counter}', v.shape, initializer=tf.initializers.constant(0.0), trainable=False) + for (i, v ) in enumerate(trainable_weights) ] + + self.accumulators_dict.update ( { v.name : acc for v,acc in zip(trainable_weights,accumulators) } ) + self.accumulators += accumulators + self.accumulator_counter += len(trainable_weights) + + if self.lr_dropout != 1.0: + lr_rnds = [ nn.tf_random_binomial( v.shape, p=self.lr_dropout) for v in trainable_weights ] + self.lr_rnds_dict.update ( { v.name : rnd for v,rnd in zip(trainable_weights,lr_rnds) } ) + if e: e.__exit__(None, None, None) + + def get_update_op(self, grads_vars): + updates = [] + lr = self.lr + if self.clipnorm > 0.0: + norm = tf.sqrt( sum([tf.reduce_sum(tf.square(g)) for g,v in grads_vars])) + updates += [ state_ops.assign_add( self.iterations, 1) ] + for i, (g,v) in enumerate(grads_vars): + if self.clipnorm > 0.0: + g = self.tf_clip_norm(g, self.clipnorm, norm) + + a = self.accumulators_dict[v.name] + new_a = self.rho * a + (1. 
- self.rho) * tf.square(g) + v_diff = - lr * g / (tf.sqrt(new_a) + self.epsilon) + if self.lr_dropout != 1.0: + lr_rnd = self.lr_rnds_dict[v.name] + v_diff *= lr_rnd + new_v = v + v_diff + + updates.append (state_ops.assign(a, new_a)) + updates.append (state_ops.assign(v, new_v)) + + return control_flow_ops.group ( *updates, name=self.name+'_updates') + nn.TFRMSpropOptimizer = TFRMSpropOptimizer \ No newline at end of file diff --git a/core/leras/tensor_ops.py b/core/leras/tensor_ops.py new file mode 100644 index 0000000..52aa5ab --- /dev/null +++ b/core/leras/tensor_ops.py @@ -0,0 +1,295 @@ +import numpy as np + +def initialize_tensor_ops(nn): + tf = nn.tf + from tensorflow.python.ops import array_ops, random_ops, math_ops, sparse_ops, gradients + from tensorflow.python.framework import sparse_tensor + + def tf_get_value(tensor): + return nn.tf_sess.run (tensor) + nn.tf_get_value = tf_get_value + + + def tf_batch_set_value(tuples): + if len(tuples) != 0: + with nn.tf.device('/CPU:0'): + assign_ops = [] + feed_dict = {} + + for x, value in tuples: + if isinstance(value, nn.tf.Operation): + assign_ops.append(value) + else: + value = np.asarray(value, dtype=x.dtype.as_numpy_dtype) + assign_placeholder = nn.tf.placeholder( x.dtype.base_dtype, shape=[None]*value.ndim ) + assign_op = nn.tf.assign (x, assign_placeholder ) + assign_ops.append(assign_op) + feed_dict[assign_placeholder] = value + + nn.tf_sess.run(assign_ops, feed_dict=feed_dict) + nn.tf_batch_set_value = tf_batch_set_value + + + def tf_gradients ( loss, vars ): + grads = gradients.gradients(loss, vars, colocate_gradients_with_ops=True ) + #todo none gradient for var + return [*zip(grads,vars)] + nn.tf_gradients = tf_gradients + + def tf_average_gv_list(grad_var_list, tf_device_string=None): + e = tf.device(tf_device_string) if tf_device_string is not None else None + if e is not None: e.__enter__() + result = [] + for i, (gv) in enumerate(grad_var_list): + for j,(g,v) in enumerate(gv): + g = tf.expand_dims(g, 0) + if i == 0: + result += [ [[g], v] ] + else: + result[j][0] += [g] + + for i,(gs,v) in enumerate(result): + result[i] = ( tf.reduce_mean( tf.concat (gs, 0), 0 ), v ) + if e is not None: e.__exit__(None,None,None) + return result + nn.tf_average_gv_list = tf_average_gv_list + + def tf_average_tensor_list(tensors_list, tf_device_string=None): + e = tf.device(tf_device_string) if tf_device_string is not None else None + if e is not None: e.__enter__() + result = tf.reduce_mean(tf.concat ([tf.expand_dims(t, 0) for t in tensors_list], 0), 0) + if e is not None: e.__exit__(None,None,None) + return result + nn.tf_average_tensor_list = tf_average_tensor_list + + def tf_dot(x, y): + if x.shape.ndims > 2 or y.shape.ndims > 2: + x_shape = [] + for i, s in zip( x.shape.as_list(), array_ops.unstack(array_ops.shape(x))): + if i is not None: + x_shape.append(i) + else: + x_shape.append(s) + x_shape = tuple(x_shape) + y_shape = [] + for i, s in zip( y.shape.as_list(), array_ops.unstack(array_ops.shape(y))): + if i is not None: + y_shape.append(i) + else: + y_shape.append(s) + y_shape = tuple(y_shape) + y_permute_dim = list(range(y.shape.ndims)) + y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim + xt = array_ops.reshape(x, [-1, x_shape[-1]]) + yt = array_ops.reshape(array_ops.transpose(y, perm=y_permute_dim), [y_shape[-2], -1]) + + import code + code.interact(local=dict(globals(), **locals())) + return array_ops.reshape(math_ops.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:]) + if isinstance(x, 
sparse_tensor.SparseTensor): + out = sparse_ops.sparse_tensor_dense_matmul(x, y) + else: + out = math_ops.matmul(x, y) + return out + nn.tf_dot = tf_dot + + def tf_gelu(x): + cdf = 0.5 * (1.0 + tf.nn.tanh((np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) + return x * cdf + nn.tf_gelu = tf_gelu + + def tf_upsample2d(x, size=2): + return tf.image.resize_nearest_neighbor(x, (x.shape[1]*size, x.shape[2]*size) ) + nn.tf_upsample2d = tf_upsample2d + + def tf_upsample2d_bilinear(x, size=2): + return tf.image.resize_images(x, (x.shape[1]*size, x.shape[2]*size) ) + nn.tf_upsample2d_bilinear = tf_upsample2d_bilinear + + def tf_flatten(x, dynamic_dims=False): + """ + dynamic_dims allows to flatten without knowing size on input dims + """ + if dynamic_dims: + sh = tf.shape(x) + return tf.reshape (x, (sh[0], tf.reduce_prod(sh[1:]) ) ) + else: + return tf.reshape (x, (-1, np.prod(x.shape[1:])) ) + + nn.tf_flatten = tf_flatten + + def tf_random_binomial(shape, p=0.0, dtype=None, seed=None): + if dtype is None: + dtype=tf.float32 + + if seed is None: + seed = np.random.randint(10e6) + return array_ops.where( + random_ops.random_uniform(shape, dtype=tf.float16, seed=seed) < p, + array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype)) + nn.tf_random_binomial = tf_random_binomial + + def tf_gaussian_blur(input, radius=2.0): + def gaussian(x, mu, sigma): + return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2)) + + def make_kernel(sigma): + kernel_size = max(3, int(2 * 2 * sigma + 1)) + mean = np.floor(0.5 * kernel_size) + kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)]) + np_kernel = np.outer(kernel_1d, kernel_1d).astype(np.float32) + kernel = np_kernel / np.sum(np_kernel) + return kernel + + gauss_kernel = make_kernel(radius) + gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis] + kernel_size = gauss_kernel.shape[0] + + inputs = [ input[:,:,:,i:i+1] for i in range( input.shape[-1] ) ] + + outputs = [] + for i in range(len(inputs)): + x = inputs[i] + if kernel_size != 0: + padding = kernel_size//2 + x = tf.pad (x, [ [0,0], [padding,padding], [padding,padding], [0,0] ] ) + + outputs += [ tf.nn.conv2d(x, tf.constant(gauss_kernel, dtype=nn.tf_floatx ) , strides=[1,1,1,1], padding="VALID") ] + + return tf.concat (outputs, axis=-1) + nn.tf_gaussian_blur = tf_gaussian_blur + + def tf_style_loss(target, style, gaussian_blur_radius=0.0, loss_weight=1.0, step_size=1): + def sd(content, style, loss_weight): + content_nc = content.shape[-1] + style_nc = style.shape[-1] + if content_nc != style_nc: + raise Exception("style_loss() content_nc != style_nc") + + axes = [1,2] + c_mean, c_var = tf.nn.moments(content, axes=axes, keep_dims=True) + s_mean, s_var = tf.nn.moments(style, axes=axes, keep_dims=True) + c_std, s_std = tf.sqrt(c_var + 1e-5), tf.sqrt(s_var + 1e-5) + + mean_loss = tf.reduce_sum(tf.square(c_mean-s_mean), axis=[1,2,3]) + std_loss = tf.reduce_sum(tf.square(c_std-s_std), axis=[1,2,3]) + + return (mean_loss + std_loss) * ( loss_weight / content_nc.value ) + + if gaussian_blur_radius > 0.0: + target = tf_gaussian_blur(target, gaussian_blur_radius) + style = tf_gaussian_blur(style, gaussian_blur_radius) + + return sd( target, style, loss_weight=loss_weight ) + + nn.tf_style_loss = tf_style_loss + + def tf_dssim(img1,img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): + + ch = img2.shape[-1] + + def _fspecial_gauss(size, sigma): + #Function to mimic the 'fspecial' gaussian MATLAB function. 
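+            # Builds the Gaussian window used by the depthwise convolution below: squared distances
+            # from the window centre are scaled by -0.5/sigma**2, the row and column terms are summed
+            # into a 2-D log-kernel, and softmax exponentiates and normalizes it so the weights sum to 1.
+            # The result is reshaped to (size, size, 1, 1) and tiled across `ch` channels for depthwise_conv2d.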
+ coords = np.arange(0, size, dtype=nn.np_floatx) + coords -= (size - 1 ) / 2.0 + g = coords**2 + g *= ( -0.5 / (sigma**2) ) + g = np.reshape (g, (1,-1)) + np.reshape(g, (-1,1) ) + g = tf.constant ( np.reshape (g, (1,-1)), dtype=nn.tf_floatx ) + g = tf.nn.softmax(g) + g = tf.reshape (g, (size, size, 1, 1)) + g = tf.tile (g, (1,1,ch,1)) + return g + + kernel = _fspecial_gauss(filter_size,filter_sigma) + + def reducer(x): + return tf.nn.depthwise_conv2d(x, kernel, strides=[1,1,1,1], padding='VALID') + + c1 = (k1 * max_val) ** 2 + c2 = (k2 * max_val) ** 2 + + mean0 = reducer(img1) + mean1 = reducer(img2) + num0 = mean0 * mean1 * 2.0 + den0 = tf.square(mean0) + tf.square(mean1) + luminance = (num0 + c1) / (den0 + c1) + + num1 = reducer(img1 * img2) * 2.0 + den1 = reducer(tf.square(img1) + tf.square(img2)) + c2 *= 1.0 #compensation factor + cs = (num1 - num0 + c2) / (den1 - den0 + c2) + + ssim_val = tf.reduce_mean(luminance * cs, axis=(-3, -2) ) + return(1.0 - ssim_val ) / 2.0 + nn.tf_dssim = tf_dssim + + def tf_rgb_to_lab(srgb): + srgb_pixels = tf.reshape(srgb, [-1, 3]) + linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32) + exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32) + rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask + rgb_to_xyz = tf.constant([ + # X Y Z + [0.412453, 0.212671, 0.019334], # R + [0.357580, 0.715160, 0.119193], # G + [0.180423, 0.072169, 0.950227], # B + ]) + xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz) + + xyz_normalized_pixels = tf.multiply(xyz_pixels, [1/0.950456, 1.0, 1/1.088754]) + + epsilon = 6/29 + linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32) + exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32) + fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4/29) * linear_mask + (xyz_normalized_pixels ** (1/3)) * exponential_mask + + fxfyfz_to_lab = tf.constant([ + # l a b + [ 0.0, 500.0, 0.0], # fx + [116.0, -500.0, 200.0], # fy + [ 0.0, 0.0, -200.0], # fz + ]) + lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0]) + return tf.reshape(lab_pixels, tf.shape(srgb)) + nn.tf_rgb_to_lab = tf_rgb_to_lab + + def tf_suppress_lower_mean(t, eps=0.00001): + if t.shape.ndims != 1: + raise ValueError("tf_suppress_lower_mean: t rank must be 1") + t_mean_eps = tf.reduce_mean(t) - eps + q = tf.clip_by_value(t, t_mean_eps, tf.reduce_max(t) ) + q = tf.clip_by_value(q-t_mean_eps, 0, eps) + q = q * (t/eps) + return q +""" +class GeLU(KL.Layer): + Gaussian Error Linear Unit. + A smoother version of ReLU generally used + in the BERT or BERT architecture based models. + Original paper: https://arxiv.org/abs/1606.08415 + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + Output shape: + Same shape as the input. 
+ + def __init__(self, approximate=True, **kwargs): + super(GeLU, self).__init__(**kwargs) + self.approximate = approximate + self.supports_masking = True + + def call(self, inputs): + cdf = 0.5 * (1.0 + K.tanh((np.sqrt(2 / np.pi) * (inputs + 0.044715 * K.pow(inputs, 3))))) + return inputs * cdf + + def get_config(self): + config = {'approximate': self.approximate} + base_config = super(GeLU, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + return input_shape + nn.GeLU = GeLU +""" \ No newline at end of file diff --git a/mathlib/__init__.py b/core/mathlib/__init__.py similarity index 100% rename from mathlib/__init__.py rename to core/mathlib/__init__.py diff --git a/mathlib/umeyama.py b/core/mathlib/umeyama.py similarity index 100% rename from mathlib/umeyama.py rename to core/mathlib/umeyama.py diff --git a/utils/mp_utils.py b/core/mplib/__init__.py similarity index 100% rename from utils/mp_utils.py rename to core/mplib/__init__.py diff --git a/utils/os_utils.py b/core/osex.py similarity index 100% rename from utils/os_utils.py rename to core/osex.py diff --git a/utils/Path_utils.py b/core/pathex.py similarity index 88% rename from utils/Path_utils.py rename to core/pathex.py index c609572..5c93eed 100644 --- a/utils/Path_utils.py +++ b/core/pathex.py @@ -3,6 +3,16 @@ from os import scandir image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] +def write_bytes_safe(p, bytes_data): + """ + writes to .tmp first and then rename to target filename + """ + p_tmp = p.parent / (p.name + '.tmp') + p_tmp.write_bytes(bytes_data) + if p.exists(): + p.unlink() + p_tmp.rename (p) + def scantree(path): """Recursively yield DirEntry objects for given directory.""" for entry in scandir(path): @@ -46,7 +56,7 @@ def get_file_paths(dir_path): dir_path = Path (dir_path) if dir_path.exists(): - return sorted([ x.path for x in list(scandir(str(dir_path))) if x.is_file() ]) + return [ Path(x) for x in sorted([ x.path for x in list(scandir(str(dir_path))) if x.is_file() ]) ] else: return [] diff --git a/utils/random_utils.py b/core/randomex.py similarity index 100% rename from utils/random_utils.py rename to core/randomex.py diff --git a/utils/std_utils.py b/core/stdex.py similarity index 100% rename from utils/std_utils.py rename to core/stdex.py diff --git a/utils/struct_utils.py b/core/structex.py similarity index 100% rename from utils/struct_utils.py rename to core/structex.py diff --git a/doc/DeepFaceLab is working.png b/doc/DeepFaceLab is working.png new file mode 100644 index 0000000..4d86d36 Binary files /dev/null and b/doc/DeepFaceLab is working.png differ diff --git a/doc/doc_build_and_repository_info.md b/doc/doc_build_and_repository_info.md deleted file mode 100644 index 89a740e..0000000 --- a/doc/doc_build_and_repository_info.md +++ /dev/null @@ -1,5 +0,0 @@ -#### **CPU mode** - -It is possible to run from script for all stages using the `--cpu-only` flag. To run from script, install the separate dependencies for CPU mode using `pip -r requirements-cpu.txt`. - -Please note that extraction and training will take much long without a GPU and performance will greatly suffer without one. In particular, do not use DLIB extractor in CPU mode, it's too slow to run without a GPU. Train only on 64px resolution models like H64 or SAE (with low settings) and the lightweight encoder. 
\ No newline at end of file diff --git a/doc/doc_ready_to_work_facesets.md b/doc/doc_ready_to_work_facesets.md deleted file mode 100644 index 75f3dee..0000000 --- a/doc/doc_ready_to_work_facesets.md +++ /dev/null @@ -1,11 +0,0 @@ -### **Example Face Sets**: - -Faces sets for the following have been pre-extracted, - -- Nicolas Cage -- Steve Jobs -- Putin -- Elon Musk -- Harrison Ford - -[Download from Google drive](https://drive.google.com/open?id=1LwMdfTxdOaNAHt_sGV76aQVn7XPseXJB) diff --git a/doc/doc_windows_desktop_app.md b/doc/doc_windows_desktop_app.md deleted file mode 100644 index e6c83ec..0000000 --- a/doc/doc_windows_desktop_app.md +++ /dev/null @@ -1,27 +0,0 @@ -### **Prebuilt Windows Releases** - -Windows builds with all dependencies included are released regularly. Only the NVIDIA GeForce display driver needs to be installed. Prebuilt DeepFaceLab, including GPU and CPU versions, can be downloaded from - -[Google drive](https://drive.google.com/open?id=1BCFK_L7lPNwMbEQ_kFPqPpDdFEOd_Dci) - -if the download qouta is exceeded, add the file to your own google drive and download from it - -[Torrent](https://rutracker.org/forum/viewtopic.php?t=5558863) - -Available builds: - -* DeepFaceLab_CUDA - for NVIDIA cards - -* DeepFaceLab_OpenCL - for NVIDIA/AMD/IntelHD cards - -Important: you don't need to install CUDA ! - -#### Video tutorials using prebuilt windows app - -* [Basic workflow](https://www.youtube.com/watch?v=K98nTNjXkq8) - -* [Basic workflow (thanks @derpfakes)](https://www.youtube.com/watch?v=cVcyghhmQSA) - -* [How To Make DeepFakes With DeepFaceLab - An Amatuer's Guide](https://www.youtube.com/watch?v=wBax7_UWXvc) - -* [Manual re-extract poorly aligned frames](https://www.youtube.com/watch?v=7z1ykVVCHhM) \ No newline at end of file diff --git a/doc/example_faceset.jpg b/doc/example_faceset.jpg deleted file mode 100644 index 6ee914f..0000000 Binary files a/doc/example_faceset.jpg and /dev/null differ diff --git a/doc/gallery/1.jpg b/doc/gallery/1.jpg deleted file mode 100644 index bb4895b..0000000 Binary files a/doc/gallery/1.jpg and /dev/null differ diff --git a/doc/gallery/2.jpg b/doc/gallery/2.jpg deleted file mode 100644 index b8e6138..0000000 Binary files a/doc/gallery/2.jpg and /dev/null differ diff --git a/doc/gallery/doc_gallery.md b/doc/gallery/doc_gallery.md deleted file mode 100644 index 5ba780f..0000000 --- a/doc/gallery/doc_gallery.md +++ /dev/null @@ -1,3 +0,0 @@ -![](1.jpg) - -![](2.jpg) \ No newline at end of file diff --git a/doc/logo_cuda.jpg b/doc/logo_cuda.jpg deleted file mode 100644 index 472571e..0000000 Binary files a/doc/logo_cuda.jpg and /dev/null differ diff --git a/doc/logo_cuda.png b/doc/logo_cuda.png new file mode 100644 index 0000000..0b928a6 Binary files /dev/null and b/doc/logo_cuda.png differ diff --git a/doc/logo_keras.jpg b/doc/logo_keras.jpg deleted file mode 100644 index 6082a5b..0000000 Binary files a/doc/logo_keras.jpg and /dev/null differ diff --git a/doc/logo_opencl.jpg b/doc/logo_opencl.jpg deleted file mode 100644 index 36ab395..0000000 Binary files a/doc/logo_opencl.jpg and /dev/null differ diff --git a/doc/logo_plaidml.jpg b/doc/logo_plaidml.jpg deleted file mode 100644 index c206915..0000000 Binary files a/doc/logo_plaidml.jpg and /dev/null differ diff --git a/doc/logo_tensorflow.jpg b/doc/logo_tensorflow.jpg deleted file mode 100644 index 19f44c9..0000000 Binary files a/doc/logo_tensorflow.jpg and /dev/null differ diff --git a/doc/logo_tensorflow.png b/doc/logo_tensorflow.png new file mode 100644 index 0000000..b06cdd6 Binary 
files /dev/null and b/doc/logo_tensorflow.png differ diff --git a/doc/manual_en_google_translated.docx b/doc/manual_en_google_translated.docx deleted file mode 100644 index ac64a7d..0000000 Binary files a/doc/manual_en_google_translated.docx and /dev/null differ diff --git a/doc/manual_en_google_translated.pdf b/doc/manual_en_google_translated.pdf deleted file mode 100644 index d6fade9..0000000 Binary files a/doc/manual_en_google_translated.pdf and /dev/null differ diff --git a/doc/manual_extractor_0.jpg b/doc/manual_extractor_0.jpg deleted file mode 100644 index b88c02c..0000000 Binary files a/doc/manual_extractor_0.jpg and /dev/null differ diff --git a/doc/manual_ru.pdf b/doc/manual_ru.pdf deleted file mode 100644 index 2c27e9f..0000000 Binary files a/doc/manual_ru.pdf and /dev/null differ diff --git a/doc/manual_ru_source.docx b/doc/manual_ru_source.docx deleted file mode 100644 index f37a425..0000000 Binary files a/doc/manual_ru_source.docx and /dev/null differ diff --git a/ebsynth/__init__.py b/ebsynth/__init__.py deleted file mode 100644 index ce31c32..0000000 --- a/ebsynth/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .ebsynth import color_transfer \ No newline at end of file diff --git a/ebsynth/ebsynth.dll b/ebsynth/ebsynth.dll deleted file mode 100644 index f6c085e..0000000 Binary files a/ebsynth/ebsynth.dll and /dev/null differ diff --git a/ebsynth/ebsynth.py b/ebsynth/ebsynth.py deleted file mode 100644 index ec2ec08..0000000 --- a/ebsynth/ebsynth.py +++ /dev/null @@ -1,201 +0,0 @@ -import os -import sys -from ctypes import * -from pathlib import Path - -import cv2 -import numpy as np - -libebsynth = None -cached_buffer = {} - -EBSYNTH_BACKEND_CPU = 0x0001 -EBSYNTH_BACKEND_CUDA = 0x0002 -EBSYNTH_BACKEND_AUTO = 0x0000 -EBSYNTH_MAX_STYLE_CHANNELS = 8 -EBSYNTH_MAX_GUIDE_CHANNELS = 24 -EBSYNTH_VOTEMODE_PLAIN = 0x0001 # weight = 1 -EBSYNTH_VOTEMODE_WEIGHTED = 0x0002 # weight = 1/(1+error) - - -def _normalize_img_shape (img): - img_len = len(img.shape) - if img_len == 2: - sh, sw = img.shape - sc = 0 - elif img_len == 3: - sh, sw, sc = img.shape - - if sc == 0: - sc = 1 - img = img [...,np.newaxis] - return img - -def run (img_style, guides, - patch_size=5, - num_pyramid_levels=-1, - num_search_vote_iters = 6, - num_patch_match_iters = 4, - stop_threshold = 5, - uniformity_weight = 3500.0, - extraPass3x3 = False, - ): - if patch_size < 3: - raise ValueError ("patch_size is too small") - if patch_size % 2 == 0: - raise ValueError ("patch_size must be an odd number") - if len(guides) == 0: - raise ValueError ("at least one guide must be specified") - - global libebsynth - if libebsynth is None: - if sys.platform[0:3] == 'win': - libebsynth_path = str ( Path(__file__).parent / 'ebsynth.dll' ) - libebsynth = CDLL(libebsynth_path) - else: - #todo: implement for linux - pass - - if libebsynth is not None: - libebsynth.ebsynthRun.argtypes = ( \ - c_int, - c_int, - c_int, - c_int, - c_int, - c_void_p, - c_void_p, - c_int, - c_int, - c_void_p, - c_void_p, - POINTER(c_float), - POINTER(c_float), - c_float, - c_int, - c_int, - c_int, - POINTER(c_int), - POINTER(c_int), - POINTER(c_int), - c_int, - c_void_p, - c_void_p - ) - - if libebsynth is None: - return img_style - - img_style = _normalize_img_shape (img_style) - sh, sw, sc = img_style.shape - t_h, t_w, t_c = 0,0,0 - - if sc > EBSYNTH_MAX_STYLE_CHANNELS: - raise ValueError (f"error: too many style channels {sc}, maximum number is {EBSYNTH_MAX_STYLE_CHANNELS}") - - guides_source = [] - guides_target = [] - guides_weights = [] - - for i in 
range(len(guides)): - source_guide, target_guide, guide_weight = guides[i] - source_guide = _normalize_img_shape(source_guide) - target_guide = _normalize_img_shape(target_guide) - s_h, s_w, s_c = source_guide.shape - nt_h, nt_w, nt_c = target_guide.shape - - if s_h != sh or s_w != sw: - raise ValueError ("guide source and style resolution must match style resolution.") - - if t_c == 0: - t_h, t_w, t_c = nt_h, nt_w, nt_c - elif nt_h != t_h or nt_w != t_w: - raise ValueError ("guides target resolutions must be equal") - - if s_c != nt_c: - raise ValueError ("guide source and target channels must match exactly.") - - guides_source.append (source_guide) - guides_target.append (target_guide) - - guides_weights += [ guide_weight / s_c ] * s_c - - guides_source = np.concatenate ( guides_source, axis=-1) - guides_target = np.concatenate ( guides_target, axis=-1) - guides_weights = (c_float*len(guides_weights) ) ( *guides_weights ) - - styleWeight = 1.0 - style_weights = [ styleWeight / sc for i in range(sc) ] - style_weights = (c_float*sc) ( *style_weights ) - - - maxPyramidLevels = 0 - for level in range(32,-1,-1): - if min( min(sh, t_h)*pow(2.0, -level), \ - min(sw, t_w)*pow(2.0, -level) ) >= (2*patch_size+1): - maxPyramidLevels = level+1 - break - - if num_pyramid_levels == -1: - num_pyramid_levels = maxPyramidLevels - num_pyramid_levels = min(num_pyramid_levels, maxPyramidLevels) - - num_search_vote_iters_per_level = (c_int*num_pyramid_levels) ( *[num_search_vote_iters]*num_pyramid_levels ) - num_patch_match_iters_per_level = (c_int*num_pyramid_levels) ( *[num_patch_match_iters]*num_pyramid_levels ) - stop_threshold_per_level = (c_int*num_pyramid_levels) ( *[stop_threshold]*num_pyramid_levels ) - - buffer = cached_buffer.get ( (t_h,t_w,sc), None ) - if buffer is None: - buffer = create_string_buffer (t_h*t_w*sc) - cached_buffer[(t_h,t_w,sc)] = buffer - - libebsynth.ebsynthRun (EBSYNTH_BACKEND_CPU, #backend - sc, #numStyleChannels - guides_source.shape[-1], #numGuideChannels - sw, #sourceWidth - sh, #sourceHeight - img_style.tobytes(), #sourceStyleData (width * height * numStyleChannels) bytes, scan-line order - guides_source.tobytes(), #sourceGuideData (width * height * numGuideChannels) bytes, scan-line order - t_w, #targetWidth - t_h, #targetHeight - guides_target.tobytes(), #targetGuideData (width * height * numGuideChannels) bytes, scan-line order - None, #targetModulationData (width * height * numGuideChannels) bytes, scan-line order; pass NULL to switch off the modulation - style_weights, #styleWeights (numStyleChannels) floats - guides_weights, #guideWeights (numGuideChannels) floats - uniformity_weight, #uniformityWeight reasonable values are between 500-15000, 3500 is a good default - patch_size, #patchSize odd sizes only, use 5 for 5x5 patch, 7 for 7x7, etc. 
- EBSYNTH_VOTEMODE_PLAIN, #voteMode use VOTEMODE_WEIGHTED for sharper result - num_pyramid_levels, #numPyramidLevels - - num_search_vote_iters_per_level, #numSearchVoteItersPerLevel how many search/vote iters to perform at each level (array of ints, coarse first, fine last) - num_patch_match_iters_per_level, #numPatchMatchItersPerLevel how many Patch-Match iters to perform at each level (array of ints, coarse first, fine last) - stop_threshold_per_level, #stopThresholdPerLevel stop improving pixel when its change since last iteration falls under this threshold - 1 if extraPass3x3 else 0, #extraPass3x3 perform additional polishing pass with 3x3 patches at the finest level, use 0 to disable - None, #outputNnfData (width * height * 2) ints, scan-line order; pass NULL to ignore - buffer #outputImageData (width * height * numStyleChannels) bytes, scan-line order - ) - - return np.frombuffer(buffer, dtype=np.uint8).reshape ( (t_h,t_w,sc) ).copy() - -#transfer color from source to target -def color_transfer(img_source, img_target): - guides = [( cv2.cvtColor(img_source, cv2.COLOR_BGR2GRAY), - cv2.cvtColor(img_target, cv2.COLOR_BGR2GRAY), - 1 ) ] - - h,w,c = img_source.shape - result = [] - for i in range(c): - result += [ - run( img_source[...,i:i+1] , guides=guides, - patch_size=11, - num_pyramid_levels=40, - num_search_vote_iters = 6, - num_patch_match_iters = 4, - stop_threshold = 5, - uniformity_weight=500.0, - extraPass3x3=True, - ) - ] - - return np.concatenate( result, axis=-1 ) diff --git a/facelib/DLIBExtractor.py b/facelib/DLIBExtractor.py deleted file mode 100644 index a91164d..0000000 --- a/facelib/DLIBExtractor.py +++ /dev/null @@ -1,40 +0,0 @@ -import numpy as np -import os -import cv2 - -from pathlib import Path - -class DLIBExtractor(object): - def __init__(self, dlib): - self.scale_to = 1850 - #3100 eats ~1.687GB VRAM on 2GB 730 desktop card, but >4Gb on 6GB card, - #but 3100 doesnt work on 2GB 850M notebook card, I cant understand this behaviour - #1850 works on 2GB 850M notebook card, works faster than 3100, produces good result - self.dlib = dlib - - def __enter__(self): - self.dlib_cnn_face_detector = self.dlib.cnn_face_detection_model_v1( str(Path(__file__).parent / "mmod_human_face_detector.dat") ) - self.dlib_cnn_face_detector ( np.zeros ( (self.scale_to, self.scale_to, 3), dtype=np.uint8), 0 ) - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - del self.dlib_cnn_face_detector - return False #pass exception between __enter__ and __exit__ to outter level - - def extract_from_bgr (self, input_image): - input_image = input_image[:,:,::-1].copy() - (h, w, ch) = input_image.shape - - detected_faces = [] - input_scale = self.scale_to / (w if w > h else h) - input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) - detected_faces = self.dlib_cnn_face_detector(input_image, 0) - - result = [] - for d_rect in detected_faces: - if type(d_rect) == self.dlib.mmod_rectangle: - d_rect = d_rect.rect - left, top, right, bottom = d_rect.left(), d_rect.top(), d_rect.right(), d_rect.bottom() - result.append ( (int(left/input_scale), int(top/input_scale), int(right/input_scale), int(bottom/input_scale)) ) - - return result diff --git a/facelib/2DFAN-4.h5 b/facelib/FAN.npy similarity index 98% rename from facelib/2DFAN-4.h5 rename to facelib/FAN.npy index ac782ad..ba55f15 100644 Binary files a/facelib/2DFAN-4.h5 and b/facelib/FAN.npy differ diff --git a/facelib/FANExtractor.py b/facelib/FANExtractor.py 
index 9fd9b02..3429172 100644 --- a/facelib/FANExtractor.py +++ b/facelib/FANExtractor.py @@ -7,29 +7,159 @@ import numpy as np from numpy import linalg as npla from facelib import FaceType, LandmarksProcessor -from nnlib import nnlib +from core.leras import nn """ ported from https://github.com/1adrianb/face-alignment """ class FANExtractor(object): - def __init__ (self): - pass + def __init__ (self, place_model_on_cpu=False): + model_path = Path(__file__).parent / "FAN.npy" + if not model_path.exists(): + raise Exception("Unable to load FANExtractor model") - def __enter__(self): - keras_model_path = Path(__file__).parent / "2DFAN-4.h5" - if not keras_model_path.exists(): - return None + nn.initialize() + tf = nn.tf - exec( nnlib.import_all(), locals(), globals() ) - self.model = FANExtractor.BuildModel() - self.model.load_weights(str(keras_model_path)) + class ConvBlock(nn.ModelBase): + def on_build(self, in_planes, out_planes): + self.in_planes = in_planes + self.out_planes = out_planes - return self + self.bn1 = nn.BatchNorm2D(in_planes) + self.conv1 = nn.Conv2D (in_planes, out_planes/2, kernel_size=3, strides=1, padding='SAME', use_bias=False ) - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - del self.model - return False #pass exception between __enter__ and __exit__ to outter level + self.bn2 = nn.BatchNorm2D(out_planes/2) + self.conv2 = nn.Conv2D (out_planes/2, out_planes/4, kernel_size=3, strides=1, padding='SAME', use_bias=False ) + + self.bn3 = nn.BatchNorm2D(out_planes/4) + self.conv3 = nn.Conv2D (out_planes/4, out_planes/4, kernel_size=3, strides=1, padding='SAME', use_bias=False ) + + if self.in_planes != self.out_planes: + self.down_bn1 = nn.BatchNorm2D(in_planes) + self.down_conv1 = nn.Conv2D (in_planes, out_planes, kernel_size=1, strides=1, padding='VALID', use_bias=False ) + else: + self.down_bn1 = None + self.down_conv1 = None + + def forward(self, input): + x = input + x = self.bn1(x) + x = tf.nn.relu(x) + x = out1 = self.conv1(x) + + x = self.bn2(x) + x = tf.nn.relu(x) + x = out2 = self.conv2(x) + + x = self.bn3(x) + x = tf.nn.relu(x) + x = out3 = self.conv3(x) + x = tf.concat ([out1, out2, out3], axis=-1) + + if self.in_planes != self.out_planes: + downsample = self.down_bn1(input) + downsample = tf.nn.relu (downsample) + downsample = self.down_conv1 (downsample) + x = x + downsample + else: + x = x + input + + return x + + class HourGlass (nn.ModelBase): + def on_build(self, in_planes, depth): + self.b1 = ConvBlock (in_planes, 256) + self.b2 = ConvBlock (in_planes, 256) + + if depth > 1: + self.b2_plus = HourGlass(256, depth-1) + else: + self.b2_plus = ConvBlock(256, 256) + + self.b3 = ConvBlock(256, 256) + + def forward(self, input): + up1 = self.b1(input) + + low1 = tf.nn.avg_pool(input, [1,2,2,1], [1,2,2,1], 'VALID') + low1 = self.b2 (low1) + + low2 = self.b2_plus(low1) + low3 = self.b3(low2) + + up2 = nn.tf_upsample2d(low3) + + return up1+up2 + + class FAN (nn.ModelBase): + def __init__(self): + super().__init__(name='FAN') + + def on_build(self): + self.conv1 = nn.Conv2D (3, 64, kernel_size=7, strides=2, padding='SAME') + self.bn1 = nn.BatchNorm2D(64) + + self.conv2 = ConvBlock(64, 128) + self.conv3 = ConvBlock(128, 128) + self.conv4 = ConvBlock(128, 256) + + self.m = [] + self.top_m = [] + self.conv_last = [] + self.bn_end = [] + self.l = [] + self.bl = [] + self.al = [] + for i in range(4): + self.m += [ HourGlass(256, 4) ] + self.top_m += [ ConvBlock(256, 256) ] + + self.conv_last += [ nn.Conv2D (256, 256, kernel_size=1, strides=1, 
padding='VALID') ] + self.bn_end += [ nn.BatchNorm2D(256) ] + + self.l += [ nn.Conv2D (256, 68, kernel_size=1, strides=1, padding='VALID') ] + + if i < 4-1: + self.bl += [ nn.Conv2D (256, 256, kernel_size=1, strides=1, padding='VALID') ] + self.al += [ nn.Conv2D (68, 256, kernel_size=1, strides=1, padding='VALID') ] + + def forward(self, inp) : + x, = inp + x = self.conv1(x) + x = self.bn1(x) + x = tf.nn.relu(x) + + x = self.conv2(x) + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], 'VALID') + x = self.conv3(x) + x = self.conv4(x) + + outputs = [] + previous = x + for i in range(4): + ll = self.m[i] (previous) + ll = self.top_m[i] (ll) + ll = self.conv_last[i] (ll) + ll = self.bn_end[i] (ll) + ll = tf.nn.relu(ll) + tmp_out = self.l[i](ll) + outputs.append(tmp_out) + if i < 4 - 1: + ll = self.bl[i](ll) + previous = previous + ll + self.al[i](tmp_out) + return outputs[-1] + + e = None + if place_model_on_cpu: + e = tf.device("/CPU:0") + + if e is not None: e.__enter__() + self.model = FAN() + self.model.load_weights(str(model_path)) + if e is not None: e.__exit__(None,None,None) + + self.model.build_for_run ([ ( tf.float32, (256,256,3) ) ]) def extract (self, input_image, rects, second_pass_extractor=None, is_bgr=True, multi_sample=False): if len(rects) == 0: @@ -63,13 +193,13 @@ class FANExtractor(object): images += [ self.crop(input_image, c, scale) ] images = np.stack (images) - images = images.astype(np.float32) / 255.0 + images = images.astype(np.float32) / 255.0 predicted = [] for i in range( len(images) ): - predicted += [ self.model.predict ( images[i][None,...] ).transpose (0,3,1,2)[0] ] + predicted += [ self.model.run ( [ images[i][None,...] ] ).transpose (0,3,1,2)[0] ] - predicted = np.stack(predicted) + predicted = np.stack(predicted) for i, pred in enumerate(predicted): ptss += [ self.get_pts_from_predict ( pred, centers[i], scale) ] @@ -144,81 +274,3 @@ class FANExtractor(object): c += 0.5 return np.array( [ self.transform (c[i], center, scale, a_w) for i in range(a_ch) ] ) - - @staticmethod - def BuildModel(): - def ConvBlock(out_planes, input): - in_planes = K.int_shape(input)[-1] - x = input - x = BatchNormalization(momentum=0.1, epsilon=1e-05)(x) - x = ReLU() (x) - x = out1 = Conv2D( int(out_planes/2), kernel_size=3, strides=1, padding='valid', use_bias = False) (ZeroPadding2D(1)(x)) - - x = BatchNormalization(momentum=0.1, epsilon=1e-05)(x) - x = ReLU() (x) - x = out2 = Conv2D( int(out_planes/4), kernel_size=3, strides=1, padding='valid', use_bias = False) (ZeroPadding2D(1)(x)) - - x = BatchNormalization(momentum=0.1, epsilon=1e-05)(x) - x = ReLU() (x) - x = out3 = Conv2D( int(out_planes/4), kernel_size=3, strides=1, padding='valid', use_bias = False) (ZeroPadding2D(1)(x)) - - x = Concatenate()([out1, out2, out3]) - - if in_planes != out_planes: - downsample = BatchNormalization(momentum=0.1, epsilon=1e-05)(input) - downsample = ReLU() (downsample) - downsample = Conv2D( out_planes, kernel_size=1, strides=1, padding='valid', use_bias = False) (downsample) - x = Add ()([x, downsample]) - else: - x = Add ()([x, input]) - - - return x - - def HourGlass (depth, input): - up1 = ConvBlock(256, input) - - low1 = AveragePooling2D (pool_size=2, strides=2, padding='valid' )(input) - low1 = ConvBlock (256, low1) - - if depth > 1: - low2 = HourGlass (depth-1, low1) - else: - low2 = ConvBlock(256, low1) - - low3 = ConvBlock(256, low2) - - up2 = UpSampling2D(size=2) (low3) - return Add() ( [up1, up2] ) - - FAN_Input = Input ( (256, 256, 3) ) - - x = FAN_Input - - x = Conv2D (64, 
kernel_size=7, strides=2, padding='valid')(ZeroPadding2D(3)(x)) - x = BatchNormalization(momentum=0.1, epsilon=1e-05)(x) - x = ReLU()(x) - - x = ConvBlock (128, x) - x = AveragePooling2D (pool_size=2, strides=2, padding='valid') (x) - x = ConvBlock (128, x) - x = ConvBlock (256, x) - - outputs = [] - previous = x - for i in range(4): - ll = HourGlass (4, previous) - ll = ConvBlock (256, ll) - - ll = Conv2D(256, kernel_size=1, strides=1, padding='valid') (ll) - ll = BatchNormalization(momentum=0.1, epsilon=1e-05)(ll) - ll = ReLU() (ll) - - tmp_out = Conv2D(68, kernel_size=1, strides=1, padding='valid') (ll) - outputs.append(tmp_out) - - if i < 4 - 1: - ll = Conv2D(256, kernel_size=1, strides=1, padding='valid') (ll) - previous = Add() ( [previous, ll, KL.Conv2D(256, kernel_size=1, strides=1, padding='valid') (tmp_out) ] ) - - return Model(FAN_Input, outputs[-1] ) diff --git a/nnlib/FANSeg_256_full_face.h5 b/facelib/FANSeg_256_full_face.npy similarity index 99% rename from nnlib/FANSeg_256_full_face.h5 rename to facelib/FANSeg_256_full_face.npy index 6886504..53a6664 100644 Binary files a/nnlib/FANSeg_256_full_face.h5 and b/facelib/FANSeg_256_full_face.npy differ diff --git a/facelib/FaceEnhancer.h5 b/facelib/FaceEnhancer.npy similarity index 99% rename from facelib/FaceEnhancer.h5 rename to facelib/FaceEnhancer.npy index 201105b..1890f42 100644 Binary files a/facelib/FaceEnhancer.h5 and b/facelib/FaceEnhancer.npy differ diff --git a/facelib/FaceEnhancer.py b/facelib/FaceEnhancer.py index c3b2016..88c4da4 100644 --- a/facelib/FaceEnhancer.py +++ b/facelib/FaceEnhancer.py @@ -4,151 +4,321 @@ from pathlib import Path import cv2 import numpy as np - +from core.leras import nn class FaceEnhancer(object): """ x4 face enhancer """ - def __init__(self): - from nnlib import nnlib - exec( nnlib.import_all(), locals(), globals() ) + def __init__(self, place_model_on_cpu=False): + nn.initialize() + tf = nn.tf - model_path = Path(__file__).parent / "FaceEnhancer.h5" + class FaceEnhancer (nn.ModelBase): + def __init__(self, name='FaceEnhancer'): + super().__init__(name=name) + + def on_build(self): + self.conv1 = nn.Conv2D (3, 64, kernel_size=3, strides=1, padding='SAME') + + self.dense1 = nn.Dense (1, 64, use_bias=False) + self.dense2 = nn.Dense (1, 64, use_bias=False) + + self.e0_conv0 = nn.Conv2D (64, 64, kernel_size=3, strides=1, padding='SAME') + self.e0_conv1 = nn.Conv2D (64, 64, kernel_size=3, strides=1, padding='SAME') + + self.e1_conv0 = nn.Conv2D (64, 112, kernel_size=3, strides=1, padding='SAME') + self.e1_conv1 = nn.Conv2D (112, 112, kernel_size=3, strides=1, padding='SAME') + + self.e2_conv0 = nn.Conv2D (112, 192, kernel_size=3, strides=1, padding='SAME') + self.e2_conv1 = nn.Conv2D (192, 192, kernel_size=3, strides=1, padding='SAME') + + self.e3_conv0 = nn.Conv2D (192, 336, kernel_size=3, strides=1, padding='SAME') + self.e3_conv1 = nn.Conv2D (336, 336, kernel_size=3, strides=1, padding='SAME') + + self.e4_conv0 = nn.Conv2D (336, 512, kernel_size=3, strides=1, padding='SAME') + self.e4_conv1 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + + self.center_conv0 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + self.center_conv1 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + self.center_conv2 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + self.center_conv3 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + + self.d4_conv0 = nn.Conv2D (1024, 512, kernel_size=3, strides=1, padding='SAME') + self.d4_conv1 = 
nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + + self.d3_conv0 = nn.Conv2D (848, 512, kernel_size=3, strides=1, padding='SAME') + self.d3_conv1 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + + self.d2_conv0 = nn.Conv2D (704, 288, kernel_size=3, strides=1, padding='SAME') + self.d2_conv1 = nn.Conv2D (288, 288, kernel_size=3, strides=1, padding='SAME') + + self.d1_conv0 = nn.Conv2D (400, 160, kernel_size=3, strides=1, padding='SAME') + self.d1_conv1 = nn.Conv2D (160, 160, kernel_size=3, strides=1, padding='SAME') + + self.d0_conv0 = nn.Conv2D (224, 96, kernel_size=3, strides=1, padding='SAME') + self.d0_conv1 = nn.Conv2D (96, 96, kernel_size=3, strides=1, padding='SAME') + + self.out1x_conv0 = nn.Conv2D (96, 48, kernel_size=3, strides=1, padding='SAME') + self.out1x_conv1 = nn.Conv2D (48, 3, kernel_size=3, strides=1, padding='SAME') + + self.dec2x_conv0 = nn.Conv2D (96, 96, kernel_size=3, strides=1, padding='SAME') + self.dec2x_conv1 = nn.Conv2D (96, 96, kernel_size=3, strides=1, padding='SAME') + + self.out2x_conv0 = nn.Conv2D (96, 48, kernel_size=3, strides=1, padding='SAME') + self.out2x_conv1 = nn.Conv2D (48, 3, kernel_size=3, strides=1, padding='SAME') + + self.dec4x_conv0 = nn.Conv2D (96, 72, kernel_size=3, strides=1, padding='SAME') + self.dec4x_conv1 = nn.Conv2D (72, 72, kernel_size=3, strides=1, padding='SAME') + + self.out4x_conv0 = nn.Conv2D (72, 36, kernel_size=3, strides=1, padding='SAME') + self.out4x_conv1 = nn.Conv2D (36, 3 , kernel_size=3, strides=1, padding='SAME') + + def forward(self, inp): + bgr, param, param1 = inp + + x = self.conv1(bgr) + a = self.dense1(param) + a = tf.reshape(a, (-1,1,1,64) ) + + b = self.dense2(param1) + b = tf.reshape(b, (-1,1,1,64) ) + + x = tf.nn.leaky_relu(x+a+b, 0.1) + + x = tf.nn.leaky_relu(self.e0_conv0(x), 0.1) + x = e0 = tf.nn.leaky_relu(self.e0_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.e1_conv0(x), 0.1) + x = e1 = tf.nn.leaky_relu(self.e1_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.e2_conv0(x), 0.1) + x = e2 = tf.nn.leaky_relu(self.e2_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.e3_conv0(x), 0.1) + x = e3 = tf.nn.leaky_relu(self.e3_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.e4_conv0(x), 0.1) + x = e4 = tf.nn.leaky_relu(self.e4_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.center_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.center_conv1(x), 0.1) + x = tf.nn.leaky_relu(self.center_conv2(x), 0.1) + x = tf.nn.leaky_relu(self.center_conv3(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e4], -1 ) + x = tf.nn.leaky_relu(self.d4_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.d4_conv1(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e3], -1 ) + x = tf.nn.leaky_relu(self.d3_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.d3_conv1(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e2], -1 ) + x = tf.nn.leaky_relu(self.d2_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.d2_conv1(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e1], -1 ) + x = tf.nn.leaky_relu(self.d1_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.d1_conv1(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e0], -1 ) + x = tf.nn.leaky_relu(self.d0_conv0(x), 0.1) + x = d0 = tf.nn.leaky_relu(self.d0_conv1(x), 0.1) + + x = 
tf.nn.leaky_relu(self.out1x_conv0(x), 0.1) + x = self.out1x_conv1(x) + out1x = bgr + tf.nn.tanh(x) + + x = d0 + x = tf.nn.leaky_relu(self.dec2x_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.dec2x_conv1(x), 0.1) + x = d2x = nn.tf_upsample2d_bilinear(x) + + x = tf.nn.leaky_relu(self.out2x_conv0(x), 0.1) + x = self.out2x_conv1(x) + + out2x = nn.tf_upsample2d_bilinear(out1x) + tf.nn.tanh(x) + + x = d2x + x = tf.nn.leaky_relu(self.dec4x_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.dec4x_conv1(x), 0.1) + x = d4x = nn.tf_upsample2d_bilinear(x) + + x = tf.nn.leaky_relu(self.out4x_conv0(x), 0.1) + x = self.out4x_conv1(x) + + out4x = nn.tf_upsample2d_bilinear(out2x) + tf.nn.tanh(x) + + return out4x + + model_path = Path(__file__).parent / "FaceEnhancer.npy" if not model_path.exists(): - return - - bgr_inp = Input ( (192,192,3) ) - t_param_inp = Input ( (1,) ) - t_param1_inp = Input ( (1,) ) - x = Conv2D (64, 3, strides=1, padding='same' )(bgr_inp) - - a = Dense (64, use_bias=False) ( t_param_inp ) - a = Reshape( (1,1,64) )(a) - b = Dense (64, use_bias=False ) ( t_param1_inp ) - b = Reshape( (1,1,64) )(b) - x = Add()([x,a,b]) - - x = LeakyReLU(0.1)(x) + raise Exception("Unable to load FaceEnhancer.npy") - x = LeakyReLU(0.1)(Conv2D (64, 3, strides=1, padding='same' )(x)) - x = e0 = LeakyReLU(0.1)(Conv2D (64, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (112, 3, strides=1, padding='same')(x)) - x = e1 = LeakyReLU(0.1)(Conv2D (112, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (192, 3, strides=1, padding='same')(x)) - x = e2 = LeakyReLU(0.1)(Conv2D (192, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (336, 3, strides=1, padding='same')(x)) - x = e3 = LeakyReLU(0.1)(Conv2D (336, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = e4 = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) + e = tf.device("/CPU:0") if place_model_on_cpu else None + if e is not None: e.__enter__() + self.model = FaceEnhancer() + self.model.load_weights (model_path) + if e is not None: e.__exit__(None,None,None) - x = Concatenate()([ BilinearInterpolation()(x), e4 ]) - - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - - x = Concatenate()([ BilinearInterpolation()(x), e3 ]) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - - x = Concatenate()([ BilinearInterpolation()(x), e2 ]) - x = LeakyReLU(0.1)(Conv2D (288, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (288, 3, strides=1, padding='same')(x)) - - x = Concatenate()([ BilinearInterpolation()(x), e1 ]) - x = LeakyReLU(0.1)(Conv2D (160, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (160, 3, strides=1, padding='same')(x)) - - x = Concatenate()([ BilinearInterpolation()(x), e0 ]) - x = LeakyReLU(0.1)(Conv2D (96, 3, strides=1, padding='same')(x)) - x = d0 = LeakyReLU(0.1)(Conv2D (96, 3, strides=1, padding='same')(x)) - - x = LeakyReLU(0.1)(Conv2D (48, 3, 
strides=1, padding='same')(x)) - - x = Conv2D (3, 3, strides=1, padding='same', activation='tanh')(x) - out1x = Add()([bgr_inp, x]) - - x = d0 - x = LeakyReLU(0.1)(Conv2D (96, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (96, 3, strides=1, padding='same')(x)) - x = d2x = BilinearInterpolation()(x) - - x = LeakyReLU(0.1)(Conv2D (48, 3, strides=1, padding='same')(x)) - x = Conv2D (3, 3, strides=1, padding='same', activation='tanh')(x) - - out2x = Add()([BilinearInterpolation()(out1x), x]) - - x = d2x - x = LeakyReLU(0.1)(Conv2D (72, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (72, 3, strides=1, padding='same')(x)) - x = d4x = BilinearInterpolation()(x) - - x = LeakyReLU(0.1)(Conv2D (36, 3, strides=1, padding='same')(x)) - x = Conv2D (3, 3, strides=1, padding='same', activation='tanh')(x) - out4x = Add()([BilinearInterpolation()(out2x), x ]) - - self.model = keras.models.Model ( [bgr_inp,t_param_inp,t_param1_inp], [out4x] ) - self.model.load_weights (str(model_path)) + self.model.build_for_run ([ (tf.float32, (192,192,3) ), + (tf.float32, (1,) ), + (tf.float32, (1,) ), + ]) def enhance (self, inp_img, is_tanh=False, preserve_size=True): if not is_tanh: inp_img = np.clip( inp_img * 2 -1, -1, 1 ) - + param = np.array([0.2]) - param1 = np.array([1.0]) + param1 = np.array([1.0]) up_res = 4 patch_size = 192 patch_size_half = patch_size // 2 - - h,w,c = inp_img.shape + + ih,iw,ic = inp_img.shape + h,w,c = ih,iw,ic + th,tw = h*up_res, w*up_res + + t_padding = 0 + b_padding = 0 + l_padding = 0 + r_padding = 0 + + if h < patch_size: + t_padding = (patch_size-h)//2 + b_padding = (patch_size-h) - t_padding + + if w < patch_size: + l_padding = (patch_size-w)//2 + r_padding = (patch_size-w) - l_padding + + if t_padding != 0: + inp_img = np.concatenate ([ np.zeros ( (t_padding,w,c), dtype=np.float32 ), inp_img ], axis=0 ) + h,w,c = inp_img.shape + + if b_padding != 0: + inp_img = np.concatenate ([ inp_img, np.zeros ( (b_padding,w,c), dtype=np.float32 ) ], axis=0 ) + h,w,c = inp_img.shape + + if l_padding != 0: + inp_img = np.concatenate ([ np.zeros ( (h,l_padding,c), dtype=np.float32 ), inp_img ], axis=1 ) + h,w,c = inp_img.shape + + if r_padding != 0: + inp_img = np.concatenate ([ inp_img, np.zeros ( (h,r_padding,c), dtype=np.float32 ) ], axis=1 ) + h,w,c = inp_img.shape + + i_max = w-patch_size+1 - j_max = h-patch_size+1 - + j_max = h-patch_size+1 + final_img = np.zeros ( (h*up_res,w*up_res,c), dtype=np.float32 ) final_img_div = np.zeros ( (h*up_res,w*up_res,1), dtype=np.float32 ) - + x = np.concatenate ( [ np.linspace (0,1,patch_size_half*up_res), np.linspace (1,0,patch_size_half*up_res) ] ) x,y = np.meshgrid(x,x) patch_mask = (x*y)[...,None] - + j=0 while j < j_max: i = 0 - while i < i_max: - patch_img = inp_img[j:j+patch_size, i:i+patch_size,:] - x = self.model.predict( [ patch_img[None,...], param, param1 ] )[0] + while i < i_max: + patch_img = inp_img[j:j+patch_size, i:i+patch_size,:] + x = self.model.run( [ patch_img[None,...], [param], [param1] ] )[0] final_img [j*up_res:(j+patch_size)*up_res, i*up_res:(i+patch_size)*up_res,:] += x*patch_mask final_img_div[j*up_res:(j+patch_size)*up_res, i*up_res:(i+patch_size)*up_res,:] += patch_mask if i == i_max-1: break - i = min( i+patch_size_half, i_max-1) + i = min( i+patch_size_half, i_max-1) if j == j_max-1: break j = min( j+patch_size_half, j_max-1) - + final_img_div[final_img_div==0] = 1.0 final_img /= final_img_div - + + if t_padding+b_padding+l_padding+r_padding != 0: + final_img = final_img 
[t_padding*up_res:(h-b_padding)*up_res, l_padding*up_res:(w-r_padding)*up_res,:] + if preserve_size: - final_img = cv2.resize (final_img, (w,h), cv2.INTER_LANCZOS4) - + final_img = cv2.resize (final_img, (iw,ih), cv2.INTER_LANCZOS4) + if not is_tanh: final_img = np.clip( final_img/2+0.5, 0, 1 ) - + return final_img + + +""" + + def enhance (self, inp_img, is_tanh=False, preserve_size=True): + if not is_tanh: + inp_img = np.clip( inp_img * 2 -1, -1, 1 ) + + param = np.array([0.2]) + param1 = np.array([1.0]) + up_res = 4 + patch_size = 192 + patch_size_half = patch_size // 2 + + h,w,c = inp_img.shape + + th,tw = h*up_res, w*up_res + + preupscale_rate = 1.0 + + if h < patch_size or w < patch_size: + preupscale_rate = 1.0 / ( max(h,w) / patch_size ) + + if preupscale_rate != 1.0: + inp_img = cv2.resize (inp_img, ( int(w*preupscale_rate), int(h*preupscale_rate) ), cv2.INTER_LANCZOS4) + h,w,c = inp_img.shape + + i_max = w-patch_size+1 + j_max = h-patch_size+1 + + final_img = np.zeros ( (h*up_res,w*up_res,c), dtype=np.float32 ) + final_img_div = np.zeros ( (h*up_res,w*up_res,1), dtype=np.float32 ) + + x = np.concatenate ( [ np.linspace (0,1,patch_size_half*up_res), np.linspace (1,0,patch_size_half*up_res) ] ) + x,y = np.meshgrid(x,x) + patch_mask = (x*y)[...,None] + + j=0 + while j < j_max: + i = 0 + while i < i_max: + patch_img = inp_img[j:j+patch_size, i:i+patch_size,:] + x = self.model.run( [ patch_img[None,...], [param], [param1] ] )[0] + final_img [j*up_res:(j+patch_size)*up_res, i*up_res:(i+patch_size)*up_res,:] += x*patch_mask + final_img_div[j*up_res:(j+patch_size)*up_res, i*up_res:(i+patch_size)*up_res,:] += patch_mask + if i == i_max-1: + break + i = min( i+patch_size_half, i_max-1) + if j == j_max-1: + break + j = min( j+patch_size_half, j_max-1) + + final_img_div[final_img_div==0] = 1.0 + final_img /= final_img_div + + if preserve_size: + final_img = cv2.resize (final_img, (w,h), cv2.INTER_LANCZOS4) + else: + if preupscale_rate != 1.0: + final_img = cv2.resize (final_img, (tw,th), cv2.INTER_LANCZOS4) + + if not is_tanh: + final_img = np.clip( final_img/2+0.5, 0, 1 ) + + return final_img +""" \ No newline at end of file diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py index 7d9f706..560ae29 100644 --- a/facelib/LandmarksProcessor.py +++ b/facelib/LandmarksProcessor.py @@ -6,11 +6,11 @@ import cv2 import numpy as np import numpy.linalg as npla -import imagelib -import mathlib +from core import imagelib +from core import mathlib from facelib import FaceType -from imagelib import IEPolys -from mathlib.umeyama import umeyama +from core.imagelib import IEPolys +from core.mathlib.umeyama import umeyama landmarks_2D = np.array([ [ 0.000213256, 0.106454 ], #17 @@ -665,8 +665,10 @@ def calc_face_yaw(landmarks): r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0 return float(r-l) -#returns pitch,yaw,roll [-1...+1] def estimate_pitch_yaw_roll(aligned_256px_landmarks): + """ + returns pitch,yaw,roll [-pi...+pi] + """ shape = (256,256) focal_length = shape[1] camera_center = (shape[1] / 2, shape[0] / 2) @@ -682,7 +684,8 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks): np.zeros((4, 1)) ) pitch, yaw, roll = mathlib.rotationMatrixToEulerAngles( cv2.Rodrigues(rotation_vector)[0] ) - pitch = np.clip ( pitch/1.30, -1.0, 1.0 ) - yaw = np.clip ( yaw / 1.11, -1.0, 1.0 ) - roll = np.clip ( roll/3.15, -1.0, 1.0 ) #todo radians + pitch = np.clip ( pitch, -math.pi, math.pi ) + yaw = np.clip ( yaw , 
-math.pi, math.pi ) + roll = np.clip ( roll, -math.pi, math.pi ) + return -pitch, yaw, roll diff --git a/facelib/MTCExtractor.py b/facelib/MTCExtractor.py deleted file mode 100644 index c524ab9..0000000 --- a/facelib/MTCExtractor.py +++ /dev/null @@ -1,350 +0,0 @@ -import numpy as np -import os -import cv2 - -from pathlib import Path -from nnlib import nnlib - -class MTCExtractor(object): - def __init__(self): - self.scale_to = 1920 - - self.min_face_size = self.scale_to * 0.042 - self.thresh1 = 0.7 - self.thresh2 = 0.85 - self.thresh3 = 0.6 - self.scale_factor = 0.95 - - exec( nnlib.import_all(), locals(), globals() ) - PNet_Input = Input ( (None, None,3) ) - x = PNet_Input - x = Conv2D (10, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="PReLU1" )(x) - x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) - x = Conv2D (16, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="PReLU2" )(x) - x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="PReLU3" )(x) - prob = Conv2D (2, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv41")(x) - prob = Softmax()(prob) - x = Conv2D (4, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv42")(x) - - PNet_model = Model(PNet_Input, [x,prob] ) - PNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_pnet.h5').__str__() ) - - RNet_Input = Input ( (24, 24, 3) ) - x = RNet_Input - x = Conv2D (28, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="prelu1" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) - x = Conv2D (48, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="prelu2" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) - x = Conv2D (64, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="prelu3" )(x) - x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) - x = Dense (128, name='conv4')(x) - x = PReLU (name="prelu4" )(x) - prob = Dense (2, name='conv51')(x) - prob = Softmax()(prob) - x = Dense (4, name='conv52')(x) - RNet_model = Model(RNet_Input, [x,prob] ) - RNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_rnet.h5').__str__() ) - - ONet_Input = Input ( (48, 48, 3) ) - x = ONet_Input - x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="prelu1" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) - x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="prelu2" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) - x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="prelu3" )(x) - x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) - x = Conv2D (128, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv4")(x) - x = PReLU (shared_axes=[1,2], name="prelu4" )(x) - x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) - x = Dense (256, name='conv5')(x) - x = PReLU (name="prelu5" )(x) - prob = 
Dense (2, name='conv61')(x) - prob = Softmax()(prob) - x1 = Dense (4, name='conv62')(x) - x2 = Dense (10, name='conv63')(x) - ONet_model = Model(ONet_Input, [x1,x2,prob] ) - ONet_model.load_weights ( (Path(__file__).parent / 'mtcnn_onet.h5').__str__() ) - - self.pnet_fun = K.function ( PNet_model.inputs, PNet_model.outputs ) - self.rnet_fun = K.function ( RNet_model.inputs, RNet_model.outputs ) - self.onet_fun = K.function ( ONet_model.inputs, ONet_model.outputs ) - - def __enter__(self): - faces, pnts = detect_face ( np.zeros ( (self.scale_to, self.scale_to, 3)), self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) - - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def extract (self, input_image, is_bgr=True): - - if is_bgr: - input_image = input_image[:,:,::-1].copy() - is_bgr = False - - (h, w, ch) = input_image.shape - - input_scale = self.scale_to / max(w,h) - input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) - - detected_faces, pnts = detect_face ( input_image, self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) - detected_faces = [ ( int(face[0]/input_scale), int(face[1]/input_scale), int(face[2]/input_scale), int(face[3]/input_scale)) for face in detected_faces ] - - return detected_faces - -def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): - """Detects faces in an image, and returns bounding boxes and points for them. - img: input image - minsize: minimum faces' size - pnet, rnet, onet: caffemodel - threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold - factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 
- """ - factor_count=0 - total_boxes=np.empty((0,9)) - points=np.empty(0) - h=img.shape[0] - w=img.shape[1] - minl=np.amin([h, w]) - m=12.0/minsize - minl=minl*m - # create scale pyramid - scales=[] - while minl>=12: - scales += [m*np.power(factor, factor_count)] - minl = minl*factor - factor_count += 1 - # first stage - for scale in scales: - hs=int(np.ceil(h*scale)) - ws=int(np.ceil(w*scale)) - #print ('scale %f %d %d' % (scale, ws,hs)) - im_data = imresample(img, (hs, ws)) - im_data = (im_data-127.5)*0.0078125 - img_x = np.expand_dims(im_data, 0) - img_y = np.transpose(img_x, (0,2,1,3)) - out = pnet([img_y]) - out0 = np.transpose(out[0], (0,2,1,3)) - out1 = np.transpose(out[1], (0,2,1,3)) - - boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) - - # inter-scale nms - pick = nms(boxes.copy(), 0.5, 'Union') - if boxes.size>0 and pick.size>0: - boxes = boxes[pick,:] - total_boxes = np.append(total_boxes, boxes, axis=0) - - numbox = total_boxes.shape[0] - if numbox>0: - pick = nms(total_boxes.copy(), 0.7, 'Union') - total_boxes = total_boxes[pick,:] - regw = total_boxes[:,2]-total_boxes[:,0] - regh = total_boxes[:,3]-total_boxes[:,1] - qq1 = total_boxes[:,0]+total_boxes[:,5]*regw - qq2 = total_boxes[:,1]+total_boxes[:,6]*regh - qq3 = total_boxes[:,2]+total_boxes[:,7]*regw - qq4 = total_boxes[:,3]+total_boxes[:,8]*regh - total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) - total_boxes = rerec(total_boxes.copy()) - total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) - dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) - - numbox = total_boxes.shape[0] - if numbox>0: - # second stage - tempimg = np.zeros((24,24,3,numbox)) - for k in range(0,numbox): - tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) - tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] - if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: - tempimg[:,:,:,k] = imresample(tmp, (24, 24)) - else: - return np.empty() - tempimg = (tempimg-127.5)*0.0078125 - tempimg1 = np.transpose(tempimg, (3,1,0,2)) - out = rnet([tempimg1]) - out0 = np.transpose(out[0]) - out1 = np.transpose(out[1]) - score = out1[1,:] - ipass = np.where(score>threshold[1]) - total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) - mv = out0[:,ipass[0]] - if total_boxes.shape[0]>0: - pick = nms(total_boxes, 0.7, 'Union') - total_boxes = total_boxes[pick,:] - total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) - total_boxes = rerec(total_boxes.copy()) - - numbox = total_boxes.shape[0] - if numbox>0: - # third stage - total_boxes = np.fix(total_boxes).astype(np.int32) - dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) - tempimg = np.zeros((48,48,3,numbox)) - for k in range(0,numbox): - tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) - tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] - if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: - tempimg[:,:,:,k] = imresample(tmp, (48, 48)) - else: - return np.empty() - tempimg = (tempimg-127.5)*0.0078125 - tempimg1 = np.transpose(tempimg, (3,1,0,2)) - out = onet([tempimg1]) - out0 = np.transpose(out[0]) - out1 = np.transpose(out[1]) - out2 = np.transpose(out[2]) - score = out2[1,:] - points = out1 - ipass = np.where(score>threshold[2]) - points = points[:,ipass[0]] - total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), 
np.expand_dims(score[ipass].copy(),1)]) - mv = out0[:,ipass[0]] - - w = total_boxes[:,2]-total_boxes[:,0]+1 - h = total_boxes[:,3]-total_boxes[:,1]+1 - points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 - points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 - if total_boxes.shape[0]>0: - total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) - pick = nms(total_boxes.copy(), 0.7, 'Min') - total_boxes = total_boxes[pick,:] - points = points[:,pick] - - return total_boxes, points - - -# function [boundingbox] = bbreg(boundingbox,reg) -def bbreg(boundingbox,reg): - """Calibrate bounding boxes""" - if reg.shape[1]==1: - reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) - - w = boundingbox[:,2]-boundingbox[:,0]+1 - h = boundingbox[:,3]-boundingbox[:,1]+1 - b1 = boundingbox[:,0]+reg[:,0]*w - b2 = boundingbox[:,1]+reg[:,1]*h - b3 = boundingbox[:,2]+reg[:,2]*w - b4 = boundingbox[:,3]+reg[:,3]*h - boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) - return boundingbox - -def generateBoundingBox(imap, reg, scale, t): - """Use heatmap to generate bounding boxes""" - stride=2 - cellsize=12 - - imap = np.transpose(imap) - dx1 = np.transpose(reg[:,:,0]) - dy1 = np.transpose(reg[:,:,1]) - dx2 = np.transpose(reg[:,:,2]) - dy2 = np.transpose(reg[:,:,3]) - y, x = np.where(imap >= t) - if y.shape[0]==1: - dx1 = np.flipud(dx1) - dy1 = np.flipud(dy1) - dx2 = np.flipud(dx2) - dy2 = np.flipud(dy2) - score = imap[(y,x)] - reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) - if reg.size==0: - reg = np.empty((0,3)) - bb = np.transpose(np.vstack([y,x])) - q1 = np.fix((stride*bb+1)/scale) - q2 = np.fix((stride*bb+cellsize-1+1)/scale) - boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) - return boundingbox, reg - -# function pick = nms(boxes,threshold,type) -def nms(boxes, threshold, method): - if boxes.size==0: - return np.empty((0,3)) - x1 = boxes[:,0] - y1 = boxes[:,1] - x2 = boxes[:,2] - y2 = boxes[:,3] - s = boxes[:,4] - area = (x2-x1+1) * (y2-y1+1) - I = np.argsort(s) - pick = np.zeros_like(s, dtype=np.int16) - counter = 0 - while I.size>0: - i = I[-1] - pick[counter] = i - counter += 1 - idx = I[0:-1] - xx1 = np.maximum(x1[i], x1[idx]) - yy1 = np.maximum(y1[i], y1[idx]) - xx2 = np.minimum(x2[i], x2[idx]) - yy2 = np.minimum(y2[i], y2[idx]) - w = np.maximum(0.0, xx2-xx1+1) - h = np.maximum(0.0, yy2-yy1+1) - inter = w * h - if method == 'Min': - o = inter / np.minimum(area[i], area[idx]) - else: - o = inter / (area[i] + area[idx] - inter) - I = I[np.where(o<=threshold)] - pick = pick[0:counter] - return pick - -# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) -def pad(total_boxes, w, h): - """Compute the padding coordinates (pad the bounding boxes to square)""" - tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) - tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) - numbox = total_boxes.shape[0] - - dx = np.ones((numbox), dtype=np.int32) - dy = np.ones((numbox), dtype=np.int32) - edx = tmpw.copy().astype(np.int32) - edy = tmph.copy().astype(np.int32) - - x = total_boxes[:,0].copy().astype(np.int32) - y = total_boxes[:,1].copy().astype(np.int32) - ex = total_boxes[:,2].copy().astype(np.int32) - ey = total_boxes[:,3].copy().astype(np.int32) - - tmp = np.where(ex>w) - edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) - ex[tmp] = w - - tmp = np.where(ey>h) - edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) - ey[tmp] = h - - tmp = 
np.where(x<1) - dx.flat[tmp] = np.expand_dims(2-x[tmp],1) - x[tmp] = 1 - - tmp = np.where(y<1) - dy.flat[tmp] = np.expand_dims(2-y[tmp],1) - y[tmp] = 1 - - return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph - -# function [bboxA] = rerec(bboxA) -def rerec(bboxA): - """Convert bboxA to square.""" - h = bboxA[:,3]-bboxA[:,1] - w = bboxA[:,2]-bboxA[:,0] - l = np.maximum(w, h) - bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 - bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 - bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) - return bboxA - -def imresample(img, sz): - im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_LINEAR) #@UndefinedVariable - return im_data diff --git a/facelib/PoseEstimator.py b/facelib/PoseEstimator.py deleted file mode 100644 index 7939d90..0000000 --- a/facelib/PoseEstimator.py +++ /dev/null @@ -1,302 +0,0 @@ -import os -import pickle -from functools import partial -from pathlib import Path - -import cv2 -import numpy as np - -from interact import interact as io -from nnlib import nnlib - -""" -PoseEstimator estimates pitch, yaw, roll, from FAN aligned face. -trained on https://www.umdfaces.io -based on https://arxiv.org/pdf/1901.06778.pdf HYBRID COARSE-FINE CLASSIFICATION FOR HEAD POSE ESTIMATION -""" - -class PoseEstimator(object): - VERSION = 1 - def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): - exec( nnlib.import_all(), locals(), globals() ) - self.resolution = resolution - - self.angles = [60, 45, 30, 10, 2] - self.alpha_cat_losses = [7,5,3,1,1] - self.class_nums = [ angle+1 for angle in self.angles ] - self.encoder, self.decoder, self.model_l = PoseEstimator.BuildModels(resolution, class_nums=self.class_nums) - - if weights_file_root is not None: - weights_file_root = Path(weights_file_root) - else: - weights_file_root = Path(__file__).parent - - self.encoder_weights_path = weights_file_root / ('PoseEst_%d_%s_enc.h5' % (resolution, face_type_str) ) - self.decoder_weights_path = weights_file_root / ('PoseEst_%d_%s_dec.h5' % (resolution, face_type_str) ) - self.l_weights_path = weights_file_root / ('PoseEst_%d_%s_l.h5' % (resolution, face_type_str) ) - - self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) ) - - self.input_bgr_shape = (resolution, resolution, 3) - - def ResamplerFunc(input): - mean_t, logvar_t = input - return mean_t + K.exp(0.5*logvar_t)*K.random_normal(K.shape(mean_t)) - - self.BVAEResampler = Lambda ( lambda x: x[0] + K.random_normal(K.shape(x[0])) * K.sqrt(K.exp(0.5*x[1])), - output_shape=K.int_shape(self.encoder.outputs[0])[1:] ) - - inp_t = Input (self.input_bgr_shape) - inp_real_t = Input (self.input_bgr_shape) - inp_pitch_t = Input ( (1,) ) - inp_yaw_t = Input ( (1,) ) - inp_roll_t = Input ( (1,) ) - - - mean_t, logvar_t = self.encoder(inp_t) - - latent_t = self.BVAEResampler([mean_t, logvar_t]) - - if training: - bgr_t = self.decoder (latent_t) - pyrs_t = self.model_l(latent_t) - else: - self.model = Model(inp_t, self.model_l(latent_t) ) - pyrs_t = self.model(inp_t) - - if load_weights: - if training: - self.encoder.load_weights (str(self.encoder_weights_path)) - self.decoder.load_weights (str(self.decoder_weights_path)) - self.model_l.load_weights (str(self.l_weights_path)) - else: - self.model.load_weights (str(self.model_weights_path)) - - else: - def gather_Conv2D_layers(models_list): - conv_weights_list = [] - for model in models_list: - for layer in model.layers: - layer_type = type(layer) - if layer_type == keras.layers.Conv2D: - 
conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - elif layer_type == keras.engine.training.Model: - conv_weights_list += gather_Conv2D_layers ([layer]) - return conv_weights_list - - CAInitializerMP ( gather_Conv2D_layers( [self.encoder, self.decoder] ) ) - - - if training: - inp_pyrs_t = [] - for class_num in self.class_nums: - inp_pyrs_t += [ Input ((3,)) ] - - pyr_loss = [] - - for i,class_num in enumerate(self.class_nums): - a = self.alpha_cat_losses[i] - pyr_loss += [ a*K.mean( K.square ( inp_pyrs_t[i] - pyrs_t[i]) ) ] - - def BVAELoss(beta=4): - def func(input): - mean_t, logvar_t = input - return beta * K.mean ( K.sum( 0.5*(K.exp(logvar_t)+ K.square(mean_t)-logvar_t-1), axis=1) ) - return func - - BVAE_loss = BVAELoss()([mean_t, logvar_t]) - - bgr_loss = K.mean(K.sum(K.abs(inp_real_t-bgr_t), axis=[1,2,3])) - - G_loss = BVAE_loss+bgr_loss - pyr_loss = sum(pyr_loss) - - - self.train = K.function ([inp_t, inp_real_t], - [ G_loss ], Adam(lr=0.0005, beta_1=0.9, beta_2=0.999).get_updates( G_loss, self.encoder.trainable_weights+self.decoder.trainable_weights ) ) - - self.train_l = K.function ([inp_t] + inp_pyrs_t, - [pyr_loss], Adam(lr=0.0001).get_updates( pyr_loss, self.model_l.trainable_weights) ) - - - self.view = K.function ([inp_t], [ bgr_t, pyrs_t[0] ] ) - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def save_weights(self): - self.encoder.save_weights (str(self.encoder_weights_path)) - self.decoder.save_weights (str(self.decoder_weights_path)) - self.model_l.save_weights (str(self.l_weights_path)) - - inp_t = Input (self.input_bgr_shape) - - Model(inp_t, self.model_l(self.BVAEResampler(self.encoder(inp_t))) ).save_weights (str(self.model_weights_path)) - - def train_on_batch(self, warps, imgs, pyr_tanh, skip_bgr_train=False): - if not skip_bgr_train: - bgr_loss, = self.train( [warps, imgs] ) - pyr_loss = 0 - else: - bgr_loss = 0 - - feed = [imgs] - for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)): - a = angle / 2 - c = np.round( (pyr_tanh+1) * a ) / a -1 #.astype(K.floatx()) - feed += [c] - - pyr_loss, = self.train_l(feed) - - return bgr_loss, pyr_loss - - def extract (self, input_image, is_input_tanh=False): - if is_input_tanh: - raise NotImplemented("is_input_tanh") - - input_shape_len = len(input_image.shape) - if input_shape_len == 3: - input_image = input_image[np.newaxis,...] 
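# --- editor's sketch (not part of the original diff) ---------------------------
# NumPy restatement of the two formulas the (now removed) PoseEstimator relied on:
# the reparameterization trick from ResamplerFunc and the beta-VAE KL term from
# BVAELoss above. Illustrative only; beta=4 matches the default used above.
import numpy as np

def reparameterize(mean, logvar, rng=None):
    # z = mu + sigma * eps,  sigma = exp(0.5 * logvar),  eps ~ N(0, I)
    rng = rng if rng is not None else np.random.default_rng()
    return mean + np.exp(0.5 * logvar) * rng.standard_normal(mean.shape)

def bvae_kl(mean, logvar, beta=4.0):
    # beta * (batch mean of KL( N(mu, sigma^2) || N(0, I) ), summed over latent dims)
    return beta * np.mean(np.sum(0.5 * (np.exp(logvar) + mean ** 2 - logvar - 1.0), axis=1))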
- - bgr, result, = self.view( [input_image] ) - - - #result = np.clip ( result / (self.angles[0] / 2) - 1, 0.0, 1.0 ) - - if input_shape_len == 3: - bgr = bgr[0] - result = result[0] - - return bgr, result - - @staticmethod - def BuildModels ( resolution, class_nums, ae_dims=128): - exec( nnlib.import_all(), locals(), globals() ) - - x = inp = Input ( (resolution,resolution,3) ) - x = PoseEstimator.EncFlow(ae_dims)(x) - encoder = Model(inp,x) - - x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) - x = PoseEstimator.DecFlow(resolution, ae_dims)(x) - decoder = Model(inp,x) - - x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) - x = PoseEstimator.LatentFlow(class_nums=class_nums)(x) - model_l = Model(inp, x ) - - return encoder, decoder, model_l - - @staticmethod - def EncFlow(ae_dims): - exec( nnlib.import_all(), locals(), globals() ) - - def downscale (dim, **kwargs): - def func(x): - return ReLU() ( Conv2D(dim, kernel_size=5, strides=2, padding='same')(x)) - return func - - - downscale = partial(downscale) - - ed_ch_dims = 128 - - def func(input): - x = input - x = downscale(64)(x) - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = Flatten()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - mean = Dense(ae_dims)(x) - logvar = Dense(ae_dims)(x) - - return mean, logvar - - return func - - @staticmethod - def DecFlow(resolution, ae_dims): - exec( nnlib.import_all(), locals(), globals() ) - - def upscale (dim, strides=2, **kwargs): - def func(x): - return ReLU()( ( Conv2DTranspose(dim, kernel_size=3, strides=strides, padding='same')(x)) ) - return func - - def to_bgr (output_nc, **kwargs): - def func(x): - return Conv2D(output_nc, kernel_size=5, padding='same', activation='sigmoid')(x) - return func - - upscale = partial(upscale) - lowest_dense_res = resolution // 16 - - def func(input): - x = input - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense( (lowest_dense_res*lowest_dense_res*256) ) (x) - x = ReLU()(x) - - x = Reshape( (lowest_dense_res,lowest_dense_res,256) )(x) - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - x = to_bgr(3)(x) - - return x - return func - - @staticmethod - def LatentFlow(class_nums): - exec( nnlib.import_all(), locals(), globals() ) - - def func(latent): - x = latent - - x = Dense(1024, activation='relu')(x) - x = Dropout(0.5)(x) - x = Dense(1024, activation='relu')(x) - # x = Dropout(0.5)(x) - # x = Dense(4096, activation='relu')(x) - - output = [] - for class_num in class_nums: - pyr = Dense(3, activation='tanh')(x) - output += [pyr] - - return output - - #y = Dropout(0.5)(y) - #y = Dense(1024, activation='relu')(y) - return func - - -# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg') -# x = resnet50(x) -# output = [] -# for class_num in class_nums: -# pitch = Dense(class_num)(x) -# yaw = Dense(class_num)(x) -# roll = Dense(class_num)(x) -# output += [pitch,yaw,roll] - -# return output diff --git a/facelib/S3FD.h5 b/facelib/S3FD.npy similarity index 84% rename from facelib/S3FD.h5 rename to facelib/S3FD.npy index aed0938..66c054f 100644 Binary files a/facelib/S3FD.h5 and b/facelib/S3FD.npy differ diff --git a/facelib/S3FDExtractor.py b/facelib/S3FDExtractor.py index adafae9..c58d931 100644 --- a/facelib/S3FDExtractor.py +++ b/facelib/S3FDExtractor.py @@ -4,21 +4,171 @@ from pathlib import Path import cv2 import numpy as np -from nnlib import 
nnlib +from core.leras import nn class S3FDExtractor(object): - def __init__(self, do_dummy_predict=False): - exec( nnlib.import_all(), locals(), globals() ) - - model_path = Path(__file__).parent / "S3FD.h5" - if not model_path.exists(): - return None - - self.model = nnlib.keras.models.load_model ( str(model_path) ) + def __init__(self, place_model_on_cpu=False): + nn.initialize() + tf = nn.tf - if do_dummy_predict: - self.extract ( np.zeros( (640,640,3), dtype=np.uint8) ) + model_path = Path(__file__).parent / "S3FD.npy" + if not model_path.exists(): + raise Exception("Unable to load S3FD.npy") + class L2Norm(nn.LayerBase): + def __init__(self, n_channels, **kwargs): + self.n_channels = n_channels + super().__init__(**kwargs) + + def build_weights(self): + self.weight = tf.get_variable ("weight", (1, 1, 1, self.n_channels), dtype=nn.tf_floatx, initializer=tf.initializers.ones ) + + def get_weights(self): + return [self.weight] + + def __call__(self, inputs): + x = inputs + x = x / (tf.sqrt( tf.reduce_sum( tf.pow(x, 2), axis=-1, keepdims=True ) ) + 1e-10) * self.weight + return x + + class S3FD(nn.ModelBase): + def __init__(self): + super().__init__(name='S3FD') + + def on_build(self): + self.minus = tf.constant([104,117,123], dtype=nn.tf_floatx ) + self.conv1_1 = nn.Conv2D(3, 64, kernel_size=3, strides=1, padding='SAME') + self.conv1_2 = nn.Conv2D(64, 64, kernel_size=3, strides=1, padding='SAME') + + self.conv2_1 = nn.Conv2D(64, 128, kernel_size=3, strides=1, padding='SAME') + self.conv2_2 = nn.Conv2D(128, 128, kernel_size=3, strides=1, padding='SAME') + + self.conv3_1 = nn.Conv2D(128, 256, kernel_size=3, strides=1, padding='SAME') + self.conv3_2 = nn.Conv2D(256, 256, kernel_size=3, strides=1, padding='SAME') + self.conv3_3 = nn.Conv2D(256, 256, kernel_size=3, strides=1, padding='SAME') + + self.conv4_1 = nn.Conv2D(256, 512, kernel_size=3, strides=1, padding='SAME') + self.conv4_2 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + self.conv4_3 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + + self.conv5_1 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + self.conv5_2 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + self.conv5_3 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + + self.fc6 = nn.Conv2D(512, 1024, kernel_size=3, strides=1, padding=3) + self.fc7 = nn.Conv2D(1024, 1024, kernel_size=1, strides=1, padding='SAME') + + self.conv6_1 = nn.Conv2D(1024, 256, kernel_size=1, strides=1, padding='SAME') + self.conv6_2 = nn.Conv2D(256, 512, kernel_size=3, strides=2, padding='SAME') + + self.conv7_1 = nn.Conv2D(512, 128, kernel_size=1, strides=1, padding='SAME') + self.conv7_2 = nn.Conv2D(128, 256, kernel_size=3, strides=2, padding='SAME') + + self.conv3_3_norm = L2Norm(256) + self.conv4_3_norm = L2Norm(512) + self.conv5_3_norm = L2Norm(512) + + + self.conv3_3_norm_mbox_conf = nn.Conv2D(256, 4, kernel_size=3, strides=1, padding='SAME') + self.conv3_3_norm_mbox_loc = nn.Conv2D(256, 4, kernel_size=3, strides=1, padding='SAME') + + self.conv4_3_norm_mbox_conf = nn.Conv2D(512, 2, kernel_size=3, strides=1, padding='SAME') + self.conv4_3_norm_mbox_loc = nn.Conv2D(512, 4, kernel_size=3, strides=1, padding='SAME') + + self.conv5_3_norm_mbox_conf = nn.Conv2D(512, 2, kernel_size=3, strides=1, padding='SAME') + self.conv5_3_norm_mbox_loc = nn.Conv2D(512, 4, kernel_size=3, strides=1, padding='SAME') + + self.fc7_mbox_conf = nn.Conv2D(1024, 2, kernel_size=3, strides=1, padding='SAME') + self.fc7_mbox_loc = nn.Conv2D(1024, 
4, kernel_size=3, strides=1, padding='SAME') + + self.conv6_2_mbox_conf = nn.Conv2D(512, 2, kernel_size=3, strides=1, padding='SAME') + self.conv6_2_mbox_loc = nn.Conv2D(512, 4, kernel_size=3, strides=1, padding='SAME') + + self.conv7_2_mbox_conf = nn.Conv2D(256, 2, kernel_size=3, strides=1, padding='SAME') + self.conv7_2_mbox_loc = nn.Conv2D(256, 4, kernel_size=3, strides=1, padding='SAME') + + def forward(self, inp): + x, = inp + x = x - self.minus + x = tf.nn.relu(self.conv1_1(x)) + x = tf.nn.relu(self.conv1_2(x)) + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.conv2_1(x)) + x = tf.nn.relu(self.conv2_2(x)) + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.conv3_1(x)) + x = tf.nn.relu(self.conv3_2(x)) + x = tf.nn.relu(self.conv3_3(x)) + f3_3 = x + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.conv4_1(x)) + x = tf.nn.relu(self.conv4_2(x)) + x = tf.nn.relu(self.conv4_3(x)) + f4_3 = x + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.conv5_1(x)) + x = tf.nn.relu(self.conv5_2(x)) + x = tf.nn.relu(self.conv5_3(x)) + f5_3 = x + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.fc6(x)) + x = tf.nn.relu(self.fc7(x)) + ffc7 = x + + x = tf.nn.relu(self.conv6_1(x)) + x = tf.nn.relu(self.conv6_2(x)) + f6_2 = x + + x = tf.nn.relu(self.conv7_1(x)) + x = tf.nn.relu(self.conv7_2(x)) + f7_2 = x + + f3_3 = self.conv3_3_norm(f3_3) + f4_3 = self.conv4_3_norm(f4_3) + f5_3 = self.conv5_3_norm(f5_3) + + cls1 = self.conv3_3_norm_mbox_conf(f3_3) + reg1 = self.conv3_3_norm_mbox_loc(f3_3) + + cls2 = tf.nn.softmax(self.conv4_3_norm_mbox_conf(f4_3)) + reg2 = self.conv4_3_norm_mbox_loc(f4_3) + + cls3 = tf.nn.softmax(self.conv5_3_norm_mbox_conf(f5_3)) + reg3 = self.conv5_3_norm_mbox_loc(f5_3) + + cls4 = tf.nn.softmax(self.fc7_mbox_conf(ffc7)) + reg4 = self.fc7_mbox_loc(ffc7) + + cls5 = tf.nn.softmax(self.conv6_2_mbox_conf(f6_2)) + reg5 = self.conv6_2_mbox_loc(f6_2) + + cls6 = tf.nn.softmax(self.conv7_2_mbox_conf(f7_2)) + reg6 = self.conv7_2_mbox_loc(f7_2) + + # max-out background label + bmax = tf.maximum(tf.maximum(cls1[:,:,:,0:1], cls1[:,:,:,1:2]), cls1[:,:,:,2:3]) + + cls1 = tf.concat ([bmax, cls1[:,:,:,3:4] ], axis=-1) + cls1 = tf.nn.softmax(cls1) + + return [cls1, reg1, cls2, reg2, cls3, reg3, cls4, reg4, cls5, reg5, cls6, reg6] + + e = None + if place_model_on_cpu: + e = tf.device("/CPU:0") + + if e is not None: e.__enter__() + self.model = S3FD() + self.model.load_weights (model_path) + if e is not None: e.__exit__(None,None,None) + + self.model.build_for_run ([ ( tf.float32, (None,None,3) ) ]) + def __enter__(self): return self @@ -40,7 +190,7 @@ class S3FDExtractor(object): input_scale = d / scale_to input_image = cv2.resize (input_image, ( int(w/input_scale), int(h/input_scale) ), interpolation=cv2.INTER_LINEAR) - olist = self.model.predict( np.expand_dims(input_image,0) ) + olist = self.model.run ([ input_image[None,...] 
] ) detected_faces = [] for ltrb in self.refine (olist): @@ -75,8 +225,8 @@ class S3FDExtractor(object): s_d2 = stride / 2 s_m4 = stride * 4 - for hindex, windex in zip(*np.where(ocls > 0.05)): - score = ocls[hindex, windex] + for hindex, windex in zip(*np.where(ocls[...,1] > 0.05)): + score = ocls[hindex, windex, 1] loc = oreg[hindex, windex, :] priors = np.array([windex * stride + s_d2, hindex * stride + s_d2, s_m4, s_m4]) priors_2p = priors[2:] diff --git a/facelib/TernausNet.py b/facelib/TernausNet.py new file mode 100644 index 0000000..a8fed6c --- /dev/null +++ b/facelib/TernausNet.py @@ -0,0 +1,318 @@ +import os +import pickle +from functools import partial +from pathlib import Path + +import cv2 +import numpy as np + +from core.interact import interact as io +from core.leras import nn + +""" +Dataset used to train located in official DFL mega.nz folder +https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg + +using https://github.com/ternaus/TernausNet +TernausNet: U-Net with VGG11 Encoder Pre-Trained on ImageNet for Image Segmentation +""" + +class TernausNet(object): + VERSION = 1 + def __init__ (self, name, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False, place_model_on_cpu=False): + nn.initialize() + tf = nn.tf + + class Ternaus(nn.ModelBase): + def on_build(self, in_ch, ch): + + self.features_0 = nn.Conv2D (in_ch, ch, kernel_size=3, padding='SAME') + self.blurpool_0 = nn.BlurPool (filt_size=3) + + self.features_3 = nn.Conv2D (ch, ch*2, kernel_size=3, padding='SAME') + self.blurpool_3 = nn.BlurPool (filt_size=3) + + self.features_6 = nn.Conv2D (ch*2, ch*4, kernel_size=3, padding='SAME') + self.features_8 = nn.Conv2D (ch*4, ch*4, kernel_size=3, padding='SAME') + self.blurpool_8 = nn.BlurPool (filt_size=3) + + self.features_11 = nn.Conv2D (ch*4, ch*8, kernel_size=3, padding='SAME') + self.features_13 = nn.Conv2D (ch*8, ch*8, kernel_size=3, padding='SAME') + self.blurpool_13 = nn.BlurPool (filt_size=3) + + self.features_16 = nn.Conv2D (ch*8, ch*8, kernel_size=3, padding='SAME') + self.features_18 = nn.Conv2D (ch*8, ch*8, kernel_size=3, padding='SAME') + self.blurpool_18 = nn.BlurPool (filt_size=3) + + self.conv_center = nn.Conv2D (ch*8, ch*8, kernel_size=3, padding='SAME') + + self.conv1_up = nn.Conv2DTranspose (ch*8, ch*4, kernel_size=3, padding='SAME') + self.conv1 = nn.Conv2D (ch*12, ch*8, kernel_size=3, padding='SAME') + + self.conv2_up = nn.Conv2DTranspose (ch*8, ch*4, kernel_size=3, padding='SAME') + self.conv2 = nn.Conv2D (ch*12, ch*8, kernel_size=3, padding='SAME') + + self.conv3_up = nn.Conv2DTranspose (ch*8, ch*2, kernel_size=3, padding='SAME') + self.conv3 = nn.Conv2D (ch*6, ch*4, kernel_size=3, padding='SAME') + + self.conv4_up = nn.Conv2DTranspose (ch*4, ch, kernel_size=3, padding='SAME') + self.conv4 = nn.Conv2D (ch*3, ch*2, kernel_size=3, padding='SAME') + + self.conv5_up = nn.Conv2DTranspose (ch*2, ch//2, kernel_size=3, padding='SAME') + self.conv5 = nn.Conv2D (ch//2+ch, ch, kernel_size=3, padding='SAME') + + self.out_conv = nn.Conv2D (ch, 1, kernel_size=3, padding='SAME') + + def forward(self, inp): + x, = inp + + x = x0 = tf.nn.relu(self.features_0(x)) + x = self.blurpool_0(x) + + x = x1 = tf.nn.relu(self.features_3(x)) + x = self.blurpool_3(x) + + x = tf.nn.relu(self.features_6(x)) + x = x2 = tf.nn.relu(self.features_8(x)) + x = self.blurpool_8(x) + + x = tf.nn.relu(self.features_11(x)) + x = x3 = tf.nn.relu(self.features_13(x)) + x = self.blurpool_13(x) + + x = tf.nn.relu(self.features_16(x)) + x = x4 = 
tf.nn.relu(self.features_18(x)) + x = self.blurpool_18(x) + + x = self.conv_center(x) + + x = tf.nn.relu(self.conv1_up(x)) + x = tf.concat( [x,x4], -1) + x = tf.nn.relu(self.conv1(x)) + + x = tf.nn.relu(self.conv2_up(x)) + x = tf.concat( [x,x3], -1) + x = tf.nn.relu(self.conv2(x)) + + x = tf.nn.relu(self.conv3_up(x)) + x = tf.concat( [x,x2], -1) + x = tf.nn.relu(self.conv3(x)) + + x = tf.nn.relu(self.conv4_up(x)) + x = tf.concat( [x,x1], -1) + x = tf.nn.relu(self.conv4(x)) + + x = tf.nn.relu(self.conv5_up(x)) + x = tf.concat( [x,x0], -1) + x = tf.nn.relu(self.conv5(x)) + + x = tf.nn.sigmoid(self.out_conv(x)) + return x + + if weights_file_root is not None: + weights_file_root = Path(weights_file_root) + else: + weights_file_root = Path(__file__).parent + self.weights_path = weights_file_root / ('%s_%d_%s.npy' % (name, resolution, face_type_str) ) + + e = tf.device('/CPU:0') if place_model_on_cpu else None + + if e is not None: e.__enter__() + self.net = Ternaus(3, 64, name='Ternaus') + if load_weights: + self.net.load_weights (self.weights_path) + else: + self.net.init_weights() + if e is not None: e.__exit__(None,None,None) + + self.net.build_for_run ( [(tf.float32, (resolution,resolution,3))] ) + + if training: + raise Exception("training not supported yet") + + + """ + if training: + try: + with open( Path(__file__).parent / 'vgg11_enc_weights.npy', 'rb' ) as f: + d = pickle.loads (f.read()) + + for i in [0,3,6,8,11,13,16,18]: + s = 'features.%d' % i + + self.model.get_layer (s).set_weights ( d[s] ) + except: + io.log_err("Unable to load VGG11 pretrained weights from vgg11_enc_weights.npy") + + conv_weights_list = [] + for layer in self.model.layers: + if 'CA.' in layer.name: + conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights + CAInitializerMP ( conv_weights_list ) + """ + + + """ + if training: + inp_t = Input ( (resolution, resolution, 3) ) + real_t = Input ( (resolution, resolution, 1) ) + out_t = self.model(inp_t) + + loss = K.mean(10*K.binary_crossentropy(real_t,out_t) ) + + out_t_diff1 = out_t[:, 1:, :, :] - out_t[:, :-1, :, :] + out_t_diff2 = out_t[:, :, 1:, :] - out_t[:, :, :-1, :] + + total_var_loss = K.mean( 0.1*K.abs(out_t_diff1), axis=[1, 2, 3] ) + K.mean( 0.1*K.abs(out_t_diff2), axis=[1, 2, 3] ) + + opt = Adam(lr=0.0001, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2) + + self.train_func = K.function ( [inp_t, real_t], [K.mean(loss)], opt.get_updates( [loss], self.model.trainable_weights) ) + """ + + def __enter__(self): + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + return False #pass exception between __enter__ and __exit__ to outter level + + def save_weights(self): + self.net.save_weights (str(self.weights_path)) + + def train(self, inp, real): + loss, = self.train_func ([inp, real]) + return loss + + def extract (self, input_image): + input_shape_len = len(input_image.shape) + if input_shape_len == 3: + input_image = input_image[np.newaxis,...] 
+ + result = np.clip ( self.net.run([input_image]), 0, 1.0 ) + result[result < 0.1] = 0 #get rid of noise + + if input_shape_len == 3: + result = result[0] + + return result + + + + + + + +""" + self.weights_path = weights_file_root / ('%s_%d_%s.h5' % (name, resolution, face_type_str) ) + + + self.net.build() + + + self.net.features_0.set_weights ( self.model.get_layer('features.0').get_weights() ) + self.net.features_3.set_weights ( self.model.get_layer('features.3').get_weights() ) + self.net.features_6.set_weights ( self.model.get_layer('features.6').get_weights() ) + self.net.features_8.set_weights ( self.model.get_layer('features.8').get_weights() ) + self.net.features_11.set_weights ( self.model.get_layer('features.11').get_weights() ) + self.net.features_13.set_weights ( self.model.get_layer('features.13').get_weights() ) + self.net.features_16.set_weights ( self.model.get_layer('features.16').get_weights() ) + self.net.features_18.set_weights ( self.model.get_layer('features.18').get_weights() ) + + self.net.conv_center.set_weights ( self.model.get_layer('CA.1').get_weights() ) + + self.net.conv1_up.set_weights ( self.model.get_layer('CA.2').get_weights() ) + self.net.conv1.set_weights ( self.model.get_layer('CA.3').get_weights() ) + + self.net.conv2_up.set_weights ( self.model.get_layer('CA.4').get_weights() ) + self.net.conv2.set_weights ( self.model.get_layer('CA.5').get_weights() ) + + self.net.conv3_up.set_weights ( self.model.get_layer('CA.6').get_weights() ) + self.net.conv3.set_weights ( self.model.get_layer('CA.7').get_weights() ) + + self.net.conv4_up.set_weights ( self.model.get_layer('CA.8').get_weights() ) + self.net.conv4.set_weights ( self.model.get_layer('CA.9').get_weights() ) + + self.net.conv5_up.set_weights ( self.model.get_layer('CA.10').get_weights() ) + self.net.conv5.set_weights ( self.model.get_layer('CA.11').get_weights() ) + + self.net.out_conv.set_weights ( self.model.get_layer('CA.12').get_weights() ) + + self.net.build_for_run ( [ (tf.float32, (resolution,resolution,3)) ]) + self.net.save_weights (self.weights_path2) + + + def extract (self, input_image): + input_shape_len = len(input_image.shape) + if input_shape_len == 3: + input_image = input_image[np.newaxis,...] 
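# --- editor's sketch (not part of the original diff) ---------------------------
# Hedged usage example for the TernausNet segmenter added in this file. The
# "FANSeg" model name, the 256px resolution and the 0..1 float BGR input are
# assumptions made for illustration; weights are looked up as
# facelib/<name>_<resolution>_<face_type>.npy, as built above.
import cv2
import numpy as np
from facelib import TernausNet

seg = TernausNet("FANSeg", 256, "full_face", place_model_on_cpu=True)

face = cv2.imread("aligned_face.jpg").astype(np.float32) / 255.0
face = cv2.resize(face, (256, 256), interpolation=cv2.INTER_LANCZOS4)

mask = seg.extract(face)                      # (256, 256, 1) float mask in [0, 1], small values zeroed
cv2.imwrite("face_mask.png", (mask * 255).astype(np.uint8))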
+ + result = np.clip ( self.model.predict( [input_image] ), 0, 1.0 ) + result[result < 0.1] = 0 #get rid of noise + + if input_shape_len == 3: + result = result[0] + + return result + + + @staticmethod + def BuildModel ( resolution, ngf=64): + exec( nn.initialize(), locals(), globals() ) + inp = Input ( (resolution,resolution,3) ) + x = inp + x = TernausNet.Flow(ngf=ngf)(x) + model = Model(inp,x) + return model + + @staticmethod + def Flow(ngf=64): + exec( nn.initialize(), locals(), globals() ) + + def func(input): + x = input + + x0 = x = Conv2D(ngf, kernel_size=3, strides=1, padding='same', activation='relu', name='features.0')(x) + x = BlurPool(filt_size=3)(x) + + x1 = x = Conv2D(ngf*2, kernel_size=3, strides=1, padding='same', activation='relu', name='features.3')(x) + x = BlurPool(filt_size=3)(x) + + x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.6')(x) + x2 = x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.8')(x) + x = BlurPool(filt_size=3)(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.11')(x) + x3 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.13')(x) + x = BlurPool(filt_size=3)(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.16')(x) + x4 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.18')(x) + x = BlurPool(filt_size=3)(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', name='CA.1')(x) + + x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu', name='CA.2') (x) + x = Concatenate(axis=3)([ x, x4]) + x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu', name='CA.3') (x) + + x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu', name='CA.4') (x) + x = Concatenate(axis=3)([ x, x3]) + x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu', name='CA.5') (x) + + x = Conv2DTranspose (ngf*2, 3, strides=2, padding='same', activation='relu', name='CA.6') (x) + x = Concatenate(axis=3)([ x, x2]) + x = Conv2D (ngf*4, 3, strides=1, padding='same', activation='relu', name='CA.7') (x) + + x = Conv2DTranspose (ngf, 3, strides=2, padding='same', activation='relu', name='CA.8') (x) + x = Concatenate(axis=3)([ x, x1]) + x = Conv2D (ngf*2, 3, strides=1, padding='same', activation='relu', name='CA.9') (x) + + x = Conv2DTranspose (ngf // 2, 3, strides=2, padding='same', activation='relu', name='CA.10') (x) + x = Concatenate(axis=3)([ x, x0]) + x = Conv2D (ngf, 3, strides=1, padding='same', activation='relu', name='CA.11') (x) + + return Conv2D(1, 3, strides=1, padding='same', activation='sigmoid', name='CA.12')(x) + + + return func +""" diff --git a/facelib/__init__.py b/facelib/__init__.py index cde2ab5..ca3292e 100644 --- a/facelib/__init__.py +++ b/facelib/__init__.py @@ -1,7 +1,5 @@ from .FaceType import FaceType -from .DLIBExtractor import DLIBExtractor -from .MTCExtractor import MTCExtractor from .S3FDExtractor import S3FDExtractor from .FANExtractor import FANExtractor -from .PoseEstimator import PoseEstimator -from .FaceEnhancer import FaceEnhancer \ No newline at end of file +from .FaceEnhancer import FaceEnhancer +from .TernausNet import TernausNet \ No newline at end of file diff --git a/facelib/mmod_human_face_detector.dat b/facelib/mmod_human_face_detector.dat deleted file mode 100644 index f1f73a5..0000000 Binary 
files a/facelib/mmod_human_face_detector.dat and /dev/null differ diff --git a/facelib/mtcnn_onet.h5 b/facelib/mtcnn_onet.h5 deleted file mode 100644 index bd615de..0000000 Binary files a/facelib/mtcnn_onet.h5 and /dev/null differ diff --git a/facelib/mtcnn_pnet.h5 b/facelib/mtcnn_pnet.h5 deleted file mode 100644 index e13f81b..0000000 Binary files a/facelib/mtcnn_pnet.h5 and /dev/null differ diff --git a/facelib/mtcnn_rnet.h5 b/facelib/mtcnn_rnet.h5 deleted file mode 100644 index 798a807..0000000 Binary files a/facelib/mtcnn_rnet.h5 and /dev/null differ diff --git a/nnlib/vgg11_enc_weights.npy b/facelib/vgg11_enc_weights.npy similarity index 100% rename from nnlib/vgg11_enc_weights.npy rename to facelib/vgg11_enc_weights.npy diff --git a/imagelib/RankSRGAN.h5 b/imagelib/RankSRGAN.h5 deleted file mode 100644 index 765ae2e..0000000 Binary files a/imagelib/RankSRGAN.h5 and /dev/null differ diff --git a/imagelib/RankSRGAN.py b/imagelib/RankSRGAN.py deleted file mode 100644 index 529dea8..0000000 --- a/imagelib/RankSRGAN.py +++ /dev/null @@ -1,109 +0,0 @@ -import numpy as np -import cv2 -from pathlib import Path -from nnlib import nnlib -from interact import interact as io - -class RankSRGAN(): - def __init__(self): - exec( nnlib.import_all(), locals(), globals() ) - - class PixelShufflerTorch(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShufflerTorch, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc = c // (rh * rw) - - out = inputs - out = K.permute_dimensions(out, (0, 3, 1, 2)) #NCHW - - out = K.reshape(out, (batch_size, oc, rh, rw, h, w)) - out = K.permute_dimensions(out, (0, 1, 4, 2, 5, 3)) - out = K.reshape(out, (batch_size, oc, oh, ow)) - - out = K.permute_dimensions(out, (0, 2, 3, 1)) - return out - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + str(4) + '; Received input shape:', str(input_shape)) - - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShufflerTorch, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - - def res_block(inp, name_prefix): - x = inp - x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', activation="relu", name=name_prefix+"0")(x) - x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', name=name_prefix+"2")(x) - return Add()([inp,x]) - - ndf = 64 - nb = 16 - inp = Input ( (None, None,3) ) - x = inp - - x = x0 = Conv2D (ndf, kernel_size=3, strides=1, padding='same', name="model0")(x) - for i in range(nb): - x = res_block(x, "model1%.2d" %i ) - x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', name="model1160")(x) - x = Add()([x0,x]) - - x = 
ReLU() ( PixelShufflerTorch() ( Conv2D (ndf*4, kernel_size=3, strides=1, padding='same', name="model2")(x) ) ) - x = ReLU() ( PixelShufflerTorch() ( Conv2D (ndf*4, kernel_size=3, strides=1, padding='same', name="model5")(x) ) ) - - x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', activation="relu", name="model8")(x) - x = Conv2D (3, kernel_size=3, strides=1, padding='same', name="model10")(x) - self.model = Model(inp, x ) - self.model.load_weights ( Path(__file__).parent / 'RankSRGAN.h5') - - def upscale(self, img, scale=2, is_bgr=True, is_float=True): - if scale not in [2,4]: - raise ValueError ("RankSRGAN: supported scale are 2 or 4.") - - if not is_bgr: - img = img[...,::-1] - - if not is_float: - img /= 255.0 - - h, w = img.shape[:2] - ch = img.shape[2] if len(img.shape) >= 3 else 1 - - output = self.model.predict([img[None,...]])[0] - - if scale == 2: - output = cv2.resize (output, (w*scale, h*scale), cv2.INTER_CUBIC) - - if not is_float: - output = np.clip (output * 255.0, 0, 255.0) - - if not is_bgr: - output = output[...,::-1] - - return output \ No newline at end of file diff --git a/main.py b/main.py index 2166817..f7a78ba 100644 --- a/main.py +++ b/main.py @@ -1,14 +1,17 @@ if __name__ == "__main__": + from core.leras import nn + nn.initialize_main_env() + import os import sys import time import argparse import multiprocessing multiprocessing.set_start_method("spawn") - from utils import Path_utils - from utils import os_utils + from core import pathex + from core import osex from pathlib import Path - from interact import interact as io + from core.interact import interact as io if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 6): raise Exception("This program requires at least Python 3.6") @@ -21,36 +24,37 @@ if __name__ == "__main__": subparsers = parser.add_subparsers() def process_extract(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import Extractor - Extractor.main( arguments.input_dir, - arguments.output_dir, - arguments.debug_dir, - arguments.detector, - arguments.manual_fix, - arguments.manual_output_debug_fix, - arguments.manual_window_size, - face_type=arguments.face_type, - device_args={'cpu_only' : arguments.cpu_only, - 'multi_gpu' : arguments.multi_gpu, - } + Extractor.main( detector = arguments.detector, + input_path = Path(arguments.input_dir), + output_path = Path(arguments.output_dir), + output_debug = arguments.output_debug, + manual_fix = arguments.manual_fix, + manual_output_debug_fix = arguments.manual_output_debug_fix, + manual_window_size = arguments.manual_window_size, + face_type = arguments.face_type, + cpu_only = arguments.cpu_only, + force_gpu_idxs = [ int(x) for x in arguments.force_gpu_idxs.split(',') ] if arguments.force_gpu_idxs is not None else None, ) p = subparsers.add_parser( "extract", help="Extract the faces from a pictures.") + p.add_argument('--detector', dest="detector", choices=['s3fd','manual'], default=None, help="Type of detector.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. 
This is where the extracted files will be stored.") - p.add_argument('--debug-dir', action=fixPathAction, dest="debug_dir", help="Writes debug images to this directory.") + p.add_argument('--output-debug', action="store_true", dest="output_debug", default=None, help="Writes debug images to _debug\ directory.") + p.add_argument('--no-output-debug', action="store_false", dest="output_debug", default=None, help="Don't writes debug images to _debug\ directory.") p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'full_face_no_align', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'") - p.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") p.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.") p.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.") p.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=1368, help="Manual fix window size. Default: 1368.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU. 
Forces to use MT extractor.") + p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU..") + p.add_argument('--force-gpu-idxs', dest="force_gpu_idxs", default=None, help="Force to choose GPU indexes separated by comma.") + p.set_defaults (func=process_extract) def process_dev_extract_vggface2_dataset(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import dev_misc dev_misc.extract_vggface2_dataset( arguments.input_dir, device_args={'cpu_only' : arguments.cpu_only, @@ -65,7 +69,7 @@ if __name__ == "__main__": p.set_defaults (func=process_dev_extract_vggface2_dataset) def process_dev_extract_umd_csv(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import dev_misc dev_misc.extract_umd_csv( arguments.input_csv_file, device_args={'cpu_only' : arguments.cpu_only, @@ -81,7 +85,7 @@ if __name__ == "__main__": def process_dev_apply_celebamaskhq(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import dev_misc dev_misc.apply_celebamaskhq( arguments.input_dir ) @@ -90,7 +94,7 @@ if __name__ == "__main__": p.set_defaults (func=process_dev_apply_celebamaskhq) def process_dev_test(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import dev_misc dev_misc.dev_test( arguments.input_dir ) @@ -99,17 +103,17 @@ if __name__ == "__main__": p.set_defaults (func=process_dev_test) def process_sort(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import Sorter - Sorter.main (input_path=arguments.input_dir, sort_by_method=arguments.sort_by_method) + Sorter.main (input_path=Path(arguments.input_dir), sort_by_method=arguments.sort_by_method) p = subparsers.add_parser( "sort", help="Sort faces in a directory.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "face-pitch", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "oneface", "final", "final-no-blur", "vggface", "absdiff", "test"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." ) + p.add_argument('--by', dest="sort_by_method", default=None, choices=("blur", "face-yaw", "face-pitch", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "oneface", "final", "absdiff"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." 
) p.set_defaults (func=process_sort) def process_util(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import Util if arguments.convert_png_to_jpg: @@ -158,65 +162,71 @@ if __name__ == "__main__": p.set_defaults (func=process_util) def process_train(arguments): - os_utils.set_process_lowest_prio() - args = {'training_data_src_dir' : arguments.training_data_src_dir, - 'training_data_dst_dir' : arguments.training_data_dst_dir, - 'pretraining_data_dir' : arguments.pretraining_data_dir, - 'model_path' : arguments.model_dir, - 'model_name' : arguments.model_name, - 'no_preview' : arguments.no_preview, - 'debug' : arguments.debug, - 'execute_programs' : [ [int(x[0]), x[1] ] for x in arguments.execute_program ] - } - device_args = {'cpu_only' : arguments.cpu_only, - 'force_gpu_idx' : arguments.force_gpu_idx, - } + osex.set_process_lowest_prio() + + + kwargs = {'model_class_name' : arguments.model_name, + 'saved_models_path' : Path(arguments.model_dir), + 'training_data_src_path' : Path(arguments.training_data_src_dir), + 'training_data_dst_path' : Path(arguments.training_data_dst_dir), + 'pretraining_data_path' : Path(arguments.pretraining_data_dir) if arguments.pretraining_data_dir is not None else None, + 'pretrained_model_path' : Path(arguments.pretrained_model_dir) if arguments.pretrained_model_dir is not None else None, + 'no_preview' : arguments.no_preview, + 'force_model_name' : arguments.force_model_name, + 'force_gpu_idxs' : arguments.force_gpu_idxs, + 'cpu_only' : arguments.cpu_only, + 'execute_programs' : [ [int(x[0]), x[1] ] for x in arguments.execute_program ], + 'debug' : arguments.debug, + } from mainscripts import Trainer - Trainer.main(args, device_args) + Trainer.main(**kwargs) p = subparsers.add_parser( "train", help="Trainer") p.add_argument('--training-data-src-dir', required=True, action=fixPathAction, dest="training_data_src_dir", help="Dir of extracted SRC faceset.") p.add_argument('--training-data-dst-dir', required=True, action=fixPathAction, dest="training_data_dst_dir", help="Dir of extracted DST faceset.") p.add_argument('--pretraining-data-dir', action=fixPathAction, dest="pretraining_data_dir", default=None, help="Optional dir of extracted faceset that will be used in pretraining mode.") - p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") - p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") - p.add_argument('--no-preview', action="store_true", dest="no_preview", default=False, help="Disable preview window.") + p.add_argument('--pretrained-model-dir', action=fixPathAction, dest="pretrained_model_dir", default=None, help="Optional dir of pretrain model files. 
(Currently only for Quick96).") + p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Saved models dir.") + p.add_argument('--model', required=True, dest="model_name", choices=pathex.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Model class name.") p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.") + p.add_argument('--no-preview', action="store_true", dest="no_preview", default=False, help="Disable preview window.") + p.add_argument('--force-model-name', dest="force_model_name", default=None, help="Forcing to choose model name from model/ folder.") p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.") - p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") + p.add_argument('--force-gpu-idxs', dest="force_gpu_idxs", default=None, help="Force to choose GPU indexes separated by comma.") p.add_argument('--execute-program', dest="execute_program", default=[], action='append', nargs='+') p.set_defaults (func=process_train) - def process_convert(arguments): - os_utils.set_process_lowest_prio() - args = {'training_data_src_dir' : arguments.training_data_src_dir, - 'input_dir' : arguments.input_dir, - 'output_dir' : arguments.output_dir, - 'aligned_dir' : arguments.aligned_dir, - 'model_dir' : arguments.model_dir, - 'model_name' : arguments.model_name, + def process_merge(arguments): + osex.set_process_lowest_prio() + kwargs = {'model_class_name' : arguments.model_name, + 'saved_models_path' : Path(arguments.model_dir), + 'training_data_src_path' : Path(arguments.training_data_src_dir) if arguments.training_data_src_dir is not None else None, + 'force_model_name' : arguments.force_model_name, + 'input_path' : Path(arguments.input_dir), + 'output_path' : Path(arguments.output_dir), + 'aligned_path' : Path(arguments.aligned_dir) if arguments.aligned_dir is not None else None, + 'cpu_only' : arguments.cpu_only, + 'force_gpu_idxs' : arguments.force_gpu_idxs, } - device_args = {'cpu_only' : arguments.cpu_only, - 'force_gpu_idx' : arguments.force_gpu_idx, - } - from mainscripts import Converter - Converter.main (args, device_args) + from mainscripts import Merger + Merger.main (**kwargs) - p = subparsers.add_parser( "convert", help="Converter") - p.add_argument('--training-data-src-dir', action=fixPathAction, dest="training_data_src_dir", help="(optional, may be required by some models) Dir of extracted SRC faceset.") + p = subparsers.add_parser( "merge", help="Merger") + p.add_argument('--training-data-src-dir', action=fixPathAction, dest="training_data_src_dir", default=None, help="(optional, may be required by some models) Dir of extracted SRC faceset.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the converted files will be stored.") - p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", help="Aligned directory. This is where the extracted of dst faces stored.") + p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. 
This is where the merged files will be stored.") + p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", default=None, help="Aligned directory. This is where the extracted of dst faces stored.") p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") - p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") - p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.") - p.set_defaults(func=process_convert) + p.add_argument('--model', required=True, dest="model_name", choices=pathex.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Model class name.") + p.add_argument('--force-model-name', dest="force_model_name", default=None, help="Forcing to choose model name from model/ folder.") + p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Merge on CPU.") + p.add_argument('--force-gpu-idxs', dest="force_gpu_idxs", default=None, help="Force to choose GPU indexes separated by comma.") + p.set_defaults(func=process_merge) videoed_parser = subparsers.add_parser( "videoed", help="Video processing.").add_subparsers() def process_videoed_extract_video(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import VideoEd VideoEd.extract_video (arguments.input_file, arguments.output_dir, arguments.output_ext, arguments.fps) p = videoed_parser.add_parser( "extract-video", help="Extract images from video file.") @@ -227,7 +237,7 @@ if __name__ == "__main__": p.set_defaults(func=process_videoed_extract_video) def process_videoed_cut_video(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import VideoEd VideoEd.cut_video (arguments.input_file, arguments.from_time, @@ -243,7 +253,7 @@ if __name__ == "__main__": p.set_defaults(func=process_videoed_cut_video) def process_videoed_denoise_image_sequence(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import VideoEd VideoEd.denoise_image_sequence (arguments.input_dir, arguments.ext, arguments.factor) p = videoed_parser.add_parser( "denoise-image-sequence", help="Denoise sequence of images, keeping sharp edges. This allows you to make the final fake more believable, since the neural network is not able to make a detailed skin texture, but it makes the edges quite clear. Therefore, if the whole frame is more `blurred`, then a fake will seem more believable. 
Especially true for scenes of the film, which are usually very clear.") @@ -253,7 +263,7 @@ if __name__ == "__main__": p.set_defaults(func=process_videoed_denoise_image_sequence) def process_videoed_video_from_sequence(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import VideoEd VideoEd.video_from_sequence (arguments.input_dir, arguments.output_file, @@ -289,25 +299,28 @@ if __name__ == "__main__": facesettool_parser = subparsers.add_parser( "facesettool", help="Faceset tools.").add_subparsers() def process_faceset_enhancer(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import FacesetEnhancer - FacesetEnhancer.process_folder ( Path(arguments.input_dir), multi_gpu=arguments.multi_gpu, cpu_only=arguments.cpu_only ) + FacesetEnhancer.process_folder ( Path(arguments.input_dir), + cpu_only=arguments.cpu_only, + force_gpu_idxs=arguments.force_gpu_idxs + ) p = facesettool_parser.add_parser ("enhance", help="Enhance details in DFL faceset.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory of aligned faces.") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Process on CPU.") + p.add_argument('--force-gpu-idxs', dest="force_gpu_idxs", default=None, help="Force to choose GPU indexes separated by comma.") p.set_defaults(func=process_faceset_enhancer) """ def process_relight_faceset(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import FacesetRelighter FacesetRelighter.relight (arguments.input_dir, arguments.lighten, arguments.random_one) def process_delete_relighted(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import FacesetRelighter FacesetRelighter.delete_relighted (arguments.input_dir) @@ -332,21 +345,6 @@ if __name__ == "__main__": print ("Done.") - """ - Suppressing error with keras 2.2.4+ on python exit: - - Exception ignored in: > - Traceback (most recent call last): - File "D:\DeepFaceLab\_internal\bin\lib\site-packages\tensorflow\python\client\session.py", line 1413, in __del__ - AttributeError: 'NoneType' object has no attribute 'raise_exception_on_not_ok_status' - - reproduce: https://github.com/keras-team/keras/issues/11751 ( still no solution ) - """ - outnull_file = open(os.devnull, 'w') - os.dup2 ( outnull_file.fileno(), sys.stderr.fileno() ) - sys.stderr = outnull_file - - ''' import code code.interact(local=dict(globals(), **locals())) diff --git a/mainscripts/ConverterScreen/__init__.py b/mainscripts/ConverterScreen/__init__.py deleted file mode 100644 index 5103fc4..0000000 --- a/mainscripts/ConverterScreen/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .ConverterScreen import Screen, ScreenManager \ No newline at end of file diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py index 76f55c1..7d41684 100644 --- a/mainscripts/Extractor.py +++ b/mainscripts/Extractor.py @@ -12,22 +12,22 @@ import cv2 import numpy as np import facelib -import imagelib -import mathlib -from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from joblib import Subprocessor -from nnlib import TernausNet, nnlib -from utils import Path_utils -from utils.cv2_utils import * +from core import imagelib +from core import mathlib +from facelib import 
FaceType, LandmarksProcessor, TernausNet +from core.interact import interact as io +from core.joblib import Subprocessor +from core.leras import nn +from core import pathex +from core.cv2ex import * from DFLIMG import * DEBUG = False class ExtractSubprocessor(Subprocessor): class Data(object): - def __init__(self, filename=None, rects=None, landmarks = None, landmarks_accurate=True, manual=False, force_output_path=None, final_output_files = None): - self.filename = filename + def __init__(self, filepath=None, rects=None, landmarks = None, landmarks_accurate=True, manual=False, force_output_path=None, final_output_files = None): + self.filepath = filepath self.rects = rects or [] self.rects_rotation = 0 self.landmarks_accurate = landmarks_accurate @@ -41,289 +41,295 @@ class ExtractSubprocessor(Subprocessor): #override def on_initialize(self, client_dict): - self.type = client_dict['type'] - self.image_size = client_dict['image_size'] - self.face_type = client_dict['face_type'] + self.type = client_dict['type'] + self.image_size = client_dict['image_size'] + self.face_type = client_dict['face_type'] self.max_faces_from_image = client_dict['max_faces_from_image'] - self.device_idx = client_dict['device_idx'] - self.cpu_only = client_dict['device_type'] == 'CPU' - self.final_output_path = Path(client_dict['final_output_dir']) if 'final_output_dir' in client_dict.keys() else None - self.debug_dir = client_dict['debug_dir'] - + self.device_idx = client_dict['device_idx'] + self.cpu_only = client_dict['device_type'] == 'CPU' + self.final_output_path = client_dict['final_output_path'] + self.output_debug_path = client_dict['output_debug_path'] + #transfer and set stdin in order to work code.interact in debug subprocess stdin_fd = client_dict['stdin_fd'] if stdin_fd is not None and DEBUG: sys.stdin = os.fdopen(stdin_fd) + self.log_info (f"Running on {client_dict['device_name'] }") + + if self.cpu_only: + device_config = nn.DeviceConfig.CPU() + place_model_on_cpu = True + else: + device_config = nn.DeviceConfig.GPUIndexes ([self.device_idx]) + place_model_on_cpu = device_config.devices[0].total_mem_gb < 4 + + if self.type == 'all' or 'rects' in self.type or 'landmarks' in self.type: + nn.initialize (device_config) + + if self.type == 'all' or self.type == 'rects-s3fd' or 'landmarks' in self.type: + self.rects_extractor = facelib.S3FDExtractor(place_model_on_cpu=place_model_on_cpu) + + if self.type == 'all' or 'landmarks' in self.type: + self.landmarks_extractor = facelib.FANExtractor(place_model_on_cpu=place_model_on_cpu) + self.cached_image = (None, None) - self.e = None - device_config = nnlib.DeviceConfig ( cpu_only=self.cpu_only, force_gpu_idx=self.device_idx, allow_growth=True) - self.device_vram = device_config.gpu_vram_gb[0] - - intro_str = 'Running on %s.' % (client_dict['device_name']) - if not self.cpu_only and self.device_vram <= 2: - intro_str += " Recommended to close all programs using this device." 
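# --- editor's sketch (not part of the original diff) ---------------------------
# The rewritten on_initialize() above picks a leras DeviceConfig per subprocess
# and pushes the detector/landmark models onto the CPU when the selected card
# has little VRAM. Condensed restatement of that rule; `nn` is core.leras.nn,
# assumed to be already initialized for the chosen config.
def pick_device_config(nn, cpu_only, device_idx):
    if cpu_only:
        return nn.DeviceConfig.CPU(), True                 # models always live on CPU
    cfg = nn.DeviceConfig.GPUIndexes([device_idx])
    place_model_on_cpu = cfg.devices[0].total_mem_gb < 4   # low-memory cards fall back to CPU
    return cfg, place_model_on_cpu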
- - self.log_info (intro_str) - - if 'rects' in self.type: - if self.type == 'rects-mt': - nnlib.import_all (device_config) - self.e = facelib.MTCExtractor() - elif self.type == 'rects-dlib': - nnlib.import_dlib (device_config) - self.e = facelib.DLIBExtractor(nnlib.dlib) - elif self.type == 'rects-s3fd': - nnlib.import_all (device_config) - self.e = facelib.S3FDExtractor(do_dummy_predict=True) - else: - raise ValueError ("Wrong type.") - - if self.e is not None: - self.e.__enter__() - - elif self.type == 'landmarks': - nnlib.import_all (device_config) - self.e = facelib.FANExtractor() - self.e.__enter__() - if self.device_vram >= 2: - self.second_pass_e = facelib.S3FDExtractor(do_dummy_predict=False) - self.second_pass_e.__enter__() - else: - self.second_pass_e = None - - elif self.type == 'fanseg': - nnlib.import_all (device_config) - self.e = TernausNet(256, FaceType.toString(FaceType.FULL) ) - self.e.__enter__() - - elif self.type == 'final': - pass - - #override - def on_finalize(self): - if self.e is not None: - self.e.__exit__() - #override def process_data(self, data): - filename_path = Path( data.filename ) - filename_path_str = str(filename_path) - - if self.type == 'landmarks' and len(data.rects) == 0: - return data - - if self.cached_image[0] == filename_path_str: - image = self.cached_image[1] #cached image for manual extractor - else: - image = cv2_imread( filename_path_str ) - - if image is None: - self.log_err ( 'Failed to extract %s, reason: cv2_imread() fail.' % ( str(filename_path) ) ) - return data - - image = imagelib.normalize_channels(image, 3) - h, w, ch = image.shape - - wm, hm = w % 2, h % 2 - if wm + hm != 0: #fix odd image - image = image[0:h-hm,0:w-wm,:] - self.cached_image = ( filename_path_str, image ) - - src_dflimg = None - h, w, ch = image.shape - if h == w: - #extracting from already extracted jpg image? 
- src_dflimg = DFLIMG.load (filename_path) - - if 'rects' in self.type: - if min(w,h) < 128: - self.log_err ( 'Image is too small %s : [%d, %d]' % ( str(filename_path), w, h ) ) - data.rects = [] - else: - for rot in ([0, 90, 270, 180]): - data.rects_rotation = rot - if rot == 0: - rotated_image = image - elif rot == 90: - rotated_image = image.swapaxes( 0,1 )[:,::-1,:] - elif rot == 180: - rotated_image = image[::-1,::-1,:] - elif rot == 270: - rotated_image = image.swapaxes( 0,1 )[::-1,:,:] - - rects = data.rects = self.e.extract (rotated_image, is_bgr=True) - if len(rects) != 0: - break - - if self.max_faces_from_image != 0 and len(data.rects) > 1: - data.rects = data.rects[0:self.max_faces_from_image] - - return data - - elif self.type == 'landmarks': - if data.rects_rotation == 0: - rotated_image = image - elif data.rects_rotation == 90: - rotated_image = image.swapaxes( 0,1 )[:,::-1,:] - elif data.rects_rotation == 180: - rotated_image = image[::-1,::-1,:] - elif data.rects_rotation == 270: - rotated_image = image.swapaxes( 0,1 )[::-1,:,:] - - data.landmarks = self.e.extract (rotated_image, data.rects, self.second_pass_e if (src_dflimg is None and data.landmarks_accurate) else None, is_bgr=True) - if data.rects_rotation != 0: - for i, (rect, lmrks) in enumerate(zip(data.rects, data.landmarks)): - new_rect, new_lmrks = rect, lmrks - (l,t,r,b) = rect - if data.rects_rotation == 90: - new_rect = ( t, h-l, b, h-r) - if lmrks is not None: - new_lmrks = lmrks[:,::-1].copy() - new_lmrks[:,1] = h - new_lmrks[:,1] - elif data.rects_rotation == 180: - if lmrks is not None: - new_rect = ( w-l, h-t, w-r, h-b) - new_lmrks = lmrks.copy() - new_lmrks[:,0] = w - new_lmrks[:,0] - new_lmrks[:,1] = h - new_lmrks[:,1] - elif data.rects_rotation == 270: - new_rect = ( w-b, l, w-t, r ) - if lmrks is not None: - new_lmrks = lmrks[:,::-1].copy() - new_lmrks[:,0] = w - new_lmrks[:,0] - data.rects[i], data.landmarks[i] = new_rect, new_lmrks - - return data - - elif self.type == 'final': - data.final_output_files = [] - rects = data.rects - landmarks = data.landmarks - - if self.debug_dir is not None: - debug_output_file = str( Path(self.debug_dir) / (filename_path.stem+'.jpg') ) - debug_image = image.copy() - - if src_dflimg is not None and len(rects) != 1: - #if re-extracting from dflimg and more than 1 or zero faces detected - dont process and just copy it - print("src_dflimg is not None and len(rects) != 1", str(filename_path) ) - output_file = str(self.final_output_path / filename_path.name) - if str(filename_path) != str(output_file): - shutil.copy ( str(filename_path), str(output_file) ) - data.final_output_files.append (output_file) - else: - face_idx = 0 - for rect, image_landmarks in zip( rects, landmarks ): - - if src_dflimg is not None and face_idx > 1: - #cannot extract more than 1 face from dflimg - break - - if image_landmarks is None: - continue - - rect = np.array(rect) - - if self.face_type == FaceType.MARK_ONLY: - image_to_face_mat = None - face_image = image - face_image_landmarks = image_landmarks - else: - image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type) - - face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4) - face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat) - - landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], 
image_to_face_mat, True) - - rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]])) - landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] ) - - if not data.manual and self.face_type <= FaceType.FULL_NO_ALIGN and landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area - continue - - if self.debug_dir is not None: - LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type, transparent_mask=True) - - final_output_path = self.final_output_path - if data.force_output_path is not None: - final_output_path = data.force_output_path - - if src_dflimg is not None and filename_path.suffix == '.jpg': - #if extracting from dflimg and jpg copy it in order not to lose quality - output_file = str(final_output_path / filename_path.name) - if str(filename_path) != str(output_file): - shutil.copy ( str(filename_path), str(output_file) ) - else: - - output_file = '{}_{}{}'.format(str(final_output_path / filename_path.stem), str(face_idx), '.jpg') - cv2_imwrite(output_file, face_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100] ) - - DFLJPG.embed_data(output_file, face_type=FaceType.toString(self.face_type), - landmarks=face_image_landmarks.tolist(), - source_filename=filename_path.name, - source_rect=rect, - source_landmarks=image_landmarks.tolist(), - image_to_face_mat=image_to_face_mat - ) - - data.final_output_files.append (output_file) - face_idx += 1 - data.faces_detected = face_idx - - if self.debug_dir is not None: - cv2_imwrite(debug_output_file, debug_image, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) - + if 'landmarks' in self.type and len(data.rects) == 0: return data - elif self.type == 'fanseg': - if src_dflimg is not None: - fanseg_mask = self.e.extract( image / 255.0 ) - src_dflimg.embed_and_set( filename_path_str, - fanseg_mask=fanseg_mask, - ) - + filepath = data.filepath + cached_filepath, image = self.cached_image + if cached_filepath != filepath: + image = cv2_imread( filepath ) + if image is None: + self.log_err (f'Failed to open {filepath}, reason: cv2_imread() fail.') + return data + image = imagelib.normalize_channels(image, 3) + image = imagelib.cut_odd_image(image) + self.cached_image = ( filepath, image ) + + h, w, c = image.shape + extract_from_dflimg = (h == w and DFLIMG.load (filepath) is not None) + + if 'rects' in self.type or self.type == 'all': + data = ExtractSubprocessor.Cli.rects_stage (data=data, + image=image, + max_faces_from_image=self.max_faces_from_image, + rects_extractor=self.rects_extractor, + ) + + if 'landmarks' in self.type or self.type == 'all': + data = ExtractSubprocessor.Cli.landmarks_stage (data=data, + image=image, + extract_from_dflimg=extract_from_dflimg, + landmarks_extractor=self.landmarks_extractor, + rects_extractor=self.rects_extractor, + ) + + if self.type == 'final' or self.type == 'all': + data = ExtractSubprocessor.Cli.final_stage(data=data, + image=image, + face_type=self.face_type, + image_size=self.image_size, + extract_from_dflimg=extract_from_dflimg, + output_debug_path=self.output_debug_path, + final_output_path=self.final_output_path, + ) + return data + + @staticmethod + def rects_stage(data, + image, + max_faces_from_image, + rects_extractor, + ): + h,w,c = image.shape + if min(h,w) < 128: + # Image is too small + data.rects = [] + else: + for rot in ([0, 90, 270, 180]): + if rot == 0: + rotated_image = image + elif rot == 90: + rotated_image = image.swapaxes( 0,1 )[:,::-1,:] + elif rot == 180: + 
rotated_image = image[::-1,::-1,:]
+                    elif rot == 270:
+                        rotated_image = image.swapaxes( 0,1 )[::-1,:,:]
+                    rects = data.rects = rects_extractor.extract (rotated_image, is_bgr=True)
+                    if len(rects) != 0:
+                        data.rects_rotation = rot
+                        break
+            if max_faces_from_image != 0 and len(data.rects) > 1:
+                data.rects = data.rects[0:max_faces_from_image]
+            return data
+
+
+        @staticmethod
+        def landmarks_stage(data,
+                            image,
+                            extract_from_dflimg,
+                            landmarks_extractor,
+                            rects_extractor,
+                            ):
+            h, w, c = image.shape  # original image size, needed below to map rects/landmarks back from the rotated image
+
+            if data.rects_rotation == 0:
+                rotated_image = image
+            elif data.rects_rotation == 90:
+                rotated_image = image.swapaxes( 0,1 )[:,::-1,:]
+            elif data.rects_rotation == 180:
+                rotated_image = image[::-1,::-1,:]
+            elif data.rects_rotation == 270:
+                rotated_image = image.swapaxes( 0,1 )[::-1,:,:]
+
+            data.landmarks = landmarks_extractor.extract (rotated_image, data.rects, rects_extractor if (not extract_from_dflimg and data.landmarks_accurate) else None, is_bgr=True)
+            if data.rects_rotation != 0:
+                for i, (rect, lmrks) in enumerate(zip(data.rects, data.landmarks)):
+                    new_rect, new_lmrks = rect, lmrks
+                    (l,t,r,b) = rect
+                    if data.rects_rotation == 90:
+                        new_rect = ( t, h-l, b, h-r)
+                        if lmrks is not None:
+                            new_lmrks = lmrks[:,::-1].copy()
+                            new_lmrks[:,1] = h - new_lmrks[:,1]
+                    elif data.rects_rotation == 180:
+                        if lmrks is not None:
+                            new_rect = ( w-l, h-t, w-r, h-b)
+                            new_lmrks = lmrks.copy()
+                            new_lmrks[:,0] = w - new_lmrks[:,0]
+                            new_lmrks[:,1] = h - new_lmrks[:,1]
+                    elif data.rects_rotation == 270:
+                        new_rect = ( w-b, l, w-t, r )
+                        if lmrks is not None:
+                            new_lmrks = lmrks[:,::-1].copy()
+                            new_lmrks[:,0] = w - new_lmrks[:,0]
+                    data.rects[i], data.landmarks[i] = new_rect, new_lmrks
+
+            return data
+
+        @staticmethod
+        def final_stage(data,
+                        image,
+                        face_type,
+                        image_size,
+                        extract_from_dflimg = False,
+                        output_debug_path=None,
+                        final_output_path=None,
+                        ):
+            data.final_output_files = []
+            filepath = data.filepath
+            rects = data.rects
+            landmarks = data.landmarks
+
+            if output_debug_path is not None:
+                debug_image = image.copy()
+
+            if extract_from_dflimg and len(rects) != 1:
+                #if re-extracting from dflimg and more than one or zero faces detected - don't process, just copy it
+                print("extract_from_dflimg and len(rects) != 1", filepath )
+                output_filepath = final_output_path / filepath.name
+                if filepath != output_filepath:
+                    shutil.copy ( str(filepath), str(output_filepath) )
+                data.final_output_files.append (output_filepath)
+            else:
+                face_idx = 0
+                for rect, image_landmarks in zip( rects, landmarks ):
+
+                    if extract_from_dflimg and face_idx > 1:
+                        #cannot extract more than 1 face from dflimg
+                        break
+
+                    if image_landmarks is None:
+                        continue
+
+                    rect = np.array(rect)
+
+                    if face_type == FaceType.MARK_ONLY:
+                        image_to_face_mat = None
+                        face_image = image
+                        face_image_landmarks = image_landmarks
+                    else:
+                        image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, image_size, face_type)
+
+                        face_image = cv2.warpAffine(image, image_to_face_mat, (image_size, image_size), cv2.INTER_LANCZOS4)
+                        face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat)
+
+                        landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,image_size-1), (image_size-1, image_size-1), (image_size-1,0) ], image_to_face_mat, True)
+
+                        rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]]))
+                        landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] )
+
+                        if not data.manual and face_type <= FaceType.FULL_NO_ALIGN and 
landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area + continue + + if output_debug_path is not None: + LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, image_size, face_type, transparent_mask=True) + + output_path = final_output_path + if data.force_output_path is not None: + output_path = data.force_output_path + + if extract_from_dflimg and filepath.suffix == '.jpg': + #if extracting from dflimg and jpg copy it in order not to lose quality + output_filepath = output_path / filepath.name + if filepath != output_filepath: + shutil.copy ( str(filepath), str(output_filepath) ) + else: + output_filepath = output_path / f"{filepath.stem}_{face_idx}.jpg" + cv2_imwrite(output_filepath, face_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100] ) + + DFLJPG.embed_data(output_filepath, face_type=FaceType.toString(face_type), + landmarks=face_image_landmarks.tolist(), + source_filename=filepath.name, + source_rect=rect, + source_landmarks=image_landmarks.tolist(), + image_to_face_mat=image_to_face_mat + ) + + data.final_output_files.append (output_filepath) + face_idx += 1 + data.faces_detected = face_idx + + if output_debug_path is not None: + cv2_imwrite( output_debug_path / (filepath.stem+'.jpg'), debug_image, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) + + return data + #overridable def get_data_name (self, data): #return string identificator of your data - return data.filename + return data.filepath + + @staticmethod + def get_devices_for_config (type, device_config): + devices = device_config.devices + cpu_only = len(devices) == 0 + + if 'rects' in type or \ + 'landmarks' in type or \ + 'all' in type: + + if not cpu_only: + if type == 'landmarks-manual': + devices = [devices.get_best_device()] + result = [ (device.index, 'GPU', device.name, device.total_mem_gb) for device in devices ] + return result + else: + if type == 'landmarks-manual': + return [ (0, 'CPU', 'CPU', 0 ) ] + else: + return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] + + elif type == 'final': + return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in (range(min(8, multiprocessing.cpu_count())) if not DEBUG else [0]) ] + + def __init__(self, input_data, type, image_size=None, face_type=None, output_debug_path=None, manual_window_size=0, max_faces_from_image=0, final_output_path=None, device_config=None): + if type == 'landmarks-manual': + for x in input_data: + x.manual = True - #override - def __init__(self, input_data, type, image_size=None, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, max_faces_from_image=0, final_output_path=None): self.input_data = input_data + self.type = type self.image_size = image_size self.face_type = face_type - self.debug_dir = debug_dir + self.output_debug_path = output_debug_path self.final_output_path = final_output_path - self.manual = manual self.manual_window_size = manual_window_size self.max_faces_from_image = max_faces_from_image self.result = [] - - self.devices = ExtractSubprocessor.get_devices_for_config(self.manual, self.type, multi_gpu, cpu_only) - if self.manual or DEBUG: - no_response_time_sec = 999999 - elif nnlib.device.backend == 'plaidML': - no_response_time_sec = 600 - else: - no_response_time_sec = 60 - - super().__init__('Extractor', ExtractSubprocessor.Cli, no_response_time_sec) + self.devices = ExtractSubprocessor.get_devices_for_config(self.type, device_config) - #override - def on_check_run(self): - if len(self.devices) 
== 0: - io.log_err("No devices found to start subprocessor.") - return False - return True + super().__init__('Extractor', ExtractSubprocessor.Cli, + 999999 if type == 'landmarks-manual' or DEBUG else 120) #override def on_clients_initialized(self): - if self.manual == True: + if self.type == 'landmarks-manual': self.wnd_name = 'Manual pass' io.named_window(self.wnd_name) io.capture_mouse(self.wnd_name) @@ -346,7 +352,7 @@ class ExtractSubprocessor(Subprocessor): #override def on_clients_finalized(self): - if self.manual == True: + if self.type == 'landmarks-manual': io.destroy_all_windows() io.progress_bar_close() @@ -357,8 +363,8 @@ class ExtractSubprocessor(Subprocessor): 'image_size': self.image_size, 'face_type': self.face_type, 'max_faces_from_image':self.max_faces_from_image, - 'debug_dir': self.debug_dir, - 'final_output_dir': str(self.final_output_path), + 'output_debug_path': self.output_debug_path, + 'final_output_path': self.final_output_path, 'stdin_fd': sys.stdin.fileno() } @@ -371,15 +377,12 @@ class ExtractSubprocessor(Subprocessor): #override def get_data(self, host_dict): - if not self.manual: - if len (self.input_data) > 0: - return self.input_data.pop(0) - else: + if self.type == 'landmarks-manual': need_remark_face = False redraw_needed = False while len (self.input_data) > 0: data = self.input_data[0] - filename, data_rects, data_landmarks = data.filename, data.rects, data.landmarks + filepath, data_rects, data_landmarks = data.filepath, data.rects, data.landmarks is_frame_done = False if need_remark_face: # need remark image from input data that already has a marked face? @@ -396,21 +399,21 @@ class ExtractSubprocessor(Subprocessor): self.y = ( self.rect[1] + self.rect[3] ) / 2 if len(data_rects) == 0: - if self.cache_original_image[0] == filename: + if self.cache_original_image[0] == filepath: self.original_image = self.cache_original_image[1] else: - self.original_image = imagelib.normalize_channels( cv2_imread( filename ), 3 ) - - self.cache_original_image = (filename, self.original_image ) + self.original_image = imagelib.normalize_channels( cv2_imread( filepath ), 3 ) + + self.cache_original_image = (filepath, self.original_image ) (h,w,c) = self.original_image.shape self.view_scale = 1.0 if self.manual_window_size == 0 else self.manual_window_size / ( h * (16.0/9.0) ) - if self.cache_image[0] == (h,w,c) + (self.view_scale,filename): + if self.cache_image[0] == (h,w,c) + (self.view_scale,filepath): self.image = self.cache_image[1] else: self.image = cv2.resize (self.original_image, ( int(w*self.view_scale), int(h*self.view_scale) ), interpolation=cv2.INTER_LINEAR) - self.cache_image = ( (h,w,c) + (self.view_scale,filename), self.image ) + self.cache_image = ( (h,w,c) + (self.view_scale,filepath), self.image ) (h,w,c) = self.image.shape @@ -526,9 +529,9 @@ class ExtractSubprocessor(Subprocessor): if redraw_needed: redraw_needed = False - return ExtractSubprocessor.Data (filename, landmarks_accurate=self.landmarks_accurate) + return ExtractSubprocessor.Data (filepath, landmarks_accurate=self.landmarks_accurate) else: - return ExtractSubprocessor.Data (filename, rects=[self.rect], landmarks_accurate=self.landmarks_accurate) + return ExtractSubprocessor.Data (filepath, rects=[self.rect], landmarks_accurate=self.landmarks_accurate) else: is_frame_done = True @@ -539,19 +542,22 @@ class ExtractSubprocessor(Subprocessor): io.progress_bar_inc(1) self.extract_needed = True self.rect_locked = False + else: + if len (self.input_data) > 0: + return self.input_data.pop(0) 
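+        # queue drained: returning None signals the Subprocessor host that there is
+        # nothing more to dispatch for now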
return None #override def on_data_return (self, host_dict, data): - if not self.manual: + if not self.type != 'landmarks-manual': self.input_data.insert(0, data) #override def on_result (self, host_dict, data, result): - if self.manual == True: - filename, landmarks = result.filename, result.landmarks - + if self.type == 'landmarks-manual': + filepath, landmarks = result.filepath, result.landmarks + if len(landmarks) != 0 and landmarks[0] is not None: self.landmarks = landmarks[0] @@ -596,56 +602,6 @@ class ExtractSubprocessor(Subprocessor): def get_result(self): return self.result - @staticmethod - def get_devices_for_config (manual, type, multi_gpu, cpu_only): - backend = nnlib.device.backend - if 'cpu' in backend: - cpu_only = True - - if 'rects' in type or type == 'landmarks' or type == 'fanseg': - if not cpu_only and type == 'rects-mt' and backend == "plaidML": #plaidML works with MT very slowly - cpu_only = True - - if not cpu_only: - devices = [] - if not manual and multi_gpu: - devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2) - - if len(devices) == 0: - idx = nnlib.device.getBestValidDeviceIdx() - if idx != -1: - devices = [idx] - - if len(devices) == 0: - cpu_only = True - - result = [] - for idx in devices: - dev_name = nnlib.device.getDeviceName(idx) - dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx) - - count = 1 - - if not manual: - if (type == 'rects-mt' ): - count = int (max (1, dev_vram / 2) ) - - if count == 1: - result += [ (idx, 'GPU', dev_name, dev_vram) ] - else: - for i in range (count): - result += [ (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram) ] - - return result - - if cpu_only: - if manual: - return [ (0, 'CPU', 'CPU', 0 ) ] - else: - return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] - - elif type == 'final': - return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in (range(min(8, multiprocessing.cpu_count())) if not DEBUG else [0]) ] class DeletedFilesSearcherSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): @@ -704,87 +660,100 @@ class DeletedFilesSearcherSubprocessor(Subprocessor): def get_result(self): return self.result -def main(input_dir, - output_dir, - debug_dir=None, - detector='mt', +def main(detector=None, + input_path=None, + output_path=None, + output_debug=None, manual_fix=False, manual_output_debug_fix=False, manual_window_size=1368, image_size=256, face_type='full_face', max_faces_from_image=0, - device_args={}): - - input_path = Path(input_dir) - output_path = Path(output_dir) + cpu_only = False, + force_gpu_idxs = None, + ): face_type = FaceType.fromString(face_type) - multi_gpu = device_args.get('multi_gpu', False) - cpu_only = device_args.get('cpu_only', False) - if not input_path.exists(): - raise ValueError('Input directory not found. Please ensure it exists.') + io.log_err ('Input directory not found. 
Please ensure it exists.') + return + + if detector is None: + io.log_info ("Choose detector type.") + io.log_info ("[0] S3FD") + io.log_info ("[1] manual") + detector = {0:'s3fd', 1:'manual'}[ io.input_int("", 0, [0,1]) ] + + device_config = nn.DeviceConfig.GPUIndexes( force_gpu_idxs or nn.ask_choose_device_idxs(choose_only_one=detector=='manual', suggest_all_gpu=True) ) \ + if not cpu_only else nn.DeviceConfig.CPU() + + output_debug_path = output_path.parent / (output_path.name + '_debug') + + if output_debug is None: + output_debug = io.input_bool (f"Write debug images to {output_debug_path.name}?", False) if output_path.exists(): if not manual_output_debug_fix and input_path != output_path: - output_images_paths = Path_utils.get_image_paths(output_path) + output_images_paths = pathex.get_image_paths(output_path) if len(output_images_paths) > 0: - io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." % (str(output_path)), False ) + io.input(f"WARNING !!! \n {output_path} contains files! \n They will be deleted. \n Press enter to continue.") for filename in output_images_paths: Path(filename).unlink() else: output_path.mkdir(parents=True, exist_ok=True) + input_path_image_paths = pathex.get_image_unique_filestem_paths(input_path, verbose_print_func=io.log_info) + if manual_output_debug_fix: - if debug_dir is None: - raise ValueError('debug-dir must be specified') - detector = 'manual' - io.log_info('Performing re-extract frames which were deleted from _debug directory.') + if not output_debug_path.exists(): + io.log_err(f'{output_debug_path} not found. Re-extract faces with "Write debug images" option.') + return + else: + detector = 'manual' + io.log_info('Performing re-extract frames which were deleted from _debug directory.') - input_path_image_paths = Path_utils.get_image_unique_filestem_paths(input_path, verbose_print_func=io.log_info) - if debug_dir is not None: - debug_output_path = Path(debug_dir) - - if manual_output_debug_fix: - if not debug_output_path.exists(): - raise ValueError("%s not found " % ( str(debug_output_path) )) - - input_path_image_paths = DeletedFilesSearcherSubprocessor (input_path_image_paths, Path_utils.get_image_paths(debug_output_path) ).run() + input_path_image_paths = DeletedFilesSearcherSubprocessor (input_path_image_paths, pathex.get_image_paths(output_debug_path) ).run() input_path_image_paths = sorted (input_path_image_paths) io.log_info('Found %d images.' 
% (len(input_path_image_paths))) + else: + if output_debug_path.exists(): + for filename in pathex.get_image_paths(output_debug_path): + Path(filename).unlink() else: - if debug_output_path.exists(): - for filename in Path_utils.get_image_paths(debug_output_path): - Path(filename).unlink() - else: - debug_output_path.mkdir(parents=True, exist_ok=True) + output_debug_path.mkdir(parents=True, exist_ok=True) images_found = len(input_path_image_paths) faces_detected = 0 if images_found != 0: if detector == 'manual': io.log_info ('Performing manual extract...') - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename, manual=True) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run() + data = ExtractSubprocessor ([ ExtractSubprocessor.Data(Path(filename)) for filename in input_path_image_paths ], 'landmarks-manual', image_size, face_type, output_debug_path if output_debug else None, manual_window_size=manual_window_size, device_config=device_config).run() + + io.log_info ('Performing 3rd pass...') + data = ExtractSubprocessor (data, 'final', image_size, face_type, output_debug_path if output_debug else None, final_output_path=output_path, device_config=device_config).run() + else: - io.log_info ('Performing 1st pass...') - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, max_faces_from_image=max_faces_from_image).run() - - io.log_info ('Performing 2nd pass...') - data = ExtractSubprocessor (data, 'landmarks', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run() - - io.log_info ('Performing 3rd pass...') - data = ExtractSubprocessor (data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() + io.log_info ('Extracting faces...') + data = ExtractSubprocessor ([ ExtractSubprocessor.Data(Path(filename)) for filename in input_path_image_paths ], + 'all', + image_size, + face_type, + output_debug_path if output_debug else None, + max_faces_from_image=max_faces_from_image, + final_output_path=output_path, + device_config=device_config).run() + faces_detected += sum([d.faces_detected for d in data]) if manual_fix: if all ( np.array ( [ d.faces_detected > 0 for d in data] ) == True ): io.log_info ('All faces are detected, manual fix not needed.') else: - fix_data = [ ExtractSubprocessor.Data(d.filename, manual=True) for d in data if d.faces_detected == 0 ] + fix_data = [ ExtractSubprocessor.Data(d.filepath) for d in data if d.faces_detected == 0 ] io.log_info ('Performing manual fix for %d images...' 
% (len(fix_data)) ) - fix_data = ExtractSubprocessor (fix_data, 'landmarks', image_size, face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run() - fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() + fix_data = ExtractSubprocessor (fix_data, 'landmarks-manual', image_size, face_type, output_debug_path if output_debug else None, manual_window_size=manual_window_size, device_config=device_config).run() + fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, output_debug_path if output_debug else None, final_output_path=output_path, device_config=device_config).run() faces_detected += sum([d.faces_detected for d in fix_data]) diff --git a/mainscripts/FacesetEnhancer.py b/mainscripts/FacesetEnhancer.py index a63161a..eb4dc20 100644 --- a/mainscripts/FacesetEnhancer.py +++ b/mainscripts/FacesetEnhancer.py @@ -2,22 +2,23 @@ import multiprocessing import shutil from DFLIMG import * -from interact import interact as io -from joblib import Subprocessor -from nnlib import nnlib -from utils import Path_utils -from utils.cv2_utils import * +from core.interact import interact as io +from core.joblib import Subprocessor +from core.leras import nn +from core import pathex +from core.cv2ex import * class FacesetEnhancerSubprocessor(Subprocessor): #override - def __init__(self, image_paths, output_dirpath, multi_gpu=False, cpu_only=False): + def __init__(self, image_paths, output_dirpath, device_config): self.image_paths = image_paths self.output_dirpath = output_dirpath self.result = [] - self.devices = FacesetEnhancerSubprocessor.get_devices_for_config(multi_gpu, cpu_only) - + self.nn_initialize_mp_lock = multiprocessing.Lock() + self.devices = FacesetEnhancerSubprocessor.get_devices_for_config(device_config) + super().__init__('FacesetEnhancer', FacesetEnhancerSubprocessor.Cli, 600) #override @@ -30,7 +31,8 @@ class FacesetEnhancerSubprocessor(Subprocessor): #override def process_info_generator(self): - base_dict = {'output_dirpath':self.output_dirpath} + base_dict = {'output_dirpath':self.output_dirpath, + 'nn_initialize_mp_lock': self.nn_initialize_mp_lock,} for (device_idx, device_type, device_name, device_total_vram_gb) in self.devices: client_dict = base_dict.copy() @@ -59,37 +61,13 @@ class FacesetEnhancerSubprocessor(Subprocessor): return self.result @staticmethod - def get_devices_for_config (multi_gpu, cpu_only): - backend = nnlib.device.backend - if 'cpu' in backend: - cpu_only = True - - if not cpu_only and backend == "plaidML": - cpu_only = True - - if not cpu_only: - devices = [] - if multi_gpu: - devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2) - - if len(devices) == 0: - idx = nnlib.device.getBestValidDeviceIdx() - if idx != -1: - devices = [idx] - - if len(devices) == 0: - cpu_only = True - - result = [] - for idx in devices: - dev_name = nnlib.device.getDeviceName(idx) - dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx) - - result += [ (idx, 'GPU', dev_name, dev_vram) ] - - return result - - if cpu_only: + def get_devices_for_config (device_config): + devices = device_config.devices + cpu_only = len(devices) == 0 + + if not cpu_only: + return [ (device.index, 'GPU', device.name, device.total_mem_gb) for device in devices ] + else: return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] class Cli(Subprocessor.Cli): @@ -99,20 +77,23 @@ class 
FacesetEnhancerSubprocessor(Subprocessor): device_idx = client_dict['device_idx'] cpu_only = client_dict['device_type'] == 'CPU' self.output_dirpath = client_dict['output_dirpath'] - - device_config = nnlib.DeviceConfig ( cpu_only=cpu_only, force_gpu_idx=device_idx, allow_growth=True) - nnlib.import_all (device_config) - - device_vram = device_config.gpu_vram_gb[0] + nn_initialize_mp_lock = client_dict['nn_initialize_mp_lock'] + + if cpu_only: + device_config = nn.DeviceConfig.CPU() + device_vram = 99 + else: + device_config = nn.DeviceConfig.GPUIndexes ([device_idx]) + device_vram = device_config.devices[0].total_mem_gb + + nn.initialize (device_config) intro_str = 'Running on %s.' % (client_dict['device_name']) - if not cpu_only and device_vram <= 2: - intro_str += " Recommended to close all programs using this device." - + self.log_info (intro_str) - from facelib import FaceEnhancer - self.fe = FaceEnhancer() + from facelib import FaceEnhancer + self.fe = FaceEnhancer( place_model_on_cpu=(device_vram<=2) ) #override def process_data(self, filepath): @@ -137,7 +118,10 @@ class FacesetEnhancerSubprocessor(Subprocessor): return (0, filepath, None) -def process_folder ( dirpath, multi_gpu=False, cpu_only=False ): +def process_folder ( dirpath, cpu_only=False, force_gpu_idxs=None ): + device_config = nn.DeviceConfig.GPUIndexes( force_gpu_idxs or nn.ask_choose_device_idxs(suggest_all_gpu=True) ) \ + if not cpu_only else nn.DeviceConfig.CPU() + output_dirpath = dirpath.parent / (dirpath.name + '_enhanced') output_dirpath.mkdir (exist_ok=True, parents=True) @@ -146,15 +130,15 @@ def process_folder ( dirpath, multi_gpu=False, cpu_only=False ): io.log_info (f"Enhancing faceset in {dirpath_parts}") io.log_info ( f"Processing to {output_dirpath_parts}") - output_images_paths = Path_utils.get_image_paths(output_dirpath) + output_images_paths = pathex.get_image_paths(output_dirpath) if len(output_images_paths) > 0: for filename in output_images_paths: Path(filename).unlink() - image_paths = [Path(x) for x in Path_utils.get_image_paths( dirpath )] - result = FacesetEnhancerSubprocessor ( image_paths, output_dirpath, multi_gpu=multi_gpu, cpu_only=cpu_only).run() + image_paths = [Path(x) for x in pathex.get_image_paths( dirpath )] + result = FacesetEnhancerSubprocessor ( image_paths, output_dirpath, device_config=device_config).run() - is_merge = io.input_bool (f"\r\nMerge {output_dirpath_parts} to {dirpath_parts} ? 
(y/n skip:y) : ", True) + is_merge = io.input_bool (f"\r\nMerge {output_dirpath_parts} to {dirpath_parts} ?", True) if is_merge: io.log_info (f"Copying processed files to {dirpath_parts}") diff --git a/mainscripts/FacesetRelighter.py b/mainscripts/FacesetRelighter.py deleted file mode 100644 index 238ab74..0000000 --- a/mainscripts/FacesetRelighter.py +++ /dev/null @@ -1,263 +0,0 @@ -import traceback -from pathlib import Path - -import imagelib -from interact import interact as io -from nnlib import DeepPortraitRelighting -from utils import Path_utils -from utils.cv2_utils import * -from DFLIMG import * - -class RelightEditor: - def __init__(self, image_paths, dpr, lighten): - self.image_paths = image_paths - self.dpr = dpr - self.lighten = lighten - - self.current_img_path = None - self.current_img = None - self.current_img_shape = None - self.pick_new_face() - - self.alt_azi_ar = [ [0,0,1.0] ] - self.alt_azi_cur = 0 - - self.mouse_x = self.mouse_y = 9999 - self.screen_status_block = None - self.screen_status_block_dirty = True - self.screen_changed = True - - def pick_new_face(self): - self.current_img_path = self.image_paths[ np.random.randint(len(self.image_paths)) ] - self.current_img = cv2_imread (str(self.current_img_path)) - self.current_img_shape = self.current_img.shape - self.set_screen_changed() - - def set_screen_changed(self): - self.screen_changed = True - - def switch_screen_changed(self): - result = self.screen_changed - self.screen_changed = False - return result - - def make_screen(self): - alt,azi,inten=self.alt_azi_ar[self.alt_azi_cur] - - img = self.dpr.relight (self.current_img, alt, azi, inten, self.lighten) - - h,w,c = img.shape - - lines = ['Pick light directions for whole faceset.', - '[q]-new test face', - '[w][e]-navigate', - '[a][s]-intensity', - '[r]-new [t]-delete [enter]-process', - ''] - - for i, (alt,azi,inten) in enumerate(self.alt_azi_ar): - s = '>:' if self.alt_azi_cur == i else ' :' - s += f'alt=[{ int(alt):03}] azi=[{ int(azi):03}] int=[{inten:01.1f}]' - lines += [ s ] - - lines_count = len(lines) - h_line = 16 - - sh = lines_count * h_line - sw = 400 - sc = c - status_img = np.ones ( (sh,sw,sc) ) * 0.1 - - for i in range(lines_count): - status_img[ i*h_line:(i+1)*h_line, 0:sw] += \ - imagelib.get_text_image ( (h_line,sw,c), lines[i], color=[0.8]*c ) - - status_img = np.clip(status_img*255, 0, 255).astype(np.uint8) - - #combine screens - if sh > h: - img = np.concatenate ([img, np.zeros( (sh-h,w,c), dtype=img.dtype ) ], axis=0) - elif h > sh: - status_img = np.concatenate ([status_img, np.zeros( (h-sh,sw,sc), dtype=img.dtype ) ], axis=0) - - img = np.concatenate ([img, status_img], axis=1) - - return img - - def run(self): - wnd_name = "Relighter" - io.named_window(wnd_name) - io.capture_keys(wnd_name) - io.capture_mouse(wnd_name) - - zoom_factor = 1.0 - - is_angle_editing = False - - is_exit = False - while not is_exit: - io.process_messages(0.0001) - - mouse_events = io.get_mouse_events(wnd_name) - for ev in mouse_events: - (x, y, ev, flags) = ev - if ev == io.EVENT_LBUTTONDOWN: - is_angle_editing = True - - if ev == io.EVENT_LBUTTONUP: - is_angle_editing = False - - if is_angle_editing: - h,w,c = self.current_img_shape - - alt,azi,inten = self.alt_azi_ar[self.alt_azi_cur] - alt = np.clip ( ( 0.5-y/w )*2.0, -1, 1)*90 - azi = np.clip ( (x / h - 0.5)*2.0, -1, 1)*90 - self.alt_azi_ar[self.alt_azi_cur] = (alt,azi,inten) - - - self.set_screen_changed() - - key_events = io.get_key_events(wnd_name) - key, chr_key, ctrl_pressed, alt_pressed, shift_pressed 
= key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) - - if key != 0: - if chr_key == 'q': - self.pick_new_face() - elif chr_key == 'w': - self.alt_azi_cur = np.clip (self.alt_azi_cur-1, 0, len(self.alt_azi_ar)-1) - self.set_screen_changed() - elif chr_key == 'e': - self.alt_azi_cur = np.clip (self.alt_azi_cur+1, 0, len(self.alt_azi_ar)-1) - self.set_screen_changed() - elif chr_key == 'r': - #add direction - self.alt_azi_ar += [ [0,0,1.0] ] - self.alt_azi_cur +=1 - self.set_screen_changed() - elif chr_key == 't': - if len(self.alt_azi_ar) > 1: - self.alt_azi_ar.pop(self.alt_azi_cur) - self.alt_azi_cur = np.clip (self.alt_azi_cur, 0, len(self.alt_azi_ar)-1) - self.set_screen_changed() - elif chr_key == 'a': - alt,azi,inten = self.alt_azi_ar[self.alt_azi_cur] - inten = np.clip ( inten-0.1, 0.0, 1.0) - self.alt_azi_ar[self.alt_azi_cur] = (alt,azi,inten) - self.set_screen_changed() - elif chr_key == 's': - alt,azi,inten = self.alt_azi_ar[self.alt_azi_cur] - inten = np.clip ( inten+0.1, 0.0, 1.0) - self.alt_azi_ar[self.alt_azi_cur] = (alt,azi,inten) - self.set_screen_changed() - elif key == 27 or chr_key == '\r' or chr_key == '\n': #esc - is_exit = True - - if self.switch_screen_changed(): - screen = self.make_screen() - if zoom_factor != 1.0: - h,w,c = screen.shape - screen = cv2.resize ( screen, ( int(w*zoom_factor), int(h*zoom_factor) ) ) - io.show_image (wnd_name, screen ) - - io.destroy_window(wnd_name) - - return self.alt_azi_ar - -def relight(input_dir, lighten=None, random_one=None): - if lighten is None: - lighten = io.input_bool ("Lighten the faces? ( y/n default:n ?:help ) : ", False, help_message="Lighten the faces instead of shadow. May produce artifacts." ) - - if io.is_colab(): - io.log_info("In colab version you cannot choose light directions manually.") - manual = False - else: - manual = io.input_bool ("Choose light directions manually? ( y/n default:y ) : ", True) - - if not manual: - if random_one is None: - random_one = io.input_bool ("Relight the faces only with one random direction and random intensity? ( y/n default:y ?:help) : ", True, help_message="Otherwise faceset will be relighted with predefined 7 light directions but with random intensity.") - - image_paths = [Path(x) for x in Path_utils.get_image_paths(input_dir)] - filtered_image_paths = [] - for filepath in io.progress_bar_generator(image_paths, "Collecting fileinfo"): - try: - dflimg = DFLIMG.load (Path(filepath)) - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - else: - if not dflimg.get_relighted(): - filtered_image_paths += [filepath] - except: - io.log_err (f"Exception occured while processing file {filepath.name}. 
Error: {traceback.format_exc()}") - image_paths = filtered_image_paths - - if len(image_paths) == 0: - io.log_info("No files to process.") - return - - dpr = DeepPortraitRelighting() - - if manual: - alt_azi_ar = RelightEditor(image_paths, dpr, lighten).run() - - for filepath in io.progress_bar_generator(image_paths, "Relighting"): - try: - dflimg = DFLIMG.load ( Path(filepath) ) - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - else: - if dflimg.get_relighted(): - continue - img = cv2_imread (str(filepath)) - - if random_one: - alt = np.random.randint(-90,91) - azi = np.random.randint(-90,91) - inten = np.random.random()*0.3+0.3 - relighted_imgs = [dpr.relight(img,alt=alt,azi=azi,intensity=inten,lighten=lighten)] - else: - if not manual and not random_one: - inten = np.random.random()*0.3+0.3 - alt_azi_ar = [(60,0,inten), (60,60,inten), (0,60,inten), (-60,60,inten), (-60,0,inten), (-60,-60,inten), (0,-60,inten), (60,-60,inten)] - - relighted_imgs = [dpr.relight(img,alt=alt,azi=azi,intensity=inten,lighten=lighten) for (alt,azi,inten) in alt_azi_ar ] - - i = 0 - for i,relighted_img in enumerate(relighted_imgs): - im_flags = [] - if filepath.suffix == '.jpg': - im_flags += [int(cv2.IMWRITE_JPEG_QUALITY), 100] - - while True: - relighted_filepath = filepath.parent / (filepath.stem+f'_relighted_{i}'+filepath.suffix) - if not relighted_filepath.exists(): - break - i += 1 - - cv2_imwrite (relighted_filepath, relighted_img ) - - dflimg.remove_source_filename() - dflimg.embed_and_set (relighted_filepath, relighted=True ) - except: - io.log_err (f"Exception occured while processing file {filepath.name}. Error: {traceback.format_exc()}") - -def delete_relighted(input_dir): - input_path = Path(input_dir) - image_paths = [Path(x) for x in Path_utils.get_image_paths(input_path)] - - files_to_delete = [] - for filepath in io.progress_bar_generator(image_paths, "Loading"): - dflimg = DFLIMG.load ( Path(filepath) ) - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - else: - if dflimg.get_relighted(): - files_to_delete += [filepath] - - for file in io.progress_bar_generator(files_to_delete, "Deleting"): - file.unlink() diff --git a/mainscripts/MaskEditorTool.py b/mainscripts/MaskEditorTool.py index 125926f..4eee26f 100644 --- a/mainscripts/MaskEditorTool.py +++ b/mainscripts/MaskEditorTool.py @@ -8,13 +8,13 @@ import cv2 import numpy as np import numpy.linalg as npl -import imagelib +from core import imagelib from DFLIMG import * from facelib import LandmarksProcessor -from imagelib import IEPolys -from interact import interact as io -from utils import Path_utils -from utils.cv2_utils import * +from core.imagelib import IEPolys +from core.interact import interact as io +from core import pathex +from core.cv2ex import * class MaskEditor: @@ -343,7 +343,7 @@ def mask_editor_main(input_dir, confirmed_dir=None, skipped_dir=None, no_default skipped_path.mkdir(parents=True) if not no_default_mask: - eyebrows_expand_mod = np.clip ( io.input_int ("Default eyebrows expand modifier? 
(0..400, skip:100) : ", 100), 0, 400 ) / 100.0 + eyebrows_expand_mod = np.clip ( io.input_int ("Default eyebrows expand modifier?", 100, add_info="0..400"), 0, 400 ) / 100.0 else: eyebrows_expand_mod = None @@ -354,7 +354,7 @@ def mask_editor_main(input_dir, confirmed_dir=None, skipped_dir=None, no_default cached_images = {} - image_paths = [ Path(x) for x in Path_utils.get_image_paths(input_path)] + image_paths = [ Path(x) for x in pathex.get_image_paths(input_path)] done_paths = [] done_images_types = {} image_paths_total = len(image_paths) diff --git a/mainscripts/Converter.py b/mainscripts/Merger.py similarity index 63% rename from mainscripts/Converter.py rename to mainscripts/Merger.py index 614a5a0..ba6af29 100644 --- a/mainscripts/Converter.py +++ b/mainscripts/Merger.py @@ -13,23 +13,23 @@ import cv2 import numpy as np import numpy.linalg as npla -import imagelib +from core import imagelib import samplelib -from converters import (ConverterConfig, ConvertFaceAvatar, ConvertMasked, - FrameInfo) -from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from joblib import SubprocessFunctionCaller, Subprocessor -from nnlib import TernausNet -from utils import Path_utils -from utils.cv2_utils import * +from merger import (MergerConfig, MergeFaceAvatar, MergeMasked, + FrameInfo) from DFLIMG import DFLIMG +from facelib import FaceEnhancer, FaceType, LandmarksProcessor, TernausNet +from core.interact import interact as io +from core.joblib import SubprocessFunctionCaller, Subprocessor +from core.leras import nn +from core import pathex +from core.cv2ex import * -from .ConverterScreen import Screen, ScreenManager +from .MergerScreen import Screen, ScreenManager -CONVERTER_DEBUG = False +MERGER_DEBUG = False -class ConvertSubprocessor(Subprocessor): +class MergeSubprocessor(Subprocessor): class Frame(object): def __init__(self, prev_temporal_frame_infos=None, @@ -76,18 +76,14 @@ class ConvertSubprocessor(Subprocessor): self.predictor_func = client_dict['predictor_func'] self.predictor_input_shape = client_dict['predictor_input_shape'] self.superres_func = client_dict['superres_func'] + self.fanseg_input_size = client_dict['fanseg_input_size'] + self.fanseg_extract_func = client_dict['fanseg_extract_func'] #transfer and set stdin in order to work code.interact in debug subprocess stdin_fd = client_dict['stdin_fd'] if stdin_fd is not None: sys.stdin = os.fdopen(stdin_fd) - from nnlib import nnlib - #model process ate all GPU mem, - #so we cannot use GPU for any TF operations in converter processes - #therefore forcing active_DeviceConfig to CPU only - nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True) - def blursharpen_func (img, sharpen_mode=0, kernel_size=3, amount=100): if kernel_size % 2 == 0: kernel_size += 1 @@ -118,37 +114,6 @@ class ConvertSubprocessor(Subprocessor): return img self.blursharpen_func = blursharpen_func - self.fanseg_by_face_type = {} - self.fanseg_input_size = 256 - - def fanseg_extract(face_type, *args, **kwargs): - fanseg = self.fanseg_by_face_type.get(face_type, None) - if self.fanseg_by_face_type.get(face_type, None) is None: - fanseg = TernausNet("FANSeg", self.fanseg_input_size , FaceType.toString( face_type ) ) - self.fanseg_by_face_type[face_type] = fanseg - - return fanseg.extract(*args, **kwargs) - - self.fanseg_extract_func = fanseg_extract - - self.fanchq_by_face_type = {} - self.fanchq_input_size = 256 - def fanchq_extract(face_type, *args, **kwargs): - fanchq = self.fanchq_by_face_type.get(face_type, None) - if 
self.fanchq_by_face_type.get(face_type, None) is None: - fanchq = TernausNet("FANCHQ", self.fanchq_input_size , FaceType.toString( face_type ) ) - self.fanchq_by_face_type[face_type] = fanchq - - return fanchq.extract(*args, **kwargs) - - self.fanchq_extract_func = fanchq_extract - - import ebsynth - def ebs_ct(*args, **kwargs): - return ebsynth.color_transfer(*args, **kwargs) - - self.ebs_ct_func = ebs_ct - return None #override @@ -156,7 +121,6 @@ class ConvertSubprocessor(Subprocessor): cfg = pf.cfg.copy() cfg.blursharpen_func = self.blursharpen_func cfg.superres_func = self.superres_func - cfg.ebs_ct_func = self.ebs_ct_func frame_info = pf.frame_info @@ -169,15 +133,15 @@ class ConvertSubprocessor(Subprocessor): if len(landmarks_list) == 0: self.log_info ( 'no faces found for %s, copying without faces' % (filename_path.name) ) - + if cfg.export_mask_alpha: img_bgr = cv2_imread(filename) h,w,c = img_bgr.shape if c == 1: - img_bgr = np.repeat(img_bgr, 3, -1) + img_bgr = np.repeat(img_bgr, 3, -1) if c == 3: img_bgr = np.concatenate ([img_bgr, np.zeros((h,w,1), dtype=img_bgr.dtype) ], axis=-1) - + cv2_imwrite (output_filename, img_bgr) else: if filename_path.suffix == '.png': @@ -190,23 +154,21 @@ class ConvertSubprocessor(Subprocessor): img_bgr = cv2_imread(filename) pf.image = img_bgr else: - if cfg.type == ConverterConfig.TYPE_MASKED: + if cfg.type == MergerConfig.TYPE_MASKED: cfg.fanseg_input_size = self.fanseg_input_size cfg.fanseg_extract_func = self.fanseg_extract_func - cfg.fanchq_input_size = self.fanchq_input_size - cfg.fanchq_extract_func = self.fanchq_extract_func try: - final_img = ConvertMasked (self.predictor_func, self.predictor_input_shape, cfg, frame_info) + final_img = MergeMasked (self.predictor_func, self.predictor_input_shape, cfg, frame_info) except Exception as e: e_str = traceback.format_exc() if 'MemoryError' in e_str: raise Subprocessor.SilenceException else: - raise Exception( 'Error while converting file [%s]: %s' % (filename, e_str) ) + raise Exception( 'Error while merging file [%s]: %s' % (filename, e_str) ) - elif cfg.type == ConverterConfig.TYPE_FACE_AVATAR: - final_img = ConvertFaceAvatar (self.predictor_func, self.predictor_input_shape, + elif cfg.type == MergerConfig.TYPE_FACE_AVATAR: + final_img = MergeFaceAvatar (self.predictor_func, self.predictor_input_shape, cfg, pf.prev_temporal_frame_infos, pf.frame_info, pf.next_temporal_frame_infos ) @@ -225,32 +187,43 @@ class ConvertSubprocessor(Subprocessor): return pf.frame_info.filename #override - def __init__(self, is_interactive, converter_session_filepath, predictor_func, predictor_input_shape, converter_config, frames, output_path, model_iter): + def __init__(self, is_interactive, merger_session_filepath, predictor_func, predictor_input_shape, merger_config, frames, output_path, model_iter): if len (frames) == 0: raise ValueError ("len (frames) == 0") - super().__init__('Converter', ConvertSubprocessor.Cli, 86400 if CONVERTER_DEBUG else 60, io_loop_sleep_time=0.001, initialize_subprocesses_in_serial=False) + super().__init__('Merger', MergeSubprocessor.Cli, 86400 if MERGER_DEBUG else 60, io_loop_sleep_time=0.001) self.is_interactive = is_interactive - self.converter_session_filepath = Path(converter_session_filepath) - self.converter_config = converter_config - - #dummy predict and sleep, tensorflow caching kernels. 
If remove it, sometime conversion speed can be x2 slower - predictor_func (dummy_predict=True) - time.sleep(2) + self.merger_session_filepath = Path(merger_session_filepath) + self.merger_config = merger_config self.predictor_func_host, self.predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) self.predictor_input_shape = predictor_input_shape - self.dcscn = None - self.ranksrgan = None - def superres_func(mode, *args, **kwargs): + self.face_enhancer = None + def superres_func(mode, face_bgr): if mode == 1: - if self.ranksrgan is None: - self.ranksrgan = imagelib.RankSRGAN() - return self.ranksrgan.upscale(*args, **kwargs) + if self.face_enhancer is None: + self.face_enhancer = FaceEnhancer(place_model_on_cpu=True) - self.dcscn_host, self.superres_func = SubprocessFunctionCaller.make_pair(superres_func) + return self.face_enhancer.enhance (face_bgr, is_tanh=True, preserve_size=False) + + self.superres_host, self.superres_func = SubprocessFunctionCaller.make_pair(superres_func) + + self.fanseg_by_face_type = {} + self.fanseg_input_size = 256 + def fanseg_extract_func(face_type, *args, **kwargs): + fanseg = self.fanseg_by_face_type.get(face_type, None) + if self.fanseg_by_face_type.get(face_type, None) is None: + cpu_only = len(nn.getCurrentDeviceConfig().devices) == 0 + + with nn.tf.device('/CPU:0' if cpu_only else '/GPU:0'): + fanseg = TernausNet("FANSeg", self.fanseg_input_size , FaceType.toString( face_type ), place_model_on_cpu=True ) + + self.fanseg_by_face_type[face_type] = fanseg + return fanseg.extract(*args, **kwargs) + + self.fanseg_host, self.fanseg_extract_func = SubprocessFunctionCaller.make_pair(fanseg_extract_func) self.output_path = output_path self.model_iter = model_iter @@ -258,11 +231,11 @@ class ConvertSubprocessor(Subprocessor): self.prefetch_frame_count = self.process_count = min(6,multiprocessing.cpu_count()) session_data = None - if self.is_interactive and self.converter_session_filepath.exists(): + if self.is_interactive and self.merger_session_filepath.exists(): - if io.input_bool ("Use saved session? 
(y/n skip:y) : ", True): + if io.input_bool ("Use saved session?", True): try: - with open( str(self.converter_session_filepath), "rb") as f: + with open( str(self.merger_session_filepath), "rb") as f: session_data = pickle.loads(f.read()) except Exception as e: pass @@ -293,12 +266,12 @@ class ConvertSubprocessor(Subprocessor): break if frames_equal: - io.log_info ('Using saved session from ' + '/'.join (self.converter_session_filepath.parts[-2:]) ) + io.log_info ('Using saved session from ' + '/'.join (self.merger_session_filepath.parts[-2:]) ) for frame in s_frames: if frame.cfg is not None: - #recreate ConverterConfig class using constructor with get_config() as dict params - #so if any new param will be added, old converter session will work properly + #recreate MergerConfig class using constructor with get_config() as dict params + #so if any new param will be added, old merger session will work properly frame.cfg = frame.cfg.__class__( **frame.cfg.get_config() ) self.frames = s_frames @@ -328,10 +301,10 @@ class ConvertSubprocessor(Subprocessor): session_data = None if session_data is None: - for filename in Path_utils.get_image_paths(self.output_path): #remove all images in output_path + for filename in pathex.get_image_paths(self.output_path): #remove all images in output_path Path(filename).unlink() - frames[0].cfg = self.converter_config.copy() + frames[0].cfg = self.merger_config.copy() for i in range( len(self.frames) ): frame = self.frames[i] @@ -342,7 +315,7 @@ class ConvertSubprocessor(Subprocessor): #override def process_info_generator(self): - r = [0] if CONVERTER_DEBUG else range(self.process_count) + r = [0] if MERGER_DEBUG else range(self.process_count) for i in r: yield 'CPU%d' % (i), {}, {'device_idx': i, @@ -350,28 +323,68 @@ class ConvertSubprocessor(Subprocessor): 'predictor_func': self.predictor_func, 'predictor_input_shape' : self.predictor_input_shape, 'superres_func': self.superres_func, - 'stdin_fd': sys.stdin.fileno() if CONVERTER_DEBUG else None + 'fanseg_input_size' : self.fanseg_input_size, + 'fanseg_extract_func' : self.fanseg_extract_func, + 'stdin_fd': sys.stdin.fileno() if MERGER_DEBUG else None } #overridable optional def on_clients_initialized(self): - io.progress_bar ("Converting", len (self.frames_idxs), initial=len(self.frames_done_idxs) ) + io.progress_bar ("Merging", len (self.frames_idxs), initial=len(self.frames_done_idxs) ) self.process_remain_frames = not self.is_interactive self.is_interactive_quitting = not self.is_interactive if self.is_interactive: help_images = { - ConverterConfig.TYPE_MASKED : cv2_imread ( str(Path(__file__).parent / 'gfx' / 'help_converter_masked.jpg') ), - ConverterConfig.TYPE_FACE_AVATAR : cv2_imread ( str(Path(__file__).parent / 'gfx' / 'help_converter_face_avatar.jpg') ), + MergerConfig.TYPE_MASKED : cv2_imread ( str(Path(__file__).parent / 'gfx' / 'help_merger_masked.jpg') ), + MergerConfig.TYPE_FACE_AVATAR : cv2_imread ( str(Path(__file__).parent / 'gfx' / 'help_merger_face_avatar.jpg') ), } self.main_screen = Screen(initial_scale_to_width=1368, image=None, waiting_icon=True) - self.help_screen = Screen(initial_scale_to_height=768, image=help_images[self.converter_config.type], waiting_icon=False) - self.screen_manager = ScreenManager( "Converter", [self.main_screen, self.help_screen], capture_keys=True ) + self.help_screen = Screen(initial_scale_to_height=768, image=help_images[self.merger_config.type], waiting_icon=False) + self.screen_manager = ScreenManager( "Merger", [self.main_screen, self.help_screen], 
capture_keys=True ) self.screen_manager.set_current (self.help_screen) self.screen_manager.show_current() + self.masked_keys_funcs = { + '`' : lambda cfg,shift_pressed: cfg.set_mode(0), + '1' : lambda cfg,shift_pressed: cfg.set_mode(1), + '2' : lambda cfg,shift_pressed: cfg.set_mode(2), + '3' : lambda cfg,shift_pressed: cfg.set_mode(3), + '4' : lambda cfg,shift_pressed: cfg.set_mode(4), + '5' : lambda cfg,shift_pressed: cfg.set_mode(5), + '6' : lambda cfg,shift_pressed: cfg.set_mode(6), + '7' : lambda cfg,shift_pressed: cfg.set_mode(7), + '8' : lambda cfg,shift_pressed: cfg.set_mode(8), + 'q' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(1 if not shift_pressed else 5), + 'a' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(-1 if not shift_pressed else -5), + 'w' : lambda cfg,shift_pressed: cfg.add_erode_mask_modifier(1 if not shift_pressed else 5), + 's' : lambda cfg,shift_pressed: cfg.add_erode_mask_modifier(-1 if not shift_pressed else -5), + 'e' : lambda cfg,shift_pressed: cfg.add_blur_mask_modifier(1 if not shift_pressed else 5), + 'd' : lambda cfg,shift_pressed: cfg.add_blur_mask_modifier(-1 if not shift_pressed else -5), + 'r' : lambda cfg,shift_pressed: cfg.add_motion_blur_power(1 if not shift_pressed else 5), + 'f' : lambda cfg,shift_pressed: cfg.add_motion_blur_power(-1 if not shift_pressed else -5), + 'y' : lambda cfg,shift_pressed: cfg.add_blursharpen_amount(1 if not shift_pressed else 5), + 'h' : lambda cfg,shift_pressed: cfg.add_blursharpen_amount(-1 if not shift_pressed else -5), + 'u' : lambda cfg,shift_pressed: cfg.add_output_face_scale(1 if not shift_pressed else 5), + 'j' : lambda cfg,shift_pressed: cfg.add_output_face_scale(-1 if not shift_pressed else -5), + 'i' : lambda cfg,shift_pressed: cfg.add_image_denoise_power(1 if not shift_pressed else 5), + 'k' : lambda cfg,shift_pressed: cfg.add_image_denoise_power(-1 if not shift_pressed else -5), + 'o' : lambda cfg,shift_pressed: cfg.add_bicubic_degrade_power(1 if not shift_pressed else 5), + 'l' : lambda cfg,shift_pressed: cfg.add_bicubic_degrade_power(-1 if not shift_pressed else -5), + 'p' : lambda cfg,shift_pressed: cfg.add_color_degrade_power(1 if not shift_pressed else 5), + ';' : lambda cfg,shift_pressed: cfg.add_color_degrade_power(-1), + ':' : lambda cfg,shift_pressed: cfg.add_color_degrade_power(-5), + 'z' : lambda cfg,shift_pressed: cfg.toggle_masked_hist_match(), + 'x' : lambda cfg,shift_pressed: cfg.toggle_mask_mode(), + 'c' : lambda cfg,shift_pressed: cfg.toggle_color_transfer_mode(), + 'v' : lambda cfg,shift_pressed: cfg.toggle_super_resolution_mode(), + 'b' : lambda cfg,shift_pressed: cfg.toggle_export_mask_alpha(), + 'n' : lambda cfg,shift_pressed: cfg.toggle_sharpen_mode(), + } + self.masked_keys = list(self.masked_keys_funcs.keys()) + #overridable optional def on_clients_finalized(self): io.progress_bar_close() @@ -389,22 +402,24 @@ class ConvertSubprocessor(Subprocessor): 'frames_done_idxs': self.frames_done_idxs, 'model_iter' : self.model_iter, } - self.converter_session_filepath.write_bytes( pickle.dumps(session_data) ) + self.merger_session_filepath.write_bytes( pickle.dumps(session_data) ) - io.log_info ("Session is saved to " + '/'.join (self.converter_session_filepath.parts[-2:]) ) + io.log_info ("Session is saved to " + '/'.join (self.merger_session_filepath.parts[-2:]) ) - cfg_change_keys = ['`','1', '2', '3', '4', '5', '6', '7', '8', - 'q', 'a', 'w', 's', 'e', 'd', 'r', 'f', 'y','h','u','j','i','k','o','l','p', ';',':',#'t', 'g', - 'z', 'x', 'c', 'v', 'b','n' ] #override def 
on_tick(self): self.predictor_func_host.process_messages() - self.dcscn_host.process_messages() + self.superres_host.process_messages() + self.fanseg_host.process_messages() go_prev_frame = False + go_first_frame = False go_prev_frame_overriding_cfg = False + go_first_frame_overriding_cfg = False + go_next_frame = self.process_remain_frames go_next_frame_overriding_cfg = False + go_last_frame_overriding_cfg = False cur_frame = None if len(self.frames_idxs) != 0: @@ -423,7 +438,8 @@ class ConvertSubprocessor(Subprocessor): if cur_frame.image is None: cur_frame.image = cv2_imread ( cur_frame.output_filename) if cur_frame.image is None: - cur_frame.is_done = False #unable to read? recompute then + # unable to read? recompute then + cur_frame.is_done = False cur_frame.is_shown = False self.main_screen.set_image(cur_frame.image) else: @@ -446,119 +462,79 @@ class ConvertSubprocessor(Subprocessor): if key == 27: #esc self.is_interactive_quitting = True elif self.screen_manager.get_current() is self.main_screen: - if chr_key in self.cfg_change_keys: + + if self.merger_config.type == MergerConfig.TYPE_MASKED and chr_key in self.masked_keys: self.process_remain_frames = False if cur_frame is not None: cfg = cur_frame.cfg prev_cfg = cfg.copy() - if cfg.type == ConverterConfig.TYPE_MASKED: - if chr_key == '`': - cfg.set_mode(0) - elif key >= ord('1') and key <= ord('8'): - cfg.set_mode( key - ord('0') ) - elif chr_key == 'q': - cfg.add_hist_match_threshold(1 if not shift_pressed else 5) - elif chr_key == 'a': - cfg.add_hist_match_threshold(-1 if not shift_pressed else -5) - elif chr_key == 'w': - cfg.add_erode_mask_modifier(1 if not shift_pressed else 5) - elif chr_key == 's': - cfg.add_erode_mask_modifier(-1 if not shift_pressed else -5) - elif chr_key == 'e': - cfg.add_blur_mask_modifier(1 if not shift_pressed else 5) - elif chr_key == 'd': - cfg.add_blur_mask_modifier(-1 if not shift_pressed else -5) - elif chr_key == 'r': - cfg.add_motion_blur_power(1 if not shift_pressed else 5) - elif chr_key == 'f': - cfg.add_motion_blur_power(-1 if not shift_pressed else -5) - elif chr_key == 'y': - cfg.add_blursharpen_amount(1 if not shift_pressed else 5) - elif chr_key == 'h': - cfg.add_blursharpen_amount(-1 if not shift_pressed else -5) - elif chr_key == 'u': - cfg.add_output_face_scale(1 if not shift_pressed else 5) - elif chr_key == 'j': - cfg.add_output_face_scale(-1 if not shift_pressed else -5) - elif chr_key == 'i': - cfg.add_image_denoise_power(1 if not shift_pressed else 5) - elif chr_key == 'k': - cfg.add_image_denoise_power(-1 if not shift_pressed else -5) - elif chr_key == 'o': - cfg.add_bicubic_degrade_power(1 if not shift_pressed else 5) - elif chr_key == 'l': - cfg.add_bicubic_degrade_power(-1 if not shift_pressed else -5) - - elif chr_key == 'p': - cfg.add_color_degrade_power(1 if not shift_pressed else 5) - elif chr_key == ';': - cfg.add_color_degrade_power(-1) - elif chr_key == ':': - cfg.add_color_degrade_power(-5) - - elif chr_key == 'z': - cfg.toggle_masked_hist_match() - elif chr_key == 'x': - cfg.toggle_mask_mode() - elif chr_key == 'c': - cfg.toggle_color_transfer_mode() - elif chr_key == 'v': - cfg.toggle_super_resolution_mode() - elif chr_key == 'b': - cfg.toggle_export_mask_alpha() - elif chr_key == 'n': - cfg.toggle_sharpen_mode() - - else: - if chr_key == 'y': - cfg.add_blursharpen_amount(1 if not shift_pressed else 5) - elif chr_key == 'h': - cfg.add_blursharpen_amount(-1 if not shift_pressed else -5) - elif chr_key == 's': - cfg.toggle_add_source_image() - elif chr_key == 'v': 
- cfg.toggle_super_resolution_mode() - elif chr_key == 'n': - cfg.toggle_sharpen_mode() + if cfg.type == MergerConfig.TYPE_MASKED: + self.masked_keys_funcs[chr_key](cfg, shift_pressed) if prev_cfg != cfg: io.log_info ( cfg.to_string(cur_frame.frame_info.filename_short) ) cur_frame.is_done = False cur_frame.is_shown = False else: + if chr_key == ',' or chr_key == 'm': self.process_remain_frames = False go_prev_frame = True - go_prev_frame_overriding_cfg = chr_key == 'm' + + if chr_key == ',': + if shift_pressed: + go_first_frame = True + + elif chr_key == 'm': + if not shift_pressed: + go_prev_frame_overriding_cfg = True + else: + go_first_frame_overriding_cfg = True + elif chr_key == '.' or chr_key == '/': self.process_remain_frames = False go_next_frame = True - go_next_frame_overriding_cfg = chr_key == '/' - elif chr_key == '\r' or chr_key == '\n': - self.process_remain_frames = not self.process_remain_frames + + if chr_key == '.': + if shift_pressed: + self.process_remain_frames = not self.process_remain_frames + + elif chr_key == '/': + if not shift_pressed: + go_next_frame_overriding_cfg = True + else: + go_last_frame_overriding_cfg = True + elif chr_key == '-': self.screen_manager.get_current().diff_scale(-0.1) elif chr_key == '=': self.screen_manager.get_current().diff_scale(0.1) - if go_prev_frame: if cur_frame is None or cur_frame.is_done: if cur_frame is not None: cur_frame.image = None - if len(self.frames_done_idxs) > 0: - prev_frame = self.frames[self.frames_done_idxs.pop()] - self.frames_idxs.insert(0, prev_frame.idx) - prev_frame.is_shown = False - io.progress_bar_inc(-1) + while True: + if len(self.frames_done_idxs) > 0: + prev_frame = self.frames[self.frames_done_idxs.pop()] + self.frames_idxs.insert(0, prev_frame.idx) + prev_frame.is_shown = False + io.progress_bar_inc(-1) - if cur_frame is not None and go_prev_frame_overriding_cfg: - if prev_frame.cfg != cur_frame.cfg: - prev_frame.cfg = cur_frame.cfg.copy() - prev_frame.is_done = False + if cur_frame is not None and (go_prev_frame_overriding_cfg or go_first_frame_overriding_cfg): + if prev_frame.cfg != cur_frame.cfg: + prev_frame.cfg = cur_frame.cfg.copy() + prev_frame.is_done = False + + cur_frame = prev_frame + + if go_first_frame_overriding_cfg or go_first_frame: + if len(self.frames_done_idxs) > 0: + continue + break elif go_next_frame: if cur_frame is not None and cur_frame.is_done: @@ -568,26 +544,33 @@ class ConvertSubprocessor(Subprocessor): self.frames_idxs.pop(0) io.progress_bar_inc(1) + f = self.frames + if len(self.frames_idxs) != 0: - next_frame = self.frames[ self.frames_idxs[0] ] - - if go_next_frame_overriding_cfg: - f = self.frames - for i in range( next_frame.idx, len(self.frames) ): - f[i].cfg = None - f[i].is_shown = False - - if next_frame.cfg is None or next_frame.is_shown == False: #next frame is never shown or override current cfg to next frames and the prefetches - for i in range( min(len(self.frames_idxs), self.prefetch_frame_count) ): - frame = self.frames[ self.frames_idxs[i] ] - - if frame.cfg is None or frame.cfg != cur_frame.cfg: - frame.cfg = cur_frame.cfg.copy() - frame.is_done = False #initiate solve again - - + next_frame = f[ self.frames_idxs[0] ] next_frame.is_shown = False + if go_next_frame_overriding_cfg or go_last_frame_overriding_cfg: + + if go_next_frame_overriding_cfg: + to_frames = next_frame.idx+1 + else: + to_frames = len(f) + + for i in range( next_frame.idx, to_frames ): + f[i].cfg = None + + for i in range( min(len(self.frames_idxs), self.prefetch_frame_count) ): + frame 
= f[ self.frames_idxs[i] ] + if frame.cfg is None: + if i == 0: + frame.cfg = cur_frame.cfg.copy() + else: + frame.cfg = f[ self.frames_idxs[i-1] ].cfg.copy() + + frame.is_done = False #initiate solve again + frame.is_shown = False + if len(self.frames_idxs) == 0: self.process_remain_frames = False @@ -619,7 +602,7 @@ class ConvertSubprocessor(Subprocessor): if not frame.is_done and not frame.is_processing and frame.cfg is not None: frame.is_processing = True - return ConvertSubprocessor.ProcessingFrame(idx=frame.idx, + return MergeSubprocessor.ProcessingFrame(idx=frame.idx, cfg=frame.cfg.copy(), prev_temporal_frame_infos=frame.prev_temporal_frame_infos, frame_info=frame.frame_info, @@ -633,19 +616,18 @@ class ConvertSubprocessor(Subprocessor): def get_result(self): return 0 -def main (args, device_args): - io.log_info ("Running converter.\r\n") - - training_data_src_dir = args.get('training_data_src_dir', None) - training_data_src_path = Path(training_data_src_dir) if training_data_src_dir is not None else None - aligned_dir = args.get('aligned_dir', None) - avaperator_aligned_dir = args.get('avaperator_aligned_dir', None) +def main (model_class_name=None, + saved_models_path=None, + training_data_src_path=None, + force_model_name=None, + input_path=None, + output_path=None, + aligned_path=None, + force_gpu_idxs=None, + cpu_only=None): + io.log_info ("Running merger.\r\n") try: - input_path = Path(args['input_dir']) - output_path = Path(args['output_dir']) - model_path = Path(args['model_dir']) - if not input_path.exists(): io.log_err('Input directory not found. Please ensure it exists.') return @@ -653,54 +635,53 @@ def main (args, device_args): if not output_path.exists(): output_path.mkdir(parents=True, exist_ok=True) - if not model_path.exists(): + if not saved_models_path.exists(): io.log_err('Model directory not found. Please ensure it exists.') return - is_interactive = io.input_bool ("Use interactive converter? (y/n skip:y) : ", True) if not io.is_colab() else False + is_interactive = io.input_bool ("Use interactive merger?", True) if not io.is_colab() else False import models - model = models.import_model( args['model_name'])(model_path, device_args=device_args, training_data_src_path=training_data_src_path) - converter_session_filepath = model.get_strpath_storage_for_file('converter_session.dat') - predictor_func, predictor_input_shape, cfg = model.get_ConverterConfig() + model = models.import_model(model_class_name)(is_training=False, + saved_models_path=saved_models_path, + training_data_src_path=training_data_src_path, + force_gpu_idxs=force_gpu_idxs, + cpu_only=cpu_only) + merger_session_filepath = model.get_strpath_storage_for_file('merger_session.dat') + predictor_func, predictor_input_shape, cfg = model.get_MergerConfig() if not is_interactive: cfg.ask_settings() - input_path_image_paths = Path_utils.get_image_paths(input_path) + input_path_image_paths = pathex.get_image_paths(input_path) - if cfg.type == ConverterConfig.TYPE_MASKED: - if aligned_dir is None: - io.log_err('Aligned directory not found. Please ensure it exists.') - return - - aligned_path = Path(aligned_dir) + if cfg.type == MergerConfig.TYPE_MASKED: if not aligned_path.exists(): io.log_err('Aligned directory not found. 
Please ensure it exists.') return packed_samples = None try: - packed_samples = samplelib.PackedFaceset.load(aligned_path) + packed_samples = samplelib.PackedFaceset.load(aligned_path) except: io.log_err(f"Error occured while loading samplelib.PackedFaceset.load {str(aligned_path)}, {traceback.format_exc()}") - - if packed_samples is not None: - io.log_info ("Using packed faceset.") + + if packed_samples is not None: + io.log_info ("Using packed faceset.") def generator(): - for sample in io.progress_bar_generator( packed_samples, "Collecting alignments"): - filepath = Path(sample.filename) + for sample in io.progress_bar_generator( packed_samples, "Collecting alignments"): + filepath = Path(sample.filename) yield DFLIMG.load(filepath, loader_func=lambda x: sample.read_raw_file() ) else: def generator(): - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(aligned_path), "Collecting alignments"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(aligned_path), "Collecting alignments"): filepath = Path(filepath) yield DFLIMG.load(filepath) - + alignments = {} multiple_faces_detected = False - + for dflimg in generator(): if dflimg is None: io.log_err ("%s is not a dfl image file" % (filepath.name) ) @@ -709,10 +690,10 @@ def main (args, device_args): source_filename = dflimg.get_source_filename() if source_filename is None or source_filename == "_": continue - + source_filename = Path(source_filename) source_filename_stem = source_filename.stem - + if source_filename_stem not in alignments.keys(): alignments[ source_filename_stem ] = [] @@ -724,7 +705,7 @@ def main (args, device_args): if multiple_faces_detected: io.log_info ("Warning: multiple faces detected. Strongly recommended to process them separately.") - frames = [ ConvertSubprocessor.Frame( frame_info=FrameInfo(filename=p, landmarks_list=alignments.get(Path(p).stem, None))) for p in input_path_image_paths ] + frames = [ MergeSubprocessor.Frame( frame_info=FrameInfo(filename=p, landmarks_list=alignments.get(Path(p).stem, None))) for p in input_path_image_paths ] if multiple_faces_detected: io.log_info ("Warning: multiple faces detected. 
Motion blur will not be used.") @@ -760,11 +741,11 @@ def main (args, device_args): fi.motion_deg = -math.atan2(motion_vector[1],motion_vector[0])*180 / math.pi - elif cfg.type == ConverterConfig.TYPE_FACE_AVATAR: + elif cfg.type == MergerConfig.TYPE_FACE_AVATAR: filesdata = [] for filepath in io.progress_bar_generator(input_path_image_paths, "Collecting info"): filepath = Path(filepath) - + dflimg = DFLIMG.load(filepath) if dflimg is None: io.log_err ("%s is not a dfl image file" % (filepath.name) ) @@ -787,19 +768,19 @@ def main (args, device_args): prev_temporal_frame_infos.insert (0, prev_frame_info ) next_temporal_frame_infos.append ( next_frame_info ) - frames.append ( ConvertSubprocessor.Frame(prev_temporal_frame_infos=prev_temporal_frame_infos, + frames.append ( MergeSubprocessor.Frame(prev_temporal_frame_infos=prev_temporal_frame_infos, frame_info=frame_info, next_temporal_frame_infos=next_temporal_frame_infos) ) if len(frames) == 0: - io.log_info ("No frames to convert in input_dir.") + io.log_info ("No frames to merge in input_dir.") else: - ConvertSubprocessor ( + MergeSubprocessor ( is_interactive = is_interactive, - converter_session_filepath = converter_session_filepath, + merger_session_filepath = merger_session_filepath, predictor_func = predictor_func, predictor_input_shape = predictor_input_shape, - converter_config = cfg, + merger_config = cfg, frames = frames, output_path = output_path, model_iter = model.get_iter() diff --git a/mainscripts/ConverterScreen/ConverterScreen.py b/mainscripts/MergerScreen/MergerScreen.py similarity index 96% rename from mainscripts/ConverterScreen/ConverterScreen.py rename to mainscripts/MergerScreen/MergerScreen.py index 78ed45d..d105a86 100644 --- a/mainscripts/ConverterScreen/ConverterScreen.py +++ b/mainscripts/MergerScreen/MergerScreen.py @@ -3,10 +3,10 @@ from pathlib import Path import numpy as np -import imagelib -from interact import interact as io -from utils.cv2_utils import * -from utils.os_utils import get_screen_size +from core import imagelib +from core.interact import interact as io +from core.cv2ex import * +from core import osex class ScreenAssets(object): @@ -96,7 +96,7 @@ class Screen(object): if self.is_first_appear: self.is_first_appear = False #center window - desktop_w, desktop_h = get_screen_size() + desktop_w, desktop_h = osex.get_screen_size() h,w,c = screen.shape cv2.moveWindow(self.scrn_manager.wnd_name, max(0,(desktop_w-w) // 2), max(0, (desktop_h-h) // 2) ) diff --git a/mainscripts/MergerScreen/__init__.py b/mainscripts/MergerScreen/__init__.py new file mode 100644 index 0000000..ea3e320 --- /dev/null +++ b/mainscripts/MergerScreen/__init__.py @@ -0,0 +1 @@ +from .MergerScreen import Screen, ScreenManager \ No newline at end of file diff --git a/mainscripts/ConverterScreen/gfx/sand_clock_64.png b/mainscripts/MergerScreen/gfx/sand_clock_64.png similarity index 100% rename from mainscripts/ConverterScreen/gfx/sand_clock_64.png rename to mainscripts/MergerScreen/gfx/sand_clock_64.png diff --git a/mainscripts/Sorter.py b/mainscripts/Sorter.py index 9666ca9..e5d5d06 100644 --- a/mainscripts/Sorter.py +++ b/mainscripts/Sorter.py @@ -1,4 +1,5 @@ import multiprocessing +import math import operator import os import sys @@ -11,23 +12,18 @@ import cv2 import numpy as np from numpy import linalg as npla -import imagelib -from facelib import LandmarksProcessor -from imagelib import estimate_sharpness -from interact import interact as io -from joblib import Subprocessor -from nnlib import VGGFace, nnlib -from utils import 
Path_utils -from utils.cv2_utils import * +from core import imagelib, pathex +from core.cv2ex import * +from core.imagelib import estimate_sharpness +from core.interact import interact as io +from core.joblib import Subprocessor +from core.leras import nn from DFLIMG import * +from facelib import LandmarksProcessor + class BlurEstimatorSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.log_info('Running on %s.' % (client_dict['device_name']) ) - #override def process_data(self, data): filepath = Path( data[0] ) @@ -62,10 +58,11 @@ class BlurEstimatorSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range(0, multiprocessing.cpu_count() ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - } + cpu_count = multiprocessing.cpu_count() + io.log_info(f'Running on {cpu_count} CPUs') + + for i in range(cpu_count): + yield 'CPU%d' % (i), {}, {} #override def get_data(self, host_dict): @@ -95,7 +92,7 @@ class BlurEstimatorSubprocessor(Subprocessor): def sort_by_blur(input_path): io.log_info ("Sorting by blur...") - img_list = [ (filename,[]) for filename in Path_utils.get_image_paths(input_path) ] + img_list = [ (filename,[]) for filename in pathex.get_image_paths(input_path) ] img_list, trash_img_list = BlurEstimatorSubprocessor (img_list).run() io.log_info ("Sorting...") @@ -103,81 +100,11 @@ def sort_by_blur(input_path): return img_list, trash_img_list -def sort_by_face(input_path): - io.log_info ("Sorting by face similarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - dflimg = DFLIMG.load (filepath) - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_landmarks()] ) - - - img_list_len = len(img_list) - for i in io.progress_bar_generator ( range(0, img_list_len-1), "Sorting"): - min_score = float("inf") - j_min_score = i+1 - for j in range(i+1,len(img_list)): - - fl1 = img_list[i][1] - fl2 = img_list[j][1] - score = np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) - - if score < min_score: - min_score = score - j_min_score = j - img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] - - return img_list, trash_img_list - -def sort_by_face_dissim(input_path): - - io.log_info ("Sorting by face dissimilarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - dflimg = DFLIMG.load (filepath) - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_landmarks(), 0 ] ) - - img_list_len = len(img_list) - for i in io.progress_bar_generator( range(img_list_len-1), "Sorting"): - score_total = 0 - for j in range(i+1,len(img_list)): - if i == j: - continue - fl1 = img_list[i][1] - fl2 = img_list[j][1] - score_total += np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) - - img_list[i][2] = score_total - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) - - return img_list, trash_img_list - def sort_by_face_yaw(input_path): io.log_info ("Sorting by face yaw...") img_list = [] trash_img_list = [] - for 
filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -200,7 +127,7 @@ def sort_by_face_pitch(input_path): io.log_info ("Sorting by face pitch...") img_list = [] trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -221,10 +148,6 @@ def sort_by_face_pitch(input_path): class HistSsimSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' % (client_dict['device_name']) ) - #override def process_data(self, data): img_list = [] @@ -277,10 +200,11 @@ class HistSsimSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range( len(self.img_chunks_list) ): - yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, - 'device_name': 'CPU%d' % (i) - } + cpu_count = len(self.img_chunks_list) + io.log_info(f'Running on {cpu_count} threads') + for i in range(cpu_count): + yield 'CPU%d' % (i), {'i':i}, {} + #override def on_clients_initialized(self): io.progress_bar ("Sorting", len(self.img_list)) @@ -311,14 +235,13 @@ class HistSsimSubprocessor(Subprocessor): def sort_by_hist(input_path): io.log_info ("Sorting by histogram similarity...") - img_list = HistSsimSubprocessor(Path_utils.get_image_paths(input_path)).run() - return img_list + img_list = HistSsimSubprocessor(pathex.get_image_paths(input_path)).run() + return img_list, [] class HistDissimSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): #override def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) self.img_list = client_dict['img_list'] self.img_list_len = len(self.img_list) @@ -355,11 +278,11 @@ class HistDissimSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range(0, min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'img_list' : self.img_list - } + cpu_count = min(multiprocessing.cpu_count(), 8) + io.log_info(f'Running on {cpu_count} CPUs') + for i in range(cpu_count): + yield 'CPU%d' % (i), {}, {'img_list' : self.img_list} + #override def get_data(self, host_dict): if len (self.img_list_range) > 0: @@ -385,7 +308,7 @@ def sort_by_hist_dissim(input_path): img_list = [] trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -407,37 +330,37 @@ def sort_by_hist_dissim(input_path): def sort_by_brightness(input_path): io.log_info ("Sorting by brightness...") - img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,2].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] + img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,2].flatten() )] for x in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading") ] io.log_info ("Sorting...") img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - return img_list + return img_list, [] def sort_by_hue(input_path): io.log_info ("Sorting by hue...") - img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,0].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] + img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,0].flatten() )] for x in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading") ] io.log_info ("Sorting...") img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - return img_list + return img_list, [] def sort_by_black(input_path): io.log_info ("Sorting by amount of black pixels...") img_list = [] - for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for x in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): img = cv2_imread(x) img_list.append ([x, img[(img == 0)].size ]) io.log_info ("Sorting...") img_list = sorted(img_list, key=operator.itemgetter(1), reverse=False) - return img_list + return img_list, [] def sort_by_origname(input_path): io.log_info ("Sort by original filename...") img_list = [] trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -455,7 +378,7 @@ def sort_by_origname(input_path): def sort_by_oneface_in_image(input_path): io.log_info ("Sort by one face in images...") - image_paths = Path_utils.get_image_paths(input_path) + image_paths = pathex.get_image_paths(input_path) a = np.array ([ ( int(x[0]), int(x[1]) ) \ for x in [ Path(filepath).stem.split('_') for filepath in image_paths ] if len(x) == 2 ]) @@ -468,13 +391,14 @@ def sort_by_oneface_in_image(input_path): img_list = [ (path,) for i,path in enumerate(image_paths) if i not in idxs ] 
trash_img_list = [ (image_paths[x],) for x in idxs ] return img_list, trash_img_list + + io.log_info ("Nothing found. Possible recover original filenames first.") return [], [] class FinalLoaderSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): #override def on_initialize(self, client_dict): - self.log_info ('Running on %s.' % (client_dict['device_name']) ) self.include_by_blur = client_dict['include_by_blur'] #override @@ -528,11 +452,11 @@ class FinalLoaderSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range(0, min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'include_by_blur': self.include_by_blur - } + cpu_count = min(multiprocessing.cpu_count(), 8) + io.log_info(f'Running on {cpu_count} CPUs') + + for i in range(cpu_count): + yield 'CPU%d' % (i), {}, {'include_by_blur': self.include_by_blur} #override def get_data(self, host_dict): @@ -559,10 +483,6 @@ class FinalLoaderSubprocessor(Subprocessor): class FinalHistDissimSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' % (client_dict['device_name']) ) - #override def process_data(self, data): idx, pitch_yaw_img_list = data @@ -598,10 +518,11 @@ class FinalHistDissimSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range(min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, - 'device_name': 'CPU%d' % (i) - } + cpu_count = min(multiprocessing.cpu_count(), 8) + io.log_info(f'Running on {cpu_count} CPUs') + for i in range(cpu_count): + yield 'CPU%d' % (i), {}, {} + #override def on_clients_initialized(self): io.progress_bar ("Sort by hist-dissim", len(self.pitch_yaw_sample_list_idxs) ) @@ -632,18 +553,18 @@ class FinalHistDissimSubprocessor(Subprocessor): def get_result(self): return self.result -def sort_final(input_path, include_by_blur=True): - io.log_info ("Performing final sort.") +def sort_best(input_path, include_by_blur=True): + io.log_info ("Performing sort by best faces.") - target_count = io.input_int ("Target number of images? 
(default:2000) : ", 2000) + target_count = io.input_int ("Target number of faces?", 2000) - img_list, trash_img_list = FinalLoaderSubprocessor( Path_utils.get_image_paths(input_path), include_by_blur ).run() + img_list, trash_img_list = FinalLoaderSubprocessor( pathex.get_image_paths(input_path), include_by_blur ).run() final_img_list = [] grads = 128 imgs_per_grad = round (target_count / grads) - grads_space = np.linspace (-1.0,1.0,grads) + grads_space = np.linspace (-math.pi / 2, math.pi / 2,grads) yaws_sample_list = [None]*grads for g in io.progress_bar_generator ( range(grads), "Sort by yaw"): @@ -696,7 +617,7 @@ def sort_final(input_path, include_by_blur=True): pitch_sample_list = [None]*pitch_grads - grads_space = np.linspace (-1.0,1.0, pitch_grads ) + grads_space = np.linspace (-math.pi / 2,math.pi / 2, pitch_grads ) for pg in range (pitch_grads): @@ -747,7 +668,7 @@ def sort_final(input_path, include_by_blur=True): return final_img_list, trash_img_list - +""" def sort_by_vggface(input_path): io.log_info ("Sorting by face similarity using VGGFace model...") @@ -756,7 +677,7 @@ def sort_by_vggface(input_path): final_img_list = [] trash_img_list = [] - image_paths = Path_utils.get_image_paths(input_path) + image_paths = pathex.get_image_paths(input_path) img_list = [ (x,) for x in image_paths ] img_list_len = len(img_list) img_list_range = [*range(img_list_len)] @@ -786,42 +707,48 @@ def sort_by_vggface(input_path): feats[i+1], feats[idx] = feats[idx], feats[i+1] return img_list, trash_img_list +""" def sort_by_absdiff(input_path): io.log_info ("Sorting by absolute difference...") - is_sim = io.input_bool ("Sort by similar? ( y/n ?:help skip:y ) : ", True, help_message="Otherwise sort by dissimilar.") + is_sim = io.input_bool ("Sort by similar?", True, help_message="Otherwise sort by dissimilar.") - from nnlib import nnlib - exec( nnlib.import_all( device_config=nnlib.device.Config() ), locals(), globals() ) + from core.leras import nn - image_paths = Path_utils.get_image_paths(input_path) + device_config = nn.ask_choose_device_idxs(choose_only_one=True, return_device_config=True) + nn.initialize( device_config=device_config ) + tf = nn.tf + + image_paths = pathex.get_image_paths(input_path) image_paths_len = len(image_paths) batch_size = 1024 batch_size_remain = image_paths_len % batch_size - i_t = Input ( (256,256,3) ) - j_t = Input ( (256,256,3) ) + i_t = tf.placeholder (tf.float32, (None,256,256,3) ) + j_t = tf.placeholder (tf.float32, (None,256,256,3) ) - outputs = [] + outputs_full = [] + outputs_remain = [] + for i in range(batch_size): - outputs += [ K.sum( K.abs(i_t-j_t[i]), axis=[1,2,3] ) ] + diff_t = tf.reduce_sum( tf.abs(i_t-j_t[i]), axis=[1,2,3] ) + outputs_full.append(diff_t) + if i < batch_size_remain: + outputs_remain.append(diff_t) - func_bs_full = K.function ( [i_t,j_t], outputs) + def func_bs_full(i,j): + return nn.tf_sess.run (outputs_full, feed_dict={i_t:i,j_t:j}) - outputs = [] - for i in range(batch_size_remain): - outputs += [ K.sum( K.abs(i_t-j_t[i]), axis=[1,2,3] ) ] - - func_bs_remain = K.function ( [i_t,j_t], outputs) + def func_bs_remain(i,j): + return nn.tf_sess.run (outputs_remain, feed_dict={i_t:i,j_t:j}) import h5py db_file_path = Path(tempfile.gettempdir()) / 'sort_cache.hdf5' db_file = h5py.File( str(db_file_path), "w") db = db_file.create_dataset("results", (image_paths_len,image_paths_len), compression="gzip") - pg_len = image_paths_len // batch_size if batch_size_remain != 0: pg_len += 1 @@ -841,7 +768,7 @@ def sort_by_absdiff(input_path): if i >= 
j:
                i_images = [ cv2_imread(x) for x in image_paths[i:i+batch_size] ]
                i_images_len = len(i_images)

-                result = func ([i_images,j_images])
+                result = func (i_images,j_images)
                db[j:j+j_images_len,i:i+i_images_len] = np.array(result)

            io.progress_bar_inc(1)

@@ -874,7 +801,7 @@ def final_process(input_path, img_list, trash_img_list):

        io.log_info ("Trashing %d items to %s" % ( len(trash_img_list), str(trash_path) ) )

-        for filename in Path_utils.get_image_paths(trash_path):
+        for filename in pathex.get_image_paths(trash_path):
            Path(filename).unlink()

        for i in io.progress_bar_generator( range(len(trash_img_list)), "Moving trash", leave=False):
@@ -905,29 +832,40 @@ def final_process(input_path, img_list, trash_img_list):
            except:
                io.log_info ('fail to rename %s' % (src.name) )

-def main (input_path, sort_by_method):
-    input_path = Path(input_path)
-    sort_by_method = sort_by_method.lower()
+sort_func_methods = {
+    'blur': ("blur", sort_by_blur),
+    'face-yaw': ("face yaw direction", sort_by_face_yaw),
+    'face-pitch': ("face pitch direction", sort_by_face_pitch),
+    'hist': ("histogram similarity", sort_by_hist),
+    'hist-dissim': ("histogram dissimilarity", sort_by_hist_dissim),
+    'brightness': ("brightness", sort_by_brightness),
+    'hue': ("hue", sort_by_hue),
+    'black': ("amount of black pixels", sort_by_black),
+    'origname': ("original filename", sort_by_origname),
+    'oneface': ("one face in image", sort_by_oneface_in_image),
+    'absdiff': ("absolute pixel difference", sort_by_absdiff),
+    'final': ("best faces", sort_best),
+}
+def main (input_path, sort_by_method=None):
     io.log_info ("Running sort tool.\r\n")

-    img_list = []
-    trash_img_list = []
-    if sort_by_method == 'blur':             img_list, trash_img_list = sort_by_blur (input_path)
-    elif sort_by_method == 'face':           img_list, trash_img_list = sort_by_face (input_path)
-    elif sort_by_method == 'face-dissim':    img_list, trash_img_list = sort_by_face_dissim (input_path)
-    elif sort_by_method == 'face-yaw':       img_list, trash_img_list = sort_by_face_yaw (input_path)
-    elif sort_by_method == 'face-pitch':     img_list, trash_img_list = sort_by_face_pitch (input_path)
-    elif sort_by_method == 'hist':           img_list = sort_by_hist (input_path)
-    elif sort_by_method == 'hist-dissim':    img_list, trash_img_list = sort_by_hist_dissim (input_path)
-    elif sort_by_method == 'brightness':     img_list = sort_by_brightness (input_path)
-    elif sort_by_method == 'hue':            img_list = sort_by_hue (input_path)
-    elif sort_by_method == 'black':          img_list = sort_by_black (input_path)
-    elif sort_by_method == 'origname':       img_list, trash_img_list = sort_by_origname (input_path)
-    elif sort_by_method == 'oneface':        img_list, trash_img_list = sort_by_oneface_in_image (input_path)
-    elif sort_by_method == 'vggface':        img_list, trash_img_list = sort_by_vggface (input_path)
-    elif sort_by_method == 'absdiff':        img_list, trash_img_list = sort_by_absdiff (input_path)
-    elif sort_by_method == 'final':          img_list, trash_img_list = sort_final (input_path)
-    elif sort_by_method == 'final-no-blur':  img_list, trash_img_list = sort_final (input_path, include_by_blur=False)
+    if sort_by_method is None:
+        io.log_info(f"Choose sorting method:")
+
+        key_list = list(sort_func_methods.keys())
+        for i, key in enumerate(key_list):
+            desc, func = sort_func_methods[key]
+            io.log_info(f"[{i}] {desc}")
+
+        io.log_info("")
+        id = io.input_int("", 3, valid_list=[*range(len(key_list))] )
+
+        sort_by_method = key_list[id]
+    else:
+        sort_by_method = sort_by_method.lower()
+
+    desc, func = sort_func_methods[sort_by_method]
+    img_list, trash_img_list = func(input_path)

     final_process (input_path, img_list, trash_img_list)
diff --git a/mainscripts/Trainer.py b/mainscripts/Trainer.py
index ce9c1df..b3e174c 100644
--- a/mainscripts/Trainer.py
+++ b/mainscripts/Trainer.py
@@ -6,29 +6,31 @@ import time
 import numpy as np
 import itertools
 from pathlib import Path
-from utils import Path_utils
-import imagelib
+from core import pathex
+from core import imagelib
 import cv2
 import models
-from interact import interact as io
+from core.interact import interact as io

-def trainerThread (s2c, c2s, e, args, device_args):
+def trainerThread (s2c, c2s, e,
+                    model_class_name = None,
+                    saved_models_path = None,
+                    training_data_src_path = None,
+                    training_data_dst_path = None,
+                    pretraining_data_path = None,
+                    pretrained_model_path = None,
+                    no_preview=False,
+                    force_model_name=None,
+                    force_gpu_idxs=None,
+                    cpu_only=None,
+                    execute_programs = None,
+                    debug=False,
+                    **kwargs):
     while True:
         try:
             start_time = time.time()

-            training_data_src_path = Path( args.get('training_data_src_dir', '') )
-            training_data_dst_path = Path( args.get('training_data_dst_dir', '') )
-
-            pretraining_data_path = args.get('pretraining_data_dir', '')
-            pretraining_data_path = Path(pretraining_data_path) if pretraining_data_path is not None else None
-
-            model_path = Path( args.get('model_path', '') )
-            model_name = args.get('model_name', '')
             save_interval_min = 15
-            debug = args.get('debug', '')
-            execute_programs = args.get('execute_programs', [])
-            no_preview = args.get('no_preview', False)

             if not training_data_src_path.exists():
                 io.log_err('Training data src directory does not exist.')
@@ -38,18 +40,22 @@ def trainerThread (s2c, c2s, e, args, device_args):
                 io.log_err('Training data dst directory does not exist.')
                 break

-            if not model_path.exists():
-                model_path.mkdir(exist_ok=True)
+            if not saved_models_path.exists():
+                saved_models_path.mkdir(exist_ok=True)

-            model = models.import_model(model_name)(
-                        model_path,
+            model = models.import_model(model_class_name)(
+                        is_training=True,
+                        saved_models_path=saved_models_path,
                         training_data_src_path=training_data_src_path,
                         training_data_dst_path=training_data_dst_path,
                         pretraining_data_path=pretraining_data_path,
-                        is_training=True,
-                        debug=debug,
+                        pretrained_model_path=pretrained_model_path,
                         no_preview=no_preview,
-                        device_args=device_args)
+                        force_model_name=force_model_name,
+                        force_gpu_idxs=force_gpu_idxs,
+                        cpu_only=cpu_only,
+                        debug=debug,
+                        )

             is_reached_goal = model.is_reached_iter_goal()

@@ -71,10 +77,6 @@ def trainerThread (s2c, c2s, e, args, device_args):
                 c2s.put ( {'op':'show', 'previews': previews} )
                 e.set() #Set the GUI Thread as Ready

-
-            if model.is_first_run():
-                model_save()
-
             if model.get_target_iter() != 0:
                 if is_reached_goal:
                     io.log_info('Model already trained to target iteration. You can use preview.')
@@ -108,6 +110,12 @@ def trainerThread (s2c, c2s, e, args, device_args):
                         print("Unable to execute program: %s" % (prog) )

                 if not is_reached_goal:
+
+                    if model.get_iter() == 0:
+                        io.log_info("")
+                        io.log_info("Trying to do the first iteration. 
If an error occurs, reduce the model parameters.") + io.log_info("") + iter, iter_time = model.train_one_iter() loss_history = model.get_loss_history() @@ -119,8 +127,8 @@ def trainerThread (s2c, c2s, e, args, device_args): if shared_state['after_save']: shared_state['after_save'] = False - last_save_time = time.time() #upd last_save_time only after save+one_iter, because plaidML rebuilds programs after save https://github.com/plaidml/plaidml/issues/274 - + last_save_time = time.time() + mean_loss = np.mean ( [ np.array(loss_history[i]) for i in range(save_iter, iter) ], axis=0) for loss_value in mean_loss: @@ -137,7 +145,10 @@ def trainerThread (s2c, c2s, e, args, device_args): io.log_info ('\r' + loss_string, end='') else: io.log_info (loss_string, end='\r') - + + if model.get_iter() == 1: + model_save() + if model.get_target_iter() != 0 and model.is_reached_iter_goal(): io.log_info ('Reached target iteration.') model_save() @@ -185,16 +196,16 @@ def trainerThread (s2c, c2s, e, args, device_args): -def main(args, device_args): +def main(**kwargs): io.log_info ("Running trainer.\r\n") - no_preview = args.get('no_preview', False) + no_preview = kwargs.get('no_preview', False) s2c = queue.Queue() c2s = queue.Queue() e = threading.Event() - thread = threading.Thread(target=trainerThread, args=(s2c, c2s, e, args, device_args) ) + thread = threading.Thread(target=trainerThread, args=(s2c, c2s, e), kwargs=kwargs ) thread.start() e.wait() #Wait for inital load to occur. diff --git a/mainscripts/Util.py b/mainscripts/Util.py index ba2ad97..bc65b90 100644 --- a/mainscripts/Util.py +++ b/mainscripts/Util.py @@ -5,10 +5,10 @@ import cv2 from DFLIMG import * from facelib import LandmarksProcessor -from imagelib import IEPolys -from interact import interact as io -from utils import Path_utils -from utils.cv2_utils import * +from core.imagelib import IEPolys +from core.interact import interact as io +from core import pathex +from core.cv2ex import * def save_faceset_metadata_folder(input_path): @@ -19,7 +19,7 @@ def save_faceset_metadata_folder(input_path): io.log_info (f"Saving metadata to {str(metadata_filepath)}\r\n") d = {} - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Processing"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -52,7 +52,7 @@ def restore_faceset_metadata_folder(input_path): except: raise FileNotFoundError(filename) - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Processing"): filepath = Path(filepath) shape, dfl_dict = d.get(filepath.name, None) @@ -92,7 +92,7 @@ def remove_ie_polys_folder(input_path): io.log_info ("Removing ie_polys...\r\n") - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Removing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Removing"): filepath = Path(filepath) remove_ie_polys_file(filepath) @@ -114,7 +114,7 @@ def remove_fanseg_folder(input_path): io.log_info ("Removing fanseg mask...\r\n") - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Removing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Removing"): filepath = Path(filepath) remove_fanseg_file(filepath) @@ -150,14 +150,14 @@ def convert_png_to_jpg_folder (input_path): 
io.log_info ("Converting PNG to JPG...\r\n") - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Converting"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Converting"): filepath = Path(filepath) convert_png_to_jpg_file(filepath) def add_landmarks_debug_images(input_path): io.log_info ("Adding landmarks debug images...") - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Processing"): filepath = Path(filepath) img = cv2_imread(str(filepath)) @@ -179,7 +179,7 @@ def recover_original_aligned_filename(input_path): io.log_info ("Recovering original aligned filename...") files = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Processing"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) diff --git a/mainscripts/VideoEd.py b/mainscripts/VideoEd.py index ac7ebc5..cab6fcc 100644 --- a/mainscripts/VideoEd.py +++ b/mainscripts/VideoEd.py @@ -2,8 +2,8 @@ import subprocess import numpy as np import ffmpeg from pathlib import Path -from utils import Path_utils -from interact import interact as io +from core import pathex +from core.interact import interact as io def extract_video(input_file, output_dir, output_ext=None, fps=None): input_file_path = Path(input_file) @@ -14,7 +14,7 @@ def extract_video(input_file, output_dir, output_ext=None, fps=None): if input_file_path.suffix == '.*': - input_file_path = Path_utils.get_first_file_by_stem (input_file_path.parent, input_file_path.stem) + input_file_path = pathex.get_first_file_by_stem (input_file_path.parent, input_file_path.stem) else: if not input_file_path.exists(): input_file_path = None @@ -24,12 +24,12 @@ def extract_video(input_file, output_dir, output_ext=None, fps=None): return if fps is None: - fps = io.input_int ("Enter FPS ( ?:help skip:fullfps ) : ", 0, help_message="How many frames of every second of the video will be extracted.") + fps = io.input_int ("Enter FPS", 0, help_message="How many frames of every second of the video will be extracted. 0 - full fps") if output_ext is None: - output_ext = io.input_str ("Output image format? ( jpg png ?:help skip:png ) : ", "png", ["png","jpg"], help_message="png is lossless, but extraction is x10 slower for HDD, requires x10 more disk space than jpg.") + output_ext = io.input_str ("Output image format", "png", ["png","jpg"], help_message="png is lossless, but extraction is x10 slower for HDD, requires x10 more disk space than jpg.") - for filename in Path_utils.get_image_paths (output_path, ['.'+output_ext]): + for filename in pathex.get_image_paths (output_path, ['.'+output_ext]): Path(filename).unlink() job = ffmpeg.input(str(input_file_path)) @@ -57,16 +57,16 @@ def cut_video ( input_file, from_time=None, to_time=None, audio_track_id=None, b output_file_path = input_file_path.parent / (input_file_path.stem + "_cut" + input_file_path.suffix) if from_time is None: - from_time = io.input_str ("From time (skip: 00:00:00.000) : ", "00:00:00.000") + from_time = io.input_str ("From time", "00:00:00.000") if to_time is None: - to_time = io.input_str ("To time (skip: 00:00:00.000) : ", "00:00:00.000") + to_time = io.input_str ("To time", "00:00:00.000") if audio_track_id is None: - audio_track_id = io.input_int ("Specify audio track id. 
( skip:0 ) : ", 0) + audio_track_id = io.input_int ("Specify audio track id.", 0) if bitrate is None: - bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:25) : ", 25) ) + bitrate = max (1, io.input_int ("Bitrate of output file in MB/s", 25) ) kwargs = {"c:v": "libx264", "b:v": "%dM" %(bitrate), @@ -93,10 +93,10 @@ def denoise_image_sequence( input_dir, ext=None, factor=None ): return if ext is None: - ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") + ext = io.input_str ("Input image format (extension)", "png") if factor is None: - factor = np.clip ( io.input_int ("Denoise factor? (1-20 default:5) : ", 5), 1, 20 ) + factor = np.clip ( io.input_int ("Denoise factor?", 5, add_info="1-20"), 1, 20 ) kwargs = {} if ext == 'jpg': @@ -129,17 +129,17 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, out_ext = output_file_path.suffix if ext is None: - ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") + ext = io.input_str ("Input image format (extension)", "png") if lossless is None: - lossless = io.input_bool ("Use lossless codec ? ( default:no ) : ", False) + lossless = io.input_bool ("Use lossless codec", False) video_id = None audio_id = None ref_in_a = None if reference_file_path is not None: if reference_file_path.suffix == '.*': - reference_file_path = Path_utils.get_first_file_by_stem (reference_file_path.parent, reference_file_path.stem) + reference_file_path = pathex.get_first_file_by_stem (reference_file_path.parent, reference_file_path.stem) else: if not reference_file_path.exists(): reference_file_path = None @@ -166,12 +166,12 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, if fps is None: #if fps not specified and not overwritten by reference-file - fps = max (1, io.input_int ("FPS ? (default:25) : ", 25) ) + fps = max (1, io.input_int ("Enter FPS", 25) ) if not lossless and bitrate is None: - bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:16) : ", 16) ) + bitrate = max (1, io.input_int ("Bitrate of output file in MB/s", 16) ) - input_image_paths = Path_utils.get_image_paths(input_path) + input_image_paths = pathex.get_image_paths(input_path) i_in = ffmpeg.input('pipe:', format='image2pipe', r=fps) diff --git a/mainscripts/dev_misc.py b/mainscripts/dev_misc.py index 27c44e4..cb12ae9 100644 --- a/mainscripts/dev_misc.py +++ b/mainscripts/dev_misc.py @@ -7,10 +7,10 @@ import numpy as np from DFLIMG import * from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from joblib import Subprocessor -from utils import Path_utils -from utils.cv2_utils import * +from core.interact import interact as io +from core.joblib import Subprocessor +from core import pathex +from core.cv2ex import * from . 
import Extractor, Sorter from .Extractor import ExtractSubprocessor @@ -41,7 +41,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): output_path = input_path.parent / (input_path.name + '_out') - dir_names = Path_utils.get_all_dir_names(input_path) + dir_names = pathex.get_all_dir_names(input_path) if not output_path.exists(): output_path.mkdir(parents=True, exist_ok=True) @@ -54,7 +54,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): if not cur_output_path.exists(): cur_output_path.mkdir(parents=True, exist_ok=True) - input_path_image_paths = Path_utils.get_image_paths(cur_input_path) + input_path_image_paths = pathex.get_image_paths(cur_input_path) for filename in input_path_image_paths: filename_path = Path(filename) @@ -116,7 +116,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): cur_input_path = input_path / dir_name cur_output_path = output_path / dir_name - input_path_image_paths = Path_utils.get_image_paths(cur_input_path) + input_path_image_paths = pathex.get_image_paths(cur_input_path) l = len(input_path_image_paths) #if l < 250 or l > 350: # continue @@ -176,7 +176,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): output_path = input_path.parent / (input_path.name + '_out') - dir_names = Path_utils.get_all_dir_names(input_path) + dir_names = pathex.get_all_dir_names(input_path) if not output_path.exists(): output_path.mkdir(parents=True, exist_ok=True) @@ -188,7 +188,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): cur_input_path = input_path / dir_name cur_output_path = output_path / dir_name - l = len(Path_utils.get_image_paths(cur_input_path)) + l = len(pathex.get_image_paths(cur_input_path)) if l < 250 or l > 350: continue @@ -316,13 +316,13 @@ def apply_celebamaskhq(input_dir ): if not img_path.exists(): raise ValueError(f'{str(img_path)} directory not found. Please ensure it exists.') - CelebAMASKHQSubprocessor(Path_utils.get_image_paths(img_path), - Path_utils.get_image_paths(mask_path, subdirs=True) ).run() + CelebAMASKHQSubprocessor(pathex.get_image_paths(img_path), + pathex.get_image_paths(mask_path, subdirs=True) ).run() return paths_to_extract = [] - for filename in io.progress_bar_generator(Path_utils.get_image_paths(img_path), desc="Processing"): + for filename in io.progress_bar_generator(pathex.get_image_paths(img_path), desc="Processing"): filepath = Path(filename) dflimg = DFLIMG.load(filepath) @@ -381,7 +381,7 @@ def extract_fanseg(input_dir, device_args={} ): raise ValueError('Input directory not found. Please ensure it exists.') paths_to_extract = [] - for filename in Path_utils.get_image_paths(input_path) : + for filename in pathex.get_image_paths(input_path) : filepath = Path(filename) dflimg = DFLIMG.load ( filepath ) if dflimg is not None: @@ -413,7 +413,7 @@ def extract_umd_csv(input_file_csv, io.log_info("Output dir is %s." % (str(output_path)) ) if output_path.exists(): - output_images_paths = Path_utils.get_image_paths(output_path) + output_images_paths = pathex.get_image_paths(output_path) if len(output_images_paths) > 0: io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." 
% (str(output_path)), False ) for filename in output_images_paths: @@ -443,12 +443,7 @@ def extract_umd_csv(input_file_csv, data = [] for d in csv_data: filename = input_file_csv_root_path / d['FILE'] - - #pitch, yaw, roll = float(d['PITCH']), float(d['YAW']), float(d['ROLL']) - #if pitch < -90 or pitch > 90 or yaw < -90 or yaw > 90 or roll < -90 or roll > 90: - # continue - # - #pitch_yaw_roll = pitch/90.0, yaw/90.0, roll/90.0 + x,y,w,h = float(d['FACE_X']), float(d['FACE_Y']), float(d['FACE_WIDTH']), float(d['FACE_HEIGHT']) @@ -473,11 +468,11 @@ def extract_umd_csv(input_file_csv, def dev_test(input_dir): input_path = Path(input_dir) - dir_names = Path_utils.get_all_dir_names(input_path) + dir_names = pathex.get_all_dir_names(input_path) for dir_name in io.progress_bar_generator(dir_names, desc="Processing"): - img_paths = Path_utils.get_image_paths (input_path / dir_name) + img_paths = pathex.get_image_paths (input_path / dir_name) for filename in img_paths: filepath = Path(filename) diff --git a/mainscripts/gfx/help_converter_masked.jpg b/mainscripts/gfx/help_converter_masked.jpg deleted file mode 100644 index e737f54..0000000 Binary files a/mainscripts/gfx/help_converter_masked.jpg and /dev/null differ diff --git a/mainscripts/gfx/help_converter_face_avatar.jpg b/mainscripts/gfx/help_merger_face_avatar.jpg similarity index 100% rename from mainscripts/gfx/help_converter_face_avatar.jpg rename to mainscripts/gfx/help_merger_face_avatar.jpg diff --git a/mainscripts/gfx/help_converter_face_avatar_source.psd b/mainscripts/gfx/help_merger_face_avatar_source.psd similarity index 100% rename from mainscripts/gfx/help_converter_face_avatar_source.psd rename to mainscripts/gfx/help_merger_face_avatar_source.psd diff --git a/mainscripts/gfx/help_merger_masked.jpg b/mainscripts/gfx/help_merger_masked.jpg new file mode 100644 index 0000000..d7598d2 Binary files /dev/null and b/mainscripts/gfx/help_merger_masked.jpg differ diff --git a/mainscripts/gfx/help_converter_masked_source.psd b/mainscripts/gfx/help_merger_masked_source.psd similarity index 94% rename from mainscripts/gfx/help_converter_masked_source.psd rename to mainscripts/gfx/help_merger_masked_source.psd index ebb3b0d..6b1e387 100644 Binary files a/mainscripts/gfx/help_converter_masked_source.psd and b/mainscripts/gfx/help_merger_masked_source.psd differ diff --git a/converters/FrameInfo.py b/merger/FrameInfo.py similarity index 100% rename from converters/FrameInfo.py rename to merger/FrameInfo.py diff --git a/converters/ConvertAvatar.py b/merger/MergeAvatar.py similarity index 88% rename from converters/ConvertAvatar.py rename to merger/MergeAvatar.py index e5264a0..07ada6d 100644 --- a/converters/ConvertAvatar.py +++ b/merger/MergeAvatar.py @@ -1,9 +1,9 @@ import cv2 import numpy as np -import imagelib +from core import imagelib from facelib import FaceType, LandmarksProcessor -from utils.cv2_utils import * +from core.cv2ex import * def process_frame_info(frame_info, inp_sh): img_uint8 = cv2_imread (frame_info.filename) @@ -14,7 +14,7 @@ def process_frame_info(frame_info, inp_sh): img = cv2.warpAffine( img, img_mat, inp_sh[0:2], borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC ) return img -def ConvertFaceAvatar (predictor_func, predictor_input_shape, cfg, prev_temporal_frame_infos, frame_info, next_temporal_frame_infos): +def MergeFaceAvatar (predictor_func, predictor_input_shape, cfg, prev_temporal_frame_infos, frame_info, next_temporal_frame_infos): inp_sh = predictor_input_shape prev_imgs=[] diff --git 
a/converters/ConvertMasked.py b/merger/MergeMasked.py similarity index 90% rename from converters/ConvertMasked.py rename to merger/MergeMasked.py index 67769cd..8aa3067 100644 --- a/converters/ConvertMasked.py +++ b/merger/MergeMasked.py @@ -3,12 +3,12 @@ import traceback import cv2 import numpy as np -import imagelib +from core import imagelib from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from utils.cv2_utils import * +from core.interact import interact as io +from core.cv2ex import * -def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_face_landmarks): +def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_face_landmarks): img_size = img_bgr.shape[1], img_bgr.shape[0] img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) @@ -22,7 +22,7 @@ def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, i output_size = predictor_input_shape[0] if cfg.super_resolution_mode != 0: - output_size *= 2 + output_size *= 4 face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type) face_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale ) @@ -37,12 +37,12 @@ def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, i predicted = predictor_func (predictor_input_bgr) if isinstance(predicted, tuple): - #converter return bgr,mask + #merger return bgr,mask prd_face_bgr = np.clip (predicted[0], 0, 1.0) prd_face_mask_a_0 = np.clip (predicted[1], 0, 1.0) predictor_masked = True else: - #converter return bgr only, using dst mask + #merger return bgr only, using dst mask prd_face_bgr = np.clip (predicted, 0, 1.0 ) prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, predictor_input_shape[0:2] ) predictor_masked = False @@ -82,24 +82,7 @@ def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, i m = cv2.getAffineTransform(b, fanseg_rect_corner_pts) FAN_dst_face_mask_a_0 = cv2.warpAffine(dst_face_fanseg_mask, m, (cfg.fanseg_input_size,)*2, flags=cv2.INTER_CUBIC ) FAN_dst_face_mask_a_0 = cv2.resize (FAN_dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) - """ - if cfg.mask_mode == 8: #FANCHQ-dst - full_face_fanchq_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, cfg.fanchq_input_size, face_type=FaceType.FULL) - dst_face_fanchq_bgr = cv2.warpAffine(img_bgr, full_face_fanchq_mat, (cfg.fanchq_input_size,)*2, flags=cv2.INTER_CUBIC ) - dst_face_fanchq_mask = cfg.fanchq_extract_func( FaceType.FULL, dst_face_fanchq_bgr ) - if cfg.face_type == FaceType.FULL: - FANCHQ_dst_face_mask_a_0 = cv2.resize (dst_face_fanchq_mask, (output_size,output_size), cv2.INTER_CUBIC) - else: - face_fanchq_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, cfg.fanchq_input_size, face_type=cfg.face_type) - - fanchq_rect_corner_pts = np.array ( [ [0,0], [cfg.fanchq_input_size-1,0], [0,cfg.fanchq_input_size-1] ], dtype=np.float32 ) - a = LandmarksProcessor.transform_points (fanchq_rect_corner_pts, face_fanchq_mat, invert=True ) - b = LandmarksProcessor.transform_points (a, full_face_fanchq_mat ) - m = cv2.getAffineTransform(b, fanchq_rect_corner_pts) - FAN_dst_face_mask_a_0 = cv2.warpAffine(dst_face_fanchq_mask, m, (cfg.fanchq_input_size,)*2, flags=cv2.INTER_CUBIC ) - FAN_dst_face_mask_a_0 = cv2.resize 
(FAN_dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) - """ if cfg.mask_mode == 3: #FAN-prd prd_face_mask_a_0 = FAN_prd_face_mask_a_0 elif cfg.mask_mode == 4: #FAN-dst @@ -350,14 +333,14 @@ def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, i return out_img, out_merging_mask[...,0:1] -def ConvertMasked (predictor_func, predictor_input_shape, cfg, frame_info): +def MergeMasked (predictor_func, predictor_input_shape, cfg, frame_info): img_bgr_uint8 = cv2_imread(frame_info.filename) img_bgr_uint8 = imagelib.normalize_channels (img_bgr_uint8, 3) img_bgr = img_bgr_uint8.astype(np.float32) / 255.0 outs = [] for face_num, img_landmarks in enumerate( frame_info.landmarks_list ): - out_img, out_img_merging_mask = ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_landmarks) + out_img, out_img_merging_mask = MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_landmarks) outs += [ (out_img, out_img_merging_mask) ] #Combining multiple face outputs diff --git a/converters/ConverterConfig.py b/merger/MergerConfig.py similarity index 79% rename from converters/ConverterConfig.py rename to merger/MergerConfig.py index b7fb095..7f3984e 100644 --- a/converters/ConverterConfig.py +++ b/merger/MergerConfig.py @@ -2,10 +2,10 @@ import numpy as np import copy from facelib import FaceType -from interact import interact as io +from core.interact import interact as io -class ConverterConfig(object): +class MergerConfig(object): TYPE_NONE = 0 TYPE_MASKED = 1 TYPE_FACE_AVATAR = 2 @@ -28,11 +28,7 @@ class ConverterConfig(object): self.fanseg_input_size = None self.fanseg_extract_func = None - self.fanchq_input_size = None - self.fanchq_extract_func = None - self.ebs_ct_func = None - - self.super_res_dict = {0:"None", 1:'RankSRGAN'} + self.super_res_dict = {0:"None", 1:'FaceEnhancer'} self.sharpen_dict = {0:"None", 1:'box', 2:'gaussian'} #default changeable params @@ -47,18 +43,18 @@ class ConverterConfig(object): def ask_settings(self): s = """Choose sharpen mode: \n""" for key in self.sharpen_dict.keys(): - s += f"""({key}) {self.sharpen_dict[key]}\n""" - s += f"""?:help Default: {list(self.sharpen_dict.keys())[0]} : """ - self.sharpen_mode = io.input_int (s, 0, valid_list=self.sharpen_dict.keys(), help_message="Enhance details by applying sharpen filter.") + s += f"""({key}) {self.sharpen_dict[key]}\n""" + io.log_info(s) + self.sharpen_mode = io.input_int ("", 0, valid_list=self.sharpen_dict.keys(), help_message="Enhance details by applying sharpen filter.") if self.sharpen_mode != 0: - self.blursharpen_amount = np.clip ( io.input_int ("Choose blur/sharpen amount [-100..100] (skip:0) : ", 0), -100, 100 ) + self.blursharpen_amount = np.clip ( io.input_int ("Choose blur/sharpen amount", 0, add_info="-100..100"), -100, 100 ) s = """Choose super resolution mode: \n""" for key in self.super_res_dict.keys(): s += f"""({key}) {self.super_res_dict[key]}\n""" - s += f"""?:help Default: {list(self.super_res_dict.keys())[0]} : """ - self.super_resolution_mode = io.input_int (s, 0, valid_list=self.super_res_dict.keys(), help_message="Enhance details by applying superresolution network.") + io.log_info(s) + self.super_resolution_mode = io.input_int ("", 0, valid_list=self.super_res_dict.keys(), help_message="Enhance details by applying superresolution network.") def toggle_sharpen_mode(self): a = list( self.sharpen_dict.keys() ) @@ -85,7 +81,7 @@ class ConverterConfig(object): def 
__eq__(self, other): #check equality of changeable params - if isinstance(other, ConverterConfig): + if isinstance(other, MergerConfig): return self.sharpen_mode == other.sharpen_mode and \ self.blursharpen_amount == other.blursharpen_amount and \ self.super_resolution_mode == other.super_resolution_mode @@ -130,7 +126,7 @@ half_face_mask_mode_dict = {1:'learned', ctm_dict = { 0: "None", 1:"rct", 2:"lct", 3:"mkl", 4:"mkl-m", 5:"idt", 6:"idt-m", 7:"sot-m", 8:"mix-m" } ctm_str_dict = {None:0, "rct":1, "lct":2, "mkl":3, "mkl-m":4, "idt":5, "idt-m":6, "sot-m":7, "mix-m":8 } -class ConverterConfigMasked(ConverterConfig): +class MergerConfigMasked(MergerConfig): def __init__(self, face_type=FaceType.FULL, default_mode = 'overlay', @@ -152,11 +148,11 @@ class ConverterConfigMasked(ConverterConfig): **kwargs ): - super().__init__(type=ConverterConfig.TYPE_MASKED, **kwargs) + super().__init__(type=MergerConfig.TYPE_MASKED, **kwargs) self.face_type = face_type if self.face_type not in [FaceType.HALF, FaceType.MID_FULL, FaceType.FULL ]: - raise ValueError("ConverterConfigMasked does not support this type of face.") + raise ValueError("MergerConfigMasked does not support this type of face.") self.default_mode = default_mode self.clip_hborder_mask_per = clip_hborder_mask_per @@ -228,58 +224,57 @@ class ConverterConfigMasked(ConverterConfig): s = """Choose mode: \n""" for key in mode_dict.keys(): s += f"""({key}) {mode_dict[key]}\n""" - s += f"""Default: { mode_str_dict.get(self.default_mode, 1) } : """ - - mode = io.input_int (s, mode_str_dict.get(self.default_mode, 1) ) + io.log_info(s) + mode = io.input_int ("", mode_str_dict.get(self.default_mode, 1) ) self.mode = mode_dict.get (mode, self.default_mode ) if 'raw' not in self.mode: if self.mode == 'hist-match' or self.mode == 'hist-match-bw': - self.masked_hist_match = io.input_bool("Masked hist match? (y/n skip:y) : ", True) + self.masked_hist_match = io.input_bool("Masked hist match?", True) if self.mode == 'hist-match' or self.mode == 'hist-match-bw' or self.mode == 'seamless-hist-match': - self.hist_match_threshold = np.clip ( io.input_int("Hist match threshold [0..255] (skip:255) : ", 255), 0, 255) + self.hist_match_threshold = np.clip ( io.input_int("Hist match threshold", 255, add_info="0..255"), 0, 255) if self.face_type == FaceType.FULL: s = """Choose mask mode: \n""" for key in full_face_mask_mode_dict.keys(): s += f"""({key}) {full_face_mask_mode_dict[key]}\n""" - s += f"""?:help Default: 1 : """ + io.log_info(s) - self.mask_mode = io.input_int (s, 1, valid_list=full_face_mask_mode_dict.keys(), help_message="If you learned the mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images. 'FAN-prd' - using super smooth mask by pretrained FAN-model from predicted face. 'FAN-dst' - using super smooth mask by pretrained FAN-model from dst face. 'FAN-prd*FAN-dst' or 'learned*FAN-prd*FAN-dst' - using multiplied masks.") + self.mask_mode = io.input_int ("", 1, valid_list=full_face_mask_mode_dict.keys(), help_message="If you learned the mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images. 'FAN-prd' - using super smooth mask by pretrained FAN-model from predicted face. 'FAN-dst' - using super smooth mask by pretrained FAN-model from dst face. 
'FAN-prd*FAN-dst' or 'learned*FAN-prd*FAN-dst' - using multiplied masks.") else: s = """Choose mask mode: \n""" for key in half_face_mask_mode_dict.keys(): s += f"""({key}) {half_face_mask_mode_dict[key]}\n""" - s += f"""?:help , Default: 1 : """ - self.mask_mode = io.input_int (s, 1, valid_list=half_face_mask_mode_dict.keys(), help_message="If you learned the mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images.") + io.log_info(s) + self.mask_mode = io.input_int ("", 1, valid_list=half_face_mask_mode_dict.keys(), help_message="If you learned the mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images.") if 'raw' not in self.mode: - self.erode_mask_modifier = np.clip ( io.input_int ("Choose erode mask modifier [-400..400] (skip:%d) : " % 0, 0), -400, 400) - self.blur_mask_modifier = np.clip ( io.input_int ("Choose blur mask modifier [-400..400] (skip:%d) : " % 0, 0), -400, 400) - self.motion_blur_power = np.clip ( io.input_int ("Choose motion blur power [0..100] (skip:%d) : " % (0), 0), 0, 100) + self.erode_mask_modifier = np.clip ( io.input_int ("Choose erode mask modifier", 0, add_info="-400..400"), -400, 400) + self.blur_mask_modifier = np.clip ( io.input_int ("Choose blur mask modifier", 0, add_info="-400..400"), -400, 400) + self.motion_blur_power = np.clip ( io.input_int ("Choose motion blur power", 0, add_info="0..100"), 0, 100) - self.output_face_scale = np.clip (io.input_int ("Choose output face scale modifier [-50..50] (skip:0) : ", 0), -50, 50) + self.output_face_scale = np.clip (io.input_int ("Choose output face scale modifier", 0, add_info="-50..50" ), -50, 50) if 'raw' not in self.mode: - self.color_transfer_mode = io.input_str ( f"Apply color transfer to predicted face? Choose mode ( {' / '.join ([str(x) for x in list(ctm_str_dict.keys())])} skip:None ) : ", None, ctm_str_dict.keys() ) + self.color_transfer_mode = io.input_str ( "Color transfer to predicted face", None, valid_list=list(ctm_str_dict.keys())[1:] ) self.color_transfer_mode = ctm_str_dict[self.color_transfer_mode] super().ask_settings() if 'raw' not in self.mode: - self.image_denoise_power = np.clip ( io.input_int ("Choose image degrade by denoise power [0..500] (skip:%d) : " % (0), 0), 0, 500) - self.bicubic_degrade_power = np.clip ( io.input_int ("Choose image degrade by bicubic rescale power [0..100] (skip:%d) : " % (0), 0), 0, 100) - self.color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image [0..100] (skip:0) : ", 0), 0, 100) - self.export_mask_alpha = io.input_bool("Export png with alpha channel of the mask? 
(y/n skip:n) : ", False) + self.image_denoise_power = np.clip ( io.input_int ("Choose image degrade by denoise power", 0, add_info="0..500"), 0, 500) + self.bicubic_degrade_power = np.clip ( io.input_int ("Choose image degrade by bicubic rescale power", 0, add_info="0..100"), 0, 100) + self.color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image", 0, add_info="0..100"), 0, 100) + self.export_mask_alpha = io.input_bool("Export png with alpha channel of the mask?", False) io.log_info ("") def __eq__(self, other): #check equality of changeable params - if isinstance(other, ConverterConfigMasked): + if isinstance(other, MergerConfigMasked): return super().__eq__(other) and \ self.mode == other.mode and \ self.masked_hist_match == other.masked_hist_match and \ @@ -299,7 +294,7 @@ class ConverterConfigMasked(ConverterConfig): def to_string(self, filename): r = ( - f"""ConverterConfig {filename}:\n""" + f"""MergerConfig {filename}:\n""" f"""Mode: {self.mode}\n""" ) @@ -337,11 +332,11 @@ class ConverterConfigMasked(ConverterConfig): return r -class ConverterConfigFaceAvatar(ConverterConfig): +class MergerConfigFaceAvatar(MergerConfig): def __init__(self, temporal_face_count=0, add_source_image=False): - super().__init__(type=ConverterConfig.TYPE_FACE_AVATAR) + super().__init__(type=MergerConfig.TYPE_FACE_AVATAR) self.temporal_face_count = temporal_face_count #changeable params @@ -352,7 +347,7 @@ class ConverterConfigFaceAvatar(ConverterConfig): #override def ask_settings(self): - self.add_source_image = io.input_bool("Add source image? (y/n ?:help skip:n) : ", False, help_message="Add source image for comparison.") + self.add_source_image = io.input_bool("Add source image?", False, help_message="Add source image for comparison.") super().ask_settings() def toggle_add_source_image(self): @@ -362,7 +357,7 @@ class ConverterConfigFaceAvatar(ConverterConfig): def __eq__(self, other): #check equality of changeable params - if isinstance(other, ConverterConfigFaceAvatar): + if isinstance(other, MergerConfigFaceAvatar): return super().__eq__(other) and \ self.add_source_image == other.add_source_image @@ -370,7 +365,7 @@ class ConverterConfigFaceAvatar(ConverterConfig): #override def to_string(self, filename): - return (f"ConverterConfig {filename}:\n" + return (f"MergerConfig {filename}:\n" f"add_source_image : {self.add_source_image}\n") + \ super().to_string(filename) + "================" diff --git a/merger/__init__.py b/merger/__init__.py new file mode 100644 index 0000000..e21651e --- /dev/null +++ b/merger/__init__.py @@ -0,0 +1,4 @@ +from .FrameInfo import FrameInfo +from .MergerConfig import MergerConfig, MergerConfigMasked, MergerConfigFaceAvatar +from .MergeMasked import MergeMasked +from .MergeAvatar import MergeFaceAvatar diff --git a/models/ModelBase.py b/models/ModelBase.py index c7027d1..1b43099 100644 --- a/models/ModelBase.py +++ b/models/ModelBase.py @@ -1,162 +1,179 @@ import colorsys import inspect import json +import operator import os import pickle import shutil +import tempfile import time from pathlib import Path import cv2 import numpy as np -import imagelib -from interact import interact as io -from nnlib import nnlib +from core import imagelib +from core.interact import interact as io +from core.leras import nn from samplelib import SampleGeneratorBase -from utils import Path_utils, std_utils -from utils.cv2_utils import * +from core import pathex +from core.cv2ex import * + -''' -You can implement your own model. Check examples. 
-''' class ModelBase(object): - - - def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, pretraining_data_path=None, is_training=False, debug = False, no_preview=False, device_args = None, - ask_enable_autobackup=True, - ask_write_preview_history=True, - ask_target_iter=True, - ask_batch_size=True, - ask_random_flip=True, **kwargs): - - device_args['force_gpu_idx'] = device_args.get('force_gpu_idx',-1) - device_args['cpu_only'] = True if debug else device_args.get('cpu_only',False) - - if device_args['force_gpu_idx'] == -1 and not device_args['cpu_only']: - idxs_names_list = nnlib.device.getValidDevicesIdxsWithNamesList() - if len(idxs_names_list) > 1: - io.log_info ("You have multi GPUs in a system: ") - for idx, name in idxs_names_list: - io.log_info ("[%d] : %s" % (idx, name) ) - - device_args['force_gpu_idx'] = io.input_int("Which GPU idx to choose? ( skip: best GPU ) : ", -1, [ x[0] for x in idxs_names_list] ) - self.device_args = device_args - - self.device_config = nnlib.DeviceConfig(allow_growth=True, **self.device_args) - - io.log_info ("Loading model...") - - self.model_path = model_path - self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat') ) - + def __init__(self, is_training=False, + saved_models_path=None, + training_data_src_path=None, + training_data_dst_path=None, + pretraining_data_path=None, + pretrained_model_path=None, + no_preview=False, + force_model_name=None, + force_gpu_idxs=None, + cpu_only=False, + debug=False, + **kwargs): + self.is_training = is_training + self.saved_models_path = saved_models_path self.training_data_src_path = training_data_src_path self.training_data_dst_path = training_data_dst_path self.pretraining_data_path = pretraining_data_path - - self.debug = debug + self.pretrained_model_path = pretrained_model_path self.no_preview = no_preview - self.is_training_mode = is_training + self.debug = debug + + self.model_class_name = model_class_name = Path(inspect.getmodule(self).__file__).parent.name.rsplit("_", 1)[1] + + if force_model_name is not None: + self.model_name = force_model_name + else: + while True: + # gather all model dat files + saved_models_names = [] + for filepath in pathex.get_file_paths(saved_models_path): + filepath_name = filepath.name + if filepath_name.endswith(f'{model_class_name}_data.dat'): + saved_models_names += [ (filepath_name.split('_')[0], os.path.getmtime(filepath)) ] + + # sort by modified datetime + saved_models_names = sorted(saved_models_names, key=operator.itemgetter(1), reverse=True ) + saved_models_names = [ x[0] for x in saved_models_names ] + + if len(saved_models_names) != 0: + io.log_info ("Choose one of saved models, or enter a name to create a new model.") + io.log_info ("[r] : rename") + io.log_info ("[d] : delete") + io.log_info ("") + for i, model_name in enumerate(saved_models_names): + s = f"[{i}] : {model_name} " + if i == 0: + s += "- latest" + io.log_info (s) + + inp = io.input_str(f"", "0", show_default_value=False ) + model_idx = -1 + try: + model_idx = np.clip ( int(inp), 0, len(saved_models_names)-1 ) + except: + pass + + if model_idx == -1: + if len(inp) == 1: + is_rename = inp[0] == 'r' + is_delete = inp[0] == 'd' + + if is_rename or is_delete: + if len(saved_models_names) != 0: + + if is_rename: + name = io.input_str(f"Enter the name of the model you want to rename") + elif is_delete: + name = io.input_str(f"Enter the name of the model you want to delete") + + if name in saved_models_names: + + if is_rename: + new_model_name = 
io.input_str(f"Enter new name of the model") + + for filepath in pathex.get_file_paths(saved_models_path): + filepath_name = filepath.name + + model_filename, remain_filename = filepath_name.split('_', 1) + if model_filename == name: + + if is_rename: + new_filepath = filepath.parent / ( new_model_name + '_' + remain_filename ) + filepath.rename (new_filepath) + elif is_delete: + filepath.unlink() + continue + + self.model_name = inp + else: + self.model_name = saved_models_names[model_idx] + + else: + self.model_name = io.input_str(f"No saved models found. Enter a name of a new model", "noname") + + break + + self.model_name = self.model_name + '_' + self.model_class_name self.iter = 0 self.options = {} self.loss_history = [] self.sample_for_preview = None + self.choosed_gpu_indexes = None model_data = {} + self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat') ) if self.model_data_path.exists(): + io.log_info (f"Loading {self.model_name} model...") model_data = pickle.loads ( self.model_data_path.read_bytes() ) - self.iter = max( model_data.get('iter',0), model_data.get('epoch',0) ) - if 'epoch' in self.options: - self.options.pop('epoch') + self.iter = model_data.get('iter',0) if self.iter != 0: self.options = model_data['options'] self.loss_history = model_data.get('loss_history', []) self.sample_for_preview = model_data.get('sample_for_preview', None) + self.choosed_gpu_indexes = model_data.get('choosed_gpu_indexes', None) - ask_override = self.is_training_mode and self.iter != 0 and io.input_in_time ("Press enter in 2 seconds to override model settings.", 5 if io.is_colab() else 2 ) - - yn_str = {True:'y',False:'n'} - - if self.iter == 0: + if self.is_first_run(): io.log_info ("\nModel first run.") - if ask_enable_autobackup and (self.iter == 0 or ask_override): - default_autobackup = False if self.iter == 0 else self.options.get('autobackup',False) - self.options['autobackup'] = io.input_bool("Enable autobackup? (y/n ?:help skip:%s) : " % (yn_str[default_autobackup]) , default_autobackup, help_message="Autobackup model files with preview every hour for last 15 hours. Latest backup located in model/<>_autobackups/01") - else: - self.options['autobackup'] = self.options.get('autobackup', False) + self.device_config = nn.DeviceConfig.GPUIndexes( force_gpu_idxs or nn.ask_choose_device_idxs(suggest_best_multi_gpu=True)) \ + if not cpu_only else nn.DeviceConfig.CPU() - if ask_write_preview_history and (self.iter == 0 or ask_override): - default_write_preview_history = False if self.iter == 0 else self.options.get('write_preview_history',False) - self.options['write_preview_history'] = io.input_bool("Write preview history? (y/n ?:help skip:%s) : " % (yn_str[default_write_preview_history]) , default_write_preview_history, help_message="Preview history will be writed to _history folder.") - else: - self.options['write_preview_history'] = self.options.get('write_preview_history', False) + nn.initialize(self.device_config) - if (self.iter == 0 or ask_override) and self.options['write_preview_history'] and io.is_support_windows(): - choose_preview_history = io.input_bool("Choose image for the preview history? (y/n skip:%s) : " % (yn_str[False]) , False) - elif (self.iter == 0 or ask_override) and self.options['write_preview_history'] and io.is_colab(): - choose_preview_history = io.input_bool("Randomly choose new image for preview history? 
(y/n ?:help skip:%s) : " % (yn_str[False]), False, help_message="Preview image history will stay stuck with old faces if you reuse the same model on different celebs. Choose no unless you are changing src/dst to a new person") - else: - choose_preview_history = False + #### + self.default_options_path = saved_models_path / f'{self.model_class_name}_default_options.dat' + self.default_options = {} + if self.default_options_path.exists(): + try: + self.default_options = pickle.loads ( self.default_options_path.read_bytes() ) + except: + pass - if ask_target_iter: - if (self.iter == 0 or ask_override): - self.options['target_iter'] = max(0, io.input_int("Target iteration (skip:unlimited/default) : ", 0)) - else: - self.options['target_iter'] = max(model_data.get('target_iter',0), self.options.get('target_epoch',0)) - if 'target_epoch' in self.options: - self.options.pop('target_epoch') + self.choose_preview_history = False + self.batch_size = self.load_or_def_option('batch_size', 1) + ##### - if ask_batch_size and (self.iter == 0 or ask_override): - default_batch_size = 0 if self.iter == 0 else self.options.get('batch_size',0) - self.batch_size = max(0, io.input_int("Batch_size (?:help skip:%d) : " % (default_batch_size), default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually.")) - else: - self.batch_size = self.options.get('batch_size', 0) - - if ask_random_flip: - default_random_flip = self.options.get('random_flip', True) - if (self.iter == 0 or ask_override): - self.options['random_flip'] = io.input_bool(f"Flip faces randomly? (y/n ?:help skip:{yn_str[default_random_flip]}) : ", default_random_flip, help_message="Predicted face will look more naturally without this option, but src faceset should cover all face directions as dst faceset.") - else: - self.options['random_flip'] = self.options.get('random_flip', default_random_flip) + self.on_initialize_options() + if self.is_first_run(): + # save as default options only for first run model initialize + self.default_options_path.write_bytes( pickle.dumps (self.options) ) self.autobackup = self.options.get('autobackup', False) - if not self.autobackup and 'autobackup' in self.options: - self.options.pop('autobackup') - self.write_preview_history = self.options.get('write_preview_history', False) - if not self.write_preview_history and 'write_preview_history' in self.options: - self.options.pop('write_preview_history') - self.target_iter = self.options.get('target_iter',0) - if self.target_iter == 0 and 'target_iter' in self.options: - self.options.pop('target_iter') - - #self.batch_size = self.options.get('batch_size',0) - self.sort_by_yaw = self.options.get('sort_by_yaw',False) self.random_flip = self.options.get('random_flip',True) - self.onInitializeOptions(self.iter == 0, ask_override) - - nnlib.import_all(self.device_config) - self.keras = nnlib.keras - self.K = nnlib.keras.backend - - self.onInitialize() - + self.on_initialize() self.options['batch_size'] = self.batch_size - - if self.debug or self.batch_size == 0: - self.batch_size = 1 - - if self.is_training_mode: - if self.device_args['force_gpu_idx'] == -1: - self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) ) - self.autobackups_path = self.model_path / ( '%s_autobackups' % (self.get_model_name()) ) - else: - self.preview_history_path = self.model_path / ( '%d_%s_history' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) 
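
Note on the ModelBase hunk above: option prompting moves out of `ModelBase.__init__` into the overridable `on_initialize_options()` plus the new `ask_*` helpers (`ask_enable_autobackup`, `ask_write_preview_history`, `ask_target_iter`, `ask_random_flip`, `ask_batch_size`), and per-model-class defaults are persisted to `<class>_default_options.dat` and resolved through `load_or_def_option()`. A minimal sketch of how a subclass might use this flow, assuming the class name and the `resolution` option are hypothetical and not part of this diff:

```python
# Illustrative sketch only -- "MyModel" and the 'resolution' option are assumptions;
# ask_*, load_or_def_option and is_first_run are the ModelBase methods from this diff.
from core.interact import interact as io
from models import ModelBase

class MyModel(ModelBase):
    #override
    def on_initialize_options(self):
        ask_override = self.ask_override()   # short window to change settings when resuming

        if self.is_first_run() or ask_override:
            self.ask_enable_autobackup()
            self.ask_write_preview_history()
            self.ask_target_iter()
            self.ask_random_flip()
            self.ask_batch_size(suggest_batch_size=4)

        # per-model option: saved value -> per-class default -> hardcoded default
        default_resolution = self.load_or_def_option('resolution', 128)
        if self.is_first_run() or ask_override:
            self.options['resolution'] = io.input_int("Resolution", default_resolution)
        else:
            self.options['resolution'] = default_resolution
```

On first run the collected `self.options` are also written back as the class-wide defaults, so the next new model of the same class starts from these answers rather than the hardcoded ones.
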
- self.autobackups_path = self.model_path / ( '%d_%s_autobackups' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) + + if self.is_training: + self.preview_history_path = self.saved_models_path / ( f'{self.get_model_name()}_history' ) + self.autobackups_path = self.saved_models_path / ( f'{self.get_model_name()}_autobackups' ) if self.autobackup: self.autobackup_current_hour = time.localtime().tm_hour @@ -169,7 +186,7 @@ class ModelBase(object): self.preview_history_path.mkdir(exist_ok=True) else: if self.iter == 0: - for filename in Path_utils.get_image_paths(self.preview_history_path): + for filename in pathex.get_image_paths(self.preview_history_path): Path(filename).unlink() if self.generator_list is None: @@ -179,15 +196,15 @@ class ModelBase(object): if not isinstance(generator, SampleGeneratorBase): raise ValueError('training data generator is not subclass of SampleGeneratorBase') - if self.sample_for_preview is None or choose_preview_history: - if choose_preview_history and io.is_support_windows(): + if self.sample_for_preview is None or self.choose_preview_history: + if self.choose_preview_history and io.is_support_windows(): io.log_info ("Choose image for the preview history. [p] - next. [enter] - confirm.") wnd_name = "[p] - next. [enter] - confirm." io.named_window(wnd_name) io.capture_keys(wnd_name) choosed = False while not choosed: - self.sample_for_preview = self.generate_next_sample() + self.sample_for_preview = self.generate_next_samples() preview = self.get_static_preview() io.show_image( wnd_name, (preview*255).astype(np.uint8) ) @@ -207,73 +224,66 @@ class ModelBase(object): io.destroy_window(wnd_name) else: - self.sample_for_preview = self.generate_next_sample() + self.sample_for_preview = self.generate_next_samples() try: self.get_static_preview() except: - self.sample_for_preview = self.generate_next_sample() + self.sample_for_preview = self.generate_next_samples() self.last_sample = self.sample_for_preview - ###Generate text summary of model hyperparameters - #Find the longest key name and value string. Used as column widths. - width_name = max([len(k) for k in self.options.keys()] + [17]) + 1 # Single space buffer to left edge. 
Minimum of 17, the length of the longest static string used "Current iteration" - width_value = max([len(str(x)) for x in self.options.values()] + [len(str(self.iter)), len(self.get_model_name())]) + 1 # Single space buffer to right edge - if not self.device_config.cpu_only: #Check length of GPU names - width_value = max([len(nnlib.device.getDeviceName(idx))+1 for idx in self.device_config.gpu_idxs] + [width_value]) - width_total = width_name + width_value + 2 #Plus 2 for ": " + io.log_info( self.get_summary_text() ) - model_summary_text = [] - model_summary_text += [f'=={" Model Summary ":=^{width_total}}=='] # Model/status summary - model_summary_text += [f'=={" "*width_total}=='] - model_summary_text += [f'=={"Model name": >{width_name}}: {self.get_model_name(): <{width_value}}=='] # Name - model_summary_text += [f'=={" "*width_total}=='] - model_summary_text += [f'=={"Current iteration": >{width_name}}: {str(self.iter): <{width_value}}=='] # Iter - model_summary_text += [f'=={" "*width_total}=='] + def load_or_def_option(self, name, def_value): + options_val = self.options.get(name, None) + if options_val is not None: + return options_val - model_summary_text += [f'=={" Model Options ":-^{width_total}}=='] # Model options - model_summary_text += [f'=={" "*width_total}=='] - for key in self.options.keys(): - model_summary_text += [f'=={key: >{width_name}}: {str(self.options[key]): <{width_value}}=='] # self.options key/value pairs - model_summary_text += [f'=={" "*width_total}=='] + def_opt_val = self.default_options.get(name, None) + if def_opt_val is not None: + return def_opt_val - model_summary_text += [f'=={" Running On ":-^{width_total}}=='] # Training hardware info - model_summary_text += [f'=={" "*width_total}=='] - if self.device_config.multi_gpu: - model_summary_text += [f'=={"Using multi_gpu": >{width_name}}: {"True": <{width_value}}=='] # multi_gpu - model_summary_text += [f'=={" "*width_total}=='] - if self.device_config.cpu_only: - model_summary_text += [f'=={"Using device": >{width_name}}: {"CPU": <{width_value}}=='] # cpu_only - else: - for idx in self.device_config.gpu_idxs: - model_summary_text += [f'=={"Device index": >{width_name}}: {idx: <{width_value}}=='] # GPU hardware device index - model_summary_text += [f'=={"Name": >{width_name}}: {nnlib.device.getDeviceName(idx): <{width_value}}=='] # GPU name - vram_str = f'{nnlib.device.getDeviceVRAMTotalGb(idx):.2f}GB' # GPU VRAM - Formated as #.## (or ##.##) - model_summary_text += [f'=={"VRAM": >{width_name}}: {vram_str: <{width_value}}=='] - model_summary_text += [f'=={" "*width_total}=='] - model_summary_text += [f'=={"="*width_total}=='] + return def_value - if not self.device_config.cpu_only and self.device_config.gpu_vram_gb[0] <= 2: # Low VRAM warning - model_summary_text += ["/!\\"] - model_summary_text += ["/!\\ WARNING:"] - model_summary_text += ["/!\\ You are using a GPU with 2GB or less VRAM. 
This may significantly reduce the quality of your result!"] - model_summary_text += ["/!\\ If training does not start, close all programs and try again."] - model_summary_text += ["/!\\ Also you can disable Windows Aero Desktop to increase available VRAM."] - model_summary_text += ["/!\\"] + def ask_override(self): + return self.is_training and self.iter != 0 and io.input_in_time ("Press enter in 2 seconds to override model settings.", 5 if io.is_colab() else 2 ) + + def ask_enable_autobackup(self): + default_autobackup = self.options['autobackup'] = self.load_or_def_option('autobackup', False) + self.options['autobackup'] = io.input_bool(f"Enable autobackup", default_autobackup, help_message="Autobackup model files with preview every hour for last 15 hours. Latest backup located in model/<>_autobackups/01") + + def ask_write_preview_history(self): + default_write_preview_history = self.load_or_def_option('write_preview_history', False) + self.options['write_preview_history'] = io.input_bool(f"Write preview history", default_write_preview_history, help_message="Preview history will be writed to _history folder.") + + if self.options['write_preview_history']: + if io.is_support_windows(): + self.choose_preview_history = io.input_bool("Choose image for the preview history", False) + elif io.is_colab(): + self.choose_preview_history = io.input_bool("Randomly choose new image for preview history", False, help_message="Preview image history will stay stuck with old faces if you reuse the same model on different celebs. Choose no unless you are changing src/dst to a new person") + + def ask_target_iter(self): + default_target_iter = self.load_or_def_option('target_iter', 0) + self.options['target_iter'] = max(0, io.input_int("Target iteration", default_target_iter)) + + def ask_random_flip(self): + default_random_flip = self.load_or_def_option('random_flip', True) + self.options['random_flip'] = io.input_bool("Flip faces randomly", default_random_flip, help_message="Predicted face will look more naturally without this option, but src faceset should cover all face directions as dst faceset.") + + def ask_batch_size(self, suggest_batch_size=None): + default_batch_size = self.load_or_def_option('batch_size', suggest_batch_size or self.batch_size) + self.batch_size = max(0, io.input_int("Batch_size", default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually.")) - model_summary_text = "\n".join (model_summary_text) - self.model_summary_text = model_summary_text - io.log_info(model_summary_text) #overridable - def onInitializeOptions(self, is_first_run, ask_override): + def on_initialize_options(self): pass #overridable - def onInitialize(self): + def on_initialize(self): ''' - initialize your keras models + initialize your models store and retrieve your model options in self.options[''] @@ -283,12 +293,12 @@ class ModelBase(object): #overridable def onSave(self): - #save your keras models here + #save your models here pass #overridable def onTrainOneIter(self, sample, generator_list): - #train your keras models here + #train your models here #return array of losses return ( ('loss_src', 0), ('loss_dst', 0) ) @@ -301,42 +311,26 @@ class ModelBase(object): #overridable if you want model name differs from folder name def get_model_name(self): - return Path(inspect.getmodule(self).__file__).parent.name.rsplit("_", 1)[1] + return self.model_name #overridable , return [ [model, filename],... 
] list def get_model_filename_list(self): return [] #overridable - def get_ConverterConfig(self): - #return predictor_func, predictor_input_shape, ConverterConfig() for the model + def get_MergerConfig(self): + #return predictor_func, predictor_input_shape, MergerConfig() for the model raise NotImplementedError + def get_pretraining_data_path(self): + return self.pretraining_data_path + def get_target_iter(self): return self.target_iter def is_reached_iter_goal(self): return self.target_iter != 0 and self.iter >= self.target_iter - #multi gpu in keras actually is fake and doesn't work for training https://github.com/keras-team/keras/issues/11976 - #def to_multi_gpu_model_if_possible (self, models_list): - # if len(self.device_config.gpu_idxs) > 1: - # #make batch_size to divide on GPU count without remainder - # self.batch_size = int( self.batch_size / len(self.device_config.gpu_idxs) ) - # if self.batch_size == 0: - # self.batch_size = 1 - # self.batch_size *= len(self.device_config.gpu_idxs) - # - # result = [] - # for model in models_list: - # for i in range( len(model.output_names) ): - # model.output_names = 'output_%d' % (i) - # result += [ nnlib.keras.utils.multi_gpu_model( model, self.device_config.gpu_idxs ) ] - # - # return result - # else: - # return models_list - def get_previews(self): return self.onGetPreview ( self.last_sample ) @@ -345,21 +339,23 @@ class ModelBase(object): def save(self): summary_path = self.get_strpath_storage_for_file('summary.txt') - Path( summary_path ).write_text(self.model_summary_text) + Path( summary_path ).write_text( self.get_summary_text() ) + self.onSave() model_data = { 'iter': self.iter, 'options': self.options, 'loss_history': self.loss_history, - 'sample_for_preview' : self.sample_for_preview + 'sample_for_preview' : self.sample_for_preview, + 'choosed_gpu_indexes' : self.choosed_gpu_indexes, } - self.model_data_path.write_bytes( pickle.dumps(model_data) ) - - bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ] - bckp_filename_list += [ str(summary_path), str(self.model_data_path) ] + pathex.write_bytes_safe (self.model_data_path, pickle.dumps(model_data) ) if self.autobackup: + bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ] + bckp_filename_list += [ str(summary_path), str(self.model_data_path) ] + current_hour = time.localtime().tm_hour if self.autobackup_current_hour != current_hour: self.autobackup_current_hour = current_hour @@ -373,10 +369,10 @@ class ModelBase(object): if idx_backup_path.exists(): if i == 15: - Path_utils.delete_all_files(idx_backup_path) + pathex.delete_all_files(idx_backup_path) else: next_idx_packup_path.mkdir(exist_ok=True) - Path_utils.move_all_files (idx_backup_path, next_idx_packup_path) + pathex.move_all_files (idx_backup_path, next_idx_packup_path) if i == 1: idx_backup_path.mkdir(exist_ok=True) @@ -394,97 +390,6 @@ class ModelBase(object): img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) cv2_imwrite (filepath, img ) - def load_weights_safe(self, model_filename_list, optimizer_filename_list=[]): - exec(nnlib.code_import_all, locals(), globals()) - - loaded = [] - not_loaded = [] - for mf in model_filename_list: - model, filename = mf - filename = self.get_strpath_storage_for_file(filename) - - if Path(filename).exists(): - loaded += [ mf ] - - if issubclass(model.__class__, keras.optimizers.Optimizer): - opt = model - - try: - with 
open(filename, "rb") as f: - fd = pickle.loads(f.read()) - - weights = fd.get('weights', None) - if weights is not None: - opt.set_weights(weights) - - except Exception as e: - print ("Unable to load ", filename) - - else: - model.load_weights(filename) - else: - not_loaded += [ mf ] - - - return loaded, not_loaded - - def save_weights_safe(self, model_filename_list): - exec(nnlib.code_import_all, locals(), globals()) - - for model, filename in model_filename_list: - filename = self.get_strpath_storage_for_file(filename) + '.tmp' - - if issubclass(model.__class__, keras.optimizers.Optimizer): - opt = model - - try: - fd = {} - symbolic_weights = getattr(opt, 'weights') - if symbolic_weights: - fd['weights'] = self.K.batch_get_value(symbolic_weights) - - with open(filename, 'wb') as f: - f.write( pickle.dumps(fd) ) - except Exception as e: - print ("Unable to save ", filename) - else: - model.save_weights( filename) - - rename_list = model_filename_list - - """ - #unused - , optimizer_filename_list=[] - if len(optimizer_filename_list) != 0: - opt_filename = self.get_strpath_storage_for_file('opt.h5') - - try: - d = {} - for opt, filename in optimizer_filename_list: - fd = {} - symbolic_weights = getattr(opt, 'weights') - if symbolic_weights: - fd['weights'] = self.K.batch_get_value(symbolic_weights) - - d[filename] = fd - - with open(opt_filename+'.tmp', 'wb') as f: - f.write( pickle.dumps(d) ) - - rename_list += [('', 'opt.h5')] - except Exception as e: - print ("Unable to save ", opt_filename) - """ - - for _, filename in rename_list: - filename = self.get_strpath_storage_for_file(filename) - source_filename = Path(filename+'.tmp') - if source_filename.exists(): - target_filename = Path(filename) - if target_filename.exists(): - target_filename.unlink() - source_filename.rename ( str(target_filename) ) - def debug_one_iter(self): images = [] for generator in self.generator_list: @@ -494,19 +399,15 @@ class ModelBase(object): return imagelib.equalize_and_stack_square (images) - def generate_next_sample(self): - return [ generator.generate_next() for generator in self.generator_list] - - #overridable - def on_success_train_one_iter(self): - pass + def generate_next_samples(self): + self.last_sample = sample = [ generator.generate_next() for generator in self.generator_list] + return sample def train_one_iter(self): - sample = self.generate_next_sample() + iter_time = time.time() - losses = self.onTrainOneIter(sample, self.generator_list) + losses = self.onTrainOneIter() iter_time = time.time() - iter_time - self.last_sample = sample self.loss_history.append ( [float(loss[1]) for loss in losses] ) @@ -527,17 +428,15 @@ class ModelBase(object): img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) cv2_imwrite (filepath, img ) - self.on_success_train_one_iter() - self.iter += 1 return self.iter, iter_time def pass_one_iter(self): - self.last_sample = self.generate_next_sample() + self.generate_next_samples() def finalize(self): - nnlib.finalize_all() + nn.tf_close_session() def is_first_run(self): return self.iter == 0 @@ -554,6 +453,10 @@ class ModelBase(object): def get_iter(self): return self.iter + def set_iter(self, iter): + self.iter = iter + self.loss_history = self.loss_history[:iter] + def get_loss_history(self): return self.loss_history @@ -564,30 +467,48 @@ class ModelBase(object): return self.generator_list def get_model_root_path(self): - return self.model_path + return self.saved_models_path def get_strpath_storage_for_file(self, filename): - if 
self.device_args['force_gpu_idx'] == -1: - return str( self.model_path / ( self.get_model_name() + '_' + filename) ) - else: - return str( self.model_path / ( str(self.device_args['force_gpu_idx']) + '_' + self.get_model_name() + '_' + filename) ) + return str( self.saved_models_path / ( self.get_model_name() + '_' + filename) ) - def set_vram_batch_requirements (self, d): - #example d = {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48} - keys = [x for x in d.keys()] + def get_summary_text(self): + ###Generate text summary of model hyperparameters + #Find the longest key name and value string. Used as column widths. + width_name = max([len(k) for k in self.options.keys()] + [17]) + 1 # Single space buffer to left edge. Minimum of 17, the length of the longest static string used "Current iteration" + width_value = max([len(str(x)) for x in self.options.values()] + [len(str(self.get_iter())), len(self.get_model_name())]) + 1 # Single space buffer to right edge + if not self.device_config.cpu_only: #Check length of GPU names + width_value = max([len(device.name)+1 for device in self.device_config.devices] + [width_value]) + width_total = width_name + width_value + 2 #Plus 2 for ": " + summary_text = [] + summary_text += [f'=={" Model Summary ":=^{width_total}}=='] # Model/status summary + summary_text += [f'=={" "*width_total}=='] + summary_text += [f'=={"Model name": >{width_name}}: {self.get_model_name(): <{width_value}}=='] # Name + summary_text += [f'=={" "*width_total}=='] + summary_text += [f'=={"Current iteration": >{width_name}}: {str(self.get_iter()): <{width_value}}=='] # Iter + summary_text += [f'=={" "*width_total}=='] + + summary_text += [f'=={" Model Options ":-^{width_total}}=='] # Model options + summary_text += [f'=={" "*width_total}=='] + for key in self.options.keys(): + summary_text += [f'=={key: >{width_name}}: {str(self.options[key]): <{width_value}}=='] # self.options key/value pairs + summary_text += [f'=={" "*width_total}=='] + + summary_text += [f'=={" Running On ":-^{width_total}}=='] # Training hardware info + summary_text += [f'=={" "*width_total}=='] if self.device_config.cpu_only: - if self.batch_size == 0: - self.batch_size = 2 + summary_text += [f'=={"Using device": >{width_name}}: {"CPU": <{width_value}}=='] # cpu_only else: - if self.batch_size == 0: - for x in keys: - if self.device_config.gpu_vram_gb[0] <= x: - self.batch_size = d[x] - break - - if self.batch_size == 0: - self.batch_size = d[ keys[-1] ] + for device in self.device_config.devices: + summary_text += [f'=={"Device index": >{width_name}}: {device.index: <{width_value}}=='] # GPU hardware device index + summary_text += [f'=={"Name": >{width_name}}: {device.name: <{width_value}}=='] # GPU name + vram_str = f'{device.total_mem_gb:.2f}GB' # GPU VRAM - Formated as #.## (or ##.##) + summary_text += [f'=={"VRAM": >{width_name}}: {vram_str: <{width_value}}=='] + summary_text += [f'=={" "*width_total}=='] + summary_text += [f'=={"="*width_total}=='] + summary_text = "\n".join (summary_text) + return summary_text @staticmethod def get_loss_history_preview(loss_history, iter, w, c): diff --git a/models/Model_AVATAR/Model.py b/models/Model_AVATAR/Model.py deleted file mode 100644 index 3aef73d..0000000 --- a/models/Model_AVATAR/Model.py +++ /dev/null @@ -1,490 +0,0 @@ -from functools import partial - -import cv2 -import numpy as np - -from facelib import FaceType -from interact import interact as io -from mathlib import get_power_of_two -from models import ModelBase -from nnlib import nnlib -from samplelib import * 
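
The summary table is now built by `get_summary_text()` (moved out of `__init__` above) and reads device rows from `self.device_config.devices` instead of `nnlib.device`. The column alignment relies on nested f-string width specifiers computed from the longest option key and value; a small standalone sketch of that formatting trick, with illustrative values that are not taken from the diff:

```python
# Minimal sketch of the aligned two-column layout used by get_summary_text().
# 'options' and the widths below are illustrative, not copied from the diff.
options = {'resolution': 128, 'batch_size': 8, 'random_flip': True}

width_name  = max(len(k) for k in options) + 1                 # left column width
width_value = max(len(str(v)) for v in options.values()) + 1   # right column width
width_total = width_name + width_value + 2                     # plus 2 for ": "

lines = [f'=={" Model Options ":-^{width_total}}==']
for key, value in options.items():
    # nested width specifiers: right-align the key, left-align the value
    lines += [f'=={key: >{width_name}}: {str(value): <{width_value}}==']
print("\n".join(lines))
```
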
- -from facelib import PoseEstimator - -class AVATARModel(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_random_flip=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - #avatar_type = io.input_int("Avatar type ( 0:source, 1:head, 2:full_face ?:help skip:1) : ", 1, [0,1,2], - # help_message="Training target for the model. Source is direct untouched images. Full_face or head are centered nose unaligned faces.") - #avatar_type = {0:'source', - # 1:'head', - # 2:'full_face'}[avatar_type] - - self.options['avatar_type'] = 'head' - else: - self.options['avatar_type'] = self.options.get('avatar_type', 'head') - - if is_first_run or ask_override: - def_stage = self.options.get('stage', 1) - self.options['stage'] = io.input_int("Stage (0, 1, 2 ?:help skip:%d) : " % def_stage, def_stage, [0,1,2], help_message="Train first stage, then second. Tune batch size to maximum possible for both stages.") - else: - self.options['stage'] = self.options.get('stage', 1) - - #override - def onInitialize(self, batch_size=-1, **in_options): - exec(nnlib.code_import_all, locals(), globals()) - self.set_vram_batch_requirements({6:4}) - - resolution = self.resolution = 224 - avatar_type = self.options['avatar_type'] - stage = self.stage = self.options['stage'] - df_res = self.df_res = 128 - df_bgr_shape = (df_res, df_res, 3) - df_mask_shape = (df_res, df_res, 1) - res_bgr_shape = (resolution, resolution, 3) - res_bgr_t_shape = (resolution, resolution, 9) - - self.enc = modelify(AVATARModel.EncFlow())( [Input(df_bgr_shape),] ) - - self.decA64 = modelify(AVATARModel.DecFlow()) ( [ Input(K.int_shape(self.enc.outputs[0])[1:]) ] ) - self.decB64 = modelify(AVATARModel.DecFlow()) ( [ Input(K.int_shape(self.enc.outputs[0])[1:]) ] ) - self.D = modelify(AVATARModel.Discriminator() ) (Input(df_bgr_shape)) - self.C = modelify(AVATARModel.ResNet (9, n_blocks=6, ngf=128, use_dropout=False))( Input(res_bgr_t_shape)) - - self.CA_conv_weights_list = [] - if self.is_first_run(): - for model, _ in self.get_model_filename_list(): - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - self.CA_conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - - if not self.is_first_run(): - self.load_weights_safe( self.get_model_filename_list() ) - - def DLoss(labels,logits): - return K.mean(K.binary_crossentropy(labels,logits)) - - warped_A64 = Input(df_bgr_shape) - real_A64 = Input(df_bgr_shape) - real_A64m = Input(df_mask_shape) - - real_B64_t0 = Input(df_bgr_shape) - real_B64_t1 = Input(df_bgr_shape) - real_B64_t2 = Input(df_bgr_shape) - - real_A64_t0 = Input(df_bgr_shape) - real_A64m_t0 = Input(df_mask_shape) - real_A_t0 = Input(res_bgr_shape) - real_A64_t1 = Input(df_bgr_shape) - real_A64m_t1 = Input(df_mask_shape) - real_A_t1 = Input(res_bgr_shape) - real_A64_t2 = Input(df_bgr_shape) - real_A64m_t2 = Input(df_mask_shape) - real_A_t2 = Input(res_bgr_shape) - - warped_B64 = Input(df_bgr_shape) - real_B64 = Input(df_bgr_shape) - real_B64m = Input(df_mask_shape) - - warped_A_code = self.enc (warped_A64) - warped_B_code = self.enc (warped_B64) - - rec_A64 = self.decA64(warped_A_code) - rec_B64 = self.decB64(warped_B_code) - rec_AB64 = self.decA64(warped_B_code) - - def Lambda_grey_mask (x,m): - return Lambda (lambda x: x[0]*m+(1-m)*0.5, output_shape= K.int_shape(x)[1:3] + (3,)) ([x, m]) - - def Lambda_gray_pad(x): - a = np.ones((resolution,resolution,3))*0.5 - pad = ( resolution - df_res ) // 2 - a[pad:-pad:,pad:-pad:,:] = 0 
- - return Lambda ( lambda x: K.spatial_2d_padding(x, padding=((pad, pad), (pad, pad)) ) + K.constant(a, dtype=K.floatx() ), - output_shape=(resolution,resolution,3) ) (x) - - def Lambda_concat ( x ): - c = sum ( [ K.int_shape(l)[-1] for l in x ] ) - return Lambda ( lambda x: K.concatenate (x, axis=-1), output_shape=K.int_shape(x[0])[1:3] + (c,) ) (x) - - def Lambda_Cto3t(x): - return Lambda ( lambda x: x[...,0:3], output_shape= K.int_shape(x)[1:3] + (3,) ) (x), \ - Lambda ( lambda x: x[...,3:6], output_shape= K.int_shape(x)[1:3] + (3,) ) (x), \ - Lambda ( lambda x: x[...,6:9], output_shape= K.int_shape(x)[1:3] + (3,) ) (x) - - real_A64_d = self.D( Lambda_grey_mask(real_A64, real_A64m) ) - - real_A64_d_ones = K.ones_like(real_A64_d) - fake_A64_d = self.D(rec_AB64) - fake_A64_d_ones = K.ones_like(fake_A64_d) - fake_A64_d_zeros = K.zeros_like(fake_A64_d) - - rec_AB_t0 = Lambda_gray_pad( self.decA64 (self.enc (real_B64_t0)) ) - rec_AB_t1 = Lambda_gray_pad( self.decA64 (self.enc (real_B64_t1)) ) - rec_AB_t2 = Lambda_gray_pad( self.decA64 (self.enc (real_B64_t2)) ) - - C_in_A_t0 = Lambda_gray_pad( Lambda_grey_mask (real_A64_t0, real_A64m_t0) ) - C_in_A_t1 = Lambda_gray_pad( Lambda_grey_mask (real_A64_t1, real_A64m_t1) ) - C_in_A_t2 = Lambda_gray_pad( Lambda_grey_mask (real_A64_t2, real_A64m_t2) ) - - rec_C_A_t0, rec_C_A_t1, rec_C_A_t2 = Lambda_Cto3t ( self.C ( Lambda_concat ( [C_in_A_t0, C_in_A_t1, C_in_A_t2]) ) ) - rec_C_AB_t0, rec_C_AB_t1, rec_C_AB_t2 = Lambda_Cto3t( self.C ( Lambda_concat ( [rec_AB_t0, rec_AB_t1, rec_AB_t2]) ) ) - - #real_A_t012_d = self.CD ( K.concatenate ( [real_A_t0, real_A_t1,real_A_t2], axis=-1) ) - #real_A_t012_d_ones = K.ones_like(real_A_t012_d) - #rec_C_AB_t012_d = self.CD ( K.concatenate ( [rec_C_AB_t0,rec_C_AB_t1, rec_C_AB_t2], axis=-1) ) - #rec_C_AB_t012_d_ones = K.ones_like(rec_C_AB_t012_d) - #rec_C_AB_t012_d_zeros = K.zeros_like(rec_C_AB_t012_d) - - self.G64_view = K.function([warped_A64, warped_B64],[rec_A64, rec_B64, rec_AB64]) - self.G_view = K.function([real_A64_t0, real_A64m_t0, real_A64_t1, real_A64m_t1, real_A64_t2, real_A64m_t2, real_B64_t0, real_B64_t1, real_B64_t2], [rec_C_A_t0, rec_C_A_t1, rec_C_A_t2, rec_C_AB_t0, rec_C_AB_t1, rec_C_AB_t2]) - - if self.is_training_mode: - loss_AB64 = K.mean(10 * dssim(kernel_size=int(df_res/11.6),max_value=1.0) ( rec_A64, real_A64*real_A64m + (1-real_A64m)*0.5) ) + \ - K.mean(10 * dssim(kernel_size=int(df_res/11.6),max_value=1.0) ( rec_B64, real_B64*real_B64m + (1-real_B64m)*0.5) ) + 0.1*DLoss(fake_A64_d_ones, fake_A64_d ) - - weights_AB64 = self.enc.trainable_weights + self.decA64.trainable_weights + self.decB64.trainable_weights - - loss_C = K.mean( 10 * dssim(kernel_size=int(resolution/11.6),max_value=1.0) ( real_A_t0, rec_C_A_t0 ) ) + \ - K.mean( 10 * dssim(kernel_size=int(resolution/11.6),max_value=1.0) ( real_A_t1, rec_C_A_t1 ) ) + \ - K.mean( 10 * dssim(kernel_size=int(resolution/11.6),max_value=1.0) ( real_A_t2, rec_C_A_t2 ) ) - #0.1*DLoss(rec_C_AB_t012_d_ones, rec_C_AB_t012_d ) - - weights_C = self.C.trainable_weights - - loss_D = (DLoss(real_A64_d_ones, real_A64_d ) + \ - DLoss(fake_A64_d_zeros, fake_A64_d ) ) * 0.5 - - #loss_CD = ( DLoss(real_A_t012_d_ones, real_A_t012_d) + \ - # DLoss(rec_C_AB_t012_d_zeros, rec_C_AB_t012_d) ) * 0.5 - # - #weights_CD = self.CD.trainable_weights - - def opt(lr=5e-5): - return Adam(lr=lr, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2 if 'tensorflow' in self.device_config.backend else 0 ) - - self.AB64_train = K.function ([warped_A64, real_A64, real_A64m, warped_B64, real_B64, 
real_B64m], [loss_AB64], opt().get_updates(loss_AB64, weights_AB64) ) - self.C_train = K.function ([real_A64_t0, real_A64m_t0, real_A_t0, - real_A64_t1, real_A64m_t1, real_A_t1, - real_A64_t2, real_A64m_t2, real_A_t2, - real_B64_t0, real_B64_t1, real_B64_t2],[ loss_C ], opt().get_updates(loss_C, weights_C) ) - - self.D_train = K.function ([warped_A64, real_A64, real_A64m, warped_B64, real_B64, real_B64m],[loss_D], opt().get_updates(loss_D, self.D.trainable_weights) ) - - - #self.CD_train = K.function ([real_A64_t0, real_A64m_t0, real_A_t0, - # real_A64_t1, real_A64m_t1, real_A_t1, - # real_A64_t2, real_A64m_t2, real_A_t2, - # real_B64_t0, real_B64_t1, real_B64_t2 ],[ loss_CD ], opt().get_updates(loss_CD, weights_CD) ) - - ########### - t = SampleProcessor.Types - - training_target = {'source' : t.NONE, - 'full_face' : t.FACE_TYPE_FULL_NO_ALIGN, - 'head' : t.FACE_TYPE_HEAD_NO_ALIGN}[avatar_type] - - generators = [ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=False), - output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_M), 'resolution':df_res} - ] ), - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=False), - output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_M), 'resolution':df_res} - ] ), - - SampleGeneratorFaceTemporal(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - temporal_image_count=3, - sample_process_options=SampleProcessor.Options(random_flip=False), - output_sample_types=[{'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res},#IMG_WARPED_TRANSFORMED - {'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_M), 'resolution':df_res}, - {'types': (t.IMG_SOURCE, training_target, t.MODE_BGR), 'resolution':resolution}, - ] ), - - SampleGeneratorFaceTemporal(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - temporal_image_count=3, - sample_process_options=SampleProcessor.Options(random_flip=False), - output_sample_types=[{'types': (t.IMG_SOURCE, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_SOURCE, t.NONE, t.MODE_BGR), 'resolution':resolution}, - ] ), - ] - - if self.stage == 1: - generators[2].set_active(False) - generators[3].set_active(False) - elif self.stage == 2: - generators[0].set_active(False) - generators[1].set_active(False) - - self.set_training_data_generators (generators) - else: - self.G_convert = K.function([real_B64_t0, real_B64_t1, real_B64_t2],[rec_C_AB_t1]) - - #override , return [ [model, filename],... 
] list - def get_model_filename_list(self): - return [ [self.enc, 'enc.h5'], - [self.decA64, 'decA64.h5'], - [self.decB64, 'decB64.h5'], - [self.C, 'C.h5'], - [self.D, 'D.h5'], - #[self.CD, 'CD.h5'], - ] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def on_success_train_one_iter(self): - if len(self.CA_conv_weights_list) != 0: - exec(nnlib.import_all(), locals(), globals()) - CAInitializerMP ( self.CA_conv_weights_list ) - self.CA_conv_weights_list = [] - - #override - def onTrainOneIter(self, generators_samples, generators_list): - warped_src64, src64, src64m = generators_samples[0] - warped_dst64, dst64, dst64m = generators_samples[1] - - real_A64_t0, real_A64m_t0, real_A_t0, real_A64_t1, real_A64m_t1, real_A_t1, real_A64_t2, real_A64m_t2, real_A_t2 = generators_samples[2] - real_B64_t0, _, real_B64_t1, _, real_B64_t2, _ = generators_samples[3] - - if self.stage == 0 or self.stage == 1: - loss, = self.AB64_train ( [warped_src64, src64, src64m, warped_dst64, dst64, dst64m] ) - loss_D, = self.D_train ( [warped_src64, src64, src64m, warped_dst64, dst64, dst64m] ) - if self.stage != 0: - loss_C = loss_CD = 0 - - if self.stage == 0 or self.stage == 2: - loss_C1, = self.C_train ( [real_A64_t0, real_A64m_t0, real_A_t0, - real_A64_t1, real_A64m_t1, real_A_t1, - real_A64_t2, real_A64m_t2, real_A_t2, - real_B64_t0, real_B64_t1, real_B64_t2] ) - - loss_C2, = self.C_train ( [real_A64_t2, real_A64m_t2, real_A_t2, - real_A64_t1, real_A64m_t1, real_A_t1, - real_A64_t0, real_A64m_t0, real_A_t0, - real_B64_t0, real_B64_t1, real_B64_t2] ) - - #loss_CD1, = self.CD_train ( [real_A64_t0, real_A64m_t0, real_A_t0, - # real_A64_t1, real_A64m_t1, real_A_t1, - # real_A64_t2, real_A64m_t2, real_A_t2, - # real_B64_t0, real_B64_t1, real_B64_t2] ) - # - #loss_CD2, = self.CD_train ( [real_A64_t2, real_A64m_t2, real_A_t2, - # real_A64_t1, real_A64m_t1, real_A_t1, - # real_A64_t0, real_A64m_t0, real_A_t0, - # real_B64_t0, real_B64_t1, real_B64_t2] ) - - loss_C = (loss_C1 + loss_C2) / 2 - #loss_CD = (loss_CD1 + loss_CD2) / 2 - if self.stage != 0: - loss = loss_D = 0 - - return ( ('loss', loss), ('D', loss_D), ('C', loss_C), ) #('CD', loss_CD) ) - - #override - def onGetPreview(self, sample): - test_A064w = sample[0][0][0:4] - test_A064r = sample[0][1][0:4] - test_A064m = sample[0][2][0:4] - - test_B064w = sample[1][0][0:4] - test_B064r = sample[1][1][0:4] - test_B064m = sample[1][2][0:4] - - t_src64_0 = sample[2][0][0:4] - t_src64m_0 = sample[2][1][0:4] - t_src_0 = sample[2][2][0:4] - t_src64_1 = sample[2][3][0:4] - t_src64m_1 = sample[2][4][0:4] - t_src_1 = sample[2][5][0:4] - t_src64_2 = sample[2][6][0:4] - t_src64m_2 = sample[2][7][0:4] - t_src_2 = sample[2][8][0:4] - - t_dst64_0 = sample[3][0][0:4] - t_dst_0 = sample[3][1][0:4] - t_dst64_1 = sample[3][2][0:4] - t_dst_1 = sample[3][3][0:4] - t_dst64_2 = sample[3][4][0:4] - t_dst_2 = sample[3][5][0:4] - - G64_view_result = self.G64_view ([test_A064r, test_B064r]) - test_A064r, test_B064r, rec_A64, rec_B64, rec_AB64 = [ x[0] for x in ([test_A064r, test_B064r] + G64_view_result) ] - - sample64x4 = np.concatenate ([ np.concatenate ( [rec_B64, rec_A64], axis=1 ), - np.concatenate ( [test_B064r, rec_AB64], axis=1) ], axis=0 ) - - sample64x4 = cv2.resize (sample64x4, (self.resolution, self.resolution) ) - - G_view_result = self.G_view([t_src64_0, t_src64m_0, t_src64_1, t_src64m_1, t_src64_2, t_src64m_2, t_dst64_0, t_dst64_1, t_dst64_2 ]) - - t_dst_0, t_dst_1, t_dst_2, rec_C_A_t0, rec_C_A_t1, rec_C_A_t2, 
rec_C_AB_t0, rec_C_AB_t1, rec_C_AB_t2 = [ x[0] for x in ([t_dst_0, t_dst_1, t_dst_2, ] + G_view_result) ] - - c1 = np.concatenate ( (sample64x4, rec_C_A_t0, t_dst_0, rec_C_AB_t0 ), axis=1 ) - c2 = np.concatenate ( (sample64x4, rec_C_A_t1, t_dst_1, rec_C_AB_t1 ), axis=1 ) - c3 = np.concatenate ( (sample64x4, rec_C_A_t2, t_dst_2, rec_C_AB_t2 ), axis=1 ) - - r = np.concatenate ( [c1,c2,c3], axis=0 ) - - return [ ('AVATAR', r ) ] - - def predictor_func (self, prev_imgs=None, img=None, next_imgs=None, dummy_predict=False): - if dummy_predict: - z = np.zeros ( (1, self.df_res, self.df_res, 3), dtype=np.float32 ) - self.G_convert ([z,z,z]) - else: - feed = [ prev_imgs[-1][np.newaxis,...], img[np.newaxis,...], next_imgs[0][np.newaxis,...] ] - x = self.G_convert (feed)[0] - return np.clip ( x[0], 0, 1) - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (self.df_res, self.df_res, 3), converters.ConverterConfigFaceAvatar(temporal_face_count=1) - - @staticmethod - def Discriminator(ndf=128): - exec (nnlib.import_all(), locals(), globals()) - - def func(input): - b,h,w,c = K.int_shape(input) - - x = input - - x = Conv2D( ndf, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.2)(x) - - x = Conv2D( ndf*2, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = InstanceNormalization (axis=-1)(x) - x = LeakyReLU(0.2)(x) - - x = Conv2D( ndf*4, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = InstanceNormalization (axis=-1)(x) - x = LeakyReLU(0.2)(x) - - x = Conv2D( ndf*8, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = InstanceNormalization (axis=-1)(x) - x = LeakyReLU(0.2)(x) - - return Conv2D( 1, 4, strides=1, padding='valid', activation='sigmoid')( ZeroPadding2D(3)(x) ) - return func - - @staticmethod - def EncFlow(): - exec (nnlib.import_all(), locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)( Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return SubpixelUpscaler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - - def func(input): - x, = input - b,h,w,c = K.int_shape(x) - - dim_res = w // 16 - - x = downscale(64)(x) - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - - x = Dense(512)(Flatten()(x)) - x = Dense(dim_res * dim_res * 512)(x) - x = Reshape((dim_res, dim_res, 512))(x) - x = upscale(512)(x) - return x - - return func - - @staticmethod - def DecFlow(output_nc=3, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - - def upscale (dim): - def func(x): - return SubpixelUpscaler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def to_bgr (output_nc, **kwargs): - def func(x): - return Conv2D(output_nc, kernel_size=5, strides=1, padding='same', activation='sigmoid')(x) - return func - - def func(input): - x = input[0] - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - return to_bgr(output_nc) (x) - - return func - - @staticmethod - def ResNet(output_nc, ngf=64, n_blocks=6, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - def func(input): - def ResnetBlock(dim, use_dropout=False): - def func(input): - x = input - - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = InstanceNormalization (axis=-1)(x) - x = ReLU()(x) - - if use_dropout: - x = Dropout(0.5)(x) - - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = InstanceNormalization (axis=-1)(x) - x = ReLU()(x) - return 
Add()([x,input]) - return func - - x = input - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2D(ngf, 7, strides=1, padding='same')(x))) - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2D(ngf*2, 3, strides=2, padding='same')(x))) - x = ReLU()(InstanceNormalization (axis=-1)(Conv2D(ngf*4, 3, strides=2, padding='same')(x))) - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2D(ngf*4, 3, strides=2, padding='same')(x))) - - for i in range(n_blocks): - x = ResnetBlock(ngf*4, use_dropout=use_dropout)(x) - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2DTranspose(ngf*4, 3, strides=2, padding='same')(x))) - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2DTranspose(ngf*2, 3, strides=2, padding='same')(x))) - x = ReLU()(InstanceNormalization (axis=-1)(Conv2DTranspose(ngf , 3, strides=2, padding='same')(x))) - - x = Conv2D(output_nc, 7, strides=1, activation='sigmoid', padding='same')(x) - - return x - - return func - -Model = AVATARModel \ No newline at end of file diff --git a/models/Model_AVATAR/__init__.py b/models/Model_AVATAR/__init__.py deleted file mode 100644 index cdb3fe7..0000000 --- a/models/Model_AVATAR/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model \ No newline at end of file diff --git a/models/Model_DEV_FANSEG/Model.py b/models/Model_DEV_FANSEG/Model.py deleted file mode 100644 index afe32fb..0000000 --- a/models/Model_DEV_FANSEG/Model.py +++ /dev/null @@ -1,103 +0,0 @@ -import numpy as np - -from nnlib import nnlib, TernausNet -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=False, - ask_random_flip=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - default_face_type = 'f' - if is_first_run: - self.options['face_type'] = io.input_str ("Half or Full face? 
(h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {1.5:4, 11:48} ) - - self.resolution = 256 - self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - model_name = 'FANSeg' - self.fan_seg = TernausNet(model_name, self.resolution, - FaceType.toString(self.face_type), - load_weights=not self.is_first_run(), - weights_file_root=self.get_model_root_path(), - training=True) - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True), - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_RANDOM_HSV_SHIFT), 'resolution' : self.resolution, 'motion_blur':(25, 5), 'gaussian_blur':(25,5), 'border_replicate':False}, - { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_M), 'resolution': self.resolution }, - ]), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True ), - output_sample_types=[ { 'types': (t.IMG_TRANSFORMED , face_type, t.MODE_BGR_RANDOM_HSV_SHIFT), 'resolution' : self.resolution}, - ]) - ]) - - #override - def onSave(self): - self.fan_seg.save_weights() - - #override - def onTrainOneIter(self, generators_samples, generators_list): - target_src, target_src_mask = generators_samples[0] - - loss = self.fan_seg.train( target_src, target_src_mask ) - - return ( ('loss', loss), ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][0][0:4] #first 4 samples - test_Am = sample[0][1][0:4] #first 4 samples - test_B = sample[1][0][0:4] #first 4 samples - - - mAA = self.fan_seg.extract(test_A) - mBB = self.fan_seg.extract(test_B) - - test_Am = np.repeat ( test_Am, (3,), -1) - mAA = np.repeat ( mAA, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - test_Am[i], - mAA[i], - test_A[i,:,:,0:3]*mAA[i], - ), axis=1) ) - - st2 = [] - for i in range(0, len(test_B)): - st2.append ( np.concatenate ( ( - test_B[i,:,:,0:3], - mBB[i], - test_B[i,:,:,0:3]*mBB[i], - ), axis=1) ) - - return [ ('training data', np.concatenate ( st, axis=0 ) ), - ('evaluating data', np.concatenate ( st2, axis=0 ) ), - ] diff --git a/models/Model_DEV_FANSEG/__init__.py b/models/Model_DEV_FANSEG/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_DEV_FANSEG/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_DEV_FUNIT/Model.py b/models/Model_DEV_FUNIT/Model.py deleted file mode 100644 index 4a0788f..0000000 --- a/models/Model_DEV_FUNIT/Model.py +++ /dev/null @@ -1,178 +0,0 @@ -from functools import partial - -import cv2 -import numpy as np - -from facelib import FaceType -from interact import interact as io -from mathlib import get_power_of_two -from models import ModelBase -from nnlib import nnlib, FUNIT -from samplelib import * - - - -class FUNITModel(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - 
ask_random_flip=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - - default_resolution = 64 - if is_first_run: - self.options['resolution'] = io.input_int(f"Resolution ( 64,96,128,224 ?:help skip:{default_resolution}) : ", default_resolution, [64,96,128,224]) - else: - self.options['resolution'] = self.options.get('resolution', default_resolution) - - default_face_type = 'mf' - if is_first_run: - self.options['face_type'] = io.input_str (f"Half or Full face? (h/mf/f, ?:help skip:{default_face_type}) : ", default_face_type, ['h','mf','f'], help_message="").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend: - def_optimizer_mode = self.options.get('optimizer_mode', 1) - self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train x2 bigger network consuming RAM. 3 - allows you to train x3 bigger network consuming huge amount of RAM and slower, depends on CPU power.") - else: - self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1) - - #override - def onInitialize(self, batch_size=-1, **in_options): - exec(nnlib.code_import_all, locals(), globals()) - self.set_vram_batch_requirements({4:16,11:24}) - - resolution = self.options['resolution'] - face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - person_id_max_count = SampleGeneratorFacePerson.get_person_id_max_count(self.training_data_src_path) - - - self.model = FUNIT( face_type_str=FaceType.toString(face_type), - batch_size=self.batch_size, - encoder_nf=64, - encoder_downs=2, - encoder_res_blk=2, - class_downs=4, - class_nf=64, - class_latent=64, - mlp_blks=2, - dis_nf=64, - dis_res_blks=8,#10 - num_classes=person_id_max_count, - subpixel_decoder=True, - initialize_weights=self.is_first_run(), - is_training=self.is_training_mode, - tf_cpu_mode=self.options['optimizer_mode']-1 - ) - - if not self.is_first_run(): - self.load_weights_safe(self.model.get_model_filename_list()) - - if self.is_training_mode: - t = SampleProcessor.Types - if self.options['face_type'] == 'h': - face_type = t.FACE_TYPE_HALF - elif self.options['face_type'] == 'mf': - face_type = t.FACE_TYPE_MID_FULL - elif self.options['face_type'] == 'f': - face_type = t.FACE_TYPE_FULL - - output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':resolution, 'normalize_tanh':True} ] - output_sample_types1=[ {'types': (t.IMG_SOURCE, face_type, t.MODE_BGR), 'resolution':resolution, 'normalize_tanh':True} ] - - self.set_training_data_generators ([ - SampleGeneratorFacePerson(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True, rotation_range=[0,0] ), - output_sample_types=output_sample_types, person_id_mode=1, ), - - SampleGeneratorFacePerson(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True, rotation_range=[0,0] ), - output_sample_types=output_sample_types, person_id_mode=1, ), - - SampleGeneratorFacePerson(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True, rotation_range=[0,0]), - output_sample_types=output_sample_types1, person_id_mode=1, 
), - - SampleGeneratorFacePerson(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True, rotation_range=[0,0]), - output_sample_types=output_sample_types1, person_id_mode=1, ), - ]) - - #override - def get_model_filename_list(self): - return self.model.get_model_filename_list() - - #override - def onSave(self): - self.save_weights_safe(self.model.get_model_filename_list()) - - #override - def onTrainOneIter(self, generators_samples, generators_list): - xa,la = generators_samples[0] - xb,lb = generators_samples[1] - - G_loss, D_loss = self.model.train(xa,la,xb,lb) - - return ( ('G_loss', G_loss), ('D_loss', D_loss), ) - - #override - def onGetPreview(self, generators_samples): - xa = generators_samples[0][0] - xb = generators_samples[1][0] - ta = generators_samples[2][0] - tb = generators_samples[3][0] - - view_samples = min(4, xa.shape[0]) - - lines_train = [] - lines_test = [] - - for i in range(view_samples): - - s_xa = self.model.get_average_class_code([ xa[i:i+1] ])[0][None,...] - s_xb = self.model.get_average_class_code([ xb[i:i+1] ])[0][None,...] - - s_ta = self.model.get_average_class_code([ ta[i:i+1] ])[0][None,...] - s_tb = self.model.get_average_class_code([ tb[i:i+1] ])[0][None,...] - - xaxa = self.model.convert ([ xa[i:i+1], s_xa ] )[0][0] - xbxb = self.model.convert ([ xb[i:i+1], s_xb ] )[0][0] - xaxb = self.model.convert ([ xa[i:i+1], s_xb ] )[0][0] - xbxa = self.model.convert ([ xb[i:i+1], s_xa ] )[0][0] - - tata = self.model.convert ([ ta[i:i+1], s_ta ] )[0][0] - tbtb = self.model.convert ([ tb[i:i+1], s_tb ] )[0][0] - tatb = self.model.convert ([ ta[i:i+1], s_tb ] )[0][0] - tbta = self.model.convert ([ tb[i:i+1], s_ta ] )[0][0] - - line_train = [ xa[i], xaxa, xb[i], xbxb, xaxb, xbxa ] - line_test = [ ta[i], tata, tb[i], tbtb, tatb, tbta ] - - lines_train += [ np.concatenate([ np.clip(x/2+0.5,0,1) for x in line_train], axis=1) ] - lines_test += [ np.concatenate([ np.clip(x/2+0.5,0,1) for x in line_test ], axis=1) ] - - lines_train = np.concatenate ( lines_train, axis=0 ) - lines_test = np.concatenate ( lines_test, axis=0 ) - return [ ('TRAIN', lines_train ), ('TEST', lines_test) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.model.convert ([ np.zeros ( (1, self.options['resolution'], self.options['resolution'], 3), dtype=np.float32 ), self.average_class_code ]) - else: - bgr, = self.model.convert ([ face[np.newaxis,...]*2-1, self.average_class_code ]) - return bgr[0] / 2 + 0.5 - - #override - def get_ConverterConfig(self): - face_type = FaceType.FULL - - import converters - return self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), converters.ConverterConfigMasked(face_type=face_type, - default_mode = 1, - clip_hborder_mask_per=0.0625 if (face_type == FaceType.FULL) else 0, - ) - - -Model = FUNITModel diff --git a/models/Model_DEV_FUNIT/__init__.py b/models/Model_DEV_FUNIT/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_DEV_FUNIT/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_DEV_POSEEST/Model.py b/models/Model_DEV_POSEEST/Model.py deleted file mode 100644 index 68114d9..0000000 --- a/models/Model_DEV_POSEEST/Model.py +++ /dev/null @@ -1,120 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from facelib import PoseEstimator -from samplelib import * -from interact import 
interact as io -import imagelib - -class Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=False, - ask_random_flip=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - yn_str = {True:'y',False:'n'} - - default_face_type = 'f' - if is_first_run: - self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - def_train_bgr = self.options.get('train_bgr', True) - if is_first_run or ask_override: - self.options['train_bgr'] = io.input_bool ("Train bgr? (y/n, ?:help skip: %s) : " % (yn_str[def_train_bgr]), def_train_bgr) - else: - self.options['train_bgr'] = self.options.get('train_bgr', def_train_bgr) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4:64} ) - - self.resolution = 128 - self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - - self.pose_est = PoseEstimator(self.resolution, - FaceType.toString(self.face_type), - load_weights=not self.is_first_run(), - weights_file_root=self.get_model_root_path(), - training=True) - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, - sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, - output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) }, - {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution }, - {'types': (t.IMG_PITCH_YAW_ROLL,)} - ]), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, - sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, - output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':self.resolution }, - {'types': (t.IMG_PITCH_YAW_ROLL,)} - ]) - ]) - - #override - def onSave(self): - self.pose_est.save_weights() - - #override - def onTrainOneIter(self, generators_samples, generators_list): - target_srcw, target_src, pitch_yaw_roll = generators_samples[0] - - bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] ) - - return ( ('bgr_loss', bgr_loss), ('pyr_loss', pyr_loss), ) - - #override - def onGetPreview(self, generators_samples): - test_src = generators_samples[0][1][0:4] #first 4 samples - test_pyr_src = generators_samples[0][2][0:4] - test_dst = generators_samples[1][0][0:4] - test_pyr_dst = generators_samples[1][1][0:4] - - h,w,c = self.resolution,self.resolution,3 - h_line = 13 - - result = [] - for name, img, pyr in [ ['training data', test_src, test_pyr_src], \ - ['evaluating data',test_dst, test_pyr_dst] ]: - bgr_pred, pyr_pred = self.pose_est.extract(img) - - hor_imgs = [] - for i in range(len(img)): - img_info = np.ones ( (h,w,c) ) * 0.1 - - i_pyr = pyr[i] - i_pyr_pred = 
pyr_pred[i] - lines = ["%.4f %.4f %.4f" % (i_pyr[0],i_pyr[1],i_pyr[2]), - "%.4f %.4f %.4f" % (i_pyr_pred[0],i_pyr_pred[1],i_pyr_pred[2]) ] - - lines_count = len(lines) - for ln in range(lines_count): - img_info[ ln*h_line:(ln+1)*h_line, 0:w] += \ - imagelib.get_text_image ( (h_line,w,c), lines[ln], color=[0.8]*c ) - - hor_imgs.append ( np.concatenate ( ( - img[i,:,:,0:3], - bgr_pred[i], - img_info - ), axis=1) ) - - - result += [ (name, np.concatenate (hor_imgs, axis=0)) ] - - return result \ No newline at end of file diff --git a/models/Model_DEV_POSEEST/__init__.py b/models/Model_DEV_POSEEST/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_DEV_POSEEST/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_DF/Model.py b/models/Model_DF/Model.py deleted file mode 100644 index 3164a93..0000000 --- a/models/Model_DF/Model.py +++ /dev/null @@ -1,169 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoder_src, self.decoder_dst = self.Build(ae_input_layer) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - rec_src = self.decoder_src(self.encoder(ae_input_layer)) - rec_dst = self.decoder_dst(self.encoder(ae_input_layer)) - self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - - self.convert = K.function([ae_input_layer], rec_src) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - 
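# Aside: a minimal tf.keras sketch of the shared-encoder / two-decoder layout this DF model
# (and the H64/H128/LIAEF128 variants removed below) is built around: one encoder, a decoder
# per identity, each autoencoder trained only on its own faces, and the src decoder applied
# to the dst face's code at convert time. Layer sizes here are arbitrary illustrations, not
# the deleted model's exact dimensions.
from tensorflow.keras import layers, Model

inp = layers.Input((128, 128, 3))
x = layers.Conv2D(32, 5, strides=2, padding='same', activation='relu')(inp)
x = layers.Conv2D(64, 5, strides=2, padding='same', activation='relu')(x)
encoder = Model(inp, x, name='encoder')

def make_decoder(name):
    d_in = layers.Input(encoder.output_shape[1:])
    y = layers.Conv2DTranspose(64, 3, strides=2, padding='same', activation='relu')(d_in)
    y = layers.Conv2DTranspose(32, 3, strides=2, padding='same', activation='relu')(y)
    y = layers.Conv2D(3, 5, padding='same', activation='sigmoid')(y)
    return Model(d_in, y, name=name)

decoder_src = make_decoder('decoder_src')
decoder_dst = make_decoder('decoder_dst')
ae_src = Model(inp, decoder_src(encoder(inp)))   # trained on src faces only
ae_dst = Model(inp, decoder_dst(encoder(inp)))   # trained on dst faces only
ae_src.compile('adam', 'mae')
ae_dst.compile('adam', 'mae')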
sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) - loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('DF', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.convert ([ np.zeros ( (1, 128, 128, 3), dtype=np.float32 ) ]) - else: - x, mx = self.convert ( [ face[np.newaxis,...] 
] ) - return x[0], mx[0][...,0] - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (128,128,3), converters.ConverterConfigMasked(face_type=FaceType.FULL, default_mode='seamless') - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_layer): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - - x = Dense(512)(Flatten()(x)) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - - return Model(input_layer, x) - - def Decoder(): - input_ = Input(shape=(16, 16, 512)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - return Model(input_, [x,y]) - - return Encoder(input_layer), Decoder(), Decoder() diff --git a/models/Model_DF/__init__.py b/models/Model_DF/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_DF/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_H128/Model.py b/models/Model_H128/Model.py deleted file mode 100644 index 870780d..0000000 --- a/models/Model_H128/Model.py +++ /dev/null @@ -1,203 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. If your GPU VRAM <= 4, you should to choose this option.") - else: - default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future - if 'created_vram_gb' in self.options.keys(): - self.options.pop ('created_vram_gb') - self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) - - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {2.5:4} ) - - bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build( self.options['lighter_ae'] ) - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - input_src_bgr = Input(bgr_shape) - input_src_mask = Input(mask_shape) - input_dst_bgr = Input(bgr_shape) - input_dst_mask = Input(mask_shape) - - rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) - rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) - - self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) - - self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), - loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) - - self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) - self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types=output_sample_types ), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types ) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - total, loss_src_bgr, loss_src_mask, loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_mask, warped_dst, target_dst_mask], [target_src, target_src_mask, target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.src_view([test_A]) - AB, mAB = self.src_view([test_B]) - BB, mBB = self.dst_view([test_B]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], 
- test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('H128', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.src_view ([ np.zeros ( (1, 128, 128, 3), dtype=np.float32 ) ]) - else: - x, mx = self.src_view ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (128,128,3), converters.ConverterConfigMasked(face_type=FaceType.HALF, default_mode='seamless') - - def Build(self, lighter_ae): - exec(nnlib.code_import_all, locals(), globals()) - - bgr_shape = (128, 128, 3) - mask_shape = (128, 128, 1) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_shape): - input_layer = Input(input_shape) - x = input_layer - if not lighter_ae: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(512)(Flatten()(x)) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - else: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(256)(Flatten()(x)) - x = Dense(8 * 8 * 256)(x) - x = Reshape((8, 8, 256))(x) - x = upscale(256)(x) - - return Model(input_layer, x) - - def Decoder(): - if not lighter_ae: - input_ = Input(shape=(16, 16, 512)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - else: - input_ = Input(shape=(16, 16, 256)) - x = input_ - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - - y = input_ #mask decoder - y = upscale(256)(y) - y = upscale(128)(y) - y = upscale(64)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - - return Model(input_, [x,y]) - - return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() diff --git a/models/Model_H128/__init__.py b/models/Model_H128/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_H128/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_H64/Model.py b/models/Model_H64/Model.py deleted file mode 100644 index 2e7142c..0000000 --- a/models/Model_H64/Model.py +++ /dev/null @@ -1,200 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. 
If your GPU VRAM <= 4, you should to choose this option.") - else: - default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future - if 'created_vram_gb' in self.options.keys(): - self.options.pop ('created_vram_gb') - self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) - - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {1.5:4} ) - - - bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.options['lighter_ae']) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - input_src_bgr = Input(bgr_shape) - input_src_mask = Input(mask_shape) - input_dst_bgr = Input(bgr_shape) - input_dst_mask = Input(mask_shape) - - rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) - rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) - - self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) - - self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) - - self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) - self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':64} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_full_mask = sample[0] - warped_dst, target_dst, target_dst_full_mask = sample[1] - - total, loss_src_bgr, loss_src_mask, loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_full_mask, warped_dst, target_dst_full_mask], 
[target_src, target_src_full_mask, target_dst, target_dst_full_mask] ) - - return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.src_view([test_A]) - AB, mAB = self.src_view([test_B]) - BB, mBB = self.dst_view([test_B]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('H64', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.src_view ([ np.zeros ( (1, 64, 64, 3), dtype=np.float32 ) ]) - else: - x, mx = self.src_view ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (64,64,3), converters.ConverterConfigMasked(face_type=FaceType.HALF, default_mode='seamless') - - def Build(self, lighter_ae): - exec(nnlib.code_import_all, locals(), globals()) - - bgr_shape = (64, 64, 3) - mask_shape = (64, 64, 1) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_shape): - input_layer = Input(input_shape) - x = input_layer - if not lighter_ae: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(1024)(Flatten()(x)) - x = Dense(4 * 4 * 1024)(x) - x = Reshape((4, 4, 1024))(x) - x = upscale(512)(x) - else: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(768)(x) - x = Dense(512)(Flatten()(x)) - x = Dense(4 * 4 * 512)(x) - x = Reshape((4, 4, 512))(x) - x = upscale(256)(x) - return Model(input_layer, x) - - def Decoder(): - if not lighter_ae: - input_ = Input(shape=(8, 8, 512)) - x = input_ - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - else: - input_ = Input(shape=(8, 8, 256)) - - x = input_ - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - - y = input_ #mask decoder - y = upscale(256)(y) - y = upscale(128)(y) - y = upscale(64)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - return Model(input_, [x,y]) - - return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() diff --git a/models/Model_H64/__init__.py b/models/Model_H64/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_H64/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_LIAEF128/Model.py b/models/Model_LIAEF128/Model.py deleted file mode 100644 index fc81c93..0000000 --- a/models/Model_LIAEF128/Model.py +++ /dev/null @@ -1,178 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run or ask_override: - def_pixel_loss = 
self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoder, self.inter_B, self.inter_AB = self.Build(ae_input_layer) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder, 'encoder.h5'], - [self.decoder, 'decoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5'] - ] - self.load_weights_safe(weights_to_load) - - code = self.encoder(ae_input_layer) - AB = self.inter_AB(code) - B = self.inter_B(code) - rec_src = self.decoder(Concatenate()([AB, AB])) - rec_dst = self.decoder(Concatenate()([B, AB])) - self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src ) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst ) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - - self.convert = K.function([ae_input_layer],rec_src) - - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder, 'decoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) - loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = 
self.autoencoder_src.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('LIAEF128', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.convert ([ np.zeros ( (1, 128, 128, 3), dtype=np.float32 ) ]) - else: - x, mx = self.convert ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (128,128,3), converters.ConverterConfigMasked(face_type=FaceType.FULL, default_mode='seamless') - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Flatten()(x) - return Model(input_layer, x) - - def Intermediate(): - input_layer = Input(shape=(None, 8 * 8 * 1024)) - x = input_layer - x = Dense(256)(x) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - return Model(input_layer, x) - - def Decoder(): - input_ = Input(shape=(16, 16, 1024)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid' )(y) - - return Model(input_, [x,y]) - - return Encoder(), Decoder(), Intermediate(), Intermediate() diff --git a/models/Model_LIAEF128/__init__.py b/models/Model_LIAEF128/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_LIAEF128/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_Quick96/Model.py b/models/Model_Quick96/Model.py index 6b66041..4d18710 100644 --- a/models/Model_Quick96/Model.py +++ b/models/Model_Quick96/Model.py @@ -1,261 +1,503 @@ +import multiprocessing from functools import partial import numpy as np -import mathlib +from core import mathlib +from core.interact import interact as io +from core.leras import nn from facelib import FaceType -from interact import interact as io from models import ModelBase -from nnlib import nnlib from samplelib import * - -class Quick96Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=True, - ask_batch_size=False, - ask_random_flip=False) - +class QModel(ModelBase): #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements({1.5:2,2:4}) + def on_initialize(self): + nn.initialize() + tf = nn.tf + + conv_kernel_initializer = nn.initializers.ca + + class Downscale(nn.ModelBase): + def __init__(self, in_ch, out_ch, kernel_size=5, dilations=1, subpixel=True, use_activator=True, *kwargs ): + 
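# Aside: a small NumPy sketch of the subpixel downscaling performed by the Downscale block
# being defined here (in its forward below: a stride-1 conv to out_ch//4 channels, then
# tf.nn.space_to_depth with block size 2). Each 2x2 spatial tile is folded into the channel
# axis, halving H and W and multiplying the channel count by 4. The helper below is a
# hypothetical NHWC re-implementation written only for this illustration.
import numpy as np

def space_to_depth(x, block=2):
    n, h, w, c = x.shape
    x = x.reshape(n, h // block, block, w // block, block, c)
    x = x.transpose(0, 1, 3, 2, 4, 5)
    return x.reshape(n, h // block, w // block, c * block * block)

x = np.arange(1 * 4 * 4 * 3, dtype=np.float32).reshape(1, 4, 4, 3)
y = space_to_depth(x)
print(x.shape, y.shape)   # (1, 4, 4, 3) -> (1, 2, 2, 12)
# The Upscale block goes the other way with tf.nn.depth_to_space, trading channels for resolution.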
self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.dilations = dilations + self.subpixel = subpixel + self.use_activator = use_activator + super().__init__(*kwargs) + + def on_build(self, *args, **kwargs ): + self.conv1 = nn.Conv2D( self.in_ch, + self.out_ch // (4 if self.subpixel else 1), + kernel_size=self.kernel_size, + strides=1 if self.subpixel else 2, + padding='SAME', dilations=self.dilations, kernel_initializer=conv_kernel_initializer ) + + def forward(self, x): + x = self.conv1(x) + + if self.subpixel: + x = tf.nn.space_to_depth(x, 2) + + if self.use_activator: + x = tf.nn.leaky_relu(x, 0.2) + return x + + def get_out_ch(self): + return (self.out_ch // 4) * 4 + + class DownscaleBlock(nn.ModelBase): + def on_build(self, in_ch, ch, n_downscales, kernel_size, dilations=1, subpixel=True): + self.downs = [] + + last_ch = in_ch + for i in range(n_downscales): + cur_ch = ch*( min(2**i, 8) ) + self.downs.append ( Downscale(last_ch, cur_ch, kernel_size=kernel_size, dilations=dilations, subpixel=subpixel) ) + last_ch = self.downs[-1].get_out_ch() + + def forward(self, inp): + x = inp + for down in self.downs: + x = down(x) + return x + + class Upscale(nn.ModelBase): + def on_build(self, in_ch, out_ch, kernel_size=3 ): + self.conv1 = nn.Conv2D( in_ch, out_ch*4, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, x): + x = self.conv1(x) + x = tf.nn.leaky_relu(x, 0.2) + x = tf.nn.depth_to_space(x, 2) + return x + + class UpdownResidualBlock(nn.ModelBase): + def on_build(self, ch, inner_ch, kernel_size=3 ): + self.up = Upscale (ch, inner_ch, kernel_size=kernel_size) + self.res = ResidualBlock (inner_ch, kernel_size=kernel_size) + self.down = Downscale (inner_ch, ch, kernel_size=kernel_size, use_activator=False) + + def forward(self, inp): + x = self.up(inp) + x = upx = self.res(x) + x = self.down(x) + x = x + inp + x = tf.nn.leaky_relu(x, 0.2) + return x, upx + + class ResidualBlock(nn.ModelBase): + def on_build(self, ch, kernel_size=3 ): + self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, inp): + x = self.conv1(inp) + x = tf.nn.leaky_relu(x, 0.2) + x = self.conv2(x) + x = inp + x + x = tf.nn.leaky_relu(x, 0.2) + return x + + class Encoder(nn.ModelBase): + def on_build(self, in_ch, e_ch): + self.down1 = DownscaleBlock(in_ch, e_ch, n_downscales=4, kernel_size=5) + def forward(self, inp): + return nn.tf_flatten(self.down1(inp)) + + class Inter(nn.ModelBase): + def __init__(self, in_ch, lowest_dense_res, ae_ch, ae_out_ch, d_ch, **kwargs): + self.in_ch, self.lowest_dense_res, self.ae_ch, self.ae_out_ch, self.d_ch = in_ch, lowest_dense_res, ae_ch, ae_out_ch, d_ch + super().__init__(**kwargs) + + def on_build(self): + in_ch, lowest_dense_res, ae_ch, ae_out_ch, d_ch = self.in_ch, self.lowest_dense_res, self.ae_ch, self.ae_out_ch, self.d_ch + + self.dense1 = nn.Dense( in_ch, ae_ch, kernel_initializer=tf.initializers.orthogonal ) + self.dense2 = nn.Dense( ae_ch, lowest_dense_res * lowest_dense_res * ae_out_ch, maxout_features=2, kernel_initializer=tf.initializers.orthogonal ) + self.upscale1 = Upscale(ae_out_ch, d_ch*8) + self.res1 = ResidualBlock(d_ch*8) + + def forward(self, inp): + x = self.dense1(inp) + x = self.dense2(x) + x = tf.reshape (x, (-1, lowest_dense_res, lowest_dense_res, self.ae_out_ch)) + x = 
self.upscale1(x) + x = self.res1(x) + return x + + def get_out_ch(self): + return self.ae_out_ch + + class Decoder(nn.ModelBase): + def on_build(self, in_ch, d_ch): + self.upscale1 = Upscale(in_ch, d_ch*4) + + self.res1 = UpdownResidualBlock(d_ch*4, d_ch*2) + self.upscale2 = Upscale(d_ch*4, d_ch*2) + self.res2 = UpdownResidualBlock(d_ch*2, d_ch) + self.upscale3 = Upscale(d_ch*2, d_ch*1) + self.res3 = UpdownResidualBlock(d_ch, d_ch//2) + + self.upscalem1 = Upscale(in_ch, d_ch) + self.upscalem2 = Upscale(d_ch, d_ch//2) + self.upscalem3 = Upscale(d_ch//2, d_ch//2) + + self.out_conv = nn.Conv2D( d_ch*1, 3, kernel_size=1, padding='SAME', kernel_initializer=conv_kernel_initializer) + self.out_convm = nn.Conv2D( d_ch//2, 1, kernel_size=1, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, inp): + z = inp + + x = self.upscale1(z) + x, upx = self.res1(x) + + x = self.upscale2(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res2(x) + + x = self.upscale3(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res3(x) + + """ + x = self.upscale1 (z) + x = self.res1 (x) + x = self.upscale2 (x) + x = self.res2 (x) + x = self.upscale3 (x) + x = self.res3 (x) + """ + + y = self.upscalem1 (z) + y = self.upscalem2 (y) + y = self.upscalem3 (y) + + return tf.nn.sigmoid(self.out_conv(x)), \ + tf.nn.sigmoid(self.out_convm(y)) + + device_config = nn.getCurrentDeviceConfig() + devices = device_config.devices resolution = self.resolution = 96 + ae_dims = 128 + e_dims = 128 + d_dims = 64 + self.pretrain = True + self.pretrain_just_disabled = False - class CommonModel(object): - def downscale (self, dim, kernel_size=5, dilation_rate=1): - def func(x): - return SubpixelDownscaler()(ELU()(Conv2D(dim // 4, kernel_size=kernel_size, strides=1, dilation_rate=dilation_rate, padding='same')(x))) - return func + masked_training = True - def upscale (self, dim, size=(2,2)): - def func(x): - return SubpixelUpscaler(size=size)(ELU()(Conv2D(dim * np.prod(size) , kernel_size=3, strides=1, padding='same')(x))) - return func + models_opt_on_gpu = len(devices) == 1 and devices[0].total_mem_gb >= 4 + models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0' + optimizer_vars_on_cpu = models_opt_device=='/CPU:0' - def ResidualBlock(self, dim): - def func(inp): - x = Conv2D(dim, kernel_size=3, padding='same')(inp) - x = LeakyReLU(0.2)(x) - x = Conv2D(dim, kernel_size=3, padding='same')(x) - x = Add()([x, inp]) - x = LeakyReLU(0.2)(x) - return x - return func + input_nc = 3 + output_nc = 3 + bgr_shape = (resolution, resolution, output_nc) + mask_shape = (resolution, resolution, 1) + lowest_dense_res = resolution // 16 - class QModel(CommonModel): - def __init__(self, resolution, ae_dims, e_dims, d_dims): - super().__init__() - bgr_shape = (resolution, resolution, 3) - mask_shape = (resolution, resolution, 1) - lowest_dense_res = resolution // 16 + self.model_filename_list = [] - def enc_flow(): - def func(inp): - x = self.downscale(e_dims, 3, 1 )(inp) - x = self.downscale(e_dims*2, 3, 1 )(x) - x = self.downscale(e_dims*4, 3, 1 )(x) - x0 = self.downscale(e_dims*8, 3, 1 )(x) - - x = self.downscale(e_dims, 3, 2 )(inp) - x = self.downscale(e_dims*2, 3, 2 )(x) - x = self.downscale(e_dims*4, 3, 2 )(x) - x1 = self.downscale(e_dims*8, 3, 2 )(x) - - x = Concatenate()([x0,x1]) - - x = DenseMaxout(ae_dims, kernel_initializer='orthogonal')(Flatten()(x)) - x = DenseMaxout(lowest_dense_res * lowest_dense_res * ae_dims, kernel_initializer='orthogonal')(x) - x = Reshape((lowest_dense_res, 
lowest_dense_res, ae_dims))(x) - - x = self.ResidualBlock(ae_dims)(x) - x = self.upscale(d_dims*8)(x) - x = self.ResidualBlock(d_dims*8)(x) - return x - return func - def dec_flow(): - def func(inp): - x = self.upscale(d_dims*4)(inp) - x = self.ResidualBlock(d_dims*4)(x) - x = self.upscale(d_dims*2)(x) - x = self.ResidualBlock(d_dims*2)(x) - x = self.upscale(d_dims)(x) - x = self.ResidualBlock(d_dims)(x) - - y = self.upscale(d_dims)(inp) - y = self.upscale(d_dims//2)(y) - y = self.upscale(d_dims//4)(y) - - return Conv2D(3, kernel_size=1, padding='same', activation='tanh')(x), \ - Conv2D(1, kernel_size=1, padding='same', activation='sigmoid')(y) + with tf.device ('/CPU:0'): + #Place holders on CPU + self.warped_src = tf.placeholder (tf.float32, (None,)+bgr_shape) + self.warped_dst = tf.placeholder (tf.float32, (None,)+bgr_shape) - return func + self.target_src = tf.placeholder (tf.float32, (None,)+bgr_shape) + self.target_dst = tf.placeholder (tf.float32, (None,)+bgr_shape) - self.encoder = modelify(enc_flow()) ( Input(bgr_shape) ) + self.target_srcm = tf.placeholder (tf.float32, (None,)+mask_shape) + self.target_dstm = tf.placeholder (tf.float32, (None,)+mask_shape) - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.decoder_src = modelify(dec_flow()) ( Input(sh) ) - self.decoder_dst = modelify(dec_flow()) ( Input(sh) ) + # Initializing model classes + with tf.device (models_opt_device): + self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, name='encoder') + encoder_out_ch = self.encoder.compute_output_shape ( (tf.float32, (None,resolution,resolution,input_nc)))[-1] - self.src_trainable_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights - self.dst_trainable_weights = self.encoder.trainable_weights + self.decoder_dst.trainable_weights + self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, d_ch=d_dims, name='inter') + inter_out_ch = self.inter.compute_output_shape ( (tf.float32, (None,encoder_out_ch)))[-1] - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - self.target_src, self.target_dst = Input(bgr_shape), Input(bgr_shape) - self.target_srcm, self.target_dstm = Input(mask_shape), Input(mask_shape) - - self.src_code = self.encoder(self.warped_src) - self.dst_code = self.encoder(self.warped_dst) + self.decoder_src = Decoder(in_ch=inter_out_ch, d_ch=d_dims, name='decoder_src') + self.decoder_dst = Decoder(in_ch=inter_out_ch, d_ch=d_dims, name='decoder_dst') - self.pred_src_src, self.pred_src_srcm = self.decoder_src(self.src_code) - self.pred_dst_dst, self.pred_dst_dstm = self.decoder_dst(self.dst_code) - self.pred_src_dst, self.pred_src_dstm = self.decoder_src(self.dst_code) + self.model_filename_list += [ [self.encoder, 'encoder.npy' ], + [self.inter, 'inter.npy' ], + [self.decoder_src, 'decoder_src.npy'], + [self.decoder_dst, 'decoder_dst.npy'] ] - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [] - if not exclude_for_pretrain: - ar += [ [self.encoder, 'encoder.h5'] ] - ar += [ [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] ] - - return ar + if self.is_training: + self.src_dst_trainable_weights = self.encoder.get_weights() + self.decoder_src.get_weights() + self.decoder_dst.get_weights() - self.model = QModel (resolution, 128, 64, 64) + # Initialize optimizers + self.src_dst_opt = nn.TFRMSpropOptimizer(lr=2e-4, lr_dropout=0.3, name='src_dst_opt') + self.src_dst_opt.initialize_variables(self.src_dst_trainable_weights, 
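# Aside: plain-Python shape bookkeeping for the encoder -> inter chain instantiated above,
# using the hyperparameters declared in this patch (resolution=96, e_dims=128, ae_dims=128,
# four stride-2 downscales in DownscaleBlock); no framework needed, just arithmetic.
resolution, e_dims, ae_dims = 96, 128, 128
lowest_dense_res = resolution // 16            # 6, matching four halvings: 96 -> 48 -> 24 -> 12 -> 6
side = resolution
channels = None
for i in range(4):
    side //= 2
    channels = e_dims * min(2 ** i, 8)         # 128, 256, 512, 1024 per DownscaleBlock
assert side == lowest_dense_res
encoder_flat = side * side * channels          # 6 * 6 * 1024 = 36864 features into Inter.dense1
inter_dense2 = lowest_dense_res * lowest_dense_res * ae_dims   # 4608, reshaped to (6, 6, 128)
print(encoder_flat, inter_dense2)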
vars_on_cpu=optimizer_vars_on_cpu ) + self.model_filename_list += [ (self.src_dst_opt, 'src_dst_opt.npy') ] - loaded, not_loaded = [], self.model.get_model_filename_list() - if not self.is_first_run(): - loaded, not_loaded = self.load_weights_safe(not_loaded) + if self.is_training: + # Adjust batch size for multiple GPU + gpu_count = max(1, len(devices) ) + bs_per_gpu = max(1, 4 // gpu_count) + self.set_batch_size( gpu_count*bs_per_gpu) - CA_models = [ model for model, _ in not_loaded ] - - self.CA_conv_weights_list = [] - for model in CA_models: - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - self.CA_conv_weights_list += [layer.weights[0]] #- is Conv2D kernel_weights - - if self.is_training_mode: - lr_dropout = 0.3 if nnlib.device.backend != 'plaidML' else 0.0 - self.src_dst_opt = RMSprop(lr=2e-4, lr_dropout=lr_dropout) - self.src_dst_mask_opt = RMSprop(lr=2e-4, lr_dropout=lr_dropout) - - target_src_masked = self.model.target_src*self.model.target_srcm - target_dst_masked = self.model.target_dst*self.model.target_dstm - - pred_src_src_masked = self.model.pred_src_src*self.model.target_srcm - pred_dst_dst_masked = self.model.pred_dst_dst*self.model.target_dstm + # Compute losses per GPU + gpu_pred_src_src_list = [] + gpu_pred_dst_dst_list = [] + gpu_pred_src_dst_list = [] + gpu_pred_src_srcm_list = [] + gpu_pred_dst_dstm_list = [] + gpu_pred_src_dstm_list = [] - src_loss = K.mean ( 10*dssim(kernel_size=int(resolution/11.6),max_value=2.0)( target_src_masked+1, pred_src_src_masked+1) ) - src_loss += K.mean ( 10*K.square( target_src_masked - pred_src_src_masked ) ) - src_loss += K.mean(K.square(self.model.target_srcm-self.model.pred_src_srcm)) + gpu_src_losses = [] + gpu_dst_losses = [] + gpu_src_dst_loss_gvs = [] - dst_loss = K.mean( 10*dssim(kernel_size=int(resolution/11.6),max_value=2.0)(target_dst_masked+1, pred_dst_dst_masked+1) ) - dst_loss += K.mean( 10*K.square( target_dst_masked - pred_dst_dst_masked ) ) - dst_loss += K.mean(K.square(self.model.target_dstm-self.model.pred_dst_dstm)) + for gpu_id in range(gpu_count): + with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ): + batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu ) + with tf.device(f'/CPU:0'): + # slice on CPU, otherwise all batch data will be transfered to GPU first + gpu_warped_src = self.warped_src [batch_slice,:,:,:] + gpu_warped_dst = self.warped_dst [batch_slice,:,:,:] + gpu_target_src = self.target_src [batch_slice,:,:,:] + gpu_target_dst = self.target_dst [batch_slice,:,:,:] + gpu_target_srcm = self.target_srcm[batch_slice,:,:,:] + gpu_target_dstm = self.target_dstm[batch_slice,:,:,:] - self.src_train = K.function ([self.model.warped_src, self.model.target_src, self.model.target_srcm], [src_loss], self.src_dst_opt.get_updates( src_loss, self.model.src_trainable_weights) ) - self.dst_train = K.function ([self.model.warped_dst, self.model.target_dst, self.model.target_dstm], [dst_loss], self.src_dst_opt.get_updates( dst_loss, self.model.dst_trainable_weights) ) - self.AE_view = K.function ([self.model.warped_src, self.model.warped_dst], [self.model.pred_src_src, self.model.pred_dst_dst, self.model.pred_dst_dstm, self.model.pred_src_dst, self.model.pred_src_dstm]) + # process model tensors + gpu_src_code = self.inter(self.encoder(gpu_warped_src)) + gpu_dst_code = self.inter(self.encoder(gpu_warped_dst)) + gpu_pred_src_src, gpu_pred_src_srcm = self.decoder_src(gpu_src_code) + gpu_pred_dst_dst, gpu_pred_dst_dstm = self.decoder_dst(gpu_dst_code) + gpu_pred_src_dst, 
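# Aside: a toy NumPy sketch of the data-parallel pattern used in this per-GPU loop: the batch
# is sliced per device (on the CPU, as above), each device computes its own gradient, and the
# per-device gradients are averaged further down before a single optimizer step. All names
# below are hypothetical; with equal slice sizes the averaged gradient equals the full-batch one.
import numpy as np

def slice_gradient(w, x_slice, y_slice):
    # Toy linear model with squared error: d/dw mean((w*x - y)^2).
    return np.mean(2.0 * (w * x_slice - y_slice) * x_slice)

w = 0.5
x = np.linspace(-1.0, 1.0, 8)                 # full batch of 8 samples
y = 3.0 * x
n_devices, bs_per_device = 2, 4

grads = []
for d in range(n_devices):
    sl = slice(d * bs_per_device, (d + 1) * bs_per_device)
    grads.append(slice_gradient(w, x[sl], y[sl]))

avg_grad = np.mean(grads)                     # average across devices
full_grad = slice_gradient(w, x, y)           # same value, computed on the whole batch
w -= 0.1 * avg_grad                           # one shared update
print(avg_grad, full_grad)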
gpu_pred_src_dstm = self.decoder_src(gpu_dst_code) + + gpu_pred_src_src_list.append(gpu_pred_src_src) + gpu_pred_dst_dst_list.append(gpu_pred_dst_dst) + gpu_pred_src_dst_list.append(gpu_pred_src_dst) + + gpu_pred_src_srcm_list.append(gpu_pred_src_srcm) + gpu_pred_dst_dstm_list.append(gpu_pred_dst_dstm) + gpu_pred_src_dstm_list.append(gpu_pred_src_dstm) + + gpu_target_srcm_blur = nn.tf_gaussian_blur(gpu_target_srcm, max(1, resolution // 32) ) + gpu_target_dstm_blur = nn.tf_gaussian_blur(gpu_target_dstm, max(1, resolution // 32) ) + + gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur + gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur) + + gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src + gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst + + gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src + gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst + + gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur + gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur) + + gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) + gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3]) + gpu_src_loss += tf.reduce_mean ( tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] ) + + gpu_dst_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1]) + gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3]) + gpu_dst_loss += tf.reduce_mean ( tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] ) + + gpu_src_losses += [gpu_src_loss] + gpu_dst_losses += [gpu_dst_loss] + + gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss + gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ] + + + # Average losses and gradients, and create optimizer update ops + with tf.device (models_opt_device): + if gpu_count == 1: + pred_src_src = gpu_pred_src_src_list[0] + pred_dst_dst = gpu_pred_dst_dst_list[0] + pred_src_dst = gpu_pred_src_dst_list[0] + pred_src_srcm = gpu_pred_src_srcm_list[0] + pred_dst_dstm = gpu_pred_dst_dstm_list[0] + pred_src_dstm = gpu_pred_src_dstm_list[0] + + src_loss = gpu_src_losses[0] + dst_loss = gpu_dst_losses[0] + src_dst_loss_gv = gpu_src_dst_loss_gvs[0] + else: + pred_src_src = tf.concat(gpu_pred_src_src_list, 0) + pred_dst_dst = tf.concat(gpu_pred_dst_dst_list, 0) + pred_src_dst = tf.concat(gpu_pred_src_dst_list, 0) + pred_src_srcm = tf.concat(gpu_pred_src_srcm_list, 0) + pred_dst_dstm = tf.concat(gpu_pred_dst_dstm_list, 0) + pred_src_dstm = tf.concat(gpu_pred_src_dstm_list, 0) + + src_loss = nn.tf_average_tensor_list(gpu_src_losses) + dst_loss = nn.tf_average_tensor_list(gpu_dst_losses) + src_dst_loss_gv = nn.tf_average_gv_list (gpu_src_dst_loss_gvs) + + src_dst_loss_gv_op = self.src_dst_opt.get_update_op (src_dst_loss_gv) + + # Initializing training and view functions + def src_dst_train(warped_src, target_src, target_srcm, \ + warped_dst, target_dst, target_dstm): + s, d, _ = nn.tf_sess.run ( [ src_loss, dst_loss, src_dst_loss_gv_op], + 
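For reference, the per-sample loss built in each tower combines a structural term and two L2 terms. A hedged TensorFlow sketch: masked_face_loss is a hypothetical name, tf.image.ssim stands in for the repo's nn.tf_dssim (with DSSIM taken as (1 - SSIM) / 2), mask_blur is the gaussian-blurred mask from the patch, and masked_training is assumed on.

import tensorflow as tf

def masked_face_loss(target, pred, target_mask, pred_mask, mask_blur, resolution):
    # 10 * DSSIM + 10 * L2 on the mask-weighted faces, plus a plain L2 on the mask channel.
    t = target * mask_blur
    p = pred * mask_blur
    dssim = (1.0 - tf.image.ssim(t, p, max_val=1.0,
                                 filter_size=int(resolution / 11.6))) / 2.0
    loss  = 10.0 * dssim                                             # per-sample structural term
    loss += tf.reduce_mean(10.0 * tf.square(t - p), axis=[1, 2, 3])  # per-sample L2 on the face
    loss += tf.reduce_mean(tf.square(target_mask - pred_mask), axis=[1, 2, 3])  # mask L2
    return loss  # shape (batch,); towers are averaged afterwards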
feed_dict={self.warped_src :warped_src, + self.target_src :target_src, + self.target_srcm:target_srcm, + self.warped_dst :warped_dst, + self.target_dst :target_dst, + self.target_dstm:target_dstm, + }) + s = np.mean(s) + d = np.mean(d) + return s, d + self.src_dst_train = src_dst_train + + def AE_view(warped_src, warped_dst): + return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm], + feed_dict={self.warped_src:warped_src, + self.warped_dst:warped_dst}) + + self.AE_view = AE_view else: - self.AE_convert = K.function ([self.model.warped_dst],[ self.model.pred_src_dst, self.model.pred_dst_dstm, self.model.pred_src_dstm ]) + # Initializing merge function + with tf.device( f'/GPU:0' if len(devices) != 0 else f'/CPU:0'): + gpu_dst_code = self.inter(self.encoder(self.warped_dst)) + gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder_src(gpu_dst_code) + _, gpu_pred_dst_dstm = self.decoder_dst(gpu_dst_code) - if self.is_training_mode: + def AE_merge( warped_dst): + return nn.tf_sess.run ( [gpu_pred_src_dst, gpu_pred_dst_dstm, gpu_pred_src_dstm], feed_dict={self.warped_dst:warped_dst}) + + self.AE_merge = AE_merge + + + + + # Loading/initializing all models/optimizers weights + for model, filename in io.progress_bar_generator(self.model_filename_list, "Initializing models"): + do_init = self.is_first_run() + + if self.pretrain_just_disabled: + if model == self.inter: + do_init = True + + if not do_init: + do_init = not model.load_weights( self.get_strpath_storage_for_file(filename) ) + + if do_init and self.pretrained_model_path is not None: + pretrained_filepath = self.pretrained_model_path / filename + if pretrained_filepath.exists(): + do_init = not model.load_weights(pretrained_filepath) + + if do_init: + model.init_weights() + + # initializing sample generators + + if self.is_training: t = SampleProcessor.Types + face_type = t.FACE_TYPE_FULL + + training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path() + training_data_dst_path = self.training_data_dst_path if not self.pretrain else self.get_pretraining_data_path() + + cpu_count = multiprocessing.cpu_count() + + src_generators_count = cpu_count // 2 + dst_generators_count = cpu_count - src_generators_count self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=False, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution': resolution, 'normalize_tanh':True }, - {'types' : (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution': resolution, 'normalize_tanh':True }, - {'types' : (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution': resolution } ] - ), + SampleGeneratorFace(training_data_src_path, debug=self.is_debug(), batch_size=self.get_batch_size(), + sample_process_options=SampleProcessor.Options(random_flip=True if self.pretrain else False), + output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':resolution, }, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution': resolution, }, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution } ], + generators_count=src_generators_count ), - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - 
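The "Initializing models" loop a few lines above resolves weights in a fixed order. A compact restatement of that cascade (load_or_init is a hypothetical wrapper; load_weights and init_weights are the model methods the patch calls):

from pathlib import Path

def load_or_init(model, filename, storage_dir, pretrained_dir=None, force_init=False):
    # Order used by the patch: a freshly created (or deliberately reset, e.g. inter after
    # pretrain is disabled) model -> try this model's saved weights -> try the pretrained
    # model path -> fall back to random initialization.
    do_init = force_init
    if not do_init:
        do_init = not model.load_weights(str(Path(storage_dir) / filename))
    if do_init and pretrained_dir is not None:
        pretrained_filepath = Path(pretrained_dir) / filename
        if pretrained_filepath.exists():
            do_init = not model.load_weights(str(pretrained_filepath))
    if do_init:
        model.init_weights()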
sample_process_options=SampleProcessor.Options(random_flip=False, ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution': resolution, 'normalize_tanh':True }, - {'types' : (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution': resolution, 'normalize_tanh':True }, - {'types' : (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution': resolution} ]) + SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.get_batch_size(), + sample_process_options=SampleProcessor.Options(random_flip=True if self.pretrain else False), + output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':resolution}, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution': resolution}, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution} ], + generators_count=dst_generators_count ) ]) - self.counter = 0 - + + self.last_samples = None + #override def get_model_filename_list(self): - return self.model.get_model_filename_list () + return self.model_filename_list #override def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) + for model, filename in io.progress_bar_generator(self.get_model_filename_list(), "Saving", leave=False): + model.save_weights ( self.get_strpath_storage_for_file(filename) ) + #override - def on_success_train_one_iter(self): - if len(self.CA_conv_weights_list) != 0: - exec(nnlib.import_all(), locals(), globals()) - CAInitializerMP ( self.CA_conv_weights_list ) - self.CA_conv_weights_list = [] - - #override - def onTrainOneIter(self, generators_samples, generators_list): - warped_src, target_src, target_srcm = generators_samples[0] - warped_dst, target_dst, target_dstm = generators_samples[1] - - self.counter += 1 - if self.counter % 3 == 0: - src_loss, = self.src_train ([warped_src, target_src, target_srcm]) - dst_loss, = self.dst_train ([warped_dst, target_dst, target_dstm]) + def onTrainOneIter(self): + if self.get_iter() % 3 == 0 and self.last_samples is not None: + ( (warped_src, target_src, target_srcm), \ + (warped_dst, target_dst, target_dstm) ) = self.last_samples + src_loss, dst_loss = self.src_dst_train (target_src, target_src, target_srcm, + target_dst, target_dst, target_dstm) else: - src_loss, = self.src_train ([target_src, target_src, target_srcm]) - dst_loss, = self.dst_train ([target_dst, target_dst, target_dstm]) + samples = self.last_samples = self.generate_next_samples() + ( (warped_src, target_src, target_srcm), \ + (warped_dst, target_dst, target_dstm) ) = samples + src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm, + warped_dst, target_dst, target_dstm) + return ( ('src_loss', src_loss), ('dst_loss', dst_loss), ) #override - def onGetPreview(self, sample): - test_S = sample[0][1][0:4] #first 4 samples - test_S_m = sample[0][2][0:4] #first 4 samples - test_D = sample[1][1][0:4] - test_D_m = sample[1][2][0:4] + def onGetPreview(self, samples): + n_samples = min(4, self.get_batch_size() ) - S, D, SS, DD, DDM, SD, SDM = [test_S,test_D] + self.AE_view ([test_S, test_D]) - S, D, SS, DD, SD, = [ np.clip(x/2+0.5, 0.0, 1.0) for x in [S, D, SS, DD, SD] ] - DDM, SDM, = [ np.clip( np.repeat (x, (3,), -1), 0, 1) for x in [DDM, SDM] ] + ( (warped_src, target_src, target_srcm), + (warped_dst, target_dst, target_dstm) ) = \ + [ [sample[0:n_samples] for sample in sample_list ] + for sample_list in samples ] + + S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x 
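The Quick96 onTrainOneIter above alternates between ordinary iterations and every third iteration that reuses the previous batch with the unwarped targets as input. The same schedule extracted into a standalone sketch; the wrapper signature (generate_next_samples, src_dst_train passed as callables) is an assumption made only for self-containment:

def train_one_iter(iter_n, last_samples, generate_next_samples, src_dst_train):
    # Every third iteration: reuse the last batch and feed the *unwarped* targets as input,
    # nudging the autoencoder toward identity reconstruction.
    if iter_n % 3 == 0 and last_samples is not None:
        (warped_src, target_src, target_srcm), (warped_dst, target_dst, target_dstm) = last_samples
        src_loss, dst_loss = src_dst_train(target_src, target_src, target_srcm,
                                           target_dst, target_dst, target_dstm)
    else:
        last_samples = generate_next_samples()
        (warped_src, target_src, target_srcm), (warped_dst, target_dst, target_dstm) = last_samples
        src_loss, dst_loss = src_dst_train(warped_src, target_src, target_srcm,
                                           warped_dst, target_dst, target_dstm)
    return src_loss, dst_loss, last_samples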
in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ] + DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] result = [] st = [] - for i in range(len(test_S)): + for i in range(n_samples): ar = S[i], SS[i], D[i], DD[i], SD[i] st.append ( np.concatenate ( ar, axis=1) ) result += [ ('Quick96', np.concatenate (st, axis=0 )), ] - + st_m = [] - for i in range(len(test_S)): - ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) + for i in range(n_samples): + ar = S[i]*target_srcm[i], SS[i], D[i]*target_dstm[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) st_m.append ( np.concatenate ( ar, axis=1) ) result += [ ('Quick96 masked', np.concatenate (st_m, axis=0 )), ] return result - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.AE_convert ([ np.zeros ( (1, self.resolution, self.resolution, 3), dtype=np.float32 ) ]) - else: - face = face * 2 - 1 - bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) - bgr = bgr /2 + 0.5 - mask = mask_dst_dstm[0] * mask_src_dstm[0] - return bgr[0], mask[...,0] + def predictor_func (self, face=None): + + bgr, mask_dst_dstm, mask_src_dstm = self.AE_merge (face[np.newaxis,...]) + mask = mask_dst_dstm[0] * mask_src_dstm[0] + return bgr[0], mask[...,0] #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (self.resolution, self.resolution, 3), converters.ConverterConfigMasked(face_type=FaceType.FULL, - default_mode='seamless', clip_hborder_mask_per=0.0625) + def get_MergerConfig(self): + face_type = FaceType.FULL -Model = Quick96Model + import merger + return self.predictor_func, (self.resolution, self.resolution, 3), merger.MergerConfigMasked(face_type=face_type, + default_mode = 'overlay', + clip_hborder_mask_per=0.0625 if (face_type != FaceType.HALF) else 0, + ) + +Model = QModel diff --git a/models/Model_SAE/Model.py b/models/Model_SAE/Model.py deleted file mode 100644 index 28f5904..0000000 --- a/models/Model_SAE/Model.py +++ /dev/null @@ -1,568 +0,0 @@ -from functools import partial - -import numpy as np - -import mathlib -from facelib import FaceType -from interact import interact as io -from models import ModelBase -from nnlib import nnlib -from samplelib import * - - -#SAE - Styled AutoEncoder -class SAEModel(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - yn_str = {True:'y',False:'n'} - - default_resolution = 128 - default_archi = 'df' - default_face_type = 'f' - - - if is_first_run: - resolution = io.input_int("Resolution ( 64-256 ?:help skip:128) : ", default_resolution, help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") - resolution = np.clip (resolution, 64, 256) - while np.modf(resolution / 16)[0] != 0.0: - resolution -= 1 - self.options['resolution'] = resolution - - self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() - else: - self.options['resolution'] = self.options.get('resolution', default_resolution) - self.options['face_type'] = self.options.get('face_type', default_face_type) - - default_learn_mask = self.options.get('learn_mask', True) - if is_first_run or ask_override: - self.options['learn_mask'] = io.input_bool ( f"Learn mask? 
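onGetPreview assembles its images with plain NumPy concatenation. A minimal sketch of the same grid layout (preview_grid is a hypothetical helper name):

import numpy as np

def preview_grid(S, SS, D, DD, SD, n_samples=4):
    # One row per sample: [src, src->src, dst, dst->dst, src->dst] side by side,
    # rows stacked vertically, as in the 'Quick96' preview.
    rows = [np.concatenate((S[i], SS[i], D[i], DD[i], SD[i]), axis=1)
            for i in range(n_samples)]
    return np.concatenate(rows, axis=0)

# Single-channel masks are expanded to 3 channels first, e.g. np.repeat(mask, 3, axis=-1).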
(y/n, ?:help skip:{yn_str[default_learn_mask]} ) : " , default_learn_mask, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case converter forced to use 'not predicted mask' that is not smooth as predicted. Model with style values can be learned without mask and produce same quality result.") - else: - self.options['learn_mask'] = self.options.get('learn_mask', default_learn_mask) - - if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend: - def_optimizer_mode = self.options.get('optimizer_mode', 1) - self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train x2 bigger network consuming RAM. 3 - allows you to train x3 bigger network consuming huge amount of RAM and slower, depends on CPU power.") - else: - self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1) - - if is_first_run: - self.options['archi'] = io.input_str ("AE architecture (df, liae ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes.").lower() #-s version is slower, but has decreased change to collapse. - else: - self.options['archi'] = self.options.get('archi', default_archi) - - default_ae_dims = 256 if 'liae' in self.options['archi'] else 512 - default_e_ch_dims = 42 - default_d_ch_dims = default_e_ch_dims // 2 - def_ca_weights = False - - if is_first_run: - self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dims (32-1024 ?:help skip:%d) : " % (default_ae_dims) , default_ae_dims, help_message="All face information will packed to AE dims. If amount of AE dims are not enough, then for example closed eyes will not be recognized. More dims are better, but require more VRAM. You can fine-tune model size to fit your GPU." ), 32, 1024 ) - self.options['e_ch_dims'] = np.clip ( io.input_int("Encoder dims per channel (21-85 ?:help skip:%d) : " % (default_e_ch_dims) , default_e_ch_dims, help_message="More encoder dims help to recognize more facial features, but require more VRAM. You can fine-tune model size to fit your GPU." ), 21, 85 ) - default_d_ch_dims = self.options['e_ch_dims'] // 2 - self.options['d_ch_dims'] = np.clip ( io.input_int("Decoder dims per channel (10-85 ?:help skip:%d) : " % (default_d_ch_dims) , default_d_ch_dims, help_message="More decoder dims help to get better details, but require more VRAM. You can fine-tune model size to fit your GPU." ), 10, 85 ) - self.options['ca_weights'] = io.input_bool (f"Use CA weights? (y/n, ?:help skip:{yn_str[def_ca_weights]} ) : ", def_ca_weights, help_message="Initialize network with 'Convolution Aware' weights. This may help to achieve a higher accuracy model, but consumes a time at first run.") - else: - self.options['ae_dims'] = self.options.get('ae_dims', default_ae_dims) - self.options['e_ch_dims'] = self.options.get('e_ch_dims', default_e_ch_dims) - self.options['d_ch_dims'] = self.options.get('d_ch_dims', default_d_ch_dims) - self.options['ca_weights'] = self.options.get('ca_weights', def_ca_weights) - - default_face_style_power = 0.0 - default_bg_style_power = 0.0 - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool (f"Use pixel loss? 
(y/n, ?:help skip:{yn_str[def_pixel_loss]} ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time. Enabling this option too early increases the chance of model collapse.") - - default_face_style_power = default_face_style_power if is_first_run else self.options.get('face_style_power', default_face_style_power) - self.options['face_style_power'] = np.clip ( io.input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power, - help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_bg_style_power = default_bg_style_power if is_first_run else self.options.get('bg_style_power', default_bg_style_power) - self.options['bg_style_power'] = np.clip ( io.input_number("Background style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power, - help_message="Learn to transfer image around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_ct_mode = self.options.get('ct_mode', 'none') - self.options['ct_mode'] = io.input_str (f"Color transfer mode apply to src faceset. ( none/rct/lct/mkl/idt/sot, ?:help skip:{default_ct_mode}) : ", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.") - - if nnlib.device.backend != 'plaidML': # todo https://github.com/plaidml/plaidml/issues/301 - default_clipgrad = False if is_first_run else self.options.get('clipgrad', False) - self.options['clipgrad'] = io.input_bool (f"Enable gradient clipping? (y/n, ?:help skip:{yn_str[default_clipgrad]}) : ", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.") - else: - self.options['clipgrad'] = False - - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power) - self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power) - self.options['ct_mode'] = self.options.get('ct_mode', 'none') - self.options['clipgrad'] = self.options.get('clipgrad', False) - - if is_first_run: - self.options['pretrain'] = io.input_bool ("Pretrain the model? (y/n, ?:help skip:n) : ", False, help_message="Pretrain the model with large amount of various faces. This technique may help to train the fake with overly different face shapes and light conditions of src/dst data. Face will be look more like a morphed. To reduce the morph effect, some model files will be initialized but not be updated after pretrain: LIAE: inter_AB.h5 DF: encoder.h5. The longer you pretrain the model the more morphed face will look. 
After that, save and run the training again.") - else: - self.options['pretrain'] = False - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements({1.5:4}) - - resolution = self.options['resolution'] - learn_mask = self.options['learn_mask'] - - ae_dims = self.options['ae_dims'] - e_ch_dims = self.options['e_ch_dims'] - d_ch_dims = self.options['d_ch_dims'] - self.pretrain = self.options['pretrain'] = self.options.get('pretrain', False) - if not self.pretrain: - self.options.pop('pretrain') - - bgr_shape = (resolution, resolution, 3) - mask_shape = (resolution, resolution, 1) - - masked_training = True - - class SAEDFModel(object): - def __init__(self, resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask): - super().__init__() - self.learn_mask = learn_mask - - output_nc = 3 - bgr_shape = (resolution, resolution, output_nc) - mask_shape = (resolution, resolution, 1) - lowest_dense_res = resolution // 16 - e_dims = output_nc*e_ch_dims - - def upscale (dim): - def func(x): - return SubpixelUpscaler()(LeakyReLU(0.1)(Conv2D(dim * 4, kernel_size=3, strides=1, padding='valid')(ZeroPadding2D(1)(x)))) - return func - - def enc_flow(e_dims, ae_dims, lowest_dense_res): - def func(x): - x = LeakyReLU(0.1)(Conv2D(e_dims, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*2, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*4, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*8, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - - x = Dense(ae_dims)(Flatten()(x)) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims))(x) - x = upscale(ae_dims)(x) - return x - return func - - def dec_flow(output_nc, d_ch_dims, add_residual_blocks=True): - dims = output_nc * d_ch_dims - def ResidualBlock(dim): - def func(inp): - x = Conv2D(dim, kernel_size=3, padding='valid')(ZeroPadding2D(1)(inp)) - x = LeakyReLU(0.2)(x) - x = Conv2D(dim, kernel_size=3, padding='valid')(ZeroPadding2D(1)(x)) - x = Add()([x, inp]) - x = LeakyReLU(0.2)(x) - return x - return func - - def func(x): - x = upscale(dims*8)(x) - - if add_residual_blocks: - x = ResidualBlock(dims*8)(x) - x = ResidualBlock(dims*8)(x) - - x = upscale(dims*4)(x) - - if add_residual_blocks: - x = ResidualBlock(dims*4)(x) - x = ResidualBlock(dims*4)(x) - - x = upscale(dims*2)(x) - - if add_residual_blocks: - x = ResidualBlock(dims*2)(x) - x = ResidualBlock(dims*2)(x) - - return Conv2D(output_nc, kernel_size=5, padding='valid', activation='sigmoid')(ZeroPadding2D(2)(x)) - return func - - self.encoder = modelify(enc_flow(e_dims, ae_dims, lowest_dense_res)) ( Input(bgr_shape) ) - - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.decoder_src = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - self.decoder_dst = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - - if learn_mask: - self.decoder_srcm = modelify(dec_flow(1, d_ch_dims, add_residual_blocks=False)) ( Input(sh) ) - self.decoder_dstm = modelify(dec_flow(1, d_ch_dims, add_residual_blocks=False)) ( Input(sh) ) - - self.src_dst_trainable_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights + self.decoder_dst.trainable_weights - - if learn_mask: - self.src_dst_mask_trainable_weights = self.encoder.trainable_weights + self.decoder_srcm.trainable_weights + 
self.decoder_dstm.trainable_weights - - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - src_code, dst_code = self.encoder(self.warped_src), self.encoder(self.warped_dst) - - self.pred_src_src = self.decoder_src(src_code) - self.pred_dst_dst = self.decoder_dst(dst_code) - self.pred_src_dst = self.decoder_src(dst_code) - - if learn_mask: - self.pred_src_srcm = self.decoder_srcm(src_code) - self.pred_dst_dstm = self.decoder_dstm(dst_code) - self.pred_src_dstm = self.decoder_srcm(dst_code) - - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [] - if not exclude_for_pretrain: - ar += [ [self.encoder, 'encoder.h5'] ] - ar += [ [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] ] - if self.learn_mask: - ar += [ [self.decoder_srcm, 'decoder_srcm.h5'], - [self.decoder_dstm, 'decoder_dstm.h5'] ] - return ar - - class SAELIAEModel(object): - def __init__(self, resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask): - super().__init__() - self.learn_mask = learn_mask - - output_nc = 3 - bgr_shape = (resolution, resolution, output_nc) - mask_shape = (resolution, resolution, 1) - - e_dims = output_nc*e_ch_dims - - lowest_dense_res = resolution // 16 - - def upscale (dim): - def func(x): - return SubpixelUpscaler()(LeakyReLU(0.1)(Conv2D(dim * 4, kernel_size=3, strides=1, padding='valid')(ZeroPadding2D(1)(x)))) - return func - - def enc_flow(e_dims): - def func(x): - x = LeakyReLU(0.1)(Conv2D(e_dims, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*2, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*4, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*8, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = Flatten()(x) - return x - return func - - def inter_flow(lowest_dense_res, ae_dims): - def func(x): - x = Dense(ae_dims)(x) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims*2)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims*2))(x) - x = upscale(ae_dims*2)(x) - return x - return func - - def dec_flow(output_nc, d_ch_dims, add_residual_blocks=True): - d_dims = output_nc*d_ch_dims - def ResidualBlock(dim): - def func(inp): - x = Conv2D(dim, kernel_size=3, padding='valid')(ZeroPadding2D(1)(inp)) - x = LeakyReLU(0.2)(x) - x = Conv2D(dim, kernel_size=3, padding='valid')(ZeroPadding2D(1)(inp)) - x = Add()([x, inp]) - x = LeakyReLU(0.2)(x) - return x - return func - - def func(x): - x = upscale(d_dims*8)(x) - - if add_residual_blocks: - x = ResidualBlock(d_dims*8)(x) - x = ResidualBlock(d_dims*8)(x) - - x = upscale(d_dims*4)(x) - - if add_residual_blocks: - x = ResidualBlock(d_dims*4)(x) - x = ResidualBlock(d_dims*4)(x) - - x = upscale(d_dims*2)(x) - - if add_residual_blocks: - x = ResidualBlock(d_dims*2)(x) - x = ResidualBlock(d_dims*2)(x) - - return Conv2D(output_nc, kernel_size=5, padding='valid', activation='sigmoid')(ZeroPadding2D(2)(x)) - return func - - self.encoder = modelify(enc_flow(e_dims)) ( Input(bgr_shape) ) - - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.inter_B = modelify(inter_flow(lowest_dense_res, ae_dims)) ( Input(sh) ) - self.inter_AB = modelify(inter_flow(lowest_dense_res, ae_dims)) ( Input(sh) ) - - sh = np.array(K.int_shape( self.inter_B.outputs[0] )[1:])*(1,1,2) - self.decoder = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - - if learn_mask: - self.decoderm = modelify(dec_flow(1, d_ch_dims, 
add_residual_blocks=False)) ( Input(sh) ) - - self.src_dst_trainable_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights - - if learn_mask: - self.src_dst_mask_trainable_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoderm.trainable_weights - - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - - warped_src_code = self.encoder (self.warped_src) - warped_src_inter_AB_code = self.inter_AB (warped_src_code) - warped_src_inter_code = Concatenate()([warped_src_inter_AB_code,warped_src_inter_AB_code]) - - warped_dst_code = self.encoder (self.warped_dst) - warped_dst_inter_B_code = self.inter_B (warped_dst_code) - warped_dst_inter_AB_code = self.inter_AB (warped_dst_code) - warped_dst_inter_code = Concatenate()([warped_dst_inter_B_code,warped_dst_inter_AB_code]) - - warped_src_dst_inter_code = Concatenate()([warped_dst_inter_AB_code,warped_dst_inter_AB_code]) - - self.pred_src_src = self.decoder(warped_src_inter_code) - self.pred_dst_dst = self.decoder(warped_dst_inter_code) - self.pred_src_dst = self.decoder(warped_src_dst_inter_code) - - if learn_mask: - self.pred_src_srcm = self.decoderm(warped_src_inter_code) - self.pred_dst_dstm = self.decoderm(warped_dst_inter_code) - self.pred_src_dstm = self.decoderm(warped_src_dst_inter_code) - - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [ [self.encoder, 'encoder.h5'], - [self.inter_B, 'inter_B.h5'] ] - - if not exclude_for_pretrain: - ar += [ [self.inter_AB, 'inter_AB.h5'] ] - - ar += [ [self.decoder, 'decoder.h5'] ] - - if self.learn_mask: - ar += [ [self.decoderm, 'decoderm.h5'] ] - - return ar - - if 'df' in self.options['archi']: - self.model = SAEDFModel (resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask) - elif 'liae' in self.options['archi']: - self.model = SAELIAEModel (resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask) - - loaded, not_loaded = [], self.model.get_model_filename_list() - if not self.is_first_run(): - loaded, not_loaded = self.load_weights_safe(not_loaded) - - CA_models = [] - if self.options.get('ca_weights', False): - CA_models += [ model for model, _ in not_loaded ] - - CA_conv_weights_list = [] - for model in CA_models: - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - CA_conv_weights_list += [layer.weights[0]] #- is Conv2D kernel_weights - - if len(CA_conv_weights_list) != 0: - CAInitializerMP ( CA_conv_weights_list ) - - warped_src = self.model.warped_src - target_src = Input ( (resolution, resolution, 3) ) - target_srcm = Input ( (resolution, resolution, 1) ) - - warped_dst = self.model.warped_dst - target_dst = Input ( (resolution, resolution, 3) ) - target_dstm = Input ( (resolution, resolution, 1) ) - - target_src_sigm = target_src - target_dst_sigm = target_dst - - target_srcm_sigm = gaussian_blur( max(1, K.int_shape(target_srcm)[1] // 32) )(target_srcm) - target_dstm_sigm = gaussian_blur( max(1, K.int_shape(target_dstm)[1] // 32) )(target_dstm) - target_dstm_anti_sigm = 1.0 - target_dstm_sigm - - target_src_masked = target_src_sigm*target_srcm_sigm - target_dst_masked = target_dst_sigm*target_dstm_sigm - target_dst_anti_masked = target_dst_sigm*target_dstm_anti_sigm - - target_src_masked_opt = target_src_masked if masked_training else target_src_sigm - target_dst_masked_opt = target_dst_masked if masked_training else target_dst_sigm - - pred_src_src = self.model.pred_src_src - 
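The removed LIAE variant routes codes through two intermediate networks before one shared decoder. A NumPy sketch of that routing (liae_codes is a hypothetical name; encoder, inter_B and inter_AB stand for the corresponding sub-models and are assumed to return arrays):

import numpy as np

def liae_codes(encoder, inter_B, inter_AB, warped_src, warped_dst):
    # LIAE routing from the removed SAE model: the src path uses inter_AB twice, the dst
    # path uses inter_B + inter_AB, and the face swap reuses dst's inter_AB code twice so
    # the shared decoder renders it "as src".
    ab_src = inter_AB(encoder(warped_src))
    dst_enc = encoder(warped_dst)
    b_dst, ab_dst = inter_B(dst_enc), inter_AB(dst_enc)
    src_code  = np.concatenate([ab_src, ab_src], axis=-1)
    dst_code  = np.concatenate([b_dst, ab_dst], axis=-1)
    swap_code = np.concatenate([ab_dst, ab_dst], axis=-1)
    return src_code, dst_code, swap_code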
pred_dst_dst = self.model.pred_dst_dst - pred_src_dst = self.model.pred_src_dst - if learn_mask: - pred_src_srcm = self.model.pred_src_srcm - pred_dst_dstm = self.model.pred_dst_dstm - pred_src_dstm = self.model.pred_src_dstm - - pred_src_src_sigm = self.model.pred_src_src - pred_dst_dst_sigm = self.model.pred_dst_dst - pred_src_dst_sigm = self.model.pred_src_dst - - pred_src_src_masked = pred_src_src_sigm*target_srcm_sigm - pred_dst_dst_masked = pred_dst_dst_sigm*target_dstm_sigm - - pred_src_src_masked_opt = pred_src_src_masked if masked_training else pred_src_src_sigm - pred_dst_dst_masked_opt = pred_dst_dst_masked if masked_training else pred_dst_dst_sigm - - psd_target_dst_masked = pred_src_dst_sigm*target_dstm_sigm - psd_target_dst_anti_masked = pred_src_dst_sigm*target_dstm_anti_sigm - - if self.is_training_mode: - self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - - if not self.options['pixel_loss']: - src_loss = K.mean ( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_opt, pred_src_src_masked_opt) ) - else: - src_loss = K.mean ( 50*K.square( target_src_masked_opt - pred_src_src_masked_opt ) ) - - face_style_power = self.options['face_style_power'] / 100.0 - if face_style_power != 0: - src_loss += style_loss(gaussian_blur_radius=resolution//16, loss_weight=face_style_power, wnd_size=0)( psd_target_dst_masked, target_dst_masked ) - - bg_style_power = self.options['bg_style_power'] / 100.0 - if bg_style_power != 0: - if not self.options['pixel_loss']: - src_loss += K.mean( (10*bg_style_power)*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked, target_dst_anti_masked )) - else: - src_loss += K.mean( (50*bg_style_power)*K.square( psd_target_dst_anti_masked - target_dst_anti_masked )) - - if not self.options['pixel_loss']: - dst_loss = K.mean( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)(target_dst_masked_opt, pred_dst_dst_masked_opt) ) - else: - dst_loss = K.mean( 50*K.square( target_dst_masked_opt - pred_dst_dst_masked_opt ) ) - - self.src_dst_train = K.function ([warped_src, warped_dst, target_src, target_srcm, target_dst, target_dstm],[src_loss,dst_loss], self.src_dst_opt.get_updates(src_loss+dst_loss, self.model.src_dst_trainable_weights) ) - - if self.options['learn_mask']: - src_mask_loss = K.mean(K.square(target_srcm-pred_src_srcm)) - dst_mask_loss = K.mean(K.square(target_dstm-pred_dst_dstm)) - self.src_dst_mask_train = K.function ([warped_src, warped_dst, target_srcm, target_dstm],[src_mask_loss, dst_mask_loss], self.src_dst_mask_opt.get_updates(src_mask_loss+dst_mask_loss, self.model.src_dst_mask_trainable_weights ) ) - - if self.options['learn_mask']: - self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm]) - else: - self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src, pred_dst_dst, pred_src_dst ]) - - else: - if self.options['learn_mask']: - self.AE_convert = K.function ([warped_dst],[ pred_src_dst, pred_dst_dstm, pred_src_dstm ]) - else: - self.AE_convert = K.function ([warped_dst],[ pred_src_dst ]) - - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - 
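The background style term in the removed SAE loss compares the swapped face with the real dst only outside the blurred mask. A hedged sketch, with tf.image.ssim again standing in for the repo's dssim helper and bg_style_power assumed to be the option value already divided by 100:

import tensorflow as tf

def bg_style_term(pred_src_dst, target_dst, dst_mask_blur, bg_style_power, resolution):
    # Penalize differences between the src->dst prediction and the real dst in the
    # anti-masked (background) region; the 10 * bg_style_power weight follows the patch.
    anti = 1.0 - dst_mask_blur
    a, b = pred_src_dst * anti, target_dst * anti
    dssim = (1.0 - tf.image.ssim(a, b, max_val=1.0,
                                 filter_size=int(resolution / 11.6))) / 2.0
    return tf.reduce_mean(10.0 * bg_style_power * dssim)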
t_mode_bgr = t.MODE_BGR if not self.pretrain else t.MODE_BGR_SHUFFLE - - training_data_src_path = self.training_data_src_path - training_data_dst_path = self.training_data_dst_path - - if self.pretrain and self.pretraining_data_path is not None: - training_data_src_path = self.pretraining_data_path - training_data_dst_path = self.pretraining_data_path - - self.set_training_data_generators ([ - SampleGeneratorFace(training_data_src_path, random_ct_samples_path=training_data_dst_path if self.options['ct_mode'] != 'none' else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution, 'ct_mode': self.options['ct_mode'] }, - {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution, 'ct_mode': self.options['ct_mode'] }, - {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution } ] - ), - - SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution}, - {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution}, - {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution} ]) - ]) - - #override - def get_model_filename_list(self): - ar = self.model.get_model_filename_list ( exclude_for_pretrain=(self.pretrain and self.iter != 0) ) - return ar - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, generators_samples, generators_list): - warped_src, target_src, target_srcm = generators_samples[0] - warped_dst, target_dst, target_dstm = generators_samples[1] - - feed = [warped_src, warped_dst, target_src, target_srcm, target_dst, target_dstm] - - src_loss, dst_loss, = self.src_dst_train (feed) - - if self.options['learn_mask']: - feed = [ warped_src, warped_dst, target_srcm, target_dstm ] - src_mask_loss, dst_mask_loss, = self.src_dst_mask_train (feed) - - return ( ('src_loss', src_loss), ('dst_loss', dst_loss), ) - - #override - def onGetPreview(self, sample): - test_S = sample[0][1][0:4] #first 4 samples - test_S_m = sample[0][2][0:4] #first 4 samples - test_D = sample[1][1][0:4] - test_D_m = sample[1][2][0:4] - - if self.options['learn_mask']: - S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] - DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] - else: - S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] - - result = [] - st = [] - for i in range(len(test_S)): - ar = S[i], SS[i], D[i], DD[i], SD[i] - - st.append ( np.concatenate ( ar, axis=1) ) - - result += [ ('SAE', np.concatenate (st, axis=0 )), ] - - if self.options['learn_mask']: - st_m = [] - for i in range(len(test_S)): - ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) - st_m.append ( np.concatenate ( ar, axis=1) ) - - result += [ ('SAE masked', np.concatenate (st_m, axis=0 )), ] - - return result - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.AE_convert ([ np.zeros ( (1, self.options['resolution'], 
self.options['resolution'], 3), dtype=np.float32 ) ]) - else: - if self.options['learn_mask']: - bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) - mask = mask_dst_dstm[0] * mask_src_dstm[0] - return bgr[0], mask[...,0] - else: - bgr, = self.AE_convert ([face[np.newaxis,...]]) - return bgr[0] - - #override - def get_ConverterConfig(self): - face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - import converters - return self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), converters.ConverterConfigMasked(face_type=face_type, - default_mode = 'overlay' if self.options['ct_mode'] != 'none' or self.options['face_style_power'] or self.options['bg_style_power'] else 'seamless', - clip_hborder_mask_per=0.0625 if (self.options['face_type'] == 'f') else 0, - ) - -Model = SAEModel diff --git a/models/Model_SAE/__init__.py b/models/Model_SAE/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_SAE/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_SAEHD/Model.py b/models/Model_SAEHD/Model.py index 180b2a1..67a19f3 100644 --- a/models/Model_SAEHD/Model.py +++ b/models/Model_SAEHD/Model.py @@ -1,524 +1,666 @@ +import multiprocessing from functools import partial import numpy as np -import mathlib +from core import mathlib +from core.interact import interact as io +from core.leras import nn from facelib import FaceType -from interact import interact as io from models import ModelBase -from nnlib import nnlib from samplelib import * - -#SAE - Styled AutoEncoder class SAEHDModel(ModelBase): #override - def onInitializeOptions(self, is_first_run, ask_override): + def on_initialize_options(self): + device_config = nn.getCurrentDeviceConfig() + + lowest_vram = 2 + if len(device_config.devices) != 0: + lowest_vram = device_config.devices.get_worst_device().total_mem_gb + + if lowest_vram >= 4: + suggest_batch_size = 8 + else: + suggest_batch_size = 4 + yn_str = {True:'y',False:'n'} + ask_override = self.ask_override() - default_resolution = 128 - default_archi = 'df' - default_face_type = 'f' + if self.is_first_run() or ask_override: + self.ask_enable_autobackup() + self.ask_write_preview_history() + self.ask_target_iter() + self.ask_random_flip() + self.ask_batch_size(suggest_batch_size) + default_resolution = self.options['resolution'] = self.load_or_def_option('resolution', 128) + default_face_type = self.options['face_type'] = self.load_or_def_option('face_type', 'f') + default_models_opt_on_gpu = self.options['models_opt_on_gpu'] = self.load_or_def_option('models_opt_on_gpu', True) + default_archi = self.options['archi'] = self.load_or_def_option('archi', 'dfhd') + default_ae_dims = self.options['ae_dims'] = self.load_or_def_option('ae_dims', 256) + default_e_dims = self.options['e_dims'] = self.load_or_def_option('e_dims', 64) + default_d_dims = self.options['d_dims'] = self.load_or_def_option('d_dims', 64) + + default_d_mask_dims = default_d_dims // 3 + default_d_mask_dims += default_d_mask_dims % 2 + default_d_mask_dims = self.options['d_mask_dims'] = self.load_or_def_option('d_mask_dims', default_d_mask_dims) + + default_learn_mask = self.options['learn_mask'] = self.load_or_def_option('learn_mask', True) + default_lr_dropout = self.options['lr_dropout'] = self.load_or_def_option('lr_dropout', False) + default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True) + default_true_face_training = 
self.options['true_face_training'] = self.load_or_def_option('true_face_training', False) + default_face_style_power = self.options['face_style_power'] = self.load_or_def_option('face_style_power', 0.0) + default_bg_style_power = self.options['bg_style_power'] = self.load_or_def_option('bg_style_power', 0.0) + default_ct_mode = self.options['ct_mode'] = self.load_or_def_option('ct_mode', 'none') + default_clipgrad = self.options['clipgrad'] = self.load_or_def_option('clipgrad', False) + default_pretrain = self.options['pretrain'] = self.load_or_def_option('pretrain', False) - if is_first_run: - resolution = io.input_int("Resolution ( 64-256 ?:help skip:128) : ", default_resolution, help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") - resolution = np.clip (resolution, 64, 256) - while np.modf(resolution / 16)[0] != 0.0: - resolution -= 1 + if self.is_first_run(): + resolution = io.input_int("Resolution", default_resolution, add_info="64-256", help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") + resolution = np.clip ( (resolution // 16) * 16, 64, 256) self.options['resolution'] = resolution - self.options['face_type'] = io.input_str ("Half, mid full, or full face? (h/mf/f, ?:help skip:f) : ", default_face_type, ['h','mf','f'], help_message="Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face.").lower() - else: - self.options['resolution'] = self.options.get('resolution', default_resolution) - self.options['face_type'] = self.options.get('face_type', default_face_type) + self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f'], help_message="Half / mid face / full face. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face.").lower() - default_learn_mask = self.options.get('learn_mask', True) - if is_first_run or ask_override: - self.options['learn_mask'] = io.input_bool ( f"Learn mask? (y/n, ?:help skip:{yn_str[default_learn_mask]} ) : " , default_learn_mask, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case converter forced to use 'not predicted mask' that is not smooth as predicted.") - else: - self.options['learn_mask'] = self.options.get('learn_mask', default_learn_mask) + if (self.is_first_run() or ask_override) and len(device_config.devices) == 1: + self.options['models_opt_on_gpu'] = io.input_bool ("Place models and optimizer on GPU", default_models_opt_on_gpu, help_message="When you train on one GPU, by default model and optimizer weights are placed on GPU to accelerate the process. You can place they on CPU to free up extra VRAM, thus set bigger dimensions.") - if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend: - def_optimizer_mode = self.options.get('optimizer_mode', 1) - self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train x2 bigger network consuming RAM. 
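Resolution handling changes above from the old decrement-until-divisible loop to direct arithmetic. A one-line restatement (snap_resolution is a hypothetical name):

import numpy as np

def snap_resolution(resolution):
    # New behaviour: floor to the nearest multiple of 16, then clamp to [64, 256].
    return int(np.clip((resolution // 16) * 16, 64, 256))

assert snap_resolution(130) == 128 and snap_resolution(300) == 256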
3 - allows you to train x3 bigger network consuming huge amount of RAM and slower, depends on CPU power.") - else: - self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1) + if self.is_first_run(): + self.options['archi'] = io.input_str ("AE architecture", default_archi, ['dfhd','liaehd','df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'hd' is heavyweight version for the best quality.").lower() #-s version is slower, but has decreased change to collapse. + self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dimensions", default_ae_dims, add_info="32-1024", help_message="All face information will packed to AE dims. If amount of AE dims are not enough, then for example closed eyes will not be recognized. More dims are better, but require more VRAM. You can fine-tune model size to fit your GPU." ), 32, 1024 ) + + e_dims = np.clip ( io.input_int("Encoder dimensions", default_e_dims, add_info="16-256", help_message="More dims help to recognize more facial features and achieve sharper result, but require more VRAM. You can fine-tune model size to fit your GPU." ), 16, 256 ) + self.options['e_dims'] = e_dims + e_dims % 2 + + d_dims = np.clip ( io.input_int("Decoder dimensions", default_d_dims, add_info="16-256", help_message="More dims help to recognize more facial features and achieve sharper result, but require more VRAM. You can fine-tune model size to fit your GPU." ), 16, 256 ) + self.options['d_dims'] = d_dims + d_dims % 2 + + d_mask_dims = np.clip ( io.input_int("Decoder mask dimensions", default_d_mask_dims, add_info="16-256", help_message="Typical mask dimensions = decoder dimensions / 3. If you manually cut out obstacles from the dst mask, you can increase this parameter to achieve better quality." ), 16, 256 ) + self.options['d_mask_dims'] = d_mask_dims + d_mask_dims % 2 + + if self.is_first_run() or ask_override: + self.options['learn_mask'] = io.input_bool ("Learn mask", default_learn_mask, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case merger forced to use 'not predicted mask' that is not smooth as predicted.") + self.options['lr_dropout'] = io.input_bool ("Use learning rate dropout", default_lr_dropout, help_message="When the face is trained enough, you can enable this option to get extra sharpness for less amount of iterations.") + self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness for less amount of iterations.") - if is_first_run: - self.options['archi'] = io.input_str ("AE architecture (df, liae ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes.").lower() #-s version is slower, but has decreased change to collapse. - else: - self.options['archi'] = self.options.get('archi', default_archi) - - default_ae_dims = 256 - default_ed_ch_dims = 21 - - if is_first_run: - self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dims (32-1024 ?:help skip:%d) : " % (default_ae_dims) , default_ae_dims, help_message="All face information will packed to AE dims. If amount of AE dims are not enough, then for example closed eyes will not be recognized. More dims are better, but require more VRAM. 
You can fine-tune model size to fit your GPU." ), 32, 1024 ) - self.options['ed_ch_dims'] = np.clip ( io.input_int("Encoder/Decoder dims per channel (10-85 ?:help skip:%d) : " % (default_ed_ch_dims) , default_ed_ch_dims, help_message="More dims help to recognize more facial features and achieve sharper result, but require more VRAM. You can fine-tune model size to fit your GPU." ), 10, 85 ) - else: - self.options['ae_dims'] = self.options.get('ae_dims', default_ae_dims) - self.options['ed_ch_dims'] = self.options.get('ed_ch_dims', default_ed_ch_dims) - - default_true_face_training = self.options.get('true_face_training', False) - default_face_style_power = self.options.get('face_style_power', 0.0) - default_bg_style_power = self.options.get('bg_style_power', 0.0) - - if is_first_run or ask_override: - if nnlib.device.backend != 'plaidML': - default_lr_dropout = self.options.get('lr_dropout', False) - self.options['lr_dropout'] = io.input_bool ( f"Use learning rate dropout? (y/n, ?:help skip:{yn_str[default_lr_dropout]} ) : ", default_lr_dropout, help_message="When the face is trained enough, you can enable this option to get extra sharpness for less amount of iterations.") + if 'df' in self.options['archi']: + self.options['true_face_training'] = io.input_bool ("Enable 'true face' training", default_true_face_training, help_message="The result face will be more like src and will get extra sharpness. Enable it for last 10-20k iterations before conversion.") else: - self.options['lr_dropout'] = False - - default_random_warp = self.options.get('random_warp', True) - self.options['random_warp'] = io.input_bool (f"Enable random warp of samples? ( y/n, ?:help skip:{yn_str[default_random_warp]}) : ", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness for less amount of iterations.") + self.options['true_face_training'] = False - self.options['true_face_training'] = io.input_bool (f"Enable 'true face' training? (y/n, ?:help skip:{yn_str[default_true_face_training]}) : ", default_true_face_training, help_message="The result face will be more like src and will get extra sharpness. Enable it for last 10-20k iterations before conversion.") + self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) + self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) + self.options['ct_mode'] = io.input_str (f"Color transfer for src faceset", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. 
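The dimension options above are kept even and the mask decoder size is derived from the main decoder. The same arithmetic in isolation (make_even is a hypothetical helper name):

def make_even(x):
    # e_dims, d_dims and d_mask_dims all get "+ x % 2" in the patch so they stay even.
    return x + x % 2

d_dims = 64                           # the patch's default decoder dimensions
d_mask_dims = make_even(d_dims // 3)  # default mask decoder dims = d_dims / 3, rounded up to even
assert d_mask_dims == 22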
Try all modes to find the best.") + self.options['clipgrad'] = io.input_bool ("Enable gradient clipping", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.") + self.options['pretrain'] = io.input_bool ("Enable pretraining mode", default_pretrain, help_message="Pretrain the model with large amount of various faces. After that, model can be used to train the fakes more quickly.") - self.options['face_style_power'] = np.clip ( io.input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power, - help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) + if self.options['pretrain'] and self.get_pretraining_data_path() is None: + raise Exception("pretraining_data_path is not defined") - self.options['bg_style_power'] = np.clip ( io.input_number("Background style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power, - help_message="Learn to transfer image around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_ct_mode = self.options.get('ct_mode', 'none') - self.options['ct_mode'] = io.input_str (f"Color transfer mode apply to src faceset. ( none/rct/lct/mkl/idt/sot, ?:help skip:{default_ct_mode}) : ", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.") - - if nnlib.device.backend != 'plaidML': # todo https://github.com/plaidml/plaidml/issues/301 - default_clipgrad = False if is_first_run else self.options.get('clipgrad', False) - self.options['clipgrad'] = io.input_bool (f"Enable gradient clipping? (y/n, ?:help skip:{yn_str[default_clipgrad]}) : ", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.") - else: - self.options['clipgrad'] = False - else: - self.options['lr_dropout'] = self.options.get('lr_dropout', False) - self.options['random_warp'] = self.options.get('random_warp', True) - self.options['true_face_training'] = self.options.get('true_face_training', default_true_face_training) - self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power) - self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power) - self.options['ct_mode'] = self.options.get('ct_mode', 'none') - self.options['clipgrad'] = self.options.get('clipgrad', False) - - if is_first_run: - self.options['pretrain'] = io.input_bool ("Pretrain the model? (y/n, ?:help skip:n) : ", False, help_message="Pretrain the model with large amount of various faces. This technique may help to train the fake with overly different face shapes and light conditions of src/dst data. Face will be look more like a morphed. To reduce the morph effect, some model files will be initialized but not be updated after pretrain: LIAE: inter_AB.h5 DF: encoder.h5. The longer you pretrain the model the more morphed face will look. 
After that, save and run the training again.") - else: - self.options['pretrain'] = False + self.pretrain_just_disabled = (default_pretrain == True and self.options['pretrain'] == False) + + if self.pretrain_just_disabled: + self.set_iter(1) #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements({1.5:4,4:8}) + def on_initialize(self): + nn.initialize() + tf = nn.tf + + conv_kernel_initializer = nn.initializers.ca + + class Downscale(nn.ModelBase): + def __init__(self, in_ch, out_ch, kernel_size=5, dilations=1, subpixel=True, use_activator=True, *kwargs ): + self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.dilations = dilations + self.subpixel = subpixel + self.use_activator = use_activator + super().__init__(*kwargs) + + def on_build(self, *args, **kwargs ): + self.conv1 = nn.Conv2D( self.in_ch, + self.out_ch // (4 if self.subpixel else 1), + kernel_size=self.kernel_size, + strides=1 if self.subpixel else 2, + padding='SAME', dilations=self.dilations, kernel_initializer=conv_kernel_initializer ) + + def forward(self, x): + x = self.conv1(x) + + if self.subpixel: + x = tf.nn.space_to_depth(x, 2) + + if self.use_activator: + x = tf.nn.leaky_relu(x, 0.1) + return x + + def get_out_ch(self): + return (self.out_ch // 4) * 4 + + class DownscaleBlock(nn.ModelBase): + def on_build(self, in_ch, ch, n_downscales, kernel_size, dilations=1, subpixel=True): + self.downs = [] + + last_ch = in_ch + for i in range(n_downscales): + cur_ch = ch*( min(2**i, 8) ) + self.downs.append ( Downscale(last_ch, cur_ch, kernel_size=kernel_size, dilations=dilations, subpixel=subpixel) ) + last_ch = self.downs[-1].get_out_ch() + + def forward(self, inp): + x = inp + for down in self.downs: + x = down(x) + return x + + class Upscale(nn.ModelBase): + def on_build(self, in_ch, out_ch, kernel_size=3 ): + self.conv1 = nn.Conv2D( in_ch, out_ch*4, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, x): + x = self.conv1(x) + x = tf.nn.leaky_relu(x, 0.1) + x = tf.nn.depth_to_space(x, 2) + return x + + class ResidualBlock(nn.ModelBase): + def on_build(self, ch, kernel_size=3 ): + self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, inp): + x = self.conv1(inp) + x = tf.nn.leaky_relu(x, 0.2) + x = self.conv2(x) + x = tf.nn.leaky_relu(inp + x, 0.2) + return x + + class UpdownResidualBlock(nn.ModelBase): + def on_build(self, ch, inner_ch, kernel_size=3 ): + self.up = Upscale (ch, inner_ch, kernel_size=kernel_size) + self.res = ResidualBlock (inner_ch, kernel_size=kernel_size) + self.down = Downscale (inner_ch, ch, kernel_size=kernel_size, use_activator=False) + + def forward(self, inp): + x = self.up(inp) + x = upx = self.res(x) + x = self.down(x) + x = x + inp + x = tf.nn.leaky_relu(x, 0.2) + return x, upx + + class Encoder(nn.ModelBase): + def on_build(self, in_ch, e_ch, is_hd): + self.is_hd=is_hd + if self.is_hd: + self.down1 = DownscaleBlock(in_ch, e_ch*2, n_downscales=4, kernel_size=3, dilations=1) + self.down2 = DownscaleBlock(in_ch, e_ch*2, n_downscales=4, kernel_size=5, dilations=1) + self.down3 = DownscaleBlock(in_ch, e_ch//2, n_downscales=4, kernel_size=5, dilations=2) + self.down4 = DownscaleBlock(in_ch, e_ch//2, n_downscales=4, kernel_size=7, dilations=2) + else: + 
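The new Downscale/Upscale blocks replace the Keras SubpixelDownscaler/SubpixelUpscaler with stride-1 convolutions plus space_to_depth / depth_to_space. A hedged functional sketch, using tf.keras.layers.Conv2D as a stand-in for the repo's nn.Conv2D:

import tensorflow as tf

def subpixel_downscale(x, out_ch, kernel_size=5):
    # Conv to out_ch // 4 channels, then space_to_depth(2): halves H and W and
    # multiplies channels by 4, so the output carries (out_ch // 4) * 4 channels.
    x = tf.keras.layers.Conv2D(out_ch // 4, kernel_size, padding='same')(x)
    x = tf.nn.space_to_depth(x, 2)
    return tf.nn.leaky_relu(x, 0.1)

def subpixel_upscale(x, out_ch, kernel_size=3):
    # Conv to out_ch * 4 channels, then depth_to_space(2): doubles H and W.
    x = tf.keras.layers.Conv2D(out_ch * 4, kernel_size, padding='same')(x)
    x = tf.nn.leaky_relu(x, 0.1)
    return tf.nn.depth_to_space(x, 2)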
self.down1 = DownscaleBlock(in_ch, e_ch, n_downscales=4, kernel_size=5, dilations=1, subpixel=False) + + def forward(self, inp): + if self.is_hd: + x = tf.concat([ nn.tf_flatten(self.down1(inp)), + nn.tf_flatten(self.down2(inp)), + nn.tf_flatten(self.down3(inp)), + nn.tf_flatten(self.down4(inp)) ], -1 ) + else: + x = nn.tf_flatten(self.down1(inp)) + + return x + + class Inter(nn.ModelBase): + def __init__(self, in_ch, lowest_dense_res, ae_ch, ae_out_ch, **kwargs): + self.in_ch, self.lowest_dense_res, self.ae_ch, self.ae_out_ch = in_ch, lowest_dense_res, ae_ch, ae_out_ch + super().__init__(**kwargs) + + def on_build(self): + in_ch, lowest_dense_res, ae_ch, ae_out_ch = self.in_ch, self.lowest_dense_res, self.ae_ch, self.ae_out_ch + + self.dense1 = nn.Dense( in_ch, ae_ch, kernel_initializer=tf.initializers.orthogonal ) + self.dense2 = nn.Dense( ae_ch, lowest_dense_res * lowest_dense_res * ae_out_ch, kernel_initializer=tf.initializers.orthogonal ) + self.upscale1 = Upscale(ae_out_ch, ae_out_ch) + + def forward(self, inp): + x = self.dense1(inp) + x = self.dense2(x) + x = tf.reshape (x, (-1, lowest_dense_res, lowest_dense_res, self.ae_out_ch)) + x = self.upscale1(x) + return x + + def get_out_ch(self): + return self.ae_out_ch + + class Decoder(nn.ModelBase): + def on_build(self, in_ch, d_ch, d_mask_ch, is_hd ): + self.is_hd = is_hd + + self.upscale0 = Upscale(in_ch, d_ch*8, kernel_size=3) + self.upscale1 = Upscale(d_ch*8, d_ch*4, kernel_size=3) + self.upscale2 = Upscale(d_ch*4, d_ch*2, kernel_size=3) + + if is_hd: + self.res0 = UpdownResidualBlock(in_ch, d_ch*8, kernel_size=3) + self.res1 = UpdownResidualBlock(d_ch*8, d_ch*4, kernel_size=3) + self.res2 = UpdownResidualBlock(d_ch*4, d_ch*2, kernel_size=3) + self.res3 = UpdownResidualBlock(d_ch*2, d_ch, kernel_size=3) + else: + self.res0 = ResidualBlock(d_ch*8, kernel_size=3) + self.res1 = ResidualBlock(d_ch*4, kernel_size=3) + self.res2 = ResidualBlock(d_ch*2, kernel_size=3) + + self.out_conv = nn.Conv2D( d_ch*2, 3, kernel_size=1, padding='SAME', kernel_initializer=conv_kernel_initializer) + + self.upscalem0 = Upscale(in_ch, d_mask_ch*8, kernel_size=3) + self.upscalem1 = Upscale(d_mask_ch*8, d_mask_ch*4, kernel_size=3) + self.upscalem2 = Upscale(d_mask_ch*4, d_mask_ch*2, kernel_size=3) + self.out_convm = nn.Conv2D( d_mask_ch*2, 1, kernel_size=1, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def get_weights_ex(self, include_mask): + # Call internal get_weights in order to initialize inner logic + self.get_weights() + + weights = self.upscale0.get_weights() + self.upscale1.get_weights() + self.upscale2.get_weights() \ + + self.res0.get_weights() + self.res1.get_weights() + self.res2.get_weights() + self.out_conv.get_weights() + + if include_mask: + weights += self.upscalem0.get_weights() + self.upscalem1.get_weights() + self.upscalem2.get_weights() \ + + self.out_convm.get_weights() + return weights + + + def forward(self, inp): + z = inp + + if self.is_hd: + x, upx = self.res0(z) + + x = self.upscale0(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res1(x) + + x = self.upscale1(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res2(x) + + x = self.upscale2(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res3(x) + else: + x = self.upscale0(z) + x = self.res0(x) + x = self.upscale1(x) + x = self.res1(x) + x = self.upscale2(x) + x = self.res2(x) + + m = self.upscalem0(z) + m = self.upscalem1(m) + m = self.upscalem2(m) + + return tf.nn.sigmoid(self.out_conv(x)), \ + tf.nn.sigmoid(self.out_convm(m)) + + class 
CodeDiscriminator(nn.ModelBase): + def on_build(self, in_ch, code_res, ch=256): + n_downscales = 2 + code_res // 8 + + self.convs = [] + prev_ch = in_ch + for i in range(n_downscales): + cur_ch = ch * min( (2**i), 8 ) + self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=4 if i == 0 else 3, strides=2, padding='SAME', kernel_initializer=conv_kernel_initializer) ) + prev_ch = cur_ch + + self.out_conv = nn.Conv2D( prev_ch, 1, kernel_size=1, padding='VALID', kernel_initializer=conv_kernel_initializer) + + def forward(self, x): + for conv in self.convs: + x = tf.nn.leaky_relu( conv(x), 0.1 ) + return self.out_conv(x) + + device_config = nn.getCurrentDeviceConfig() + devices = device_config.devices resolution = self.options['resolution'] learn_mask = self.options['learn_mask'] - - ae_dims = self.options['ae_dims'] - ed_ch_dims = self.options['ed_ch_dims'] - self.pretrain = self.options['pretrain'] = self.options.get('pretrain', False) - if not self.pretrain: - self.options.pop('pretrain') - - bgr_shape = (resolution, resolution, 3) - mask_shape = (resolution, resolution, 1) - - self.true_face_training = self.options.get('true_face_training', False) + archi = self.options['archi'] + ae_dims = self.options['ae_dims'] + e_dims = self.options['e_dims'] + d_dims = self.options['d_dims'] + d_mask_dims = self.options['d_mask_dims'] + self.pretrain = self.options['pretrain'] + masked_training = True - class CommonModel(object): - def downscale (self, dim, kernel_size=5, dilation_rate=1, use_activator=True): - def func(x): - if not use_activator: - return SubpixelDownscaler()(Conv2D(dim // 4, kernel_size=kernel_size, strides=1, dilation_rate=dilation_rate, padding='same')(x)) - else: - return SubpixelDownscaler()(LeakyReLU(0.1)(Conv2D(dim // 4, kernel_size=kernel_size, strides=1, dilation_rate=dilation_rate, padding='same')(x))) - return func - - def upscale (self, dim, size=(2,2)): - def func(x): - return SubpixelUpscaler(size=size)(LeakyReLU(0.1)(Conv2D(dim * np.prod(size) , kernel_size=3, strides=1, padding='same')(x))) - return func - - def ResidualBlock(self, dim): - def func(inp): - x = Conv2D(dim, kernel_size=3, padding='same')(inp) - x = LeakyReLU(0.2)(x) - x = Conv2D(dim, kernel_size=3, padding='same')(x) - x = Add()([x, inp]) - x = LeakyReLU(0.2)(x) - return x - return func - - class SAEDFModel(CommonModel): - def __init__(self, resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask): - super().__init__() - self.learn_mask = learn_mask - - output_nc = 3 - bgr_shape = (resolution, resolution, output_nc) - mask_shape = (resolution, resolution, 1) - lowest_dense_res = resolution // 16 - e_dims = output_nc*e_ch_dims - - - - def enc_flow(e_ch_dims, ae_dims, lowest_dense_res): - dims = output_nc * e_ch_dims - if dims % 2 != 0: - dims += 1 - - def func(inp): - x = self.downscale(dims , 3, 1 )(inp) - x = self.downscale(dims*2, 3, 1 )(x) - x = self.downscale(dims*4, 3, 1 )(x) - x0 = self.downscale(dims*8, 3, 1 )(x) - - x = self.downscale(dims , 5, 1 )(inp) - x = self.downscale(dims*2, 5, 1 )(x) - x = self.downscale(dims*4, 5, 1 )(x) - x1 = self.downscale(dims*8, 5, 1 )(x) - - x = self.downscale(dims , 5, 2 )(inp) - x = self.downscale(dims*2, 5, 2 )(x) - x = self.downscale(dims*4, 5, 2 )(x) - x2 = self.downscale(dims*8, 5, 2 )(x) - - x = self.downscale(dims , 7, 2 )(inp) - x = self.downscale(dims*2, 7, 2 )(x) - x = self.downscale(dims*4, 7, 2 )(x) - x3 = self.downscale(dims*8, 7, 2 )(x) - - x = Concatenate()([x0,x1,x2,x3]) - - x = Dense(ae_dims)(Flatten()(x)) - x = Dense(lowest_dense_res * 
lowest_dense_res * ae_dims)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims))(x) - x = self.upscale(ae_dims)(x) - return x - return func - - def dec_flow(output_nc, d_ch_dims, is_mask=False): - dims = output_nc * d_ch_dims - if dims % 2 != 0: - dims += 1 - - def func(x): - - for i in [8,4,2]: - x = self.upscale(dims*i)(x) - - if not is_mask: - x0 = x - x = self.upscale( (dims*i)//2 )(x) - x = self.ResidualBlock( (dims*i)//2 )(x) - x = self.downscale( dims*i, use_activator=False ) (x) - x = Add()([x, x0]) - x = LeakyReLU(0.2)(x) - - return Conv2D(output_nc, kernel_size=1, padding='same', activation='sigmoid')(x) - - return func - - self.encoder = modelify(enc_flow(e_ch_dims, ae_dims, lowest_dense_res)) ( Input(bgr_shape) ) - - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.decoder_src = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - self.decoder_dst = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - - if learn_mask: - self.decoder_srcm = modelify(dec_flow(1, d_ch_dims, is_mask=True)) ( Input(sh) ) - self.decoder_dstm = modelify(dec_flow(1, d_ch_dims, is_mask=True)) ( Input(sh) ) - - self.src_dst_trainable_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights + self.decoder_dst.trainable_weights - - if learn_mask: - self.src_dst_mask_trainable_weights = self.encoder.trainable_weights + self.decoder_srcm.trainable_weights + self.decoder_dstm.trainable_weights - - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - self.target_src, self.target_dst = Input(bgr_shape), Input(bgr_shape) - self.target_srcm, self.target_dstm = Input(mask_shape), Input(mask_shape) - self.src_code, self.dst_code = self.encoder(self.warped_src), self.encoder(self.warped_dst) - - self.pred_src_src = self.decoder_src(self.src_code) - self.pred_dst_dst = self.decoder_dst(self.dst_code) - self.pred_src_dst = self.decoder_src(self.dst_code) - - if learn_mask: - self.pred_src_srcm = self.decoder_srcm(self.src_code) - self.pred_dst_dstm = self.decoder_dstm(self.dst_code) - self.pred_src_dstm = self.decoder_srcm(self.dst_code) - - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [] - if not exclude_for_pretrain: - ar += [ [self.encoder, 'encoder.h5'] ] - ar += [ [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] ] - if self.learn_mask: - ar += [ [self.decoder_srcm, 'decoder_srcm.h5'], - [self.decoder_dstm, 'decoder_dstm.h5'] ] - return ar - - class SAELIAEModel(CommonModel): - def __init__(self, resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask): - super().__init__() - self.learn_mask = learn_mask - - output_nc = 3 - bgr_shape = (resolution, resolution, output_nc) - mask_shape = (resolution, resolution, 1) - - lowest_dense_res = resolution // 16 - - def enc_flow(e_ch_dims): - dims = output_nc*e_ch_dims - if dims % 2 != 0: - dims += 1 - - def func(inp): - x = self.downscale(dims , 3, 1 )(inp) - x = self.downscale(dims*2, 3, 1 )(x) - x = self.downscale(dims*4, 3, 1 )(x) - x0 = self.downscale(dims*8, 3, 1 )(x) - - x = self.downscale(dims , 5, 1 )(inp) - x = self.downscale(dims*2, 5, 1 )(x) - x = self.downscale(dims*4, 5, 1 )(x) - x1 = self.downscale(dims*8, 5, 1 )(x) - - x = self.downscale(dims , 5, 2 )(inp) - x = self.downscale(dims*2, 5, 2 )(x) - x = self.downscale(dims*4, 5, 2 )(x) - x2 = self.downscale(dims*8, 5, 2 )(x) - - x = self.downscale(dims , 7, 2 )(inp) - x = self.downscale(dims*2, 7, 2 )(x) - x = self.downscale(dims*4, 7, 2 )(x) - x3 = self.downscale(dims*8, 7, 2 )(x) - - x = 
Concatenate()([x0,x1,x2,x3]) - - x = Flatten()(x) - return x - return func - - def inter_flow(lowest_dense_res, ae_dims): - def func(x): - x = Dense(ae_dims)(x) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims*2)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims*2))(x) - x = self.upscale(ae_dims*2)(x) - return x - return func - - def dec_flow(output_nc, d_ch_dims, is_mask=False): - dims = output_nc * d_ch_dims - if dims % 2 != 0: - dims += 1 - - def func(x): - - for i in [8,4,2]: - x = self.upscale(dims*i)(x) - - if not is_mask: - x0 = x - x = self.upscale( (dims*i)//2 )(x) - x = self.ResidualBlock( (dims*i)//2 )(x) - x = self.downscale( dims*i, use_activator=False ) (x) - x = Add()([x, x0]) - x = LeakyReLU(0.2)(x) - - return Conv2D(output_nc, kernel_size=1, padding='same', activation='sigmoid')(x) - - return func - - self.encoder = modelify(enc_flow(e_ch_dims)) ( Input(bgr_shape) ) - - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.inter_B = modelify(inter_flow(lowest_dense_res, ae_dims)) ( Input(sh) ) - self.inter_AB = modelify(inter_flow(lowest_dense_res, ae_dims)) ( Input(sh) ) - - sh = np.array(K.int_shape( self.inter_B.outputs[0] )[1:])*(1,1,2) - self.decoder = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - - if learn_mask: - self.decoderm = modelify(dec_flow(1, d_ch_dims, is_mask=True)) ( Input(sh) ) - - self.src_dst_trainable_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights - - if learn_mask: - self.src_dst_mask_trainable_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoderm.trainable_weights - - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - self.target_src, self.target_dst = Input(bgr_shape), Input(bgr_shape) - self.target_srcm, self.target_dstm = Input(mask_shape), Input(mask_shape) - - warped_src_code = self.encoder (self.warped_src) - warped_src_inter_AB_code = self.inter_AB (warped_src_code) - self.src_code = Concatenate()([warped_src_inter_AB_code,warped_src_inter_AB_code]) - - warped_dst_code = self.encoder (self.warped_dst) - warped_dst_inter_B_code = self.inter_B (warped_dst_code) - warped_dst_inter_AB_code = self.inter_AB (warped_dst_code) - self.dst_code = Concatenate()([warped_dst_inter_B_code,warped_dst_inter_AB_code]) - - src_dst_code = Concatenate()([warped_dst_inter_AB_code,warped_dst_inter_AB_code]) - - self.pred_src_src = self.decoder(self.src_code) - self.pred_dst_dst = self.decoder(self.dst_code) - self.pred_src_dst = self.decoder(src_dst_code) - - if learn_mask: - self.pred_src_srcm = self.decoderm(self.src_code) - self.pred_dst_dstm = self.decoderm(self.dst_code) - self.pred_src_dstm = self.decoderm(src_dst_code) - - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [ [self.encoder, 'encoder.h5'], - [self.inter_B, 'inter_B.h5'] ] - - if not exclude_for_pretrain: - ar += [ [self.inter_AB, 'inter_AB.h5'] ] - - ar += [ [self.decoder, 'decoder.h5'] ] - - if self.learn_mask: - ar += [ [self.decoderm, 'decoderm.h5'] ] - - return ar - - if 'df' in self.options['archi']: - self.model = SAEDFModel (resolution, ae_dims, ed_ch_dims, ed_ch_dims, learn_mask) - elif 'liae' in self.options['archi']: - self.model = SAELIAEModel (resolution, ae_dims, ed_ch_dims, ed_ch_dims, learn_mask) - - self.opt_dis_model = [] - - if self.true_face_training: - def dis_flow(ndf=256): - def func(x): - x, = x - - code_res = K.int_shape(x)[1] - - x = 
Conv2D( ndf, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - x = Conv2D( ndf*2, 3, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - if code_res > 8: - x = Conv2D( ndf*4, 3, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - if code_res > 16: - x = Conv2D( ndf*8, 3, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - if code_res > 32: - x = Conv2D( ndf*8, 3, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - return Conv2D( 1, 1, strides=1, padding='valid', activation='sigmoid')(x) - return func - - sh = [ Input( K.int_shape(self.model.src_code)[1:] ) ] - self.dis = modelify(dis_flow()) (sh) - - self.opt_dis_model = [ (self.dis, 'dis.h5') ] - - loaded, not_loaded = [], self.model.get_model_filename_list()+self.opt_dis_model - if not self.is_first_run(): - loaded, not_loaded = self.load_weights_safe(not_loaded) - - CA_models = [ model for model, _ in not_loaded ] - - self.CA_conv_weights_list = [] - for model in CA_models: - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - self.CA_conv_weights_list += [layer.weights[0]] #- is Conv2D kernel_weights - - target_srcm = gaussian_blur( max(1, resolution // 32) )(self.model.target_srcm) - target_dstm = gaussian_blur( max(1, resolution // 32) )(self.model.target_dstm) - - target_src_masked = self.model.target_src*target_srcm - target_dst_masked = self.model.target_dst*target_dstm - target_dst_anti_masked = self.model.target_dst*(1.0 - target_dstm) - - target_src_masked_opt = target_src_masked if masked_training else self.model.target_src - target_dst_masked_opt = target_dst_masked if masked_training else self.model.target_dst - - pred_src_src_masked_opt = self.model.pred_src_src*target_srcm if masked_training else self.model.pred_src_src - pred_dst_dst_masked_opt = self.model.pred_dst_dst*target_dstm if masked_training else self.model.pred_dst_dst - - psd_target_dst_masked = self.model.pred_src_dst*target_dstm - psd_target_dst_anti_masked = self.model.pred_src_dst*(1.0 - target_dstm) - - if self.is_training_mode: - lr_dropout = 0.3 if self.options['lr_dropout'] else 0.0 - self.src_dst_opt = RMSprop(lr=5e-5, lr_dropout=lr_dropout, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - self.src_dst_mask_opt = RMSprop(lr=5e-5, lr_dropout=lr_dropout, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - self.D_opt = RMSprop(lr=5e-5, lr_dropout=lr_dropout, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - - src_loss = K.mean ( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_opt, pred_src_src_masked_opt) ) - src_loss += K.mean ( 10*K.square( target_src_masked_opt - pred_src_src_masked_opt ) ) - - face_style_power = self.options['face_style_power'] / 100.0 - if face_style_power != 0: - src_loss += style_loss(gaussian_blur_radius=resolution//16, loss_weight=face_style_power, wnd_size=0)( psd_target_dst_masked, target_dst_masked ) - - bg_style_power = self.options['bg_style_power'] / 100.0 - if bg_style_power != 0: - src_loss += K.mean( (10*bg_style_power)*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked, target_dst_anti_masked )) - src_loss += K.mean( (10*bg_style_power)*K.square( psd_target_dst_anti_masked - target_dst_anti_masked )) - - dst_loss = K.mean( 
10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)(target_dst_masked_opt, pred_dst_dst_masked_opt) ) - dst_loss += K.mean( 10*K.square( target_dst_masked_opt - pred_dst_dst_masked_opt ) ) - - G_loss = src_loss+dst_loss - - if self.true_face_training: - def DLoss(labels,logits): - return K.mean(K.binary_crossentropy(labels,logits)) - - src_code_d = self.dis( self.model.src_code ) - src_code_d_ones = K.ones_like(src_code_d) - src_code_d_zeros = K.zeros_like(src_code_d) - dst_code_d = self.dis( self.model.dst_code ) - dst_code_d_ones = K.ones_like(dst_code_d) - G_loss += 0.01*DLoss(src_code_d_ones, src_code_d) - - loss_D = (DLoss(dst_code_d_ones , dst_code_d) + \ - DLoss(src_code_d_zeros, src_code_d) ) * 0.5 - - self.D_train = K.function ([self.model.warped_src, self.model.warped_dst],[loss_D], self.D_opt.get_updates(loss_D, self.dis.trainable_weights) ) - - self.src_dst_train = K.function ([self.model.warped_src, self.model.warped_dst, self.model.target_src, self.model.target_srcm, self.model.target_dst, self.model.target_dstm], - [src_loss,dst_loss], - self.src_dst_opt.get_updates( G_loss, self.model.src_dst_trainable_weights) - ) - - if self.options['learn_mask']: - src_mask_loss = K.mean(K.square(self.model.target_srcm-self.model.pred_src_srcm)) - dst_mask_loss = K.mean(K.square(self.model.target_dstm-self.model.pred_dst_dstm)) - self.src_dst_mask_train = K.function ([self.model.warped_src, self.model.warped_dst, self.model.target_srcm, self.model.target_dstm],[src_mask_loss, dst_mask_loss], self.src_dst_mask_opt.get_updates(src_mask_loss+dst_mask_loss, self.model.src_dst_mask_trainable_weights ) ) - - if self.options['learn_mask']: - self.AE_view = K.function ([self.model.warped_src, self.model.warped_dst], [self.model.pred_src_src, self.model.pred_dst_dst, self.model.pred_dst_dstm, self.model.pred_src_dst, self.model.pred_src_dstm]) + models_opt_on_gpu = False if len(devices) != 1 else self.options['models_opt_on_gpu'] + models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0' + optimizer_vars_on_cpu = models_opt_device=='/CPU:0' + + input_nc = 3 + output_nc = 3 + bgr_shape = (resolution, resolution, output_nc) + mask_shape = (resolution, resolution, 1) + lowest_dense_res = resolution // 16 + + self.model_filename_list = [] + + + with tf.device ('/CPU:0'): + #Place holders on CPU + self.warped_src = tf.placeholder (tf.float32, (None,)+bgr_shape) + self.warped_dst = tf.placeholder (tf.float32, (None,)+bgr_shape) + + self.target_src = tf.placeholder (tf.float32, (None,)+bgr_shape) + self.target_dst = tf.placeholder (tf.float32, (None,)+bgr_shape) + + self.target_srcm = tf.placeholder (tf.float32, (None,)+mask_shape) + self.target_dstm = tf.placeholder (tf.float32, (None,)+mask_shape) + + # Initializing model classes + with tf.device (models_opt_device): + if 'df' in archi: + self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder') + encoder_out_ch = self.encoder.compute_output_shape ( (tf.float32, (None,resolution,resolution,input_nc)))[-1] + + self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, name='inter') + inter_out_ch = self.inter.compute_output_shape ( (tf.float32, (None,encoder_out_ch)))[-1] + + self.decoder_src = Decoder(in_ch=inter_out_ch, d_ch=d_dims, d_mask_ch=d_mask_dims, is_hd='hd' in archi, name='decoder_src') + self.decoder_dst = Decoder(in_ch=inter_out_ch, d_ch=d_dims, d_mask_ch=d_mask_dims, is_hd='hd' in archi, name='decoder_dst') + + 
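For orientation, here is a rough shape-bookkeeping sketch of the encoder/inter/decoder stack assembled at this point, assuming illustrative values resolution=128 and ae_dims=256 (the real values are prompted for elsewhere). It only reproduces the arithmetic implied by `lowest_dense_res = resolution // 16`, the reshape in `Inter.forward`, and the doubling `Upscale` stages; it is not the model graph itself.

```python
# Hedged sketch of the spatial-size arithmetic, assuming resolution=128 and ae_dims=256.
resolution = 128
ae_dims = 256

lowest_dense_res = resolution // 16               # 8, as computed in on_initialize
dense2_units = lowest_dense_res ** 2 * ae_dims    # 8*8*256 units produced by Inter.dense2

side = lowest_dense_res
side *= 2                                         # Inter.upscale1: depth_to_space by 2 -> 16
for _ in range(3):                                # Decoder.upscale0..2 -> 32, 64, 128
    side *= 2
assert side == resolution                         # decoder output matches the training resolution

print(lowest_dense_res, dense2_units, side)       # 8 16384 128
```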
self.model_filename_list += [ [self.encoder, 'encoder.npy' ], + [self.inter, 'inter.npy' ], + [self.decoder_src, 'decoder_src.npy'], + [self.decoder_dst, 'decoder_dst.npy'] ] + + if self.is_training: + if self.options['true_face_training']: + self.dis = CodeDiscriminator(ae_dims, code_res=lowest_dense_res*2, name='dis' ) + self.model_filename_list += [ [self.dis, 'dis.npy'] ] + + elif 'liae' in archi: + self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder') + encoder_out_ch = self.encoder.compute_output_shape ( (tf.float32, (None,resolution,resolution,input_nc)))[-1] + + self.inter_AB = Inter(in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims*2, name='inter_AB') + self.inter_B = Inter(in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims*2, name='inter_B') + + inter_AB_out_ch = self.inter_AB.compute_output_shape ( (tf.float32, (None,encoder_out_ch)))[-1] + inter_B_out_ch = self.inter_B.compute_output_shape ( (tf.float32, (None,encoder_out_ch)))[-1] + inters_out_ch = inter_AB_out_ch+inter_B_out_ch + + self.decoder = Decoder(in_ch=inters_out_ch, d_ch=d_dims, d_mask_ch=d_mask_dims, is_hd='hd' in archi, name='decoder') + + self.model_filename_list += [ [self.encoder, 'encoder.npy'], + [self.inter_AB, 'inter_AB.npy'], + [self.inter_B , 'inter_B.npy'], + [self.decoder , 'decoder.npy'] ] + + if self.is_training: + # Initialize optimizers + lr=5e-5 + lr_dropout = 0.3 if self.options['lr_dropout'] else 1.0 + clipnorm = 1.0 if self.options['clipgrad'] else 0.0 + self.src_dst_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='src_dst_opt') + self.model_filename_list += [ (self.src_dst_opt, 'src_dst_opt.npy') ] + if 'df' in archi: + self.src_dst_all_trainable_weights = self.encoder.get_weights() + self.decoder_src.get_weights() + self.decoder_dst.get_weights() + self.src_dst_trainable_weights = self.encoder.get_weights() + self.decoder_src.get_weights_ex(learn_mask) + self.decoder_dst.get_weights_ex(learn_mask) + + elif 'liae' in archi: + self.src_dst_all_trainable_weights = self.encoder.get_weights() + self.inter_AB.get_weights() + self.inter_B.get_weights() + self.decoder.get_weights() + self.src_dst_trainable_weights = self.encoder.get_weights() + self.inter_AB.get_weights() + self.inter_B.get_weights() + self.decoder.get_weights_ex(learn_mask) + + self.src_dst_opt.initialize_variables (self.src_dst_all_trainable_weights, vars_on_cpu=optimizer_vars_on_cpu) + + if self.options['true_face_training']: + self.D_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_opt') + self.D_opt.initialize_variables ( self.dis.get_weights(), vars_on_cpu=optimizer_vars_on_cpu) + self.model_filename_list += [ (self.D_opt, 'D_opt.npy') ] + + if self.is_training: + # Adjust batch size for multiple GPU + gpu_count = max(1, len(devices) ) + bs_per_gpu = max(1, self.get_batch_size() // gpu_count) + self.set_batch_size( gpu_count*bs_per_gpu) + + + # Compute losses per GPU + gpu_pred_src_src_list = [] + gpu_pred_dst_dst_list = [] + gpu_pred_src_dst_list = [] + gpu_pred_src_srcm_list = [] + gpu_pred_dst_dstm_list = [] + gpu_pred_src_dstm_list = [] + + gpu_src_losses = [] + gpu_dst_losses = [] + gpu_src_dst_loss_gvs = [] + gpu_D_loss_gvs = [] + + for gpu_id in range(gpu_count): + with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ): + batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu ) + with tf.device(f'/CPU:0'): + # slice on 
CPU, otherwise all batch data will be transfered to GPU first + gpu_warped_src = self.warped_src [batch_slice,:,:,:] + gpu_warped_dst = self.warped_dst [batch_slice,:,:,:] + gpu_target_src = self.target_src [batch_slice,:,:,:] + gpu_target_dst = self.target_dst [batch_slice,:,:,:] + gpu_target_srcm = self.target_srcm[batch_slice,:,:,:] + gpu_target_dstm = self.target_dstm[batch_slice,:,:,:] + + # process model tensors + if 'df' in archi: + gpu_src_code = self.inter(self.encoder(gpu_warped_src)) + gpu_dst_code = self.inter(self.encoder(gpu_warped_dst)) + gpu_pred_src_src, gpu_pred_src_srcm = self.decoder_src(gpu_src_code) + gpu_pred_dst_dst, gpu_pred_dst_dstm = self.decoder_dst(gpu_dst_code) + gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder_src(gpu_dst_code) + + elif 'liae' in archi: + gpu_src_code = self.encoder (gpu_warped_src) + gpu_src_inter_AB_code = self.inter_AB (gpu_src_code) + gpu_src_code = tf.concat([gpu_src_inter_AB_code,gpu_src_inter_AB_code],-1) + gpu_dst_code = self.encoder (gpu_warped_dst) + gpu_dst_inter_B_code = self.inter_B (gpu_dst_code) + gpu_dst_inter_AB_code = self.inter_AB (gpu_dst_code) + gpu_dst_code = tf.concat([gpu_dst_inter_B_code,gpu_dst_inter_AB_code],-1) + gpu_src_dst_code = tf.concat([gpu_dst_inter_AB_code,gpu_dst_inter_AB_code],-1) + + gpu_pred_src_src, gpu_pred_src_srcm = self.decoder(gpu_src_code) + gpu_pred_dst_dst, gpu_pred_dst_dstm = self.decoder(gpu_dst_code) + gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder(gpu_src_dst_code) + + gpu_pred_src_src_list.append(gpu_pred_src_src) + gpu_pred_dst_dst_list.append(gpu_pred_dst_dst) + gpu_pred_src_dst_list.append(gpu_pred_src_dst) + + gpu_pred_src_srcm_list.append(gpu_pred_src_srcm) + gpu_pred_dst_dstm_list.append(gpu_pred_dst_dstm) + gpu_pred_src_dstm_list.append(gpu_pred_src_dstm) + + gpu_target_srcm_blur = nn.tf_gaussian_blur(gpu_target_srcm, max(1, resolution // 32) ) + gpu_target_dstm_blur = nn.tf_gaussian_blur(gpu_target_dstm, max(1, resolution // 32) ) + + gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur + gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur) + + gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src + gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst + + gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src + gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst + + gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur + gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur) + + gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) + gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3]) + if learn_mask: + gpu_src_loss += tf.reduce_mean ( tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] ) + + face_style_power = self.options['face_style_power'] / 100.0 + if face_style_power != 0 and not self.pretrain: + gpu_src_loss += nn.tf_style_loss(gpu_psd_target_dst_masked, gpu_target_dst_masked, gaussian_blur_radius=resolution//16, loss_weight=10000*face_style_power) + + bg_style_power = self.options['bg_style_power'] / 100.0 + if bg_style_power != 0 and not self.pretrain: + gpu_src_loss += tf.reduce_mean( 
(10*bg_style_power)*nn.tf_dssim(gpu_psd_target_dst_anti_masked, gpu_target_dst_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) + gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*tf.square( gpu_psd_target_dst_anti_masked - gpu_target_dst_anti_masked), axis=[1,2,3] ) + + gpu_dst_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1]) + gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3]) + if learn_mask: + gpu_dst_loss += tf.reduce_mean ( tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] ) + + gpu_src_losses += [gpu_src_loss] + gpu_dst_losses += [gpu_dst_loss] + + gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss + + if self.options['true_face_training']: + def DLoss(labels,logits): + return tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits), axis=[1,2,3]) + + gpu_src_code_d = self.dis( gpu_src_code ) + gpu_src_code_d_ones = tf.ones_like(gpu_src_code_d) + gpu_src_code_d_zeros = tf.zeros_like(gpu_src_code_d) + gpu_dst_code_d = self.dis( gpu_dst_code ) + gpu_dst_code_d_ones = tf.ones_like(gpu_dst_code_d) + + gpu_src_dst_loss += 0.01*DLoss(gpu_src_code_d_ones, gpu_src_code_d) + + gpu_D_loss = (DLoss(gpu_src_code_d_ones , gpu_dst_code_d) + \ + DLoss(gpu_src_code_d_zeros, gpu_src_code_d) ) * 0.5 + + gpu_D_loss_gvs += [ nn.tf_gradients (gpu_D_loss, self.dis.get_weights() ) ] + + gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ] + + + # Average losses and gradients, and create optimizer update ops + with tf.device (models_opt_device): + if gpu_count == 1: + pred_src_src = gpu_pred_src_src_list[0] + pred_dst_dst = gpu_pred_dst_dst_list[0] + pred_src_dst = gpu_pred_src_dst_list[0] + pred_src_srcm = gpu_pred_src_srcm_list[0] + pred_dst_dstm = gpu_pred_dst_dstm_list[0] + pred_src_dstm = gpu_pred_src_dstm_list[0] + + src_loss = gpu_src_losses[0] + dst_loss = gpu_dst_losses[0] + src_dst_loss_gv = gpu_src_dst_loss_gvs[0] + else: + pred_src_src = tf.concat(gpu_pred_src_src_list, 0) + pred_dst_dst = tf.concat(gpu_pred_dst_dst_list, 0) + pred_src_dst = tf.concat(gpu_pred_src_dst_list, 0) + pred_src_srcm = tf.concat(gpu_pred_src_srcm_list, 0) + pred_dst_dstm = tf.concat(gpu_pred_dst_dstm_list, 0) + pred_src_dstm = tf.concat(gpu_pred_src_dstm_list, 0) + + src_loss = nn.tf_average_tensor_list(gpu_src_losses) + dst_loss = nn.tf_average_tensor_list(gpu_dst_losses) + src_dst_loss_gv = nn.tf_average_gv_list (gpu_src_dst_loss_gvs) + + if self.options['true_face_training']: + D_loss_gv = nn.tf_average_gv_list(gpu_D_loss_gvs) + + src_dst_loss_gv_op = self.src_dst_opt.get_update_op (src_dst_loss_gv ) + + if self.options['true_face_training']: + D_loss_gv_op = self.D_opt.get_update_op (D_loss_gv ) + + + # Initializing training and view functions + def src_dst_train(warped_src, target_src, target_srcm, \ + warped_dst, target_dst, target_dstm): + s, d, _ = nn.tf_sess.run ( [ src_loss, dst_loss, src_dst_loss_gv_op], + feed_dict={self.warped_src :warped_src, + self.target_src :target_src, + self.target_srcm:target_srcm, + self.warped_dst :warped_dst, + self.target_dst :target_dst, + self.target_dstm:target_dstm, + }) + s = np.mean(s) + d = np.mean(d) + return s, d + self.src_dst_train = src_dst_train + + if self.options['true_face_training']: + def D_train(warped_src, warped_dst): + nn.tf_sess.run ([D_loss_gv_op], feed_dict={self.warped_src: 
warped_src, self.warped_dst: warped_dst}) + self.D_train = D_train + + if learn_mask: + def AE_view(warped_src, warped_dst): + return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm], + feed_dict={self.warped_src:warped_src, + self.warped_dst:warped_dst}) else: - self.AE_view = K.function ([self.model.warped_src, self.model.warped_dst], [self.model.pred_src_src, self.model.pred_dst_dst, self.model.pred_src_dst ]) - + def AE_view(warped_src, warped_dst): + return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_src_dst], + feed_dict={self.warped_src:warped_src, + self.warped_dst:warped_dst}) + self.AE_view = AE_view else: - if self.options['learn_mask']: - self.AE_convert = K.function ([self.model.warped_dst],[ self.model.pred_src_dst, self.model.pred_dst_dstm, self.model.pred_src_dstm ]) + # Initializing merge function + with tf.device( f'/GPU:0' if len(devices) != 0 else f'/CPU:0'): + if 'df' in archi: + gpu_dst_code = self.inter(self.encoder(self.warped_dst)) + gpu_pred_src_dst = self.decoder_src(gpu_dst_code) + gpu_pred_dst_dstm = self.decoder_dstm(gpu_dst_code) + gpu_pred_src_dstm = self.decoder_srcm(gpu_dst_code) + elif 'liae' in archi: + gpu_dst_code = self.encoder (self.warped_dst) + gpu_dst_inter_B_code = self.inter_B (gpu_dst_code) + gpu_dst_inter_AB_code = self.inter_AB (gpu_dst_code) + gpu_dst_code = tf.concat([gpu_dst_inter_B_code,gpu_dst_inter_AB_code],-1) + gpu_src_dst_code = tf.concat([gpu_dst_inter_AB_code,gpu_dst_inter_AB_code],-1) + + gpu_pred_src_dst = self.decoder(gpu_src_dst_code) + gpu_pred_dst_dstm = self.decoderm(gpu_dst_code) + gpu_pred_src_dstm = self.decoderm(gpu_src_dst_code) + + if learn_mask: + def AE_merge( warped_dst): + return nn.tf_sess.run ( [gpu_pred_src_dst, gpu_pred_dst_dstm, gpu_pred_src_dstm], feed_dict={self.warped_dst:warped_dst}) else: - self.AE_convert = K.function ([self.model.warped_dst],[ self.model.pred_src_dst ]) + def AE_merge( warped_dst): + return nn.tf_sess.run ( [gpu_pred_src_dst], feed_dict={self.warped_dst:warped_dst}) + self.AE_merge = AE_merge - if self.is_training_mode: + # Loading/initializing all models/optimizers weights + for model, filename in io.progress_bar_generator(self.model_filename_list, "Initializing models"): + do_init = self.is_first_run() + + if self.pretrain_just_disabled: + if 'df' in archi: + if model == self.inter: + do_init = True + elif 'liae' in archi: + if model == self.inter_AB: + do_init = True + + if not do_init: + do_init = not model.load_weights( self.get_strpath_storage_for_file(filename) ) + + if do_init: + model.init_weights() + + # initializing sample generators + + if self.is_training: t = SampleProcessor.Types - if self.options['face_type'] == 'h': face_type = t.FACE_TYPE_HALF elif self.options['face_type'] == 'mf': @@ -526,82 +668,76 @@ class SAEHDModel(ModelBase): elif self.options['face_type'] == 'f': face_type = t.FACE_TYPE_FULL - t_mode_bgr = t.MODE_BGR if not self.pretrain else t.MODE_BGR_SHUFFLE - - training_data_src_path = self.training_data_src_path - training_data_dst_path = self.training_data_dst_path - - if self.pretrain and self.pretraining_data_path is not None: - training_data_src_path = self.pretraining_data_path - training_data_dst_path = self.pretraining_data_path + training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path() + training_data_dst_path = self.training_data_dst_path if not self.pretrain else self.get_pretraining_data_path() + random_ct_samples_path=training_data_dst_path if 
self.options['ct_mode'] != 'none' and not self.pretrain else None + t_img_warped = t.IMG_WARPED_TRANSFORMED if self.options['random_warp'] else t.IMG_TRANSFORMED + + cpu_count = multiprocessing.cpu_count() + + src_generators_count = cpu_count // 2 + if self.options['ct_mode'] != 'none': + src_generators_count = int(src_generators_count * 1.5) + dst_generators_count = cpu_count - src_generators_count self.set_training_data_generators ([ - SampleGeneratorFace(training_data_src_path, random_ct_samples_path=training_data_dst_path if self.options['ct_mode'] != 'none' else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types = [ {'types' : (t_img_warped, face_type, t_mode_bgr), 'resolution':resolution, 'ct_mode': self.options['ct_mode'] }, - {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution, 'ct_mode': self.options['ct_mode'] }, - {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution } ] - ), + SampleGeneratorFace(training_data_src_path, random_ct_samples_path=random_ct_samples_path, debug=self.is_debug(), batch_size=self.get_batch_size(), + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types = [ {'types' : (t_img_warped, face_type, t.MODE_BGR), 'resolution':resolution, 'ct_mode': self.options['ct_mode'] }, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution': resolution, 'ct_mode': self.options['ct_mode'] }, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution } ], + generators_count=src_generators_count ), - SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, ), - output_sample_types = [ {'types' : (t_img_warped, face_type, t_mode_bgr), 'resolution':resolution}, - {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution}, - {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution} ]) + SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.get_batch_size(), + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types = [ {'types' : (t_img_warped, face_type, t.MODE_BGR), 'resolution':resolution}, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution': resolution}, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution} ], + generators_count=dst_generators_count ) ]) #override def get_model_filename_list(self): - return self.model.get_model_filename_list ( exclude_for_pretrain=(self.pretrain and self.iter != 0) ) +self.opt_dis_model + return self.model_filename_list #override def onSave(self): - self.save_weights_safe( self.get_model_filename_list()+self.opt_dis_model ) + for model, filename in io.progress_bar_generator(self.get_model_filename_list(), "Saving", leave=False): + model.save_weights ( self.get_strpath_storage_for_file(filename) ) + #override - def on_success_train_one_iter(self): - if len(self.CA_conv_weights_list) != 0: - exec(nnlib.import_all(), locals(), globals()) - CAInitializerMP ( self.CA_conv_weights_list ) - self.CA_conv_weights_list = [] + def onTrainOneIter(self): + ( (warped_src, target_src, target_srcm), \ + (warped_dst, target_dst, target_dstm) ) = self.generate_next_samples() + + src_loss, dst_loss = 
self.src_dst_train (warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm) - #override - def onTrainOneIter(self, generators_samples, generators_list): - warped_src, target_src, target_srcm = generators_samples[0] - warped_dst, target_dst, target_dstm = generators_samples[1] - - feed = [warped_src, warped_dst, target_src, target_srcm, target_dst, target_dstm] - - src_loss, dst_loss, = self.src_dst_train (feed) - - if self.true_face_training: - self.D_train([warped_src, warped_dst]) - - if self.options['learn_mask']: - feed = [ warped_src, warped_dst, target_srcm, target_dstm ] - src_mask_loss, dst_mask_loss, = self.src_dst_mask_train (feed) + if self.options['true_face_training'] and not self.pretrain: + self.D_train (warped_src, warped_dst) return ( ('src_loss', src_loss), ('dst_loss', dst_loss), ) #override - def onGetPreview(self, sample): - test_S = sample[0][1][0:4] #first 4 samples - test_S_m = sample[0][2][0:4] #first 4 samples - test_D = sample[1][1][0:4] - test_D_m = sample[1][2][0:4] + def onGetPreview(self, samples): + n_samples = min(4, self.get_batch_size() ) + + ( (warped_src, target_src, target_srcm), + (warped_dst, target_dst, target_dstm) ) = \ + [ [sample[0:n_samples] for sample in sample_list ] + for sample_list in samples ] if self.options['learn_mask']: - S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] + S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ] DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] else: - S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] + S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ] result = [] st = [] - for i in range(len(test_S)): + for i in range(n_samples): ar = S[i], SS[i], D[i], DD[i], SD[i] st.append ( np.concatenate ( ar, axis=1) ) @@ -610,28 +746,25 @@ class SAEHDModel(ModelBase): if self.options['learn_mask']: st_m = [] - for i in range(len(test_S)): - ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) + for i in range(n_samples): + ar = S[i]*target_srcm[i], SS[i], D[i]*target_dstm[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) st_m.append ( np.concatenate ( ar, axis=1) ) result += [ ('SAEHD masked', np.concatenate (st_m, axis=0 )), ] return result - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.AE_convert ([ np.zeros ( (1, self.options['resolution'], self.options['resolution'], 3), dtype=np.float32 ) ]) + def predictor_func (self, face=None): + if self.options['learn_mask']: + bgr, mask_dst_dstm, mask_src_dstm = self.AE_merge (face[np.newaxis,...]) + mask = mask_dst_dstm[0] * mask_src_dstm[0] + return bgr[0], mask[...,0] else: - if self.options['learn_mask']: - bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) - mask = mask_dst_dstm[0] * mask_src_dstm[0] - return bgr[0], mask[...,0] - else: - bgr, = self.AE_convert ([face[np.newaxis,...]]) - return bgr[0] + bgr, = self.AE_merge (face[np.newaxis,...]) + return bgr[0] #override - def get_ConverterConfig(self): + def get_MergerConfig(self): if self.options['face_type'] == 'h': face_type = FaceType.HALF elif self.options['face_type'] == 'mf': @@ -639,8 +772,8 @@ class SAEHDModel(ModelBase): elif self.options['face_type'] == 'f': face_type = FaceType.FULL - import converters - return 
self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), converters.ConverterConfigMasked(face_type=face_type, + import merger + return self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), merger.MergerConfigMasked(face_type=face_type, default_mode = 'overlay' if self.options['ct_mode'] != 'none' or self.options['face_style_power'] or self.options['bg_style_power'] else 'seamless', clip_hborder_mask_per=0.0625 if (face_type != FaceType.HALF) else 0, ) diff --git a/models/__init__.py b/models/__init__.py index 971091d..490e9c8 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1,5 +1,5 @@ from .ModelBase import ModelBase -def import_model(name): - module = __import__('Model_'+name, globals(), locals(), [], 1) +def import_model(model_class_name): + module = __import__('Model_'+model_class_name, globals(), locals(), [], 1) return getattr(module, 'Model') diff --git a/models/archived_models.zip b/models/archived_models.zip deleted file mode 100644 index 02b7a0b..0000000 Binary files a/models/archived_models.zip and /dev/null differ diff --git a/nnlib/CAInitializer.py b/nnlib/CAInitializer.py deleted file mode 100644 index f81dd06..0000000 --- a/nnlib/CAInitializer.py +++ /dev/null @@ -1,112 +0,0 @@ -import numpy as np - -def _compute_fans(shape, data_format='channels_last'): - """Computes the number of input and output units for a weight shape. - # Arguments - shape: Integer shape tuple. - data_format: Image data format to use for convolution kernels. - Note that all kernels in Keras are standardized on the - `channels_last` ordering (even when inputs are set - to `channels_first`). - # Returns - A tuple of scalars, `(fan_in, fan_out)`. - # Raises - ValueError: in case of invalid `data_format` argument. - """ - if len(shape) == 2: - fan_in = shape[0] - fan_out = shape[1] - elif len(shape) in {3, 4, 5}: - # Assuming convolution kernels (1D, 2D or 3D). - # TH kernel shape: (depth, input_depth, ...) - # TF kernel shape: (..., input_depth, depth) - if data_format == 'channels_first': - receptive_field_size = np.prod(shape[2:]) - fan_in = shape[1] * receptive_field_size - fan_out = shape[0] * receptive_field_size - elif data_format == 'channels_last': - receptive_field_size = np.prod(shape[:-2]) - fan_in = shape[-2] * receptive_field_size - fan_out = shape[-1] * receptive_field_size - else: - raise ValueError('Invalid data_format: ' + data_format) - else: - # No specific assumptions. 
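As a quick sanity check of the fan arithmetic in this removed helper, here is a worked example for a hypothetical channels_last 2D conv kernel; the kernel shape is illustrative and not taken from the repository.

```python
# Worked example mirroring the channels_last branch of the deleted _compute_fans helper.
import numpy as np

shape = (3, 3, 64, 128)                       # (rows, cols, in_ch, out_ch); example values only
receptive_field_size = np.prod(shape[:-2])    # 3*3 = 9
fan_in = shape[-2] * receptive_field_size     # 64 * 9  = 576
fan_out = shape[-1] * receptive_field_size    # 128 * 9 = 1152
variance = 2 / fan_in                         # the 2/fan_in scaling used by CAGenerateWeights
print(fan_in, fan_out, round(variance, 5))    # 576 1152 0.00347
```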
- fan_in = np.sqrt(np.prod(shape)) - fan_out = np.sqrt(np.prod(shape)) - return fan_in, fan_out - -def _create_basis(filters, size, floatx, eps_std): - if size == 1: - return np.random.normal(0.0, eps_std, (filters, size)) - - nbb = filters // size + 1 - li = [] - for i in range(nbb): - a = np.random.normal(0.0, 1.0, (size, size)) - a = _symmetrize(a) - u, _, v = np.linalg.svd(a) - li.extend(u.T.tolist()) - p = np.array(li[:filters], dtype=floatx) - return p - -def _symmetrize(a): - return a + a.T - np.diag(a.diagonal()) - -def _scale_filters(filters, variance): - c_var = np.var(filters) - p = np.sqrt(variance / c_var) - return filters * p - -def CAGenerateWeights ( shape, floatx, data_format, eps_std=0.05, seed=None ): - if seed is not None: - np.random.seed(seed) - - fan_in, fan_out = _compute_fans(shape, data_format) - variance = 2 / fan_in - - rank = len(shape) - if rank == 3: - row, stack_size, filters_size = shape - - transpose_dimensions = (2, 1, 0) - kernel_shape = (row,) - correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0]) - correct_fft = np.fft.rfft - - elif rank == 4: - row, column, stack_size, filters_size = shape - - transpose_dimensions = (2, 3, 1, 0) - kernel_shape = (row, column) - correct_ifft = np.fft.irfft2 - correct_fft = np.fft.rfft2 - - elif rank == 5: - x, y, z, stack_size, filters_size = shape - - transpose_dimensions = (3, 4, 0, 1, 2) - kernel_shape = (x, y, z) - correct_fft = np.fft.rfftn - correct_ifft = np.fft.irfftn - else: - raise ValueError('rank unsupported') - - kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape - - init = [] - for i in range(filters_size): - basis = _create_basis( - stack_size, np.prod(kernel_fourier_shape), floatx, eps_std) - basis = basis.reshape((stack_size,) + kernel_fourier_shape) - - filters = [correct_ifft(x, kernel_shape) + - np.random.normal(0, eps_std, kernel_shape) for - x in basis] - - init.append(filters) - - # Format of array is now: filters, stack, row, column - init = np.array(init) - init = _scale_filters(init, variance) - return init.transpose(transpose_dimensions) diff --git a/nnlib/DeepPortraitRelighting.py b/nnlib/DeepPortraitRelighting.py deleted file mode 100644 index 8fc6176..0000000 --- a/nnlib/DeepPortraitRelighting.py +++ /dev/null @@ -1,241 +0,0 @@ -import math -from pathlib import Path - -import cv2 -import numpy as np -import numpy.linalg as npla - - -class DeepPortraitRelighting(object): - - def __init__(self): - from nnlib import nnlib - nnlib.import_torch() - self.torch = nnlib.torch - self.torch_device = nnlib.torch_device - self.model = DeepPortraitRelighting.build_model(self.torch, self.torch_device) - - def SH_basis(self, alt, azi): - alt = alt * math.pi / 180.0 - azi = azi * math.pi / 180.0 - - x = math.cos(alt)*math.sin(azi) - y = -math.cos(alt)*math.cos(azi) - z = math.sin(alt) - - normal = np.array([x,y,z]) - - norm_X = normal[0] - norm_Y = normal[1] - norm_Z = normal[2] - - sh_basis = np.zeros((9)) - att= np.pi*np.array([1, 2.0/3.0, 1/4.0]) - sh_basis[0] = 0.5/np.sqrt(np.pi)*att[0] - - sh_basis[1] = np.sqrt(3)/2/np.sqrt(np.pi)*norm_Y*att[1] - sh_basis[2] = np.sqrt(3)/2/np.sqrt(np.pi)*norm_Z*att[1] - sh_basis[3] = np.sqrt(3)/2/np.sqrt(np.pi)*norm_X*att[1] - - sh_basis[4] = np.sqrt(15)/2/np.sqrt(np.pi)*norm_Y*norm_X*att[2] - sh_basis[5] = np.sqrt(15)/2/np.sqrt(np.pi)*norm_Y*norm_Z*att[2] - sh_basis[6] = np.sqrt(5)/4/np.sqrt(np.pi)*(3*norm_Z**2-1)*att[2] - sh_basis[7] = np.sqrt(15)/2/np.sqrt(np.pi)*norm_X*norm_Z*att[2] - sh_basis[8] = 
np.sqrt(15)/4/np.sqrt(np.pi)*(norm_X**2-norm_Y**2)*att[2] - return sh_basis - - #n = [0..8] - def relight(self, img, alt, azi, intensity=1.0, lighten=False): - torch = self.torch - - sh = self.SH_basis (alt, azi) - sh = (sh.reshape( (1,9,1,1) ) ).astype(np.float32) - #sh *= 0.1 - sh = torch.autograd.Variable(torch.from_numpy(sh).to(self.torch_device)) - - row, col, _ = img.shape - img = cv2.resize(img, (512, 512)) - Lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB) - - inputL = Lab[:,:,0] - outputImg, outputSH = self.model(torch.autograd.Variable(torch.from_numpy(inputL[None,None,...].astype(np.float32)/255.0).to(self.torch_device)), - sh, 0) - - outputImg = outputImg[0].cpu().data.numpy() - outputImg = outputImg.transpose((1,2,0)) - outputImg = np.squeeze(outputImg) - outputImg = np.clip (outputImg, 0.0, 1.0) - outputImg = cv2.blur(outputImg, (3,3) ) - - if not lighten: - outputImg = inputL*(1.0-intensity) + (inputL*outputImg)*intensity - else: - outputImg = inputL*(1.0-intensity) + (outputImg*255.0)*intensity - - outputImg = np.clip(outputImg, 0,255).astype(np.uint8) - - Lab[:,:,0] = outputImg - result = cv2.cvtColor(Lab, cv2.COLOR_LAB2BGR) - result = cv2.resize(result, (col, row)) - return result - - @staticmethod - def build_model(torch, torch_device): - nn = torch.nn - F = torch.nn.functional - - def conv3X3(in_planes, out_planes, stride=1): - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) - - # define the network - class BasicBlock(nn.Module): - def __init__(self, inplanes, outplanes, batchNorm_type=0, stride=1, downsample=None): - super(BasicBlock, self).__init__() - # batchNorm_type 0 means batchnormalization - # 1 means instance normalization - self.inplanes = inplanes - self.outplanes = outplanes - self.conv1 = conv3X3(inplanes, outplanes, 1) - self.conv2 = conv3X3(outplanes, outplanes, 1) - if batchNorm_type == 0: - self.bn1 = nn.BatchNorm2d(outplanes) - self.bn2 = nn.BatchNorm2d(outplanes) - else: - self.bn1 = nn.InstanceNorm2d(outplanes) - self.bn2 = nn.InstanceNorm2d(outplanes) - - self.shortcuts = nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=1, bias=False) - - def forward(self, x): - out = self.conv1(x) - out = self.bn1(out) - out = F.relu(out) - out = self.conv2(out) - out = self.bn2(out) - - if self.inplanes != self.outplanes: - out += self.shortcuts(x) - else: - out += x - - out = F.relu(out) - return out - - class HourglassBlock(nn.Module): - def __init__(self, inplane, mid_plane, middleNet, skipLayer=True): - super(HourglassBlock, self).__init__() - # upper branch - self.skipLayer = True - self.upper = BasicBlock(inplane, inplane, batchNorm_type=1) - - # lower branch - self.downSample = nn.MaxPool2d(kernel_size=2, stride=2) - self.upSample = nn.Upsample(scale_factor=2, mode='nearest') - self.low1 = BasicBlock(inplane, mid_plane) - self.middle = middleNet - self.low2 = BasicBlock(mid_plane, inplane, batchNorm_type=1) - - def forward(self, x, light, count, skip_count): - # we use count to indicate wich layer we are in - # max_count indicates the from which layer, we would use skip connections - out_upper = self.upper(x) - out_lower = self.downSample(x) - out_lower = self.low1(out_lower) - out_lower, out_middle = self.middle(out_lower, light, count+1, skip_count) - out_lower = self.low2(out_lower) - out_lower = self.upSample(out_lower) - if count >= skip_count and self.skipLayer: - out = out_lower + out_upper - else: - out = out_lower - return out, out_middle - - class lightingNet(nn.Module): - def __init__(self, ncInput, 
ncOutput, ncMiddle): - super(lightingNet, self).__init__() - self.ncInput = ncInput - self.ncOutput = ncOutput - self.ncMiddle = ncMiddle - self.predict_FC1 = nn.Conv2d(self.ncInput, self.ncMiddle, kernel_size=1, stride=1, bias=False) - self.predict_relu1 = nn.PReLU() - self.predict_FC2 = nn.Conv2d(self.ncMiddle, self.ncOutput, kernel_size=1, stride=1, bias=False) - - self.post_FC1 = nn.Conv2d(self.ncOutput, self.ncMiddle, kernel_size=1, stride=1, bias=False) - self.post_relu1 = nn.PReLU() - self.post_FC2 = nn.Conv2d(self.ncMiddle, self.ncInput, kernel_size=1, stride=1, bias=False) - self.post_relu2 = nn.ReLU() # to be consistance with the original feature - - def forward(self, innerFeat, target_light, count, skip_count): - x = innerFeat[:,0:self.ncInput,:,:] # lighting feature - _, _, row, col = x.shape - # predict lighting - feat = x.mean(dim=(2,3), keepdim=True) - light = self.predict_relu1(self.predict_FC1(feat)) - light = self.predict_FC2(light) - upFeat = self.post_relu1(self.post_FC1(target_light)) - upFeat = self.post_relu2(self.post_FC2(upFeat)) - upFeat = upFeat.repeat((1,1,row, col)) - innerFeat[:,0:self.ncInput,:,:] = upFeat - return innerFeat, light#light - - - class HourglassNet(nn.Module): - def __init__(self, baseFilter = 16, gray=True): - super(HourglassNet, self).__init__() - - self.ncLight = 27 # number of channels for input to lighting network - self.baseFilter = baseFilter - - # number of channles for output of lighting network - if gray: - self.ncOutLight = 9 # gray: channel is 1 - else: - self.ncOutLight = 27 # color: channel is 3 - - self.ncPre = self.baseFilter # number of channels for pre-convolution - - # number of channels - self.ncHG3 = self.baseFilter - self.ncHG2 = 2*self.baseFilter - self.ncHG1 = 4*self.baseFilter - self.ncHG0 = 8*self.baseFilter + self.ncLight - - self.pre_conv = nn.Conv2d(1, self.ncPre, kernel_size=5, stride=1, padding=2) - self.pre_bn = nn.BatchNorm2d(self.ncPre) - - self.light = lightingNet(self.ncLight, self.ncOutLight, 128) - self.HG0 = HourglassBlock(self.ncHG1, self.ncHG0, self.light) - self.HG1 = HourglassBlock(self.ncHG2, self.ncHG1, self.HG0) - self.HG2 = HourglassBlock(self.ncHG3, self.ncHG2, self.HG1) - self.HG3 = HourglassBlock(self.ncPre, self.ncHG3, self.HG2) - - self.conv_1 = nn.Conv2d(self.ncPre, self.ncPre, kernel_size=3, stride=1, padding=1) - self.bn_1 = nn.BatchNorm2d(self.ncPre) - self.conv_2 = nn.Conv2d(self.ncPre, self.ncPre, kernel_size=1, stride=1, padding=0) - self.bn_2 = nn.BatchNorm2d(self.ncPre) - self.conv_3 = nn.Conv2d(self.ncPre, self.ncPre, kernel_size=1, stride=1, padding=0) - self.bn_3 = nn.BatchNorm2d(self.ncPre) - - self.output = nn.Conv2d(self.ncPre, 1, kernel_size=1, stride=1, padding=0) - - def forward(self, x, target_light, skip_count): - feat = self.pre_conv(x) - - feat = F.relu(self.pre_bn(feat)) - # get the inner most features - feat, out_light = self.HG3(feat, target_light, 0, skip_count) - #return feat, out_light - - feat = F.relu(self.bn_1(self.conv_1(feat))) - feat = F.relu(self.bn_2(self.conv_2(feat))) - feat = F.relu(self.bn_3(self.conv_3(feat))) - out_img = self.output(feat) - out_img = torch.sigmoid(out_img) - return out_img, out_light - - model = HourglassNet() - t_dict = torch.load( Path(__file__).parent / 'DeepPortraitRelighting.t7' ) - model.load_state_dict(t_dict) - model.to( torch_device ) - model.train(False) - return model diff --git a/nnlib/DeepPortraitRelighting.t7 b/nnlib/DeepPortraitRelighting.t7 deleted file mode 100644 index 943b172..0000000 Binary files 
a/nnlib/DeepPortraitRelighting.t7 and /dev/null differ diff --git a/nnlib/FUNIT.py b/nnlib/FUNIT.py deleted file mode 100644 index 0bd5006..0000000 --- a/nnlib/FUNIT.py +++ /dev/null @@ -1,333 +0,0 @@ -from pathlib import Path - -import numpy as np - -from interact import interact as io -from nnlib import nnlib - -""" -My port of FUNIT: Few-Shot Unsupervised Image-to-Image Translation to pure keras. -original repo: https://github.com/NVlabs/FUNIT/ -""" -class FUNIT(object): - VERSION = 1 - def __init__ (self, face_type_str, - batch_size, - encoder_nf=64, - encoder_downs=2, - encoder_res_blk=2, - class_downs=4, - class_nf=64, - class_latent=64, - mlp_blks=2, - dis_nf=64, - dis_res_blks=10, - num_classes=2, - subpixel_decoder=True, - initialize_weights=True, - - load_weights_locally=False, - weights_file_root=None, - - is_training=True, - tf_cpu_mode=0, - ): - exec( nnlib.import_all(), locals(), globals() ) - - self.batch_size = batch_size - bgr_shape = (None, None, 3) - label_shape = (1,) - - self.enc_content = modelify ( FUNIT.ContentEncoderFlow(downs=encoder_downs, nf=encoder_nf, n_res_blks=encoder_res_blk) ) ( Input(bgr_shape) ) - self.enc_class_model = modelify ( FUNIT.ClassModelEncoderFlow(downs=class_downs, nf=class_nf, latent_dim=class_latent) ) ( Input(bgr_shape) ) - self.decoder = modelify ( FUNIT.DecoderFlow(ups=encoder_downs, n_res_blks=encoder_res_blk, mlp_blks=mlp_blks, subpixel_decoder=subpixel_decoder ) ) \ - ( [ Input(K.int_shape(self.enc_content.outputs[0])[1:], name="decoder_input_1"), - Input(K.int_shape(self.enc_class_model.outputs[0])[1:], name="decoder_input_2") - ] ) - - self.dis = modelify ( FUNIT.DiscriminatorFlow(nf=dis_nf, n_res_blks=dis_res_blks, num_classes=num_classes) ) (Input(bgr_shape)) - - self.G_opt = RMSprop(lr=0.0001, decay=0.0001, tf_cpu_mode=tf_cpu_mode) - self.D_opt = RMSprop(lr=0.0001, decay=0.0001, tf_cpu_mode=tf_cpu_mode) - - xa = Input(bgr_shape, name="xa") - la = Input(label_shape, dtype="int32", name="la") - - xb = Input(bgr_shape, name="xb") - lb = Input(label_shape, dtype="int32", name="lb") - - s_xa_one = Input( ( K.int_shape(self.enc_class_model.outputs[0])[-1],), name="s_xa_input") - - c_xa = self.enc_content(xa) - - s_xa = self.enc_class_model(xa) - s_xb = self.enc_class_model(xb) - - s_xa_mean = K.mean(s_xa, axis=0) - - xr = self.decoder ([c_xa,s_xa]) - xt = self.decoder ([c_xa,s_xb]) - xr_one = self.decoder ([c_xa,s_xa_one]) - - d_xr, d_xr_feat = self.dis(xr) - d_xt, d_xt_feat = self.dis(xt) - - d_xa, d_xa_feat = self.dis(xa) - d_xb, d_xb_feat = self.dis(xb) - - def dis_gather(x,l): - tensors = [] - for i in range(self.batch_size): - t = x[i:i+1,:,:, l[i,0]] - tensors += [t] - return tensors - - def dis_gather_batch_mean(x,l, func=None): - x_shape = K.shape(x) - b,h,w,c = x_shape[0],x_shape[1],x_shape[2],x_shape[3] - b,h,w,c = [ K.cast(x, K.floatx()) for x in [b,h,w,c] ] - - tensors = dis_gather(x,l) - if func is not None: - tensors = [func(t) for t in tensors] - - return K.sum(tensors, axis=[1,2,3]) / (h*w) - - def dis_gather_mean(x,l, func=None, acc_func=None): - x_shape = K.shape(x) - b,h,w,c = x_shape[0],x_shape[1],x_shape[2],x_shape[3] - b,h,w,c = [ K.cast(x, K.floatx()) for x in [b,h,w,c] ] - - tensors = dis_gather(x,l) - - if acc_func is not None: - acc = [] - for t in tensors: - acc += [ K.sum( K.cast( acc_func(t), K.floatx() )) ] - acc = K.cast( K.sum(acc), K.floatx() ) / (b*h*w) - else: - acc = None - - if func is not None: - tensors = [func(t) for t in tensors] - - return K.sum(tensors, axis=[1,2,3] ) / (h*w), acc - - d_xr_la, 
d_xr_la_acc = dis_gather_mean(d_xr, la, acc_func=lambda x: x >= 0) - d_xt_lb, d_xt_lb_acc = dis_gather_mean(d_xt, lb, acc_func=lambda x: x >= 0) - - d_xb_lb = dis_gather_batch_mean(d_xb, lb) - - d_xb_lb_real, d_xb_lb_real_acc = dis_gather_mean(d_xb, lb, lambda x: K.relu(1.0-x), acc_func=lambda x: x >= 0) - d_xt_lb_fake, d_xt_lb_fake_acc = dis_gather_mean(d_xt, lb, lambda x: K.relu(1.0+x), acc_func=lambda x: x < 0) - - - G_c_rec = K.mean(K.abs(K.mean(d_xr_feat, axis=[1,2]) - K.mean(d_xa_feat, axis=[1,2])), axis=1 ) #* 1.0 - G_m_rec = K.mean(K.abs(K.mean(d_xt_feat, axis=[1,2]) - K.mean(d_xb_feat, axis=[1,2])), axis=1 ) #* 1.0 - G_x_rec = 0.1 * K.mean(K.abs(xr-xa), axis=[1,2,3]) - - G_loss = (-d_xr_la-d_xt_lb)*0.5 + G_x_rec + G_c_rec + G_m_rec - - G_weights = self.enc_class_model.trainable_weights + self.enc_content.trainable_weights + self.decoder.trainable_weights - ###### - - D_real = d_xb_lb_real #1.0 * - D_fake = d_xt_lb_fake #1.0 * - - l_reg = 10 * K.sum( K.gradients( d_xb_lb, xb )[0] ** 2 , axis=[1,2,3] ) #/ self.batch_size ) - - D_loss = D_real + D_fake + l_reg - - D_weights = self.dis.trainable_weights - - self.G_train = K.function ([xa, la, xb, lb],[K.mean(G_loss)], self.G_opt.get_updates(G_loss, G_weights) ) - - self.D_train = K.function ([xa, la, xb, lb],[K.mean(D_loss)], self.D_opt.get_updates(D_loss, D_weights) ) - self.get_average_class_code = K.function ([xa],[s_xa_mean]) - - self.G_convert = K.function ([xa,s_xa_one],[xr_one]) - - if initialize_weights: - #gather weights from layers for initialization - weights_list = [] - for model, _ in self.get_model_filename_list(): - if type(model) == keras.models.Model: - for layer in model.layers: - if type(layer) == FUNITAdain: - weights_list += [ x for x in layer.weights if 'kernel' in x.name ] - elif type(layer) == keras.layers.Conv2D or type(layer) == keras.layers.Dense: - weights_list += [ layer.weights[0] ] - - initer = keras.initializers.he_normal() - for w in weights_list: - K.set_value( w, K.get_value(initer(K.int_shape(w))) ) - - - if load_weights_locally: - pass - #f weights_file_root is not None: - # weights_file_root = Path(weights_file_root) - #lse: - # weights_file_root = Path(__file__).parent - #elf.weights_path = weights_file_root / ('FUNIT_%s.h5' % (face_type_str) ) - #f load_weights: - # self.model.load_weights (str(self.weights_path)) - - - - def get_model_filename_list(self): - return [[self.enc_class_model, 'enc_class_model.h5'], - [self.enc_content, 'enc_content.h5'], - [self.decoder, 'decoder.h5'], - [self.dis, 'dis.h5'], - [self.G_opt, 'G_opt.h5'], - [self.D_opt, 'D_opt.h5'], - ] - - def train(self, xa,la,xb,lb): - D_loss, = self.D_train ([xa,la,xb,lb]) - G_loss, = self.G_train ([xa,la,xb,lb]) - return G_loss, D_loss - - def get_average_class_code(self, *args, **kwargs): - return self.get_average_class_code(*args, **kwargs) - - def convert(self, *args, **kwargs): - return self.G_convert(*args, **kwargs) - - @staticmethod - def ContentEncoderFlow(downs=2, nf=64, n_res_blks=2): - exec (nnlib.import_all(), locals(), globals()) - - def ResBlock(dim): - def func(input): - x = input - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = InstanceNormalization()(x) - x = ReLU()(x) - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = InstanceNormalization()(x) - - return Add()([x,input]) - return func - - def func(x): - x = Conv2D (nf, kernel_size=7, strides=1, padding='same')(x) - x = InstanceNormalization()(x) - x = ReLU()(x) - for i in range(downs): - x = Conv2D (nf * 2**(i+1), kernel_size=4, strides=2, 
padding='valid')(ZeroPadding2D(1)(x)) - x = InstanceNormalization()(x) - x = ReLU()(x) - for i in range(n_res_blks): - x = ResBlock( nf * 2**downs )(x) - return x - - return func - - @staticmethod - def ClassModelEncoderFlow(downs=4, nf=64, latent_dim=64): - exec (nnlib.import_all(), locals(), globals()) - - def func(x): - x = Conv2D (nf, kernel_size=7, strides=1, padding='same', activation='relu')(x) - for i in range(downs): - x = Conv2D (nf * min ( 4, 2**(i+1) ), kernel_size=4, strides=2, padding='valid', activation='relu')(ZeroPadding2D(1)(x)) - x = GlobalAveragePooling2D()(x) - x = Dense(latent_dim)(x) - return x - - return func - - @staticmethod - def DecoderFlow(ups, n_res_blks=2, mlp_blks=2, subpixel_decoder=False ): - exec (nnlib.import_all(), locals(), globals()) - - def ResBlock(dim): - def func(input): - inp, mlp = input - x = inp - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = FUNITAdain(kernel_initializer='he_normal')([x,mlp]) - x = ReLU()(x) - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = FUNITAdain(kernel_initializer='he_normal')([x,mlp]) - return Add()([x,inp]) - return func - - def func(inputs): - x , class_code = inputs - - nf = K.int_shape(x)[-1] - - ### MLP block inside decoder - mlp = class_code - for i in range(mlp_blks): - mlp = Dense(nf, activation='relu')(mlp) - - for i in range(n_res_blks): - x = ResBlock(nf)( [x,mlp] ) - - for i in range(ups): - - if subpixel_decoder: - x = Conv2D (4* (nf // 2**(i+1)), kernel_size=3, strides=1, padding='same')(x) - x = SubpixelUpscaler()(x) - else: - x = UpSampling2D()(x) - x = Conv2D (nf // 2**(i+1), kernel_size=5, strides=1, padding='same')(x) - - x = InstanceNormalization()(x) - x = ReLU()(x) - - rgb = Conv2D (3, kernel_size=7, strides=1, padding='same', activation='tanh')(x) - return rgb - - return func - - - - @staticmethod - def DiscriminatorFlow(nf, n_res_blks, num_classes ): - exec (nnlib.import_all(), locals(), globals()) - - n_layers = n_res_blks // 2 - - def ActFirstResBlock(fout): - def func(x): - fin = K.int_shape(x)[-1] - fhid = min(fin, fout) - - if fin != fout: - x_s = Conv2D (fout, kernel_size=1, strides=1, padding='valid', use_bias=False)(x) - else: - x_s = x - - x = LeakyReLU(0.2)(x) - x = Conv2D (fhid, kernel_size=3, strides=1, padding='valid')(ZeroPadding2D(1)(x)) - x = LeakyReLU(0.2)(x) - x = Conv2D (fout, kernel_size=3, strides=1, padding='valid')(ZeroPadding2D(1)(x)) - return Add()([x_s, x]) - - return func - - def func( x ): - l_nf = nf - x = Conv2D (l_nf, kernel_size=7, strides=1, padding='valid')(ZeroPadding2D(3)(x)) - for i in range(n_layers-1): - l_nf_out = min( l_nf*2, 1024 ) - x = ActFirstResBlock(l_nf)(x) - x = ActFirstResBlock(l_nf_out)(x) - x = AveragePooling2D( pool_size=3, strides=2, padding='valid' )(ZeroPadding2D(1)(x)) - l_nf = min( l_nf*2, 1024 ) - - l_nf_out = min( l_nf*2, 1024 ) - x = ActFirstResBlock(l_nf)(x) - feat = x = ActFirstResBlock(l_nf_out)(x) - - x = LeakyReLU(0.2)(x) - x = Conv2D (num_classes, kernel_size=1, strides=1, padding='valid')(x) - - return x, feat - - return func \ No newline at end of file diff --git a/nnlib/TernausNet.py b/nnlib/TernausNet.py deleted file mode 100644 index 9016ead..0000000 --- a/nnlib/TernausNet.py +++ /dev/null @@ -1,157 +0,0 @@ -import os -import pickle -from functools import partial -from pathlib import Path - -import cv2 -import numpy as np - -from interact import interact as io -from nnlib import nnlib - -""" -Dataset used to train located in official DFL mega.nz folder -https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg - 
-using https://github.com/ternaus/TernausNet -TernausNet: U-Net with VGG11 Encoder Pre-Trained on ImageNet for Image Segmentation -""" - -class TernausNet(object): - VERSION = 1 - def __init__ (self, name, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): - exec( nnlib.import_all(), locals(), globals() ) - - self.model = TernausNet.BuildModel(resolution, ngf=64) - - if weights_file_root is not None: - weights_file_root = Path(weights_file_root) - else: - weights_file_root = Path(__file__).parent - - self.weights_path = weights_file_root / ('%s_%d_%s.h5' % (name, resolution, face_type_str) ) - - if load_weights: - self.model.load_weights (str(self.weights_path)) - else: - if training: - try: - with open( Path(__file__).parent / 'vgg11_enc_weights.npy', 'rb' ) as f: - d = pickle.loads (f.read()) - - for i in [0,3,6,8,11,13,16,18]: - s = 'features.%d' % i - - self.model.get_layer (s).set_weights ( d[s] ) - except: - io.log_err("Unable to load VGG11 pretrained weights from vgg11_enc_weights.npy") - - conv_weights_list = [] - for layer in self.model.layers: - if 'CA.' in layer.name: - conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - CAInitializerMP ( conv_weights_list ) - - if training: - inp_t = Input ( (resolution, resolution, 3) ) - real_t = Input ( (resolution, resolution, 1) ) - out_t = self.model(inp_t) - - loss = K.mean(10*K.binary_crossentropy(real_t,out_t) ) - - out_t_diff1 = out_t[:, 1:, :, :] - out_t[:, :-1, :, :] - out_t_diff2 = out_t[:, :, 1:, :] - out_t[:, :, :-1, :] - - total_var_loss = K.mean( 0.1*K.abs(out_t_diff1), axis=[1, 2, 3] ) + K.mean( 0.1*K.abs(out_t_diff2), axis=[1, 2, 3] ) - - opt = Adam(lr=0.0001, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2) - - self.train_func = K.function ( [inp_t, real_t], [K.mean(loss)], opt.get_updates( [loss], self.model.trainable_weights) ) - - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def save_weights(self): - self.model.save_weights (str(self.weights_path)) - - def train(self, inp, real): - loss, = self.train_func ([inp, real]) - return loss - - def extract (self, input_image, is_input_tanh=False): - input_shape_len = len(input_image.shape) - if input_shape_len == 3: - input_image = input_image[np.newaxis,...] 
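For reference, the training branch above combines a weighted binary cross-entropy with a total-variation penalty on the predicted mask. A minimal NumPy sketch of that smoothness term (function and argument names here are illustrative, not from the deleted file):

```python
import numpy as np

def total_variation(mask, weight=0.1):
    # mask: (H, W, 1) float array of predicted probabilities
    dy = np.abs(mask[1:, :, :] - mask[:-1, :, :])   # vertical neighbour differences
    dx = np.abs(mask[:, 1:, :] - mask[:, :-1, :])   # horizontal neighbour differences
    # the deleted code averages each term and scales it by 0.1
    return weight * (dy.mean() + dx.mean())
```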
- - result = np.clip ( self.model.predict( [input_image] ), 0, 1.0 ) - result[result < 0.1] = 0 #get rid of noise - - if input_shape_len == 3: - result = result[0] - - return result - - @staticmethod - def BuildModel ( resolution, ngf=64): - exec( nnlib.import_all(), locals(), globals() ) - inp = Input ( (resolution,resolution,3) ) - x = inp - x = TernausNet.Flow(ngf=ngf)(x) - model = Model(inp,x) - return model - - @staticmethod - def Flow(ngf=64): - exec( nnlib.import_all(), locals(), globals() ) - - def func(input): - x = input - - x0 = x = Conv2D(ngf, kernel_size=3, strides=1, padding='same', activation='relu', name='features.0')(x) - x = BlurPool(filt_size=3)(x) - - x1 = x = Conv2D(ngf*2, kernel_size=3, strides=1, padding='same', activation='relu', name='features.3')(x) - x = BlurPool(filt_size=3)(x) - - x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.6')(x) - x2 = x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.8')(x) - x = BlurPool(filt_size=3)(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.11')(x) - x3 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.13')(x) - x = BlurPool(filt_size=3)(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.16')(x) - x4 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.18')(x) - x = BlurPool(filt_size=3)(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', name='CA.1')(x) - - x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu', name='CA.2') (x) - x = Concatenate(axis=3)([ x, x4]) - x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu', name='CA.3') (x) - - x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu', name='CA.4') (x) - x = Concatenate(axis=3)([ x, x3]) - x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu', name='CA.5') (x) - - x = Conv2DTranspose (ngf*2, 3, strides=2, padding='same', activation='relu', name='CA.6') (x) - x = Concatenate(axis=3)([ x, x2]) - x = Conv2D (ngf*4, 3, strides=1, padding='same', activation='relu', name='CA.7') (x) - - x = Conv2DTranspose (ngf, 3, strides=2, padding='same', activation='relu', name='CA.8') (x) - x = Concatenate(axis=3)([ x, x1]) - x = Conv2D (ngf*2, 3, strides=1, padding='same', activation='relu', name='CA.9') (x) - - x = Conv2DTranspose (ngf // 2, 3, strides=2, padding='same', activation='relu', name='CA.10') (x) - x = Concatenate(axis=3)([ x, x0]) - x = Conv2D (ngf, 3, strides=1, padding='same', activation='relu', name='CA.11') (x) - - return Conv2D(1, 3, strides=1, padding='same', activation='sigmoid', name='CA.12')(x) - - - return func diff --git a/nnlib/VGGFace.py b/nnlib/VGGFace.py deleted file mode 100644 index 60a1de4..0000000 --- a/nnlib/VGGFace.py +++ /dev/null @@ -1,51 +0,0 @@ -from nnlib import nnlib - -def VGGFace(): - exec(nnlib.import_all(), locals(), globals()) - - img_input = Input(shape=(224,224,3) ) - - # Block 1 - x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_1')(img_input) - x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x) - - # Block 2 - x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')(x) - x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')(x) - x = 
MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x) - - # Block 3 - x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3')(x) - - # Block 4 - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool4')(x) - - # Block 5 - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool5')(x) - - - # Classification block - x = Flatten(name='flatten')(x) - x = Dense(4096, name='fc6')(x) - x = Activation('relu', name='fc6/relu')(x) - x = Dense(4096, name='fc7')(x) - x = Activation('relu', name='fc7/relu')(x) - x = Dense(2622, name='fc8')(x) - x = Activation('softmax', name='fc8/softmax')(x) - - model = Model(img_input, x, name='vggface_vgg16') - weights_path = keras.utils.data_utils.get_file('rcmalli_vggface_tf_vgg16.h5', - 'https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_tf_vgg16.h5') - model.load_weights(weights_path, by_name=True) - - return model \ No newline at end of file diff --git a/nnlib/__init__.py b/nnlib/__init__.py deleted file mode 100644 index 6876185..0000000 --- a/nnlib/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .nnlib import nnlib -from .FUNIT import FUNIT -from .TernausNet import TernausNet -from .VGGFace import VGGFace -from .DeepPortraitRelighting import DeepPortraitRelighting \ No newline at end of file diff --git a/nnlib/device.py b/nnlib/device.py deleted file mode 100644 index 0264842..0000000 --- a/nnlib/device.py +++ /dev/null @@ -1,311 +0,0 @@ -import sys -import ctypes -import os -import json -import numpy as np - -#you can set DFL_TF_MIN_REQ_CAP manually for your build -#the reason why we cannot check tensorflow.version is it requires import tensorflow -tf_min_req_cap = int(os.environ.get("DFL_TF_MIN_REQ_CAP", 35)) - -class device: - backend = None - class Config(): - force_gpu_idx = -1 - multi_gpu = False - force_gpu_idxs = None - choose_worst_gpu = False - gpu_idxs = [] - gpu_names = [] - gpu_compute_caps = [] - gpu_vram_gb = [] - allow_growth = True - use_fp16 = False - cpu_only = False - backend = None - def __init__ (self, force_gpu_idx = -1, - multi_gpu = False, - force_gpu_idxs = None, - choose_worst_gpu = False, - allow_growth = True, - use_fp16 = False, - cpu_only = False, - **in_options): - - self.backend = device.backend - self.use_fp16 = use_fp16 - self.cpu_only = cpu_only - - if not self.cpu_only: - self.cpu_only = (self.backend == "tensorflow-cpu") - - if not self.cpu_only: - self.force_gpu_idx = force_gpu_idx - self.multi_gpu = multi_gpu - self.force_gpu_idxs = force_gpu_idxs - self.choose_worst_gpu = choose_worst_gpu - self.allow_growth = allow_growth - - self.gpu_idxs = [] - - if force_gpu_idxs is not None: - for idx in force_gpu_idxs.split(','): - idx = int(idx) - if device.isValidDeviceIdx(idx): - self.gpu_idxs.append(idx) - else: - gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and 
device.isValidDeviceIdx(force_gpu_idx)) else device.getBestValidDeviceIdx() if not choose_worst_gpu else device.getWorstValidDeviceIdx() - if gpu_idx != -1: - if self.multi_gpu: - self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx ) - if len(self.gpu_idxs) <= 1: - self.multi_gpu = False - else: - self.gpu_idxs = [gpu_idx] - - self.cpu_only = (len(self.gpu_idxs) == 0) - - - if not self.cpu_only: - self.gpu_names = [] - self.gpu_compute_caps = [] - self.gpu_vram_gb = [] - for gpu_idx in self.gpu_idxs: - self.gpu_names += [device.getDeviceName(gpu_idx)] - self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ] - self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ] - self.cpu_only = (len(self.gpu_idxs) == 0) - else: - self.gpu_names = ['CPU'] - self.gpu_compute_caps = [99] - self.gpu_vram_gb = [0] - - if self.cpu_only: - self.backend = "tensorflow-cpu" - - @staticmethod - def getValidDeviceIdxsEnumerator(): - if device.backend == "plaidML": - for i in range(plaidML_devices_count): - yield i - elif device.backend == "tensorflow": - for dev in cuda_devices: - yield dev['index'] - - @staticmethod - def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb): - result = [] - if device.backend == "plaidML": - for i in device.getValidDeviceIdxsEnumerator(): - if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024: - result.append (i) - elif device.backend == "tensorflow": - for dev in cuda_devices: - if dev['total_mem'] >= totalmemsize_gb*1024*1024*1024: - result.append ( dev['index'] ) - - return result - - @staticmethod - def getValidDevicesIdxsWithNamesList(): - if device.backend == "plaidML": - return [ (i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ] - elif device.backend == "tensorflow": - return [ ( dev['index'], dev['name'] ) for dev in cuda_devices ] - elif device.backend == "tensorflow-cpu": - return [ (0, 'CPU') ] - - @staticmethod - def getDeviceVRAMTotalGb (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024) - elif device.backend == "tensorflow": - for dev in cuda_devices: - if idx == dev['index']: - return round ( dev['total_mem'] / (1024*1024*1024) ) - return 0 - - @staticmethod - def getBestValidDeviceIdx(): - if device.backend == "plaidML": - idx = -1 - idx_mem = 0 - for i in device.getValidDeviceIdxsEnumerator(): - total = plaidML_devices[i]['globalMemSize'] - if total > idx_mem: - idx = i - idx_mem = total - - return idx - elif device.backend == "tensorflow": - idx = -1 - idx_mem = 0 - for dev in cuda_devices: - if dev['total_mem'] > idx_mem: - idx = dev['index'] - idx_mem = dev['total_mem'] - - return idx - - @staticmethod - def getWorstValidDeviceIdx(): - if device.backend == "plaidML": - idx = -1 - idx_mem = sys.maxsize - for i in device.getValidDeviceIdxsEnumerator(): - total = plaidML_devices[i]['globalMemSize'] - if total < idx_mem: - idx = i - idx_mem = total - - return idx - elif device.backend == "tensorflow": - idx = -1 - idx_mem = sys.maxsize - for dev in cuda_devices: - if dev['total_mem'] < idx_mem: - idx = dev['index'] - idx_mem = dev['total_mem'] - - return idx - - @staticmethod - def isValidDeviceIdx(idx): - if device.backend == "plaidML": - return idx in [*device.getValidDeviceIdxsEnumerator()] - elif device.backend == "tensorflow": - for dev in cuda_devices: - if idx == dev['index']: - return True - return False - - @staticmethod - def getDeviceIdxsEqualModel(idx): - if device.backend == 
"plaidML": - result = [] - idx_name = plaidML_devices[idx]['description'] - for i in device.getValidDeviceIdxsEnumerator(): - if plaidML_devices[i]['description'] == idx_name: - result.append (i) - - return result - elif device.backend == "tensorflow": - result = [] - idx_name = device.getDeviceName(idx) - for dev in cuda_devices: - if dev['name'] == idx_name: - result.append ( dev['index'] ) - - - return result - - @staticmethod - def getDeviceName (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['description'] - elif device.backend == "tensorflow": - for dev in cuda_devices: - if dev['index'] == idx: - return dev['name'] - - return None - - @staticmethod - def getDeviceID (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['id'].decode() - - return None - - @staticmethod - def getDeviceComputeCapability(idx): - if device.backend == "plaidML": - return 99 - elif device.backend == "tensorflow": - for dev in cuda_devices: - if dev['index'] == idx: - return dev['cc'] - return 0 - -plaidML_build = os.environ.get("DFL_PLAIDML_BUILD", "0") == "1" -plaidML_devices = None -plaidML_devices_count = 0 -cuda_devices = None - -if plaidML_build: - if plaidML_devices is None: - plaidML_devices = [] - # Using plaidML OpenCL backend to determine system devices - try: - os.environ['PLAIDML_EXPERIMENTAL'] = 'false' #this enables work plaidML without run 'plaidml-setup' - import plaidml - ctx = plaidml.Context() - for d in plaidml.devices(ctx, return_all=True)[0]: - details = json.loads(d.details) - if details['type'] == 'CPU': #skipping opencl-CPU - continue - plaidML_devices += [ {'id':d.id, - 'globalMemSize' : int(details['globalMemSize']), - 'description' : d.description.decode() - }] - ctx.shutdown() - except: - pass - plaidML_devices_count = len(plaidML_devices) - if plaidML_devices_count != 0: - device.backend = "plaidML" -else: - if cuda_devices is None: - cuda_devices = [] - libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll') - cuda = None - for libname in libnames: - try: - cuda = ctypes.CDLL(libname) - except: - continue - else: - break - - if cuda is not None: - nGpus = ctypes.c_int() - name = b' ' * 200 - cc_major = ctypes.c_int() - cc_minor = ctypes.c_int() - freeMem = ctypes.c_size_t() - totalMem = ctypes.c_size_t() - - result = ctypes.c_int() - device_t = ctypes.c_int() - context = ctypes.c_void_p() - error_str = ctypes.c_char_p() - - if cuda.cuInit(0) == 0 and \ - cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0: - for i in range(nGpus.value): - if cuda.cuDeviceGet(ctypes.byref(device_t), i) != 0 or \ - cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device_t) != 0 or \ - cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device_t) != 0: - continue - - if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device_t) == 0: - if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0: - cc = cc_major.value * 10 + cc_minor.value - if cc >= tf_min_req_cap: - cuda_devices.append ( {'index':i, - 'name':name.split(b'\0', 1)[0].decode(), - 'total_mem':totalMem.value, - 'free_mem':freeMem.value, - 'cc':cc - } - ) - cuda.cuCtxDetach(context) - - if len(cuda_devices) != 0: - device.backend = "tensorflow" - -if device.backend is None: - device.backend = "tensorflow-cpu" diff --git a/nnlib/nnlib.py b/nnlib/nnlib.py deleted file mode 100644 index 975cf94..0000000 --- a/nnlib/nnlib.py +++ /dev/null @@ -1,1904 +0,0 @@ -import contextlib -import 
multiprocessing -import os -import sys -from pathlib import Path - -import numpy as np - -from interact import interact as io -from joblib import Subprocessor -from utils import std_utils - -from .CAInitializer import CAGenerateWeights -from .device import device - - -class nnlib(object): - device = device #forwards nnlib.devicelib to device in order to use nnlib as standalone lib - DeviceConfig = device.Config - active_DeviceConfig = DeviceConfig() #default is one best GPU - - backend = "" - - dlib = None - - torch = None - torch_device = None - - keras = None - keras_contrib = None - - tf = None - tf_sess = None - tf_sess_config = None - - PML = None - PMLK = None - PMLTile= None - - code_import_keras = None - code_import_keras_contrib = None - code_import_all = None - - code_import_dlib = None - - - ResNet = None - UNet = None - UNetTemporalPredictor = None - NLayerDiscriminator = None - - code_import_keras_string = \ -""" -keras = nnlib.keras -K = keras.backend -KL = keras.layers - -Input = KL.Input - -Dense = KL.Dense -Conv2D = KL.Conv2D -WScaleConv2DLayer = nnlib.WScaleConv2DLayer -Conv2DTranspose = KL.Conv2DTranspose -EqualConv2D = nnlib.EqualConv2D -SeparableConv2D = KL.SeparableConv2D -DepthwiseConv2D = KL.DepthwiseConv2D -MaxPooling2D = KL.MaxPooling2D -AveragePooling2D = KL.AveragePooling2D -GlobalAveragePooling2D = KL.GlobalAveragePooling2D -UpSampling2D = KL.UpSampling2D -BatchNormalization = KL.BatchNormalization -PixelNormalization = nnlib.PixelNormalization - -Activation = KL.Activation -LeakyReLU = KL.LeakyReLU -ELU = KL.ELU -GeLU = nnlib.GeLU -ReLU = KL.ReLU -PReLU = KL.PReLU -tanh = KL.Activation('tanh') -sigmoid = KL.Activation('sigmoid') -Dropout = KL.Dropout -Softmax = KL.Softmax - -Lambda = KL.Lambda -Add = KL.Add -Multiply = KL.Multiply -Concatenate = KL.Concatenate - - -Flatten = KL.Flatten -Reshape = KL.Reshape - -ZeroPadding2D = KL.ZeroPadding2D - -RandomNormal = keras.initializers.RandomNormal -Model = keras.models.Model - -Adam = nnlib.Adam -RMSprop = nnlib.RMSprop -LookaheadOptimizer = nnlib.LookaheadOptimizer -SGD = nnlib.keras.optimizers.SGD - -modelify = nnlib.modelify -gaussian_blur = nnlib.gaussian_blur -style_loss = nnlib.style_loss -dssim = nnlib.dssim - -DenseMaxout = nnlib.DenseMaxout -PixelShuffler = nnlib.PixelShuffler -SubpixelUpscaler = nnlib.SubpixelUpscaler -SubpixelDownscaler = nnlib.SubpixelDownscaler -Scale = nnlib.Scale -BilinearInterpolation = nnlib.BilinearInterpolation -BlurPool = nnlib.BlurPool -FUNITAdain = nnlib.FUNITAdain -SelfAttention = nnlib.SelfAttention - -CAInitializerMP = nnlib.CAInitializerMP - -#ReflectionPadding2D = nnlib.ReflectionPadding2D -#AddUniformNoise = nnlib.AddUniformNoise -""" - code_import_keras_contrib_string = \ -""" -keras_contrib = nnlib.keras_contrib -GroupNormalization = keras_contrib.layers.GroupNormalization -InstanceNormalization = keras_contrib.layers.InstanceNormalization -""" - code_import_dlib_string = \ -""" -dlib = nnlib.dlib -""" - - code_import_all_string = \ -""" -DSSIMMSEMaskLoss = nnlib.DSSIMMSEMaskLoss -ResNet = nnlib.ResNet -UNet = nnlib.UNet -UNetTemporalPredictor = nnlib.UNetTemporalPredictor -NLayerDiscriminator = nnlib.NLayerDiscriminator -""" - @staticmethod - def import_torch(device_config=None): - if nnlib.torch is not None: - return - - if device_config is None: - device_config = nnlib.active_DeviceConfig - else: - nnlib.active_DeviceConfig = device_config - - if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): - os.environ.pop('CUDA_VISIBLE_DEVICES') - - io.log_info ("Using PyTorch 
backend.") - import torch - nnlib.torch = torch - - if device_config.cpu_only or device_config.backend == 'plaidML': - nnlib.torch_device = torch.device(type='cpu') - else: - nnlib.torch_device = torch.device(type='cuda', index=device_config.gpu_idxs[0] ) - torch.cuda.set_device(nnlib.torch_device) - - @staticmethod - def _import_tf(device_config): - if nnlib.tf is not None: - return nnlib.code_import_tf - - if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': - suppressor = std_utils.suppress_stdout_stderr().__enter__() - else: - suppressor = None - - if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): - os.environ.pop('CUDA_VISIBLE_DEVICES') - - os.environ['CUDA_​CACHE_​MAXSIZE'] = '536870912' #512Mb (32mb default) - - if sys.platform[0:3] == 'win': - if len(device_config.gpu_idxs) == 1: - os.environ['CUDA_CACHE_PATH'] = \ - str(Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache_' + device_config.gpu_names[0].replace(' ','_'))) - - os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #tf log errors only - - import warnings - warnings.simplefilter(action='ignore', category=FutureWarning) - - import tensorflow as tf - nnlib.tf = tf - - if device_config.cpu_only: - config = tf.ConfigProto(device_count={'GPU': 0}) - else: - config = tf.ConfigProto() - - visible_device_list = '' - for idx in device_config.gpu_idxs: - visible_device_list += str(idx) + ',' - config.gpu_options.visible_device_list=visible_device_list[:-1] - - config.gpu_options.force_gpu_compatible = True - config.gpu_options.allow_growth = device_config.allow_growth - nnlib.tf_sess_config = config - - nnlib.tf_sess = tf.Session(config=config) - - if suppressor is not None: - suppressor.__exit__() - - @staticmethod - def import_keras(device_config): - if nnlib.keras is not None: - return nnlib.code_import_keras - - nnlib.backend = device_config.backend - if "tensorflow" in nnlib.backend: - nnlib._import_tf(device_config) - elif nnlib.backend == "plaidML": - os.environ["KERAS_BACKEND"] = "plaidml.keras.backend" - os.environ["PLAIDML_DEVICE_IDS"] = ",".join ( [ nnlib.device.getDeviceID(idx) for idx in device_config.gpu_idxs] ) - - #if "tensorflow" in nnlib.backend: - # nnlib.keras = nnlib.tf.keras - #else: - import keras as keras_ - nnlib.keras = keras_ - - if 'KERAS_BACKEND' in os.environ: - os.environ.pop('KERAS_BACKEND') - - if nnlib.backend == "plaidML": - import plaidml - import plaidml.tile - nnlib.PML = plaidml - nnlib.PMLK = plaidml.keras.backend - nnlib.PMLTile = plaidml.tile - - if device_config.use_fp16: - nnlib.keras.backend.set_floatx('float16') - - if "tensorflow" in nnlib.backend: - nnlib.keras.backend.set_session(nnlib.tf_sess) - - nnlib.keras.backend.set_image_data_format('channels_last') - - nnlib.code_import_keras = compile (nnlib.code_import_keras_string,'','exec') - nnlib.__initialize_keras_functions() - - return nnlib.code_import_keras - - @staticmethod - def __initialize_keras_functions(): - keras = nnlib.keras - K = keras.backend - KL = keras.layers - backend = nnlib.backend - - def modelify(model_functor): - def func(tensor): - return keras.models.Model (tensor, model_functor(tensor)) - return func - - nnlib.modelify = modelify - - def gaussian_blur(radius=2.0): - def gaussian(x, mu, sigma): - return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2)) - - def make_kernel(sigma): - kernel_size = max(3, int(2 * 2 * sigma + 1)) - mean = np.floor(0.5 * kernel_size) - kernel_1d = np.array([gaussian(x, mean, sigma) for x in 
range(kernel_size)]) - np_kernel = np.outer(kernel_1d, kernel_1d).astype(dtype=K.floatx()) - kernel = np_kernel / np.sum(np_kernel) - return kernel - - gauss_kernel = make_kernel(radius) - gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis] - - def func(input): - inputs = [ input[:,:,:,i:i+1] for i in range( K.int_shape( input )[-1] ) ] - - outputs = [] - for i in range(len(inputs)): - outputs += [ K.conv2d( inputs[i] , K.constant(gauss_kernel) , strides=(1,1), padding="same") ] - - return K.concatenate (outputs, axis=-1) - return func - nnlib.gaussian_blur = gaussian_blur - - def style_loss(gaussian_blur_radius=0.0, loss_weight=1.0, wnd_size=0, step_size=1): - if gaussian_blur_radius > 0.0: - gblur = gaussian_blur(gaussian_blur_radius) - - def sd(content, style, loss_weight): - content_nc = K.int_shape(content)[-1] - style_nc = K.int_shape(style)[-1] - if content_nc != style_nc: - raise Exception("style_loss() content_nc != style_nc") - - axes = [1,2] - c_mean, c_var = K.mean(content, axis=axes, keepdims=True), K.var(content, axis=axes, keepdims=True) - s_mean, s_var = K.mean(style, axis=axes, keepdims=True), K.var(style, axis=axes, keepdims=True) - c_std, s_std = K.sqrt(c_var + 1e-5), K.sqrt(s_var + 1e-5) - - mean_loss = K.sum(K.square(c_mean-s_mean)) - std_loss = K.sum(K.square(c_std-s_std)) - - return (mean_loss + std_loss) * ( loss_weight / float(content_nc) ) - - def func(target, style): - if wnd_size == 0: - if gaussian_blur_radius > 0.0: - return sd( gblur(target), gblur(style), loss_weight=loss_weight) - else: - return sd( target, style, loss_weight=loss_weight ) - else: - #currently unused - if nnlib.tf is not None: - sh = K.int_shape(target)[1] - k = (sh-wnd_size) // step_size + 1 - if gaussian_blur_radius > 0.0: - target, style = gblur(target), gblur(style) - target = nnlib.tf.image.extract_image_patches(target, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') - style = nnlib.tf.image.extract_image_patches(style, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') - return sd( target, style, loss_weight ) - if nnlib.PML is not None: - print ("Sorry, plaidML backend does not support style_loss") - return 0 - return func - nnlib.style_loss = style_loss - - def dssim(kernel_size=11, k1=0.01, k2=0.03, max_value=1.0): - # port of tf.image.ssim to pure keras in order to work on plaidML backend. - - def func(y_true, y_pred): - ch = K.shape(y_pred)[-1] - - def _fspecial_gauss(size, sigma): - #Function to mimic the 'fspecial' gaussian MATLAB function. 
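The 'fspecial' comment above refers to MATLAB's Gaussian window; the deleted dssim() rebuilds it with K.softmax over summed squared coordinates. An equivalent standalone NumPy sketch (illustrative only):

```python
import numpy as np

def fspecial_gauss(size, sigma):
    coords = np.arange(size, dtype=np.float64) - (size - 1) / 2.0
    g = -0.5 * coords ** 2 / sigma ** 2
    g = g[:, None] + g[None, :]   # negative squared distance on a size x size grid
    g = np.exp(g)                 # exp then normalize == softmax over the flattened grid
    return g / g.sum()
```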
- coords = np.arange(0, size, dtype=K.floatx()) - coords -= (size - 1 ) / 2.0 - g = coords**2 - g *= ( -0.5 / (sigma**2) ) - g = np.reshape (g, (1,-1)) + np.reshape(g, (-1,1) ) - g = K.constant ( np.reshape (g, (1,-1)) ) - g = K.softmax(g) - g = K.reshape (g, (size, size, 1, 1)) - g = K.tile (g, (1,1,ch,1)) - return g - - kernel = _fspecial_gauss(kernel_size,1.5) - - def reducer(x): - return K.depthwise_conv2d(x, kernel, strides=(1, 1), padding='valid') - - c1 = (k1 * max_value) ** 2 - c2 = (k2 * max_value) ** 2 - - mean0 = reducer(y_true) - mean1 = reducer(y_pred) - num0 = mean0 * mean1 * 2.0 - den0 = K.square(mean0) + K.square(mean1) - luminance = (num0 + c1) / (den0 + c1) - - num1 = reducer(y_true * y_pred) * 2.0 - den1 = reducer(K.square(y_true) + K.square(y_pred)) - c2 *= 1.0 #compensation factor - cs = (num1 - num0 + c2) / (den1 - den0 + c2) - - ssim_val = K.mean(luminance * cs, axis=(-3, -2) ) - return(1.0 - ssim_val ) / 2.0 - - return func - - nnlib.dssim = dssim - - if 'tensorflow' in backend: - class PixelShuffler(keras.layers.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShuffler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - if self.data_format == 'channels_first': - return K.tf.depth_to_space(inputs, self.size[0], 'NCHW') - - elif self.data_format == 'channels_last': - return K.tf.depth_to_space(inputs, self.size[0], 'NHWC') - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - if self.data_format == 'channels_first': - height = input_shape[2] * self.size[0] if input_shape[2] is not None else None - width = input_shape[3] * self.size[1] if input_shape[3] is not None else None - channels = input_shape[1] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[1]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - channels, - height, - width) - - elif self.data_format == 'channels_last': - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShuffler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - else: - class PixelShuffler(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShuffler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - if self.data_format == 'channels_first': - batch_size, c, h, w = input_shape[0], K.int_shape(inputs)[1], input_shape[2], input_shape[3] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc 
= c // (rh * rw) - - out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w)) - out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) - out = K.reshape(out, (batch_size, oc, oh, ow)) - return out - - elif self.data_format == 'channels_last': - batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc = c // (rh * rw) - - out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc)) - out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) - out = K.reshape(out, (batch_size, oh, ow, oc)) - return out - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - if self.data_format == 'channels_first': - height = input_shape[2] * self.size[0] if input_shape[2] is not None else None - width = input_shape[3] * self.size[1] if input_shape[3] is not None else None - channels = input_shape[1] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[1]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - channels, - height, - width) - - elif self.data_format == 'channels_last': - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShuffler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - - nnlib.PixelShuffler = PixelShuffler - nnlib.SubpixelUpscaler = PixelShuffler - - if 'tensorflow' in backend: - class SubpixelDownscaler(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(SubpixelDownscaler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - return K.tf.space_to_depth(inputs, self.size[0], 'NHWC') - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - height = input_shape[1] // self.size[0] if input_shape[1] is not None else None - width = input_shape[2] // self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] * self.size[0] * self.size[1] - - return (input_shape[0], height, width, channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(SubpixelDownscaler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - else: - class SubpixelDownscaler(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(SubpixelDownscaler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', 
str(K.int_shape(inputs))) - - batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] - rh, rw = self.size - oh, ow = h // rh, w // rw - oc = c * (rh * rw) - - out = K.reshape(inputs, (batch_size, oh, rh, ow, rw, c)) - out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) - out = K.reshape(out, (batch_size, oh, ow, oc)) - return out - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - height = input_shape[1] // self.size[0] if input_shape[1] is not None else None - width = input_shape[2] // self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] * self.size[0] * self.size[1] - - return (input_shape[0], height, width, channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(SubpixelDownscaler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - - nnlib.SubpixelDownscaler = SubpixelDownscaler - - class BlurPool(KL.Layer): - """ - https://arxiv.org/abs/1904.11486 https://github.com/adobe/antialiased-cnns - """ - def __init__(self, filt_size=3, stride=2, **kwargs): - self.strides = (stride,stride) - self.filt_size = filt_size - self.padding = ( (int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ), (int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ) ) - if(self.filt_size==1): - self.a = np.array([1.,]) - elif(self.filt_size==2): - self.a = np.array([1., 1.]) - elif(self.filt_size==3): - self.a = np.array([1., 2., 1.]) - elif(self.filt_size==4): - self.a = np.array([1., 3., 3., 1.]) - elif(self.filt_size==5): - self.a = np.array([1., 4., 6., 4., 1.]) - elif(self.filt_size==6): - self.a = np.array([1., 5., 10., 10., 5., 1.]) - elif(self.filt_size==7): - self.a = np.array([1., 6., 15., 20., 15., 6., 1.]) - - super(BlurPool, self).__init__(**kwargs) - - def compute_output_shape(self, input_shape): - height = input_shape[1] // self.strides[0] - width = input_shape[2] // self.strides[1] - channels = input_shape[3] - return (input_shape[0], height, width, channels) - - def call(self, x): - k = self.a - k = k[:,None]*k[None,:] - k = k / np.sum(k) - k = np.tile (k[:,:,None,None], (1,1,K.int_shape(x)[-1],1) ) - k = K.constant (k, dtype=K.floatx() ) - - x = K.spatial_2d_padding(x, padding=self.padding) - x = K.depthwise_conv2d(x, k, strides=self.strides, padding='valid') - return x - - nnlib.BlurPool = BlurPool - - class FUNITAdain(KL.Layer): - """ - differents from NVLabs/FUNIT: - I moved two dense blocks inside this layer, - so we don't need to slice outter MLP block and assign weights every call, just pass MLP inside. 
- also size of dense blocks is calculated automatically - """ - def __init__(self, axis=-1, epsilon=1e-5, momentum=0.99, kernel_initializer='glorot_uniform', **kwargs): - self.axis = axis - self.epsilon = epsilon - self.momentum = momentum - self.kernel_initializer = kernel_initializer - super(FUNITAdain, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = None - x, mlp = input_shape - units = x[self.axis] - - self.kernel1 = self.add_weight(shape=(units, units), initializer=self.kernel_initializer, name='kernel1') - self.bias1 = self.add_weight(shape=(units,), initializer='zeros', name='bias1') - self.kernel2 = self.add_weight(shape=(units, units), initializer=self.kernel_initializer, name='kernel2') - self.bias2 = self.add_weight(shape=(units,), initializer='zeros', name='bias2') - - self.built = True - - def call(self, inputs, training=None): - x, mlp = inputs - - gamma = K.dot(mlp, self.kernel1) - gamma = K.bias_add(gamma, self.bias1, data_format='channels_last') - - beta = K.dot(mlp, self.kernel2) - beta = K.bias_add(beta, self.bias2, data_format='channels_last') - - input_shape = K.int_shape(x) - - reduction_axes = list(range(len(input_shape))) - del reduction_axes[self.axis] - del reduction_axes[0] - - broadcast_shape = [1] * len(input_shape) - broadcast_shape[self.axis] = input_shape[self.axis] - mean = K.mean(x, reduction_axes, keepdims=True) - stddev = K.std(x, reduction_axes, keepdims=True) + self.epsilon - normed = (x - mean) / stddev - normed *= K.reshape(gamma,[-1]+broadcast_shape[1:] ) - normed += K.reshape(beta, [-1]+broadcast_shape[1:] ) - return normed - - def get_config(self): - config = {'axis': self.axis, 'epsilon': self.epsilon } - - base_config = super(FUNITAdain, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - nnlib.FUNITAdain = FUNITAdain - - class Scale(KL.Layer): - """ - GAN Custom Scal Layer - Code borrows from https://github.com/flyyufelix/cnn_finetune - """ - def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs): - self.axis = axis - self.gamma_init = keras.initializers.get(gamma_init) - self.initial_weights = weights - super(Scale, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = [keras.engine.InputSpec(shape=input_shape)] - - # Compatibility with TensorFlow >= 1.0.0 - self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name)) - self.trainable_weights = [self.gamma] - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - - def call(self, x, mask=None): - return self.gamma * x - - def get_config(self): - config = {"axis": self.axis} - base_config = super(Scale, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.Scale = Scale - - - """ - unable to work in plaidML, due to unimplemented ops - - class BilinearInterpolation(KL.Layer): - def __init__(self, size=(2,2), **kwargs): - self.size = size - super(BilinearInterpolation, self).__init__(**kwargs) - - def compute_output_shape(self, input_shape): - return (input_shape[0], input_shape[1]*self.size[1], input_shape[2]*self.size[0], input_shape[3]) - - - def call(self, X): - _,h,w,_ = K.int_shape(X) - - X = K.concatenate( [ X, X[:,:,-2:-1,:] ],axis=2 ) - X = K.concatenate( [ X, X[:,:,-2:-1,:] ],axis=2 ) - X = K.concatenate( [ X, X[:,-2:-1,:,:] ],axis=1 ) - X = K.concatenate( [ X, X[:,-2:-1,:,:] ],axis=1 ) - - X_sh = 
K.shape(X) - batch_size, height, width, num_channels = X_sh[0], X_sh[1], X_sh[2], X_sh[3] - - output_h, output_w = (h*self.size[1]+4, w*self.size[0]+4) - - x_linspace = np.linspace(-1. , 1. - 2/output_w, output_w)# - y_linspace = np.linspace(-1. , 1. - 2/output_h, output_h)# - - x_coordinates, y_coordinates = np.meshgrid(x_linspace, y_linspace) - x_coordinates = K.flatten(K.constant(x_coordinates, dtype=K.floatx() )) - y_coordinates = K.flatten(K.constant(y_coordinates, dtype=K.floatx() )) - - grid = K.concatenate([x_coordinates, y_coordinates, K.ones_like(x_coordinates)], 0) - grid = K.flatten(grid) - - - grids = K.tile(grid, ( batch_size, ) ) - grids = K.reshape(grids, (batch_size, 3, output_h * output_w )) - - - x = K.cast(K.flatten(grids[:, 0:1, :]), dtype='float32') - y = K.cast(K.flatten(grids[:, 1:2, :]), dtype='float32') - x = .5 * (x + 1.0) * K.cast(width, dtype='float32') - y = .5 * (y + 1.0) * K.cast(height, dtype='float32') - x0 = K.cast(x, 'int32') - x1 = x0 + 1 - y0 = K.cast(y, 'int32') - y1 = y0 + 1 - max_x = int(K.int_shape(X)[2] -1) - max_y = int(K.int_shape(X)[1] -1) - - x0 = K.clip(x0, 0, max_x) - x1 = K.clip(x1, 0, max_x) - y0 = K.clip(y0, 0, max_y) - y1 = K.clip(y1, 0, max_y) - - - pixels_batch = K.constant ( np.arange(0, batch_size) * (height * width), dtype=K.floatx() ) - - pixels_batch = K.expand_dims(pixels_batch, axis=-1) - - base = K.tile(pixels_batch, (1, output_h * output_w ) ) - base = K.flatten(base) - - base_y0 = base + y0 * width - - base_y1 = base + y1 * width - - indices_a = base_y0 + x0 - indices_b = base_y1 + x0 - indices_c = base_y0 + x1 - indices_d = base_y1 + x1 - - flat_image = K.reshape(X, (-1, num_channels) ) - flat_image = K.cast(flat_image, dtype='float32') - pixel_values_a = K.gather(flat_image, indices_a) - pixel_values_b = K.gather(flat_image, indices_b) - pixel_values_c = K.gather(flat_image, indices_c) - pixel_values_d = K.gather(flat_image, indices_d) - - x0 = K.cast(x0, 'float32') - x1 = K.cast(x1, 'float32') - y0 = K.cast(y0, 'float32') - y1 = K.cast(y1, 'float32') - - area_a = K.expand_dims(((x1 - x) * (y1 - y)), 1) - area_b = K.expand_dims(((x1 - x) * (y - y0)), 1) - area_c = K.expand_dims(((x - x0) * (y1 - y)), 1) - area_d = K.expand_dims(((x - x0) * (y - y0)), 1) - - values_a = area_a * pixel_values_a - values_b = area_b * pixel_values_b - values_c = area_c * pixel_values_c - values_d = area_d * pixel_values_d - interpolated_image = values_a + values_b + values_c + values_d - - new_shape = (batch_size, output_h, output_w, num_channels) - interpolated_image = K.reshape(interpolated_image, new_shape) - - interpolated_image = interpolated_image[:,:-4,:-4,:] - return interpolated_image - - def get_config(self): - config = {"size": self.size} - base_config = super(BilinearInterpolation, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - """ - class BilinearInterpolation(KL.Layer): - def __init__(self, size=(2,2), **kwargs): - self.size = size - super(BilinearInterpolation, self).__init__(**kwargs) - - def compute_output_shape(self, input_shape): - return (input_shape[0], input_shape[1]*self.size[1], input_shape[2]*self.size[0], input_shape[3]) - - def call(self, X): - _,h,w,_ = K.int_shape(X) - - return K.cast( K.tf.image.resize_images(X, (h*self.size[1],w*self.size[0]) ), K.floatx() ) - - def get_config(self): - config = {"size": self.size} - base_config = super(BilinearInterpolation, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - nnlib.BilinearInterpolation = 
BilinearInterpolation - - class WScaleConv2DLayer(KL.Conv2D): - def __init__(self, *args, gain=None, **kwargs): - kwargs['kernel_initializer'] = keras.initializers.random_normal() - - if gain is None: - gain = np.sqrt(2) - - self.gain = gain - - super(WScaleConv2DLayer,self).__init__(*args,**kwargs) - - def build(self, input_shape): - super().build(input_shape) - kernel_shape = K.int_shape(self.kernel) - std = np.sqrt(self.gain) / np.sqrt( np.prod(kernel_shape[:-1]) ) - self.wscale = K.constant(std, dtype=K.floatx() ) - - def call(self, input, **kwargs): - k = self.kernel - self.kernel = self.kernel*self.wscale - x = super().call(input,**kwargs) - self.kernel = k - return x - - def get_config(self): - config = {"gain": self.gain} - base_config = super(WScaleConv2DLayer, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - nnlib.WScaleConv2DLayer = WScaleConv2DLayer - - class SelfAttention(KL.Layer): - def __init__(self, nc, squeeze_factor=8, **kwargs): - assert nc//squeeze_factor > 0, f"Input channels must be >= {squeeze_factor}, recieved nc={nc}" - - self.nc = nc - self.squeeze_factor = squeeze_factor - super(SelfAttention, self).__init__(**kwargs) - - def compute_output_shape(self, input_shape): - return (input_shape[0], input_shape[1], input_shape[2], self.nc) - - def call(self, inp): - x = inp - shape_x = x.get_shape().as_list() - - f = Conv2D(self.nc//self.squeeze_factor, 1, kernel_regularizer=keras.regularizers.l2(1e-4))(x) - g = Conv2D(self.nc//self.squeeze_factor, 1, kernel_regularizer=keras.regularizers.l2(1e-4))(x) - h = Conv2D(self.nc, 1, kernel_regularizer=keras.regularizers.l2(1e-4))(x) - - shape_f = f.get_shape().as_list() - shape_g = g.get_shape().as_list() - shape_h = h.get_shape().as_list() - flat_f = Reshape( (-1, shape_f[-1]) )(f) - flat_g = Reshape( (-1, shape_g[-1]) )(g) - flat_h = Reshape( (-1, shape_h[-1]) )(h) - - s = Lambda(lambda x: K.batch_dot(x[0], keras.layers.Permute((2,1))(x[1]) ))([flat_g, flat_f]) - beta = keras.layers.Softmax(axis=-1)(s) - o = Lambda(lambda x: K.batch_dot(x[0], x[1]))([beta, flat_h]) - - o = Reshape(shape_x[1:])(o) - o = Scale()(o) - - out = Add()([o, inp]) - return out - nnlib.SelfAttention = SelfAttention - - class RMSprop(keras.optimizers.Optimizer): - """RMSProp optimizer. - It is recommended to leave the parameters of this optimizer - at their default values - (except the learning rate, which can be freely tuned). - # Arguments - learning_rate: float >= 0. Learning rate. - rho: float >= 0. - # References - - [rmsprop: Divide the gradient by a running average of its recent magnitude - ](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) - - tf_cpu_mode: only for tensorflow backend - 0 - default, no changes. - 1 - allows to train x2 bigger network on same VRAM consuming RAM - 2 - allows to train x3 bigger network on same VRAM consuming RAM*2 and CPU power. 
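The tf_cpu_mode option described in the docstring above trades GPU memory for host RAM by creating the optimizer's slot variables under a CPU device scope; the forward and backward passes still run on the GPU. A minimal sketch of that placement trick, assuming TF1-style graph mode (names are illustrative):

```python
import tensorflow as tf

def make_accumulators_on_cpu(params):
    # slot variables (e.g. the RMSprop accumulators) live in system RAM,
    # not VRAM, because they are created inside a /cpu:0 device scope
    with tf.device("/cpu:0"):
        return [tf.Variable(tf.zeros_like(p), trainable=False) for p in params]
```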
- """ - - def __init__(self, learning_rate=0.001, rho=0.9, lr_dropout=0, tf_cpu_mode=0, **kwargs): - self.initial_decay = kwargs.pop('decay', 0.0) - self.epsilon = kwargs.pop('epsilon', K.epsilon()) - self.lr_dropout = lr_dropout - self.tf_cpu_mode = tf_cpu_mode - - learning_rate = kwargs.pop('lr', learning_rate) - super(RMSprop, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.learning_rate = K.variable(learning_rate, name='learning_rate') - self.rho = K.variable(rho, name='rho') - self.decay = K.variable(self.initial_decay, name='decay') - self.iterations = K.variable(0, dtype='int64', name='iterations') - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None - if e: e.__enter__() - accumulators = [K.zeros(K.int_shape(p), - dtype=K.dtype(p), - name='accumulator_' + str(i)) - for (i, p) in enumerate(params)] - if self.lr_dropout != 0: - lr_rnds = [ K.random_binomial(K.int_shape(p), p=self.lr_dropout, dtype=K.dtype(p)) for p in params ] - if e: e.__exit__(None, None, None) - - self.weights = [self.iterations] + accumulators - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.learning_rate - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - for i, (p, g, a) in enumerate(zip(params, grads, accumulators)): - # update accumulator - e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None - if e: e.__enter__() - new_a = self.rho * a + (1. - self.rho) * K.square(g) - p_diff = - lr * g / (K.sqrt(new_a) + self.epsilon) - if self.lr_dropout != 0: - p_diff *= lr_rnds[i] - new_p = p + p_diff - if e: e.__exit__(None, None, None) - - self.updates.append(K.update(a, new_a)) - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def set_weights(self, weights): - params = self.weights - # Override set_weights for backward compatibility of Keras 2.2.4 optimizer - # since it does not include iteration at head of the weight list. Set - # iteration to 0. - if len(params) == len(weights) + 1: - weights = [np.array(0)] + weights - super(RMSprop, self).set_weights(weights) - - def get_config(self): - config = {'learning_rate': float(K.get_value(self.learning_rate)), - 'rho': float(K.get_value(self.rho)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon, - 'lr_dropout' : self.lr_dropout } - base_config = super(RMSprop, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.RMSprop = RMSprop - - class Adam(keras.optimizers.Optimizer): - """Adam optimizer. - - Default parameters follow those provided in the original paper. - - # Arguments - lr: float >= 0. Learning rate. - beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - amsgrad: boolean. Whether to apply the AMSGrad variant of this - algorithm from the paper "On the Convergence of Adam and - Beyond". - lr_dropout: float [0.0 .. 1.0] Learning rate dropout https://arxiv.org/pdf/1912.00144 - tf_cpu_mode: only for tensorflow backend - 0 - default, no changes. 
- 1 - allows to train x2 bigger network on same VRAM consuming RAM - 2 - allows to train x3 bigger network on same VRAM consuming RAM*2 and CPU power. - - # References - - [Adam - A Method for Stochastic Optimization] - (https://arxiv.org/abs/1412.6980v8) - - [On the Convergence of Adam and Beyond] - (https://openreview.net/forum?id=ryQu7f-RZ) - """ - - def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, - epsilon=None, decay=0., amsgrad=False, lr_dropout=0, tf_cpu_mode=0, **kwargs): - super(Adam, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') - self.decay = K.variable(decay, name='decay') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - self.amsgrad = amsgrad - self.lr_dropout = lr_dropout - self.tf_cpu_mode = tf_cpu_mode - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / - (1. - K.pow(self.beta_1, t))) - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None - if e: e.__enter__() - ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - if self.amsgrad: - vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - else: - vhats = [K.zeros(1) for _ in params] - - - if self.lr_dropout != 0: - lr_rnds = [ K.random_binomial(K.int_shape(p), p=self.lr_dropout, dtype=K.dtype(p)) for p in params ] - - if e: e.__exit__(None, None, None) - - self.weights = [self.iterations] + ms + vs + vhats - - for i, (p, g, m, v, vhat) in enumerate( zip(params, grads, ms, vs, vhats) ): - e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None - if e: e.__enter__() - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) - - if self.amsgrad: - vhat_t = K.maximum(vhat, v_t) - self.updates.append(K.update(vhat, vhat_t)) - if e: e.__exit__(None, None, None) - - if self.amsgrad: - p_diff = - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) - else: - p_diff = - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - - if self.lr_dropout != 0: - p_diff *= lr_rnds[i] - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) - new_p = p + p_diff - - # Apply constraints. 
- if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad, - 'lr_dropout' : self.lr_dropout} - base_config = super(Adam, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.Adam = Adam - - class LookaheadOptimizer(keras.optimizers.Optimizer): - def __init__(self, optimizer, sync_period=5, slow_step=0.5, tf_cpu_mode=0, **kwargs): - super(LookaheadOptimizer, self).__init__(**kwargs) - self.optimizer = optimizer - self.tf_cpu_mode = tf_cpu_mode - - with K.name_scope(self.__class__.__name__): - self.sync_period = K.variable(sync_period, dtype='int64', name='sync_period') - self.slow_step = K.variable(slow_step, name='slow_step') - - @property - def lr(self): - return self.optimizer.lr - - @lr.setter - def lr(self, lr): - self.optimizer.lr = lr - - @property - def learning_rate(self): - return self.optimizer.learning_rate - - @learning_rate.setter - def learning_rate(self, learning_rate): - self.optimizer.learning_rate = learning_rate - - @property - def iterations(self): - return self.optimizer.iterations - - def get_updates(self, loss, params): - sync_cond = K.equal((self.iterations + 1) // self.sync_period * self.sync_period, (self.iterations + 1)) - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None - if e: e.__enter__() - slow_params = [K.variable(K.get_value(p), name='sp_{}'.format(i)) for i, p in enumerate(params)] - if e: e.__exit__(None, None, None) - - - self.updates = self.optimizer.get_updates(loss, params) - slow_updates = [] - for p, sp in zip(params, slow_params): - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None - if e: e.__enter__() - sp_t = sp + self.slow_step * (p - sp) - if e: e.__exit__(None, None, None) - - slow_updates.append(K.update(sp, K.switch( - sync_cond, - sp_t, - sp, - ))) - slow_updates.append(K.update_add(p, K.switch( - sync_cond, - sp_t - p, - K.zeros_like(p), - ))) - - self.updates += slow_updates - self.weights = self.optimizer.weights + slow_params - return self.updates - - def get_config(self): - config = { - 'optimizer': keras.optimizers.serialize(self.optimizer), - 'sync_period': int(K.get_value(self.sync_period)), - 'slow_step': float(K.get_value(self.slow_step)), - } - base_config = super(LookaheadOptimizer, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - optimizer = keras.optimizers.deserialize(config.pop('optimizer')) - return cls(optimizer, **config) - nnlib.LookaheadOptimizer = LookaheadOptimizer - - class DenseMaxout(keras.layers.Layer): - """A dense maxout layer. - A `MaxoutDense` layer takes the element-wise maximum of - `nb_feature` `Dense(input_dim, output_dim)` linear layers. - This allows the layer to learn a convex, - piecewise linear activation function over the inputs. - Note that this is a *linear* layer; - if you wish to apply activation function - (you shouldn't need to --they are universal function approximators), - an `Activation` layer must be added after. - # Arguments - output_dim: int > 0. - nb_feature: number of Dense layers to use internally. 
- init: name of initialization function for the weights of the layer - (see [initializations](../initializations.md)), - or alternatively, Theano function to use for weights - initialization. This parameter is only relevant - if you don't pass a `weights` argument. - weights: list of Numpy arrays to set as initial weights. - The list should have 2 elements, of shape `(input_dim, output_dim)` - and (output_dim,) for weights and biases respectively. - W_regularizer: instance of [WeightRegularizer](../regularizers.md) - (eg. L1 or L2 regularization), applied to the main weights matrix. - b_regularizer: instance of [WeightRegularizer](../regularizers.md), - applied to the bias. - activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), - applied to the network output. - W_constraint: instance of the [constraints](../constraints.md) module - (eg. maxnorm, nonneg), applied to the main weights matrix. - b_constraint: instance of the [constraints](../constraints.md) module, - applied to the bias. - bias: whether to include a bias - (i.e. make the layer affine rather than linear). - input_dim: dimensionality of the input (integer). This argument - (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - # Input shape - 2D tensor with shape: `(nb_samples, input_dim)`. - # Output shape - 2D tensor with shape: `(nb_samples, output_dim)`. - # References - - [Maxout Networks](http://arxiv.org/abs/1302.4389) - """ - - def __init__(self, output_dim, - nb_feature=4, - kernel_initializer='glorot_uniform', - weights=None, - W_regularizer=None, - b_regularizer=None, - activity_regularizer=None, - W_constraint=None, - b_constraint=None, - bias=True, - input_dim=None, - **kwargs): - self.output_dim = output_dim - self.nb_feature = nb_feature - self.kernel_initializer = keras.initializers.get(kernel_initializer) - - self.W_regularizer = keras.regularizers.get(W_regularizer) - self.b_regularizer = keras.regularizers.get(b_regularizer) - self.activity_regularizer = keras.regularizers.get(activity_regularizer) - - self.W_constraint = keras.constraints.get(W_constraint) - self.b_constraint = keras.constraints.get(b_constraint) - - self.bias = bias - self.initial_weights = weights - self.input_spec = keras.layers.InputSpec(ndim=2) - - self.input_dim = input_dim - if self.input_dim: - kwargs['input_shape'] = (self.input_dim,) - super(DenseMaxout, self).__init__(**kwargs) - - def build(self, input_shape): - input_dim = input_shape[1] - self.input_spec = keras.layers.InputSpec(dtype=K.floatx(), - shape=(None, input_dim)) - - self.W = self.add_weight(shape=(self.nb_feature, input_dim, self.output_dim), - initializer=self.kernel_initializer, - name='W', - regularizer=self.W_regularizer, - constraint=self.W_constraint) - if self.bias: - self.b = self.add_weight(shape=(self.nb_feature, self.output_dim,), - initializer='zero', - name='b', - regularizer=self.b_regularizer, - constraint=self.b_constraint) - else: - self.b = None - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - self.built = True - - def compute_output_shape(self, input_shape): - assert input_shape and len(input_shape) == 2 - return (input_shape[0], self.output_dim) - - def call(self, x): - # no activation, this layer is only linear. 
- output = K.dot(x, self.W) - if self.bias: - output += self.b - output = K.max(output, axis=1) - return output - - def get_config(self): - config = {'output_dim': self.output_dim, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'nb_feature': self.nb_feature, - 'W_regularizer': regularizers.serialize(self.W_regularizer), - 'b_regularizer': regularizers.serialize(self.b_regularizer), - 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'W_constraint': constraints.serialize(self.W_constraint), - 'b_constraint': constraints.serialize(self.b_constraint), - 'bias': self.bias, - 'input_dim': self.input_dim} - base_config = super(DenseMaxout, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.DenseMaxout = DenseMaxout - - class GeLU(KL.Layer): - """Gaussian Error Linear Unit. - A smoother version of ReLU generally used - in the BERT or BERT architecture based models. - Original paper: https://arxiv.org/abs/1606.08415 - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - Output shape: - Same shape as the input. - """ - - def __init__(self, approximate=True, **kwargs): - super(GeLU, self).__init__(**kwargs) - self.approximate = approximate - self.supports_masking = True - - def call(self, inputs): - cdf = 0.5 * (1.0 + K.tanh((np.sqrt(2 / np.pi) * (inputs + 0.044715 * K.pow(inputs, 3))))) - return inputs * cdf - - def get_config(self): - config = {'approximate': self.approximate} - base_config = super(GeLU, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - nnlib.GeLU = GeLU - - def CAInitializerMP( conv_weights_list ): - #Convolution Aware Initialization https://arxiv.org/abs/1702.06295 - data = [ (i, K.int_shape(conv_weights)) for i, conv_weights in enumerate(conv_weights_list) ] - data = sorted(data, key=lambda data: np.prod(data[1]) ) - result = CAInitializerMPSubprocessor (data, K.floatx(), K.image_data_format() ).run() - for idx, weights in result: - K.set_value ( conv_weights_list[idx], weights ) - nnlib.CAInitializerMP = CAInitializerMP - - - if backend == "plaidML": - class TileOP_ReflectionPadding2D(nnlib.PMLTile.Operation): - def __init__(self, input, w_pad, h_pad): - if K.image_data_format() == 'channels_last': - if input.shape.ndims == 4: - H, W = input.shape.dims[1:3] - if (type(H) == int and h_pad >= H) or \ - (type(W) == int and w_pad >= W): - raise ValueError("Paddings must be less than dimensions.") - - c = """ function (I[B, H, W, C] ) -> (O) {{ - WE = W + {w_pad}*2; - HE = H + {h_pad}*2; - """.format(h_pad=h_pad, w_pad=w_pad) - if w_pad > 0: - c += """ - LEFT_PAD [b, h, w , c : B, H, WE, C ] = =(I[b, h, {w_pad}-w, c]), w < {w_pad} ; - HCENTER [b, h, w , c : B, H, WE, C ] = =(I[b, h, w-{w_pad}, c]), w < W+{w_pad}-1 ; - RIGHT_PAD[b, h, w , c : B, H, WE, C ] = =(I[b, h, 2*W - (w-{w_pad}) -2, c]); - LCR = LEFT_PAD+HCENTER+RIGHT_PAD; - """.format(h_pad=h_pad, w_pad=w_pad) - else: - c += "LCR = I;" - - if h_pad > 0: - c += """ - TOP_PAD [b, h, w , c : B, HE, WE, C ] = =(LCR[b, {h_pad}-h, w, c]), h < {h_pad}; - VCENTER [b, h, w , c : B, HE, WE, C ] = =(LCR[b, h-{h_pad}, w, c]), h < H+{h_pad}-1 ; - BOTTOM_PAD[b, h, w , c : B, HE, WE, C ] = =(LCR[b, 2*H - (h-{h_pad}) -2, w, c]); - TVB = TOP_PAD+VCENTER+BOTTOM_PAD; - """.format(h_pad=h_pad, w_pad=w_pad) - else: - c += 
"TVB = LCR;" - - c += "O = TVB; }" - - inp_dims = input.shape.dims - out_dims = (inp_dims[0], inp_dims[1]+h_pad*2, inp_dims[2]+w_pad*2, inp_dims[3]) - else: - raise NotImplemented - else: - raise NotImplemented - - super(TileOP_ReflectionPadding2D, self).__init__(c, [('I', input) ], - [('O', nnlib.PMLTile.Shape(input.shape.dtype, out_dims ) )]) - - class ReflectionPadding2D(keras.layers.Layer): - def __init__(self, padding=(1, 1), **kwargs): - self.padding = tuple(padding) - self.input_spec = [keras.layers.InputSpec(ndim=4)] - super(ReflectionPadding2D, self).__init__(**kwargs) - - def compute_output_shape(self, s): - """ If you are using "channels_last" configuration""" - return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3]) - - def call(self, x, mask=None): - w_pad,h_pad = self.padding - if "tensorflow" in backend: - return K.tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT') - elif backend == "plaidML": - return TileOP_ReflectionPadding2D.function(x, self.padding[0], self.padding[1]) - else: - if K.image_data_format() == 'channels_last': - if x.shape.ndims == 4: - w = K.concatenate ([ x[:,:,w_pad:0:-1,:], - x, - x[:,:,-2:-w_pad-2:-1,:] ], axis=2 ) - h = K.concatenate ([ w[:,h_pad:0:-1,:,:], - w, - w[:,-2:-h_pad-2:-1,:,:] ], axis=1 ) - return h - else: - raise NotImplemented - else: - raise NotImplemented - - nnlib.ReflectionPadding2D = ReflectionPadding2D - - class Conv2D(): - def __init__ (self, *args, **kwargs): - self.reflect_pad = False - padding = kwargs.get('padding','') - if padding == 'zero': - kwargs['padding'] = 'same' - if padding == 'reflect': - kernel_size = kwargs['kernel_size'] - if (kernel_size % 2) == 1: - self.pad = (kernel_size // 2,)*2 - kwargs['padding'] = 'valid' - self.reflect_pad = True - self.func = keras.layers.Conv2D (*args, **kwargs) - - def __call__(self,x): - if self.reflect_pad: - x = ReflectionPadding2D( self.pad ) (x) - return self.func(x) - nnlib.Conv2D = Conv2D - - class Conv2DTranspose(): - def __init__ (self, *args, **kwargs): - self.reflect_pad = False - padding = kwargs.get('padding','') - if padding == 'zero': - kwargs['padding'] = 'same' - if padding == 'reflect': - kernel_size = kwargs['kernel_size'] - if (kernel_size % 2) == 1: - self.pad = (kernel_size // 2,)*2 - kwargs['padding'] = 'valid' - self.reflect_pad = True - self.func = keras.layers.Conv2DTranspose (*args, **kwargs) - - def __call__(self,x): - if self.reflect_pad: - x = ReflectionPadding2D( self.pad ) (x) - return self.func(x) - nnlib.Conv2DTranspose = Conv2DTranspose - - class EqualConv2D(KL.Conv2D): - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - activation=None, - use_bias=True, - gain=np.sqrt(2), - **kwargs): - super().__init__( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=keras.initializers.RandomNormal(mean=0.0, stddev=1.0), - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs) - self.gain = gain - - def build(self, input_shape): - super().build(input_shape) - - self.wscale = self.gain / np.sqrt( np.prod( K.int_shape(self.kernel)[:-1]) ) - self.wscale_t = K.constant (self.wscale, dtype=K.floatx() ) - - def call(self, inputs): - k = self.kernel * self.wscale_t - - outputs = K.conv2d( - 
inputs, - k, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) - - if self.use_bias: - outputs = K.bias_add( - outputs, - self.bias, - data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - return outputs - nnlib.EqualConv2D = EqualConv2D - - class PixelNormalization(KL.Layer): - # initialize the layer - def __init__(self, **kwargs): - super(PixelNormalization, self).__init__(**kwargs) - - # perform the operation - def call(self, inputs): - # calculate square pixel values - values = inputs**2.0 - # calculate the mean pixel values - mean_values = K.mean(values, axis=-1, keepdims=True) - # ensure the mean is not zero - mean_values += 1.0e-8 - # calculate the sqrt of the mean squared value (L2 norm) - l2 = K.sqrt(mean_values) - # normalize values by the l2 norm - normalized = inputs / l2 - return normalized - - # define the output shape of the layer - def compute_output_shape(self, input_shape): - return input_shape - nnlib.PixelNormalization = PixelNormalization - - @staticmethod - def import_keras_contrib(device_config): - if nnlib.keras_contrib is not None: - return nnlib.code_import_keras_contrib - - import keras_contrib as keras_contrib_ - nnlib.keras_contrib = keras_contrib_ - nnlib.__initialize_keras_contrib_functions() - nnlib.code_import_keras_contrib = compile (nnlib.code_import_keras_contrib_string,'','exec') - - @staticmethod - def __initialize_keras_contrib_functions(): - pass - - @staticmethod - def import_dlib( device_config = None): - if nnlib.dlib is not None: - return nnlib.code_import_dlib - - import dlib as dlib_ - nnlib.dlib = dlib_ - if not device_config.cpu_only and "tensorflow" in device_config.backend and len(device_config.gpu_idxs) > 0: - nnlib.dlib.cuda.set_device(device_config.gpu_idxs[0]) - - nnlib.code_import_dlib = compile (nnlib.code_import_dlib_string,'','exec') - - @staticmethod - def import_all(device_config = None): - if nnlib.code_import_all is None: - if device_config is None: - device_config = nnlib.active_DeviceConfig - else: - nnlib.active_DeviceConfig = device_config - - nnlib.import_keras(device_config) - nnlib.import_keras_contrib(device_config) - nnlib.code_import_all = compile (nnlib.code_import_keras_string + '\n' - + nnlib.code_import_keras_contrib_string - + nnlib.code_import_all_string,'','exec') - nnlib.__initialize_all_functions() - - return nnlib.code_import_all - - @staticmethod - def __initialize_all_functions(): - exec (nnlib.import_keras(nnlib.active_DeviceConfig), locals(), globals()) - exec (nnlib.import_keras_contrib(nnlib.active_DeviceConfig), locals(), globals()) - - class DSSIMMSEMaskLoss(object): - def __init__(self, mask, is_mse=False): - self.mask = mask - self.is_mse = is_mse - def __call__(self,y_true, y_pred): - total_loss = None - mask = self.mask - if self.is_mse: - blur_mask = gaussian_blur(max(1, K.int_shape(mask)[1] // 64))(mask) - return K.mean ( 50*K.square( y_true*blur_mask - y_pred*blur_mask ) ) - else: - return 10*dssim() (y_true*mask, y_pred*mask) - nnlib.DSSIMMSEMaskLoss = DSSIMMSEMaskLoss - - - ''' - def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = 
False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) - - def func(input): - - - def ResnetBlock(dim): - def func(input): - x = input - - x = ReflectionPadding2D((1,1))(x) - x = Conv2D(dim, 3, 1, padding='valid')(x) - x = XNormalization(x) - x = ReLU()(x) - - if use_dropout: - x = Dropout(0.5)(x) - - x = ReflectionPadding2D((1,1))(x) - x = Conv2D(dim, 3, 1, padding='valid')(x) - x = XNormalization(x) - x = ReLU()(x) - return Add()([x,input]) - return func - - x = input - - x = ReflectionPadding2D((3,3))(x) - x = Conv2D(ngf, 7, 1, 'valid')(x) - - x = ReLU()(XNormalization(Conv2D(ngf*2, 4, 2, 'same')(x))) - x = ReLU()(XNormalization(Conv2D(ngf*4, 4, 2, 'same')(x))) - - for i in range(n_blocks): - x = ResnetBlock(ngf*4)(x) - - x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf*2 *4, 3, 1, 'same')(x)))) - x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf *4, 3, 1, 'same')(x)))) - - x = ReflectionPadding2D((3,3))(x) - x = Conv2D(output_nc, 7, 1, 'valid')(x) - x = tanh(x) - - return x - - return func - - nnlib.ResNet = ResNet - - # Defines the Unet generator. - # |num_downs|: number of downsamplings in UNet. 
For example, - # if |num_downs| == 7, image of size 128x128 will become of size 1x1 - # at the bottleneck - def UNet(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) - - def UNetSkipConnection(outer_nc, inner_nc, sub_model=None, outermost=False, innermost=False, use_dropout=False): - def func(inp): - x = inp - - x = Conv2D(inner_nc, 4, 2, 'valid')(ReflectionPadding2D( (1,1) )(x)) - x = XNormalization(x) - x = ReLU()(x) - - if not innermost: - x = sub_model(x) - - if not outermost: - x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) - x = XNormalization(x) - x = ReLU()(x) - - if not innermost: - if use_dropout: - x = Dropout(0.5)(x) - - x = Concatenate(axis=3)([inp, x]) - else: - x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) - x = tanh(x) - - - return x - - return func - - def func(input): - - unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=None, innermost=True) - - for i in range(num_downs - 5): - unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=unet_block, use_dropout=use_dropout) - - unet_block = UNetSkipConnection(ngf * 4 , ngf * 8, sub_model=unet_block) - unet_block = UNetSkipConnection(ngf * 2 , ngf * 4, sub_model=unet_block) - unet_block = UNetSkipConnection(ngf , ngf * 2, sub_model=unet_block) - unet_block = UNetSkipConnection(output_nc, ngf , sub_model=unet_block, outermost=True) - - return unet_block(input) - return func - nnlib.UNet = UNet - - #predicts based on two 
past_image_tensors - def UNetTemporalPredictor(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - def func(inputs): - past_2_image_tensor, past_1_image_tensor = inputs - - x = Concatenate(axis=3)([ past_2_image_tensor, past_1_image_tensor ]) - x = UNet(3, use_batch_norm, num_downs=num_downs, ngf=ngf, use_dropout=use_dropout) (x) - - return x - - return func - nnlib.UNetTemporalPredictor = UNetTemporalPredictor - - def NLayerDiscriminator(use_batch_norm, ndf=64, n_layers=3): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def func(input): - x = input - - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf, 4, 2, 'valid')(x) - x = LeakyReLU(0.2)(x) - - for i in range(1, n_layers): - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf * min(2 ** i, 8), 4, 2, 'valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf * min(2 ** n_layers, 8), 4, 1, 'valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - return Conv2D( 1, 4, 1, 'valid')(x) - return func - nnlib.NLayerDiscriminator = NLayerDiscriminator - ''' - @staticmethod - def finalize_all(): - if nnlib.keras_contrib is not None: - nnlib.keras_contrib = None - - if nnlib.keras is not None: - nnlib.keras.backend.clear_session() - nnlib.keras = None - - if nnlib.tf is not None: - nnlib.tf_sess = None - nnlib.tf = None - - -class CAInitializerMPSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.floatx = client_dict['floatx'] - self.data_format = client_dict['data_format'] - - #override - def process_data(self, data): - idx, shape = data - weights = CAGenerateWeights (shape, self.floatx, self.data_format) - return idx, weights - - #override - def get_data_name (self, data): - #return string identificator of your data - return "undefined" - - #override - def __init__(self, idx_shapes_list, floatx, data_format ): - self.idx_shapes_list = idx_shapes_list - self.floatx = floatx - self.data_format = data_format - - self.result = [] - super().__init__('CAInitializerMP', CAInitializerMPSubprocessor.Cli) - - #override - def on_clients_initialized(self): - io.progress_bar ("Initializing CA weights", len (self.idx_shapes_list)) - - #override - def 
on_clients_finalized(self): - io.progress_bar_close() - - #override - def process_info_generator(self): - for i in range(multiprocessing.cpu_count()): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'floatx' : self.floatx, - 'data_format' : self.data_format - } - - #override - def get_data(self, host_dict): - if len (self.idx_shapes_list) > 0: - return self.idx_shapes_list.pop(0) - - return None - - #override - def on_data_return (self, host_dict, data): - self.idx_shapes_list.insert(0, data) - - #override - def on_result (self, host_dict, data, result): - self.result.append ( result ) - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result diff --git a/project.code-workspace b/project.code-workspace new file mode 100644 index 0000000..07fae2f --- /dev/null +++ b/project.code-workspace @@ -0,0 +1,50 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "workbench.colorTheme": "Visual Studio Light", + "diffEditor.ignoreTrimWhitespace": true, + "workbench.sideBar.location": "right", + "breadcrumbs.enabled": false, + "editor.renderWhitespace": "none", + "editor.minimap.enabled": false, + "workbench.activityBar.visible": true, + "window.menuBarVisibility": "default", + "editor.fastScrollSensitivity": 10, + "editor.mouseWheelScrollSensitivity": 2, + "window.zoomLevel": 0, + "extensions.ignoreRecommendations": true, + + "python.linting.pylintEnabled": false, + "python.linting.enabled": false, + "python.linting.pylamaEnabled": false, + "python.linting.pydocstyleEnabled": false, + "python.pythonPath": "${env:PYTHON_EXECUTABLE}", + "workbench.editor.tabCloseButton": "off", + "workbench.editor.tabSizing": "shrink", + "workbench.editor.highlightModifiedTabs": true, + "editor.mouseWheelScrollSensitivity": 3, + "editor.folding": false, + "editor.glyphMargin": false, + "files.exclude": { + "**/__pycache__": true, + "**/.github": true, + "**/.vscode": true, + "**/*.dat": true, + "**/*.h5": true, + "**/*.npy": true + }, + "editor.quickSuggestions": { + "other": false, + "comments": false, + "strings": false + }, + "editor.trimAutoWhitespace": false, + "python.linting.pylintArgs": [ + "--disable=import-error" + ] + } +} \ No newline at end of file diff --git a/requirements-colab.txt b/requirements-colab.txt index ccdf38b..496aaf0 100644 --- a/requirements-colab.txt +++ b/requirements-colab.txt @@ -1,16 +1,9 @@ +tqdm numpy==1.17.0 h5py==2.9.0 -Keras==2.2.4 opencv-python==4.1.0.25 -tensorflow-gpu==1.13.1 -plaidml-keras==0.5.0 -scikit-image -tqdm ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git - -# -# install following packages directly via pip! -# -# pip install torch===1.3.1 -f https://download.pytorch.org/whl/torch_stable.html -# pip install torchvision===0.4.0 -f https://download.pytorch.org/whl/torch_stable.html \ No newline at end of file +scikit-image==0.14.2 +scipy==1.4.1 +colorama +tensorflow-gpu==1.13.1 \ No newline at end of file diff --git a/requirements-cpu.txt b/requirements-cpu.txt deleted file mode 100644 index d44148c..0000000 --- a/requirements-cpu.txt +++ /dev/null @@ -1,15 +0,0 @@ -numpy==1.17.0 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.1.0.25 -tensorflow==1.12.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git - -# -# install following packages directly via pip! 
-# -# pip install torch===1.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html -# pip install torchvision===0.4.0+cpu -f https://download.pytorch.org/whl/torch_stable.html diff --git a/requirements-cuda.txt b/requirements-cuda.txt index edfa576..b1d5f55 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -1,17 +1,9 @@ +tqdm numpy==1.17.0 h5py==2.9.0 -Keras==2.2.4 opencv-python==4.1.0.25 -tensorflow-gpu==1.12.0 -plaidml==0.6.0 -plaidml-keras==0.5.0 -scikit-image -tqdm ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git - -# -# install following packages directly via pip! -# -# pip install torch===1.3.1 -f https://download.pytorch.org/whl/torch_stable.html -# pip install torchvision===0.4.0 -f https://download.pytorch.org/whl/torch_stable.html \ No newline at end of file +scikit-image==0.14.2 +scipy==1.4.1 +colorama +tensorflow-gpu==1.12.0 \ No newline at end of file diff --git a/requirements-opencl.txt b/requirements-opencl.txt deleted file mode 100644 index 44b0b00..0000000 --- a/requirements-opencl.txt +++ /dev/null @@ -1,17 +0,0 @@ -numpy==1.17.0 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.1.0.25 -tensorflow==1.12.0 -plaidml==0.6.0 -plaidml-keras==0.5.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git - -# -# install following packages directly via pip! -# -# pip install torch===1.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html -# pip install torchvision===0.4.0+cpu -f https://download.pytorch.org/whl/torch_stable.html \ No newline at end of file diff --git a/samplelib/PackedFaceset.py b/samplelib/PackedFaceset.py index 57895c3..c194776 100644 --- a/samplelib/PackedFaceset.py +++ b/samplelib/PackedFaceset.py @@ -4,9 +4,9 @@ import struct from pathlib import Path import samplelib.SampleHost -from interact import interact as io +from core.interact import interact as io from samplelib import Sample -from utils import Path_utils +from core import pathex packed_faceset_filename = 'faceset.pak' @@ -19,20 +19,20 @@ class PackedFaceset(): if samples_dat_path.exists(): io.log_info(f"{samples_dat_path} : file already exists !") - io.input_bool("Press enter to continue and overwrite.", False) + io.input("Press enter to continue and overwrite.") as_person_faceset = False - dir_names = Path_utils.get_all_dir_names(samples_path) + dir_names = pathex.get_all_dir_names(samples_path) if len(dir_names) != 0: - as_person_faceset = io.input_bool(f"{len(dir_names)} subdirectories found, process as person faceset? 
(y/n) skip:y : ", True) + as_person_faceset = io.input_bool(f"{len(dir_names)} subdirectories found, process as person faceset?", True) if as_person_faceset: image_paths = [] for dir_name in dir_names: - image_paths += Path_utils.get_image_paths(samples_path / dir_name) + image_paths += pathex.get_image_paths(samples_path / dir_name) else: - image_paths = Path_utils.get_image_paths(samples_path) + image_paths = pathex.get_image_paths(samples_path) samples = samplelib.SampleHost.load_face_samples(image_paths) samples_len = len(samples) diff --git a/samplelib/Sample.py b/samplelib/Sample.py index 8012a64..3430315 100644 --- a/samplelib/Sample.py +++ b/samplelib/Sample.py @@ -4,10 +4,10 @@ from pathlib import Path import cv2 import numpy as np -from utils.cv2_utils import * +from core.cv2ex import * from DFLIMG import * from facelib import LandmarksProcessor -from imagelib import IEPolys +from core.imagelib import IEPolys class SampleType(IntEnum): IMAGE = 0 #raw image diff --git a/samplelib/SampleGeneratorBase.py b/samplelib/SampleGeneratorBase.py index cf98d8d..ef98974 100644 --- a/samplelib/SampleGeneratorBase.py +++ b/samplelib/SampleGeneratorBase.py @@ -15,20 +15,16 @@ class SampleGeneratorBase(object): self.batch_size = 1 if self.debug else batch_size self.last_generation = None self.active = True - + def set_active(self, is_active): self.active = is_active - + def generate_next(self): if not self.active and self.last_generation is not None: return self.last_generation self.last_generation = next(self) return self.last_generation - - #overridable - def get_total_sample_count(self): - return 0 - + #overridable def __iter__(self): #implement your own iterator diff --git a/samplelib/SampleGeneratorFace.py b/samplelib/SampleGeneratorFace.py index a76e006..195e45a 100644 --- a/samplelib/SampleGeneratorFace.py +++ b/samplelib/SampleGeneratorFace.py @@ -1,13 +1,16 @@ import multiprocessing -import traceback import pickle +import time +import traceback + import cv2 import numpy as np -import time + +from core import mplib +from core.joblib import SubprocessGenerator, ThisThreadGenerator from facelib import LandmarksProcessor from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor, SampleType) -from utils import iter_utils, mp_utils ''' @@ -34,37 +37,33 @@ class SampleGeneratorFace(SampleGeneratorBase): if self.debug: self.generators_count = 1 else: - self.generators_count = np.clip(multiprocessing.cpu_count(), 2, generators_count) - + self.generators_count = max(1, generators_count) + samples = SampleHost.load (SampleType.FACE, self.samples_path) self.samples_len = len(samples) if self.samples_len == 0: raise ValueError('No training data provided.') - index_host = mp_utils.IndexHost(self.samples_len) + index_host = mplib.IndexHost(self.samples_len) if random_ct_samples_path is not None: ct_samples = SampleHost.load (SampleType.FACE, random_ct_samples_path) - ct_index_host = mp_utils.IndexHost( len(ct_samples) ) + ct_index_host = mplib.IndexHost( len(ct_samples) ) else: ct_samples = None ct_index_host = None pickled_samples = pickle.dumps(samples, 4) ct_pickled_samples = pickle.dumps(ct_samples, 4) if ct_samples is not None else None - + if self.debug: - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, ct_index_host.create_cli() if ct_index_host is not None else None) )] + self.generators = [ThisThreadGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, 
ct_index_host.create_cli() if ct_index_host is not None else None) )] else: - self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, ct_index_host.create_cli() if ct_index_host is not None else None), start_now=True ) for i in range(self.generators_count) ] + self.generators = [SubprocessGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, ct_index_host.create_cli() if ct_index_host is not None else None), start_now=True ) for i in range(self.generators_count) ] self.generator_counter = -1 - #overridable - def get_total_sample_count(self): - return self.samples_len - def __iter__(self): return self @@ -75,8 +74,8 @@ class SampleGeneratorFace(SampleGeneratorBase): def batch_func(self, param ): pickled_samples, index_host, ct_pickled_samples, ct_index_host = param - - samples = pickle.loads(pickled_samples) + + samples = pickle.loads(pickled_samples) ct_samples = pickle.loads(ct_pickled_samples) if ct_pickled_samples is not None else None bs = self.batch_size @@ -89,9 +88,9 @@ class SampleGeneratorFace(SampleGeneratorBase): t = time.time() for n_batch in range(bs): sample_idx = indexes[n_batch] - sample = samples[sample_idx] - - ct_sample = None + sample = samples[sample_idx] + + ct_sample = None if ct_samples is not None: ct_sample = ct_samples[ct_indexes[n_batch]] diff --git a/samplelib/SampleGeneratorFacePerson.py b/samplelib/SampleGeneratorFacePerson.py index d254063..d691341 100644 --- a/samplelib/SampleGeneratorFacePerson.py +++ b/samplelib/SampleGeneratorFacePerson.py @@ -5,10 +5,11 @@ import traceback import cv2 import numpy as np +from core import mplib +from core.joblib import SubprocessGenerator, ThisThreadGenerator from facelib import LandmarksProcessor from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor, SampleType) -from utils import iter_utils, mp_utils ''' @@ -19,12 +20,12 @@ output_sample_types = [ ] ''' class SampleGeneratorFacePerson(SampleGeneratorBase): - def __init__ (self, samples_path, debug=False, batch_size=1, - sample_process_options=SampleProcessor.Options(), - output_sample_types=[], + def __init__ (self, samples_path, debug=False, batch_size=1, + sample_process_options=SampleProcessor.Options(), + output_sample_types=[], person_id_mode=1, **kwargs): - + super().__init__(samples_path, debug, batch_size) self.sample_process_options = sample_process_options self.output_sample_types = output_sample_types @@ -39,13 +40,13 @@ class SampleGeneratorFacePerson(SampleGeneratorBase): if self.samples_len == 0: raise ValueError('No training data provided.') - unique_person_names = { sample.person_name for sample in samples } - persons_name_idxs = { person_name : [] for person_name in unique_person_names } - for i,sample in enumerate(samples): - persons_name_idxs[sample.person_name].append (i) + unique_person_names = { sample.person_name for sample in samples } + persons_name_idxs = { person_name : [] for person_name in unique_person_names } + for i,sample in enumerate(samples): + persons_name_idxs[sample.person_name].append (i) indexes2D = [ persons_name_idxs[person_name] for person_name in unique_person_names ] - index2d_host = mp_utils.Index2DHost(indexes2D) - + index2d_host = mplib.Index2DHost(indexes2D) + if self.debug: self.generators_count = 1 self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (samples_host.create_cli(), index2d_host.create_cli(),) )] @@ -54,11 +55,7 @@ class 
SampleGeneratorFacePerson(SampleGeneratorBase): self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (samples_host.create_cli(), index2d_host.create_cli(),), start_now=True ) for i in range(self.generators_count) ] self.generator_counter = -1 - - #overridable - def get_total_sample_count(self): - return self.samples_len - + def __iter__(self): return self @@ -67,14 +64,14 @@ class SampleGeneratorFacePerson(SampleGeneratorBase): generator = self.generators[self.generator_counter % len(self.generators) ] return next(generator) - def batch_func(self, param ): + def batch_func(self, param ): samples, index2d_host, = param bs = self.batch_size while True: - person_idxs = index2d_host.get_1D(bs) + person_idxs = index2d_host.get_1D(bs) samples_idxs = index2d_host.get_2D(person_idxs, 1) - + batches = None for n_batch in range(bs): person_id = person_idxs[n_batch] @@ -85,10 +82,10 @@ class SampleGeneratorFacePerson(SampleGeneratorBase): x, = SampleProcessor.process ([sample], self.sample_process_options, self.output_sample_types, self.debug) except: raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) - + if batches is None: batches = [ [] for _ in range(len(x)) ] - + batches += [ [] ] i_person_id = len(batches)-1 @@ -96,9 +93,9 @@ class SampleGeneratorFacePerson(SampleGeneratorBase): batches[i].append ( x[i] ) batches[i_person_id].append ( np.array([person_id]) ) - + yield [ np.array(batch) for batch in batches] - + @staticmethod def get_person_id_max_count(samples_path): return SampleHost.get_person_id_max_count(samples_path) @@ -110,43 +107,43 @@ if self.person_id_mode==1: shuffle_idxs = [] elif self.person_id_mode==2: persons_count = len(samples) - + person_idxs = [] for j in range(persons_count): for i in range(j+1,persons_count): person_idxs += [ [i,j] ] shuffle_person_idxs = [] - + samples_idxs = [None]*persons_count shuffle_idxs = [None]*persons_count - + for i in range(persons_count): samples_idxs[i] = [*range(len(samples[i]))] shuffle_idxs[i] = [] elif self.person_id_mode==3: persons_count = len(samples) - + person_idxs = [ *range(persons_count) ] shuffle_person_idxs = [] - + samples_idxs = [None]*persons_count shuffle_idxs = [None]*persons_count - + for i in range(persons_count): samples_idxs[i] = [*range(len(samples[i]))] shuffle_idxs[i] = [] - -if self.person_id_mode==2: + +if self.person_id_mode==2: if len(shuffle_person_idxs) == 0: shuffle_person_idxs = person_idxs.copy() np.random.shuffle(shuffle_person_idxs) person_ids = shuffle_person_idxs.pop() - - + + batches = None for n_batch in range(self.batch_size): - + if self.person_id_mode==1: if len(shuffle_idxs) == 0: shuffle_idxs = samples_idxs.copy() @@ -154,7 +151,7 @@ if self.person_id_mode==2: idx = shuffle_idxs.pop() sample = samples[ idx ] - + try: x, = SampleProcessor.process ([sample], self.sample_process_options, self.output_sample_types, self.debug) except: @@ -165,7 +162,7 @@ if self.person_id_mode==2: if batches is None: batches = [ [] for _ in range(len(x)) ] - + batches += [ [] ] i_person_id = len(batches)-1 @@ -174,30 +171,30 @@ if self.person_id_mode==2: batches[i_person_id].append ( np.array([sample.person_id]) ) - + elif self.person_id_mode==2: person_id1, person_id2 = person_ids - + if len(shuffle_idxs[person_id1]) == 0: shuffle_idxs[person_id1] = samples_idxs[person_id1].copy() np.random.shuffle(shuffle_idxs[person_id1]) idx = shuffle_idxs[person_id1].pop() sample1 = samples[person_id1][idx] - + if len(shuffle_idxs[person_id2]) == 0: 
shuffle_idxs[person_id2] = samples_idxs[person_id2].copy() np.random.shuffle(shuffle_idxs[person_id2]) idx = shuffle_idxs[person_id2].pop() sample2 = samples[person_id2][idx] - + if sample1 is not None and sample2 is not None: try: x1, = SampleProcessor.process ([sample1], self.sample_process_options, self.output_sample_types, self.debug) except: raise Exception ("Exception occured in sample %s. Error: %s" % (sample1.filename, traceback.format_exc() ) ) - + try: x2, = SampleProcessor.process ([sample2], self.sample_process_options, self.output_sample_types, self.debug) except: @@ -205,50 +202,50 @@ if self.person_id_mode==2: x1_len = len(x1) if batches is None: - batches = [ [] for _ in range(x1_len) ] + batches = [ [] for _ in range(x1_len) ] batches += [ [] ] i_person_id1 = len(batches)-1 - - batches += [ [] for _ in range(len(x2)) ] + + batches += [ [] for _ in range(len(x2)) ] batches += [ [] ] i_person_id2 = len(batches)-1 for i in range(x1_len): batches[i].append ( x1[i] ) - + for i in range(len(x2)): batches[x1_len+1+i].append ( x2[i] ) batches[i_person_id1].append ( np.array([sample1.person_id]) ) batches[i_person_id2].append ( np.array([sample2.person_id]) ) - - elif self.person_id_mode==3: + + elif self.person_id_mode==3: if len(shuffle_person_idxs) == 0: shuffle_person_idxs = person_idxs.copy() np.random.shuffle(shuffle_person_idxs) person_id = shuffle_person_idxs.pop() - + if len(shuffle_idxs[person_id]) == 0: shuffle_idxs[person_id] = samples_idxs[person_id].copy() np.random.shuffle(shuffle_idxs[person_id]) idx = shuffle_idxs[person_id].pop() sample1 = samples[person_id][idx] - + if len(shuffle_idxs[person_id]) == 0: shuffle_idxs[person_id] = samples_idxs[person_id].copy() np.random.shuffle(shuffle_idxs[person_id]) idx = shuffle_idxs[person_id].pop() sample2 = samples[person_id][idx] - + if sample1 is not None and sample2 is not None: try: x1, = SampleProcessor.process ([sample1], self.sample_process_options, self.output_sample_types, self.debug) except: raise Exception ("Exception occured in sample %s. Error: %s" % (sample1.filename, traceback.format_exc() ) ) - + try: x2, = SampleProcessor.process ([sample2], self.sample_process_options, self.output_sample_types, self.debug) except: @@ -256,21 +253,21 @@ if self.person_id_mode==2: x1_len = len(x1) if batches is None: - batches = [ [] for _ in range(x1_len) ] + batches = [ [] for _ in range(x1_len) ] batches += [ [] ] i_person_id1 = len(batches)-1 - - batches += [ [] for _ in range(len(x2)) ] + + batches += [ [] for _ in range(len(x2)) ] batches += [ [] ] i_person_id2 = len(batches)-1 for i in range(x1_len): batches[i].append ( x1[i] ) - + for i in range(len(x2)): batches[x1_len+1+i].append ( x2[i] ) batches[i_person_id1].append ( np.array([sample1.person_id]) ) - batches[i_person_id2].append ( np.array([sample2.person_id]) ) -""" \ No newline at end of file + batches[i_person_id2].append ( np.array([sample2.person_id]) ) +""" diff --git a/samplelib/SampleGeneratorFaceTemporal.py b/samplelib/SampleGeneratorFaceTemporal.py deleted file mode 100644 index d1a6500..0000000 --- a/samplelib/SampleGeneratorFaceTemporal.py +++ /dev/null @@ -1,91 +0,0 @@ -import pickle -import traceback - -import cv2 -import numpy as np - -from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor, - SampleType) -from utils import iter_utils - - -''' -output_sample_types = [ - [SampleProcessor.TypeFlags, size, (optional) {} opts ] , - ... 
- ] -''' -class SampleGeneratorFaceTemporal(SampleGeneratorBase): - def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], generators_count=2, **kwargs): - super().__init__(samples_path, debug, batch_size) - - self.temporal_image_count = temporal_image_count - self.sample_process_options = sample_process_options - self.output_sample_types = output_sample_types - - if self.debug: - self.generators_count = 1 - else: - self.generators_count = generators_count - - samples = SampleHost.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path) - samples_len = len(samples) - if samples_len == 0: - raise ValueError('No training data provided.') - - pickled_samples = pickle.dumps(samples, 4) - if self.debug: - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (0, pickled_samples) )] - else: - self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (i, pickled_samples) ) for i in range(self.generators_count) ] - - self.generator_counter = -1 - - def __iter__(self): - return self - - def __next__(self): - self.generator_counter += 1 - generator = self.generators[self.generator_counter % len(self.generators) ] - return next(generator) - - def batch_func(self, param): - generator_id, pickled_samples = param - samples = pickle.loads(pickled_samples) - samples_len = len(samples) - - mult_max = 1 - l = samples_len - ( (self.temporal_image_count)*mult_max - (mult_max-1) ) - - samples_idxs = [ *range(l+1) ] - - if len(samples_idxs) - self.temporal_image_count < 0: - raise ValueError('Not enough samples to fit temporal line.') - - shuffle_idxs = [] - - while True: - batches = None - for n_batch in range(self.batch_size): - if len(shuffle_idxs) == 0: - shuffle_idxs = samples_idxs.copy() - np.random.shuffle (shuffle_idxs) - - idx = shuffle_idxs.pop() - - temporal_samples = [] - mult = np.random.randint(mult_max)+1 - for i in range( self.temporal_image_count ): - sample = samples[ idx+i*mult ] - try: - temporal_samples += SampleProcessor.process ([sample], self.sample_process_options, self.output_sample_types, self.debug)[0] - except: - raise Exception ("Exception occured in sample %s. 
Error: %s" % (sample.filename, traceback.format_exc() ) ) - - if batches is None: - batches = [ [] for _ in range(len(temporal_samples)) ] - - for i in range(len(temporal_samples)): - batches[i].append ( temporal_samples[i] ) - - yield [ np.array(batch) for batch in batches] diff --git a/samplelib/SampleGeneratorImageTemporal.py b/samplelib/SampleGeneratorImageTemporal.py index 57b91b1..69b0440 100644 --- a/samplelib/SampleGeneratorImageTemporal.py +++ b/samplelib/SampleGeneratorImageTemporal.py @@ -1,10 +1,12 @@ import traceback -import numpy as np + import cv2 +import numpy as np -from utils import iter_utils +from core.joblib import SubprocessGenerator, ThisThreadGenerator +from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor, + SampleType) -from samplelib import SampleType, SampleProcessor, SampleHost, SampleGeneratorBase ''' output_sample_types = [ diff --git a/samplelib/SampleHost.py b/samplelib/SampleHost.py index 0a53b71..8429915 100644 --- a/samplelib/SampleHost.py +++ b/samplelib/SampleHost.py @@ -1,14 +1,15 @@ import multiprocessing import operator +import pickle import traceback from pathlib import Path -import pickle + import samplelib.PackedFaceset +from core import pathex +from core.interact import interact as io +from core.joblib import Subprocessor from DFLIMG import * from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from joblib import Subprocessor -from utils import Path_utils, mp_utils from .Sample import Sample, SampleType @@ -45,7 +46,7 @@ class SampleHost: if sample_type == SampleType.IMAGE: if samples[sample_type] is None: - samples[sample_type] = [ Sample(filename=filename) for filename in io.progress_bar_generator( Path_utils.get_image_paths(samples_path), "Loading") ] + samples[sample_type] = [ Sample(filename=filename) for filename in io.progress_bar_generator( pathex.get_image_paths(samples_path), "Loading") ] elif sample_type == SampleType.FACE: if samples[sample_type] is None: @@ -58,7 +59,7 @@ class SampleHost: io.log_info (f"Loaded {len(result)} packed faces from {samples_path}") if result is None: - result = SampleHost.load_face_samples( Path_utils.get_image_paths(samples_path) ) + result = SampleHost.load_face_samples( pathex.get_image_paths(samples_path) ) samples[sample_type] = result elif sample_type == SampleType.FACE_TEMPORAL_SORTED: @@ -68,6 +69,31 @@ class SampleHost: return samples[sample_type] + @staticmethod + def load_face_samples ( image_paths): + result = FaceSamplesLoaderSubprocessor(image_paths).run() + sample_list = [] + + for filename, \ + ( face_type, + shape, + landmarks, + ie_polys, + eyebrows_expand_mod, + source_filename, + ) in result: + sample_list.append( Sample(filename=filename, + sample_type=SampleType.FACE, + face_type=FaceType.fromString (face_type), + shape=shape, + landmarks=landmarks, + ie_polys=ie_polys, + eyebrows_expand_mod=eyebrows_expand_mod, + source_filename=source_filename, + )) + return sample_list + + """ @staticmethod def load_face_samples ( image_paths): sample_list = [] @@ -87,10 +113,80 @@ class SampleHost: source_filename=dflimg.get_source_filename(), )) return sample_list - + """ + @staticmethod def upgradeToFaceTemporalSortedSamples( samples ): new_s = [ (s, s.source_filename) for s in samples] new_s = sorted(new_s, key=operator.itemgetter(1)) return [ s[0] for s in new_s] + + +class FaceSamplesLoaderSubprocessor(Subprocessor): + #override + def __init__(self, image_paths ): + self.image_paths = image_paths + self.image_paths_len = len(image_paths) + 
self.idxs = [*range(self.image_paths_len)] + self.result = [None]*self.image_paths_len + super().__init__('FaceSamplesLoader', FaceSamplesLoaderSubprocessor.Cli, 60) + + #override + def on_clients_initialized(self): + io.progress_bar ("Loading samples", len (self.image_paths)) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def process_info_generator(self): + for i in range(min(multiprocessing.cpu_count(), 8) ): + yield 'CPU%d' % (i), {}, {} + + #override + def get_data(self, host_dict): + if len (self.idxs) > 0: + idx = self.idxs.pop(0) + return idx, self.image_paths[idx] + + return None + + #override + def on_data_return (self, host_dict, data): + self.idxs.insert(0, data[0]) + + #override + def on_result (self, host_dict, data, result): + idx, dflimg = result + self.result[idx] = (self.image_paths[idx], dflimg) + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.result + + class Cli(Subprocessor.Cli): + #override + def process_data(self, data): + idx, filename = data + dflimg = DFLIMG.load (Path(filename)) + + if dflimg is None: + self.log_err (f"FaceSamplesLoader: {filename} is not a dfl image file.") + data = None + else: + data = (dflimg.get_face_type(), + dflimg.get_shape(), + dflimg.get_landmarks(), + dflimg.get_ie_polys(), + dflimg.get_eyebrows_expand_mod(), + dflimg.get_source_filename() ) + + return idx, data + + #override + def get_data_name (self, data): + #return string identificator of your data + return data[1] diff --git a/samplelib/SampleProcessor.py b/samplelib/SampleProcessor.py index 726920f..e5f67ee 100644 --- a/samplelib/SampleProcessor.py +++ b/samplelib/SampleProcessor.py @@ -4,7 +4,7 @@ from enum import IntEnum import cv2 import numpy as np -import imagelib +from core import imagelib from facelib import FaceType, LandmarksProcessor @@ -154,9 +154,9 @@ class SampleProcessor(object): yaw = -yaw if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: - pitch = (pitch+1.0) / 2.0 - yaw = (yaw+1.0) / 2.0 - roll = (roll+1.0) / 2.0 + pitch = np.clip( (pitch / math.pi) / 2.0 + 1.0, 0, 1) + yaw = np.clip( (yaw / math.pi) / 2.0 + 1.0, 0, 1) + roll = np.clip( (roll / math.pi) / 2.0 + 1.0, 0, 1) img = (pitch, yaw, roll) else: diff --git a/samplelib/__init__.py b/samplelib/__init__.py index 67630c5..ecfbfec 100644 --- a/samplelib/__init__.py +++ b/samplelib/__init__.py @@ -5,6 +5,5 @@ from .SampleProcessor import SampleProcessor from .SampleGeneratorBase import SampleGeneratorBase from .SampleGeneratorFace import SampleGeneratorFace from .SampleGeneratorFacePerson import SampleGeneratorFacePerson -from .SampleGeneratorFaceTemporal import SampleGeneratorFaceTemporal from .SampleGeneratorImageTemporal import SampleGeneratorImageTemporal from .PackedFaceset import PackedFaceset \ No newline at end of file diff --git a/utils/pickle_utils.py b/utils/pickle_utils.py deleted file mode 100644 index 37c4c72..0000000 --- a/utils/pickle_utils.py +++ /dev/null @@ -1,9 +0,0 @@ -class AntiPickler(): - def __init__(self, obj): - self.obj = obj - - def __getstate__(self): - return dict() - - def __setstate__(self, d): - self.__dict__.update(d) \ No newline at end of file
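
Aside on the removed utils/pickle_utils.py above: AntiPickler wraps an arbitrary object so that pickling silently drops the wrapped payload, which is handy when an object graph holds handles (locks, file objects, sessions) that cannot cross process boundaries. A minimal usage sketch, not part of the patch and independent of the DeepFaceLab codebase, using only the class body deleted above:

    import pickle
    import threading

    class AntiPickler():
        def __init__(self, obj):
            self.obj = obj              # payload that must never be serialized

        def __getstate__(self):
            return dict()               # pickle stores an empty state instead of self.obj

        def __setstate__(self, d):
            self.__dict__.update(d)     # restored instance simply has no 'obj' attribute

    wrapped = AntiPickler(threading.Lock())   # a lock is normally unpicklable
    data = pickle.dumps(wrapped)              # succeeds: the payload is dropped, not serialized
    restored = pickle.loads(data)
    print(hasattr(restored, "obj"))           # False - the payload does not survive the round trip

The round trip succeeds because __getstate__ hides the unpicklable member; the cost is that the attribute is gone after unpickling, so callers must treat the restored wrapper as empty.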