Upgraded to TF version 1.13.2

Removed the wait at first launch for most graphics cards. Increased speed of training by 10-20%, but you have to retrain all models from scratch. SAEHD: added option 'use float16' Experimental option. Reduces the model size by half. Increases the speed of training. Decreases the accuracy of the model. The model may collapse or not train. Model may not learn the mask in large resolutions. true_face_training option is replaced by "True face power". 0.0000 .. 1.0 Experimental option. Discriminates the result face to be more like the src face. Higher value - stronger discrimination. Comparison - https://i.imgur.com/czScS9q.png
2025-08-19 21:13:20 -07:00 · 2020-01-25 21:58:19 +04:00 · 2020-01-25 21:58:19 +04:00 · 76ca79216e
commit 76ca79216e
parent a3dfcb91b9
49 changed files with 1320 additions and 1297 deletions
--- a/core/leras/nn.py
+++ b/core/leras/nn.py
@ -1,51 +1,67 @@
 """
-Leras. 
+Leras.

 like lighter keras.
 This is my lightweight neural network library written from scratch
 based on pure tensorflow without keras.

 Provides:
-+ full freedom of tensorflow operations without keras model's restrictions 
+ full freedom of tensorflow operations without keras model's restrictions
 + easy model operations like in PyTorch, but in graph mode (no eager execution)
 + convenient and understandable logic

 Reasons why we cannot import tensorflow or any tensorflow.sub modules right here:
 1) change env variables based on DeviceConfig before import tensorflow
 2) multiprocesses will import tensorflow every spawn
+
+NCHW speed up training for 10-20%.
 """

 import os
 import sys
 from pathlib import Path
+
+import numpy as np
+
 from core.interact import interact as io
+
 from .device import Devices

+
 class nn():
    current_DeviceConfig = None

    tf = None
    tf_sess = None
    tf_sess_config = None
-    
+    tf_default_device = None
+
+    data_format = None
+    conv2d_ch_axis = None
+    conv2d_spatial_axes = None
+
+    tf_floatx = None
+    np_floatx = None
+
    # Tensor ops
    tf_get_value = None
    tf_batch_set_value = None
    tf_gradients = None
    tf_average_gv_list = None
    tf_average_tensor_list = None
-    tf_dot = None
+    tf_concat = None
    tf_gelu = None
    tf_upsample2d = None
    tf_upsample2d_bilinear = None
    tf_flatten = None
+    tf_reshape_4D = None
    tf_random_binomial = None
    tf_gaussian_blur = None
    tf_style_loss = None
-    tf_channel_histogram = None
-    tf_histogram = None
    tf_dssim = None
-    
+    tf_space_to_depth = None
+    tf_depth_to_space = None
+
    # Layers
    Saveable = None
    LayerBase = None
@ -55,16 +71,17 @@ class nn():
    BlurPool = None
    Dense = None
    BatchNorm2D = None
-    
+
    # Initializers
    initializers = None
-    
+
    # Optimizers
    TFBaseOptimizer = None
    TFRMSpropOptimizer = None
-    
+
    @staticmethod
-    def initialize(device_config=None):
+    def initialize(device_config=None, floatx="float32", data_format="NHWC"):
+
        if nn.tf is None:
            if device_config is None:
                device_config = nn.getCurrentDeviceConfig()
@ -74,11 +91,8 @@ class nn():
            if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
                os.environ.pop('CUDA_VISIBLE_DEVICES')

-            os.environ['CUDA_CACHE_MAXSIZE'] = '536870912' #512Mb (32mb default)
-            
            first_run = False
-            
-            if not device_config.cpu_only:
+            if len(device_config.devices) != 0:
                if sys.platform[0:3] == 'win':
                    if all( [ x.name == device_config.devices[0].name for x in device_config.devices ] ):
                        devices_str = "_" + device_config.devices[0].name.replace(' ','_')
@ -86,27 +100,33 @@ class nn():
                        devices_str = ""
                        for device in device_config.devices:
                            devices_str += "_" + device.name.replace(' ','_')
-                        
+
                    compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache' + devices_str)
                    if not compute_cache_path.exists():
                        first_run = True
                    os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)

+            os.environ['CUDA_CACHE_MAXSIZE'] = '536870912' #512Mb (32mb default)
            os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
            os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # tf log errors only

            import warnings
            warnings.simplefilter(action='ignore', category=FutureWarning)
-  
+
            if first_run:
                io.log_info("Caching GPU kernels...")

-            import tensorflow as tf            
+            import tensorflow as tf
+            import logging
+            logging.getLogger('tensorflow').setLevel(logging.ERROR)
+
            nn.tf = tf
-            
-            if device_config.cpu_only:
+
+            if len(device_config.devices) == 0:
+                nn.tf_default_device = "/CPU:0"
                config = tf.ConfigProto(device_count={'GPU': 0})
-            else:                
+            else:
+                nn.tf_default_device = "/GPU:0"
                config = tf.ConfigProto()
                config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices])

@ -114,26 +134,81 @@ class nn():
            config.gpu_options.allow_growth = True
            nn.tf_sess_config = config

-            nn.tf_floatx = nn.tf.float32 #nn.tf.float16 if device_config.use_fp16 else nn.tf.float32
-            nn.np_floatx = nn.tf_floatx.as_numpy_dtype
-    
            from .tensor_ops import initialize_tensor_ops
            from .layers import initialize_layers
            from .initializers import initialize_initializers
            from .optimizers import initialize_optimizers
-            
+
            initialize_tensor_ops(nn)
            initialize_layers(nn)
            initialize_initializers(nn)
            initialize_optimizers(nn)
-            
+
        if nn.tf_sess is None:
            nn.tf_sess = tf.Session(config=nn.tf_sess_config)
-            
+
+        if floatx == "float32":
+            floatx = nn.tf.float32
+        elif floatx == "float16":
+            floatx = nn.tf.float16
+        else:
+            raise ValueError(f"unsupported floatx {floatx}")
+        nn.set_floatx(floatx)
+        nn.set_data_format(data_format)
+
    @staticmethod
    def initialize_main_env():
        Devices.initialize_main_env()
-    
+
+    @staticmethod
+    def set_floatx(tf_dtype):
+        """
+        set default float type for all layers when dtype is None for them
+        """
+        nn.tf_floatx = tf_dtype
+        nn.np_floatx = tf_dtype.as_numpy_dtype
+
+    @staticmethod
+    def set_data_format(data_format):
+        if data_format != "NHWC" and data_format != "NCHW":
+            raise ValueError(f"unsupported data_format {data_format}")
+        nn.data_format = data_format
+
+        if data_format == "NHWC":
+            nn.conv2d_ch_axis = 3
+            nn.conv2d_spatial_axes = [1,2]
+        elif data_format == "NCHW":
+            nn.conv2d_ch_axis = 1
+            nn.conv2d_spatial_axes = [2,3]
+
+    @staticmethod
+    def get4Dshape ( w, h, c, data_format=None ):
+        """
+        returns 4D shape based on current data_format
+        """
+        if data_format is None:
+            data_format = nn.data_format
+
+        if data_format == "NHWC":
+            return (None,h,w,c)
+        else:
+            return (None,c,h,w)
+
+    @staticmethod
+    def to_data_format( x, to_data_format, from_data_format=None):
+        if from_data_format is None:
+            from_data_format = nn.data_format
+
+        if to_data_format == from_data_format:
+            return x
+
+        if to_data_format == "NHWC":
+            return np.transpose(x, (0,2,3,1) )
+        elif to_data_format == "NCHW":
+            return np.transpose(x, (0,3,1,2) )
+        else:
+            raise ValueError(f"unsupported to_data_format {to_data_format}")
+
    @staticmethod
    def getCurrentDeviceConfig():
        if nn.current_DeviceConfig is None:
@ -151,27 +226,34 @@ class nn():
                nn.tf.reset_default_graph()
                nn.tf_sess.close()
                nn.tf_sess = nn.tf.Session(config=nn.tf_sess_config)
-            
+
    @staticmethod
-    def tf_close_session():        
+    def tf_close_session():
        if nn.tf_sess is not None:
            nn.tf.reset_default_graph()
            nn.tf_sess.close()
            nn.tf_sess = None

-            
+    @staticmethod
+    def tf_get_current_device():
+        # Undocumented access to last tf.device(...)
+        objs = nn.tf.get_default_graph()._device_function_stack.peek_objs()
+        if len(objs) != 0:
+            return objs[0].display_name
+        return nn.tf_default_device
+
    @staticmethod
    def ask_choose_device_idxs(choose_only_one=False, allow_cpu=True, suggest_best_multi_gpu=False, suggest_all_gpu=False, return_device_config=False):
        devices = Devices.getDevices()
        if len(devices) == 0:
            return []
-        
+
        all_devices_indexes = [device.index for device in devices]
-        
+
        if choose_only_one:
            suggest_best_multi_gpu = False
            suggest_all_gpu = False
-   
+
        if suggest_all_gpu:
            best_device_indexes = all_devices_indexes
        elif suggest_best_multi_gpu:
@ -179,84 +261,84 @@ class nn():
        else:
            best_device_indexes = [ devices.get_best_device().index ]
        best_device_indexes = ",".join([str(x) for x in best_device_indexes])
-                        
+
        io.log_info ("")
        if choose_only_one:
            io.log_info ("Choose one GPU idx.")
        else:
            io.log_info ("Choose one or several GPU idxs (separated by comma).")
        io.log_info ("")
-        
+
        if allow_cpu:
            io.log_info ("[CPU] : CPU")
        for device in devices:
            io.log_info (f"  [{device.index}] : {device.name}")
-        
+
        io.log_info ("")
-        
+
        while True:
            try:
                if choose_only_one:
                    choosed_idxs = io.input_str("Which GPU index to choose?", best_device_indexes)
                else:
                    choosed_idxs = io.input_str("Which GPU indexes to choose?", best_device_indexes)
-                
+
                if allow_cpu and choosed_idxs.lower() == "cpu":
                    choosed_idxs = []
                    break
-                
+
                choosed_idxs = [ int(x) for x in choosed_idxs.split(',') ]
-                
+
                if choose_only_one:
                    if len(choosed_idxs) == 1:
-                        break                    
+                        break
                else:
                    if all( [idx in all_devices_indexes for idx in choosed_idxs] ):
                        break
            except:
                pass
        io.log_info ("")
-        
+
        if return_device_config:
            return nn.DeviceConfig.GPUIndexes(choosed_idxs)
-        else:        
+        else:
            return choosed_idxs

-    class DeviceConfig():    
+    class DeviceConfig():
        def __init__ (self, devices=None):
-            devices = devices or []       
-            
+            devices = devices or []
+
            if not isinstance(devices, Devices):
                devices = Devices(devices)
-                 
-            self.devices = devices                   
-            self.cpu_only = len(devices) == 0      
-            
+
+            self.devices = devices
+            self.cpu_only = len(devices) == 0
+
        @staticmethod
-        def BestGPU():            
+        def BestGPU():
            devices = Devices.getDevices()
            if len(devices) == 0:
                return nn.DeviceConfig.CPU()
-            
+
            return nn.DeviceConfig([devices.get_best_device()])
-            
+
        @staticmethod
-        def WorstGPU():     
+        def WorstGPU():
            devices = Devices.getDevices()
            if len(devices) == 0:
                return nn.DeviceConfig.CPU()
-                    
+
            return nn.DeviceConfig([devices.get_worst_device()])
-            
+
        @staticmethod
        def GPUIndexes(indexes):
            if len(indexes) != 0:
                devices = Devices.getDevices().get_devices_from_index_list(indexes)
            else:
                devices = []
-                
+
            return nn.DeviceConfig(devices)
-            
+
        @staticmethod
-        def CPU():            
+        def CPU():
            return nn.DeviceConfig([])