Now you can train models on multiple GPUs in the same workspace without cloning any folders.

Model file names will be prefixed with the GPU index if a GPU is chosen explicitly at train/convert start. If you leave the GPU idx choice at its default, the best GPU idx will be chosen and model file names will not contain an index prefix. This makes it possible to train the same fake with various models or options on multiple GPUs. H64 and H128: you can now choose 'Lighter autoencoder'; it is the same as VRAM GB <= 4 before this update. Added archived_models.zip, which contains old experiments. RecycleGAN: archived. devicelib: if your system has no NVML installed (some old cards), it will work with gpu_idx=0 as 'Generic GeForce GPU' with 2GB VRAM. Refactorings.
2025-07-06 13:02:15 -07:00 · 2019-01-14 10:48:23 +04:00 · 2019-01-14 10:48:23 +04:00 · 1f2b1481ef
commit 1f2b1481ef
parent e2f4677987
9 changed files with 180 additions and 479 deletions
--- a/main.py
+++ b/main.py
@ -65,16 +65,6 @@ if __name__ == "__main__":
    sort_parser.set_defaults (func=process_sort)
    def process_train(arguments):      
        if 'DFL_TARGET_EPOCH' in os.environ.keys():
            arguments.session_target_epoch = int ( os.environ['DFL_TARGET_EPOCH'] )
        if 'DFL_BATCH_SIZE' in os.environ.keys():
            arguments.batch_size = int ( os.environ['DFL_BATCH_SIZE'] )
        if 'DFL_WORST_GPU' in os.environ.keys():
            arguments.choose_worst_gpu = True
        from mainscripts import Trainer
        Trainer.main (
            training_data_src_dir=arguments.training_data_src_dir, 
@ -83,10 +73,8 @@ if __name__ == "__main__":
            model_name=arguments.model_name,
            debug              = arguments.debug,
            #**options
-            choose_worst_gpu   = arguments.choose_worst_gpu,
+            force_gpu_idx = arguments.force_gpu_idx,
-            force_best_gpu_idx = arguments.force_best_gpu_idx,
+            cpu_only      = arguments.cpu_only
            force_gpu_idxs     = arguments.force_gpu_idxs,
            cpu_only           = arguments.cpu_only
            )
    train_parser = subparsers.add_parser( "train", help="Trainer") 
@ -96,9 +84,7 @@ if __name__ == "__main__":
    train_parser.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model")
    train_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.")  
    train_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.")
-    train_parser.add_argument('--force-gpu-idxs', type=str, dest="force_gpu_idxs", default=None, help="Override final GPU idxs. Example: 0,1,2.")
+    train_parser.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.")
    train_parser.add_argument('--choose-worst-gpu', action="store_true", dest="choose_worst_gpu", default=False, help="Choose worst GPU instead of best. Environment variable to force True: DFL_WORST_GPU")
    train_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best(worst).")
    train_parser.set_defaults (func=process_train)
@ -111,7 +97,7 @@ if __name__ == "__main__":
            model_dir=arguments.model_dir, 
            model_name=arguments.model_name, 
            debug = arguments.debug,
-            force_best_gpu_idx = arguments.force_best_gpu_idx,
+            force_gpu_idx = arguments.force_gpu_idx,
            cpu_only = arguments.cpu_only
            )
@ -122,7 +108,7 @@ if __name__ == "__main__":
    convert_parser.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.")
    convert_parser.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model")
    convert_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug converter.")
-    convert_parser.add_argument('--force-best-gpu-idx', type=int, dest="force_best_gpu_idx", default=-1, help="Force to choose this GPU idx as best.")
+    convert_parser.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.")
    convert_parser.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.")
    convert_parser.set_defaults(func=process_convert)
--- a/mainscripts/Extractor.py
+++ b/mainscripts/Extractor.py
@ -68,7 +68,7 @@ class ExtractSubprocessor(SubprocessorBase):
            if not multi_gpu or len(devices) == 0:
                devices = [nnlib.device.getBestDeviceIdx()]
-            if len(devices) == 0 or devices[0] == -1:
+            if len(devices) == 0:
                devices = [0]
            devices = [ (idx, nnlib.device.getDeviceName(idx), nnlib.device.getDeviceVRAMTotalGb(idx) ) for idx in devices]
@ -263,7 +263,7 @@ class ExtractSubprocessor(SubprocessorBase):
        self.e = None
-        device_config = nnlib.DeviceConfig ( cpu_only=self.cpu_only, force_best_gpu_idx=self.device_idx, allow_growth=True)
+        device_config = nnlib.DeviceConfig ( cpu_only=self.cpu_only, force_gpu_idx=self.device_idx, allow_growth=True)
        if self.type == 'rects':
            if self.detector is not None:
                if self.detector == 'mt':
--- a/models/ModelBase.py
+++ b/models/ModelBase.py
@ -18,7 +18,18 @@ You can implement your own model. Check examples.
 class ModelBase(object):
    #DONT OVERRIDE
-    def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, debug = False, force_best_gpu_idx=-1, **in_options):
+    def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, debug = False, force_gpu_idx=-1, **in_options):
        if force_gpu_idx == -1: 
            idxs_names_list = nnlib.device.getAllDevicesIdxsWithNamesList()
            if len(idxs_names_list) > 1:
                print ("You have multi GPUs in a system: ")
                for idx, name in idxs_names_list:
                    print ("[%d] : %s" % (idx, name) )
                force_gpu_idx = input_int("Which GPU idx to choose? ( skip: best GPU ) : ", -1, [ x[0] for x in idxs_names_list] )
        self.force_gpu_idx = force_gpu_idx
        print ("Loading model...")
        self.model_path = model_path
        self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat') )
@ -35,7 +46,7 @@ class ModelBase(object):
        self.debug = debug
        self.is_training_mode = (training_data_src_path is not None and training_data_dst_path is not None)
-        self.supress_std_once = ('TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1')
+        self.supress_std_once = os.environ.get('TF_SUPPRESS_STD', '0') == '1'
        self.epoch = 0
        self.options = {}
@ -48,21 +59,12 @@ class ModelBase(object):
                self.options = model_data['options']
                self.loss_history = model_data['loss_history'] if 'loss_history' in model_data.keys() else []
                self.sample_for_preview = model_data['sample_for_preview']  if 'sample_for_preview' in model_data.keys() else None
-            
+
        ask_override = self.is_training_mode and self.epoch != 0 and input_in_time ("Press enter in 2 seconds to override some model settings.", 2)
        if self.epoch == 0: 
            print ("\nModel first run. Enter model options as default for each run.")
        if (self.epoch == 0 or ask_override) and (force_best_gpu_idx == -1): 
            idxs_names_list = nnlib.device.getAllDevicesIdxsWithNamesList()
            if len(idxs_names_list) > 1:
                print ("You have multi GPUs in a system: ")
                for idx, name in idxs_names_list:
                    print ("[%d] : %s" % (idx, name) )
                force_best_gpu_idx = input_int("Which GPU idx to choose? ( skip: system choice ) : ", -1)
        if self.epoch == 0 or ask_override: 
            default_write_preview_history = False if self.epoch == 0 else self.options.get('write_preview_history',False)
            self.options['write_preview_history'] = input_bool("Write preview history? (y/n ?:help skip:n/default) : ", default_write_preview_history, help_message="Preview history will be writed to <ModelName>_history folder.")
@ -119,13 +121,8 @@ class ModelBase(object):
        self.onInitializeOptions(self.epoch == 0, ask_override)
-        nnlib.import_all ( nnlib.DeviceConfig(allow_growth=False, force_best_gpu_idx=force_best_gpu_idx, **in_options) )
+        nnlib.import_all ( nnlib.DeviceConfig(allow_growth=False, force_gpu_idx=self.force_gpu_idx, **in_options) )
        self.device_config = nnlib.active_DeviceConfig
        if self.epoch == 0: 
            self.created_vram_gb = self.options['created_vram_gb'] = self.device_config.gpu_total_vram_gb
        else:            
            self.created_vram_gb = self.options['created_vram_gb'] = self.options.get('created_vram_gb',self.device_config.gpu_total_vram_gb)
        self.onInitialize(**in_options)
@ -136,7 +133,10 @@ class ModelBase(object):
        if self.is_training_mode:
            if self.write_preview_history:
-                self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) )
+                if self.force_gpu_idx == -1:
                    self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) )
                else:
                    self.preview_history_path = self.model_path / ( '%d_%s_history' % (self.force_gpu_idx, self.get_model_name()) )
                if not self.preview_history_path.exists():
                    self.preview_history_path.mkdir(exist_ok=True)
@ -174,7 +174,7 @@ class ModelBase(object):
            for idx in self.device_config.gpu_idxs:
                print ("== |== [%d : %s]" % (idx, nnlib.device.getDeviceName(idx)) )
-        if not self.device_config.cpu_only and self.device_config.gpu_total_vram_gb == 2:
+        if not self.device_config.cpu_only and self.device_config.gpu_vram_gb[0] == 2:
            print ("==")
            print ("== WARNING: You are using 2GB GPU. Result quality may be significantly decreased.")
            print ("== If training does not start, close all programs and try again.")
@ -268,7 +268,7 @@ class ModelBase(object):
        if self.supress_std_once:
            supressor.__exit__()
-        
+            
        model_data = {
            'epoch': self.epoch,
            'options': self.options,
@ -367,7 +367,10 @@ class ModelBase(object):
        return self.generator_list
    def get_strpath_storage_for_file(self, filename):
-        return str( self.model_path / (self.get_model_name() + '_' + filename) )
+        if self.force_gpu_idx == -1:
            return str( self.model_path / ( self.get_model_name() + '_' + filename) )
        else:
            return str( self.model_path / ( str(self.force_gpu_idx) + '_' + self.get_model_name() + '_' + filename) )
    def set_vram_batch_requirements (self, d):
        #example d = {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48} 
@ -379,7 +382,7 @@ class ModelBase(object):
        else:
            if self.batch_size == 0:        
                for x in keys:
-                    if self.device_config.gpu_total_vram_gb <= x:
+                    if self.device_config.gpu_vram_gb[0] <= x:
                        self.batch_size = d[x]
                        break
--- a/models/Model_H128/Model.py
+++ b/models/Model_H128/Model.py
@ -11,12 +11,22 @@ class Model(ModelBase):
    decoder_srcH5 = 'decoder_src.h5'
    decoder_dstH5 = 'decoder_dst.h5'
    #override
    def onInitializeOptions(self, is_first_run, ask_override):
        """Fill in ``self.options['lighter_ae']`` for this model.

        On the first run the user is asked whether to use the lightweight
        autoencoder. On later runs a stored ``'lighter_ae'`` value is kept;
        if none is stored, the value is derived from the legacy
        ``'created_vram_gb'`` option (``<= 4`` means lightweight). The legacy
        key is removed from the options either way.

        ``ask_override`` is accepted but not read by this method.
        """
        if not is_first_run:
            # Temporary support for old models (to be deprecated): they only
            # stored the VRAM size they were created with.
            legacy_default = self.options.get('created_vram_gb', 99) <= 4
            self.options.pop('created_vram_gb', None)
            self.options.setdefault('lighter_ae', legacy_default)
            return

        self.options['lighter_ae'] = input_bool(
            "Use lightweight autoencoder? (y/n, ?:help skip:n) : ",
            False,
            help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. If your GPU VRAM <= 4, you should to choose this option."
        )
    #override
    def onInitialize(self, **in_options):
        exec(nnlib.import_all(), locals(), globals())        
        self.set_vram_batch_requirements( {2.5:2,3:2,4:2,4:4,5:8,6:12,7:16,8:16,9:24,10:24,11:32,12:32,13:48} )
-        bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.created_vram_gb)
+        bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build( self.options['lighter_ae'] )
        if not self.is_first_run():
            self.encoder.load_weights     (self.get_strpath_storage_for_file(self.encoderH5))
            self.decoder_src.load_weights (self.get_strpath_storage_for_file(self.decoder_srcH5))
@ -120,7 +130,7 @@ class Model(ModelBase):
                               base_blur_mask_modifier=100,
                               **in_options)
-    def Build(self, created_vram_gb):
+    def Build(self, lighter_ae):
        exec(nnlib.code_import_all, locals(), globals())
        bgr_shape = (128, 128, 3)
@ -139,7 +149,7 @@ class Model(ModelBase):
        def Encoder(input_shape):
            input_layer = Input(input_shape)
            x = input_layer
-            if created_vram_gb >= 5:
+            if not lighter_ae:
                x = downscale(128)(x)
                x = downscale(256)(x)
                x = downscale(512)(x)
@ -161,7 +171,7 @@ class Model(ModelBase):
            return Model(input_layer, x)
        def Decoder():
-            if created_vram_gb >= 5:
+            if not lighter_ae:
                input_ = Input(shape=(16, 16, 512))
                x = input_
                x = upscale(512)(x)
--- a/models/Model_H64/Model.py
+++ b/models/Model_H64/Model.py
@ -11,13 +11,25 @@ class Model(ModelBase):
    encoderH5 = 'encoder.h5'
    decoder_srcH5 = 'decoder_src.h5'
    decoder_dstH5 = 'decoder_dst.h5'
    #override
    def onInitializeOptions(self, is_first_run, ask_override):
        """Decide whether this model uses the lightweight autoencoder.

        First run: ask the user and store the answer under ``'lighter_ae'``.
        Later runs: keep the stored answer; when there is none, fall back to
        the legacy rule ``created_vram_gb <= 4``. The legacy
        ``'created_vram_gb'`` entry is dropped from the options afterwards.

        ``ask_override`` is part of the hook signature and is not used here.
        """
        if is_first_run:
            prompt = "Use lightweight autoencoder? (y/n, ?:help skip:n) : "
            hint = "Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. If your GPU VRAM <= 4, you should to choose this option."
            self.options['lighter_ae'] = input_bool(prompt, False, help_message=hint)
        else:
            opts = self.options
            # Old models only recorded their creation-time VRAM; temporary
            # compatibility path, to be deprecated.
            fallback = opts.get('created_vram_gb', 99) <= 4
            opts.pop('created_vram_gb', None)
            opts['lighter_ae'] = opts.get('lighter_ae', fallback)
    #override
    def onInitialize(self, **in_options):
        exec(nnlib.import_all(), locals(), globals())
        self.set_vram_batch_requirements( {1.5:2,2:2,3:8,4:16,5:24,6:32,7:40,8:48} )
-        bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.created_vram_gb)
+        
        bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.options['lighter_ae'])
        if not self.is_first_run():
            self.encoder.load_weights     (self.get_strpath_storage_for_file(self.encoderH5))
            self.decoder_src.load_weights (self.get_strpath_storage_for_file(self.decoder_srcH5))
@ -27,12 +39,12 @@ class Model(ModelBase):
        input_src_mask = Input(mask_shape)
        input_dst_bgr = Input(bgr_shape)
        input_dst_mask = Input(mask_shape)
-
+        
        rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) )        
        rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) )
        self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] )
-            
+        
        self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999),
                        loss=[ DSSIMMaskLoss([input_src_mask]), 'mae', DSSIMMaskLoss([input_dst_mask]), 'mae' ] )
@ -122,7 +134,7 @@ class Model(ModelBase):
                               base_blur_mask_modifier=100,
                               **in_options)
-    def Build(self, created_vram_gb):
+    def Build(self, lighter_ae):
        exec(nnlib.code_import_all, locals(), globals())
        bgr_shape = (64, 64, 3)
@ -141,7 +153,7 @@ class Model(ModelBase):
        def Encoder(input_shape):
            input_layer = Input(input_shape)
            x = input_layer
-            if created_vram_gb >= 4:
+            if not lighter_ae:
                x = downscale(128)(x)
                x = downscale(256)(x)
                x = downscale(512)(x)
@ -162,7 +174,7 @@ class Model(ModelBase):
            return Model(input_layer, x)
        def Decoder():
-            if created_vram_gb >= 4:    
+            if not lighter_ae:
                input_ = Input(shape=(8, 8, 512))
                x = input_
--- a/models/Model_RecycleGAN/Model.py
+++ b/models/Model_RecycleGAN/Model.py
@ -1,250 +0,0 @@
 from models import ModelBase
 import numpy as np
 import cv2
 from mathlib import get_power_of_two
 from nnlib import nnlib
 from facelib import FaceType
 from samples import *
 class Model(ModelBase):
    GAH5 = 'GA.h5'
    PAH5 = 'PA.h5'
    DAH5 = 'DA.h5'
    GBH5 = 'GB.h5'
    DBH5 = 'DB.h5'
    PBH5 = 'PB.h5'
    #override
    def onInitialize(self, batch_size=-1, **in_options):
        """Build the RecycleGAN networks, losses, train functions and generators.

        On the first run (``self.epoch == 0``) the resolution and batch size
        are read from stdin and stored in ``self.options``; on later runs both
        must already be present in ``self.options`` or an ``Exception`` is
        raised. The method then creates two generators (GA, GB), two temporal
        predictors (PA, PB) and two discriminators (DA, DB), loads their
        weights when this is not the first run, and compiles the backend
        functions ``G_train``, ``DA_train``, ``DB_train``, ``G_view`` and
        ``G_convert``. In training mode it also registers the two temporal
        sample generators.

        The ``batch_size`` parameter is not read in this method; the batch
        size comes from ``self.get_batch_size()``, user input, or the stored
        options.
        """
        # Presumably injects the Keras/nnlib names used below (Input, K, Adam,
        # modelify, ResNet, ...) into scope -- TODO confirm against nnlib.
        exec(nnlib.code_import_all, locals(), globals())
        created_batch_size = self.get_batch_size()
        if self.epoch == 0: 
            #first run
            # NOTE(review): the bare excepts below turn any failure of
            # int(input(...)) into the default value; they also catch
            # KeyboardInterrupt/EOFError.
            try:
                created_resolution = int ( input ("Resolution (default:64, valid: 64,128,256) : ") )
            except:
                created_resolution = 64
            # Any value outside the supported set silently falls back to 64.
            if created_resolution not in [64,128,256]:
                created_resolution = 64
            try:
                created_batch_size = int ( input ("Batch_size (minimum/default - 10) : ") )
            except:
                created_batch_size = 10
            # The prompt says "minimum - 10", but the value is only clamped to >= 1.
            created_batch_size = max(created_batch_size,1)
            print ("Done. If training won't start, decrease resolution")
            # Persist the choices so later runs rebuild the same architecture.
            self.options['created_resolution'] = created_resolution
            self.options['created_batch_size'] = created_batch_size
            self.created_vram_gb = self.device_config.gpu_total_vram_gb
        else: 
            #not first run
            # Both creation-time settings are mandatory when resuming.
            if 'created_batch_size' in self.options.keys():
                created_batch_size = self.options['created_batch_size']
            else:
                raise Exception("Continue training, but created_batch_size not found.")
            if 'created_resolution' in self.options.keys():
                created_resolution = self.options['created_resolution']
            else:
                raise Exception("Continue training, but created_resolution not found.")
        resolution = created_resolution
        bgr_shape = (resolution, resolution, 3)
        # Base filter counts passed as ngf/ndf to the generator, predictor and
        # discriminator builders below.
        ngf = 64
        npf = 64
        ndf = 64
        # Weights of the recurrent + recycle loss terms for domains A and B.
        lambda_A = 10
        lambda_B = 10
        self.set_batch_size(created_batch_size)
        use_batch_norm = False #created_batch_size > 1
        # Generators: GA is applied to A-domain frames (produces fake_B),
        # GB to B-domain frames (produces fake_A).
        self.GA = modelify(ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape))
        self.GB = modelify(ResNet (bgr_shape[2], use_batch_norm, n_blocks=6, ngf=ngf, use_dropout=True))(Input(bgr_shape))
        #self.GA = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape))
        #self.GB = modelify(UNet (bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=ngf, use_dropout=True))(Input(bgr_shape))
        # Temporal predictors: take two frames and output one (used below to
        # predict frame 2 from frames 0 and 1).
        self.PA = modelify(UNetTemporalPredictor(bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=npf, use_dropout=True))([Input(bgr_shape), Input(bgr_shape)])
        self.PB = modelify(UNetTemporalPredictor(bgr_shape[2], use_batch_norm, num_downs=get_power_of_two(resolution)-1, ngf=npf, use_dropout=True))([Input(bgr_shape), Input(bgr_shape)])
        # Discriminators, one per domain.
        self.DA = modelify(NLayerDiscriminator(use_batch_norm, ndf=ndf, n_layers=3) ) (Input(bgr_shape))
        self.DB = modelify(NLayerDiscriminator(use_batch_norm, ndf=ndf, n_layers=3) ) (Input(bgr_shape))
        # Resume: restore all six networks from their storage files.
        if not self.is_first_run():
            self.GA.load_weights (self.get_strpath_storage_for_file(self.GAH5))
            self.DA.load_weights (self.get_strpath_storage_for_file(self.DAH5))
            self.PA.load_weights (self.get_strpath_storage_for_file(self.PAH5))
            self.GB.load_weights (self.get_strpath_storage_for_file(self.GBH5))
            self.DB.load_weights (self.get_strpath_storage_for_file(self.DBH5))
            self.PB.load_weights (self.get_strpath_storage_for_file(self.PBH5))
        # Three consecutive frames per domain (matches temporal_image_count=3
        # in the sample generators at the end of this method).
        real_A0 = Input(bgr_shape, name="real_A0")
        real_A1 = Input(bgr_shape, name="real_A1")
        real_A2 = Input(bgr_shape, name="real_A2")
        real_B0 = Input(bgr_shape, name="real_B0")
        real_B1 = Input(bgr_shape, name="real_B1")
        real_B2 = Input(bgr_shape, name="real_B2")
        # Target tensors shaped like a single discriminator output (batch
        # axis dropped), used as the "real" (ones) and "fake" (zeros) labels.
        DA_ones =  K.ones ( K.int_shape(self.DA.outputs[0])[1:] )
        DA_zeros = K.zeros ( K.int_shape(self.DA.outputs[0])[1:] )
        DB_ones = K.ones ( K.int_shape(self.DB.outputs[0])[1:] )
        DB_zeros = K.zeros ( K.int_shape(self.DB.outputs[0])[1:] )
        # All three helpers are the same mean-squared-error; CycleLoss is only
        # referenced from the commented-out terms in loss_G below.
        def CycleLoss (t1,t2):
            return K.mean(K.square(t1 - t2))
        def RecurrentLOSS(t1,t2):
            return K.mean(K.square(t1 - t2))
        def RecycleLOSS(t1,t2):
            return K.mean(K.square(t1 - t2))
        # Cross-domain translations of frames 0 and 1.
        fake_B0 = self.GA(real_A0)
        fake_B1 = self.GA(real_A1)
        fake_A0 = self.GB(real_B0)      
        fake_A1 = self.GB(real_B1)
        #rec_FB0 = self.GA(fake_A0)
        #rec_FB1 = self.GA(fake_A1)
        #rec_FA0 = self.GB(fake_B0)
        #rec_FA1 = self.GB(fake_B1)
        # Frame-2 predictions made inside each domain.
        pred_A2 = self.PA ( [real_A0, real_A1])
        pred_B2 = self.PB ( [real_B0, real_B1])
        # "Recycle" path: translate to the other domain, predict frame 2
        # there, translate the prediction back.
        rec_A2 = self.GB ( self.PB ( [fake_B0, fake_B1]) )
        rec_B2 = self.GA ( self.PA ( [fake_A0, fake_A1]))
        # Generator objective: squared-error adversarial terms against the
        # "real" labels plus the weighted recurrent and recycle terms.
        loss_G = K.mean(K.square(self.DB(fake_B0) - DB_ones)) + \
                 K.mean(K.square(self.DB(fake_B1) - DB_ones)) + \
                 K.mean(K.square(self.DA(fake_A0) - DA_ones)) + \
                 K.mean(K.square(self.DA(fake_A1) - DA_ones)) + \
                 lambda_A * ( #CycleLoss(rec_FA0, real_A0) + \
                              #CycleLoss(rec_FA1, real_A1) + \
                              RecurrentLOSS(pred_A2, real_A2) + \
                              RecycleLOSS(rec_A2, real_A2) ) + \
                 lambda_B * ( #CycleLoss(rec_FB0, real_B0) + \
                              #CycleLoss(rec_FB1, real_B1) + \
                              RecurrentLOSS(pred_B2, real_B2) + \
                              RecycleLOSS(rec_B2, real_B2) )
        # One optimizer step updates both generators and both predictors.
        weights_G = self.GA.trainable_weights + self.GB.trainable_weights + self.PA.trainable_weights + self.PB.trainable_weights
        self.G_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_G],
                                    Adam(lr=2e-4, beta_1=0.5, beta_2=0.999).get_updates(loss_G, weights_G) )
        ###########
        # Discriminator A: real frames -> ones, generated frames -> zeros,
        # averaged per frame and summed over frames 0 and 1.
        loss_D_A0 = ( K.mean(K.square( self.DA(real_A0) - DA_ones)) + \
                      K.mean(K.square( self.DA(fake_A0) - DA_zeros)) ) * 0.5
        loss_D_A1 = ( K.mean(K.square( self.DA(real_A1) - DA_ones)) + \
                      K.mean(K.square( self.DA(fake_A1) - DA_zeros)) ) * 0.5
        loss_D_A = loss_D_A0 + loss_D_A1
        self.DA_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_A],
                                    Adam(lr=2e-4, beta_1=0.5, beta_2=0.999).get_updates(loss_D_A, self.DA.trainable_weights) )
        ############
        # Discriminator B: same construction as discriminator A.
        loss_D_B0 = ( K.mean(K.square( self.DB(real_B0) - DB_ones)) + \
                      K.mean(K.square( self.DB(fake_B0) - DB_zeros)) ) * 0.5
        loss_D_B1 = ( K.mean(K.square( self.DB(real_B1) - DB_ones)) + \
                      K.mean(K.square( self.DB(fake_B1) - DB_zeros)) ) * 0.5
        loss_D_B = loss_D_B0 + loss_D_B1
        self.DB_train = K.function ([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[loss_D_B],
                                    Adam(lr=2e-4, beta_1=0.5, beta_2=0.999).get_updates(loss_D_B, self.DB.trainable_weights) )
        ############
        # Inference-only functions: G_view feeds the preview, G_convert maps a
        # single B-domain frame through GB for the converter.
        self.G_view = K.function([real_A0, real_A1, real_A2, real_B0, real_B1, real_B2],[fake_A0, fake_A1, pred_A2, rec_A2, fake_B0, fake_B1, pred_B2, rec_B2 ])
        self.G_convert = K.function([real_B0],[fake_A0])
        if self.is_training_mode:
            f = SampleProcessor.TypeFlags
            # One temporal generator per domain, each yielding 3-frame
            # sequences at the model resolution with tanh normalization.
            self.set_training_data_generators ([            
                    SampleGeneratorImageTemporal(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, 
                        temporal_image_count=3,
                        sample_process_options=SampleProcessor.Options(random_flip = False, normalize_tanh = True), 
                        output_sample_types=[ [f.SOURCE | f.MODE_BGR, resolution] ] ),
                    SampleGeneratorImageTemporal(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, 
                        temporal_image_count=3,
                        sample_process_options=SampleProcessor.Options(random_flip = False, normalize_tanh = True), 
                        output_sample_types=[ [f.SOURCE | f.MODE_BGR, resolution] ] ),
                   ])
    #override
    def onSave(self):
        """Save the weights of all six networks via ``save_weights_safe``.

        Each network is paired with the storage path resolved from its
        weight-file name; the order is GA, GB, DA, DB, PA, PB.
        """
        networks_and_files = (
            (self.GA, self.GAH5),
            (self.GB, self.GBH5),
            (self.DA, self.DAH5),
            (self.DB, self.DBH5),
            (self.PA, self.PAH5),
            (self.PB, self.PBH5),
        )
        targets = [[network, self.get_strpath_storage_for_file(filename)]
                   for network, filename in networks_and_files]
        self.save_weights_safe(targets)
    #override
    def onTrainOneEpoch(self, sample):
        """Run one generator step and one step of each discriminator.

        ``sample[0]`` and ``sample[1]`` must each unpack into exactly three
        frame batches (source and destination sequences). The six batches are
        fed, in that order, to ``G_train``, ``DA_train`` and ``DB_train``.

        Returns a tuple of ``(name, loss)`` pairs for 'G', 'DA' and 'DB'.
        """
        src_0, src_1, src_2 = sample[0]
        dst_0, dst_1, dst_2 = sample[1]
        batch = [src_0, src_1, src_2, dst_0, dst_1, dst_2]

        # Each train function returns a one-element sequence holding the loss.
        (g_loss,) = self.G_train(batch)
        (da_loss,) = self.DA_train(batch)
        (db_loss,) = self.DB_train(batch)

        #return ( ('G', loss_G), )
        return (('G', g_loss), ('DA', da_loss), ('DB', db_loss))
    #override
    def onGetPreview(self, sample):
        """Assemble the preview image for the first item of the given batches.

        The three source and three destination frame batches are passed
        through ``G_view``. The first element of every input and output batch
        is rescaled with ``x / 2 + 0.5`` (presumably from the tanh range back
        to [0, 1] -- confirm against the sample generators) and tiled into two
        rows:

        * row 1: A0, A1, A2, predicted A2, fake B0, fake B1, recycled A2
        * row 2: B0, B1, B2, predicted B2, fake A0, fake A1, recycled B2

        Returns a one-element list holding a ``(title, image)`` tuple.
        """
        def first_as_image(batch):
            # Take the first item of the batch and apply the preview rescale.
            return batch[0] / 2 + 0.5

        src_seq = sample[0]
        dst_seq = sample[1]
        net_inputs = [src_seq[0], src_seq[1], src_seq[2],
                      dst_seq[0], dst_seq[1], dst_seq[2]]

        net_outputs = self.G_view(net_inputs)
        (fake_A0, fake_A1, pred_A2, rec_A2,
         fake_B0, fake_B1, pred_B2, rec_B2) = [first_as_image(out) for out in net_outputs]
        (real_A0, real_A1, real_A2,
         real_B0, real_B1, real_B2) = [first_as_image(inp) for inp in net_inputs]

        row_a = np.concatenate((real_A0, real_A1, real_A2, pred_A2, fake_B0, fake_B1, rec_A2), axis=1)
        row_b = np.concatenate((real_B0, real_B1, real_B2, pred_B2, fake_A0, fake_A1, rec_B2), axis=1)
        preview = np.concatenate((row_a, row_b), axis=0)

        return [('RecycleGAN, A0-A1-A2-PA2-FB0-FB1-RA2, B0-B1-B2-PB2-FA0-FA1-RB2, ', preview)]
    def predictor_func (self, face):
        """Convert a single face image with ``G_convert``.

        The image is rescaled with ``face * 2 - 1``, given a leading batch
        axis, passed to ``self.G_convert`` as a one-element input list, and
        the first item of the first output is rescaled back with
        ``x / 2 + 0.5``.
        """
        batch = np.expand_dims(face * 2 - 1, 0)
        converted = self.G_convert([batch])[0]
        return converted[0] / 2 + 0.5
    #override
    def get_converter(self, **in_options):
        """Create the ``ConverterImage`` for this model.

        The converter is given ``self.predictor_func`` and uses the stored
        ``'created_resolution'`` option as both its predictor input size and
        its output size; any extra keyword options are forwarded unchanged.
        """
        from models import ConverterImage

        predictor = self.predictor_func
        resolution = self.options['created_resolution']
        return ConverterImage(predictor,
                              predictor_input_size=resolution,
                              output_size=resolution,
                              **in_options)
--- a/models/Model_RecycleGAN/init.py
+++ b/models/Model_RecycleGAN/init.py
@ -1 +0,0 @@
 from .Model import Model
--- a/models/archived_models.zip
+++ b/models/archived_models.zip
--- a/nnlib/devicelib.py
+++ b/nnlib/devicelib.py
@ -1,20 +1,26 @@
 from .pynvml import *
 try:
    nvmlInit()
    hasNVML = True
 except:
    hasNVML = False
 class devicelib:
    class Config():    
-        force_best_gpu_idx = -1
+        force_gpu_idx = -1
        multi_gpu = False
        force_gpu_idxs = None
        choose_worst_gpu = False
        gpu_idxs = []
        gpu_names = []
        gpu_total_vram_gb = 0
        gpu_compute_caps = []
        gpu_vram_gb = []
        allow_growth = True
        use_fp16 = False
        cpu_only = False
-        def __init__ (self, force_best_gpu_idx = -1, 
+        def __init__ (self, force_gpu_idx = -1, 
                            multi_gpu = False, 
                            force_gpu_idxs = None, 
                            choose_worst_gpu = False,
@ -27,219 +33,154 @@ class devicelib:
            if cpu_only:
                self.cpu_only = True
            else:
-                self.force_best_gpu_idx = force_best_gpu_idx
+                self.force_gpu_idx = force_gpu_idx
                self.multi_gpu = multi_gpu
                self.force_gpu_idxs = force_gpu_idxs
                self.choose_worst_gpu = choose_worst_gpu        
                self.allow_growth = allow_growth
                self.gpu_idxs = []
-                
+
-                if not devicelib.hasNVML():
+                if force_gpu_idxs is not None:
-                    self.gpu_idxs = [0]
+                    for idx in force_gpu_idxs.split(','):
-                    self.gpu_total_vram_gb = 2
+                        idx = int(idx)
-                    self.gpu_names += ['Generic GeForce GPU']
+                        if devicelib.isValidDeviceIdx(idx):
-                    self.gpu_compute_caps += [ 50 ]
+                            self.gpu_idxs.append(idx)     
                else:
-                    if force_gpu_idxs is not None:
+                    gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and devicelib.isValidDeviceIdx(force_gpu_idx)) else devicelib.getBestDeviceIdx() if not choose_worst_gpu else devicelib.getWorstDeviceIdx()
-                        for idx in force_gpu_idxs.split(','):
+                    if gpu_idx != -1:
-                            idx = int(idx)
+                        if self.multi_gpu:
-                            if devicelib.isValidDeviceIdx(idx):
+                            self.gpu_idxs = devicelib.getDeviceIdxsEqualModel( gpu_idx )
-                                self.gpu_idxs.append(idx)     
+                            if len(self.gpu_idxs) <= 1:
-                    else:
+                                self.multi_gpu = False
-                        gpu_idx = force_best_gpu_idx if (force_best_gpu_idx >= 0 and devicelib.isValidDeviceIdx(force_best_gpu_idx)) else devicelib.getBestDeviceIdx() if not choose_worst_gpu else devicelib.getWorstDeviceIdx()
+                        else:
-                        if gpu_idx != -1:
+                            self.gpu_idxs = [gpu_idx]
-                            if self.multi_gpu:
+                            
-                                self.gpu_idxs = devicelib.getDeviceIdxsEqualModel( gpu_idx )
+                self.cpu_only = (len(self.gpu_idxs) == 0)
-                                if len(self.gpu_idxs) <= 1:
+ 
-                                    self.multi_gpu = False
+                if not self.cpu_only:
-                            else:
+                    self.gpu_names = []
-                                self.gpu_idxs = [gpu_idx]
+                    self.gpu_compute_caps = []
-                                
+                    for gpu_idx in self.gpu_idxs:
-                    self.cpu_only = (len(self.gpu_idxs) == 0)
+                        self.gpu_names += [devicelib.getDeviceName(gpu_idx)]
-     
+                        self.gpu_compute_caps += [ devicelib.getDeviceComputeCapability ( gpu_idx ) ]
-                    if not self.cpu_only:
+                        self.gpu_vram_gb += [ devicelib.getDeviceVRAMTotalGb ( gpu_idx ) ]
-                        self.gpu_total_vram_gb = devicelib.getDeviceVRAMTotalGb ( self.gpu_idxs[0] )
+                        
                        self.gpu_names = []
                        self.gpu_compute_caps = []
                        for gpu_idx in self.gpu_idxs:
                            self.gpu_names += [devicelib.getDeviceName(gpu_idx)]
                            self.gpu_compute_caps += [ devicelib.getDeviceComputeCapability ( gpu_idx ) ]
    @staticmethod
    def hasNVML():
        """Report whether NVML can be initialized on this system.

        Returns:
            True when both `nvmlInit()` and `nvmlShutdown()` complete
            without raising; False when either of them raises.
        """
        try:
            nvmlInit()
            nvmlShutdown()
        except Exception:
            # This is only a probe: any NVML/driver failure means "not
            # available". Catching Exception rather than using a bare
            # except lets KeyboardInterrupt and SystemExit propagate.
            return False
        return True
    @staticmethod
    def getDevicesWithAtLeastFreeMemory(freememsize):
        """List indices of NVML devices with at least `freememsize` free memory.

        Free memory is computed as `memInfo.total - memInfo.used` from
        `nvmlDeviceGetMemoryInfo` and compared to `freememsize` directly
        (presumably both are in bytes -- confirm against the NVML binding).

        This is best-effort: when NVML cannot be initialized the result is
        an empty list, and when a query fails part-way the indices gathered
        so far are returned.

        Returns:
            A list of integer device indices in ascending order.
        """
        result = []
        try:
            nvmlInit()
        except Exception:
            # NVML is unavailable; there is nothing to enumerate or shut down.
            return result

        try:
            for i in range(nvmlDeviceGetCount()):
                memInfo = nvmlDeviceGetMemoryInfo(nvmlDeviceGetHandleByIndex(i))
                if memInfo.total - memInfo.used >= freememsize:
                    result.append(i)
        except Exception:
            # Keep whatever was collected before the failing query.
            pass
        finally:
            # A successful nvmlInit() must always be paired with
            # nvmlShutdown(), including when a query above raised.
            try:
                nvmlShutdown()
            except Exception:
                pass
        return result
    @staticmethod
    def getDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb):
        if not hasNVML and totalmemsize_gb <= 2:
            return [0]
        result = []
-        try:
+        for i in range(nvmlDeviceGetCount()):
-            nvmlInit()
+            handle = nvmlDeviceGetHandleByIndex(i)
-            for i in range(0, nvmlDeviceGetCount() ):
+            memInfo = nvmlDeviceGetMemoryInfo( handle )
-                handle = nvmlDeviceGetHandleByIndex(i)
+            if (memInfo.total) >= totalmemsize_gb*1024*1024*1024:
-                memInfo = nvmlDeviceGetMemoryInfo( handle )
+                result.append (i)
                if (memInfo.total) >= totalmemsize_gb*1024*1024*1024:
                    result.append (i)            
            nvmlShutdown()
        except:
            pass
        return result
    @staticmethod
-    def getAllDevicesIdxsList ():
+    def getAllDevicesIdxsList():
-        result = []
+        if not hasNVML:
-        try:
+            return [0]
-            nvmlInit()    
+            
-            result = [ i for i in range(0, nvmlDeviceGetCount() ) ]    
+        return [ i for i in range(0, nvmlDeviceGetCount() ) ]
            nvmlShutdown()
        except:
            pass
        return result
    @staticmethod
-    def getAllDevicesIdxsWithNamesList ():
+    def getAllDevicesIdxsWithNamesList():
-        result = []
+        if not hasNVML:
-        try:
+            return [ (0, devicelib.getDeviceName(0) ) ]
-            nvmlInit()    
+  
-            result = [ (i, nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() ) for i in range(0, nvmlDeviceGetCount() ) ]    
+        return [ (i, nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() ) for i in range(nvmlDeviceGetCount() ) ]
            nvmlShutdown()
        except:
            pass
        return result
    @staticmethod
    def getDeviceVRAMFree (idx):
-        result = 0
+        if not hasNVML:
-        try:
+            return 2
-            nvmlInit()
+
-            if idx < nvmlDeviceGetCount():    
+        if idx < nvmlDeviceGetCount():    
-                handle = nvmlDeviceGetHandleByIndex(idx)
+            memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(idx) )
-                memInfo = nvmlDeviceGetMemoryInfo( handle )
+            return memInfo.total - memInfo.used
-                result = (memInfo.total - memInfo.used)        
+
-            nvmlShutdown()
+        return 0
        except:
            pass
        return result
    @staticmethod
    def getDeviceVRAMTotalGb (idx):
-        result = 2
+        if not hasNVML:
-        try:
+            return 2
-            nvmlInit()
+            
-            if idx < nvmlDeviceGetCount():    
+        if idx < nvmlDeviceGetCount():    
-                handle = nvmlDeviceGetHandleByIndex(idx)
+            memInfo = nvmlDeviceGetMemoryInfo(  nvmlDeviceGetHandleByIndex(idx) )
-                memInfo = nvmlDeviceGetMemoryInfo( handle )
+            return round ( memInfo.total / (1024*1024*1024) )
-                result = memInfo.total / (1024*1024*1024)
+
-            nvmlShutdown()
+        return 0
            result = round(result)
        except:
            pass
        return result
    @staticmethod
    def getBestDeviceIdx():
-        idx = -1
+        if not hasNVML:
-        try:
+            return 0
-            nvmlInit()
+
-            idx_mem = 0
+        idx = -1
-            for i in range(0, nvmlDeviceGetCount() ):
+        idx_mem = 0
-                handle = nvmlDeviceGetHandleByIndex(i)
+        for i in range( nvmlDeviceGetCount() ):
-                memInfo = nvmlDeviceGetMemoryInfo( handle )
+            memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) )
-                if memInfo.total > idx_mem:
+            if memInfo.total > idx_mem:
-                    idx = i
+                idx = i
-                    idx_mem = memInfo.total
+                idx_mem = memInfo.total
            nvmlShutdown()
        except:
            pass
        return idx
    @staticmethod
    def getWorstDeviceIdx():
-        idx = -1
+        if not hasNVML:
-        try:
+            return 0
-            nvmlInit()    
+
-            
+        idx = -1
-            idx_mem = sys.maxsize
+        idx_mem = sys.maxsize
-            for i in range(0, nvmlDeviceGetCount() ):
+        for i in range( nvmlDeviceGetCount() ):
-                handle = nvmlDeviceGetHandleByIndex(i)
+            memInfo = nvmlDeviceGetMemoryInfo( nvmlDeviceGetHandleByIndex(i) )
-                memInfo = nvmlDeviceGetMemoryInfo( handle )
+            if memInfo.total < idx_mem:
-                if memInfo.total < idx_mem:
+                idx = i
-                    idx = i
+                idx_mem = memInfo.total
                    idx_mem = memInfo.total
            nvmlShutdown()
        except:
            pass
        return idx
    @staticmethod
    def isValidDeviceIdx(idx):
-        result = False
+        if not hasNVML:
-        try:
+            return (idx == 0)
-            nvmlInit()    
+   
-            result = (idx < nvmlDeviceGetCount())
+        return (idx < nvmlDeviceGetCount())
            nvmlShutdown()
        except:
            pass
        return result
    @staticmethod
    def getDeviceIdxsEqualModel(idx):
-        result = []
+        if not hasNVML:
-        try:
+            return [0] if idx == 0 else []            
-            nvmlInit()    
+        
-            idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
+        result = []  
        idx_name = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
        for i in range( nvmlDeviceGetCount() ):
            if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name:
                result.append (i)
            for i in range(0, nvmlDeviceGetCount() ):
                if nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(i)).decode() == idx_name:
                    result.append (i)
            nvmlShutdown()
        except:
            pass
        return result
    @staticmethod
    def getDeviceName (idx):
-        result = 'Generic GeForce GPU'
+        if not hasNVML:
-        try:
+            return 'Generic GeForce GPU'
-            nvmlInit()    
+            
-            if idx < nvmlDeviceGetCount():    
+        if idx < nvmlDeviceGetCount():    
-                result = nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
+            return nvmlDeviceGetName(nvmlDeviceGetHandleByIndex(idx)).decode()
-            nvmlShutdown()
+
-        except:
+        return None
            pass
        return result
    @staticmethod
    def getDeviceComputeCapability(idx):
-        result = 0
+        if not hasNVML:
-        try:
+            return 99 if idx == 0 else 0
-            nvmlInit()    
+            
-            if idx < nvmlDeviceGetCount():    
+        result = 0  
-                result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
+        if idx < nvmlDeviceGetCount():    
-            nvmlShutdown()
+            result = nvmlDeviceGetCudaComputeCapability(nvmlDeviceGetHandleByIndex(idx))
-        except:
+        return result[0] * 10 + result[1]
            pass
        return result[0] * 10 + result[1]