Merge branch 'master' into fix/web-ui-previews-png

2025-08-21 05:53:24 -07:00 · 2021-03-25 09:42:56 -07:00 · 2021-03-25 09:42:56 -07:00 · cf722cef43
commit cf722cef43
parent 08e736ba9a 9868577422
12 changed files with 363 additions and 63 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -6,9 +6,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### In Progress
 - [MS-SSIM loss training option](https://github.com/faceshiftlabs/DeepFaceLab/tree/feature/ms-ssim-loss-2)
 - [Freezeable layers (encoder/decoder/etc.)](https://github.com/faceshiftlabs/DeepFaceLab/tree/feature/freezable-weights)
- [GAN stability improvements](https://github.com/faceshiftlabs/DeepFaceLab/tree/feature/gan-updates)
+
 ## [1.4.0] - 2021-03-24
 ### Added
 - [MS-SSIM loss training option](doc/features/ms-ssim)
 - GAN version option (v2 - late 2020 or v3 - current GAN)
 - [GAN label smoothing and label noise options](doc/features/gan-options)
 ### Fixed
 - Background Power now uses the entire image, not just the area outside of the mask, for comparison.
 This should help with rough areas directly next to the mask.
 ## [1.3.0] - 2020-03-20
 ### Added
@ -53,7 +60,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Reset stale master branch to [seranus/DeepFaceLab](https://github.com/seranus/DeepFaceLab), 
  21 commits ahead of [iperov/DeepFaceLab](https://github.com/iperov/DeepFaceLab) ([compare](https://github.com/iperov/DeepFaceLab/compare/4818183...seranus:3f5ae05))
-[Unreleased]: https://github.com/olivierlacan/keep-a-changelog/compare/v1.3.0...HEAD
+[Unreleased]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.4.0...HEAD
 [1.4.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.3.0...v1.4.0
 [1.3.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.2.1...v1.3.0
 [1.2.1]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.2.0...v1.2.1
 [1.2.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.1.5...v1.2.0
--- a/README.md
+++ b/README.md
@ -3,6 +3,8 @@
 # CHANGELOG 
 ### [View most recent changes](CHANGELOG.md)
 ![](doc/dfl_cover.png)
 <table align="center" border="0">
 <tr><td colspan=2 align="center">
--- a/core/leras/layers/MsSsim.py
+++ b/core/leras/layers/MsSsim.py
@ -0,0 +1,30 @@
 from core.leras import nn
 tf = nn.tf
 class MsSsim(nn.LayerBase):
    """
    Multi-scale SSIM loss layer wrapping ``tf.image.ssim_multiscale``.

    The per-scale weights start from ``default_power_factors``; scales whose
    downsampled resolution would be smaller than the filter are dropped and
    the remaining weights are rescaled to sum to 1.
    """
    default_power_factors = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)

    def __init__(self, resolution, kernel_size=11, **kwargs):
        """
        resolution   side length of the images that will be compared
        kernel_size  SSIM filter size, forwarded as ``filter_size``
        kwargs       passed through unchanged to ``nn.LayerBase``
        """
        # Scale i sees the image at resolution // 2**i; keep only the scales
        # that are still at least as large as the filter.
        usable = []
        for scale, weight in enumerate(self.default_power_factors):
            if resolution // (2 ** scale) >= kernel_size:
                usable.append(weight)

        # If scales were dropped the weights no longer sum to 1: renormalize.
        total = sum(usable)
        if total < 1.0:
            usable = [weight / total for weight in usable]

        self.power_factors = usable
        self.kernel_size = kernel_size
        super().__init__(**kwargs)

    def __call__(self, y_true, y_pred, max_val):
        """
        Return ``1 - MS-SSIM(y_true, y_pred)`` so that identical images give 0.

        Both inputs are cast to float32 and transposed from NCHW to NHWC
        before being handed to ``tf.image.ssim_multiscale``.
        """
        nchw_to_nhwc = [0, 2, 3, 1]
        true_nhwc = tf.transpose(tf.cast(y_true, tf.float32), nchw_to_nhwc)
        pred_nhwc = tf.transpose(tf.cast(y_pred, tf.float32), nchw_to_nhwc)

        similarity = tf.image.ssim_multiscale(
            true_nhwc,
            pred_nhwc,
            max_val,
            power_factors=self.power_factors,
            filter_size=self.kernel_size,
        )

        # similarity is 1 for identical images, so the loss is its complement
        return 1.0 - similarity
 nn.MsSsim = MsSsim
--- a/core/leras/layers/init.py
+++ b/core/leras/layers/init.py
@ -14,3 +14,4 @@ from .TLU import *
 from .ScaleAdd import *
 from .DenseNorm import *
 from .AdaIN import *
 from .MsSsim import *
--- a/core/leras/models/PatchDiscriminator.py
+++ b/core/leras/models/PatchDiscriminator.py
@ -195,3 +195,117 @@ class UNetPatchDiscriminator(nn.ModelBase):
        return center_out, self.out_conv(x)
 nn.UNetPatchDiscriminator = UNetPatchDiscriminator
 class UNetPatchDiscriminatorV2(nn.ModelBase):
    """
    U-Net shaped patch discriminator: a strided-conv encoder, a 1x1 bottleneck
    and a transposed-conv decoder with skip connections. ``forward`` returns
    one prediction from the bottleneck and one from the decoder output.

    Inspired by https://arxiv.org/abs/2002.12655 "A U-Net Based Discriminator for Generative Adversarial Networks"
    """
    def calc_receptive_field_size(self, layers):
        """
        Receptive field of a stack of convs given as (kernel_size, stride) pairs.
        Returns 0 for an empty stack.

        result the same as https://fomoro.com/research/article/receptive-field-calculatorindex.html
        """
        field = 0
        jump = 1  # product of the strides of all previous layers
        for idx, (kernel, stride) in enumerate(layers):
            field = kernel if idx == 0 else field + (kernel - 1) * jump
            jump *= stride
        return field

    def find_archi(self, target_patch_size, max_layers=6):
        """
        Find the best configuration of layers using only 3x3 convs for target patch size

        Every stack of 1..max_layers 3x3 convs with stride 1 or 2 (the last
        layer always has stride 2) is enumerated. For each receptive field
        size the stack with the fewest layers is kept, ties being broken by
        the larger total stride. The stack whose receptive field is nearest
        to target_patch_size is returned as a list of [kernel_size, stride].
        """
        best_by_rf = {}
        for depth in range(1, max_layers + 1):
            # each bit of `mask` picks stride 2 (set) or 1 (clear) for one of
            # the first depth-1 layers; masks are visited from highest to 0
            for mask in range((1 << (depth - 1)) - 1, -1, -1):
                strides = [2 if mask & (1 << bit) else 1 for bit in range(depth - 1)]
                strides.append(2)
                candidate = [[3, stride] for stride in strides]
                stride_total = sum(strides)

                rf = self.calc_receptive_field_size(candidate)
                known = best_by_rf.get(rf)
                if known is not None:
                    fewer_layers = depth < known[0]
                    same_depth_more_stride = depth == known[0] and stride_total > known[1]
                    if not (fewer_layers or same_depth_more_stride):
                        continue
                best_by_rf[rf] = (depth, stride_total, candidate)

        sizes = sorted(best_by_rf)
        nearest = sizes[np.abs(np.array(sizes) - target_patch_size).argmin()]
        return best_by_rf[nearest][2]

    def on_build(self, patch_size, in_ch):
        """
        Create the layers for the architecture chosen by find_archi(patch_size).

        patch_size  target receptive field size handed to find_archi
        in_ch       number of channels of the input image
        """
        class ResidualBlock(nn.ModelBase):
            """Two same-padded convs with a skip connection and leaky ReLU (0.2)."""
            def on_build(self, ch, kernel_size=3 ):
                self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
                self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')

            def forward(self, inp):
                out = tf.nn.leaky_relu(self.conv1(inp), 0.2)
                out = self.conv2(out)
                return tf.nn.leaky_relu(inp + out, 0.2)

        self.convs = []
        self.res = []
        self.upconvs = []
        self.upres = []

        layers = self.find_archi(patch_size)
        depth = len(layers)
        base_ch = 16

        # channels per level: key -1 is the input stem, keys 0..depth-1 are
        # the encoder levels; width doubles per level and is capped at 512
        level_chs = {lvl - 1: min(base_ch * (2 ** lvl), 512) for lvl in range(depth + 1)}

        self.in_conv = nn.Conv2D(in_ch, level_chs[-1], kernel_size=1, padding='VALID')

        for i, (kernel_size, strides) in enumerate(layers):
            ch_in, ch_out = level_chs[i-1], level_chs[i]
            # the deepest up-conv is fed by the bottleneck only; the others
            # receive the output of a residual block of doubled width
            up_in = ch_out if i == depth - 1 else ch_out * 2

            self.convs.append(nn.Conv2D(ch_in, ch_out, kernel_size=kernel_size, strides=strides, padding='SAME'))
            self.res.append(ResidualBlock(ch_out))
            # decoder lists are filled front-first so they end up deepest-first
            self.upconvs.insert(0, nn.Conv2DTranspose(up_in, ch_in, kernel_size=kernel_size, strides=strides, padding='SAME'))
            self.upres.insert(0, ResidualBlock(ch_in * 2))

        self.out_conv = nn.Conv2D(level_chs[-1]*2, 1, kernel_size=1, padding='VALID')
        self.center_out  = nn.Conv2D(level_chs[depth-1], 1, kernel_size=1, padding='VALID')
        self.center_conv = nn.Conv2D(level_chs[depth-1], level_chs[depth-1], kernel_size=1, padding='VALID')

    def forward(self, x):
        """
        Run the discriminator and return ``(center_out, out)``: the 1-channel
        output taken at the bottleneck and the 1-channel output of the decoder.
        """
        x = tf.nn.leaky_relu(self.in_conv(x), 0.1)

        # encoder: remember the input of every level, deepest first
        skips = []
        for down, block in zip(self.convs, self.res):
            skips.insert(0, x)
            x = block(tf.nn.leaky_relu(down(x), 0.1))

        center_out = self.center_out(x)
        x = self.center_conv(x)

        # decoder: upsample, concatenate the matching skip, refine
        for up, skip, block in zip(self.upconvs, skips, self.upres):
            x = tf.nn.leaky_relu(up(x), 0.1)
            x = block(tf.concat([skip, x], axis=nn.conv2d_ch_axis))

        return center_out, self.out_conv(x)
 nn.UNetPatchDiscriminatorV2 = UNetPatchDiscriminatorV2
--- a/core/leras/nn.py
+++ b/core/leras/nn.py
@ -112,7 +112,7 @@ class nn():
                config = tf.ConfigProto(device_count={'GPU': 0})
            else:
                nn.tf_default_device = "/GPU:0"
-                config = tf.ConfigProto()
+                config = tf.ConfigProto(allow_soft_placement=True)
                config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices])
            config.gpu_options.force_gpu_compatible = True
--- a/doc/dfl_cover.png
+++ b/doc/dfl_cover.png
--- a/doc/features/gan-options/README.md
+++ b/doc/features/gan-options/README.md
@ -0,0 +1,50 @@
 # GAN Options
 Allows you to use one-sided label smoothing and noisy labels when training the discriminator.
 - [ONE-SIDED LABEL SMOOTHING](#one-sided-label-smoothing)
 - [NOISY LABELS](#noisy-labels)
 ## ONE-SIDED LABEL SMOOTHING
 ![](tutorial-on-theory-and-application-of-generative-adversarial-networks-54-638.jpg)
 > Deep networks may suffer from overconfidence. For example, it uses very few features to classify an object. To 
 > mitigate the problem, deep learning uses regulation and dropout to avoid overconfidence. 
 > 
 > In GAN, if the discriminator depends on a small set of features to detect real images, the generator may just produce 
 > these features only to exploit the discriminator. The optimization may turn too greedy and produces no long term 
 > benefit. In GAN, overconfidence hurts badly. To avoid the problem, we penalize the discriminator when the prediction 
 > for any real images go beyond 0.9 (D(real image)>0.9). This is done by setting our target label value to be 0.9 
 > instead of 1.0.
 - [GAN — Ways to improve GAN performance](https://towardsdatascience.com/gan-ways-to-improve-gan-performance-acf37f9f59b)
 By setting the label smoothing value to any value > 0, the target label value used with the discriminator will be:
 ```
 target label value = 1 - (label smoothing value)
 ```
 ### USAGE
 ```
 [0.1] GAN label smoothing ( 0 - 0.5 ?:help ) : 0.1
 ```
 ## NOISY LABELS
 > make the labels the noisy for the discriminator: occasionally flip the labels when training the discriminator
 - [How to Train a GAN? Tips and tricks to make GANs work](https://github.com/soumith/ganhacks/blob/master/README.md#6-use-soft-and-noisy-labels)
 When the noisy labels value is set to any value > 0, the target labels used with the discriminator will be flipped
 ("fake" => "real" / "real" => "fake") with probability p (where p is the noisy labels value).
 E.g., if the value is 0.05, then ~5% of the labels will be flipped when training the discriminator.
 ### USAGE
 ```
 [0.05] GAN noisy labels ( 0 - 0.5 ?:help ) : 0.05
 ```
--- a/doc/features/gan-options/tutorial-on-theory-and-application-of-generative-adversarial-networks-54-638.jpg
+++ b/doc/features/gan-options/tutorial-on-theory-and-application-of-generative-adversarial-networks-54-638.jpg
--- a/doc/features/ms-ssim/README.md
+++ b/doc/features/ms-ssim/README.md
@ -0,0 +1,43 @@
 # Multiscale SSIM (MS-SSIM)
 Allows you to train using the MS-SSIM (multiscale structural similarity index measure) as the main loss metric,
 a perceptually more accurate measure of image quality than MSE (mean squared error).
 As an added benefit, you may see a decrease in ms/iteration (when using the same batch size) with Multiscale loss
 enabled. You may also be able to train with a larger batch size with it enabled.
 - [DESCRIPTION](#description)
 - [USAGE](#usage)
 ## DESCRIPTION
 [SSIM](https://en.wikipedia.org/wiki/Structural_similarity) is a metric for comparing the perceptual quality of an image:
 > SSIM is a perception-based model that considers image degradation as perceived change in structural information, 
 > while also incorporating important perceptual phenomena, including both luminance masking and contrast masking terms. 
 > [...]
 > Structural information is the idea that the pixels have strong inter-dependencies especially when they are spatially 
 > close. These dependencies carry important information about the structure of the objects in the visual scene. 
 > Luminance masking is a phenomenon whereby image distortions (in this context) tend to be less visible in bright 
 > regions, while contrast masking is a phenomenon whereby distortions become less visible where there is significant 
 > activity or "texture" in the image.
 The current loss metric is a combination of SSIM (structural similarity index measure) and 
 [MSE](https://en.wikipedia.org/wiki/Mean_squared_error) (mean squared error).
 [Multiscale SSIM](https://en.wikipedia.org/wiki/Structural_similarity#Multi-Scale_SSIM) is a variant of SSIM that
 improves upon SSIM by comparing the similarity at multiple scales (e.g.: full-size, half-size, 1/4 size, etc.)
 By using MS-SSIM as our main loss metric, we should expect the image similarity to improve across each scale, improving
 both the large scale and small scale detail of the predicted images.
 Original paper: [Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. 
 "Multiscale structural similarity for image quality assessment." 
 Signals, Systems and Computers, 2004.](https://www.cns.nyu.edu/pub/eero/wang03b.pdf)
 ## USAGE
 ```
 [n] Use multiscale loss? ( y/n ?:help ) : y
 ```
--- a/doc/features/random-color/README.md
+++ b/doc/features/random-color/README.md
@ -19,4 +19,7 @@ maintaining the same `C*` (chroma, relative saturation).
 ## USAGE
-`[n] Random color ( y/n ?:help ) : y`
+```
 [n] Random color ( y/n ?:help ) : y
 ```
--- a/models/Model_SAEHD/Model.py
+++ b/models/Model_SAEHD/Model.py
@ -53,6 +53,8 @@ class SAEHDModel(ModelBase):
        lr_dropout = {True:'y', False:'n'}.get(lr_dropout, lr_dropout) #backward comp
        default_lr_dropout         = self.options['lr_dropout'] = lr_dropout
        default_ms_ssim_loss       = self.options['ms_ssim_loss']       = self.load_or_def_option('ms_ssim_loss', False)
        default_random_warp        = self.options['random_warp']        = self.load_or_def_option('random_warp', True)
        default_background_power   = self.options['background_power']   = self.load_or_def_option('background_power', 0.0)
        default_true_face_power    = self.options['true_face_power']    = self.load_or_def_option('true_face_power', 0.0)
@ -138,9 +140,12 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
            self.options['uniform_yaw'] = io.input_bool ("Uniform yaw distribution of samples", default_uniform_yaw, help_message='Helps to fix blurry side faces due to small amount of them in the faceset.')
        default_gan_version        = self.options['gan_version']        = self.load_or_def_option('gan_version', 2)
        default_gan_power          = self.options['gan_power']          = self.load_or_def_option('gan_power', 0.0)
        default_gan_patch_size     = self.options['gan_patch_size']     = self.load_or_def_option('gan_patch_size', self.options['resolution'] // 8)
        default_gan_dims           = self.options['gan_dims']           = self.load_or_def_option('gan_dims', 16)
        default_gan_smoothing      = self.options['gan_smoothing']      = self.load_or_def_option('gan_smoothing', 0.1)
        default_gan_noise          = self.options['gan_noise']          = self.load_or_def_option('gan_noise', 0.05)
        if self.is_first_run() or ask_override:
            self.options['models_opt_on_gpu'] = io.input_bool ("Place models and optimizer on GPU", default_models_opt_on_gpu, help_message="When you train on one GPU, by default model and optimizer weights are placed on GPU to accelerate the process. You can place they on CPU to free up extra VRAM, thus set bigger dimensions.")
@ -149,17 +154,28 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
            self.options['lr_dropout']  = io.input_str (f"Use learning rate dropout", default_lr_dropout, ['n','y','cpu'], help_message="When the face is trained enough, you can enable this option to get extra sharpness and reduce subpixel shake for less amount of iterations. Enabled it before `disable random warp` and before GAN. \nn - disabled.\ny - enabled\ncpu - enabled on CPU. This allows not to use extra VRAM, sacrificing 20% time of iteration.")
            self.options['ms_ssim_loss'] = io.input_bool("Use multiscale loss?", default_ms_ssim_loss, help_message="Use Multiscale structural similarity for image quality assessment.")
            self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness and reduce subpixel shake for less amount of iterations.")
            self.options['gan_version'] = np.clip (io.input_int("GAN version", default_gan_version, add_info="2 or 3", help_message="Choose GAN version (v2: 7/16/2020, v3: 1/3/2021):"), 2, 3)
            if self.options['gan_version'] == 2:
                self.options['gan_power'] = np.clip ( io.input_number ("GAN power", default_gan_power, add_info="0.0 .. 10.0", help_message="Train the network in Generative Adversarial manner. Forces the neural network to learn small details of the face. Enable it only when the face is trained enough and don't disable. Typical value is 0.1"), 0.0, 10.0 )
            else:
                self.options['gan_power'] = np.clip ( io.input_number ("GAN power", default_gan_power, add_info="0.0 .. 1.0", help_message="Forces the neural network to learn small details of the face. Enable it only when the face is trained enough with lr_dropout(on) and random_warp(off), and don't disable. The higher the value, the higher the chances of artifacts. Typical fine value is 0.1"), 0.0, 1.0 )
            if self.options['gan_power'] != 0.0:
                if self.options['gan_version'] == 3:
                    gan_patch_size = np.clip ( io.input_int("GAN patch size", default_gan_patch_size, add_info="3-640", help_message="The higher patch size, the higher the quality, the more VRAM is required. You can get sharper edges even at the lowest setting. Typical fine value is resolution / 8." ), 3, 640 )
                    self.options['gan_patch_size'] = gan_patch_size
                    gan_dims = np.clip ( io.input_int("GAN dimensions", default_gan_dims, add_info="4-64", help_message="The dimensions of the GAN network. The higher dimensions, the more VRAM is required. You can get sharper edges even at the lowest setting. Typical fine value is 16." ), 4, 64 )
                    self.options['gan_dims'] = gan_dims
                self.options['gan_smoothing'] = np.clip ( io.input_number("GAN label smoothing", default_gan_smoothing, add_info="0 - 0.5", help_message="Uses soft labels with values slightly off from 0/1 for GAN, has a regularizing effect"), 0, 0.5)
                self.options['gan_noise'] = np.clip ( io.input_number("GAN noisy labels", default_gan_noise, add_info="0 - 0.5", help_message="Marks some images with the wrong label, helps prevent collapse"), 0, 0.5)
            if 'df' in self.options['archi']:
                self.options['true_face_power'] = np.clip ( io.input_number ("'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Typical value is 0.01 . Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 )
            else:
@ -299,6 +315,10 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
            if self.is_training:
                if gan_power != 0:
                    if self.options['gan_version'] == 2:
                        self.D_src = nn.UNetPatchDiscriminatorV2(patch_size=resolution//16, in_ch=input_ch, name="D_src")
                        self.model_filename_list += [ [self.D_src, 'D_src_v2.npy'] ]
                    else:
                        self.D_src = nn.UNetPatchDiscriminator(patch_size=self.options['gan_patch_size'], in_ch=input_ch, base_ch=self.options['gan_dims'], name="D_src")
                        self.model_filename_list += [ [self.D_src, 'GAN.npy'] ]
@ -325,6 +345,11 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
                    self.model_filename_list += [ (self.D_code_opt, 'D_code_opt.npy') ]
                if gan_power != 0:
                    if self.options['gan_version'] == 2:
                        self.D_src_dst_opt = OptimizerClass(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_src_dst_opt')
                        self.D_src_dst_opt.initialize_variables ( self.D_src.get_weights(), vars_on_cpu=optimizer_vars_on_cpu, lr_dropout_on_cpu=self.options['lr_dropout']=='cpu')#+self.D_src_x2.get_weights()
                        self.model_filename_list += [ (self.D_src_dst_opt, 'D_src_v2_opt.npy') ]
                    else:
                        self.D_src_dst_opt = OptimizerClass(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='GAN_opt')
                        self.D_src_dst_opt.initialize_variables ( self.D_src.get_weights(), vars_on_cpu=optimizer_vars_on_cpu, lr_dropout_on_cpu=self.options['lr_dropout']=='cpu')#+self.D_src_x2.get_weights()
                        self.model_filename_list += [ (self.D_src_dst_opt, 'GAN_opt.npy') ]
@ -416,19 +441,19 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
                    gpu_target_dst_style_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_style_blur)
                    gpu_target_src_anti_masked = gpu_target_src*(1.0-gpu_target_srcm_blur)
                    gpu_target_dst_anti_masked = gpu_target_dst_style_anti_masked
                    gpu_target_src_masked_opt  = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
                    gpu_target_dst_masked_opt  = gpu_target_dst_masked if masked_training else gpu_target_dst
                    gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src
                    gpu_pred_src_src_anti_masked = gpu_pred_src_src*(1.0-gpu_target_srcm_blur)
                    gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst
                    gpu_pred_dst_dst_anti_masked = gpu_pred_dst_dst*(1.0-gpu_target_dstm_blur)
                    gpu_psd_target_dst_style_masked = gpu_pred_src_dst*gpu_target_dstm_style_blur
                    gpu_psd_target_dst_style_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_style_blur)
                    if self.options['ms_ssim_loss']:
                        gpu_src_loss = 10 * nn.MsSsim(resolution)(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0)
                    else:
                        if resolution < 256:
                            gpu_src_loss =  tf.reduce_mean ( 10*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                        else:
@ -450,12 +475,15 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
                    if self.options['background_power'] > 0:
                        bg_factor = self.options['background_power']
-                        if resolution < 256:
+                        if self.options['ms_ssim_loss']:
-                            gpu_src_loss +=  bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_src_anti_masked, gpu_pred_src_src_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
+                            gpu_src_loss = 10 * nn.MsSsim(resolution)(gpu_target_src, gpu_pred_src_src, max_val=1.0)
                        else:
-                            gpu_src_loss +=  bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src_anti_masked, gpu_pred_src_src_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
+                            if resolution < 256:
-                            gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src_anti_masked, gpu_pred_src_src_anti_masked, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
+                                gpu_src_loss +=  bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
-                        gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src_anti_masked - gpu_pred_src_src_anti_masked ), axis=[1,2,3])
+                            else:
                                gpu_src_loss +=  bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                                gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
                        gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src - gpu_pred_src_src ), axis=[1,2,3])
                    face_style_power = self.options['face_style_power'] / 100.0
                    if face_style_power != 0 and not self.pretrain:
@ -466,6 +494,9 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
                        gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*nn.dssim( gpu_psd_target_dst_style_anti_masked,  gpu_target_dst_style_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                        gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*tf.square(gpu_psd_target_dst_style_anti_masked - gpu_target_dst_style_anti_masked), axis=[1,2,3] )
                    if self.options['ms_ssim_loss']:
                        gpu_dst_loss = 10 * nn.MsSsim(resolution)(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0)
                    else:
                        if resolution < 256:
                            gpu_dst_loss = tf.reduce_mean ( 10*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
                        else:
@ -486,12 +517,15 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
                    if self.options['background_power'] > 0:
                        bg_factor = self.options['background_power']
-                        if resolution < 256:
+                        if self.options['ms_ssim_loss']:
-                            gpu_dst_loss +=  bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_dst_anti_masked, gpu_pred_dst_dst_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
+                            gpu_src_loss = 10 * nn.MsSsim(resolution)(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0)
                        else:
-                            gpu_dst_loss +=  bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst_anti_masked, gpu_pred_dst_dst_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
+                            if resolution < 256:
-                            gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst_anti_masked, gpu_pred_dst_dst_anti_masked, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
+                                gpu_dst_loss +=  bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
-                        gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst_anti_masked - gpu_pred_dst_dst_anti_masked ), axis=[1,2,3])
+                            else:
                                gpu_dst_loss +=  bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                                gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
                        gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst - gpu_pred_dst_dst ), axis=[1,2,3])
                    gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] )
@ -521,22 +555,37 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
                        gpu_pred_src_src_d, \
                        gpu_pred_src_src_d2           = self.D_src(gpu_pred_src_src_masked_opt)
-                        gpu_pred_src_src_d_ones  = tf.ones_like (gpu_pred_src_src_d)
+                        def get_smooth_noisy_labels(label, tensor, smoothing=0.1, noise=0.05):
-                        gpu_pred_src_src_d_zeros = tf.zeros_like(gpu_pred_src_src_d)
+                            num_labels = self.batch_size
                            for d in tensor.get_shape().as_list()[1:]:
                                num_labels *= d
-                        gpu_pred_src_src_d2_ones  = tf.ones_like (gpu_pred_src_src_d2)
+                            probs = tf.math.log([[noise, 1-noise]]) if label == 1 else tf.math.log([[1-noise, noise]])
-                        gpu_pred_src_src_d2_zeros = tf.zeros_like(gpu_pred_src_src_d2)
+                            x = tf.random.categorical(probs, num_labels)
                            x = tf.cast(x, tf.float32)
                            x = tf.math.scalar_mul(1-smoothing, x)
                            # x = x + (smoothing/num_labels)
                            x = tf.reshape(x, (self.batch_size,) + tensor.shape[1:])
                            return x
-                        gpu_target_src_d, \
+                        smoothing = self.options['gan_smoothing']
-                        gpu_target_src_d2            = self.D_src(gpu_target_src_masked_opt)
+                        noise = self.options['gan_noise']
-                        gpu_target_src_d_ones    = tf.ones_like(gpu_target_src_d)
+                        gpu_pred_src_src_d_ones = tf.ones_like(gpu_pred_src_src_d)
-                        gpu_target_src_d2_ones    = tf.ones_like(gpu_target_src_d2)
+                        gpu_pred_src_src_d2_ones = tf.ones_like(gpu_pred_src_src_d2)
-                        gpu_D_src_dst_loss = (DLoss(gpu_target_src_d_ones      , gpu_target_src_d) + \
+                        gpu_pred_src_src_d_smooth_zeros = get_smooth_noisy_labels(0, gpu_pred_src_src_d, smoothing=smoothing, noise=noise)
-                                              DLoss(gpu_pred_src_src_d_zeros   , gpu_pred_src_src_d) ) * 0.5 + \
+                        gpu_pred_src_src_d2_smooth_zeros = get_smooth_noisy_labels(0, gpu_pred_src_src_d2, smoothing=smoothing, noise=noise)
-                                             (DLoss(gpu_target_src_d2_ones      , gpu_target_src_d2) + \
+
-                                              DLoss(gpu_pred_src_src_d2_zeros   , gpu_pred_src_src_d2) ) * 0.5
+                        gpu_target_src_d, gpu_target_src_d2 = self.D_src(gpu_target_src_masked_opt)
                        gpu_target_src_d_smooth_ones = get_smooth_noisy_labels(1, gpu_target_src_d, smoothing=smoothing, noise=noise)
                        gpu_target_src_d2_smooth_ones = get_smooth_noisy_labels(1, gpu_target_src_d2, smoothing=smoothing, noise=noise)
                        gpu_D_src_dst_loss = DLoss(gpu_target_src_d_smooth_ones, gpu_target_src_d) \
                                             + DLoss(gpu_pred_src_src_d_smooth_zeros, gpu_pred_src_src_d) \
                                             + DLoss(gpu_target_src_d2_smooth_ones, gpu_target_src_d2) \
                                             + DLoss(gpu_pred_src_src_d2_smooth_zeros, gpu_pred_src_src_d2)
                        gpu_D_src_dst_loss_gvs += [ nn.gradients (gpu_D_src_dst_loss, self.D_src.get_weights() ) ]#+self.D_src_x2.get_weights()