Merge pull request #140 from faceshiftlabs/feat/ms-ssim+l1

Feat/ms ssim+l1

Commit ae5612f8c5. 4 changed files with 86 additions and 37 deletions.
CHANGELOG.md (33 changed lines)
@@ -4,22 +4,26 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [1.5.1] - 2020-04-23
+## [1.6.0] - 2021-05-04
+### Added
+- New loss function "MS-SSIM+L1", based on ["Loss Functions for Image Restoration with Neural Networks"](https://research.nvidia.com/publication/loss-functions-image-restoration-neural-networks)
+
+## [1.5.1] - 2021-04-23
 ### Fixed
 - Fixes bug with MS-SSIM when using a version of tensorflow < 1.14
 
-## [1.5.0] - 2020-03-29
+## [1.5.0] - 2021-03-29
 ### Changed
 - Web UI previews now show preview pane as PNG (loss-less), instead of JPG (lossy), so we can see the same output
 as on desktop, without any changes from JPG compression. This has the side-effect of the preview images loading slower
 over web, as they are now larger, a future update may be considered which would give the option to view as JPG
 instead.
 
-## [1.4.2] - 2020-03-26
+## [1.4.2] - 2021-03-26
 ### Fixed
 - Fixes bug in background power with MS-SSIM, that misattributed loss from dst to src
 
-## [1.4.1] - 2020-03-25
+## [1.4.1] - 2021-03-25
 ### Fixed
 - When both Background Power and MS-SSIM were enabled, the src and dst losses were being overwritten with the
 "background power" losses. Fixed so "background power" losses are properly added with the total losses.
@@ -28,7 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 experience an OOM error on models ran with both these features enabled. I may revisit this in another feature,
 allowing you to manually disable certain loss calculations, for similar performance benefits.*
 
-## [1.4.0] - 2020-03-24
+## [1.4.0] - 2021-03-24
 ### Added
 - [MS-SSIM loss training option](doc/features/ms-ssim)
 - GAN version option (v2 - late 2020 or v3 - current GAN)
@@ -37,41 +41,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Background Power now uses the entire image, not just the area outside of the mask for comparison.
 This should help with rough areas directly next to the mask
 
-## [1.3.0] - 2020-03-20
+## [1.3.0] - 2021-03-20
 ### Added
 - [Background Power training option](doc/features/background-power/README.md)
 
-## [1.2.1] - 2020-03-20
+## [1.2.1] - 2021-03-20
 ### Fixed
 - Fixes bug with `fs-aug` color mode.
 
-## [1.2.0] - 2020-03-17
+## [1.2.0] - 2021-03-17
 ### Added
 - [Random color training option](doc/features/random-color/README.md)
 
-## [1.1.5] - 2020-03-16
+## [1.1.5] - 2021-03-16
 ### Fixed
 - Fixed unclosed websocket in Web UI client when exiting
 
-## [1.1.4] - 2020-03-16
+## [1.1.4] - 2021-03-16
 ### Fixed
 - Fixed bug when exiting from Web UI
 
-## [1.1.3] - 2020-03-16
+## [1.1.3] - 2021-03-16
 ### Changed
 - Updated changelog with unreleased features, links to working branches
 
-## [1.1.2] - 2020-03-12
+## [1.1.2] - 2021-03-12
 ### Fixed
 - [Fixed missing predicted src mask in 'SAEHD masked' preview](doc/fixes/predicted_src_mask/README.md)
 
-## [1.1.1] - 2020-03-12
+## [1.1.1] - 2021-03-12
 ### Added
 - CHANGELOG file for tracking updates, new features, and bug fixes
 - Documentation for Web UI
 - Link to CHANGELOG at top of README
 
-## [1.1.0] - 2020-03-11
+## [1.1.0] - 2021-03-11
 ### Added
 - [Web UI for training preview](doc/features/webui/README.md)
 
@@ -80,6 +84,7 @@ This should help with rough areas directly next to the mask
 - Reset stale master branch to [seranus/DeepFaceLab](https://github.com/seranus/DeepFaceLab),
 21 commits ahead of [iperov/DeepFaceLab](https://github.com/iperov/DeepFaceLab) ([compare](https://github.com/iperov/DeepFaceLab/compare/4818183...seranus:3f5ae05))
 
+[1.6.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.5.1...v1.6.0
 [1.5.1]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.5.0...v1.5.1
 [1.5.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.4.2...v1.5.0
 [1.4.2]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.4.1...v1.4.2
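The headline change is the new "MS-SSIM+L1" loss. Following Zhao et al. (2017), it blends the multiscale SSIM loss with an L1 term using a fixed mixing weight; the 0.84 below matches default_l1_alpha in the MsSsim layer diff further down. A minimal sketch of the idea (illustrative only; it uses a plain mean L1 instead of the Gaussian-windowed L1 the commit actually implements):

```python
import tensorflow as tf

def ms_ssim_l1_sketch(y_true_nhwc, y_pred_nhwc, max_val=1.0, alpha=0.84):
    # MS-SSIM is 1.0 for identical images, so subtract from 1 to turn it into a loss
    ms_ssim_loss = 1.0 - tf.image.ssim_multiscale(y_true_nhwc, y_pred_nhwc, max_val)
    # the committed code weights this term with Gaussian windows; a plain mean is used here
    l1_loss = tf.reduce_mean(tf.abs(y_true_nhwc - y_pred_nhwc), axis=[1, 2, 3])
    return alpha * ms_ssim_loss + (1.0 - alpha) * l1_loss
```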
@@ -4,15 +4,20 @@ tf = nn.tf
 
 class MsSsim(nn.LayerBase):
     default_power_factors = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
+    default_l1_alpha = 0.84
 
-    def __init__(self, resolution, kernel_size=11, **kwargs):
+    def __init__(self, batch_size, in_ch, resolution, kernel_size=11, use_l1=False, **kwargs):
         # restrict mssim factors to those greater/equal to kernel size
         power_factors = [p for i, p in enumerate(self.default_power_factors) if resolution//(2**i) >= kernel_size]
         # normalize power factors if reduced because of size
         if sum(power_factors) < 1.0:
             power_factors = [x/sum(power_factors) for x in power_factors]
         self.power_factors = power_factors
+        self.num_scale = len(power_factors)
         self.kernel_size = kernel_size
+        self.use_l1 = use_l1
+        if use_l1:
+            self.gaussian_weights = nn.get_gaussian_weights(batch_size, in_ch, resolution, num_scale=self.num_scale)
 
         super().__init__(**kwargs)
 
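A quick worked example of the scale restriction above (plain Python, not part of the commit): at resolution 64 with the default kernel_size of 11, only the first three MS-SSIM scales still fit the filter window, and the surviving weights are renormalized to sum to 1.

```python
# Illustrative only: mirrors the power-factor restriction in MsSsim.__init__ above.
default_power_factors = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
resolution, kernel_size = 64, 11

# keep only scales whose downsampled size still fits the SSIM window
power_factors = [p for i, p in enumerate(default_power_factors)
                 if resolution // (2 ** i) >= kernel_size]    # keeps the 64, 32 and 16 px scales

# renormalize so the remaining weights still sum to 1
if sum(power_factors) < 1.0:
    power_factors = [x / sum(power_factors) for x in power_factors]

print(power_factors)  # approx [0.071, 0.453, 0.476]
```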
@@ -21,14 +26,25 @@ class MsSsim(nn.LayerBase):
         y_true_t = tf.transpose(tf.cast(y_true, tf.float32), [0, 2, 3, 1])
         y_pred_t = tf.transpose(tf.cast(y_pred, tf.float32), [0, 2, 3, 1])
 
-        if tf.__version__ >= "1.14":
-            ms_ssim_val = tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors, filter_size=self.kernel_size)
-        else:
-            ms_ssim_val = tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors)
-
-        # ssim_multiscale returns values in range [0, 1] (where 1 is completely identical)
-        # subtract from 1 to get loss
-        return 1.0 - ms_ssim_val
+        if tf.__version__ >= "1.14":
+            ms_ssim_loss = 1.0 - tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors, filter_size=self.kernel_size)
+        else:
+            ms_ssim_loss = 1.0 - tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors)
+
+        # If use L1 is enabled, use mix of ms-ssim and L1 (weighted by gaussian filters)
+        # H. Zhao, O. Gallo, I. Frosio and J. Kautz, "Loss Functions for Image Restoration With Neural Networks,"
+        # in IEEE Transactions on Computational Imaging, vol. 3, no. 1, pp. 47-57, March 2017,
+        # doi: 10.1109/TCI.2016.2644865.
+        # https://research.nvidia.com/publication/loss-functions-image-restoration-neural-networks
+
+        if self.use_l1:
+            diff = tf.tile(tf.expand_dims(tf.abs(y_true - y_pred), axis=0), multiples=[self.num_scale, 1, 1, 1, 1])
+            l1_loss = tf.reduce_mean(tf.reduce_sum(self.gaussian_weights[-1, :, :, :, :] * diff, axis=[0, 3, 4]), axis=[1])
+            return self.default_l1_alpha * ms_ssim_loss + (1 - self.default_l1_alpha) * l1_loss
+
+        return ms_ssim_loss
 
 
 nn.MsSsim = MsSsim
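A brief usage sketch of the updated layer, as implied by the SAEHD call sites later in this diff (illustrative, with assumed variable names): the constructor now needs the per-GPU batch size and channel count because, when use_l1 is enabled, the Gaussian weights are precomputed in __init__ with a fixed shape.

```python
# Assumed usage, mirroring the call sites further down in this diff.
ms_ssim_l1 = nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)

# y_true / y_pred are NCHW float tensors in [0, 1]; the layer transposes to NHWC internally.
# Shapes inside the L1 branch:
#   tf.abs(y_true - y_pred)          -> (batch, ch, H, W)
#   tiled diff                       -> (num_scale, batch, ch, H, W)
#   gaussian_weights[-1]             -> (batch, ch, H, W), broadcast over the scale axis
#   reduce_sum over axes [0, 3, 4]   -> (batch, ch)
#   reduce_mean over axis 1          -> (batch,)  one loss value per sample
loss = 10 * ms_ssim_l1(y_true, y_pred, max_val=1.0)
```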
@@ -237,6 +237,19 @@ def gaussian_blur(input, radius=2.0):
     return x
 nn.gaussian_blur = gaussian_blur
 
+def get_gaussian_weights(batch_size, in_ch, resolution, num_scale=5, sigma=(0.5, 1., 2., 4., 8.)):
+    w = np.empty((num_scale, batch_size, in_ch, resolution, resolution))
+    for i in range(num_scale):
+        gaussian = np.exp(-1.*np.arange(-(resolution/2-0.5), resolution/2+0.5)**2/(2*sigma[i]**2))
+        gaussian = np.outer(gaussian, gaussian.reshape((resolution, 1)))  # extend to 2D
+        gaussian = gaussian/np.sum(gaussian)  # normalization
+        gaussian = np.reshape(gaussian, (1, 1, resolution, resolution))  # reshape to 3D
+        gaussian = np.tile(gaussian, (batch_size, in_ch, 1, 1))
+        w[i, :, :, :, :] = gaussian
+    return w
+
+nn.get_gaussian_weights = get_gaussian_weights
+
 def style_loss(target, style, gaussian_blur_radius=0.0, loss_weight=1.0, step_size=1):
     def sd(content, style, loss_weight):
         content_nc = content.shape[ nn.conv2d_ch_axis ]
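A quick sanity check of the helper above (illustrative only; assumes the function is importable and that numpy is available as np, as the surrounding module already requires): the array is laid out as (num_scale, batch, channels, resolution, resolution), and each per-sample, per-channel window is normalized to sum to 1, with only the spread (sigma) differing between scales.

```python
import numpy as np

w = get_gaussian_weights(batch_size=4, in_ch=3, resolution=128)
print(w.shape)                             # (5, 4, 3, 128, 128)
print(np.isclose(w[0, 0, 0].sum(), 1.0))   # True: the finest-scale window sums to 1
print(np.isclose(w[-1, 0, 0].sum(), 1.0))  # True: so does the coarsest-scale window
```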
@@ -385,7 +398,7 @@ def total_variation_mse(images):
     """
     pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :]
     pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :]
-
+
     tot_var = ( tf.reduce_sum(tf.square(pixel_dif1), axis=[1,2,3]) +
                 tf.reduce_sum(tf.square(pixel_dif2), axis=[1,2,3]) )
     return tot_var
@@ -400,4 +413,4 @@ def tf_suppress_lower_mean(t, eps=0.00001):
     q = tf.clip_by_value(q-t_mean_eps, 0, eps)
     q = q * (t/eps)
     return q
-"""
+"""
@@ -53,7 +53,7 @@ class SAEHDModel(ModelBase):
         lr_dropout = {True:'y', False:'n'}.get(lr_dropout, lr_dropout) #backward comp
         default_lr_dropout = self.options['lr_dropout'] = lr_dropout
 
-        default_ms_ssim_loss = self.options['ms_ssim_loss'] = self.load_or_def_option('ms_ssim_loss', False)
+        default_loss_function = self.options['loss_function'] = self.load_or_def_option('loss_function', 'SSIM')
 
         default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True)
         default_background_power = self.options['background_power'] = self.load_or_def_option('background_power', 0.0)
@@ -154,7 +154,8 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
 
         self.options['lr_dropout'] = io.input_str (f"Use learning rate dropout", default_lr_dropout, ['n','y','cpu'], help_message="When the face is trained enough, you can enable this option to get extra sharpness and reduce subpixel shake for less amount of iterations. Enabled it before `disable random warp` and before GAN. \nn - disabled.\ny - enabled\ncpu - enabled on CPU. This allows not to use extra VRAM, sacrificing 20% time of iteration.")
 
-        self.options['ms_ssim_loss'] = io.input_bool("Use multiscale loss?", default_ms_ssim_loss, help_message="Use Multiscale structural similarity for image quality assessment.")
+        self.options['loss_function'] = io.input_str(f"Loss function", default_loss_function, ['SSIM', 'MS-SSIM', 'MS-SSIM+L1'],
+                                                     help_message="Change loss function used for image quality assessment.")
 
         self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness and reduce subpixel shake for less amount of iterations.")
 
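In short, the old ms_ssim_loss yes/no prompt is replaced by a three-way loss_function choice, and the hunks below switch every loss-graph branch on it. A condensed view of that dispatch, with target and pred standing in for the masked target/prediction tensors used in the real graph (illustrative only, not the committed code):

```python
# Condensed sketch of the branching added in the hunks below.
loss_function = self.options['loss_function']      # 'SSIM' | 'MS-SSIM' | 'MS-SSIM+L1'

if loss_function == 'MS-SSIM':
    loss  = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution)(target, pred, max_val=1.0)
    loss += tf.reduce_mean(10 * tf.square(target - pred), axis=[1, 2, 3])   # keeps the pixel MSE term
elif loss_function == 'MS-SSIM+L1':
    # L1 is built into the layer (alpha-weighted, Gaussian-windowed), so no extra MSE term is added
    loss  = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)(target, pred, max_val=1.0)
else:  # 'SSIM': the original behaviour, DSSIM plus pixel MSE (shown here for resolution < 256)
    loss  = tf.reduce_mean(10 * nn.dssim(target, pred, max_val=1.0, filter_size=int(resolution / 11.6)), axis=[1])
    loss += tf.reduce_mean(10 * tf.square(target - pred), axis=[1, 2, 3])
```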
@@ -451,15 +452,18 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
             gpu_psd_target_dst_style_masked = gpu_pred_src_dst*gpu_target_dstm_style_blur
             gpu_psd_target_dst_style_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_style_blur)
 
-            if self.options['ms_ssim_loss']:
-                gpu_src_loss = 10 * nn.MsSsim(resolution)(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0)
+            if self.options['loss_function'] == 'MS-SSIM':
+                gpu_src_loss = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution)(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0)
+                gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
+            elif self.options['loss_function'] == 'MS-SSIM+L1':
+                gpu_src_loss = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0)
             else:
                 if resolution < 256:
                     gpu_src_loss = tf.reduce_mean ( 10*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                 else:
                     gpu_src_loss = tf.reduce_mean ( 5*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                     gpu_src_loss += tf.reduce_mean ( 5*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
-            gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
+                gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
 
             if eyes_prio or mouth_prio:
                 if eyes_prio and mouth_prio:
@@ -475,15 +479,19 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
 
             if self.options['background_power'] > 0:
                 bg_factor = self.options['background_power']
-                if self.options['ms_ssim_loss']:
-                    gpu_src_loss += bg_factor * 10 * nn.MsSsim(resolution)(gpu_target_src, gpu_pred_src_src, max_val=1.0)
+
+                if self.options['loss_function'] == 'MS-SSIM':
+                    gpu_src_loss += bg_factor * 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution)(gpu_target_src, gpu_pred_src_src, max_val=1.0)
+                    gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src - gpu_pred_src_src ), axis=[1,2,3])
+                elif self.options['loss_function'] == 'MS-SSIM+L1':
+                    gpu_src_loss += bg_factor * 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)(gpu_target_src, gpu_pred_src_src, max_val=1.0)
                 else:
                     if resolution < 256:
                         gpu_src_loss += bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                     else:
                         gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                         gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
-                gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src - gpu_pred_src_src ), axis=[1,2,3])
+                    gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src - gpu_pred_src_src ), axis=[1,2,3])
 
             face_style_power = self.options['face_style_power'] / 100.0
             if face_style_power != 0 and not self.pretrain:
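The background-power hunks (this one for src, and the mirrored ones for dst below) all follow the same pattern, sketched here with placeholder names (chosen_loss and masked_loss are not real identifiers in the code): the selected loss is evaluated a second time on the full, unmasked frames and added in, scaled by the background_power setting.

```python
# Condensed form of the background-power term (illustrative only, not the committed code).
bg_factor = self.options['background_power']        # 0.0 disables the extra term entirely
full_frame_loss = chosen_loss(gpu_target_src, gpu_pred_src_src)   # same loss, on the unmasked frames
gpu_src_loss = masked_loss + bg_factor * full_frame_loss
```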
@@ -494,15 +502,18 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
             gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*nn.dssim( gpu_psd_target_dst_style_anti_masked, gpu_target_dst_style_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
             gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*tf.square(gpu_psd_target_dst_style_anti_masked - gpu_target_dst_style_anti_masked), axis=[1,2,3] )
 
-            if self.options['ms_ssim_loss']:
-                gpu_dst_loss = 10 * nn.MsSsim(resolution)(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0)
+            if self.options['loss_function'] == 'MS-SSIM':
+                gpu_dst_loss = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution)(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0)
+                gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3])
+            elif self.options['loss_function'] == 'MS-SSIM+L1':
+                gpu_dst_loss = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0)
             else:
                 if resolution < 256:
                     gpu_dst_loss = tf.reduce_mean ( 10*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
                 else:
                     gpu_dst_loss = tf.reduce_mean ( 5*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
                     gpu_dst_loss += tf.reduce_mean ( 5*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/23.2) ), axis=[1])
-            gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3])
+                gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3])
 
 
             if eyes_prio or mouth_prio:
@@ -517,15 +528,19 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
 
             if self.options['background_power'] > 0:
                 bg_factor = self.options['background_power']
-                if self.options['ms_ssim_loss']:
-                    gpu_dst_loss += bg_factor * 10 * nn.MsSsim(resolution)(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0)
+
+                if self.options['loss_function'] == 'MS-SSIM':
+                    gpu_dst_loss += bg_factor * 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution)(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0)
+                    gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst - gpu_pred_dst_dst ), axis=[1,2,3])
+                elif self.options['loss_function'] == 'MS-SSIM+L1':
+                    gpu_dst_loss += bg_factor * 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0)
                 else:
                     if resolution < 256:
                         gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                     else:
                         gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                         gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
-                gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst - gpu_pred_dst_dst ), axis=[1,2,3])
+                    gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst - gpu_pred_dst_dst ), axis=[1,2,3])
 
             gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] )
 