Merge pull request #140 from faceshiftlabs/feat/ms-ssim+l1

Feat/ms ssim+l1
This commit is contained in:
Jeremy Hummel 2021-05-04 13:51:12 -07:00 committed by GitHub
commit ae5612f8c5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 86 additions and 37 deletions

View file

@ -4,22 +4,26 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [1.5.1] - 2020-04-23
## [1.6.0] - 2021-05-04
### Added
- New loss function "MS-SSIM+L1", based on ["Loss Functions for Image Restoration with Neural Networks"](https://research.nvidia.com/publication/loss-functions-image-restoration-neural-networks)
## [1.5.1] - 2021-04-23
### Fixed
- Fixes bug with MS-SSIM when using a version of tensorflow < 1.14
## [1.5.0] - 2020-03-29
## [1.5.0] - 2021-03-29
### Changed
- Web UI previews now show the preview pane as PNG (lossless) instead of JPG (lossy), so we can see the same output
as on desktop, without any changes from JPG compression. This has the side effect of the preview images loading slower
over web, as they are now larger. A future update may be considered which would give the option to view as JPG
instead.
## [1.4.2] - 2020-03-26
## [1.4.2] - 2021-03-26
### Fixed
- Fixes bug in background power with MS-SSIM, that misattributed loss from dst to src
## [1.4.1] - 2020-03-25
## [1.4.1] - 2021-03-25
### Fixed
- When both Background Power and MS-SSIM were enabled, the src and dst losses were being overwritten with the
"background power" losses. Fixed so "background power" losses are properly added with the total losses.
@ -28,7 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
experience an OOM error on models run with both of these features enabled. I may revisit this in another feature,
allowing you to manually disable certain loss calculations, for similar performance benefits.*
## [1.4.0] - 2020-03-24
## [1.4.0] - 2021-03-24
### Added
- [MS-SSIM loss training option](doc/features/ms-ssim)
- GAN version option (v2 - late 2020 or v3 - current GAN)
@ -37,41 +41,41 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Background Power now uses the entire image, not just the area outside of the mask for comparison.
This should help with rough areas directly next to the mask
## [1.3.0] - 2020-03-20
## [1.3.0] - 2021-03-20
### Added
- [Background Power training option](doc/features/background-power/README.md)
## [1.2.1] - 2020-03-20
## [1.2.1] - 2021-03-20
### Fixed
- Fixes bug with `fs-aug` color mode.
## [1.2.0] - 2020-03-17
## [1.2.0] - 2021-03-17
### Added
- [Random color training option](doc/features/random-color/README.md)
## [1.1.5] - 2020-03-16
## [1.1.5] - 2021-03-16
### Fixed
- Fixed unclosed websocket in Web UI client when exiting
## [1.1.4] - 2020-03-16
## [1.1.4] - 2021-03-16
### Fixed
- Fixed bug when exiting from Web UI
## [1.1.3] - 2020-03-16
## [1.1.3] - 2021-03-16
### Changed
- Updated changelog with unreleased features, links to working branches
## [1.1.2] - 2020-03-12
## [1.1.2] - 2021-03-12
### Fixed
- [Fixed missing predicted src mask in 'SAEHD masked' preview](doc/fixes/predicted_src_mask/README.md)
## [1.1.1] - 2020-03-12
## [1.1.1] - 2021-03-12
### Added
- CHANGELOG file for tracking updates, new features, and bug fixes
- Documentation for Web UI
- Link to CHANGELOG at top of README
## [1.1.0] - 2020-03-11
## [1.1.0] - 2021-03-11
### Added
- [Web UI for training preview](doc/features/webui/README.md)
@ -80,6 +84,7 @@ This should help with rough areas directly next to the mask
- Reset stale master branch to [seranus/DeepFaceLab](https://github.com/seranus/DeepFaceLab),
21 commits ahead of [iperov/DeepFaceLab](https://github.com/iperov/DeepFaceLab) ([compare](https://github.com/iperov/DeepFaceLab/compare/4818183...seranus:3f5ae05))
[1.6.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.5.1...v1.6.0
[1.5.1]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.5.0...v1.5.1
[1.5.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.4.2...v1.5.0
[1.4.2]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.4.1...v1.4.2

View file

@ -4,15 +4,20 @@ tf = nn.tf
class MsSsim(nn.LayerBase):
default_power_factors = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
default_l1_alpha = 0.84
def __init__(self, resolution, kernel_size=11, **kwargs):
def __init__(self, batch_size, in_ch, resolution, kernel_size=11, use_l1=False, **kwargs):
# restrict mssim factors to those greater/equal to kernel size
power_factors = [p for i, p in enumerate(self.default_power_factors) if resolution//(2**i) >= kernel_size]
# normalize power factors if reduced because of size
if sum(power_factors) < 1.0:
power_factors = [x/sum(power_factors) for x in power_factors]
self.power_factors = power_factors
self.num_scale = len(power_factors)
self.kernel_size = kernel_size
self.use_l1 = use_l1
if use_l1:
self.gaussian_weights = nn.get_gaussian_weights(batch_size, in_ch, resolution, num_scale=self.num_scale)
super().__init__(**kwargs)
@ -21,14 +26,25 @@ class MsSsim(nn.LayerBase):
y_true_t = tf.transpose(tf.cast(y_true, tf.float32), [0, 2, 3, 1])
y_pred_t = tf.transpose(tf.cast(y_pred, tf.float32), [0, 2, 3, 1])
if tf.__version__ >= "1.14":
ms_ssim_val = tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors, filter_size=self.kernel_size)
else:
ms_ssim_val = tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors)
# ssim_multiscale returns values in range [0, 1] (where 1 is completely identical)
# subtract from 1 to get loss
return 1.0 - ms_ssim_val
if tf.__version__ >= "1.14":
ms_ssim_loss = 1.0 - tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors, filter_size=self.kernel_size)
else:
ms_ssim_loss = 1.0 - tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors)
# If use L1 is enabled, use mix of ms-ssim and L1 (weighted by gaussian filters)
# H. Zhao, O. Gallo, I. Frosio and J. Kautz, "Loss Functions for Image Restoration With Neural Networks,"
# in IEEE Transactions on Computational Imaging, vol. 3, no. 1, pp. 47-57, March 2017,
# doi: 10.1109/TCI.2016.2644865.
# https://research.nvidia.com/publication/loss-functions-image-restoration-neural-networks
if self.use_l1:
diff = tf.tile(tf.expand_dims(tf.abs(y_true - y_pred), axis=0), multiples=[self.num_scale, 1, 1, 1, 1])
l1_loss = tf.reduce_mean(tf.reduce_sum(self.gaussian_weights[-1, :, :, :, :] * diff, axis=[0, 3, 4]), axis=[1])
return self.default_l1_alpha * ms_ssim_loss + (1 - self.default_l1_alpha) * l1_loss
return ms_ssim_loss
nn.MsSsim = MsSsim

View file

@ -237,6 +237,19 @@ def gaussian_blur(input, radius=2.0):
return x
nn.gaussian_blur = gaussian_blur
def get_gaussian_weights(batch_size, in_ch, resolution, num_scale=5, sigma=(0.5, 1., 2., 4., 8.)):
    """Build normalized 2D Gaussian weight maps, one per scale.

    For each of the first ``num_scale`` entries of ``sigma`` a separable
    Gaussian centred on the middle of a ``resolution x resolution`` grid is
    computed, normalized so that it sums to 1, and repeated across the batch
    and channel axes.

    Args:
        batch_size: size of the batch axis of the returned array.
        in_ch: size of the channel axis of the returned array.
        resolution: height and width of each weight map.
        num_scale: number of scales (leading axis of the result).
        sigma: indexable sequence of standard deviations; entry ``i`` is
            used for scale ``i``. Must hold at least ``num_scale`` values.

    Returns:
        NumPy array of shape
        ``(num_scale, batch_size, in_ch, resolution, resolution)`` in NumPy's
        default float dtype.

    Raises:
        ValueError: if ``num_scale`` exceeds ``len(sigma)``.
    """
    if num_scale > len(sigma):
        raise ValueError(
            f"num_scale ({num_scale}) exceeds the number of sigma values ({len(sigma)})")

    w = np.empty((num_scale, batch_size, in_ch, resolution, resolution))

    # Signed distance of every pixel from the grid centre. Built from an
    # integer arange so the length is always exactly `resolution`
    # (float-endpoint arange lengths are not guaranteed by NumPy).
    offsets = np.arange(resolution) - (resolution - 1) / 2

    for scale_idx, scale_sigma in enumerate(sigma[:num_scale]):
        profile = np.exp(-offsets**2 / (2 * scale_sigma**2))
        kernel = np.outer(profile, profile)  # separable 1D -> 2D
        kernel = kernel / np.sum(kernel)  # weights of one map sum to 1
        # Broadcast the single 2D map over the batch and channel axes.
        w[scale_idx] = kernel
    return w
nn.get_gaussian_weights = get_gaussian_weights
def style_loss(target, style, gaussian_blur_radius=0.0, loss_weight=1.0, step_size=1):
def sd(content, style, loss_weight):
content_nc = content.shape[ nn.conv2d_ch_axis ]
@ -385,7 +398,7 @@ def total_variation_mse(images):
"""
pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :]
pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :]
tot_var = ( tf.reduce_sum(tf.square(pixel_dif1), axis=[1,2,3]) +
tf.reduce_sum(tf.square(pixel_dif2), axis=[1,2,3]) )
return tot_var
@ -400,4 +413,4 @@ def tf_suppress_lower_mean(t, eps=0.00001):
q = tf.clip_by_value(q-t_mean_eps, 0, eps)
q = q * (t/eps)
return q
"""
"""

View file

@ -53,7 +53,7 @@ class SAEHDModel(ModelBase):
lr_dropout = {True:'y', False:'n'}.get(lr_dropout, lr_dropout) #backward comp
default_lr_dropout = self.options['lr_dropout'] = lr_dropout
default_ms_ssim_loss = self.options['ms_ssim_loss'] = self.load_or_def_option('ms_ssim_loss', False)
default_loss_function = self.options['loss_function'] = self.load_or_def_option('loss_function', 'SSIM')
default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True)
default_background_power = self.options['background_power'] = self.load_or_def_option('background_power', 0.0)
@ -154,7 +154,8 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
self.options['lr_dropout'] = io.input_str (f"Use learning rate dropout", default_lr_dropout, ['n','y','cpu'], help_message="When the face is trained enough, you can enable this option to get extra sharpness and reduce subpixel shake for less amount of iterations. Enabled it before `disable random warp` and before GAN. \nn - disabled.\ny - enabled\ncpu - enabled on CPU. This allows not to use extra VRAM, sacrificing 20% time of iteration.")
self.options['ms_ssim_loss'] = io.input_bool("Use multiscale loss?", default_ms_ssim_loss, help_message="Use Multiscale structural similarity for image quality assessment.")
self.options['loss_function'] = io.input_str(f"Loss function", default_loss_function, ['SSIM', 'MS-SSIM', 'MS-SSIM+L1'],
help_message="Change loss function used for image quality assessment.")
self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness and reduce subpixel shake for less amount of iterations.")
@ -451,15 +452,18 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
gpu_psd_target_dst_style_masked = gpu_pred_src_dst*gpu_target_dstm_style_blur
gpu_psd_target_dst_style_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_style_blur)
if self.options['ms_ssim_loss']:
gpu_src_loss = 10 * nn.MsSsim(resolution)(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0)
if self.options['loss_function'] == 'MS-SSIM':
gpu_src_loss = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution)(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0)
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
elif self.options['loss_function'] == 'MS-SSIM+L1':
gpu_src_loss = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0)
else:
if resolution < 256:
gpu_src_loss = tf.reduce_mean ( 10*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
else:
gpu_src_loss = tf.reduce_mean ( 5*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 5*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
if eyes_prio or mouth_prio:
if eyes_prio and mouth_prio:
@ -475,15 +479,19 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
if self.options['background_power'] > 0:
bg_factor = self.options['background_power']
if self.options['ms_ssim_loss']:
gpu_src_loss += bg_factor * 10 * nn.MsSsim(resolution)(gpu_target_src, gpu_pred_src_src, max_val=1.0)
if self.options['loss_function'] == 'MS-SSIM':
gpu_src_loss += bg_factor * 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution)(gpu_target_src, gpu_pred_src_src, max_val=1.0)
gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src - gpu_pred_src_src ), axis=[1,2,3])
elif self.options['loss_function'] == 'MS-SSIM+L1':
gpu_src_loss += bg_factor * 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)(gpu_target_src, gpu_pred_src_src, max_val=1.0)
else:
if resolution < 256:
gpu_src_loss += bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
else:
gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src - gpu_pred_src_src ), axis=[1,2,3])
gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src - gpu_pred_src_src ), axis=[1,2,3])
face_style_power = self.options['face_style_power'] / 100.0
if face_style_power != 0 and not self.pretrain:
@ -494,15 +502,18 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*nn.dssim( gpu_psd_target_dst_style_anti_masked, gpu_target_dst_style_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*tf.square(gpu_psd_target_dst_style_anti_masked - gpu_target_dst_style_anti_masked), axis=[1,2,3] )
if self.options['ms_ssim_loss']:
gpu_dst_loss = 10 * nn.MsSsim(resolution)(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0)
if self.options['loss_function'] == 'MS-SSIM':
gpu_dst_loss = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution)(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0)
gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3])
elif self.options['loss_function'] == 'MS-SSIM+L1':
gpu_dst_loss = 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0)
else:
if resolution < 256:
gpu_dst_loss = tf.reduce_mean ( 10*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
else:
gpu_dst_loss = tf.reduce_mean ( 5*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
gpu_dst_loss += tf.reduce_mean ( 5*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/23.2) ), axis=[1])
gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3])
gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3])
if eyes_prio or mouth_prio:
@ -517,15 +528,19 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
if self.options['background_power'] > 0:
bg_factor = self.options['background_power']
if self.options['ms_ssim_loss']:
gpu_dst_loss += bg_factor * 10 * nn.MsSsim(resolution)(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0)
if self.options['loss_function'] == 'MS-SSIM':
gpu_dst_loss += bg_factor * 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution)(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0)
gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst - gpu_pred_dst_dst ), axis=[1,2,3])
elif self.options['loss_function'] == 'MS-SSIM+L1':
gpu_dst_loss += bg_factor * 10 * nn.MsSsim(bs_per_gpu, input_ch, resolution, use_l1=True)(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0)
else:
if resolution < 256:
gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
else:
gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst - gpu_pred_dst_dst ), axis=[1,2,3])
gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst - gpu_pred_dst_dst ), axis=[1,2,3])
gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] )