mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-08-21 05:53:24 -07:00
Merge branch 'master' into fix/web-ui-previews-png
This commit is contained in:
commit
cf722cef43
12 changed files with 363 additions and 63 deletions
14
CHANGELOG.md
14
CHANGELOG.md
|
@ -6,9 +6,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
|
|
||||||
## [Unreleased]
|
## [Unreleased]
|
||||||
### In Progress
|
### In Progress
|
||||||
- [MS-SSIM loss training option](https://github.com/faceshiftlabs/DeepFaceLab/tree/feature/ms-ssim-loss-2)
|
|
||||||
- [Freezeable layers (encoder/decoder/etc.)](https://github.com/faceshiftlabs/DeepFaceLab/tree/feature/freezable-weights)
|
- [Freezeable layers (encoder/decoder/etc.)](https://github.com/faceshiftlabs/DeepFaceLab/tree/feature/freezable-weights)
|
||||||
- [GAN stability improvements](https://github.com/faceshiftlabs/DeepFaceLab/tree/feature/gan-updates)
|
|
||||||
|
## [1.4.0] - 2020-03-24
|
||||||
|
### Added
|
||||||
|
- [MS-SSIM loss training option](doc/features/ms-ssim)
|
||||||
|
- GAN version option (v2 - late 2020 or v3 - current GAN)
|
||||||
|
- [GAN label smoothing and label noise options](doc/features/gan-options)
|
||||||
|
### Fixed
|
||||||
|
- Background Power now uses the entire image, not just the area outside of the mask for comparison.
|
||||||
|
This should help with rough areas directly next to the mask
|
||||||
|
|
||||||
## [1.3.0] - 2020-03-20
|
## [1.3.0] - 2020-03-20
|
||||||
### Added
|
### Added
|
||||||
|
@ -53,7 +60,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
- Reset stale master branch to [seranus/DeepFaceLab](https://github.com/seranus/DeepFaceLab),
|
- Reset stale master branch to [seranus/DeepFaceLab](https://github.com/seranus/DeepFaceLab),
|
||||||
21 commits ahead of [iperov/DeepFaceLab](https://github.com/iperov/DeepFaceLab) ([compare](https://github.com/iperov/DeepFaceLab/compare/4818183...seranus:3f5ae05))
|
21 commits ahead of [iperov/DeepFaceLab](https://github.com/iperov/DeepFaceLab) ([compare](https://github.com/iperov/DeepFaceLab/compare/4818183...seranus:3f5ae05))
|
||||||
|
|
||||||
[Unreleased]: https://github.com/olivierlacan/keep-a-changelog/compare/v1.3.0...HEAD
|
[Unreleased]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.4.0...HEAD
|
||||||
|
[1.4.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.3.0...v1.4.0
|
||||||
[1.3.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.2.1...v1.3.0
|
[1.3.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.2.1...v1.3.0
|
||||||
[1.2.1]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.2.0...v1.2.1
|
[1.2.1]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.2.0...v1.2.1
|
||||||
[1.2.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.1.5...v1.2.0
|
[1.2.0]: https://github.com/faceshiftlabs/DeepFaceLab/compare/v1.1.5...v1.2.0
|
||||||
|
|
|
@ -3,6 +3,8 @@
|
||||||
# CHANGELOG
|
# CHANGELOG
|
||||||
### [View most recent changes](CHANGELOG.md)
|
### [View most recent changes](CHANGELOG.md)
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
<table align="center" border="0">
|
<table align="center" border="0">
|
||||||
|
|
||||||
<tr><td colspan=2 align="center">
|
<tr><td colspan=2 align="center">
|
||||||
|
|
30
core/leras/layers/MsSsim.py
Normal file
30
core/leras/layers/MsSsim.py
Normal file
|
@ -0,0 +1,30 @@
|
||||||
|
from core.leras import nn
|
||||||
|
tf = nn.tf
|
||||||
|
|
||||||
|
|
||||||
|
class MsSsim(nn.LayerBase):
    """
    Multiscale SSIM loss layer.

    Wraps tf.image.ssim_multiscale and returns 1 - MS-SSIM so that the result
    can be minimised as a loss.
    """
    # Per-scale weights, finest scale first. Scales that would be smaller
    # than the filter window are dropped in __init__.
    default_power_factors = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)

    def __init__(self, resolution, kernel_size=11, **kwargs):
        """
        resolution   side length of the (square) images that will be compared
        kernel_size  size of the SSIM filter window

        Raises ValueError when resolution is smaller than kernel_size, because
        then not even the full-size scale can be evaluated.
        """
        # Scale i is evaluated at resolution // 2**i; keep only the scales that
        # are still at least as large as the filter window.
        power_factors = [p for i, p in enumerate(self.default_power_factors)
                         if resolution // (2 ** i) >= kernel_size]

        # An empty list cannot yield a meaningful MS-SSIM; fail here with a
        # clear message instead of deep inside the TensorFlow call.
        if not power_factors:
            raise ValueError(
                f"MsSsim: resolution {resolution} is smaller than kernel_size "
                f"{kernel_size}; no scale can be evaluated")

        # Re-normalise the remaining weights if some scales were dropped.
        total = sum(power_factors)
        if total < 1.0:
            power_factors = [p / total for p in power_factors]

        self.power_factors = power_factors
        self.kernel_size = kernel_size

        super().__init__(**kwargs)

    def __call__(self, y_true, y_pred, max_val):
        """
        Return 1 - MS-SSIM(y_true, y_pred).

        max_val is forwarded unchanged to tf.image.ssim_multiscale.
        """
        y_true_t = tf.cast(y_true, tf.float32)
        y_pred_t = tf.cast(y_pred, tf.float32)

        # ssim_multiscale wants channels-last input. Transpose NCHW -> NHWC
        # only when the channel axis is 1. If nn does not expose
        # conv2d_ch_axis, fall back to always transposing (the previous
        # behaviour).
        # NOTE(review): assumes nn.conv2d_ch_axis == 1 means NCHW - confirm.
        if getattr(nn, 'conv2d_ch_axis', 1) == 1:
            y_true_t = tf.transpose(y_true_t, [0, 2, 3, 1])
            y_pred_t = tf.transpose(y_pred_t, [0, 2, 3, 1])

        ms_ssim_val = tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val,
                                               power_factors=self.power_factors,
                                               filter_size=self.kernel_size)
        # ssim_multiscale returns values in range [0, 1] (where 1 is completely identical)
        # subtract from 1 to get loss
        return 1.0 - ms_ssim_val
|
||||||
|
|
||||||
|
|
||||||
|
nn.MsSsim = MsSsim
|
|
@ -14,3 +14,4 @@ from .TLU import *
|
||||||
from .ScaleAdd import *
|
from .ScaleAdd import *
|
||||||
from .DenseNorm import *
|
from .DenseNorm import *
|
||||||
from .AdaIN import *
|
from .AdaIN import *
|
||||||
|
from .MsSsim import *
|
||||||
|
|
|
@ -195,3 +195,117 @@ class UNetPatchDiscriminator(nn.ModelBase):
|
||||||
return center_out, self.out_conv(x)
|
return center_out, self.out_conv(x)
|
||||||
|
|
||||||
nn.UNetPatchDiscriminator = UNetPatchDiscriminator
|
nn.UNetPatchDiscriminator = UNetPatchDiscriminator
|
||||||
|
|
||||||
|
class UNetPatchDiscriminatorV2(nn.ModelBase):
    """
    U-Net shaped patch discriminator.

    Inspired by https://arxiv.org/abs/2002.12655 "A U-Net Based Discriminator for Generative Adversarial Networks"
    """

    def calc_receptive_field_size(self, layers):
        """
        Return the receptive field of a stack of (kernel_size, stride) layers.

        result the same as https://fomoro.com/research/article/receptive-field-calculatorindex.html
        """
        receptive_field = 0
        total_stride = 1
        for index, (kernel, stride) in enumerate(layers):
            if index > 0:
                # every further layer widens the field by (kernel-1) steps of
                # the stride accumulated so far
                receptive_field += (kernel - 1) * total_stride
            else:
                receptive_field = kernel
            total_stride *= stride
        return receptive_field

    def find_archi(self, target_patch_size, max_layers=6):
        """
        Find the best configuration of layers using only 3x3 convs for target patch size

        Enumerates every stack of 1..max_layers 3x3 convs whose last layer has
        stride 2 and whose other layers have stride 1 or 2. For each receptive
        field it keeps the stack with the fewest layers (ties: largest stride
        sum), then returns the stack whose receptive field is closest to
        target_patch_size as a list of [kernel_size, stride] pairs.
        """
        best_by_rf = {}
        for depth in range(1, max_layers + 1):
            # each bit of `mask` selects stride 2 (set) or 1 (clear) for one of
            # the first depth-1 layers; masks are visited from highest to 0
            for mask in range((1 << (depth - 1)) - 1, -1, -1):
                strides = [2 if mask & (1 << bit) else 1 for bit in range(depth - 1)]
                strides.append(2)
                candidate = [[3, stride] for stride in strides]
                stride_sum = sum(strides)

                rf = self.calc_receptive_field_size(candidate)

                known = best_by_rf.get(rf)
                if known is None:
                    best_by_rf[rf] = (depth, stride_sum, candidate)
                    continue

                fewer_layers = depth < known[0]
                same_layers_more_stride = depth == known[0] and stride_sum > known[1]
                if fewer_layers or same_layers_more_stride:
                    best_by_rf[rf] = (depth, stride_sum, candidate)

        receptive_fields = sorted(best_by_rf)
        nearest = np.abs(np.array(receptive_fields) - target_patch_size).argmin()
        return best_by_rf[receptive_fields[nearest]][2]

    def on_build(self, patch_size, in_ch):
        """
        Create the encoder, bottleneck and decoder layers.

        patch_size  target receptive field passed to find_archi
        in_ch       number of channels of the input
        """
        class ResidualBlock(nn.ModelBase):
            def on_build(self, ch, kernel_size=3):
                self.conv1 = nn.Conv2D(ch, ch, kernel_size=kernel_size, padding='SAME')
                self.conv2 = nn.Conv2D(ch, ch, kernel_size=kernel_size, padding='SAME')

            def forward(self, inp):
                hidden = tf.nn.leaky_relu(self.conv1(inp), 0.2)
                hidden = self.conv2(hidden)
                return tf.nn.leaky_relu(inp + hidden, 0.2)

        self.convs = []
        self.res = []
        self.upconvs = []
        self.upres = []
        layers = self.find_archi(patch_size)
        base_ch = 16
        depth = len(layers)

        # channel width per level; key -1 is the width right after in_conv,
        # widths double per level and are capped at 512
        level_chs = {level - 1: min(base_ch * (2 ** level), 512) for level in range(depth + 1)}

        self.in_conv = nn.Conv2D(in_ch, level_chs[-1], kernel_size=1, padding='VALID')

        for i, (kernel_size, strides) in enumerate(layers):
            ch_in = level_chs[i - 1]
            ch_out = level_chs[i]
            # every up-conv except the deepest one receives a concatenated
            # tensor, hence twice the channels
            up_in = ch_out if i == depth - 1 else ch_out * 2

            self.convs.append(nn.Conv2D(ch_in, ch_out, kernel_size=kernel_size, strides=strides, padding='SAME'))

            self.res.append(ResidualBlock(ch_out))

            # decoder lists are filled front-first so they end up ordered
            # deepest level first
            self.upconvs.insert(0, nn.Conv2DTranspose(up_in, ch_in, kernel_size=kernel_size, strides=strides, padding='SAME'))

            self.upres.insert(0, ResidualBlock(ch_in * 2))

        self.out_conv = nn.Conv2D(level_chs[-1] * 2, 1, kernel_size=1, padding='VALID')

        bottleneck_ch = level_chs[depth - 1]
        self.center_out = nn.Conv2D(bottleneck_ch, 1, kernel_size=1, padding='VALID')
        self.center_conv = nn.Conv2D(bottleneck_ch, bottleneck_ch, kernel_size=1, padding='VALID')

    def forward(self, x):
        """
        Return (center_out, out): the single-channel output taken at the
        bottleneck and the single-channel output taken after the decoder.
        """
        x = tf.nn.leaky_relu(self.in_conv(x), 0.1)

        # encoder: remember the input of every level for the skip connections
        skips = []
        for conv, res in zip(self.convs, self.res):
            skips.append(x)
            x = res(tf.nn.leaky_relu(conv(x), 0.1))

        center_out = self.center_out(x)
        x = self.center_conv(x)

        # decoder: walk the skips from deepest to shallowest
        for upconv, skip, upres in zip(self.upconvs, reversed(skips), self.upres):
            x = tf.nn.leaky_relu(upconv(x), 0.1)
            x = upres(tf.concat([skip, x], axis=nn.conv2d_ch_axis))

        return center_out, self.out_conv(x)
|
||||||
|
|
||||||
|
nn.UNetPatchDiscriminatorV2 = UNetPatchDiscriminatorV2
|
||||||
|
|
|
@ -112,7 +112,7 @@ class nn():
|
||||||
config = tf.ConfigProto(device_count={'GPU': 0})
|
config = tf.ConfigProto(device_count={'GPU': 0})
|
||||||
else:
|
else:
|
||||||
nn.tf_default_device = "/GPU:0"
|
nn.tf_default_device = "/GPU:0"
|
||||||
config = tf.ConfigProto()
|
config = tf.ConfigProto(allow_soft_placement=True)
|
||||||
config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices])
|
config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices])
|
||||||
|
|
||||||
config.gpu_options.force_gpu_compatible = True
|
config.gpu_options.force_gpu_compatible = True
|
||||||
|
|
BIN
doc/dfl_cover.png
Normal file
BIN
doc/dfl_cover.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 326 KiB |
50
doc/features/gan-options/README.md
Normal file
50
doc/features/gan-options/README.md
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
# GAN Options
|
||||||
|
|
||||||
|
Allows you to use one-sided label smoothing and noisy labels when training the discriminator.
|
||||||
|
|
||||||
|
- [ONE-SIDED LABEL SMOOTHING](#one-sided-label-smoothing)
|
||||||
|
- [NOISY LABELS](#noisy-labels)
|
||||||
|
|
||||||
|
## ONE-SIDED LABEL SMOOTHING
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
> Deep networks may suffer from overconfidence. For example, it uses very few features to classify an object. To
|
||||||
|
> mitigate the problem, deep learning uses regulation and dropout to avoid overconfidence.
|
||||||
|
>
|
||||||
|
> In GAN, if the discriminator depends on a small set of features to detect real images, the generator may just produce
|
||||||
|
> these features only to exploit the discriminator. The optimization may turn too greedy and produces no long term
|
||||||
|
> benefit. In GAN, overconfidence hurts badly. To avoid the problem, we penalize the discriminator when the prediction
|
||||||
|
> for any real images go beyond 0.9 (D(real image)>0.9). This is done by setting our target label value to be 0.9
|
||||||
|
> instead of 1.0.
|
||||||
|
- [GAN — Ways to improve GAN performance](https://towardsdatascience.com/gan-ways-to-improve-gan-performance-acf37f9f59b)
|
||||||
|
|
||||||
|
By setting the label smoothing value to any value > 0, the target label value used with the discriminator will be:
|
||||||
|
```
|
||||||
|
target label value = 1 - (label smoothing value)
|
||||||
|
```
|
||||||
|
### USAGE
|
||||||
|
|
||||||
|
```
|
||||||
|
[0.1] GAN label smoothing ( 0 - 0.5 ?:help ) : 0.1
|
||||||
|
```
|
||||||
|
|
||||||
|
## NOISY LABELS
|
||||||
|
|
||||||
|
> make the labels the noisy for the discriminator: occasionally flip the labels when training the discriminator
|
||||||
|
- [How to Train a GAN? Tips and tricks to make GANs work](https://github.com/soumith/ganhacks/blob/master/README.md#6-use-soft-and-noisy-labels)
|
||||||
|
|
||||||
|
When the noisy labels value is set to any value > 0, the target labels used with the discriminator will be flipped
|
||||||
|
("fake" => "real" / "real" => "fake") with probability p (where p is the noisy label value).
|
||||||
|
|
||||||
|
E.g., if the value is 0.05, then ~5% of the labels will be flipped when training the discriminator
|
||||||
|
|
||||||
|
### USAGE
|
||||||
|
```
|
||||||
|
[0.05] GAN noisy labels ( 0 - 0.5 ?:help ) : 0.05
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 62 KiB |
43
doc/features/ms-ssim/README.md
Normal file
43
doc/features/ms-ssim/README.md
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
# Multiscale SSIM (MS-SSIM)
|
||||||
|
|
||||||
|
Allows you to train using the MS-SSIM (multiscale structural similarity index measure) as the main loss metric,
|
||||||
|
a perceptually more accurate measure of image quality than MSE (mean squared error).
|
||||||
|
|
||||||
|
As an added benefit, you may see a decrease in ms/iteration (when using the same batch size) with Multiscale loss
|
||||||
|
enabled. You may also be able to train with a larger batch size with it enabled.
|
||||||
|
|
||||||
|
- [DESCRIPTION](#description)
|
||||||
|
- [USAGE](#usage)
|
||||||
|
|
||||||
|
## DESCRIPTION
|
||||||
|
|
||||||
|
[SSIM](https://en.wikipedia.org/wiki/Structural_similarity) is a metric for comparing the perceptual quality of an image:
|
||||||
|
> SSIM is a perception-based model that considers image degradation as perceived change in structural information,
|
||||||
|
> while also incorporating important perceptual phenomena, including both luminance masking and contrast masking terms.
|
||||||
|
> [...]
|
||||||
|
> Structural information is the idea that the pixels have strong inter-dependencies especially when they are spatially
|
||||||
|
> close. These dependencies carry important information about the structure of the objects in the visual scene.
|
||||||
|
> Luminance masking is a phenomenon whereby image distortions (in this context) tend to be less visible in bright
|
||||||
|
> regions, while contrast masking is a phenomenon whereby distortions become less visible where there is significant
|
||||||
|
> activity or "texture" in the image.
|
||||||
|
|
||||||
|
The current loss metric is a combination of SSIM (structural similarity index measure) and
|
||||||
|
[MSE](https://en.wikipedia.org/wiki/Mean_squared_error) (mean squared error).
|
||||||
|
|
||||||
|
[Multiscale SSIM](https://en.wikipedia.org/wiki/Structural_similarity#Multi-Scale_SSIM) is a variant of SSIM that
|
||||||
|
improves upon SSIM by comparing the similarity at multiple scales (e.g.: full-size, half-size, 1/4 size, etc.).
|
||||||
|
By using MS-SSIM as our main loss metric, we should expect the image similarity to improve across each scale, improving
|
||||||
|
both the large scale and small scale detail of the predicted images.
|
||||||
|
|
||||||
|
Original paper: [Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik.
|
||||||
|
"Multiscale structural similarity for image quality assessment."
|
||||||
|
Signals, Systems and Computers, 2004.](https://www.cns.nyu.edu/pub/eero/wang03b.pdf)
|
||||||
|
|
||||||
|
## USAGE
|
||||||
|
|
||||||
|
```
|
||||||
|
[n] Use multiscale loss? ( y/n ?:help ) : y
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -19,4 +19,7 @@ maintaining the same `C*` (chroma, relative saturation).
|
||||||
|
|
||||||
## USAGE
|
## USAGE
|
||||||
|
|
||||||
`[n] Random color ( y/n ?:help ) : y`
|
```
|
||||||
|
[n] Random color ( y/n ?:help ) : y
|
||||||
|
```
|
||||||
|
|
||||||
|
|
|
@ -53,6 +53,8 @@ class SAEHDModel(ModelBase):
|
||||||
lr_dropout = {True:'y', False:'n'}.get(lr_dropout, lr_dropout) #backward comp
|
lr_dropout = {True:'y', False:'n'}.get(lr_dropout, lr_dropout) #backward comp
|
||||||
default_lr_dropout = self.options['lr_dropout'] = lr_dropout
|
default_lr_dropout = self.options['lr_dropout'] = lr_dropout
|
||||||
|
|
||||||
|
default_ms_ssim_loss = self.options['ms_ssim_loss'] = self.load_or_def_option('ms_ssim_loss', False)
|
||||||
|
|
||||||
default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True)
|
default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True)
|
||||||
default_background_power = self.options['background_power'] = self.load_or_def_option('background_power', 0.0)
|
default_background_power = self.options['background_power'] = self.load_or_def_option('background_power', 0.0)
|
||||||
default_true_face_power = self.options['true_face_power'] = self.load_or_def_option('true_face_power', 0.0)
|
default_true_face_power = self.options['true_face_power'] = self.load_or_def_option('true_face_power', 0.0)
|
||||||
|
@ -138,9 +140,12 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
|
|
||||||
self.options['uniform_yaw'] = io.input_bool ("Uniform yaw distribution of samples", default_uniform_yaw, help_message='Helps to fix blurry side faces due to small amount of them in the faceset.')
|
self.options['uniform_yaw'] = io.input_bool ("Uniform yaw distribution of samples", default_uniform_yaw, help_message='Helps to fix blurry side faces due to small amount of them in the faceset.')
|
||||||
|
|
||||||
|
default_gan_version = self.options['gan_version'] = self.load_or_def_option('gan_version', 2)
|
||||||
default_gan_power = self.options['gan_power'] = self.load_or_def_option('gan_power', 0.0)
|
default_gan_power = self.options['gan_power'] = self.load_or_def_option('gan_power', 0.0)
|
||||||
default_gan_patch_size = self.options['gan_patch_size'] = self.load_or_def_option('gan_patch_size', self.options['resolution'] // 8)
|
default_gan_patch_size = self.options['gan_patch_size'] = self.load_or_def_option('gan_patch_size', self.options['resolution'] // 8)
|
||||||
default_gan_dims = self.options['gan_dims'] = self.load_or_def_option('gan_dims', 16)
|
default_gan_dims = self.options['gan_dims'] = self.load_or_def_option('gan_dims', 16)
|
||||||
|
default_gan_smoothing = self.options['gan_smoothing'] = self.load_or_def_option('gan_smoothing', 0.1)
|
||||||
|
default_gan_noise = self.options['gan_noise'] = self.load_or_def_option('gan_noise', 0.05)
|
||||||
|
|
||||||
if self.is_first_run() or ask_override:
|
if self.is_first_run() or ask_override:
|
||||||
self.options['models_opt_on_gpu'] = io.input_bool ("Place models and optimizer on GPU", default_models_opt_on_gpu, help_message="When you train on one GPU, by default model and optimizer weights are placed on GPU to accelerate the process. You can place they on CPU to free up extra VRAM, thus set bigger dimensions.")
|
self.options['models_opt_on_gpu'] = io.input_bool ("Place models and optimizer on GPU", default_models_opt_on_gpu, help_message="When you train on one GPU, by default model and optimizer weights are placed on GPU to accelerate the process. You can place they on CPU to free up extra VRAM, thus set bigger dimensions.")
|
||||||
|
@ -149,17 +154,28 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
|
|
||||||
self.options['lr_dropout'] = io.input_str (f"Use learning rate dropout", default_lr_dropout, ['n','y','cpu'], help_message="When the face is trained enough, you can enable this option to get extra sharpness and reduce subpixel shake for less amount of iterations. Enabled it before `disable random warp` and before GAN. \nn - disabled.\ny - enabled\ncpu - enabled on CPU. This allows not to use extra VRAM, sacrificing 20% time of iteration.")
|
self.options['lr_dropout'] = io.input_str (f"Use learning rate dropout", default_lr_dropout, ['n','y','cpu'], help_message="When the face is trained enough, you can enable this option to get extra sharpness and reduce subpixel shake for less amount of iterations. Enabled it before `disable random warp` and before GAN. \nn - disabled.\ny - enabled\ncpu - enabled on CPU. This allows not to use extra VRAM, sacrificing 20% time of iteration.")
|
||||||
|
|
||||||
|
self.options['ms_ssim_loss'] = io.input_bool("Use multiscale loss?", default_ms_ssim_loss, help_message="Use Multiscale structural similarity for image quality assessment.")
|
||||||
|
|
||||||
self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness and reduce subpixel shake for less amount of iterations.")
|
self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness and reduce subpixel shake for less amount of iterations.")
|
||||||
|
|
||||||
|
self.options['gan_version'] = np.clip (io.input_int("GAN version", default_gan_version, add_info="2 or 3", help_message="Choose GAN version (v2: 7/16/2020, v3: 1/3/2021):"), 2, 3)
|
||||||
|
|
||||||
|
if self.options['gan_version'] == 2:
|
||||||
|
self.options['gan_power'] = np.clip ( io.input_number ("GAN power", default_gan_power, add_info="0.0 .. 10.0", help_message="Train the network in Generative Adversarial manner. Forces the neural network to learn small details of the face. Enable it only when the face is trained enough and don't disable. Typical value is 0.1"), 0.0, 10.0 )
|
||||||
|
else:
|
||||||
self.options['gan_power'] = np.clip ( io.input_number ("GAN power", default_gan_power, add_info="0.0 .. 1.0", help_message="Forces the neural network to learn small details of the face. Enable it only when the face is trained enough with lr_dropout(on) and random_warp(off), and don't disable. The higher the value, the higher the chances of artifacts. Typical fine value is 0.1"), 0.0, 1.0 )
|
self.options['gan_power'] = np.clip ( io.input_number ("GAN power", default_gan_power, add_info="0.0 .. 1.0", help_message="Forces the neural network to learn small details of the face. Enable it only when the face is trained enough with lr_dropout(on) and random_warp(off), and don't disable. The higher the value, the higher the chances of artifacts. Typical fine value is 0.1"), 0.0, 1.0 )
|
||||||
|
|
||||||
if self.options['gan_power'] != 0.0:
|
if self.options['gan_power'] != 0.0:
|
||||||
|
if self.options['gan_version'] == 3:
|
||||||
gan_patch_size = np.clip ( io.input_int("GAN patch size", default_gan_patch_size, add_info="3-640", help_message="The higher patch size, the higher the quality, the more VRAM is required. You can get sharper edges even at the lowest setting. Typical fine value is resolution / 8." ), 3, 640 )
|
gan_patch_size = np.clip ( io.input_int("GAN patch size", default_gan_patch_size, add_info="3-640", help_message="The higher patch size, the higher the quality, the more VRAM is required. You can get sharper edges even at the lowest setting. Typical fine value is resolution / 8." ), 3, 640 )
|
||||||
self.options['gan_patch_size'] = gan_patch_size
|
self.options['gan_patch_size'] = gan_patch_size
|
||||||
|
|
||||||
gan_dims = np.clip ( io.input_int("GAN dimensions", default_gan_dims, add_info="4-64", help_message="The dimensions of the GAN network. The higher dimensions, the more VRAM is required. You can get sharper edges even at the lowest setting. Typical fine value is 16." ), 4, 64 )
|
gan_dims = np.clip ( io.input_int("GAN dimensions", default_gan_dims, add_info="4-64", help_message="The dimensions of the GAN network. The higher dimensions, the more VRAM is required. You can get sharper edges even at the lowest setting. Typical fine value is 16." ), 4, 64 )
|
||||||
self.options['gan_dims'] = gan_dims
|
self.options['gan_dims'] = gan_dims
|
||||||
|
|
||||||
|
self.options['gan_smoothing'] = np.clip ( io.input_number("GAN label smoothing", default_gan_smoothing, add_info="0 - 0.5", help_message="Uses soft labels with values slightly off from 0/1 for GAN, has a regularizing effect"), 0, 0.5)
|
||||||
|
self.options['gan_noise'] = np.clip ( io.input_number("GAN noisy labels", default_gan_noise, add_info="0 - 0.5", help_message="Marks some images with the wrong label, helps prevent collapse"), 0, 0.5)
|
||||||
|
|
||||||
if 'df' in self.options['archi']:
|
if 'df' in self.options['archi']:
|
||||||
self.options['true_face_power'] = np.clip ( io.input_number ("'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Typical value is 0.01 . Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 )
|
self.options['true_face_power'] = np.clip ( io.input_number ("'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Typical value is 0.01 . Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 )
|
||||||
else:
|
else:
|
||||||
|
@ -299,6 +315,10 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
|
|
||||||
if self.is_training:
|
if self.is_training:
|
||||||
if gan_power != 0:
|
if gan_power != 0:
|
||||||
|
if self.options['gan_version'] == 2:
|
||||||
|
self.D_src = nn.UNetPatchDiscriminatorV2(patch_size=resolution//16, in_ch=input_ch, name="D_src")
|
||||||
|
self.model_filename_list += [ [self.D_src, 'D_src_v2.npy'] ]
|
||||||
|
else:
|
||||||
self.D_src = nn.UNetPatchDiscriminator(patch_size=self.options['gan_patch_size'], in_ch=input_ch, base_ch=self.options['gan_dims'], name="D_src")
|
self.D_src = nn.UNetPatchDiscriminator(patch_size=self.options['gan_patch_size'], in_ch=input_ch, base_ch=self.options['gan_dims'], name="D_src")
|
||||||
self.model_filename_list += [ [self.D_src, 'GAN.npy'] ]
|
self.model_filename_list += [ [self.D_src, 'GAN.npy'] ]
|
||||||
|
|
||||||
|
@ -325,6 +345,11 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
self.model_filename_list += [ (self.D_code_opt, 'D_code_opt.npy') ]
|
self.model_filename_list += [ (self.D_code_opt, 'D_code_opt.npy') ]
|
||||||
|
|
||||||
if gan_power != 0:
|
if gan_power != 0:
|
||||||
|
if self.options['gan_version'] == 2:
|
||||||
|
self.D_src_dst_opt = OptimizerClass(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_src_dst_opt')
|
||||||
|
self.D_src_dst_opt.initialize_variables ( self.D_src.get_weights(), vars_on_cpu=optimizer_vars_on_cpu, lr_dropout_on_cpu=self.options['lr_dropout']=='cpu')#+self.D_src_x2.get_weights()
|
||||||
|
self.model_filename_list += [ (self.D_src_dst_opt, 'D_src_v2_opt.npy') ]
|
||||||
|
else:
|
||||||
self.D_src_dst_opt = OptimizerClass(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='GAN_opt')
|
self.D_src_dst_opt = OptimizerClass(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='GAN_opt')
|
||||||
self.D_src_dst_opt.initialize_variables ( self.D_src.get_weights(), vars_on_cpu=optimizer_vars_on_cpu, lr_dropout_on_cpu=self.options['lr_dropout']=='cpu')#+self.D_src_x2.get_weights()
|
self.D_src_dst_opt.initialize_variables ( self.D_src.get_weights(), vars_on_cpu=optimizer_vars_on_cpu, lr_dropout_on_cpu=self.options['lr_dropout']=='cpu')#+self.D_src_x2.get_weights()
|
||||||
self.model_filename_list += [ (self.D_src_dst_opt, 'GAN_opt.npy') ]
|
self.model_filename_list += [ (self.D_src_dst_opt, 'GAN_opt.npy') ]
|
||||||
|
@ -416,19 +441,19 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
gpu_target_dst_style_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_style_blur)
|
gpu_target_dst_style_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_style_blur)
|
||||||
|
|
||||||
gpu_target_src_anti_masked = gpu_target_src*(1.0-gpu_target_srcm_blur)
|
gpu_target_src_anti_masked = gpu_target_src*(1.0-gpu_target_srcm_blur)
|
||||||
gpu_target_dst_anti_masked = gpu_target_dst_style_anti_masked
|
|
||||||
|
|
||||||
gpu_target_src_masked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
|
gpu_target_src_masked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
|
||||||
gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst
|
gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst
|
||||||
|
|
||||||
gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src
|
gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src
|
||||||
gpu_pred_src_src_anti_masked = gpu_pred_src_src*(1.0-gpu_target_srcm_blur)
|
gpu_pred_src_src_anti_masked = gpu_pred_src_src*(1.0-gpu_target_srcm_blur)
|
||||||
gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst
|
gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst
|
||||||
gpu_pred_dst_dst_anti_masked = gpu_pred_dst_dst*(1.0-gpu_target_dstm_blur)
|
|
||||||
|
|
||||||
gpu_psd_target_dst_style_masked = gpu_pred_src_dst*gpu_target_dstm_style_blur
|
gpu_psd_target_dst_style_masked = gpu_pred_src_dst*gpu_target_dstm_style_blur
|
||||||
gpu_psd_target_dst_style_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_style_blur)
|
gpu_psd_target_dst_style_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_style_blur)
|
||||||
|
|
||||||
|
if self.options['ms_ssim_loss']:
|
||||||
|
gpu_src_loss = 10 * nn.MsSsim(resolution)(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0)
|
||||||
|
else:
|
||||||
if resolution < 256:
|
if resolution < 256:
|
||||||
gpu_src_loss = tf.reduce_mean ( 10*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
gpu_src_loss = tf.reduce_mean ( 10*nn.dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
||||||
else:
|
else:
|
||||||
|
@ -450,12 +475,15 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
|
|
||||||
if self.options['background_power'] > 0:
|
if self.options['background_power'] > 0:
|
||||||
bg_factor = self.options['background_power']
|
bg_factor = self.options['background_power']
|
||||||
if resolution < 256:
|
if self.options['ms_ssim_loss']:
|
||||||
gpu_src_loss += bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_src_anti_masked, gpu_pred_src_src_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
gpu_src_loss = 10 * nn.MsSsim(resolution)(gpu_target_src, gpu_pred_src_src, max_val=1.0)
|
||||||
else:
|
else:
|
||||||
gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src_anti_masked, gpu_pred_src_src_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
if resolution < 256:
|
||||||
gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src_anti_masked, gpu_pred_src_src_anti_masked, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
|
gpu_src_loss += bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
||||||
gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src_anti_masked - gpu_pred_src_src_anti_masked ), axis=[1,2,3])
|
else:
|
||||||
|
gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
||||||
|
gpu_src_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_src, gpu_pred_src_src, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
|
||||||
|
gpu_src_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_src - gpu_pred_src_src ), axis=[1,2,3])
|
||||||
|
|
||||||
face_style_power = self.options['face_style_power'] / 100.0
|
face_style_power = self.options['face_style_power'] / 100.0
|
||||||
if face_style_power != 0 and not self.pretrain:
|
if face_style_power != 0 and not self.pretrain:
|
||||||
|
@ -466,6 +494,9 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*nn.dssim( gpu_psd_target_dst_style_anti_masked, gpu_target_dst_style_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*nn.dssim( gpu_psd_target_dst_style_anti_masked, gpu_target_dst_style_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
||||||
gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*tf.square(gpu_psd_target_dst_style_anti_masked - gpu_target_dst_style_anti_masked), axis=[1,2,3] )
|
gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*tf.square(gpu_psd_target_dst_style_anti_masked - gpu_target_dst_style_anti_masked), axis=[1,2,3] )
|
||||||
|
|
||||||
|
if self.options['ms_ssim_loss']:
|
||||||
|
gpu_dst_loss = 10 * nn.MsSsim(resolution)(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0)
|
||||||
|
else:
|
||||||
if resolution < 256:
|
if resolution < 256:
|
||||||
gpu_dst_loss = tf.reduce_mean ( 10*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
|
gpu_dst_loss = tf.reduce_mean ( 10*nn.dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
|
||||||
else:
|
else:
|
||||||
|
@ -486,12 +517,15 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
|
|
||||||
if self.options['background_power'] > 0:
|
if self.options['background_power'] > 0:
|
||||||
bg_factor = self.options['background_power']
|
bg_factor = self.options['background_power']
|
||||||
if resolution < 256:
|
if self.options['ms_ssim_loss']:
|
||||||
gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_dst_anti_masked, gpu_pred_dst_dst_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
# Background term for the dst branch: accumulate into gpu_dst_loss (not
# gpu_src_loss) and weight by bg_factor, matching the DSSIM branch. Assigning
# to gpu_src_loss here would overwrite the src loss with a dst comparison.
gpu_dst_loss += bg_factor * 10 * nn.MsSsim(resolution)(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0)
|
||||||
else:
|
else:
|
||||||
gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst_anti_masked, gpu_pred_dst_dst_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
if resolution < 256:
|
||||||
gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst_anti_masked, gpu_pred_dst_dst_anti_masked, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
|
gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
||||||
gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst_anti_masked - gpu_pred_dst_dst_anti_masked ), axis=[1,2,3])
|
else:
|
||||||
|
gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
|
||||||
|
gpu_dst_loss += bg_factor * tf.reduce_mean ( 5*nn.dssim(gpu_target_dst, gpu_pred_dst_dst, max_val=1.0, filter_size=int(resolution/23.2)), axis=[1])
|
||||||
|
gpu_dst_loss += bg_factor * tf.reduce_mean ( 10*tf.square ( gpu_target_dst - gpu_pred_dst_dst ), axis=[1,2,3])
|
||||||
|
|
||||||
gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] )
|
gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] )
|
||||||
|
|
||||||
|
@ -521,22 +555,37 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
|
||||||
gpu_pred_src_src_d, \
|
gpu_pred_src_src_d, \
|
||||||
gpu_pred_src_src_d2 = self.D_src(gpu_pred_src_src_masked_opt)
|
gpu_pred_src_src_d2 = self.D_src(gpu_pred_src_src_masked_opt)
|
||||||
|
|
||||||
gpu_pred_src_src_d_ones = tf.ones_like (gpu_pred_src_src_d)
|
# Build a discriminator target tensor with randomly flipped, scaled labels.
# The result has shape (self.batch_size,) + tensor.shape[1:].
#   label     -- 1 samples ones with probability (1 - noise); any other value
#                samples ones with probability `noise`.
#   tensor    -- only its static shape beyond the first axis is used.
#   smoothing -- sampled values are multiplied by (1 - smoothing), so ones
#                become (1 - smoothing) and zeros stay 0 (the additive
#                smoothing term is commented out in the body).
#   noise     -- per-element probability of drawing the opposite label.
def get_smooth_noisy_labels(label, tensor, smoothing=0.1, noise=0.05):
|
||||||
gpu_pred_src_src_d_zeros = tf.zeros_like(gpu_pred_src_src_d)
|
num_labels = self.batch_size
|
||||||
|
for d in tensor.get_shape().as_list()[1:]:
|
||||||
|
num_labels *= d
|
||||||
|
|
||||||
gpu_pred_src_src_d2_ones = tf.ones_like (gpu_pred_src_src_d2)
|
probs = tf.math.log([[noise, 1-noise]]) if label == 1 else tf.math.log([[1-noise, noise]])
|
||||||
gpu_pred_src_src_d2_zeros = tf.zeros_like(gpu_pred_src_src_d2)
|
x = tf.random.categorical(probs, num_labels)
|
||||||
|
x = tf.cast(x, tf.float32)
|
||||||
|
x = tf.math.scalar_mul(1-smoothing, x)
|
||||||
|
# x = x + (smoothing/num_labels)
|
||||||
|
x = tf.reshape(x, (self.batch_size,) + tensor.shape[1:])
|
||||||
|
return x
|
||||||
|
|
||||||
gpu_target_src_d, \
|
smoothing = self.options['gan_smoothing']
|
||||||
gpu_target_src_d2 = self.D_src(gpu_target_src_masked_opt)
|
noise = self.options['gan_noise']
|
||||||
|
|
||||||
gpu_target_src_d_ones = tf.ones_like(gpu_target_src_d)
|
gpu_pred_src_src_d_ones = tf.ones_like(gpu_pred_src_src_d)
|
||||||
gpu_target_src_d2_ones = tf.ones_like(gpu_target_src_d2)
|
gpu_pred_src_src_d2_ones = tf.ones_like(gpu_pred_src_src_d2)
|
||||||
|
|
||||||
gpu_D_src_dst_loss = (DLoss(gpu_target_src_d_ones , gpu_target_src_d) + \
|
gpu_pred_src_src_d_smooth_zeros = get_smooth_noisy_labels(0, gpu_pred_src_src_d, smoothing=smoothing, noise=noise)
|
||||||
DLoss(gpu_pred_src_src_d_zeros , gpu_pred_src_src_d) ) * 0.5 + \
|
gpu_pred_src_src_d2_smooth_zeros = get_smooth_noisy_labels(0, gpu_pred_src_src_d2, smoothing=smoothing, noise=noise)
|
||||||
(DLoss(gpu_target_src_d2_ones , gpu_target_src_d2) + \
|
|
||||||
DLoss(gpu_pred_src_src_d2_zeros , gpu_pred_src_src_d2) ) * 0.5
|
gpu_target_src_d, gpu_target_src_d2 = self.D_src(gpu_target_src_masked_opt)
|
||||||
|
|
||||||
|
gpu_target_src_d_smooth_ones = get_smooth_noisy_labels(1, gpu_target_src_d, smoothing=smoothing, noise=noise)
|
||||||
|
gpu_target_src_d2_smooth_ones = get_smooth_noisy_labels(1, gpu_target_src_d2, smoothing=smoothing, noise=noise)
|
||||||
|
|
||||||
|
gpu_D_src_dst_loss = DLoss(gpu_target_src_d_smooth_ones, gpu_target_src_d) \
|
||||||
|
+ DLoss(gpu_pred_src_src_d_smooth_zeros, gpu_pred_src_src_d) \
|
||||||
|
+ DLoss(gpu_target_src_d2_smooth_ones, gpu_target_src_d2) \
|
||||||
|
+ DLoss(gpu_pred_src_src_d2_smooth_zeros, gpu_pred_src_src_d2)
|
||||||
|
|
||||||
gpu_D_src_dst_loss_gvs += [ nn.gradients (gpu_D_src_dst_loss, self.D_src.get_weights() ) ]#+self.D_src_x2.get_weights()
|
gpu_D_src_dst_loss_gvs += [ nn.gradients (gpu_D_src_dst_loss, self.D_src.get_weights() ) ]#+self.D_src_x2.get_weights()
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue