SAEHD: added new option Use AdaBelief optimizer?
Experimental AdaBelief optimizer. It requires more VRAM, but the accuracy of the model is higher, and lr_dropout is not needed.
commit 704b5dc072
parent e7d36b4287

1 changed file with 22 additions and 7 deletions
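For background on what the new option switches to: AdaBelief (Zhuang et al., 2020) keeps Adam-style moment estimates, but its second moment tracks the deviation of the gradient from its running mean rather than the raw squared gradient. Below is a minimal, framework-agnostic sketch of that update rule for illustration only; it is not DeepFaceLab's leras nn.AdaBelief implementation, and the beta/eps defaults shown are the ones suggested in the AdaBelief paper, not necessarily those used here (the lr default merely mirrors the 5e-5 used in the diff).

    # Illustrative AdaBelief step for one parameter tensor (NumPy sketch, not leras).
    import numpy as np

    def adabelief_step(param, grad, m, s, t, lr=5e-5, beta1=0.9, beta2=0.999, eps=1e-16):
        m = beta1 * m + (1 - beta1) * grad                  # EMA of the gradient (same as Adam)
        s = beta2 * s + (1 - beta2) * (grad - m)**2 + eps   # EMA of (gradient - EMA): the "belief" term
        m_hat = m / (1 - beta1**t)                          # bias correction, as in Adam
        s_hat = s / (1 - beta2**t)
        param = param - lr * m_hat / (np.sqrt(s_hat) + eps) # adaptive step scaled by the belief
        return param, m, s

The practical consequence the commit message points at shows up in the optimizer-initialization hunk below: when the option is enabled, lr_dropout is forced to 1.0 (i.e. off) and an lr_cos value is passed to the optimizer instead.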
@@ -46,9 +46,11 @@ class SAEHDModel(ModelBase):
         default_eyes_prio = self.options['eyes_prio'] = self.load_or_def_option('eyes_prio', False)
         default_uniform_yaw = self.options['uniform_yaw'] = self.load_or_def_option('uniform_yaw', False)

+        default_adabelief = self.options['adabelief'] = self.load_or_def_option('adabelief', False)
+
         lr_dropout = self.load_or_def_option('lr_dropout', 'n')
         lr_dropout = {True:'y', False:'n'}.get(lr_dropout, lr_dropout) #backward comp
         default_lr_dropout = self.options['lr_dropout'] = lr_dropout

         default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True)
         default_gan_power = self.options['gan_power'] = self.load_or_def_option('gan_power', 0.0)
@@ -135,7 +137,10 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
         if self.is_first_run() or ask_override:
             self.options['models_opt_on_gpu'] = io.input_bool ("Place models and optimizer on GPU", default_models_opt_on_gpu, help_message="When you train on one GPU, by default model and optimizer weights are placed on GPU to accelerate the process. You can place they on CPU to free up extra VRAM, thus set bigger dimensions.")

-            self.options['lr_dropout'] = io.input_str (f"Use learning rate dropout", default_lr_dropout, ['n','y','cpu'], help_message="When the face is trained enough, you can enable this option to get extra sharpness and reduce subpixel shake for less amount of iterations. Enabled it before `disable random warp` and before GAN. \nn - disabled.\ny - enabled\ncpu - enabled on CPU. This allows not to use extra VRAM, sacrificing 20% time of iteration.")
+            self.options['adabelief'] = io.input_bool ("Use AdaBelief optimizer?", default_adabelief, help_message="Experimental AdaBelief optimizer. It requires more VRAM, but the accuracy of the model is higher, and lr_dropout is not needed.")
+
+            if not self.options['adabelief']:
+                self.options['lr_dropout'] = io.input_str (f"Use learning rate dropout", default_lr_dropout, ['n','y','cpu'], help_message="When the face is trained enough, you can enable this option to get extra sharpness and reduce subpixel shake for less amount of iterations. Enabled it before `disable random warp` and before GAN. \nn - disabled.\ny - enabled\ncpu - enabled on CPU. This allows not to use extra VRAM, sacrificing 20% time of iteration.")

             self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness and reduce subpixel shake for less amount of iterations.")
@@ -275,24 +280,33 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
             # Initialize optimizers
             lr=5e-5
             lr_dropout = 0.3 if self.options['lr_dropout'] in ['y','cpu'] and not self.pretrain else 1.0
+            lr_cos = 0
+            adabelief = self.options['adabelief']
+
+            OptimizerClass = nn.RMSprop
+            if adabelief:
+                lr_dropout = 1.0
+                lr_cos = 2000
+                OptimizerClass = nn.AdaBelief
+
             clipnorm = 1.0 if self.options['clipgrad'] else 0.0

             if 'df' in archi_type:
                 self.src_dst_trainable_weights = self.encoder.get_weights() + self.inter.get_weights() + self.decoder_src.get_weights() + self.decoder_dst.get_weights()
             elif 'liae' in archi_type:
                 self.src_dst_trainable_weights = self.encoder.get_weights() + self.inter_AB.get_weights() + self.inter_B.get_weights() + self.decoder.get_weights()

-            self.src_dst_opt = nn.RMSprop(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='src_dst_opt')
+            self.src_dst_opt = OptimizerClass(lr=lr, lr_dropout=lr_dropout, lr_cos=lr_cos, clipnorm=clipnorm, name='src_dst_opt')
             self.src_dst_opt.initialize_variables (self.src_dst_trainable_weights, vars_on_cpu=optimizer_vars_on_cpu, lr_dropout_on_cpu=self.options['lr_dropout']=='cpu')
             self.model_filename_list += [ (self.src_dst_opt, 'src_dst_opt.npy') ]

             if self.options['true_face_power'] != 0:
-                self.D_code_opt = nn.RMSprop(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_code_opt')
+                self.D_code_opt = OptimizerClass(lr=lr, lr_dropout=lr_dropout, lr_cos=lr_cos, clipnorm=clipnorm, name='D_code_opt')
                 self.D_code_opt.initialize_variables ( self.code_discriminator.get_weights(), vars_on_cpu=optimizer_vars_on_cpu, lr_dropout_on_cpu=self.options['lr_dropout']=='cpu')
                 self.model_filename_list += [ (self.D_code_opt, 'D_code_opt.npy') ]

             if gan_power != 0:
-                self.D_src_dst_opt = nn.RMSprop(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_src_dst_opt')
+                self.D_src_dst_opt = OptimizerClass(lr=lr, lr_dropout=lr_dropout, lr_cos=lr_cos, clipnorm=clipnorm, name='D_src_dst_opt')
                 self.D_src_dst_opt.initialize_variables ( self.D_src.get_weights(), vars_on_cpu=optimizer_vars_on_cpu, lr_dropout_on_cpu=self.options['lr_dropout']=='cpu')#+self.D_src_x2.get_weights()
                 self.model_filename_list += [ (self.D_src_dst_opt, 'D_src_v2_opt.npy') ]
@@ -471,14 +485,15 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...

             # Average losses and gradients, and create optimizer update ops
-            with tf.device (models_opt_device):
+            with tf.device(f'/CPU:0'):
                 pred_src_src = nn.concat(gpu_pred_src_src_list, 0)
                 pred_dst_dst = nn.concat(gpu_pred_dst_dst_list, 0)
                 pred_src_dst = nn.concat(gpu_pred_src_dst_list, 0)
                 pred_src_srcm = nn.concat(gpu_pred_src_srcm_list, 0)
                 pred_dst_dstm = nn.concat(gpu_pred_dst_dstm_list, 0)
                 pred_src_dstm = nn.concat(gpu_pred_src_dstm_list, 0)

+            with tf.device (models_opt_device):
                 src_loss = tf.concat(gpu_src_losses, 0)
                 dst_loss = tf.concat(gpu_dst_losses, 0)
                 src_dst_loss_gv_op = self.src_dst_opt.get_update_op (nn.average_gv_list (gpu_G_loss_gvs))