SAE: added "simple optimizer" option, which allows training bigger networks on the same VRAM

nnlib: added DFLOptimizer, my own optimizer
iperov 2019-03-12 09:32:35 +04:00
parent 3bad8dd8ec
commit fd3b9add2f
2 changed files with 30 additions and 20 deletions
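
For context on the commit message: standard Adam keeps two moment accumulators (m and v) per trainable weight on the GPU, while DFLOptimizer below derives both moment terms from the current gradient alone and stores no per-weight state, which is where the VRAM saving comes from. A rough back-of-envelope sketch of that saving, assuming float32 optimizer state; the 60 million parameter count is a made-up example, not a figure from the repository:

# Rough VRAM estimate for optimizer state; illustration only, not part of the commit.
params = 60_000_000                      # hypothetical number of trainable weights
adam_state_mb = 2 * 4 * params / 2**20   # Adam: float32 m and v slots for every weight
simple_state_mb = 0                      # DFLOptimizer keeps no per-weight slots
print("Adam optimizer state: %.0f MB" % adam_state_mb)      # ~458 MB
print("Simple optimizer state: %.0f MB" % simple_state_mb)   # 0 MB
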


@@ -35,11 +35,20 @@ class SAEModel(ModelBase):
self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower()
self.options['learn_mask'] = io.input_bool ("Learn mask? (y/n, ?:help skip:y) : ", True, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case converter forced to use 'not predicted mask' that is not smooth as predicted. Model with style values can be learned without mask and produce same quality result.")
self.options['archi'] = io.input_str ("AE architecture (df, liae, vg ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae','vg'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'vg' - currently testing.").lower()
else:
self.options['resolution'] = self.options.get('resolution', default_resolution)
self.options['face_type'] = self.options.get('face_type', default_face_type)
self.options['learn_mask'] = self.options.get('learn_mask', True)
if is_first_run or ask_override:
def_simple_optimizer = self.options.get('simple_optimizer', False)
self.options['simple_optimizer'] = io.input_bool ("Use simple optimizer? (y/n, ?:help skip:%s) : " % ( {True:'y',False:'n'}[def_simple_optimizer] ), def_simple_optimizer, help_message="Simple optimizer allows you to train bigger network or more batch size, sacrificing training accuracy.")
else:
self.options['simple_optimizer'] = self.options.get('simple_optimizer', False)
if is_first_run:
self.options['archi'] = io.input_str ("AE architecture (df, liae, vg ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae','vg'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'vg' - currently testing.").lower()
else:
self.options['archi'] = self.options.get('archi', default_archi)
default_ae_dims = 256 if self.options['archi'] == 'liae' else 512
@@ -260,8 +269,12 @@ class SAEModel(ModelBase):
psd_target_dst_anti_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))]
if self.is_training_mode:
self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
if self.options['simple_optimizer']:
self.src_dst_opt = DFLOptimizer(lr=5e-5)
self.src_dst_mask_opt = DFLOptimizer(lr=5e-5)
else:
self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
if self.options['archi'] == 'liae':
src_dst_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights


@@ -72,7 +72,7 @@ RandomNormal = keras.initializers.RandomNormal
Model = keras.models.Model
Adam = keras.optimizers.Adam
FastAdam = nnlib.FastAdam
DFLOptimizer = nnlib.DFLOptimizer
modelify = nnlib.modelify
gaussian_blur = nnlib.gaussian_blur
@@ -434,16 +434,14 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
return dict(list(base_config.items()) + list(config.items()))
nnlib.Scale = Scale
class FastAdam(keras.optimizers.Optimizer):
def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, iterations=0, **kwargs):
super(FastAdam, self).__init__(**kwargs)
class DFLOptimizer(keras.optimizers.Optimizer):
def __init__(self, lr=0.001, **kwargs):
super(DFLOptimizer, self).__init__(**kwargs)
with K.name_scope(self.__class__.__name__):
self.iterations = K.variable(iterations, dtype='int64', name='iterations')
self.iterations = K.variable(0, dtype='int64', name='iterations')
self.lr = K.variable(lr, name='lr')
self.beta_1 = K.variable(beta_1, name='beta_1')
self.beta_2 = K.variable(beta_2, name='beta_2')
self.epsilon = K.epsilon()
self.beta_1 = K.variable(0.9, name='beta_1')
self.beta_2 = K.variable(0.998, name='beta_2')
@keras.legacy.interfaces.legacy_get_updates_support
def get_updates(self, loss, params):
@@ -451,16 +449,16 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
self.updates = [K.update_add(self.iterations, 1)]
lr = self.lr
t = K.cast(self.iterations, K.floatx()) + 1
t = ( K.cast(self.iterations, K.floatx()) ) % 1000 + 1
lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
(1. - K.pow(self.beta_1, t)))
self.weights = [self.iterations]
self.weights = []
for p, g in zip(params, grads):
m_t = (1. - self.beta_1) * g
v_t = (1. - self.beta_2) * K.square(g)
p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
p_t = p - lr_t * m_t / (K.sqrt(v_t) + K.epsilon() )
new_p = p_t
# Apply constraints.
@@ -471,15 +469,14 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
return self.updates
def get_config(self):
config = {'iterations': int(K.get_value(self.iterations)),
'lr': float(K.get_value(self.lr)),
config = {'lr': float(K.get_value(self.lr)),
'beta_1': float(K.get_value(self.beta_1)),
'beta_2': float(K.get_value(self.beta_2))
}
base_config = super(FastAdam, self).get_config()
base_config = super(DFLOptimizer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
nnlib.FastAdam = FastAdam
nnlib.DFLOptimizer = DFLOptimizer
'''
not implemented in plaidML
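
For readers skimming the diff above: DFLOptimizer applies an Adam-style bias-corrected step, but both moment estimates are computed from the current gradient only (its weights list stays empty, so nothing is accumulated between iterations), and the bias-correction timestep wraps every 1000 iterations. A minimal NumPy sketch of that update rule, using the learning rate set in SAE, the betas from __init__, and Keras' default epsilon; this is an illustration under those assumptions, not code from the repository:

import numpy as np

def dfl_optimizer_step(p, g, iteration, lr=5e-5, beta_1=0.9, beta_2=0.998, eps=1e-7):
    # Bias-correction step counter restarts every 1000 iterations, as in the diff.
    t = iteration % 1000 + 1
    lr_t = lr * np.sqrt(1.0 - beta_2 ** t) / (1.0 - beta_1 ** t)
    # Moment estimates from the current gradient only; no stored m/v accumulators.
    m_t = (1.0 - beta_1) * g
    v_t = (1.0 - beta_2) * np.square(g)
    return p - lr_t * m_t / (np.sqrt(v_t) + eps)
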