SAE: added "simple optimizer" option that allows training bigger networks on the same VRAM

nnlib: added DFLOptimizer, my own optimizer
iperov 2019-03-12 09:32:35 +04:00
parent 3bad8dd8ec
commit fd3b9add2f
2 changed files with 30 additions and 20 deletions


@@ -35,11 +35,20 @@ class SAEModel(ModelBase):
             self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower()
             self.options['learn_mask'] = io.input_bool ("Learn mask? (y/n, ?:help skip:y) : ", True, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case converter forced to use 'not predicted mask' that is not smooth as predicted. Model with style values can be learned without mask and produce same quality result.")
-            self.options['archi'] = io.input_str ("AE architecture (df, liae, vg ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae','vg'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'vg' - currently testing.").lower()
         else:
             self.options['resolution'] = self.options.get('resolution', default_resolution)
             self.options['face_type'] = self.options.get('face_type', default_face_type)
             self.options['learn_mask'] = self.options.get('learn_mask', True)
 
+        if is_first_run or ask_override:
+            def_simple_optimizer = self.options.get('simple_optimizer', False)
+            self.options['simple_optimizer'] = io.input_bool ("Use simple optimizer? (y/n, ?:help skip:%s) : " % ( {True:'y',False:'n'}[def_simple_optimizer] ), def_simple_optimizer, help_message="Simple optimizer allows you to train bigger network or more batch size, sacrificing training accuracy.")
+        else:
+            self.options['simple_optimizer'] = self.options.get('simple_optimizer', False)
+
+        if is_first_run:
+            self.options['archi'] = io.input_str ("AE architecture (df, liae, vg ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae','vg'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'vg' - currently testing.").lower()
+        else:
             self.options['archi'] = self.options.get('archi', default_archi)
 
         default_ae_dims = 256 if self.options['archi'] == 'liae' else 512
@@ -260,6 +269,10 @@ class SAEModel(ModelBase):
         psd_target_dst_anti_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))]
 
         if self.is_training_mode:
-            self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
-            self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
+            if self.options['simple_optimizer']:
+                self.src_dst_opt = DFLOptimizer(lr=5e-5)
+                self.src_dst_mask_opt = DFLOptimizer(lr=5e-5)
+            else:
+                self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
+                self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
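
Note on the VRAM claim (not part of the commit): Keras' Adam allocates two accumulator tensors, m and v, for every trainable weight, while DFLOptimizer in the nnlib diff below keeps no per-parameter state at all (its self.weights list is empty). A rough back-of-envelope sketch, assuming a hypothetical parameter count rather than any real DFL model:

# Rough estimate of the optimizer-state memory the "simple optimizer" frees.
# `param_count` below is hypothetical, purely for illustration.
def adam_state_bytes(param_count, dtype_bytes=4):
    # Keras Adam keeps two float32 slot tensors (m and v) per weight,
    # so optimizer state costs about 2x the model's own weight memory.
    return 2 * param_count * dtype_bytes

param_count = 40_000_000  # hypothetical large SAE network
print("Adam optimizer state: ~%.0f MiB" % (adam_state_bytes(param_count) / 1024**2))
# DFLOptimizer allocates neither tensor, so that VRAM can go toward a
# bigger network or a larger batch size instead.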


@@ -72,7 +72,7 @@ RandomNormal = keras.initializers.RandomNormal
 Model = keras.models.Model
 Adam = keras.optimizers.Adam
-FastAdam = nnlib.FastAdam
+DFLOptimizer = nnlib.DFLOptimizer
 
 modelify = nnlib.modelify
 gaussian_blur = nnlib.gaussian_blur
@@ -434,16 +434,14 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
            return dict(list(base_config.items()) + list(config.items()))
    nnlib.Scale = Scale
 
-    class FastAdam(keras.optimizers.Optimizer):
-        def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, iterations=0, **kwargs):
-            super(FastAdam, self).__init__(**kwargs)
+    class DFLOptimizer(keras.optimizers.Optimizer):
+        def __init__(self, lr=0.001, **kwargs):
+            super(DFLOptimizer, self).__init__(**kwargs)
            with K.name_scope(self.__class__.__name__):
-                self.iterations = K.variable(iterations, dtype='int64', name='iterations')
+                self.iterations = K.variable(0, dtype='int64', name='iterations')
                self.lr = K.variable(lr, name='lr')
-                self.beta_1 = K.variable(beta_1, name='beta_1')
-                self.beta_2 = K.variable(beta_2, name='beta_2')
-            self.epsilon = K.epsilon()
+                self.beta_1 = K.variable(0.9, name='beta_1')
+                self.beta_2 = K.variable(0.998, name='beta_2')
 
        @keras.legacy.interfaces.legacy_get_updates_support
        def get_updates(self, loss, params):
@@ -451,16 +449,16 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
            self.updates = [K.update_add(self.iterations, 1)]
 
            lr = self.lr
-            t = K.cast(self.iterations, K.floatx()) + 1
+            t = ( K.cast(self.iterations, K.floatx()) ) % 1000 + 1
            lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                         (1. - K.pow(self.beta_1, t)))
 
-            self.weights = [self.iterations]
+            self.weights = []
 
            for p, g in zip(params, grads):
                m_t = (1. - self.beta_1) * g
                v_t = (1. - self.beta_2) * K.square(g)
-                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
+                p_t = p - lr_t * m_t / (K.sqrt(v_t) + K.epsilon() )
 
                new_p = p_t
                # Apply constraints.
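
For clarity (not part of the commit): get_updates above computes a single Adam step as if the moment accumulators were always zero — m_t and v_t are rebuilt from the current gradient on every call and nothing is carried over between steps, which is where both the memory saving and the accuracy loss come from. A minimal standalone NumPy sketch of the same rule, assuming eps matches Keras' K.epsilon() default of 1e-7:

import numpy as np

def dfl_step(p, g, iterations, lr=5e-5, beta_1=0.9, beta_2=0.998, eps=1e-7):
    # Sketch of the DFLOptimizer update rule; betas are the fixed values
    # from __init__, lr matches the SAE usage above.
    t = iterations % 1000 + 1                      # bias correction cycles every 1000 steps
    lr_t = lr * np.sqrt(1. - beta_2 ** t) / (1. - beta_1 ** t)
    m_t = (1. - beta_1) * g                        # Adam's m, with zero running history
    v_t = (1. - beta_2) * np.square(g)             # Adam's v, with zero running history
    return p - lr_t * m_t / (np.sqrt(v_t) + eps)   # no state survives to the next step

Because m_t and v_t are derived from the same gradient, m_t / sqrt(v_t) reduces (ignoring eps) to (1 - beta_1) / sqrt(1 - beta_2) * sign(g), so each element takes a magnitude-normalized step — the training-accuracy trade-off the help message warns about.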
@@ -471,15 +469,14 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
            return self.updates
 
        def get_config(self):
-            config = {'iterations': int(K.get_value(self.iterations)),
-                      'lr': float(K.get_value(self.lr)),
+            config = {'lr': float(K.get_value(self.lr)),
                      'beta_1': float(K.get_value(self.beta_1)),
                      'beta_2': float(K.get_value(self.beta_2))
                     }
-            base_config = super(FastAdam, self).get_config()
+            base_config = super(DFLOptimizer, self).get_config()
            return dict(list(base_config.items()) + list(config.items()))
 
-    nnlib.FastAdam = FastAdam
+    nnlib.DFLOptimizer = DFLOptimizer
 
    '''
    not implemented in plaidML