diff --git a/models/Model_SAE/Model.py b/models/Model_SAE/Model.py
index 756010a..c37148c 100644
--- a/models/Model_SAE/Model.py
+++ b/models/Model_SAE/Model.py
@@ -35,11 +35,20 @@ class SAEModel(ModelBase):
             self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower()
             self.options['learn_mask'] = io.input_bool ("Learn mask? (y/n, ?:help skip:y) : ", True, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case converter forced to use 'not predicted mask' that is not smooth as predicted. Model with style values can be learned without mask and produce same quality result.")
-            self.options['archi'] = io.input_str ("AE architecture (df, liae, vg ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae','vg'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'vg' - currently testing.").lower()
         else:
             self.options['resolution'] = self.options.get('resolution', default_resolution)
             self.options['face_type'] = self.options.get('face_type', default_face_type)
-            self.options['learn_mask'] = self.options.get('learn_mask', True)
+            self.options['learn_mask'] = self.options.get('learn_mask', True)
+
+        if is_first_run or ask_override:
+            def_simple_optimizer = self.options.get('simple_optimizer', False)
+            self.options['simple_optimizer'] = io.input_bool ("Use simple optimizer? (y/n, ?:help skip:%s) : " % ( {True:'y',False:'n'}[def_simple_optimizer] ), def_simple_optimizer, help_message="Simple optimizer allows you to train bigger network or more batch size, sacrificing training accuracy.")
+        else:
+            self.options['simple_optimizer'] = self.options.get('simple_optimizer', False)
+
+        if is_first_run:
+            self.options['archi'] = io.input_str ("AE architecture (df, liae, vg ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae','vg'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'vg' - currently testing.").lower()
+        else:
             self.options['archi'] = self.options.get('archi', default_archi)
 
         default_ae_dims = 256 if self.options['archi'] == 'liae' else 512
@@ -260,8 +269,12 @@ class SAEModel(ModelBase):
             psd_target_dst_anti_masked_ar = [ pred_src_dst_sigm_ar[i]*target_dstm_anti_sigm_ar[i] for i in range(len(pred_src_dst_sigm_ar))]
 
         if self.is_training_mode:
-            self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
-            self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
+            if self.options['simple_optimizer']:
+                self.src_dst_opt = DFLOptimizer(lr=5e-5)
+                self.src_dst_mask_opt = DFLOptimizer(lr=5e-5)
+            else:
+                self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
+                self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999)
 
             if self.options['archi'] == 'liae':
                 src_dst_loss_train_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights
diff --git a/nnlib/nnlib.py b/nnlib/nnlib.py
index 0debf31..e3b24cb 100644
--- a/nnlib/nnlib.py
+++ b/nnlib/nnlib.py
@@ -72,7 +72,7 @@ RandomNormal = keras.initializers.RandomNormal
 Model = keras.models.Model
 
 Adam = keras.optimizers.Adam
-FastAdam = nnlib.FastAdam
+DFLOptimizer = nnlib.DFLOptimizer
 
 modelify = nnlib.modelify
 gaussian_blur = nnlib.gaussian_blur
@@ -434,16 +434,14 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
                 return dict(list(base_config.items()) + list(config.items()))
         nnlib.Scale = Scale
 
-        class FastAdam(keras.optimizers.Optimizer):
-            def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, iterations=0, **kwargs):
-                super(FastAdam, self).__init__(**kwargs)
+        class DFLOptimizer(keras.optimizers.Optimizer):
+            def __init__(self, lr=0.001, **kwargs):
+                super(DFLOptimizer, self).__init__(**kwargs)
                 with K.name_scope(self.__class__.__name__):
-                    self.iterations = K.variable(iterations, dtype='int64', name='iterations')
+                    self.iterations = K.variable(0, dtype='int64', name='iterations')
                     self.lr = K.variable(lr, name='lr')
-                    self.beta_1 = K.variable(beta_1, name='beta_1')
-                    self.beta_2 = K.variable(beta_2, name='beta_2')
-
-                self.epsilon = K.epsilon()
+                    self.beta_1 = K.variable(0.9, name='beta_1')
+                    self.beta_2 = K.variable(0.998, name='beta_2')
 
             @keras.legacy.interfaces.legacy_get_updates_support
             def get_updates(self, loss, params):
@@ -451,16 +449,16 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
                 self.updates = [K.update_add(self.iterations, 1)]
 
                 lr = self.lr
-                t = K.cast(self.iterations, K.floatx()) + 1
+                t = ( K.cast(self.iterations, K.floatx()) ) % 1000 + 1
                 lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t)))
 
-                self.weights = [self.iterations]
+                self.weights = []
 
                 for p, g in zip(params, grads):
                     m_t = (1. - self.beta_1) * g
                     v_t = (1. - self.beta_2) * K.square(g)
-                    p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
+                    p_t = p - lr_t * m_t / (K.sqrt(v_t) + K.epsilon() )
 
                     new_p = p_t
 
                     # Apply constraints.
@@ -471,15 +469,14 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
 
                 return self.updates
 
             def get_config(self):
-                config = {'iterations': int(K.get_value(self.iterations)),
-                          'lr': float(K.get_value(self.lr)),
+                config = {'lr': float(K.get_value(self.lr)),
                           'beta_1': float(K.get_value(self.beta_1)),
                           'beta_2': float(K.get_value(self.beta_2))
                          }
-                base_config = super(FastAdam, self).get_config()
+                base_config = super(DFLOptimizer, self).get_config()
                 return dict(list(base_config.items()) + list(config.items()))
-        nnlib.FastAdam = FastAdam
+        nnlib.DFLOptimizer = DFLOptimizer
 
         '''
         not implemented in plaidML
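
Note on what the new DFLOptimizer computes: unlike Adam, it keeps no per-parameter moment accumulators (self.weights = [], and no K.zeros slots are allocated in get_updates), which is where the memory headroom for "bigger network or more batch size" comes from. Because m_t = (1-beta_1)*g and v_t = (1-beta_2)*g^2 are built from the current gradient alone, each step collapses to a scaled sign of the gradient. A minimal NumPy sketch of this update rule, illustrative only (the name dfl_step and the eps default, standing in for K.epsilon(), are not part of the patch):

    import numpy as np

    def dfl_step(p, g, lr=5e-5, b1=0.9, b2=0.998, t=1, eps=1e-7):
        # same bias-correction factor as lr_t in get_updates() above
        lr_t = lr * np.sqrt(1. - b2**t) / (1. - b1**t)
        m_t = (1. - b1) * g                # current gradient only, no running mean
        v_t = (1. - b2) * np.square(g)     # current gradient only, no running variance
        return p - lr_t * m_t / (np.sqrt(v_t) + eps)

For |g| >> eps this reduces to p - lr_t * (1-b1)/np.sqrt(1-b2) * np.sign(g), i.e. sign-SGD with a scheduled step size, consistent with the help text's warning about "sacrificing training accuracy".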
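
The % 1000 in t = ( K.cast(self.iterations, K.floatx()) ) % 1000 + 1 turns Adam's one-time bias-correction warm-up into a schedule that restarts every 1000 iterations; dropping 'iterations' from get_config() fits that change, since the counter no longer needs to survive save/load. A quick sketch of the resulting effective step size, using the beta values from the patch (variable names are illustrative):

    import numpy as np

    lr, b1, b2 = 5e-5, 0.9, 0.998
    for it in (0, 9, 99, 999, 1000):       # global iteration counts
        t = it % 1000 + 1
        lr_t = lr * np.sqrt(1. - b2**t) / (1. - b1**t)
        print(it, t, lr_t / lr)
    # the factor starts near 0.45, dips to about 0.22 around t=10,
    # climbs to about 0.93 by t=1000, then the cycle restarts

So the optimizer effectively runs a sawtooth step-size schedule rather than a monotone warm-up.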