AMP, SAEHD: added option use_fp16. It increases training/inference speed and reduces model size, but the model may crash; enable it only after the first 1-5k iterations.

AMP: removed the lr_dropout option; it is now enabled by default.
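Why the "after 1-5k iters" warning: fp16 has a narrow exponent range and coarse precision, so early training (large losses, tiny fragile gradients) is exactly where it tends to blow up. A minimal NumPy sketch, not DeepFaceLab code, of both failure modes:

```python
import numpy as np

# An update smaller than half of fp16's epsilon (~0.00098) rounds away entirely:
print(np.float16(1.0) - np.float16(1e-4))     # 1.0    -- the step is lost
print(np.float32(1.0) - np.float32(1e-4))     # 0.9999 -- fp32 keeps it

# fp16 overflows past ~65504, turning large activations/losses into inf:
print(np.float16(1000.0) * np.float16(100.0)) # inf (with an overflow warning)
```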
iperov 2021-07-14 23:03:54 +04:00
parent ee1bc83a14
commit 35877dbfd7
4 changed files with 102 additions and 98 deletions

@@ -29,7 +29,8 @@ class SAEHDModel(ModelBase):
         yn_str = {True:'y',False:'n'}
         min_res = 64
         max_res = 640
+        default_usefp16 = self.options['use_fp16'] = self.load_or_def_option('use_fp16', False)
         default_resolution = self.options['resolution'] = self.load_or_def_option('resolution', 128)
         default_face_type = self.options['face_type'] = self.load_or_def_option('face_type', 'f')
         default_models_opt_on_gpu = self.options['models_opt_on_gpu'] = self.load_or_def_option('models_opt_on_gpu', True)
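A hedged sketch of the load-or-default pattern above; the real load_or_def_option lives in ModelBase, and this only assumes its behavior (return the value saved with the model if present, else the default), which is why a brand-new option like use_fp16 comes up False on existing models:

```python
def load_or_def_option(saved_options, name, def_value):
    # Prefer the value stored with the model; otherwise fall back to the default.
    return saved_options.get(name, def_value)

saved = {'resolution': 192}                          # options from a previous run
print(load_or_def_option(saved, 'resolution', 128))  # 192   -- saved value wins
print(load_or_def_option(saved, 'use_fp16', False))  # False -- new option defaults off
```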
@@ -68,7 +69,8 @@ class SAEHDModel(ModelBase):
         self.ask_random_src_flip()
         self.ask_random_dst_flip()
         self.ask_batch_size(suggest_batch_size)
+        self.options['use_fp16'] = io.input_bool ("Use fp16", default_usefp16, help_message='Increases training/inference speed, reduces model size. Model may crash. Enable it after 1-5k iters.')
         if self.is_first_run():
             resolution = io.input_int("Resolution", default_resolution, add_info="64-640", help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16 and 32 for -d archi.")
             resolution = np.clip ( (resolution // 16) * 16, min_res, max_res)
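The np.clip line above is what the help message means by "adjusted to multiple of 16": the entered resolution is floored to a multiple of 16, then clamped to [min_res, max_res]. A quick worked example:

```python
import numpy as np

min_res, max_res = 64, 640
for resolution in (100, 250, 700):
    print(np.clip((resolution // 16) * 16, min_res, max_res))
# 96  -- 100 floored to the nearest multiple of 16 below it
# 240 -- 250 -> 240
# 640 -- 700 -> 688, then clamped to max_res
```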
@@ -260,7 +262,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
         self.target_dstm_em = tf.placeholder (nn.floatx, mask_shape, name='target_dstm_em')

         # Initializing model classes
-        model_archi = nn.DeepFakeArchi(resolution, opts=archi_opts)
+        model_archi = nn.DeepFakeArchi(resolution, use_fp16=self.options['use_fp16'], opts=archi_opts)

         with tf.device (models_opt_device):
             if 'df' in archi_type:
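How use_fp16 plausibly threads through DeepFakeArchi: the usual mixed-precision pattern is to compute inside the layers in float16 and cast back to float32 at the boundary, so losses and optimizers stay in full precision. A toy sketch under that assumption (FP16Block is hypothetical, not the actual archi code):

```python
import tensorflow as tf

class FP16Block:
    # Toy layer: computes in fp16 when enabled, always returns fp32.
    def __init__(self, use_fp16=False):
        self.conv_dtype = tf.float16 if use_fp16 else tf.float32

    def __call__(self, x):
        x = tf.cast(x, self.conv_dtype)  # cast in ...
        x = tf.nn.relu(x)                # ... stand-in for the conv stack ...
        return tf.cast(x, tf.float32)    # ... cast back out for losses/optimizers

print(FP16Block(use_fp16=True)(tf.ones([1, 4])).dtype)  # float32 at the boundary
```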
@@ -301,7 +303,7 @@ Examples: df, liae, df-d, df-ud, liae-ud, ...
         if self.is_training:
             if gan_power != 0:
-                self.D_src = nn.UNetPatchDiscriminator(patch_size=self.options['gan_patch_size'], in_ch=input_ch, base_ch=self.options['gan_dims'], name="D_src")
+                self.D_src = nn.UNetPatchDiscriminator(patch_size=self.options['gan_patch_size'], in_ch=input_ch, base_ch=self.options['gan_dims'], use_fp16=self.options['use_fp16'], name="D_src")
                 self.model_filename_list += [ [self.D_src, 'GAN.npy'] ]

             # Initialize optimizers
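The model_filename_list line above is the registration half of the save convention visible in this file: each sub-model is paired with the .npy file its weights go to, so saving becomes a loop over the list. A self-contained sketch of that assumed convention (Net and the print are stand-ins, not the real nn API):

```python
class Net:
    def __init__(self, name):
        self.name = name
    def save_weights(self, filename):
        print(f'saving {self.name} -> {filename}')  # stand-in for np.save

gan_power = 0.1
model_filename_list = []
if gan_power != 0:  # the discriminator is only built (and saved) when GAN is on
    model_filename_list += [(Net('D_src'), 'GAN.npy')]

for net, filename in model_filename_list:
    net.save_weights(filename)  # prints: saving D_src -> GAN.npy
```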