AMP, SAEHD: added the use_fp16 option, which increases training/inference speed and reduces model size. The model may crash with it enabled, so turn it on only after 1-5k iterations.

AMP: removed the lr_dropout option; lr_dropout is now enabled by default.
2025-07-05 20:42:11 -07:00 · 2021-07-14 23:03:54 +04:00 · 2021-07-14 23:03:54 +04:00 · 35877dbfd7
commit 35877dbfd7
parent ee1bc83a14
4 changed files with 102 additions and 98 deletions
--- a/core/leras/archis/DeepFakeArchi.py
+++ b/core/leras/archis/DeepFakeArchi.py
@ -8,12 +8,15 @@ class DeepFakeArchi(nn.ArchiBase):
    mod     None - default
            'quick'
    """
-    def __init__(self, resolution, mod=None, opts=None):
+    def __init__(self, resolution, use_fp16=False, mod=None, opts=None):
        super().__init__()

        if opts is None:
            opts = ''

+
+        conv_dtype = tf.float16 if use_fp16 else tf.float32
+        
        if mod is None:
            class Downscale(nn.ModelBase):
                def __init__(self, in_ch, out_ch, kernel_size=5, *kwargs ):
@ -23,7 +26,7 @@ class DeepFakeArchi(nn.ArchiBase):
                    super().__init__(*kwargs)

                def on_build(self, *args, **kwargs ):
-                    self.conv1 = nn.Conv2D( self.in_ch, self.out_ch, kernel_size=self.kernel_size, strides=2, padding='SAME')
+                    self.conv1 = nn.Conv2D( self.in_ch, self.out_ch, kernel_size=self.kernel_size, strides=2, padding='SAME', dtype=conv_dtype)

                def forward(self, x):
                    x = self.conv1(x)
@ -40,7 +43,7 @@ class DeepFakeArchi(nn.ArchiBase):
                    last_ch = in_ch
                    for i in range(n_downscales):
                        cur_ch = ch*( min(2**i, 8)  )
-                        self.downs.append ( Downscale(last_ch, cur_ch, kernel_size=kernel_size) )
+                        self.downs.append ( Downscale(last_ch, cur_ch, kernel_size=kernel_size))
                        last_ch = self.downs[-1].get_out_ch()

                def forward(self, inp):
@ -50,8 +53,8 @@ class DeepFakeArchi(nn.ArchiBase):
                    return x

            class Upscale(nn.ModelBase):
-                def on_build(self, in_ch, out_ch, kernel_size=3 ):
-                    self.conv1 = nn.Conv2D( in_ch, out_ch*4, kernel_size=kernel_size, padding='SAME')
+                def on_build(self, in_ch, out_ch, kernel_size=3):
+                    self.conv1 = nn.Conv2D( in_ch, out_ch*4, kernel_size=kernel_size, padding='SAME', dtype=conv_dtype)

                def forward(self, x):
                    x = self.conv1(x)
@ -60,9 +63,9 @@ class DeepFakeArchi(nn.ArchiBase):
                    return x

            class ResidualBlock(nn.ModelBase):
-                def on_build(self, ch, kernel_size=3 ):
-                    self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
-                    self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
+                def on_build(self, ch, kernel_size=3):
+                    self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', dtype=conv_dtype)
+                    self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', dtype=conv_dtype)

                def forward(self, inp):
                    x = self.conv1(inp)
@ -80,8 +83,13 @@ class DeepFakeArchi(nn.ArchiBase):
                def on_build(self):                    
                    self.down1 = DownscaleBlock(self.in_ch, self.e_ch, n_downscales=4, kernel_size=5)

-                def forward(self, inp):
-                    return nn.flatten(self.down1(inp))
+                def forward(self, x):
+                    if use_fp16:
+                        x = tf.cast(x, tf.float16)
+                    x = nn.flatten(self.down1(x))
+                    if use_fp16:
+                        x = tf.cast(x, tf.float32)
+                    return x
                    
                def get_out_res(self, res):
                    return res // (2**4)
@ -98,9 +106,10 @@ class DeepFakeArchi(nn.ArchiBase):

                def on_build(self):
                    in_ch, ae_ch, ae_out_ch = self.in_ch, self.ae_ch, self.ae_out_ch
+    
                    if 'u' in opts:
                        self.dense_norm = nn.DenseNorm()
-
+ 
                    self.dense1 = nn.Dense( in_ch, ae_ch )
                    self.dense2 = nn.Dense( ae_ch, lowest_dense_res * lowest_dense_res * ae_out_ch )
                    self.upscale1 = Upscale(ae_out_ch, ae_out_ch)
@ -112,6 +121,9 @@ class DeepFakeArchi(nn.ArchiBase):
                    x = self.dense1(x)
                    x = self.dense2(x)
                    x = nn.reshape_4D (x, lowest_dense_res, lowest_dense_res, self.ae_out_ch)
+                    
+                    if use_fp16:
+                        x = tf.cast(x, tf.float16)
                    x = self.upscale1(x)
                    return x

@ -122,7 +134,7 @@ class DeepFakeArchi(nn.ArchiBase):
                    return self.ae_out_ch

            class Decoder(nn.ModelBase):
-                def on_build(self, in_ch, d_ch, d_mask_ch ):
+                def on_build(self, in_ch, d_ch, d_mask_ch):                    
                    self.upscale0 = Upscale(in_ch, d_ch*8, kernel_size=3)
                    self.upscale1 = Upscale(d_ch*8, d_ch*4, kernel_size=3)
                    self.upscale2 = Upscale(d_ch*4, d_ch*2, kernel_size=3)
@ -131,25 +143,23 @@ class DeepFakeArchi(nn.ArchiBase):
                    self.res1 = ResidualBlock(d_ch*4, kernel_size=3)
                    self.res2 = ResidualBlock(d_ch*2, kernel_size=3)

-                    self.out_conv  = nn.Conv2D( d_ch*2, 3, kernel_size=1, padding='SAME')
+                    self.out_conv  = nn.Conv2D( d_ch*2, 3, kernel_size=1, padding='SAME', dtype=conv_dtype)

                    self.upscalem0 = Upscale(in_ch, d_mask_ch*8, kernel_size=3)
                    self.upscalem1 = Upscale(d_mask_ch*8, d_mask_ch*4, kernel_size=3)
                    self.upscalem2 = Upscale(d_mask_ch*4, d_mask_ch*2, kernel_size=3)
-                    self.out_convm = nn.Conv2D( d_mask_ch*2, 1, kernel_size=1, padding='SAME')
+                    self.out_convm = nn.Conv2D( d_mask_ch*2, 1, kernel_size=1, padding='SAME', dtype=conv_dtype)

                    if 'd' in opts:
-                        self.out_conv1  = nn.Conv2D( d_ch*2, 3, kernel_size=3, padding='SAME')
-                        self.out_conv2  = nn.Conv2D( d_ch*2, 3, kernel_size=3, padding='SAME')
-                        self.out_conv3  = nn.Conv2D( d_ch*2, 3, kernel_size=3, padding='SAME')
+                        self.out_conv1 = nn.Conv2D( d_ch*2, 3, kernel_size=3, padding='SAME', dtype=conv_dtype)
+                        self.out_conv2 = nn.Conv2D( d_ch*2, 3, kernel_size=3, padding='SAME', dtype=conv_dtype)
+                        self.out_conv3 = nn.Conv2D( d_ch*2, 3, kernel_size=3, padding='SAME', dtype=conv_dtype)
                        self.upscalem3 = Upscale(d_mask_ch*2, d_mask_ch*1, kernel_size=3)
-                        self.out_convm = nn.Conv2D( d_mask_ch*1, 1, kernel_size=1, padding='SAME')
+                        self.out_convm = nn.Conv2D( d_mask_ch*1, 1, kernel_size=1, padding='SAME', dtype=conv_dtype)
                    else:
-                        self.out_convm = nn.Conv2D( d_mask_ch*2, 1, kernel_size=1, padding='SAME')
-
-                def forward(self, inp):
-                    z = inp
+                        self.out_convm = nn.Conv2D( d_mask_ch*2, 1, kernel_size=1, padding='SAME', dtype=conv_dtype)

+                def forward(self, z):
                    x = self.upscale0(z)
                    x = self.res0(x)
                    x = self.upscale1(x)
@ -157,40 +167,11 @@ class DeepFakeArchi(nn.ArchiBase):
                    x = self.upscale2(x)
                    x = self.res2(x)

-
                    if 'd' in opts:
-                        x0 = tf.nn.sigmoid(self.out_conv(x))
-                        x0 = nn.upsample2d(x0)
-                        x1 = tf.nn.sigmoid(self.out_conv1(x))
-                        x1 = nn.upsample2d(x1)
-                        x2 = tf.nn.sigmoid(self.out_conv2(x))
-                        x2 = nn.upsample2d(x2)
-                        x3 = tf.nn.sigmoid(self.out_conv3(x))
-                        x3 = nn.upsample2d(x3)
-
-                        if nn.data_format == "NHWC":
-                            tile_cfg = ( 1, resolution // 2, resolution //2, 1)
-                        else:
-                            tile_cfg = ( 1, 1, resolution // 2, resolution //2 )
-
-                        z0 =  tf.concat ( ( tf.concat ( (  tf.ones ( (1,1,1,1) ), tf.zeros ( (1,1,1,1) ) ), axis=nn.conv2d_spatial_axes[1] ),
-                                            tf.concat ( ( tf.zeros ( (1,1,1,1) ), tf.zeros ( (1,1,1,1) ) ), axis=nn.conv2d_spatial_axes[1] ) ), axis=nn.conv2d_spatial_axes[0] )
-
-                        z0 = tf.tile ( z0, tile_cfg )
-
-                        z1 =  tf.concat ( ( tf.concat ( ( tf.zeros ( (1,1,1,1) ), tf.ones ( (1,1,1,1) ) ), axis=nn.conv2d_spatial_axes[1] ),
-                                            tf.concat ( ( tf.zeros ( (1,1,1,1) ), tf.zeros ( (1,1,1,1) ) ), axis=nn.conv2d_spatial_axes[1] ) ), axis=nn.conv2d_spatial_axes[0] )
-                        z1 = tf.tile ( z1, tile_cfg )
-
-                        z2 =  tf.concat ( ( tf.concat ( (  tf.zeros ( (1,1,1,1) ), tf.zeros ( (1,1,1,1) ) ), axis=nn.conv2d_spatial_axes[1] ),
-                                            tf.concat ( (  tf.ones ( (1,1,1,1) ), tf.zeros ( (1,1,1,1) ) ), axis=nn.conv2d_spatial_axes[1] ) ), axis=nn.conv2d_spatial_axes[0] )
-                        z2 = tf.tile ( z2, tile_cfg )
-
-                        z3 =  tf.concat ( ( tf.concat ( (  tf.zeros ( (1,1,1,1) ), tf.zeros ( (1,1,1,1) ) ), axis=nn.conv2d_spatial_axes[1] ),
-                                            tf.concat ( (  tf.zeros ( (1,1,1,1) ), tf.ones ( (1,1,1,1) ) ), axis=nn.conv2d_spatial_axes[1] ) ), axis=nn.conv2d_spatial_axes[0] )
-                        z3 = tf.tile ( z3, tile_cfg )
-
-                        x = x0*z0 + x1*z1 + x2*z2 + x3*z3
+                        x = tf.nn.sigmoid( nn.depth_to_space(tf.concat( (self.out_conv(x),
+                                                                         self.out_conv1(x),
+                                                                         self.out_conv2(x),
+                                                                         self.out_conv3(x)), nn.conv2d_ch_axis), 2) )
                    else:
                        x = tf.nn.sigmoid(self.out_conv(x))

@ -201,7 +182,11 @@ class DeepFakeArchi(nn.ArchiBase):
                    if 'd' in opts:
                        m = self.upscalem3(m)
                    m = tf.nn.sigmoid(self.out_convm(m))
-
+                    
+                    if use_fp16:
+                        x = tf.cast(x, tf.float32)  
+                        m = tf.cast(m, tf.float32)
+                        
                    return x, m
        
        self.Encoder = Encoder
--- a/core/leras/models/PatchDiscriminator.py
+++ b/core/leras/models/PatchDiscriminator.py
@ -130,12 +130,14 @@ class UNetPatchDiscriminator(nn.ModelBase):
        q=x[np.abs(np.array(x)-target_patch_size).argmin()]
        return s[q][2]

-    def on_build(self, patch_size, in_ch, base_ch = 16):
-    
+    def on_build(self, patch_size, in_ch, base_ch = 16, use_fp16 = False):
+        self.use_fp16 = use_fp16
+        conv_dtype = tf.float16 if use_fp16 else tf.float32 
+        
        class ResidualBlock(nn.ModelBase):
            def on_build(self, ch, kernel_size=3 ):
-                self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
-                self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
+                self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', dtype=conv_dtype)
+                self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', dtype=conv_dtype)

            def forward(self, inp):
                x = self.conv1(inp)
@ -151,20 +153,23 @@ class UNetPatchDiscriminator(nn.ModelBase):
        
        level_chs = { i-1:v for i,v in enumerate([ min( base_ch * (2**i), 512 ) for i in range(len(layers)+1)]) }

-        self.in_conv = nn.Conv2D( in_ch, level_chs[-1], kernel_size=1, padding='VALID')
+        self.in_conv = nn.Conv2D( in_ch, level_chs[-1], kernel_size=1, padding='VALID', dtype=conv_dtype)

        for i, (kernel_size, strides) in enumerate(layers):
-            self.convs.append ( nn.Conv2D( level_chs[i-1], level_chs[i], kernel_size=kernel_size, strides=strides, padding='SAME') )
+            self.convs.append ( nn.Conv2D( level_chs[i-1], level_chs[i], kernel_size=kernel_size, strides=strides, padding='SAME', dtype=conv_dtype) )

-            self.upconvs.insert (0, nn.Conv2DTranspose( level_chs[i]*(2 if i != len(layers)-1 else 1), level_chs[i-1], kernel_size=kernel_size, strides=strides, padding='SAME') )
+            self.upconvs.insert (0, nn.Conv2DTranspose( level_chs[i]*(2 if i != len(layers)-1 else 1), level_chs[i-1], kernel_size=kernel_size, strides=strides, padding='SAME', dtype=conv_dtype) )

-        self.out_conv = nn.Conv2D( level_chs[-1]*2, 1, kernel_size=1, padding='VALID')
+        self.out_conv = nn.Conv2D( level_chs[-1]*2, 1, kernel_size=1, padding='VALID', dtype=conv_dtype)

-        self.center_out  =  nn.Conv2D( level_chs[len(layers)-1], 1, kernel_size=1, padding='VALID')
-        self.center_conv =  nn.Conv2D( level_chs[len(layers)-1], level_chs[len(layers)-1], kernel_size=1, padding='VALID')
+        self.center_out  =  nn.Conv2D( level_chs[len(layers)-1], 1, kernel_size=1, padding='VALID', dtype=conv_dtype)
+        self.center_conv =  nn.Conv2D( level_chs[len(layers)-1], level_chs[len(layers)-1], kernel_size=1, padding='VALID', dtype=conv_dtype)


    def forward(self, x):
+        if self.use_fp16:
+            x = tf.cast(x, tf.float16)
+            
        x = tf.nn.leaky_relu( self.in_conv(x), 0.2 )

        encs = []
@ -178,6 +183,12 @@ class UNetPatchDiscriminator(nn.ModelBase):
            x = tf.nn.leaky_relu( upconv(x), 0.2 )
            x = tf.concat( [enc, x], axis=nn.conv2d_ch_axis)

-        return center_out, self.out_conv(x)
+        x = self.out_conv(x)
+        
+        if self.use_fp16:
+            center_out = tf.cast(center_out, tf.float32)
+            x = tf.cast(x, tf.float32)
+
+        return center_out, x

 nn.UNetPatchDiscriminator = UNetPatchDiscriminator