SAEHD: added 'dfuhd' and 'liaeuhd' archi

2025-08-20 05:23:22 -07:00 · 2020-03-23 22:01:44 +04:00 · 2020-03-23 22:01:44 +04:00 · eddebedcf6
commit eddebedcf6
parent e5f736680d
5 changed files with 190 additions and 155 deletions
--- a/core/leras/archis/DeepFakeArchi.py
+++ b/core/leras/archis/DeepFakeArchi.py
@ -6,7 +6,7 @@ class DeepFakeArchi(nn.ArchiBase):
    resolution
    
    mod     None - default
-            'chervonij'
+            'uhd'
            'quick'
    """
    def __init__(self, resolution, mod=None):        
@ -197,158 +197,7 @@ class DeepFakeArchi(nn.ArchiBase):

                    return tf.nn.sigmoid(self.out_conv(x)), \
                           tf.nn.sigmoid(self.out_convm(m))
-        
-        elif mod == 'chervonij':
-            class Downscale(nn.ModelBase):
-                def __init__(self, in_ch, kernel_size=3, dilations=1, *kwargs ):
-                    self.in_ch = in_ch
-                    self.kernel_size = kernel_size
-                    self.dilations = dilations
-                    super().__init__(*kwargs)

-                def on_build(self, *args, **kwargs ):
-                    self.conv_base1 = nn.Conv2D( self.in_ch, self.in_ch//2, kernel_size=1, strides=1, padding='SAME', dilations=self.dilations)
-                    self.conv_l1 = nn.Conv2D( self.in_ch//2, self.in_ch//2, kernel_size=self.kernel_size, strides=1, padding='SAME', dilations=self.dilations)
-                    self.conv_l2 = nn.Conv2D( self.in_ch//2, self.in_ch//2, kernel_size=self.kernel_size, strides=2, padding='SAME', dilations=self.dilations)
-
-                    self.conv_base2 = nn.Conv2D( self.in_ch, self.in_ch//2, kernel_size=1, strides=1, padding='SAME', dilations=self.dilations)
-                    self.conv_r1 = nn.Conv2D( self.in_ch//2, self.in_ch//2, kernel_size=self.kernel_size, strides=2, padding='SAME', dilations=self.dilations)
-
-                    self.pool_size = [1,1,2,2] if nn.data_format == 'NCHW' else [1,2,2,1]
-                def forward(self, x):
-
-                    x_l = self.conv_base1(x)
-                    x_l = self.conv_l1(x_l)
-                    x_l = self.conv_l2(x_l)
-
-                    x_r = self.conv_base2(x)
-                    x_r = self.conv_r1(x_r)
-
-                    x_pool = tf.nn.max_pool(x, ksize=self.pool_size, strides=self.pool_size, padding='SAME', data_format=nn.data_format)
-
-                    x = tf.concat([x_l, x_r, x_pool], axis=nn.conv2d_ch_axis)
-                    x = tf.nn.leaky_relu(x, 0.1)
-                    return x
-
-            class Upscale(nn.ModelBase):
-                def on_build(self, in_ch, out_ch, kernel_size=3 ):
-                    self.conv1 = nn.Conv2D( in_ch, out_ch, kernel_size=kernel_size, padding='SAME')
-                    self.conv2 = nn.Conv2D( out_ch, out_ch, kernel_size=kernel_size, padding='SAME')
-                    self.conv3 = nn.Conv2D( out_ch, out_ch, kernel_size=kernel_size, padding='SAME')
-                    self.conv4 = nn.Conv2D( out_ch, out_ch, kernel_size=kernel_size, padding='SAME')
-
-                def forward(self, x):
-                    x0 = self.conv1(x)
-                    x1 = self.conv2(x0)
-                    x2 = self.conv3(x1)
-                    x3 = self.conv4(x2)
-                    x = tf.concat([x0, x1, x2, x3], axis=nn.conv2d_ch_axis)
-                    x = tf.nn.leaky_relu(x, 0.1)
-                    x = nn.depth_to_space(x, 2)
-                    return x
-
-            class ResidualBlock(nn.ModelBase):
-                def on_build(self, ch, kernel_size=3 ):
-                    self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
-                    self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
-                    self.norm = nn.FRNorm2D(ch)
-
-                def forward(self, inp):
-                    x = self.conv1(inp)
-                    x = tf.nn.leaky_relu(x, 0.2)
-                    x = self.conv2(x)
-                    x = self.norm(inp + x)
-                    x = tf.nn.leaky_relu(x, 0.2)
-                    return x
-
-            class Encoder(nn.ModelBase):
-                def on_build(self, in_ch, e_ch, **kwargs):
-                    self.conv0 = nn.Conv2D(in_ch, e_ch, kernel_size=3, padding='SAME')
-
-                    self.down0 = Downscale(e_ch)
-                    self.down1 = Downscale(e_ch*2)
-                    self.down2 = Downscale(e_ch*4)
-                    self.down3 = Downscale(e_ch*8)
-                    self.down4 = Downscale(e_ch*16)
-
-                def forward(self, inp):
-                    x = self.conv0(inp)
-                    x = self.down0(x)
-                    x = self.down1(x)
-                    x = self.down2(x)
-                    x = self.down3(x)
-                    x = self.down4(x)
-                    x = nn.flatten(x)
-                    return x
-            
-            lowest_dense_res = resolution // 32
-            
-            class Inter(nn.ModelBase):
-                def __init__(self, in_ch, ae_ch, ae_out_ch, **kwargs):
-                    self.in_ch, self.ae_ch, self.ae_out_ch = in_ch, ae_ch, ae_out_ch
-                    super().__init__(**kwargs)
-
-                def on_build(self, **kwargs):
-                    in_ch, ae_ch, ae_out_ch = self.in_ch, self.ae_ch, self.ae_out_ch
-
-                    self.dense_l = nn.Dense( in_ch, ae_ch//2, kernel_initializer=tf.initializers.orthogonal)
-                    self.dense_r = nn.Dense( in_ch, ae_ch//2, kernel_initializer=tf.initializers.orthogonal)#maxout_ch=4, 
-                    self.dense = nn.Dense( ae_ch, lowest_dense_res * lowest_dense_res * (ae_out_ch//2), kernel_initializer=tf.initializers.orthogonal)
-                    self.upscale1 = Upscale(ae_out_ch//2, ae_out_ch//2)
-
-                def forward(self, inp):
-                    x0 = self.dense_l(inp)
-                    x1 = self.dense_r(inp)
-                    x = tf.concat([x0, x1], axis=-1)
-                    x = self.dense(x)
-                    x = nn.reshape_4D (x, lowest_dense_res, lowest_dense_res, self.ae_out_ch//2)
-                    x = self.upscale1(x)
-
-                    return x
-
-                def get_out_ch(self):
-                    return self.ae_out_ch//2
-
-            class Decoder(nn.ModelBase):
-                def on_build(self, in_ch, d_ch, d_mask_ch, **kwargs):
-
-                    self.upscale0 = Upscale(in_ch, d_ch*8)
-                    self.upscale1 = Upscale(d_ch*8, d_ch*4)
-                    self.upscale2 = Upscale(d_ch*4, d_ch*2)
-                    self.upscale3 = Upscale(d_ch*2, d_ch)
-
-                    self.res0 = ResidualBlock(d_ch*8)
-                    self.res1 = ResidualBlock(d_ch*4)
-                    self.res2 = ResidualBlock(d_ch*2)
-                    self.res3 = ResidualBlock(d_ch)
-
-                    self.out_conv  = nn.Conv2D( d_ch, 3, kernel_size=1, padding='SAME')
-
-                    self.upscalem0 = Upscale(in_ch, d_mask_ch*8, kernel_size=3)
-                    self.upscalem1 = Upscale(d_mask_ch*8, d_mask_ch*4, kernel_size=3)
-                    self.upscalem2 = Upscale(d_mask_ch*4, d_mask_ch*2, kernel_size=3)
-                    self.upscalem3 = Upscale(d_mask_ch*2, d_mask_ch, kernel_size=3)
-                    self.out_convm = nn.Conv2D( d_mask_ch, 1, kernel_size=1, padding='SAME')
-
-                def forward(self, inp):
-                    z = inp
-
-                    x = self.upscale0(z)
-                    x = self.res0(x)
-                    x = self.upscale1(x)
-                    x = self.res1(x)
-                    x = self.upscale2(x)
-                    x = self.res2(x)
-                    x = self.upscale3(x)
-                    x = self.res3(x)
-
-                    m = self.upscalem0(z)
-                    m = self.upscalem1(m)
-                    m = self.upscalem2(m)
-                    m = self.upscalem3(m)
-
-                    return tf.nn.sigmoid(self.out_conv(x)), \
-                            tf.nn.sigmoid(self.out_convm(m))
        elif mod == 'quick':
            class Downscale(nn.ModelBase):
                def __init__(self, in_ch, out_ch, kernel_size=5, dilations=1, subpixel=True, use_activator=True, *kwargs ):
@ -482,7 +331,144 @@ class DeepFakeArchi(nn.ArchiBase):

                    return tf.nn.sigmoid(self.out_conv(x)), \
                           tf.nn.sigmoid(self.out_convm(y))
+        elif mod == 'uhd':
+            
+            # One downscaling step of the 'uhd' archi, built around a single Conv2D.
+            #   subpixel=True : conv with out_ch//4 filters at stride 1, then nn.space_to_depth(x, 2)
+            #   subpixel=False: conv with out_ch filters at stride 2
+            # An optional leaky ReLU (alpha 0.1) is applied last.
+            class Downscale(nn.ModelBase):
+                # NOTE(review): `*kwargs` is a positional var-args parameter (it is not `**kwargs`),
+                # so extra keyword arguments are not accepted by this signature - confirm intended.
+                def __init__(self, in_ch, out_ch, kernel_size=5, dilations=1, subpixel=True, use_activator=True, *kwargs ):
+                    # Configuration is stored before calling the base constructor and read back in
+                    # on_build() - presumably the base class triggers on_build(); confirm in nn.ModelBase.
+                    self.in_ch = in_ch
+                    self.out_ch = out_ch
+                    self.kernel_size = kernel_size
+                    self.dilations = dilations
+                    self.subpixel = subpixel
+                    self.use_activator = use_activator
+                    super().__init__(*kwargs)

+                # Creates the only layer of this block; filter count and stride depend on `subpixel`.
+                def on_build(self, *args, **kwargs ):
+                    self.conv1 = nn.Conv2D( self.in_ch,
+                                            self.out_ch // (4 if self.subpixel else 1),
+                                            kernel_size=self.kernel_size,
+                                            strides=1 if self.subpixel else 2,
+                                            padding='SAME', dilations=self.dilations)
+
+                # conv -> (optional) space_to_depth with block size 2 -> (optional) leaky ReLU.
+                def forward(self, x):
+                    x = self.conv1(x)
+                    if self.subpixel:
+                        x = nn.space_to_depth(x, 2)
+                    if self.use_activator:
+                        x = tf.nn.leaky_relu(x, 0.1)
+                    return x
+
+                # Returns out_ch rounded down to a multiple of 4.
+                # NOTE(review): the value does not depend on `subpixel`; with subpixel=False the conv
+                # has exactly out_ch filters, so the two differ when out_ch is not a multiple of 4.
+                def get_out_ch(self):
+                    return (self.out_ch // 4) * 4
+
+            # Sequential stack of `n_downscales` Downscale layers.
+            class DownscaleBlock(nn.ModelBase):
+                # in_ch        : channel count fed to the first Downscale
+                # ch           : base channel count; layer i is built with ch * min(2**i, 8)
+                # n_downscales : number of Downscale layers to create
+                # kernel_size, dilations, subpixel : forwarded unchanged to every Downscale
+                def on_build(self, in_ch, ch, n_downscales, kernel_size, dilations=1, subpixel=True):
+                    self.downs = []
+
+                    last_ch = in_ch
+                    for i in range(n_downscales):
+                        # Channel multiplier doubles per layer and is capped at 8.
+                        cur_ch = ch*( min(2**i, 8)  )
+                        self.downs.append ( Downscale(last_ch, cur_ch, kernel_size=kernel_size, dilations=dilations, subpixel=subpixel) )
+                        # The next layer's input width is whatever the previous layer reports.
+                        last_ch = self.downs[-1].get_out_ch()
+
+                # Applies the Downscale layers in creation order.
+                def forward(self, inp):
+                    x = inp
+                    for down in self.downs:
+                        x = down(x)
+                    return x
+
+            # Upscaling step: Conv2D to out_ch*4 channels, leaky ReLU (alpha 0.1), then
+            # nn.depth_to_space(x, 2).
+            # NOTE(review): presumably depth_to_space with block size 2 turns the out_ch*4 channels
+            # into out_ch channels at twice the spatial size - confirm against nn.depth_to_space.
+            class Upscale(nn.ModelBase):
+                def on_build(self, in_ch, out_ch, kernel_size=3 ):
+                    self.conv1 = nn.Conv2D( in_ch, out_ch*4, kernel_size=kernel_size, padding='SAME')
+
+                def forward(self, x):
+                    x = self.conv1(x)
+                    x = tf.nn.leaky_relu(x, 0.1)
+                    x = nn.depth_to_space(x, 2)
+                    return x
+
+            # Residual block whose branch expands to ch*2 channels and projects back to ch.
+            # The branch is merged into the input through nn.ScaleAdd instead of a plain addition.
+            class ResidualBlock(nn.ModelBase):
+                def on_build(self, ch, kernel_size=3 ):
+                    self.conv1 = nn.Conv2D( ch, ch*2, kernel_size=kernel_size, padding='SAME')
+                    self.conv2 = nn.Conv2D( ch*2, ch, kernel_size=kernel_size, padding='SAME')
+                    self.scale_add = nn.ScaleAdd(ch)
+
+                # conv1 -> leaky ReLU(0.2) -> conv2 -> leaky ReLU(0.2) -> scale_add([inp, branch])
+                def forward(self, inp):
+                    x = self.conv1(inp)
+                    x = tf.nn.leaky_relu(x, 0.2)
+                    x = self.conv2(x)
+                    x = tf.nn.leaky_relu(x, 0.2)
+                    # Input first, branch second: ScaleAdd scales only its second input.
+                    x = self.scale_add([inp, x])
+                    return x
+
+            # Encoder of the 'uhd' archi: four stride-2 Downscale layers (kernel 5, subpixel=False)
+            # followed by nn.flatten.
+            class Encoder(nn.ModelBase):
+                # in_ch : channels of the input image tensor; e_ch : base encoder channel count.
+                def on_build(self, in_ch, e_ch, **kwargs):
+                    self.down1 = DownscaleBlock(in_ch, e_ch, n_downscales=4, kernel_size=5, dilations=1, subpixel=False)
+
+                def forward(self, inp):
+                    x = nn.flatten(self.down1(inp))
+                    return x
+
+            # Side length of the feature map produced by the dense layers of Inter.
+            # The divisor 16 corresponds to the four stride-2 downscales used by Encoder (2**4).
+            lowest_dense_res = resolution // 16
+
+            # Bottleneck: DenseNorm -> Dense(in_ch, ae_ch) -> Dense(ae_ch, res*res*ae_out_ch)
+            # -> reshape to a 4D feature map -> one Upscale.
+            class Inter(nn.ModelBase):
+                def on_build(self, in_ch, ae_ch, ae_out_ch, **kwargs):
+                    self.ae_out_ch = ae_out_ch
+                    self.dense_norm = nn.DenseNorm()
+                    self.dense1 = nn.Dense( in_ch, ae_ch )
+                    self.dense2 = nn.Dense( ae_ch, lowest_dense_res * lowest_dense_res * ae_out_ch )
+                    self.upscale1 = Upscale(ae_out_ch, ae_out_ch)                   
+
+                def forward(self, inp):
+                    x = self.dense_norm(inp)
+                    x = self.dense1(x)
+                    x = self.dense2(x)
+                    x = nn.reshape_4D (x, lowest_dense_res, lowest_dense_res, self.ae_out_ch)
+                    x = self.upscale1(x)
+                    return x
+
+                # Returns lowest_dense_res captured from the enclosing scope.
+                # NOTE(review): this is the size before upscale1 is applied in forward(); the
+                # tensor returned by forward() is presumably twice as large - confirm what callers expect.
+                @staticmethod
+                def get_code_res():
+                    return lowest_dense_res
+
+                # Channel count of the tensor returned by forward().
+                def get_out_ch(self):
+                    return self.ae_out_ch
+
+            # Decoder of the 'uhd' archi with two independent paths over the same input:
+            #   image path: 3 x (Upscale + ResidualBlock), then a 1x1 conv to 3 channels
+            #   mask path : 3 x Upscale, then a 1x1 conv to 1 channel
+            # Both outputs go through a sigmoid.
+            class Decoder(nn.ModelBase):
+                # in_ch : channels of the incoming code tensor
+                # d_ch / d_mask_ch : base channel counts of the image / mask path
+                def on_build(self, in_ch, d_ch, d_mask_ch, **kwargs ):
+
+                    self.upscale0 = Upscale(in_ch, d_ch*8, kernel_size=3)
+                    self.upscale1 = Upscale(d_ch*8, d_ch*4, kernel_size=3)
+                    self.upscale2 = Upscale(d_ch*4, d_ch*2, kernel_size=3)
+
+                    self.res0 = ResidualBlock(d_ch*8, kernel_size=3)
+                    self.res1 = ResidualBlock(d_ch*4, kernel_size=3)
+                    self.res2 = ResidualBlock(d_ch*2, kernel_size=3)
+
+                    # The last image upscale outputs d_ch*2 channels, hence the 1x1 conv input width.
+                    self.out_conv  = nn.Conv2D( d_ch*2, 3, kernel_size=1, padding='SAME')
+
+                    self.upscalem0 = Upscale(in_ch, d_mask_ch*8, kernel_size=3)
+                    self.upscalem1 = Upscale(d_mask_ch*8, d_mask_ch*4, kernel_size=3)
+                    self.upscalem2 = Upscale(d_mask_ch*4, d_mask_ch*2, kernel_size=3)
+                    self.out_convm = nn.Conv2D( d_mask_ch*2, 1, kernel_size=1, padding='SAME')
+
+                # Returns a tuple (image, mask), both passed through tf.nn.sigmoid.
+                def forward(self, inp):
+                    z = inp
+
+                    # Image path: each upscale is followed by its residual block.
+                    x = self.upscale0(z)
+                    x = self.res0(x)
+                    x = self.upscale1(x)
+                    x = self.res1(x)
+                    x = self.upscale2(x)
+                    x = self.res2(x)
+
+                    # Mask path: upscales only, fed from the same input z.
+                    m = self.upscalem0(z)
+                    m = self.upscalem1(m)
+                    m = self.upscalem2(m)
+
+                    return tf.nn.sigmoid(self.out_conv(x)), \
+                           tf.nn.sigmoid(self.out_convm(m))
+                           
        self.Encoder = Encoder
        self.Inter = Inter
        self.Decoder = Decoder
--- a/core/leras/layers/DenseNorm.py
+++ b/core/leras/layers/DenseNorm.py
@ -0,0 +1,16 @@
+from core.leras import nn
+tf = nn.tf
+
+# Normalization layer without weights in the code shown here: divides the input by the
+# root of the mean of its squares taken over the last axis (plus eps).
+class DenseNorm(nn.LayerBase):
+    # dense : stored on the instance; not read anywhere in this class
+    # eps   : value added to the mean of squares before the reciprocal square root
+    # dtype : dtype of the eps constant; defaults to nn.floatx when None
+    def __init__(self, dense=False, eps=1e-06, dtype=None, **kwargs):
+        self.dense = dense        
+        if dtype is None:
+            dtype = nn.floatx
+        self.eps = tf.constant(eps, dtype=dtype, name="epsilon")
+
+        super().__init__(**kwargs)
+
+    # NOTE(review): __call__ is overridden directly rather than defining forward() as
+    # other layers in this change do - confirm against nn.LayerBase that this is intended.
+    def __call__(self, x):
+        # x * rsqrt(mean(x**2, last axis, keepdims) + eps)
+        return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=-1, keepdims=True) + self.eps)
+        
+# Expose the layer as nn.DenseNorm.
+nn.DenseNorm = DenseNorm
--- a/core/leras/layers/ScaleAdd.py
+++ b/core/leras/layers/ScaleAdd.py
@ -0,0 +1,31 @@
+from core.leras import nn
+tf = nn.tf
+
+# Adds two tensors, scaling the second one by a trainable per-channel weight:
+# result = x0 + x1 * weight.
+class ScaleAdd(nn.LayerBase):
+    # ch    : number of channels, i.e. the length of the weight vector
+    # dtype : dtype of the weight variable; defaults to nn.floatx when None
+    def __init__(self, ch, dtype=None, **kwargs):
+        if dtype is None:
+            dtype = nn.floatx
+        self.dtype = dtype
+        self.ch = ch
+
+        super().__init__(**kwargs)
+
+    # The weight is initialized with zeros, so the initial output of forward() equals x0.
+    def build_weights(self):
+        self.weight = tf.get_variable("weight",(self.ch,), dtype=self.dtype, initializer=tf.initializers.zeros() )
+
+    def get_weights(self):
+        return [self.weight]
+
+    # inputs : a pair [x0, x1]; only x1 is multiplied by the weight.
+    def forward(self, inputs):
+        # Reshape the weight so that it broadcasts along the channel axis of the active data format.
+        if nn.data_format == "NHWC":
+            shape = (1,1,1,self.ch)
+        else:
+            shape = (1,self.ch,1,1)
+
+        weight = tf.reshape ( self.weight, shape )
+
+        x0, x1 = inputs
+        x = x0 + x1*weight
+
+        return x
+# Expose the layer as nn.ScaleAdd.
+nn.ScaleAdd = ScaleAdd
--- a/core/leras/layers/init.py
+++ b/core/leras/layers/init.py
@ -9,4 +9,6 @@ from .BlurPool import *
 from .BatchNorm2D import *
 from .FRNorm2D import *

-from .TLU import *
+from .TLU import *
+from .ScaleAdd import *
+from .DenseNorm import *
--- a/models/Model_SAEHD/Model.py
+++ b/models/Model_SAEHD/Model.py
@ -61,7 +61,7 @@ class SAEHDModel(ModelBase):
            resolution = np.clip ( (resolution // 16) * 16, 64, 512)
            self.options['resolution'] = resolution
            self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf'], help_message="Half / mid face / full face / whole face. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face. 'Whole face' covers full area of face include forehead, but requires manual merge in Adobe After Effects.").lower()
-            self.options['archi'] = io.input_str ("AE architecture", default_archi, ['df','liae','dfhd','liaehd'], help_message="'df' keeps faces more natural.\n'liae' can fix overly different face shapes.\n'hd' are experimental versions.").lower()
+            self.options['archi'] = io.input_str ("AE architecture", default_archi, ['df','liae','dfhd','liaehd','dfuhd','liaeuhd'], help_message="'df' keeps faces more natural.\n'liae' can fix overly different face shapes.\n'hd' are experimental versions.").lower()

        default_d_dims             = 48 if self.options['archi'] == 'dfhd' else 64
        default_d_dims             = self.options['d_dims']             = self.load_or_def_option('d_dims', default_d_dims)
@ -169,7 +169,7 @@ class SAEHDModel(ModelBase):
            self.target_dstm_all = tf.placeholder (nn.floatx, mask_shape)
            
        # Initializing model classes
-        model_archi = nn.DeepFakeArchi(resolution)  
+        model_archi = nn.DeepFakeArchi(resolution, mod='uhd' if 'uhd' in archi else None)  
        
        with tf.device (models_opt_device):
            if 'df' in archi: