Colombo 2019-12-27 21:25:59 +04:00
parent 0833a38bb9
commit 3849172a43

@@ -70,6 +70,7 @@ PixelNormalization = nnlib.PixelNormalization
Activation = KL.Activation
LeakyReLU = KL.LeakyReLU
ELU = KL.ELU
GeLU = nnlib.GeLU
ReLU = KL.ReLU
PReLU = KL.PReLU
tanh = KL.Activation('tanh')
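With the alias above in place, the new layer can be used like any of the neighboring activations. A minimal sketch (hypothetical model code, assuming nnlib's Keras import has run so that KL, GeLU, and Model resolve as in the rest of this file):

x = KL.Input((64,))
h = KL.Dense(128)(x)
h = GeLU()(h)   # drop-in replacement for LeakyReLU() / ReLU() / etc.
model = Model(x, h)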
@@ -1300,6 +1301,37 @@ NLayerDiscriminator = nnlib.NLayerDiscriminator
        return dict(list(base_config.items()) + list(config.items()))
nnlib.DenseMaxout = DenseMaxout
class GeLU(KL.Layer):
    """Gaussian Error Linear Unit.
    A smoother version of ReLU, generally used in BERT and other
    BERT-architecture-based models.
    Original paper: https://arxiv.org/abs/1606.08415
    Input shape:
        Arbitrary. Use the keyword argument `input_shape`
        (tuple of integers, does not include the samples axis)
        when using this layer as the first layer in a model.
    Output shape:
        Same shape as the input.
    """
    def __init__(self, approximate=True, **kwargs):
        super(GeLU, self).__init__(**kwargs)
        # Stored for get_config() round-tripping; call() below always
        # applies the tanh approximation regardless of this flag.
        self.approximate = approximate
        self.supports_masking = True

    def call(self, inputs):
        # Tanh approximation of the Gaussian CDF from the GELU paper:
        # Phi(x) ~= 0.5 * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        cdf = 0.5 * (1.0 + K.tanh(np.sqrt(2.0 / np.pi) * (inputs + 0.044715 * K.pow(inputs, 3))))
        return inputs * cdf

    def get_config(self):
        config = {'approximate': self.approximate}
        base_config = super(GeLU, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def compute_output_shape(self, input_shape):
        # Element-wise activation: output shape equals input shape.
        return input_shape
nnlib.GeLU = GeLU
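For reference, the tanh expression in call() approximates the exact erf-based GELU from the paper linked above. A quick standalone check (plain NumPy/SciPy, independent of this file; gelu_exact and gelu_tanh are illustrative names, not part of the commit) shows the two stay within roughly 1e-3 of each other:

import numpy as np
from scipy.special import erf

def gelu_exact(x):
    # x * Phi(x), where Phi is the standard normal CDF
    return x * 0.5 * (1.0 + erf(x / np.sqrt(2.0)))

def gelu_tanh(x):
    # The approximation used in GeLU.call() above
    return x * 0.5 * (1.0 + np.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * x ** 3)))

x = np.linspace(-5.0, 5.0, 201)
print(np.abs(gelu_exact(x) - gelu_tanh(x)).max())  # ~1e-3 or smaller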
def CAInitializerMP( conv_weights_list ):
    # Convolution Aware Initialization: https://arxiv.org/abs/1702.06295
    data = [ (i, K.int_shape(conv_weights)) for i, conv_weights in enumerate(conv_weights_list) ]