Upgraded to TF version 1.13.2

Removed the wait at first launch for most graphics cards.

Increased speed of training by 10-20%, but you have to retrain all models from scratch.

SAEHD:

Added the 'use float16' option.
	Experimental option. Halves the model size.
	Increases training speed.
	Decreases model accuracy.
	The model may collapse or fail to train.
	The model may fail to learn the mask at large resolutions.

The true_face_training option is replaced by
"True face power" (range 0.0 .. 1.0).
Experimental option. Discriminates the result face so that it looks more like the src face. A higher value means stronger discrimination.
Comparison - https://i.imgur.com/czScS9q.png
This commit is contained in:
Colombo 2020-01-25 21:58:19 +04:00
commit 76ca79216e
49 changed files with 1320 additions and 1297 deletions

View file

@ -1,7 +1,7 @@
import sys
import ctypes
import os
class Device(object):
def __init__(self, index, name, total_mem, free_mem, cc=0):
self.index = index
@ -11,25 +11,25 @@ class Device(object):
self.total_mem_gb = total_mem / 1024**3
self.free_mem = free_mem
self.free_mem_gb = free_mem / 1024**3
def __str__(self):
return f"[{self.index}]:[{self.name}][{self.free_mem_gb:.3}/{self.total_mem_gb :.3}]"
class Devices(object):
all_devices = None
def __init__(self, devices):
self.devices = devices
def __len__(self):
return len(self.devices)
def __getitem__(self, key):
result = self.devices[key]
if isinstance(key, slice):
return Devices(result)
return result
def __iter__(self):
for device in self.devices:
yield device
@ -59,14 +59,14 @@ class Devices(object):
if device.index == idx:
return device
return None
def get_devices_from_index_list(self, idx_list):
result = []
for device in self.devices:
if device.index in idx_list:
result += [device]
return Devices(result)
def get_equal_devices(self, device):
device_name = device.name
result = []
@ -74,7 +74,7 @@ class Devices(object):
if device.name == device_name:
result.append (device)
return Devices(result)
def get_devices_at_least_mem(self, totalmemsize_gb):
result = []
for device in self.devices:
@ -84,7 +84,7 @@ class Devices(object):
@staticmethod
def initialize_main_env():
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
for libname in libnames:
try:
@ -122,40 +122,40 @@ class Devices(object):
if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0:
cc = cc_major.value * 10 + cc_minor.value
if cc >= min_cc:
devices.append ( {'name' : name.split(b'\0', 1)[0].decode(),
devices.append ( {'name' : name.split(b'\0', 1)[0].decode(),
'total_mem' : totalMem.value,
'free_mem' : freeMem.value,
'cc' : cc
})
cuda.cuCtxDetach(context)
os.environ['NN_DEVICES_INITIALIZED'] = '1'
os.environ['NN_DEVICES_COUNT'] = str(len(devices))
for i, device in enumerate(devices):
os.environ['NN_DEVICES_COUNT'] = str(len(devices))
for i, device in enumerate(devices):
os.environ[f'NN_DEVICE_{i}_NAME'] = device['name']
os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem'])
os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem'])
os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc'])
@staticmethod
def getDevices():
if Devices.all_devices is None:
def getDevices():
if Devices.all_devices is None:
if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1:
raise Exception("nn devices are not initialized. Run initialize_main_env() in main process.")
raise Exception("nn devices are not initialized. Run initialize_main_env() in main process.")
devices = []
for i in range ( int(os.environ['NN_DEVICES_COUNT']) ):
for i in range ( int(os.environ['NN_DEVICES_COUNT']) ):
devices.append ( Device(index=i,
name=os.environ[f'NN_DEVICE_{i}_NAME'],
name=os.environ[f'NN_DEVICE_{i}_NAME'],
total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']),
free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']),
cc=int(os.environ[f'NN_DEVICE_{i}_CC']) ))
Devices.all_devices = Devices(devices)
return Devices.all_devices
"""
if Devices.all_devices is None:
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
if Devices.all_devices is None:
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
for libname in libnames:
@ -195,7 +195,7 @@ if Devices.all_devices is None:
cc = cc_major.value * 10 + cc_minor.value
if cc >= min_cc:
devices.append ( Device(index=i,
name=name.split(b'\0', 1)[0].decode(),
name=name.split(b'\0', 1)[0].decode(),
total_mem=totalMem.value,
free_mem=freeMem.value,
cc=cc) )

View file

@ -11,17 +11,14 @@ def initialize_initializers(nn):
class initializers():
class ca (init_ops.Initializer):
def __init__(self, dtype=None):
pass
def __call__(self, shape, dtype=None, partition_info=None):
return tf.zeros( shape, name="_cai_")
return tf.zeros( shape, dtype=dtype, name="_cai_")
@staticmethod
def generate_batch( data_list, eps_std=0.05 ):
# list of (shape, np.dtype)
return CAInitializerSubprocessor (data_list).run()
nn.initializers = initializers
class CAInitializerSubprocessor(Subprocessor):
@ -62,7 +59,7 @@ class CAInitializerSubprocessor(Subprocessor):
x = x * np.sqrt( (2/fan_in) / np.var(x) )
x = np.transpose( x, (2, 3, 1, 0) )
return x.astype(dtype)
class Cli(Subprocessor.Cli):
#override
def process_data(self, data):

View file

@ -8,7 +8,7 @@ import numpy as np
def initialize_layers(nn):
tf = nn.tf
class Saveable():
def __init__(self, name=None):
self.name = name
@ -65,6 +65,8 @@ def initialize_layers(nn):
sub_w_name = "/".join(w_name_split[1:])
w_val = d.get(sub_w_name, None)
w_val = np.reshape( w_val, w.shape.as_list() )
if w_val is None:
io.log_err(f"Weight {w.name} was not loaded from file {filename}")
tuples.append ( (w, w.initializer) )
@ -77,8 +79,8 @@ def initialize_layers(nn):
def init_weights(self):
ops = []
ca_tuples_w = []
ca_tuples_w = []
ca_tuples = []
for w in self.get_weights():
initializer = w.initializer
@ -92,12 +94,12 @@ def initialize_layers(nn):
if len(ops) != 0:
nn.tf_sess.run (ops)
if len(ca_tuples) != 0:
nn.tf_batch_set_value( [*zip(ca_tuples_w, nn.initializers.ca.generate_batch (ca_tuples))] )
nn.Saveable = Saveable
class LayerBase():
def __init__(self, name=None, **kwargs):
self.name = name
@ -124,7 +126,7 @@ def initialize_layers(nn):
nn.tf_batch_set_value (tuples)
nn.LayerBase = LayerBase
class ModelBase(Saveable):
def __init__(self, *args, name=None, **kwargs):
super().__init__(name=name)
@ -157,33 +159,33 @@ def initialize_layers(nn):
def build(self):
    """Build the model inside a variable scope named after it.

    Calls on_build() once. If on_build() returns a generator, it is advanced
    one step per loop pass. After each step, every attribute added to the
    instance since the previous pass is handed to _build_sub(). Sets
    ``self.built`` to True when done.
    """
    with tf.variable_scope(self.name):
        known_names = []

        stepper = self.on_build(*self.args, **self.kwargs)
        if not isinstance(stepper, types.GeneratorType):
            # plain (non-generator) on_build: a single pass is enough
            stepper = None

        while True:
            if stepper is not None:
                try:
                    next(stepper)
                except StopIteration:
                    stepper = None

            attrs = vars(self)
            fresh_names = self.xor_list(known_names, list(attrs.keys()))
            for attr_name in fresh_names:
                self._build_sub(attrs[attr_name], attr_name)
            known_names += fresh_names

            if stepper is None:
                break

    self.built = True
#override
@ -211,9 +213,9 @@ def initialize_layers(nn):
def on_build(self, *args, **kwargs):
    """
    Create the model's layers here.

    May be written as a generator: `yield` while the build is not finished,
    so that dependency models get initialized before it continues.
    """
    pass
@ -227,16 +229,16 @@ def initialize_layers(nn):
self.build()
return self.forward(*args, **kwargs)
def compute_output_shape(self, shapes):
if not self.built:
self.build()
not_list = False
if not isinstance(shapes, list):
not_list = True
shapes = [shapes]
with tf.device('/CPU:0'):
# CPU tensors will not impact any performance, only slightly RAM "leakage"
phs = []
@ -244,24 +246,33 @@ def initialize_layers(nn):
phs += [ tf.placeholder(dtype, sh) ]
result = self.__call__(phs[0] if not_list else phs)
if not isinstance(result, list):
result = [result]
result_shapes = []
for t in result:
result_shapes += [ t.shape.as_list() ]
result_shapes += [ t.shape.as_list() ]
return result_shapes[0] if not_list else result_shapes
def compute_output_channels(self, shapes):
    """Return the channel dimension of the shape given by compute_output_shape(shapes).

    For a 4D shape under the "NCHW" data format this is element 1;
    in every other case it is the last element.
    """
    out_shape = self.compute_output_shape(shapes)
    is_4d = len(out_shape) == 4
    if is_4d and nn.data_format == "NCHW":
        return out_shape[1]
    return out_shape[-1]
def build_for_run(self, shapes_list):
if not isinstance(shapes_list, list):
raise ValueError("shapes_list must be a list.")
self.run_placeholders = []
for dtype,sh in shapes_list:
self.run_placeholders.append ( tf.placeholder(dtype, (None,)+sh) )
self.run_placeholders.append ( tf.placeholder(dtype, sh) )
self.run_output = self.__call__(self.run_placeholders)
@ -279,7 +290,7 @@ def initialize_layers(nn):
return nn.tf_sess.run ( self.run_output, feed_dict=feed_dict)
nn.ModelBase = ModelBase
class Conv2D(LayerBase):
"""
use_wscale bool enables equalized learning rate, kernel_initializer will be forced to random_normal
@ -292,6 +303,9 @@ def initialize_layers(nn):
if not isinstance(dilations, int):
raise ValueError ("dilations must be an int type")
if dtype is None:
dtype = nn.tf_floatx
if isinstance(padding, str):
if padding == "SAME":
padding = ( (kernel_size - 1) * dilations + 1 ) // 2
@ -302,37 +316,48 @@ def initialize_layers(nn):
if isinstance(padding, int):
if padding != 0:
padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ]
if nn.data_format == "NHWC":
padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ]
else:
padding = [ [0,0], [0,0], [padding,padding], [padding,padding] ]
else:
padding = None
if nn.data_format == "NHWC":
strides = [1,strides,strides,1]
else:
strides = [1,1,strides,strides]
if nn.data_format == "NHWC":
dilations = [1,dilations,dilations,1]
else:
dilations = [1,1,dilations,dilations]
self.in_ch = in_ch
self.out_ch = out_ch
self.kernel_size = kernel_size
self.strides = [1,strides,strides,1]
self.strides = strides
self.padding = padding
self.dilations = [1,dilations,dilations,1]
self.dilations = dilations
self.use_bias = use_bias
self.use_wscale = use_wscale
self.kernel_initializer = None if use_wscale else kernel_initializer
self.kernel_initializer = kernel_initializer
self.bias_initializer = bias_initializer
self.trainable = trainable
if dtype is None:
dtype = nn.tf_floatx
self.dtype = dtype
super().__init__(**kwargs)
def build_weights(self):
kernel_initializer = self.kernel_initializer
if self.use_wscale:
gain = 1.0 if self.kernel_size == 1 else np.sqrt(2)
fan_in = self.kernel_size*self.kernel_size*self.in_ch
he_std = gain / np.sqrt(fan_in) # He init
self.wscale = tf.constant(he_std, dtype=self.dtype )
kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype)
if kernel_initializer is None:
if self.use_wscale:
gain = 1.0 if self.kernel_size == 1 else np.sqrt(2)
fan_in = self.kernel_size*self.kernel_size*self.in_ch
he_std = gain / np.sqrt(fan_in) # He init
self.wscale = tf.constant(he_std, dtype=self.dtype )
kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype)
else:
kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype)
kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype)
self.weight = tf.get_variable("weight", (self.kernel_size,self.kernel_size,self.in_ch,self.out_ch), dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable )
@ -341,7 +366,7 @@ def initialize_layers(nn):
if bias_initializer is None:
bias_initializer = tf.initializers.zeros(dtype=self.dtype)
self.bias = tf.get_variable("bias", (1,1,1,self.out_ch), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable )
self.bias = tf.get_variable("bias", (self.out_ch,), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable )
def get_weights(self):
weights = [self.weight]
@ -357,9 +382,13 @@ def initialize_layers(nn):
if self.padding is not None:
x = tf.pad (x, self.padding, mode='CONSTANT')
x = tf.nn.conv2d(x, weight, self.strides, 'VALID', dilations=self.dilations)
x = tf.nn.conv2d(x, weight, self.strides, 'VALID', dilations=self.dilations, data_format=nn.data_format)
if self.use_bias:
x = x + self.bias
if nn.data_format == "NHWC":
bias = tf.reshape (self.bias, (1,1,1,self.out_ch) )
else:
bias = tf.reshape (self.bias, (1,self.out_ch,1,1) )
x = tf.add(x, bias)
return x
def __str__(self):
@ -367,7 +396,7 @@ def initialize_layers(nn):
return r
nn.Conv2D = Conv2D
class Conv2DTranspose(LayerBase):
"""
use_wscale enables weight scale (equalized learning rate)
@ -376,6 +405,10 @@ def initialize_layers(nn):
def __init__(self, in_ch, out_ch, kernel_size, strides=2, padding='SAME', use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ):
if not isinstance(strides, int):
raise ValueError ("strides must be an int type")
if dtype is None:
dtype = nn.tf_floatx
self.in_ch = in_ch
self.out_ch = out_ch
self.kernel_size = kernel_size
@ -383,33 +416,30 @@ def initialize_layers(nn):
self.padding = padding
self.use_bias = use_bias
self.use_wscale = use_wscale
self.kernel_initializer = None if use_wscale else kernel_initializer
self.kernel_initializer = kernel_initializer
self.bias_initializer = bias_initializer
self.trainable = trainable
if dtype is None:
dtype = nn.tf_floatx
self.dtype = dtype
super().__init__(**kwargs)
def build_weights(self):
kernel_initializer = self.kernel_initializer
if self.use_wscale:
gain = 1.0 if self.kernel_size == 1 else np.sqrt(2)
fan_in = self.kernel_size*self.kernel_size*self.in_ch
he_std = gain / np.sqrt(fan_in) # He init
self.wscale = tf.constant(he_std, dtype=self.dtype )
kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype)
if kernel_initializer is None:
if self.use_wscale:
gain = 1.0 if self.kernel_size == 1 else np.sqrt(2)
fan_in = self.kernel_size*self.kernel_size*self.in_ch
he_std = gain / np.sqrt(fan_in) # He init
self.wscale = tf.constant(he_std, dtype=self.dtype )
kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype)
else:
kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype)
kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype)
self.weight = tf.get_variable("weight", (self.kernel_size,self.kernel_size,self.out_ch,self.in_ch), dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable )
if self.use_bias:
bias_initializer = self.bias_initializer
if bias_initializer is None:
bias_initializer = tf.initializers.zeros(dtype=self.dtype)
self.bias = tf.get_variable("bias", (1,1,1,self.out_ch), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable )
self.bias = tf.get_variable("bias", (self.out_ch,), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable )
def get_weights(self):
weights = [self.weight]
@ -420,21 +450,34 @@ def initialize_layers(nn):
def __call__(self, x):
shape = x.shape
h,w,c = shape[1], shape[2], shape[3]
output_shape = tf.stack ( (tf.shape(x)[0],
self.deconv_length(w, self.strides, self.kernel_size, self.padding),
self.deconv_length(h, self.strides, self.kernel_size, self.padding),
self.out_ch) )
if nn.data_format == "NHWC":
h,w,c = shape[1], shape[2], shape[3]
output_shape = tf.stack ( (tf.shape(x)[0],
self.deconv_length(w, self.strides, self.kernel_size, self.padding),
self.deconv_length(h, self.strides, self.kernel_size, self.padding),
self.out_ch) )
strides = [1,self.strides,self.strides,1]
else:
c,h,w = shape[1], shape[2], shape[3]
output_shape = tf.stack ( (tf.shape(x)[0],
self.out_ch,
self.deconv_length(w, self.strides, self.kernel_size, self.padding),
self.deconv_length(h, self.strides, self.kernel_size, self.padding),
) )
strides = [1,1,self.strides,self.strides]
weight = self.weight
if self.use_wscale:
weight = weight * self.wscale
x = tf.nn.conv2d_transpose(x, weight, output_shape, [1,self.strides,self.strides,1], padding=self.padding)
x = tf.nn.conv2d_transpose(x, weight, output_shape, strides, padding=self.padding, data_format=nn.data_format)
if self.use_bias:
x = x + self.bias
if nn.data_format == "NHWC":
bias = tf.reshape (self.bias, (1,1,1,self.out_ch) )
else:
bias = tf.reshape (self.bias, (1,self.out_ch,1,1) )
x = tf.add(x, bias)
return x
def __str__(self):
@ -454,15 +497,18 @@ def initialize_layers(nn):
dim_size = dim_size * stride_size
return dim_size
nn.Conv2DTranspose = Conv2DTranspose
class BlurPool(LayerBase):
def __init__(self, filt_size=3, stride=2, **kwargs ):
self.strides = [1,stride,stride,1]
self.filt_size = filt_size
self.padding = [ [0,0],
[ int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ],
[ int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ],
[0,0] ]
pad = [ int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ]
if nn.data_format == "NHWC":
self.padding = [ [0,0], pad, pad, [0,0] ]
else:
self.padding = [ [0,0], [0,0], pad, pad ]
if(self.filt_size==1):
a = np.array([1.,])
elif(self.filt_size==2):
@ -493,16 +539,16 @@ def initialize_layers(nn):
x = tf.nn.depthwise_conv2d(x, k, self.strides, 'VALID')
return x
nn.BlurPool = BlurPool
class Dense(LayerBase):
def __init__(self, in_ch, out_ch, use_bias=True, use_wscale=False, maxout_ch=0, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ):
"""
use_wscale enables weight scale (equalized learning rate)
kernel_initializer will be forced to random_normal
maxout_ch https://link.springer.com/article/10.1186/s40537-019-0233-0
typical 2-4 if you want to enable DenseMaxout behaviour
"""
typical 2-4 if you want to enable DenseMaxout behaviour
"""
self.in_ch = in_ch
self.out_ch = out_ch
self.use_bias = use_bias
@ -512,7 +558,8 @@ def initialize_layers(nn):
self.bias_initializer = bias_initializer
self.trainable = trainable
if dtype is None:
dtype = tf.float32
dtype = nn.tf_floatx
self.dtype = dtype
super().__init__(**kwargs)
@ -521,25 +568,26 @@ def initialize_layers(nn):
weight_shape = (self.in_ch,self.out_ch*self.maxout_ch)
else:
weight_shape = (self.in_ch,self.out_ch)
kernel_initializer = self.kernel_initializer
if self.use_wscale:
gain = 1.0
fan_in = np.prod( weight_shape[:-1] )
he_std = gain / np.sqrt(fan_in) # He init
self.wscale = tf.constant(he_std, dtype=self.dtype )
kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype)
if kernel_initializer is None:
if self.use_wscale:
gain = 1.0
fan_in = np.prod( weight_shape[:-1] )
he_std = gain / np.sqrt(fan_in) # He init
self.wscale = tf.constant(he_std, dtype=self.dtype )
kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype)
else:
kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype)
kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype)
self.weight = tf.get_variable("weight", weight_shape, dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable )
if self.use_bias:
bias_initializer = self.bias_initializer
if bias_initializer is None:
bias_initializer = tf.initializers.zeros(dtype=self.dtype)
self.bias = tf.get_variable("bias", (1,self.out_ch), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable )
self.bias = tf.get_variable("bias", (self.out_ch,), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable )
def get_weights(self):
weights = [self.weight]
@ -553,46 +601,53 @@ def initialize_layers(nn):
weight = weight * self.wscale
x = tf.matmul(x, weight)
if self.maxout_ch > 1:
if self.maxout_ch > 1:
x = tf.reshape (x, (-1, self.out_ch, self.maxout_ch) )
x = tf.reduce_max(x, axis=-1)
if self.use_bias:
x = x + self.bias
x = tf.add(x, tf.reshape(self.bias, (1,self.out_ch) ) )
return x
nn.Dense = Dense
class BatchNorm2D(LayerBase):
"""
currently not for training
"""
def __init__(self, dim, eps=1e-05, momentum=0.1, dtype=None, **kwargs ):
def __init__(self, dim, eps=1e-05, momentum=0.1, dtype=None, **kwargs):
self.dim = dim
self.eps = eps
self.momentum = momentum
if dtype is None:
dtype = nn.tf_floatx
self.dtype = dtype
self.shape = (1,1,1,dim)
super().__init__(**kwargs)
def build_weights(self):
self.weight = tf.get_variable("weight", self.shape, dtype=self.dtype, initializer=tf.initializers.ones() )
self.bias = tf.get_variable("bias", self.shape, dtype=self.dtype, initializer=tf.initializers.zeros() )
self.running_mean = tf.get_variable("running_mean", self.shape, dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False )
self.running_var = tf.get_variable("running_var", self.shape, dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False )
self.weight = tf.get_variable("weight", (self.dim,), dtype=self.dtype, initializer=tf.initializers.ones() )
self.bias = tf.get_variable("bias", (self.dim,), dtype=self.dtype, initializer=tf.initializers.zeros() )
self.running_mean = tf.get_variable("running_mean", (self.dim,), dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False )
self.running_var = tf.get_variable("running_var", (self.dim,), dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False )
def get_weights(self):
return [self.weight, self.bias, self.running_mean, self.running_var]
def __call__(self, x):
x = (x - self.running_mean) / tf.sqrt( self.running_var + self.eps )
x *= self.weight
x += self.bias
if nn.data_format == "NHWC":
shape = (1,1,1,self.dim)
else:
shape = (1,self.dim,1,1)
weight = tf.reshape ( self.weight , shape )
bias = tf.reshape ( self.bias , shape )
running_mean = tf.reshape ( self.running_mean, shape )
running_var = tf.reshape ( self.running_var , shape )
x = (x - running_mean) / tf.sqrt( running_var + self.eps )
x *= weight
x += bias
return x
nn.BatchNorm2D = BatchNorm2D

View file

@ -1,51 +1,67 @@
"""
Leras.
Leras.
like lighter keras.
This is my lightweight neural network library written from scratch
based on pure tensorflow without keras.
Provides:
+ full freedom of tensorflow operations without keras model's restrictions
+ full freedom of tensorflow operations without keras model's restrictions
+ easy model operations like in PyTorch, but in graph mode (no eager execution)
+ convenient and understandable logic
Reasons why we cannot import tensorflow or any tensorflow.sub modules right here:
1) change env variables based on DeviceConfig before import tensorflow
2) multiprocesses will import tensorflow every spawn
The NCHW data format speeds up training by 10-20%.
"""
import os
import sys
from pathlib import Path
import numpy as np
from core.interact import interact as io
from .device import Devices
class nn():
current_DeviceConfig = None
tf = None
tf_sess = None
tf_sess_config = None
tf_default_device = None
data_format = None
conv2d_ch_axis = None
conv2d_spatial_axes = None
tf_floatx = None
np_floatx = None
# Tensor ops
tf_get_value = None
tf_batch_set_value = None
tf_gradients = None
tf_average_gv_list = None
tf_average_tensor_list = None
tf_dot = None
tf_concat = None
tf_gelu = None
tf_upsample2d = None
tf_upsample2d_bilinear = None
tf_flatten = None
tf_reshape_4D = None
tf_random_binomial = None
tf_gaussian_blur = None
tf_style_loss = None
tf_channel_histogram = None
tf_histogram = None
tf_dssim = None
tf_space_to_depth = None
tf_depth_to_space = None
# Layers
Saveable = None
LayerBase = None
@ -55,16 +71,17 @@ class nn():
BlurPool = None
Dense = None
BatchNorm2D = None
# Initializers
initializers = None
# Optimizers
TFBaseOptimizer = None
TFRMSpropOptimizer = None
@staticmethod
def initialize(device_config=None):
def initialize(device_config=None, floatx="float32", data_format="NHWC"):
if nn.tf is None:
if device_config is None:
device_config = nn.getCurrentDeviceConfig()
@ -74,11 +91,8 @@ class nn():
if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
os.environ.pop('CUDA_VISIBLE_DEVICES')
os.environ['CUDA_CACHE_MAXSIZE'] = '536870912' #512Mb (32mb default)
first_run = False
if not device_config.cpu_only:
if len(device_config.devices) != 0:
if sys.platform[0:3] == 'win':
if all( [ x.name == device_config.devices[0].name for x in device_config.devices ] ):
devices_str = "_" + device_config.devices[0].name.replace(' ','_')
@ -86,27 +100,33 @@ class nn():
devices_str = ""
for device in device_config.devices:
devices_str += "_" + device.name.replace(' ','_')
compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache' + devices_str)
if not compute_cache_path.exists():
first_run = True
os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
os.environ['CUDA_CACHE_MAXSIZE'] = '536870912' #512Mb (32mb default)
os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # tf log errors only
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
if first_run:
io.log_info("Caching GPU kernels...")
import tensorflow as tf
import tensorflow as tf
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
nn.tf = tf
if device_config.cpu_only:
if len(device_config.devices) == 0:
nn.tf_default_device = "/CPU:0"
config = tf.ConfigProto(device_count={'GPU': 0})
else:
else:
nn.tf_default_device = "/GPU:0"
config = tf.ConfigProto()
config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices])
@ -114,26 +134,81 @@ class nn():
config.gpu_options.allow_growth = True
nn.tf_sess_config = config
nn.tf_floatx = nn.tf.float32 #nn.tf.float16 if device_config.use_fp16 else nn.tf.float32
nn.np_floatx = nn.tf_floatx.as_numpy_dtype
from .tensor_ops import initialize_tensor_ops
from .layers import initialize_layers
from .initializers import initialize_initializers
from .optimizers import initialize_optimizers
initialize_tensor_ops(nn)
initialize_layers(nn)
initialize_initializers(nn)
initialize_optimizers(nn)
if nn.tf_sess is None:
nn.tf_sess = tf.Session(config=nn.tf_sess_config)
if floatx == "float32":
floatx = nn.tf.float32
elif floatx == "float16":
floatx = nn.tf.float16
else:
raise ValueError(f"unsupported floatx {floatx}")
nn.set_floatx(floatx)
nn.set_data_format(data_format)
@staticmethod
def initialize_main_env():
    """Forward to Devices.initialize_main_env()."""
    Devices.initialize_main_env()
@staticmethod
def set_floatx(tf_dtype):
    """
    Set the default float type used by layers created with dtype=None.

    Stores *tf_dtype* in nn.tf_floatx and its `as_numpy_dtype` in nn.np_floatx.
    """
    nn.tf_floatx = tf_dtype
    numpy_dtype = tf_dtype.as_numpy_dtype
    nn.np_floatx = numpy_dtype
@staticmethod
def set_data_format(data_format):
if data_format != "NHWC" and data_format != "NCHW":
raise ValueError(f"unsupported data_format {data_format}")
nn.data_format = data_format
if data_format == "NHWC":
nn.conv2d_ch_axis = 3
nn.conv2d_spatial_axes = [1,2]
elif data_format == "NCHW":
nn.conv2d_ch_axis = 1
nn.conv2d_spatial_axes = [2,3]
@staticmethod
def get4Dshape ( w, h, c, data_format=None ):
    """
    Build a 4D shape tuple whose first (batch) entry is None.

    Uses nn.data_format when data_format is not given. "NHWC" yields
    (None,h,w,c); anything else yields (None,c,h,w).
    """
    fmt = nn.data_format if data_format is None else data_format
    return (None,h,w,c) if fmt == "NHWC" else (None,c,h,w)
@staticmethod
def to_data_format( x, to_data_format, from_data_format=None):
    """Transpose the 4D array *x* from one data format to another.

    from_data_format defaults to nn.data_format. When source and target
    formats are equal, *x* is returned as is.

    Raises:
        ValueError: if a conversion is needed and to_data_format is neither
            "NHWC" nor "NCHW".
    """
    source_format = nn.data_format if from_data_format is None else from_data_format
    if to_data_format == source_format:
        return x

    # axis permutation that produces each target layout
    axes_for_target = { "NHWC" : (0,2,3,1),
                        "NCHW" : (0,3,1,2) }
    if to_data_format in ("NHWC", "NCHW"):
        return np.transpose(x, axes_for_target[to_data_format])
    raise ValueError(f"unsupported to_data_format {to_data_format}")
@staticmethod
def getCurrentDeviceConfig():
if nn.current_DeviceConfig is None:
@ -151,27 +226,34 @@ class nn():
nn.tf.reset_default_graph()
nn.tf_sess.close()
nn.tf_sess = nn.tf.Session(config=nn.tf_sess_config)
@staticmethod
def tf_close_session():
def tf_close_session():
if nn.tf_sess is not None:
nn.tf.reset_default_graph()
nn.tf_sess.close()
nn.tf_sess = None
@staticmethod
def tf_get_current_device():
    """Return the display name of the first entry on the default graph's device stack.

    Falls back to nn.tf_default_device when the stack is empty.
    """
    # Undocumented access to last tf.device(...)
    device_stack = nn.tf.get_default_graph()._device_function_stack
    entries = device_stack.peek_objs()
    if len(entries) == 0:
        return nn.tf_default_device
    return entries[0].display_name
@staticmethod
def ask_choose_device_idxs(choose_only_one=False, allow_cpu=True, suggest_best_multi_gpu=False, suggest_all_gpu=False, return_device_config=False):
devices = Devices.getDevices()
if len(devices) == 0:
return []
all_devices_indexes = [device.index for device in devices]
if choose_only_one:
suggest_best_multi_gpu = False
suggest_all_gpu = False
if suggest_all_gpu:
best_device_indexes = all_devices_indexes
elif suggest_best_multi_gpu:
@ -179,84 +261,84 @@ class nn():
else:
best_device_indexes = [ devices.get_best_device().index ]
best_device_indexes = ",".join([str(x) for x in best_device_indexes])
io.log_info ("")
if choose_only_one:
io.log_info ("Choose one GPU idx.")
else:
io.log_info ("Choose one or several GPU idxs (separated by comma).")
io.log_info ("")
if allow_cpu:
io.log_info ("[CPU] : CPU")
for device in devices:
io.log_info (f" [{device.index}] : {device.name}")
io.log_info ("")
while True:
try:
if choose_only_one:
choosed_idxs = io.input_str("Which GPU index to choose?", best_device_indexes)
else:
choosed_idxs = io.input_str("Which GPU indexes to choose?", best_device_indexes)
if allow_cpu and choosed_idxs.lower() == "cpu":
choosed_idxs = []
break
choosed_idxs = [ int(x) for x in choosed_idxs.split(',') ]
if choose_only_one:
if len(choosed_idxs) == 1:
break
break
else:
if all( [idx in all_devices_indexes for idx in choosed_idxs] ):
break
except:
pass
io.log_info ("")
if return_device_config:
return nn.DeviceConfig.GPUIndexes(choosed_idxs)
else:
else:
return choosed_idxs
class DeviceConfig():
    """A selection of devices to run on; an empty selection means CPU only."""

    def __init__ (self, devices=None):
        """Store *devices* as a Devices collection and set ``cpu_only`` when it is empty."""
        if not devices:
            devices = []
        selected = devices if isinstance(devices, Devices) else Devices(devices)

        self.devices = selected
        self.cpu_only = len(selected) == 0

    @staticmethod
    def BestGPU():
        """Config holding the device from get_best_device(), or the CPU config if there are no devices."""
        available = Devices.getDevices()
        if len(available) != 0:
            return nn.DeviceConfig([available.get_best_device()])
        return nn.DeviceConfig.CPU()

    @staticmethod
    def WorstGPU():
        """Config holding the device from get_worst_device(), or the CPU config if there are no devices."""
        available = Devices.getDevices()
        if len(available) != 0:
            return nn.DeviceConfig([available.get_worst_device()])
        return nn.DeviceConfig.CPU()

    @staticmethod
    def GPUIndexes(indexes):
        """Config for the devices whose index is in *indexes*; empty *indexes* gives a CPU-only config."""
        if len(indexes) == 0:
            chosen = []
        else:
            chosen = Devices.getDevices().get_devices_from_index_list(indexes)
        return nn.DeviceConfig(chosen)

    @staticmethod
    def CPU():
        """Config with no devices (cpu_only is True)."""
        return nn.DeviceConfig([])

View file

@ -73,7 +73,7 @@ def initialize_optimizers(nn):
e = tf.device('/CPU:0') if vars_on_cpu else None
if e: e.__enter__()
with tf.variable_scope(self.name):
accumulators = [ tf.get_variable ( f'acc_{i+self.accumulator_counter}', v.shape, initializer=tf.initializers.constant(0.0), trainable=False)
accumulators = [ tf.get_variable ( f'acc_{i+self.accumulator_counter}', v.shape, dtype=v.dtype, initializer=tf.initializers.constant(0.0), trainable=False)
for (i, v ) in enumerate(trainable_weights) ]
self.accumulators_dict.update ( { v.name : acc for v,acc in zip(trainable_weights,accumulators) } )
@ -81,13 +81,13 @@ def initialize_optimizers(nn):
self.accumulator_counter += len(trainable_weights)
if self.lr_dropout != 1.0:
lr_rnds = [ nn.tf_random_binomial( v.shape, p=self.lr_dropout) for v in trainable_weights ]
lr_rnds = [ nn.tf_random_binomial( v.shape, p=self.lr_dropout, dtype=v.dtype) for v in trainable_weights ]
self.lr_rnds_dict.update ( { v.name : rnd for v,rnd in zip(trainable_weights,lr_rnds) } )
if e: e.__exit__(None, None, None)
def get_update_op(self, grads_vars):
updates = []
lr = self.lr
if self.clipnorm > 0.0:
norm = tf.sqrt( sum([tf.reduce_sum(tf.square(g)) for g,v in grads_vars]))
updates += [ state_ops.assign_add( self.iterations, 1) ]
@ -96,8 +96,14 @@ def initialize_optimizers(nn):
g = self.tf_clip_norm(g, self.clipnorm, norm)
a = self.accumulators_dict[v.name]
new_a = self.rho * a + (1. - self.rho) * tf.square(g)
v_diff = - lr * g / (tf.sqrt(new_a) + self.epsilon)
rho = tf.cast(self.rho, a.dtype)
new_a = rho * a + (1. - rho) * tf.square(g)
lr = tf.cast(self.lr, a.dtype)
epsilon = tf.cast(self.epsilon, a.dtype)
v_diff = - lr * g / (tf.sqrt(new_a) + epsilon)
if self.lr_dropout != 1.0:
lr_rnd = self.lr_rnds_dict[v.name]
v_diff *= lr_rnd

View file

@ -2,14 +2,14 @@ import numpy as np
def initialize_tensor_ops(nn):
tf = nn.tf
from tensorflow.python.ops import array_ops, random_ops, math_ops, sparse_ops, gradients
from tensorflow.python.ops import array_ops, random_ops, math_ops, sparse_ops, gradients
from tensorflow.python.framework import sparse_tensor
def tf_get_value(tensor):
    """
    Run `tensor` in the session stored on `nn` and return what the run yields.
    """
    session = nn.tf_sess
    return session.run(tensor)
nn.tf_get_value = tf_get_value
def tf_batch_set_value(tuples):
if len(tuples) != 0:
with nn.tf.device('/CPU:0'):
@ -28,8 +28,8 @@ def initialize_tensor_ops(nn):
nn.tf_sess.run(assign_ops, feed_dict=feed_dict)
nn.tf_batch_set_value = tf_batch_set_value
def tf_gradients ( loss, vars ):
grads = gradients.gradients(loss, vars, colocate_gradients_with_ops=True )
gv = [*zip(grads,vars)]
@ -38,8 +38,11 @@ def initialize_tensor_ops(nn):
raise Exception("No gradient for variable {v.name}")
return gv
nn.tf_gradients = tf_gradients
def tf_average_gv_list(grad_var_list, tf_device_string=None):
if len(grad_var_list) == 1:
return grad_var_list[0]
e = tf.device(tf_device_string) if tf_device_string is not None else None
if e is not None: e.__enter__()
result = []
@ -56,71 +59,65 @@ def initialize_tensor_ops(nn):
if e is not None: e.__exit__(None,None,None)
return result
nn.tf_average_gv_list = tf_average_gv_list
def tf_average_tensor_list(tensors_list, tf_device_string=None):
    """
    Element-wise mean of a list of tensors.

    tensors_list        sequence of tensors to average; for a single-element
                        list that element is returned unchanged and no ops
                        are created
    tf_device_string    optional device string handed to tf.device(); when
                        given, the averaging ops are built inside that scope

    Returns the averaged tensor.
    """
    if len(tensors_list) == 1:
        return tensors_list[0]

    # ExitStack makes sure the device scope is exited even if building the
    # ops raises, and lets the scope stay optional without duplicating code.
    import contextlib
    with contextlib.ExitStack() as stack:
        if tf_device_string is not None:
            stack.enter_context(tf.device(tf_device_string))
        # Give every tensor a new leading axis, join along it, reduce over it.
        stacked = tf.concat([tf.expand_dims(t, 0) for t in tensors_list], 0)
        return tf.reduce_mean(stacked, 0)
nn.tf_average_tensor_list = tf_average_tensor_list
def tf_dot(x, y):
    """
    Multiply `x` by `y`.

    When either operand has more than two dimensions, the last axis of `x`
    is contracted with the second-to-last axis of `y`, and the result is
    reshaped to x_shape[:-1] + y_shape[:-2] + y_shape[-1:].
    Otherwise a plain matrix product is returned; the sparse-dense matmul
    is used when `x` is a SparseTensor.
    """
    def _mixed_shape(t):
        # Static dimension where it is known, otherwise the matching
        # scalar taken from the dynamic shape.
        static_dims = t.shape.as_list()
        dynamic_dims = array_ops.unstack(array_ops.shape(t))
        return tuple(static if static is not None else dynamic
                     for static, dynamic in zip(static_dims, dynamic_dims))

    if x.shape.ndims > 2 or y.shape.ndims > 2:
        x_shape = _mixed_shape(x)
        y_shape = _mixed_shape(y)

        # Bring the second-to-last axis of y to the front so y can be
        # flattened into a (y_shape[-2], -1) matrix.
        y_permute_dim = list(range(y.shape.ndims))
        y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim

        xt = array_ops.reshape(x, [-1, x_shape[-1]])
        yt = array_ops.reshape(array_ops.transpose(y, perm=y_permute_dim), [y_shape[-2], -1])
        # The interactive debugging console that used to be opened here has
        # been removed: it blocked graph construction waiting for stdin.
        return array_ops.reshape(math_ops.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:])

    if isinstance(x, sparse_tensor.SparseTensor):
        return sparse_ops.sparse_tensor_dense_matmul(x, y)
    return math_ops.matmul(x, y)
nn.tf_dot = tf_dot
def tf_concat (tensors_list, axis):
    """
    Concatenate tensors along `axis`.

    A list holding exactly one tensor is short-circuited:
    that tensor is returned directly and no concat op is created.
    """
    only_one = len(tensors_list) == 1
    return tensors_list[0] if only_one else tf.concat(tensors_list, axis)
nn.tf_concat = tf_concat
def tf_gelu(x):
    """
    GELU activation, tanh approximation:
    x * 0.5 * (1 + tanh( sqrt(2/pi) * (x + 0.044715 * x^3) ))
    """
    cubic = x + 0.044715 * tf.pow(x, 3)
    tanh_arg = np.sqrt(2 / np.pi) * cubic
    cdf = 0.5 * (1.0 + tf.nn.tanh(tanh_arg))
    return x * cdf
nn.tf_gelu = tf_gelu
def tf_upsample2d(x, size=2):
    """
    Nearest-neighbour upsampling of a 4D tensor by an integer factor.

    x       4D tensor laid out according to nn.data_format
    size    integer factor applied to both spatial axes

    Returns the tensor with both spatial axes multiplied by `size`.
    """
    if nn.data_format == "NCHW":
        # Insert a unit axis after H and after W, repeat along those axes,
        # then fold them back so every pixel is duplicated size x size times.
        _, c, h, w = x.shape.as_list()
        x = tf.reshape (x, (-1,c,h,1,w,1) )
        x = tf.tile(x, (1,1,1,size,1,size) )
        x = tf.reshape (x, (-1,c,h*size,w*size) )
        return x
    else:
        # axes 1 and 2 are used as the spatial axes on this path
        return tf.image.resize_nearest_neighbor(x, (x.shape[1]*size, x.shape[2]*size) )
nn.tf_upsample2d = tf_upsample2d
def tf_upsample2d_bilinear(x, size=2):
    """
    Upsample a 4D tensor by an integer factor with tf.image.resize_images
    (called with its default interpolation method).

    x       4D tensor laid out according to nn.data_format
    size    integer factor applied to both spatial axes

    Returns the resized tensor in the same layout as the input.
    """
    channels_first = nn.data_format == "NCHW"
    if channels_first:
        # tf.image.resize_images expects [batch, height, width, channels]
        x = tf.transpose(x, (0,2,3,1) )
    x = tf.image.resize_images(x, (x.shape[1]*size, x.shape[2]*size) )
    if channels_first:
        # restore the caller's layout
        x = tf.transpose(x, (0,3,1,2) )
    return x
nn.tf_upsample2d_bilinear = tf_upsample2d_bilinear
def tf_flatten(x, dynamic_dims=False):
    """
    Collapse every axis after the first into a single axis.

    x               tensor to flatten; when nn.data_format is "NHWC" it is
                    first transposed with perm (0,3,1,2), so the flattened
                    element order is channels-first for both layouts
    dynamic_dims    when True the output shape is derived from tf.shape(x),
                    which allows flattening without knowing the size of the
                    input dims

    Returns a 2D tensor.
    """
    if nn.data_format == "NHWC":
        # match NCHW version in order to switch data_format without problems
        x = tf.transpose(x, (0,3,1,2) )

    if dynamic_dims:
        sh = tf.shape(x)
        return tf.reshape (x, (sh[0], tf.reduce_prod(sh[1:]) ) )
    return tf.reshape (x, (-1, np.prod(x.shape[1:])) )
nn.tf_flatten = tf_flatten
def tf_reshape_4D(x, w,h,c):
    """
    Reshape `x` to (-1, c, h, w); under the "NHWC" data format the result
    is then transposed to (-1, h, w, c).
    """
    channels_first = tf.reshape (x, (-1,c,h,w))
    if nn.data_format != "NHWC":
        return channels_first
    # The reshape is always done channels-first so that both data formats
    # read the flat input in the same element order.
    return tf.transpose(channels_first, (0,2,3,1) )
nn.tf_reshape_4D = tf_reshape_4D
def tf_random_binomial(shape, p=0.0, dtype=None, seed=None):
if dtype is None:
dtype=tf.float32
@ -131,7 +128,7 @@ def initialize_tensor_ops(nn):
random_ops.random_uniform(shape, dtype=tf.float16, seed=seed) < p,
array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype))
nn.tf_random_binomial = tf_random_binomial
def tf_gaussian_blur(input, radius=2.0):
def gaussian(x, mu, sigma):
return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2))
@ -142,41 +139,42 @@ def initialize_tensor_ops(nn):
kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)])
np_kernel = np.outer(kernel_1d, kernel_1d).astype(np.float32)
kernel = np_kernel / np.sum(np_kernel)
return kernel
return kernel, kernel_size
gauss_kernel = make_kernel(radius)
gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis]
kernel_size = gauss_kernel.shape[0]
inputs = [ input[:,:,:,i:i+1] for i in range( input.shape[-1] ) ]
gauss_kernel, kernel_size = make_kernel(radius)
padding = kernel_size//2
if padding != 0:
if nn.data_format == "NHWC":
padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ]
else:
padding = [ [0,0], [0,0], [padding,padding], [padding,padding] ]
else:
padding = None
gauss_kernel = gauss_kernel[:,:,None,None]
outputs = []
for i in range(len(inputs)):
x = inputs[i]
if kernel_size != 0:
padding = kernel_size//2
x = tf.pad (x, [ [0,0], [padding,padding], [padding,padding], [0,0] ] )
for i in range(input.shape[nn.conv2d_ch_axis]):
x = input[:,:,:,i:i+1] if nn.data_format == "NHWC" \
else input[:,i:i+1,:,:]
outputs += [ tf.nn.conv2d(x, tf.constant(gauss_kernel, dtype=nn.tf_floatx ) , strides=[1,1,1,1], padding="VALID") ]
if padding is not None:
x = tf.pad (x, padding)
outputs += [ tf.nn.conv2d(x, tf.constant(gauss_kernel, dtype=input.dtype ), strides=[1,1,1,1], padding="VALID", data_format=nn.data_format) ]
return tf.concat (outputs, axis=-1)
return tf.concat (outputs, axis=nn.conv2d_ch_axis)
nn.tf_gaussian_blur = tf_gaussian_blur
def tf_style_loss(target, style, gaussian_blur_radius=0.0, loss_weight=1.0, step_size=1):
def sd(content, style, loss_weight):
content_nc = content.shape[-1]
style_nc = style.shape[-1]
content_nc = content.shape[ nn.conv2d_ch_axis ]
style_nc = style.shape[nn.conv2d_ch_axis]
if content_nc != style_nc:
raise Exception("style_loss() content_nc != style_nc")
axes = [1,2]
c_mean, c_var = tf.nn.moments(content, axes=axes, keep_dims=True)
s_mean, s_var = tf.nn.moments(style, axes=axes, keep_dims=True)
c_mean, c_var = tf.nn.moments(content, axes=nn.conv2d_spatial_axes, keep_dims=True)
s_mean, s_var = tf.nn.moments(style, axes=nn.conv2d_spatial_axes, keep_dims=True)
c_std, s_std = tf.sqrt(c_var + 1e-5), tf.sqrt(s_var + 1e-5)
mean_loss = tf.reduce_sum(tf.square(c_mean-s_mean), axis=[1,2,3])
std_loss = tf.reduce_sum(tf.square(c_std-s_std), axis=[1,2,3])
return (mean_loss + std_loss) * ( loss_weight / content_nc.value )
if gaussian_blur_radius > 0.0:
@ -186,47 +184,30 @@ def initialize_tensor_ops(nn):
return sd( target, style, loss_weight=loss_weight )
nn.tf_style_loss = tf_style_loss
def tf_channel_histogram (input, bins, data_range):
    """
    Histogram of `input` with `bins` bins, computed per entry of axis 0.

    input       tensor; all axes except the first are summed over
    bins        number of bins
    data_range  (min, max) pair; the bin width is (max-min)/(bins-1)

    Returns a tensor whose last axis has `bins` entries (the bin counts).
    """
    range_min, range_max = data_range
    bin_range = (range_max-range_min) / (bins-1)
    # NOTE(review): range_min only enters through bin_range; the values are
    # never offset by it - confirm callers always pass range_min == 0.
    reduce_axes = list(range(1, input.shape.ndims))

    remaining = input
    # shift by half a bin width before thresholding
    remaining += bin_range/2

    counts = []
    # Walk from the top bin down: mark what exceeds the bin's lower edge,
    # count it, then zero those values so lower bins do not count them again.
    for bin_idx in reversed(range(bins)):
        above = tf.sign( tf.nn.relu( remaining - (bin_idx*bin_range) ) )
        remaining = remaining * (1.0 - above)
        counts.append ( tf.expand_dims(tf.reduce_sum (above, axis=reduce_axes ), -1) )

    # counts were collected highest bin first; emit them lowest bin first
    counts.reverse()
    return tf.concat(counts,-1)
nn.tf_channel_histogram = tf_channel_histogram
def tf_histogram(input, bins=256, data_range=(0,1.0)):
    """
    Run tf_channel_histogram on every slice of the last axis of `input`
    and join the results along a new last axis.
    """
    per_channel = []
    for ch in range(input.shape[-1]):
        hist = tf_channel_histogram( input[...,ch], bins=bins, data_range=data_range )
        per_channel.append( tf.expand_dims( hist, -1 ) )
    return tf.concat ( per_channel, -1 )
nn.tf_histogram = tf_histogram
def tf_dssim(img1,img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03):
ch = img2.shape[-1]
if img1.dtype != img2.dtype:
raise ValueError("img1.dtype != img2.dtype")
def _fspecial_gauss(size, sigma):
#Function to mimic the 'fspecial' gaussian MATLAB function.
coords = np.arange(0, size, dtype=nn.np_floatx)
coords -= (size - 1 ) / 2.0
g = coords**2
g *= ( -0.5 / (sigma**2) )
g = np.reshape (g, (1,-1)) + np.reshape(g, (-1,1) )
g = tf.constant ( np.reshape (g, (1,-1)), dtype=nn.tf_floatx )
g = tf.nn.softmax(g)
g = tf.reshape (g, (size, size, 1, 1))
g = tf.tile (g, (1,1,ch,1))
return g
not_float32 = img1.dtype != tf.float32
kernel = _fspecial_gauss(filter_size,filter_sigma)
if not_float32:
img_dtype = img1.dtype
img1 = tf.cast(img1, tf.float32)
img2 = tf.cast(img2, tf.float32)
kernel = np.arange(0, filter_size, dtype=np.float32)
kernel -= (filter_size - 1 ) / 2.0
kernel = kernel**2
kernel *= ( -0.5 / (filter_sigma**2) )
kernel = np.reshape (kernel, (1,-1)) + np.reshape(kernel, (-1,1) )
kernel = tf.constant ( np.reshape (kernel, (1,-1)), dtype=tf.float32 )
kernel = tf.nn.softmax(kernel)
kernel = tf.reshape (kernel, (filter_size, filter_size, 1, 1))
kernel = tf.tile (kernel, (1,1, img1.shape[ nn.conv2d_ch_axis ] ,1))
def reducer(x):
return tf.nn.depthwise_conv2d(x, kernel, strides=[1,1,1,1], padding='VALID')
return tf.nn.depthwise_conv2d(x, kernel, strides=[1,1,1,1], padding='VALID', data_format=nn.data_format)
c1 = (k1 * max_val) ** 2
c2 = (k2 * max_val) ** 2
@ -242,10 +223,44 @@ def initialize_tensor_ops(nn):
c2 *= 1.0 #compensation factor
cs = (num1 - num0 + c2) / (den1 - den0 + c2)
ssim_val = tf.reduce_mean(luminance * cs, axis=(-3, -2) )
return(1.0 - ssim_val ) / 2.0
ssim_val = tf.reduce_mean(luminance * cs, axis=nn.conv2d_spatial_axes )
dssim = (1.0 - ssim_val ) / 2.0
if not_float32:
dssim = tf.cast(dssim, img_dtype)
return dssim
nn.tf_dssim = tf_dssim
def tf_space_to_depth(x, size):
    """
    Move size x size spatial samples into the channel axis.

    For any data format other than "NHWC" this defers to tf.space_to_depth.
    For "NHWC" the rearrangement is done by hand with reshape/transpose;
    the original author's stated intent is to match the NCHW result so the
    data format can be switched without problems.
    """
    if nn.data_format != "NHWC":
        return tf.space_to_depth(x, size, data_format=nn.data_format)

    _, h, w, c = x.shape.as_list()
    oh = h // size
    ow = w // size
    # H is split as (size, oh) and W as (size, ow); the two `size` axes are
    # then moved next to the channel axis and merged into it.
    blocks = tf.reshape(x, (-1, size, oh, size, ow, c))
    blocks = tf.transpose(blocks, (0, 2, 4, 1, 3, 5))
    return tf.reshape(blocks, (-1, oh, ow, size* size* c ))
nn.tf_space_to_depth = tf_space_to_depth
def tf_depth_to_space(x, size):
    """
    Move channel data into size x size spatial blocks.

    For any data format other than "NHWC" this defers to tf.depth_to_space.
    For "NHWC" the rearrangement is done by hand with reshape/transpose;
    the original author's stated intent is to match the NCHW result so the
    data format can be switched without problems.
    """
    if nn.data_format != "NHWC":
        return tf.depth_to_space(x, size, data_format=nn.data_format)

    _, h, w, c = x.shape.as_list()
    out_h = h * size
    out_w = w * size
    out_c = c // (size * size)
    # Channels are split as (size, size, out_c); each `size` axis is then
    # placed right after H / W respectively and merged into it.
    blocks = tf.reshape(x, (-1, h, w, size, size, out_c, ) )
    blocks = tf.transpose(blocks, (0, 1, 3, 2, 4, 5))
    return tf.reshape(blocks, (-1, out_h, out_w, out_c, ))
nn.tf_depth_to_space = tf_depth_to_space
def tf_rgb_to_lab(srgb):
srgb_pixels = tf.reshape(srgb, [-1, 3])
linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32)
@ -275,14 +290,14 @@ def initialize_tensor_ops(nn):
lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0])
return tf.reshape(lab_pixels, tf.shape(srgb))
nn.tf_rgb_to_lab = tf_rgb_to_lab
def tf_suppress_lower_mean(t, eps=0.00001):
    """
    Suppress the entries of a rank-1 tensor that lie below its mean.

    Entries at or below mean(t) - eps become 0, entries at or above mean(t)
    are returned unchanged, and entries in between are scaled by a linear
    ramp from 0 to 1.

    t       rank-1 tensor
    eps     width of the ramp below the mean

    Raises ValueError when `t` is not rank 1.
    """
    if t.shape.ndims != 1:
        raise ValueError("tf_suppress_lower_mean: t rank must be 1")
    t_mean_eps = tf.reduce_mean(t) - eps
    # raise everything below (mean - eps) up to that threshold
    q = tf.clip_by_value(t, t_mean_eps, tf.reduce_max(t) )
    # 0 at the threshold, eps at (or above) the mean
    q = tf.clip_by_value(q-t_mean_eps, 0, eps)
    # q/eps is the 0..1 gate; apply it to the original values exactly once
    q = q * (t/eps)
    return q
"""
class GeLU(KL.Layer):