### Collect facesets
diff --git a/XSegEditor/QIconDB.py b/XSegEditor/QIconDB.py
index a2427c3..1fd9e3e 100644
--- a/XSegEditor/QIconDB.py
+++ b/XSegEditor/QIconDB.py
@@ -17,6 +17,7 @@ class QIconDB():
QIconDB.poly_type_exclude = QIcon ( str(icon_path / 'poly_type_exclude.png') )
QIconDB.left = QIcon ( str(icon_path / 'left.png') )
QIconDB.right = QIcon ( str(icon_path / 'right.png') )
+ QIconDB.trashcan = QIcon ( str(icon_path / 'trashcan.png') )
QIconDB.pt_edit_mode = QIcon ( str(icon_path / 'pt_edit_mode.png') )
QIconDB.view_lock_center = QIcon ( str(icon_path / 'view_lock_center.png') )
QIconDB.view_baked = QIcon ( str(icon_path / 'view_baked.png') )
diff --git a/XSegEditor/QStringDB.py b/XSegEditor/QStringDB.py
index 632419e..b9100d2 100644
--- a/XSegEditor/QStringDB.py
+++ b/XSegEditor/QStringDB.py
@@ -85,6 +85,11 @@ class QStringDB():
'zh' : '保存并转到下一张图片\n按住SHIFT : 加快\n按住CTRL : 跳过未标记的\n',
}[lang]
+ QStringDB.btn_delete_image_tip = { 'en' : 'Move to _trash and Next image\n',
+ 'ru' : 'Переместить в _trash и следующее изображение\n',
+ 'zh' : '移至_trash,转到下一张图片 ',
+ }[lang]
+
QStringDB.loading_tip = {'en' : 'Loading',
'ru' : 'Загрузка',
'zh' : '正在载入',
diff --git a/XSegEditor/XSegEditor.py b/XSegEditor/XSegEditor.py
index c9cb6aa..affc9f6 100644
--- a/XSegEditor/XSegEditor.py
+++ b/XSegEditor/XSegEditor.py
@@ -1164,6 +1164,7 @@ class MainWindow(QXMainWindow):
super().__init__()
self.input_dirpath = input_dirpath
+ self.trash_dirpath = input_dirpath.parent / (input_dirpath.name + '_trash')
self.cfg_root_path = cfg_root_path
self.cfg_path = cfg_root_path / 'MainWindow_cfg.dat'
@@ -1341,7 +1342,18 @@ class MainWindow(QXMainWindow):
self.update_cached_images()
self.update_preview_bar()
-
+
+ def trash_current_image(self):
+ self.process_next_image()
+
+ img_path = self.image_paths_done.pop(-1)
+ img_path = Path(img_path)
+ self.trash_dirpath.mkdir(parents=True, exist_ok=True)
+ img_path.rename( self.trash_dirpath / img_path.name )
+
+ self.update_cached_images()
+ self.update_preview_bar()
+
def initialize_ui(self):
self.canvas = QCanvas()
@@ -1356,20 +1368,36 @@ class MainWindow(QXMainWindow):
btn_next_image = QXIconButton(QIconDB.right, QStringDB.btn_next_image_tip, shortcut='D', click_func=self.process_next_image)
btn_next_image.setIconSize(QUIConfig.preview_bar_icon_q_size)
-
+ btn_delete_image = QXIconButton(QIconDB.trashcan, QStringDB.btn_delete_image_tip, shortcut='X', click_func=self.trash_current_image)
+ btn_delete_image.setIconSize(QUIConfig.preview_bar_icon_q_size)
+
+ pad_image = QWidget()
+ pad_image.setFixedSize(QUIConfig.preview_bar_icon_q_size)
+
preview_image_bar_frame_l = QHBoxLayout()
preview_image_bar_frame_l.setContentsMargins(0,0,0,0)
+ preview_image_bar_frame_l.addWidget ( pad_image, alignment=Qt.AlignCenter)
preview_image_bar_frame_l.addWidget ( btn_prev_image, alignment=Qt.AlignCenter)
preview_image_bar_frame_l.addWidget ( image_bar)
preview_image_bar_frame_l.addWidget ( btn_next_image, alignment=Qt.AlignCenter)
+ #preview_image_bar_frame_l.addWidget ( btn_delete_image, alignment=Qt.AlignCenter)
preview_image_bar_frame = QFrame()
preview_image_bar_frame.setSizePolicy ( QSizePolicy.Fixed, QSizePolicy.Fixed )
preview_image_bar_frame.setLayout(preview_image_bar_frame_l)
- preview_image_bar_l = QHBoxLayout()
- preview_image_bar_l.addWidget (preview_image_bar_frame)
+ preview_image_bar_frame2_l = QHBoxLayout()
+ preview_image_bar_frame2_l.setContentsMargins(0,0,0,0)
+ preview_image_bar_frame2_l.addWidget ( btn_delete_image, alignment=Qt.AlignCenter)
+ preview_image_bar_frame2 = QFrame()
+ preview_image_bar_frame2.setSizePolicy ( QSizePolicy.Fixed, QSizePolicy.Fixed )
+ preview_image_bar_frame2.setLayout(preview_image_bar_frame2_l)
+
+ preview_image_bar_l = QHBoxLayout()
+ preview_image_bar_l.addWidget (preview_image_bar_frame, alignment=Qt.AlignCenter)
+ preview_image_bar_l.addWidget (preview_image_bar_frame2)
+
preview_image_bar = QFrame()
preview_image_bar.setFrameShape(QFrame.StyledPanel)
preview_image_bar.setSizePolicy ( QSizePolicy.Expanding, QSizePolicy.Fixed )
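The delete flow above first advances to the next image, then pops the just-finished path and moves that file into a sibling `<input_dir>_trash` folder. A minimal standalone sketch of the same move (hypothetical paths, outside the Qt app):

```python
# Minimal sketch of the trash move; `input_dirpath` is a hypothetical
# stand-in. The editor does the same via Path.rename after process_next_image().
from pathlib import Path

input_dirpath = Path('workspace/data_dst/aligned')        # hypothetical
trash_dirpath = input_dirpath.parent / (input_dirpath.name + '_trash')

def trash_image(img_path: Path) -> Path:
    trash_dirpath.mkdir(parents=True, exist_ok=True)      # lazily create _trash
    target = trash_dirpath / img_path.name
    img_path.rename(target)                               # move, keep filename
    return target
```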
diff --git a/XSegEditor/gfx/icons/trashcan.png b/XSegEditor/gfx/icons/trashcan.png
new file mode 100644
index 0000000..a31285b
Binary files /dev/null and b/XSegEditor/gfx/icons/trashcan.png differ
diff --git a/core/imagelib/SegIEPolys.py b/core/imagelib/SegIEPolys.py
index e658711..1a4c3d2 100644
--- a/core/imagelib/SegIEPolys.py
+++ b/core/imagelib/SegIEPolys.py
@@ -77,6 +77,8 @@ class SegIEPoly():
self.pts = np.array(pts)
self.n_max = self.n = len(pts)
+ def mult_points(self, val):
+ self.pts *= val
@@ -136,7 +138,11 @@ class SegIEPolys():
def dump(self):
return {'polys' : [ poly.dump() for poly in self.polys ] }
-
+
+ def mult_points(self, val):
+ for poly in self.polys:
+ poly.mult_points(val)
+
@staticmethod
def load(data=None):
ie_polys = SegIEPolys()
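`mult_points` uniformly scales every stored vertex, which is what is needed when a face image and its XSeg polygons are resized together. A hedged usage sketch (the 512→256 resize is illustrative):

```python
# Sketch: keep XSeg polygons aligned after resizing the face image.
# `data` stands in for a dict previously produced by SegIEPolys.dump().
from core.imagelib import SegIEPolys

polys = SegIEPolys.load(data)
polys.mult_points(256 / 512)   # image resized from 512px to 256px
```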
diff --git a/core/imagelib/__init__.py b/core/imagelib/__init__.py
index e74d427..1ed95dc 100644
--- a/core/imagelib/__init__.py
+++ b/core/imagelib/__init__.py
@@ -14,14 +14,19 @@ from .reduce_colors import reduce_colors
from .color_transfer import color_transfer, color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer, color_augmentation
-from .common import normalize_channels, cut_odd_image, overlay_alpha_image
+from .common import random_crop, normalize_channels, cut_odd_image, overlay_alpha_image
from .SegIEPolys import *
from .blursharpen import LinearMotionBlur, blursharpen
from .filters import apply_random_rgb_levels, \
+ apply_random_overlay_triangle, \
apply_random_hsv_shift, \
+ apply_random_sharpen, \
apply_random_motion_blur, \
apply_random_gaussian_blur, \
- apply_random_bilinear_resize
+ apply_random_nearest_resize, \
+ apply_random_bilinear_resize, \
+ apply_random_jpeg_compress, \
+ apply_random_relight
diff --git a/core/imagelib/color_transfer.py b/core/imagelib/color_transfer.py
index aaadd88..c9cafc5 100644
--- a/core/imagelib/color_transfer.py
+++ b/core/imagelib/color_transfer.py
@@ -373,6 +373,7 @@ def color_transfer(ct_mode, img_src, img_trg):
# imported from faceswap
def color_augmentation(img, seed=None):
""" Color adjust RGB image """
+ img = img.astype(np.float32)
face = img
face = np.clip(face*255.0, 0, 255).astype(np.uint8)
face = random_clahe(face, seed)
@@ -381,6 +382,25 @@ def color_augmentation(img, seed=None):
return (face / 255.0).astype(np.float32)
+def random_lab_rotation(image, seed=None):
+ """
+ Randomly rotates image color around the L axis in LAB colorspace,
+ keeping perceptual lightness constant.
+ """
+ image = cv2.cvtColor(image.astype(np.float32), cv2.COLOR_BGR2LAB)
+ M = np.eye(3)
+ M[1:, 1:] = special_ortho_group.rvs(2, 1, seed)
+ image = image.dot(M)
+ l, a, b = cv2.split(image)
+ l = np.clip(l, 0, 100)
+ a = np.clip(a, -127, 127)
+ b = np.clip(b, -127, 127)
+ image = cv2.merge([l, a, b])
+ image = cv2.cvtColor(image.astype(np.float32), cv2.COLOR_LAB2BGR)
+ np.clip(image, 0, 1, out=image)
+ return image
+
+
def random_lab(image, seed=None):
""" Perform random color/lightness adjustment in L*a*b* colorspace """
random.seed(seed)
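`special_ortho_group` is not imported in this hunk; presumably it comes from `scipy.stats` elsewhere in the file. A self-contained sketch of the same a\*/b\* rotation under that assumption:

```python
# Standalone sketch of random_lab_rotation; the scipy import is an
# assumption, since the hunk above does not show it.
import cv2
import numpy as np
from scipy.stats import special_ortho_group

img = np.random.rand(64, 64, 3).astype(np.float32)   # BGR in [0,1]
lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)           # L in [0,100], a/b in [-127,127]
M = np.eye(3)
M[1:, 1:] = special_ortho_group.rvs(2)               # random 2x2 rotation of (a*, b*)
rotated = lab.dot(M)                                 # L channel untouched
```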
diff --git a/core/imagelib/common.py b/core/imagelib/common.py
index 6566819..4219d7d 100644
--- a/core/imagelib/common.py
+++ b/core/imagelib/common.py
@@ -1,5 +1,16 @@
import numpy as np
+def random_crop(img, w, h):
+ height, width = img.shape[:2]
+
+ h_rnd = height - h
+ w_rnd = width - w
+
+ y = np.random.randint(0, h_rnd) if h_rnd > 0 else 0
+ x = np.random.randint(0, w_rnd) if w_rnd > 0 else 0
+
+    return img[y:y+h, x:x+w]
+
def normalize_channels(img, target_channels):
img_shape_len = len(img.shape)
if img_shape_len == 2:
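A quick check of the (fixed) `random_crop` semantics: the output is `h`×`w`, falling back to the original size along any axis that is already smaller.

```python
# Illustrative shapes only; random_crop is the helper added above.
import numpy as np
from core.imagelib import random_crop

img = np.zeros((480, 640, 3), np.float32)
crop = random_crop(img, w=256, h=128)
assert crop.shape[:2] == (128, 256)
```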
diff --git a/core/imagelib/filters.py b/core/imagelib/filters.py
index ba51e07..6b69576 100644
--- a/core/imagelib/filters.py
+++ b/core/imagelib/filters.py
@@ -1,47 +1,65 @@
import numpy as np
-from .blursharpen import LinearMotionBlur
+from .blursharpen import LinearMotionBlur, blursharpen
import cv2
def apply_random_rgb_levels(img, mask=None, rnd_state=None):
if rnd_state is None:
rnd_state = np.random
np_rnd = rnd_state.rand
-
+
inBlack = np.array([np_rnd()*0.25 , np_rnd()*0.25 , np_rnd()*0.25], dtype=np.float32)
inWhite = np.array([1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25], dtype=np.float32)
inGamma = np.array([0.5+np_rnd(), 0.5+np_rnd(), 0.5+np_rnd()], dtype=np.float32)
-
+
outBlack = np.array([np_rnd()*0.25 , np_rnd()*0.25 , np_rnd()*0.25], dtype=np.float32)
outWhite = np.array([1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25], dtype=np.float32)
result = np.clip( (img - inBlack) / (inWhite - inBlack), 0, 1 )
result = ( result ** (1/inGamma) ) * (outWhite - outBlack) + outBlack
result = np.clip(result, 0, 1)
-
+
if mask is not None:
result = img*(1-mask) + result*mask
-
+
return result
-
+
def apply_random_hsv_shift(img, mask=None, rnd_state=None):
if rnd_state is None:
rnd_state = np.random
-
+
h, s, v = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
h = ( h + rnd_state.randint(360) ) % 360
s = np.clip ( s + rnd_state.random()-0.5, 0, 1 )
- v = np.clip ( v + rnd_state.random()-0.5, 0, 1 )
-
+ v = np.clip ( v + rnd_state.random()-0.5, 0, 1 )
+
result = np.clip( cv2.cvtColor(cv2.merge([h, s, v]), cv2.COLOR_HSV2BGR) , 0, 1 )
if mask is not None:
result = img*(1-mask) + result*mask
-
+
return result
-
+
+def apply_random_sharpen( img, chance, kernel_max_size, mask=None, rnd_state=None ):
+ if rnd_state is None:
+ rnd_state = np.random
+
+ sharp_rnd_kernel = rnd_state.randint(kernel_max_size)+1
+
+ result = img
+ if rnd_state.randint(100) < np.clip(chance, 0, 100):
+ if rnd_state.randint(2) == 0:
+ result = blursharpen(result, 1, sharp_rnd_kernel, rnd_state.randint(10) )
+ else:
+ result = blursharpen(result, 2, sharp_rnd_kernel, rnd_state.randint(50) )
+
+ if mask is not None:
+ result = img*(1-mask) + result*mask
+
+ return result
+
def apply_random_motion_blur( img, chance, mb_max_size, mask=None, rnd_state=None ):
if rnd_state is None:
rnd_state = np.random
-
+
mblur_rnd_kernel = rnd_state.randint(mb_max_size)+1
mblur_rnd_deg = rnd_state.randint(360)
@@ -50,38 +68,178 @@ def apply_random_motion_blur( img, chance, mb_max_size, mask=None, rnd_state=Non
result = LinearMotionBlur (result, mblur_rnd_kernel, mblur_rnd_deg )
if mask is not None:
result = img*(1-mask) + result*mask
-
+
return result
-
+
def apply_random_gaussian_blur( img, chance, kernel_max_size, mask=None, rnd_state=None ):
if rnd_state is None:
rnd_state = np.random
-
+
result = img
if rnd_state.randint(100) < np.clip(chance, 0, 100):
gblur_rnd_kernel = rnd_state.randint(kernel_max_size)*2+1
result = cv2.GaussianBlur(result, (gblur_rnd_kernel,)*2 , 0)
if mask is not None:
result = img*(1-mask) + result*mask
-
+
return result
-
-
-def apply_random_bilinear_resize( img, chance, max_size_per, mask=None, rnd_state=None ):
+
+def apply_random_resize( img, chance, max_size_per, interpolation=cv2.INTER_LINEAR, mask=None, rnd_state=None ):
if rnd_state is None:
rnd_state = np.random
result = img
if rnd_state.randint(100) < np.clip(chance, 0, 100):
h,w,c = result.shape
-
+
trg = rnd_state.rand()
- rw = w - int( trg * int(w*(max_size_per/100.0)) )
- rh = h - int( trg * int(h*(max_size_per/100.0)) )
-
- result = cv2.resize (result, (rw,rh), interpolation=cv2.INTER_LINEAR )
- result = cv2.resize (result, (w,h), interpolation=cv2.INTER_LINEAR )
+ rw = w - int( trg * int(w*(max_size_per/100.0)) )
+ rh = h - int( trg * int(h*(max_size_per/100.0)) )
+
+ result = cv2.resize (result, (rw,rh), interpolation=interpolation )
+ result = cv2.resize (result, (w,h), interpolation=interpolation )
if mask is not None:
result = img*(1-mask) + result*mask
-
+
+ return result
+
+def apply_random_nearest_resize( img, chance, max_size_per, mask=None, rnd_state=None ):
+ return apply_random_resize( img, chance, max_size_per, interpolation=cv2.INTER_NEAREST, mask=mask, rnd_state=rnd_state )
+
+def apply_random_bilinear_resize( img, chance, max_size_per, mask=None, rnd_state=None ):
+ return apply_random_resize( img, chance, max_size_per, interpolation=cv2.INTER_LINEAR, mask=mask, rnd_state=rnd_state )
+
+def apply_random_jpeg_compress( img, chance, mask=None, rnd_state=None ):
+ if rnd_state is None:
+ rnd_state = np.random
+
+ result = img
+ if rnd_state.randint(100) < np.clip(chance, 0, 100):
+ h,w,c = result.shape
+
+ quality = rnd_state.randint(10,101)
+
+ ret, result = cv2.imencode('.jpg', np.clip(img*255, 0,255).astype(np.uint8), [int(cv2.IMWRITE_JPEG_QUALITY), quality] )
+ if ret == True:
+ result = cv2.imdecode(result, flags=cv2.IMREAD_UNCHANGED)
+ result = result.astype(np.float32) / 255.0
+ if mask is not None:
+ result = img*(1-mask) + result*mask
+
+ return result
+
+def apply_random_overlay_triangle( img, max_alpha, mask=None, rnd_state=None ):
+ if rnd_state is None:
+ rnd_state = np.random
+
+ h,w,c = img.shape
+ pt1 = [rnd_state.randint(w), rnd_state.randint(h) ]
+ pt2 = [rnd_state.randint(w), rnd_state.randint(h) ]
+ pt3 = [rnd_state.randint(w), rnd_state.randint(h) ]
+
+ alpha = rnd_state.uniform()*max_alpha
+
+ tri_mask = cv2.fillPoly( np.zeros_like(img), [ np.array([pt1,pt2,pt3], np.int32) ], (alpha,)*c )
+
+ if rnd_state.randint(2) == 0:
+ result = np.clip(img+tri_mask, 0, 1)
+ else:
+ result = np.clip(img-tri_mask, 0, 1)
+
+ if mask is not None:
+ result = img*(1-mask) + result*mask
+
+ return result
+
+def _min_resize(x, m):
+ if x.shape[0] < x.shape[1]:
+ s0 = m
+ s1 = int(float(m) / float(x.shape[0]) * float(x.shape[1]))
+ else:
+ s0 = int(float(m) / float(x.shape[1]) * float(x.shape[0]))
+ s1 = m
+ new_max = min(s1, s0)
+ raw_max = min(x.shape[0], x.shape[1])
+ return cv2.resize(x, (s1, s0), interpolation=cv2.INTER_LANCZOS4)
+
+def _d_resize(x, d, fac=1.0):
+ new_min = min(int(d[1] * fac), int(d[0] * fac))
+ raw_min = min(x.shape[0], x.shape[1])
+ if new_min < raw_min:
+ interpolation = cv2.INTER_AREA
+ else:
+ interpolation = cv2.INTER_LANCZOS4
+ y = cv2.resize(x, (int(d[1] * fac), int(d[0] * fac)), interpolation=interpolation)
+ return y
+
+def _get_image_gradient(dist):
+ cols = cv2.filter2D(dist, cv2.CV_32F, np.array([[-1, 0, +1], [-2, 0, +2], [-1, 0, +1]]))
+ rows = cv2.filter2D(dist, cv2.CV_32F, np.array([[-1, -2, -1], [0, 0, 0], [+1, +2, +1]]))
+ return cols, rows
+
+def _generate_lighting_effects(content):
+ h512 = content
+ h256 = cv2.pyrDown(h512)
+ h128 = cv2.pyrDown(h256)
+ h64 = cv2.pyrDown(h128)
+ h32 = cv2.pyrDown(h64)
+ h16 = cv2.pyrDown(h32)
+ c512, r512 = _get_image_gradient(h512)
+ c256, r256 = _get_image_gradient(h256)
+ c128, r128 = _get_image_gradient(h128)
+ c64, r64 = _get_image_gradient(h64)
+ c32, r32 = _get_image_gradient(h32)
+ c16, r16 = _get_image_gradient(h16)
+ c = c16
+ c = _d_resize(cv2.pyrUp(c), c32.shape) * 4.0 + c32
+ c = _d_resize(cv2.pyrUp(c), c64.shape) * 4.0 + c64
+ c = _d_resize(cv2.pyrUp(c), c128.shape) * 4.0 + c128
+ c = _d_resize(cv2.pyrUp(c), c256.shape) * 4.0 + c256
+ c = _d_resize(cv2.pyrUp(c), c512.shape) * 4.0 + c512
+ r = r16
+ r = _d_resize(cv2.pyrUp(r), r32.shape) * 4.0 + r32
+ r = _d_resize(cv2.pyrUp(r), r64.shape) * 4.0 + r64
+ r = _d_resize(cv2.pyrUp(r), r128.shape) * 4.0 + r128
+ r = _d_resize(cv2.pyrUp(r), r256.shape) * 4.0 + r256
+ r = _d_resize(cv2.pyrUp(r), r512.shape) * 4.0 + r512
+ coarse_effect_cols = c
+ coarse_effect_rows = r
+ EPS = 1e-10
+
+ max_effect = np.max((coarse_effect_cols**2 + coarse_effect_rows**2)**0.5, axis=0, keepdims=True, ).max(1, keepdims=True)
+ coarse_effect_cols = (coarse_effect_cols + EPS) / (max_effect + EPS)
+ coarse_effect_rows = (coarse_effect_rows + EPS) / (max_effect + EPS)
+
+ return np.stack([ np.zeros_like(coarse_effect_rows), coarse_effect_rows, coarse_effect_cols], axis=-1)
+
+def apply_random_relight(img, mask=None, rnd_state=None):
+ if rnd_state is None:
+ rnd_state = np.random
+
+ def_img = img
+
+ if rnd_state.randint(2) == 0:
+ light_pos_y = 1.0 if rnd_state.randint(2) == 0 else -1.0
+ light_pos_x = rnd_state.uniform()*2-1.0
+ else:
+ light_pos_y = rnd_state.uniform()*2-1.0
+ light_pos_x = 1.0 if rnd_state.randint(2) == 0 else -1.0
+
+ light_source_height = 0.3*rnd_state.uniform()*0.7
+ light_intensity = 1.0+rnd_state.uniform()
+ ambient_intensity = 0.5
+
+ light_source_location = np.array([[[light_source_height, light_pos_y, light_pos_x ]]], dtype=np.float32)
+ light_source_direction = light_source_location / np.sqrt(np.sum(np.square(light_source_location)))
+
+ lighting_effect = _generate_lighting_effects(img)
+ lighting_effect = np.sum(lighting_effect * light_source_direction, axis=-1).clip(0, 1)
+ lighting_effect = np.mean(lighting_effect, axis=-1, keepdims=True)
+
+ result = def_img * (ambient_intensity + lighting_effect * light_intensity) #light_source_color
+ result = np.clip(result, 0, 1)
+
+ if mask is not None:
+ result = def_img*(1-mask) + result*mask
+
return result
\ No newline at end of file
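Taken together, the new filters form a degradation/augmentation chain. A hedged sketch of stacking them (the chance and size values are illustrative, and every call also accepts an optional `mask`):

```python
# Illustrative augmentation chain over the filters added above.
import numpy as np
from core.imagelib import (apply_random_sharpen, apply_random_nearest_resize,
                           apply_random_jpeg_compress, apply_random_relight)

rnd = np.random.RandomState(42)
img = np.random.rand(128, 128, 3).astype(np.float32)

img = apply_random_sharpen(img, chance=25, kernel_max_size=5, rnd_state=rnd)
img = apply_random_nearest_resize(img, chance=25, max_size_per=75, rnd_state=rnd)
img = apply_random_jpeg_compress(img, chance=25, rnd_state=rnd)
img = apply_random_relight(img, rnd_state=rnd)
```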
diff --git a/core/imagelib/sd/__init__.py b/core/imagelib/sd/__init__.py
index 2eafd4c..1cddc19 100644
--- a/core/imagelib/sd/__init__.py
+++ b/core/imagelib/sd/__init__.py
@@ -1,2 +1,2 @@
-from .draw import *
+from .draw import circle_faded, random_circle_faded, bezier, random_bezier_split_faded, random_faded
from .calc import *
\ No newline at end of file
diff --git a/core/imagelib/sd/draw.py b/core/imagelib/sd/draw.py
index 77e9a46..711ad33 100644
--- a/core/imagelib/sd/draw.py
+++ b/core/imagelib/sd/draw.py
@@ -1,23 +1,36 @@
"""
Signed distance drawing functions using numpy.
"""
+import math
import numpy as np
from numpy import linalg as npla
-def circle_faded( hw, center, fade_dists ):
+
+def vector2_dot(a,b):
+ return a[...,0]*b[...,0]+a[...,1]*b[...,1]
+
+def vector2_dot2(a):
+ return a[...,0]*a[...,0]+a[...,1]*a[...,1]
+
+def vector2_cross(a,b):
+ return a[...,0]*b[...,1]-a[...,1]*b[...,0]
+
+
+def circle_faded( wh, center, fade_dists ):
"""
returns drawn circle in [h,w,1] output range [0..1.0] float32
- hw = [h,w] resolution
- center = [y,x] center of circle
+ wh = [w,h] resolution
+ center = [x,y] center of circle
fade_dists = [fade_start, fade_end] fade values
"""
- h,w = hw
+ w,h = wh
pts = np.empty( (h,w,2), dtype=np.float32 )
- pts[...,1] = np.arange(h)[None,:]
pts[...,0] = np.arange(w)[:,None]
+ pts[...,1] = np.arange(h)[None,:]
+
pts = pts.reshape ( (h*w, -1) )
pts_dists = np.abs ( npla.norm(pts-center, axis=-1) )
@@ -30,15 +43,158 @@ def circle_faded( hw, center, fade_dists ):
pts_dists = np.clip( 1-pts_dists, 0, 1)
return pts_dists.reshape ( (h,w,1) ).astype(np.float32)
+
+
+def bezier( wh, A, B, C ):
+ """
+ returns drawn bezier in [h,w,1] output range float32,
+ every pixel contains signed distance to bezier line
+
+ wh [w,h] resolution
+ A,B,C points [x,y]
+ """
-def random_circle_faded ( hw, rnd_state=None ):
+ width,height = wh
+
+ A = np.float32(A)
+ B = np.float32(B)
+ C = np.float32(C)
+
+
+ pos = np.empty( (height,width,2), dtype=np.float32 )
+ pos[...,0] = np.arange(width)[:,None]
+ pos[...,1] = np.arange(height)[None,:]
+
+
+ a = B-A
+ b = A - 2.0*B + C
+ c = a * 2.0
+ d = A - pos
+
+ b_dot = vector2_dot(b,b)
+ if b_dot == 0.0:
+ return np.zeros( (height,width), dtype=np.float32 )
+
+ kk = 1.0 / b_dot
+
+ kx = kk * vector2_dot(a,b)
+ ky = kk * (2.0*vector2_dot(a,a)+vector2_dot(d,b))/3.0;
+ kz = kk * vector2_dot(d,a);
+
+ res = 0.0;
+ sgn = 0.0;
+
+ p = ky - kx*kx;
+
+ p3 = p*p*p;
+ q = kx*(2.0*kx*kx - 3.0*ky) + kz;
+ h = q*q + 4.0*p3;
+
+ hp_sel = h >= 0.0
+
+ hp_p = h[hp_sel]
+ hp_p = np.sqrt(hp_p)
+
+ hp_x = ( np.stack( (hp_p,-hp_p), -1) -q[hp_sel,None] ) / 2.0
+ hp_uv = np.sign(hp_x) * np.power( np.abs(hp_x), [1.0/3.0, 1.0/3.0] )
+ hp_t = np.clip( hp_uv[...,0] + hp_uv[...,1] - kx, 0.0, 1.0 )
+
+ hp_t = hp_t[...,None]
+ hp_q = d[hp_sel]+(c+b*hp_t)*hp_t
+ hp_res = vector2_dot2(hp_q)
+ hp_sgn = vector2_cross(c+2.0*b*hp_t,hp_q)
+
+ hl_sel = h < 0.0
+
+ hl_q = q[hl_sel]
+ hl_p = p[hl_sel]
+ hl_z = np.sqrt(-hl_p)
+ hl_v = np.arccos( hl_q / (hl_p*hl_z*2.0)) / 3.0
+
+ hl_m = np.cos(hl_v)
+ hl_n = np.sin(hl_v)*1.732050808;
+
+ hl_t = np.clip( np.stack( (hl_m+hl_m,-hl_n-hl_m,hl_n-hl_m), -1)*hl_z[...,None]-kx, 0.0, 1.0 );
+
+ hl_d = d[hl_sel]
+
+ hl_qx = hl_d+(c+b*hl_t[...,0:1])*hl_t[...,0:1]
+
+ hl_dx = vector2_dot2(hl_qx)
+ hl_sx = vector2_cross(c+2.0*b*hl_t[...,0:1], hl_qx)
+
+ hl_qy = hl_d+(c+b*hl_t[...,1:2])*hl_t[...,1:2]
+ hl_dy = vector2_dot2(hl_qy)
+ hl_sy = vector2_cross(c+2.0*b*hl_t[...,1:2],hl_qy);
+
+    hl_dx_l_dy = hl_dx < hl_dy
+    hl_dx_ge_dy = hl_dx >= hl_dy
+
+ hl_res = np.empty_like(hl_dx)
+ hl_res[hl_dx_l_dy] = hl_dx[hl_dx_l_dy]
+ hl_res[hl_dx_ge_dy] = hl_dy[hl_dx_ge_dy]
+
+ hl_sgn = np.empty_like(hl_sx)
+ hl_sgn[hl_dx_l_dy] = hl_sx[hl_dx_l_dy]
+ hl_sgn[hl_dx_ge_dy] = hl_sy[hl_dx_ge_dy]
+
+ res = np.empty( (height, width), np.float32 )
+ res[hp_sel] = hp_res
+ res[hl_sel] = hl_res
+
+ sgn = np.empty( (height, width), np.float32 )
+ sgn[hp_sel] = hp_sgn
+ sgn[hl_sel] = hl_sgn
+
+ sgn = np.sign(sgn)
+ res = np.sqrt(res)*sgn
+
+ return res[...,None]
+
+def random_faded(wh):
+ """
+ apply one of them:
+ random_circle_faded
+ random_bezier_split_faded
+ """
+ rnd = np.random.randint(2)
+ if rnd == 0:
+ return random_circle_faded(wh)
+ elif rnd == 1:
+ return random_bezier_split_faded(wh)
+
+def random_circle_faded ( wh, rnd_state=None ):
if rnd_state is None:
rnd_state = np.random
- h,w = hw
- hw_max = max(h,w)
- fade_start = rnd_state.randint(hw_max)
- fade_end = fade_start + rnd_state.randint(hw_max- fade_start)
+ w,h = wh
+ wh_max = max(w,h)
+ fade_start = rnd_state.randint(wh_max)
+ fade_end = fade_start + rnd_state.randint(wh_max- fade_start)
- return circle_faded (hw, [ rnd_state.randint(h), rnd_state.randint(w) ],
- [fade_start, fade_end] )
\ No newline at end of file
+    return circle_faded (wh, [ rnd_state.randint(w), rnd_state.randint(h) ],
+ [fade_start, fade_end] )
+
+def random_bezier_split_faded( wh ):
+ width, height = wh
+
+ degA = np.random.randint(360)
+ degB = np.random.randint(360)
+ degC = np.random.randint(360)
+
+ deg_2_rad = math.pi / 180.0
+
+ center = np.float32([width / 2.0, height / 2.0])
+
+ radius = max(width, height)
+
+ A = center + radius*np.float32([ math.sin( degA * deg_2_rad), math.cos( degA * deg_2_rad) ] )
+ B = center + np.random.randint(radius)*np.float32([ math.sin( degB * deg_2_rad), math.cos( degB * deg_2_rad) ] )
+ C = center + radius*np.float32([ math.sin( degC * deg_2_rad), math.cos( degC * deg_2_rad) ] )
+
+ x = bezier( (width,height), A, B, C )
+
+ x = x / (1+np.random.randint(radius)) + 0.5
+
+ x = np.clip(x, 0, 1)
+ return x
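With the new `wh` ordering, callers pass `(width, height)` and an `[x, y]` center. A small sketch:

```python
# Sketch of the reworked wh-ordered API; note center is [x,y] now.
from core.imagelib.sd import circle_faded, random_faded

mask = random_faded((256, 256))   # [h,w,1] float32, circle or bezier split
circ = circle_faded((320, 240),   # w=320, h=240 -> output shape (240,320,1)
                    [160, 120],   # center [x,y]
                    [20, 80])     # fade_start, fade_end
```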
diff --git a/core/imagelib/warp.py b/core/imagelib/warp.py
index 559c2e8..fd4b3a9 100644
--- a/core/imagelib/warp.py
+++ b/core/imagelib/warp.py
@@ -2,7 +2,7 @@ import numpy as np
import cv2
from core import randomex
-def gen_warp_params (w, flip, rotation_range=[-2,2], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05], rnd_state=None ):
+def gen_warp_params (w, flip=False, rotation_range=[-2,2], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05], rnd_state=None ):
if rnd_state is None:
rnd_state = np.random
diff --git a/core/leras/device.py b/core/leras/device.py
index 4d157f0..31d2f88 100644
--- a/core/leras/device.py
+++ b/core/leras/device.py
@@ -1,12 +1,19 @@
import sys
import ctypes
import os
+import multiprocessing
+import json
+import time
+from pathlib import Path
+from core.interact import interact as io
+
class Device(object):
- def __init__(self, index, name, total_mem, free_mem, cc=0):
+ def __init__(self, index, tf_dev_type, name, total_mem, free_mem):
self.index = index
+ self.tf_dev_type = tf_dev_type
self.name = name
- self.cc = cc
+
self.total_mem = total_mem
self.total_mem_gb = total_mem / 1024**3
self.free_mem = free_mem
@@ -82,12 +89,135 @@ class Devices(object):
result.append (device)
return Devices(result)
+ @staticmethod
+ def _get_tf_devices_proc(q : multiprocessing.Queue):
+
+ if sys.platform[0:3] == 'win':
+ compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache_ALL')
+ os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
+ if not compute_cache_path.exists():
+ io.log_info("Caching GPU kernels...")
+ compute_cache_path.mkdir(parents=True, exist_ok=True)
+
+ import tensorflow
+
+ tf_version = tensorflow.version.VERSION
+ #if tf_version is None:
+ # tf_version = tensorflow.version.GIT_VERSION
+ if tf_version[0] == 'v':
+ tf_version = tf_version[1:]
+ if tf_version[0] == '2':
+ tf = tensorflow.compat.v1
+ else:
+ tf = tensorflow
+
+ import logging
+ # Disable tensorflow warnings
+ tf_logger = logging.getLogger('tensorflow')
+ tf_logger.setLevel(logging.ERROR)
+
+ from tensorflow.python.client import device_lib
+
+ devices = []
+
+ physical_devices = device_lib.list_local_devices()
+ physical_devices_f = {}
+ for dev in physical_devices:
+ dev_type = dev.device_type
+ dev_tf_name = dev.name
+ dev_tf_name = dev_tf_name[ dev_tf_name.index(dev_type) : ]
+
+ dev_idx = int(dev_tf_name.split(':')[-1])
+
+ if dev_type in ['GPU','DML']:
+ dev_name = dev_tf_name
+
+ dev_desc = dev.physical_device_desc
+ if len(dev_desc) != 0:
+ if dev_desc[0] == '{':
+ dev_desc_json = json.loads(dev_desc)
+ dev_desc_json_name = dev_desc_json.get('name',None)
+ if dev_desc_json_name is not None:
+ dev_name = dev_desc_json_name
+ else:
+ for param, value in ( v.split(':') for v in dev_desc.split(',') ):
+ param = param.strip()
+ value = value.strip()
+ if param == 'name':
+ dev_name = value
+ break
+
+ physical_devices_f[dev_idx] = (dev_type, dev_name, dev.memory_limit)
+
+ q.put(physical_devices_f)
+ time.sleep(0.1)
+
+
@staticmethod
def initialize_main_env():
- os.environ['NN_DEVICES_INITIALIZED'] = '1'
- os.environ['NN_DEVICES_COUNT'] = '0'
+ if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 0:
+ return
+
+ if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
+ os.environ.pop('CUDA_VISIBLE_DEVICES')
os.environ['CUDA_CACHE_MAXSIZE'] = '2147483647'
+ os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # tf log errors only
+
+ q = multiprocessing.Queue()
+ p = multiprocessing.Process(target=Devices._get_tf_devices_proc, args=(q,), daemon=True)
+ p.start()
+ p.join()
+
+ visible_devices = q.get()
+
+ os.environ['NN_DEVICES_INITIALIZED'] = '1'
+ os.environ['NN_DEVICES_COUNT'] = str(len(visible_devices))
+
+ for i in visible_devices:
+ dev_type, name, total_mem = visible_devices[i]
+
+ os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'] = dev_type
+ os.environ[f'NN_DEVICE_{i}_NAME'] = name
+ os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(total_mem)
+ os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(total_mem)
+
+
+
+ @staticmethod
+ def getDevices():
+ if Devices.all_devices is None:
+ if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1:
+ raise Exception("nn devices are not initialized. Run initialize_main_env() in main process.")
+ devices = []
+ for i in range ( int(os.environ['NN_DEVICES_COUNT']) ):
+ devices.append ( Device(index=i,
+ tf_dev_type=os.environ[f'NN_DEVICE_{i}_TF_DEV_TYPE'],
+ name=os.environ[f'NN_DEVICE_{i}_NAME'],
+ total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']),
+ free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']), )
+ )
+ Devices.all_devices = Devices(devices)
+
+ return Devices.all_devices
+
+"""
+
+
+ # {'name' : name.split(b'\0', 1)[0].decode(),
+ # 'total_mem' : totalMem.value
+ # }
+
+
+
+
+
+ return
+
+
+
+
min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
for libname in libnames:
@@ -139,70 +269,4 @@ class Devices(object):
os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem'])
os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem'])
os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc'])
-
- @staticmethod
- def getDevices():
- if Devices.all_devices is None:
- if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1:
- raise Exception("nn devices are not initialized. Run initialize_main_env() in main process.")
- devices = []
- for i in range ( int(os.environ['NN_DEVICES_COUNT']) ):
- devices.append ( Device(index=i,
- name=os.environ[f'NN_DEVICE_{i}_NAME'],
- total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']),
- free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']),
- cc=int(os.environ[f'NN_DEVICE_{i}_CC']) ))
- Devices.all_devices = Devices(devices)
-
- return Devices.all_devices
-
-"""
-if Devices.all_devices is None:
- min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35))
-
- libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll')
- for libname in libnames:
- try:
- cuda = ctypes.CDLL(libname)
- except:
- continue
- else:
- break
- else:
- return Devices([])
-
- nGpus = ctypes.c_int()
- name = b' ' * 200
- cc_major = ctypes.c_int()
- cc_minor = ctypes.c_int()
- freeMem = ctypes.c_size_t()
- totalMem = ctypes.c_size_t()
-
- result = ctypes.c_int()
- device = ctypes.c_int()
- context = ctypes.c_void_p()
- error_str = ctypes.c_char_p()
-
- devices = []
-
- if cuda.cuInit(0) == 0 and \
- cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0:
- for i in range(nGpus.value):
- if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \
- cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \
- cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0:
- continue
-
- if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0:
- if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0:
- cc = cc_major.value * 10 + cc_minor.value
- if cc >= min_cc:
- devices.append ( Device(index=i,
- name=name.split(b'\0', 1)[0].decode(),
- total_mem=totalMem.value,
- free_mem=freeMem.value,
- cc=cc) )
- cuda.cuCtxDetach(context)
- Devices.all_devices = Devices(devices)
- return Devices.all_devices
"""
\ No newline at end of file
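The reworked probing imports TensorFlow in a child process, so the parent process never loads CUDA just to enumerate devices. A sketch of the intended entry-point flow:

```python
# Sketch of the new device-probing flow: initialize_main_env() fills the
# NN_DEVICE_* environment variables from a child process, then
# getDevices() reads them back without importing TensorFlow itself.
from core.leras.device import Devices

if __name__ == '__main__':           # multiprocessing-safe entry point
    Devices.initialize_main_env()
    for dev in Devices.getDevices():
        print(dev.index, dev.tf_dev_type, dev.name, f'{dev.total_mem_gb:.1f}GB')
```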
diff --git a/core/leras/layers/Conv2D.py b/core/leras/layers/Conv2D.py
index ae37c50..7d4d444 100644
--- a/core/leras/layers/Conv2D.py
+++ b/core/leras/layers/Conv2D.py
@@ -23,28 +23,13 @@ class Conv2D(nn.LayerBase):
if padding == "SAME":
padding = ( (kernel_size - 1) * dilations + 1 ) // 2
elif padding == "VALID":
- padding = 0
+ padding = None
else:
raise ValueError ("Wrong padding type. Should be VALID SAME or INT or 4x INTs")
-
- if isinstance(padding, int):
- if padding != 0:
- if nn.data_format == "NHWC":
- padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ]
- else:
- padding = [ [0,0], [0,0], [padding,padding], [padding,padding] ]
- else:
- padding = None
-
- if nn.data_format == "NHWC":
- strides = [1,strides,strides,1]
else:
- strides = [1,1,strides,strides]
-
- if nn.data_format == "NHWC":
- dilations = [1,dilations,dilations,1]
- else:
- dilations = [1,1,dilations,dilations]
+ padding = int(padding)
+
+
self.in_ch = in_ch
self.out_ch = out_ch
@@ -93,10 +78,27 @@ class Conv2D(nn.LayerBase):
if self.use_wscale:
weight = weight * self.wscale
- if self.padding is not None:
- x = tf.pad (x, self.padding, mode='CONSTANT')
+ padding = self.padding
+ if padding is not None:
+ if nn.data_format == "NHWC":
+ padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ]
+ else:
+ padding = [ [0,0], [0,0], [padding,padding], [padding,padding] ]
+ x = tf.pad (x, padding, mode='CONSTANT')
+
+ strides = self.strides
+ if nn.data_format == "NHWC":
+ strides = [1,strides,strides,1]
+ else:
+ strides = [1,1,strides,strides]
- x = tf.nn.conv2d(x, weight, self.strides, 'VALID', dilations=self.dilations, data_format=nn.data_format)
+ dilations = self.dilations
+ if nn.data_format == "NHWC":
+ dilations = [1,dilations,dilations,1]
+ else:
+ dilations = [1,1,dilations,dilations]
+
+ x = tf.nn.conv2d(x, weight, strides, 'VALID', dilations=dilations, data_format=nn.data_format)
if self.use_bias:
if nn.data_format == "NHWC":
bias = tf.reshape (self.bias, (1,1,1,self.out_ch) )
diff --git a/core/leras/layers/MsSsim.py b/core/leras/layers/MsSsim.py
new file mode 100644
index 0000000..d4987ed
--- /dev/null
+++ b/core/leras/layers/MsSsim.py
@@ -0,0 +1,50 @@
+from core.leras import nn
+tf = nn.tf
+
+
+class MsSsim(nn.LayerBase):
+ default_power_factors = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333)
+ default_l1_alpha = 0.84
+
+ def __init__(self, batch_size, in_ch, resolution, kernel_size=11, use_l1=False, **kwargs):
+ # restrict mssim factors to those greater/equal to kernel size
+ power_factors = [p for i, p in enumerate(self.default_power_factors) if resolution//(2**i) >= kernel_size]
+ # normalize power factors if reduced because of size
+ if sum(power_factors) < 1.0:
+ power_factors = [x/sum(power_factors) for x in power_factors]
+ self.power_factors = power_factors
+ self.num_scale = len(power_factors)
+ self.kernel_size = kernel_size
+ self.use_l1 = use_l1
+ if use_l1:
+ self.gaussian_weights = nn.get_gaussian_weights(batch_size, in_ch, resolution, num_scale=self.num_scale)
+
+ super().__init__(**kwargs)
+
+ def __call__(self, y_true, y_pred, max_val):
+ # Transpose images from NCHW to NHWC
+ y_true_t = tf.transpose(tf.cast(y_true, tf.float32), [0, 2, 3, 1])
+ y_pred_t = tf.transpose(tf.cast(y_pred, tf.float32), [0, 2, 3, 1])
+
+ # ssim_multiscale returns values in range [0, 1] (where 1 is completely identical)
+ # subtract from 1 to get loss
+ if tf.__version__ >= "1.14":
+ ms_ssim_loss = 1.0 - tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors, filter_size=self.kernel_size)
+ else:
+ ms_ssim_loss = 1.0 - tf.image.ssim_multiscale(y_true_t, y_pred_t, max_val, power_factors=self.power_factors)
+
+ # If use L1 is enabled, use mix of ms-ssim and L1 (weighted by gaussian filters)
+ # H. Zhao, O. Gallo, I. Frosio and J. Kautz, "Loss Functions for Image Restoration With Neural Networks,"
+ # in IEEE Transactions on Computational Imaging, vol. 3, no. 1, pp. 47-57, March 2017,
+ # doi: 10.1109/TCI.2016.2644865.
+ # https://research.nvidia.com/publication/loss-functions-image-restoration-neural-networks
+
+ if self.use_l1:
+ diff = tf.tile(tf.expand_dims(tf.abs(y_true - y_pred), axis=0), multiples=[self.num_scale, 1, 1, 1, 1])
+ l1_loss = tf.reduce_mean(tf.reduce_sum(self.gaussian_weights[-1, :, :, :, :] * diff, axis=[0, 3, 4]), axis=[1])
+ return self.default_l1_alpha * ms_ssim_loss + (1 - self.default_l1_alpha) * l1_loss
+
+ return ms_ssim_loss
+
+
+nn.MsSsim = MsSsim
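A hedged usage sketch for the new layer; inputs are NCHW as elsewhere in leras, and `nn.initialize()` is assumed to have been called already:

```python
# Sketch: MS-SSIM loss over NCHW batches (assumes nn.initialize() ran).
from core.leras import nn
tf = nn.tf

gt_t   = tf.placeholder(nn.floatx, (4, 3, 128, 128))
pred_t = tf.placeholder(nn.floatx, (4, 3, 128, 128))

ms_ssim = nn.MsSsim(batch_size=4, in_ch=3, resolution=128)
loss = tf.reduce_mean(ms_ssim(gt_t, pred_t, max_val=1.0))  # per-sample -> scalar
```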
diff --git a/core/leras/layers/__init__.py b/core/leras/layers/__init__.py
index 1c81963..d8f1c9d 100644
--- a/core/leras/layers/__init__.py
+++ b/core/leras/layers/__init__.py
@@ -13,4 +13,5 @@ from .FRNorm2D import *
from .TLU import *
from .ScaleAdd import *
from .DenseNorm import *
-from .AdaIN import *
\ No newline at end of file
+from .AdaIN import *
+from .MsSsim import *
diff --git a/core/leras/models/PatchDiscriminator.py b/core/leras/models/PatchDiscriminator.py
index 343e000..dcaf941 100644
--- a/core/leras/models/PatchDiscriminator.py
+++ b/core/leras/models/PatchDiscriminator.py
@@ -111,7 +111,7 @@ class UNetPatchDiscriminator(nn.ModelBase):
for i in range(layers_count-1):
st = 1 + (1 if val & (1 << i) !=0 else 0 )
layers.append ( [3, st ])
- sum_st += st
+ sum_st += st
rf = self.calc_receptive_field_size(layers)
@@ -131,7 +131,7 @@ class UNetPatchDiscriminator(nn.ModelBase):
return s[q][2]
def on_build(self, patch_size, in_ch, base_ch = 16):
-
+
class ResidualBlock(nn.ModelBase):
def on_build(self, ch, kernel_size=3 ):
self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
@@ -152,7 +152,7 @@ class UNetPatchDiscriminator(nn.ModelBase):
self.upres1 = []
self.upres2 = []
layers = self.find_archi(patch_size)
-
+
level_chs = { i-1:v for i,v in enumerate([ min( base_ch * (2**i), 512 ) for i in range(len(layers)+1)]) }
self.in_conv = nn.Conv2D( in_ch, level_chs[-1], kernel_size=1, padding='VALID')
@@ -162,12 +162,12 @@ class UNetPatchDiscriminator(nn.ModelBase):
self.res1.append ( ResidualBlock(level_chs[i]) )
self.res2.append ( ResidualBlock(level_chs[i]) )
-
+
self.upconvs.insert (0, nn.Conv2DTranspose( level_chs[i]*(2 if i != len(layers)-1 else 1), level_chs[i-1], kernel_size=kernel_size, strides=strides, padding='SAME') )
self.upres1.insert (0, ResidualBlock(level_chs[i-1]*2) )
self.upres2.insert (0, ResidualBlock(level_chs[i-1]*2) )
-
+
self.out_conv = nn.Conv2D( level_chs[-1]*2, 1, kernel_size=1, padding='VALID')
self.center_out = nn.Conv2D( level_chs[len(layers)-1], 1, kernel_size=1, padding='VALID')
@@ -183,7 +183,7 @@ class UNetPatchDiscriminator(nn.ModelBase):
x = tf.nn.leaky_relu( conv(x), 0.2 )
x = res1(x)
x = res2(x)
-
+
center_out, x = self.center_out(x), tf.nn.leaky_relu( self.center_conv(x), 0.2 )
for i, (upconv, enc, upres1, upres2 ) in enumerate(zip(self.upconvs, encs, self.upres1, self.upres2)):
@@ -195,3 +195,117 @@ class UNetPatchDiscriminator(nn.ModelBase):
return center_out, self.out_conv(x)
nn.UNetPatchDiscriminator = UNetPatchDiscriminator
+
+class UNetPatchDiscriminatorV2(nn.ModelBase):
+ """
+ Inspired by https://arxiv.org/abs/2002.12655 "A U-Net Based Discriminator for Generative Adversarial Networks"
+ """
+ def calc_receptive_field_size(self, layers):
+ """
+        result is the same as https://fomoro.com/research/article/receptive-field-calculatorindex.html
+ """
+ rf = 0
+ ts = 1
+ for i, (k, s) in enumerate(layers):
+ if i == 0:
+ rf = k
+ else:
+ rf += (k-1)*ts
+ ts *= s
+ return rf
+
+ def find_archi(self, target_patch_size, max_layers=6):
+ """
+ Find the best configuration of layers using only 3x3 convs for target patch size
+ """
+ s = {}
+ for layers_count in range(1,max_layers+1):
+ val = 1 << (layers_count-1)
+ while True:
+ val -= 1
+
+ layers = []
+ sum_st = 0
+ for i in range(layers_count-1):
+ st = 1 + (1 if val & (1 << i) !=0 else 0 )
+ layers.append ( [3, st ])
+ sum_st += st
+ layers.append ( [3, 2])
+ sum_st += 2
+
+ rf = self.calc_receptive_field_size(layers)
+
+ s_rf = s.get(rf, None)
+ if s_rf is None:
+ s[rf] = (layers_count, sum_st, layers)
+ else:
+ if layers_count < s_rf[0] or \
+ ( layers_count == s_rf[0] and sum_st > s_rf[1] ):
+ s[rf] = (layers_count, sum_st, layers)
+
+ if val == 0:
+ break
+
+ x = sorted(list(s.keys()))
+ q=x[np.abs(np.array(x)-target_patch_size).argmin()]
+ return s[q][2]
+
+ def on_build(self, patch_size, in_ch):
+ class ResidualBlock(nn.ModelBase):
+ def on_build(self, ch, kernel_size=3 ):
+ self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
+ self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME')
+
+ def forward(self, inp):
+ x = self.conv1(inp)
+ x = tf.nn.leaky_relu(x, 0.2)
+ x = self.conv2(x)
+ x = tf.nn.leaky_relu(inp + x, 0.2)
+ return x
+
+ prev_ch = in_ch
+ self.convs = []
+ self.res = []
+ self.upconvs = []
+ self.upres = []
+ layers = self.find_archi(patch_size)
+ base_ch = 16
+
+ level_chs = { i-1:v for i,v in enumerate([ min( base_ch * (2**i), 512 ) for i in range(len(layers)+1)]) }
+
+ self.in_conv = nn.Conv2D( in_ch, level_chs[-1], kernel_size=1, padding='VALID')
+
+ for i, (kernel_size, strides) in enumerate(layers):
+ self.convs.append ( nn.Conv2D( level_chs[i-1], level_chs[i], kernel_size=kernel_size, strides=strides, padding='SAME') )
+
+ self.res.append ( ResidualBlock(level_chs[i]) )
+
+ self.upconvs.insert (0, nn.Conv2DTranspose( level_chs[i]*(2 if i != len(layers)-1 else 1), level_chs[i-1], kernel_size=kernel_size, strides=strides, padding='SAME') )
+
+ self.upres.insert (0, ResidualBlock(level_chs[i-1]*2) )
+
+ self.out_conv = nn.Conv2D( level_chs[-1]*2, 1, kernel_size=1, padding='VALID')
+
+ self.center_out = nn.Conv2D( level_chs[len(layers)-1], 1, kernel_size=1, padding='VALID')
+ self.center_conv = nn.Conv2D( level_chs[len(layers)-1], level_chs[len(layers)-1], kernel_size=1, padding='VALID')
+
+
+ def forward(self, x):
+ x = tf.nn.leaky_relu( self.in_conv(x), 0.1 )
+
+ encs = []
+ for conv, res in zip(self.convs, self.res):
+ encs.insert(0, x)
+ x = tf.nn.leaky_relu( conv(x), 0.1 )
+ x = res(x)
+
+ center_out, x = self.center_out(x), self.center_conv(x)
+
+ for i, (upconv, enc, upres) in enumerate(zip(self.upconvs, encs, self.upres)):
+ x = tf.nn.leaky_relu( upconv(x), 0.1 )
+ x = tf.concat( [enc, x], axis=nn.conv2d_ch_axis)
+ x = upres(x)
+
+ return center_out, self.out_conv(x)
+
+nn.UNetPatchDiscriminatorV2 = UNetPatchDiscriminatorV2
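`find_archi` relies on `calc_receptive_field_size`, which is easy to sanity-check standalone: for `[(k, s), ...]` layers, the field grows by `(k-1)` times the product of all earlier strides.

```python
# Standalone check of the receptive-field arithmetic used by find_archi.
def receptive_field(layers):
    rf, ts = 0, 1
    for i, (k, s) in enumerate(layers):
        rf = k if i == 0 else rf + (k - 1) * ts
        ts *= s
    return rf

print(receptive_field([(3, 2), (3, 2), (3, 2)]))  # 3 -> 7 -> 15
```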
diff --git a/core/leras/models/XSeg.py b/core/leras/models/XSeg.py
index 0ba19a6..e6bde65 100644
--- a/core/leras/models/XSeg.py
+++ b/core/leras/models/XSeg.py
@@ -28,11 +28,12 @@ class XSeg(nn.ModelBase):
x = self.frn(x)
x = self.tlu(x)
return x
+
+ self.base_ch = base_ch
self.conv01 = ConvBlock(in_ch, base_ch)
self.conv02 = ConvBlock(base_ch, base_ch)
- self.bp0 = nn.BlurPool (filt_size=3)
-
+ self.bp0 = nn.BlurPool (filt_size=4)
self.conv11 = ConvBlock(base_ch, base_ch*2)
self.conv12 = ConvBlock(base_ch*2, base_ch*2)
@@ -40,19 +41,30 @@ class XSeg(nn.ModelBase):
self.conv21 = ConvBlock(base_ch*2, base_ch*4)
self.conv22 = ConvBlock(base_ch*4, base_ch*4)
- self.conv23 = ConvBlock(base_ch*4, base_ch*4)
- self.bp2 = nn.BlurPool (filt_size=3)
-
+ self.bp2 = nn.BlurPool (filt_size=2)
self.conv31 = ConvBlock(base_ch*4, base_ch*8)
self.conv32 = ConvBlock(base_ch*8, base_ch*8)
self.conv33 = ConvBlock(base_ch*8, base_ch*8)
- self.bp3 = nn.BlurPool (filt_size=3)
+ self.bp3 = nn.BlurPool (filt_size=2)
self.conv41 = ConvBlock(base_ch*8, base_ch*8)
self.conv42 = ConvBlock(base_ch*8, base_ch*8)
self.conv43 = ConvBlock(base_ch*8, base_ch*8)
- self.bp4 = nn.BlurPool (filt_size=3)
+ self.bp4 = nn.BlurPool (filt_size=2)
+
+ self.conv51 = ConvBlock(base_ch*8, base_ch*8)
+ self.conv52 = ConvBlock(base_ch*8, base_ch*8)
+ self.conv53 = ConvBlock(base_ch*8, base_ch*8)
+ self.bp5 = nn.BlurPool (filt_size=2)
+
+ self.dense1 = nn.Dense ( 4*4* base_ch*8, 512)
+ self.dense2 = nn.Dense ( 512, 4*4* base_ch*8)
+
+ self.up5 = UpConvBlock (base_ch*8, base_ch*4)
+ self.uconv53 = ConvBlock(base_ch*12, base_ch*8)
+ self.uconv52 = ConvBlock(base_ch*8, base_ch*8)
+ self.uconv51 = ConvBlock(base_ch*8, base_ch*8)
self.up4 = UpConvBlock (base_ch*8, base_ch*4)
self.uconv43 = ConvBlock(base_ch*12, base_ch*8)
@@ -65,8 +77,7 @@ class XSeg(nn.ModelBase):
self.uconv31 = ConvBlock(base_ch*8, base_ch*8)
self.up2 = UpConvBlock (base_ch*8, base_ch*4)
- self.uconv23 = ConvBlock(base_ch*8, base_ch*4)
- self.uconv22 = ConvBlock(base_ch*4, base_ch*4)
+ self.uconv22 = ConvBlock(base_ch*8, base_ch*4)
self.uconv21 = ConvBlock(base_ch*4, base_ch*4)
self.up1 = UpConvBlock (base_ch*4, base_ch*2)
@@ -78,8 +89,7 @@ class XSeg(nn.ModelBase):
self.uconv01 = ConvBlock(base_ch, base_ch)
self.out_conv = nn.Conv2D (base_ch, out_ch, kernel_size=3, padding='SAME')
- self.conv_center = ConvBlock(base_ch*8, base_ch*8)
-
+
def forward(self, inp):
x = inp
@@ -92,8 +102,7 @@ class XSeg(nn.ModelBase):
x = self.bp1(x)
x = self.conv21(x)
- x = self.conv22(x)
- x = x2 = self.conv23(x)
+ x = x2 = self.conv22(x)
x = self.bp2(x)
x = self.conv31(x)
@@ -106,8 +115,21 @@ class XSeg(nn.ModelBase):
x = x4 = self.conv43(x)
x = self.bp4(x)
- x = self.conv_center(x)
-
+ x = self.conv51(x)
+ x = self.conv52(x)
+ x = x5 = self.conv53(x)
+ x = self.bp5(x)
+
+ x = nn.flatten(x)
+ x = self.dense1(x)
+ x = self.dense2(x)
+ x = nn.reshape_4D (x, 4, 4, self.base_ch*8 )
+
+ x = self.up5(x)
+ x = self.uconv53(tf.concat([x,x5],axis=nn.conv2d_ch_axis))
+ x = self.uconv52(x)
+ x = self.uconv51(x)
+
x = self.up4(x)
x = self.uconv43(tf.concat([x,x4],axis=nn.conv2d_ch_axis))
x = self.uconv42(x)
@@ -119,8 +141,7 @@ class XSeg(nn.ModelBase):
x = self.uconv31(x)
x = self.up2(x)
- x = self.uconv23(tf.concat([x,x2],axis=nn.conv2d_ch_axis))
- x = self.uconv22(x)
+ x = self.uconv22(tf.concat([x,x2],axis=nn.conv2d_ch_axis))
x = self.uconv21(x)
x = self.up1(x)
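Note that the new dense bottleneck (`nn.Dense(4*4*base_ch*8, 512)`) fixes the encoder output at 4×4, so this XSeg variant assumes a 256px input: the six BlurPool halvings give 256→128→64→32→16→8→4.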
diff --git a/core/leras/nn.py b/core/leras/nn.py
index ef5c2c9..7c28874 100644
--- a/core/leras/nn.py
+++ b/core/leras/nn.py
@@ -33,14 +33,14 @@ class nn():
tf = None
tf_sess = None
tf_sess_config = None
- tf_default_device = None
-
+ tf_default_device_name = None
+
data_format = None
conv2d_ch_axis = None
conv2d_spatial_axes = None
floatx = None
-
+
@staticmethod
def initialize(device_config=None, floatx="float32", data_format="NHWC"):
@@ -50,9 +50,6 @@ class nn():
nn.setCurrentDeviceConfig(device_config)
# Manipulate environment variables before import tensorflow
-
- if 'CUDA_VISIBLE_DEVICES' in os.environ.keys():
- os.environ.pop('CUDA_VISIBLE_DEVICES')
first_run = False
if len(device_config.devices) != 0:
@@ -68,22 +65,19 @@ class nn():
compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache' + devices_str)
if not compute_cache_path.exists():
first_run = True
+ compute_cache_path.mkdir(parents=True, exist_ok=True)
os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path)
-
- os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2'
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # tf log errors only
-
+
if first_run:
io.log_info("Caching GPU kernels...")
import tensorflow
-
- tf_version = getattr(tensorflow,'VERSION', None)
- if tf_version is None:
- tf_version = tensorflow.version.GIT_VERSION
- if tf_version[0] == 'v':
- tf_version = tf_version[1:]
-
+
+ tf_version = tensorflow.version.VERSION
+ #if tf_version is None:
+ # tf_version = tensorflow.version.GIT_VERSION
+ if tf_version[0] == 'v':
+ tf_version = tf_version[1:]
if tf_version[0] == '2':
tf = tensorflow.compat.v1
else:
@@ -93,7 +87,7 @@ class nn():
# Disable tensorflow warnings
tf_logger = logging.getLogger('tensorflow')
tf_logger.setLevel(logging.ERROR)
-
+
if tf_version[0] == '2':
tf.disable_v2_behavior()
nn.tf = tf
@@ -105,20 +99,21 @@ class nn():
import core.leras.optimizers
import core.leras.models
import core.leras.archis
-
+
# Configure tensorflow session-config
if len(device_config.devices) == 0:
- nn.tf_default_device = "/CPU:0"
config = tf.ConfigProto(device_count={'GPU': 0})
+ nn.tf_default_device_name = '/CPU:0'
else:
- nn.tf_default_device = "/GPU:0"
- config = tf.ConfigProto()
+ nn.tf_default_device_name = f'/{device_config.devices[0].tf_dev_type}:0'
+
+ config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices])
-
+
config.gpu_options.force_gpu_compatible = True
config.gpu_options.allow_growth = True
nn.tf_sess_config = config
-
+
if nn.tf_sess is None:
nn.tf_sess = tf.Session(config=nn.tf_sess_config)
@@ -202,14 +197,6 @@ class nn():
nn.tf_sess.close()
nn.tf_sess = None
- @staticmethod
- def get_current_device():
- # Undocumented access to last tf.device(...)
- objs = nn.tf.get_default_graph()._device_function_stack.peek_objs()
- if len(objs) != 0:
- return objs[0].display_name
- return nn.tf_default_device
-
@staticmethod
def ask_choose_device_idxs(choose_only_one=False, allow_cpu=True, suggest_best_multi_gpu=False, suggest_all_gpu=False):
devices = Devices.getDevices()
@@ -273,7 +260,7 @@ class nn():
@staticmethod
def ask_choose_device(*args, **kwargs):
return nn.DeviceConfig.GPUIndexes( nn.ask_choose_device_idxs(*args,**kwargs) )
-
+
def __init__ (self, devices=None):
devices = devices or []
diff --git a/core/leras/ops/__init__.py b/core/leras/ops/__init__.py
index 500a22a..ef4efd6 100644
--- a/core/leras/ops/__init__.py
+++ b/core/leras/ops/__init__.py
@@ -204,7 +204,7 @@ def random_binomial(shape, p=0.0, dtype=None, seed=None):
seed = np.random.randint(10e6)
return array_ops.where(
random_ops.random_uniform(shape, dtype=tf.float16, seed=seed) < p,
- array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype))
+ array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype))
nn.random_binomial = random_binomial
def gaussian_blur(input, radius=2.0):
@@ -237,6 +237,19 @@ def gaussian_blur(input, radius=2.0):
return x
nn.gaussian_blur = gaussian_blur
+def get_gaussian_weights(batch_size, in_ch, resolution, num_scale=5, sigma=(0.5, 1., 2., 4., 8.)):
+ w = np.empty((num_scale, batch_size, in_ch, resolution, resolution))
+ for i in range(num_scale):
+ gaussian = np.exp(-1.*np.arange(-(resolution/2-0.5), resolution/2+0.5)**2/(2*sigma[i]**2))
+ gaussian = np.outer(gaussian, gaussian.reshape((resolution, 1))) # extend to 2D
+ gaussian = gaussian/np.sum(gaussian) # normalization
+ gaussian = np.reshape(gaussian, (1, 1, resolution, resolution)) # reshape to 3D
+ gaussian = np.tile(gaussian, (batch_size, in_ch, 1, 1))
+ w[i, :, :, :, :] = gaussian
+ return w
+
+nn.get_gaussian_weights = get_gaussian_weights
+
def style_loss(target, style, gaussian_blur_radius=0.0, loss_weight=1.0, step_size=1):
def sd(content, style, loss_weight):
content_nc = content.shape[ nn.conv2d_ch_axis ]
@@ -333,7 +346,9 @@ def depth_to_space(x, size):
x = tf.reshape(x, (-1, oh, ow, oc, ))
return x
else:
- return tf.depth_to_space(x, size, data_format=nn.data_format)
+ cfg = nn.getCurrentDeviceConfig()
+ if not cfg.cpu_only:
+ return tf.depth_to_space(x, size, data_format=nn.data_format)
b,c,h,w = x.shape.as_list()
oh, ow = h * size, w * size
oc = c // (size * size)
@@ -344,11 +359,6 @@ def depth_to_space(x, size):
return x
nn.depth_to_space = depth_to_space
-def pixel_norm(x, power = 1.0):
- return x * power * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=nn.conv2d_spatial_axes, keepdims=True) + 1e-06)
-nn.pixel_norm = pixel_norm
-
-
def rgb_to_lab(srgb):
srgb_pixels = tf.reshape(srgb, [-1, 3])
linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32)
@@ -385,12 +395,17 @@ def total_variation_mse(images):
"""
pixel_dif1 = images[:, 1:, :, :] - images[:, :-1, :, :]
pixel_dif2 = images[:, :, 1:, :] - images[:, :, :-1, :]
-
+
tot_var = ( tf.reduce_sum(tf.square(pixel_dif1), axis=[1,2,3]) +
tf.reduce_sum(tf.square(pixel_dif2), axis=[1,2,3]) )
return tot_var
nn.total_variation_mse = total_variation_mse
+
+def pixel_norm(x, axes):
+ return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=axes, keepdims=True) + 1e-06)
+nn.pixel_norm = pixel_norm
+
"""
def tf_suppress_lower_mean(t, eps=0.00001):
if t.shape.ndims != 1:
@@ -400,4 +415,4 @@ def tf_suppress_lower_mean(t, eps=0.00001):
q = tf.clip_by_value(q-t_mean_eps, 0, eps)
q = q * (t/eps)
return q
-"""
\ No newline at end of file
+"""
diff --git a/core/mathlib/__init__.py b/core/mathlib/__init__.py
index a11e725..7e5fa13 100644
--- a/core/mathlib/__init__.py
+++ b/core/mathlib/__init__.py
@@ -1,7 +1,12 @@
-import numpy as np
import math
+
+import cv2
+import numpy as np
+import numpy.linalg as npla
+
from .umeyama import umeyama
+
def get_power_of_two(x):
i = 0
while (1 << i) < x:
@@ -23,3 +28,70 @@ def rotationMatrixToEulerAngles(R) :
def polygon_area(x,y):
return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))
+
+def rotate_point(origin, point, deg):
+ """
+ Rotate a point counterclockwise by a given angle around a given origin.
+
+ The angle should be given in radians.
+ """
+ ox, oy = origin
+ px, py = point
+
+ rad = deg * math.pi / 180.0
+ qx = ox + math.cos(rad) * (px - ox) - math.sin(rad) * (py - oy)
+ qy = oy + math.sin(rad) * (px - ox) + math.cos(rad) * (py - oy)
+ return np.float32([qx, qy])
+
+def transform_points(points, mat, invert=False):
+ if invert:
+ mat = cv2.invertAffineTransform (mat)
+ points = np.expand_dims(points, axis=1)
+ points = cv2.transform(points, mat, points.shape)
+ points = np.squeeze(points)
+ return points
+
+
+def transform_mat(mat, res, tx, ty, rotation, scale):
+ """
+ transform mat in local space of res
+ scale -> translate -> rotate
+
+ tx,ty float
+ rotation int degrees
+ scale float
+ """
+
+
+ lt, rt, lb, ct = transform_points ( np.float32([(0,0),(res,0),(0,res),(res / 2, res/2) ]),mat, True)
+
+ hor_v = (rt-lt).astype(np.float32)
+ hor_size = npla.norm(hor_v)
+ hor_v /= hor_size
+
+ ver_v = (lb-lt).astype(np.float32)
+ ver_size = npla.norm(ver_v)
+ ver_v /= ver_size
+
+ bt_diag_vec = (rt-ct).astype(np.float32)
+ half_diag_len = npla.norm(bt_diag_vec)
+ bt_diag_vec /= half_diag_len
+
+ tb_diag_vec = np.float32( [ -bt_diag_vec[1], bt_diag_vec[0] ] )
+
+ rt = ct + bt_diag_vec*half_diag_len*scale
+ lb = ct - bt_diag_vec*half_diag_len*scale
+ lt = ct - tb_diag_vec*half_diag_len*scale
+
+ rt[0] += tx*hor_size
+ lb[0] += tx*hor_size
+ lt[0] += tx*hor_size
+ rt[1] += ty*ver_size
+ lb[1] += ty*ver_size
+ lt[1] += ty*ver_size
+
+ rt = rotate_point(ct, rt, rotation)
+ lb = rotate_point(ct, lb, rotation)
+ lt = rotate_point(ct, lt, rotation)
+
+ return cv2.getAffineTransform( np.float32([lt, rt, lb]), np.float32([ [0,0], [res,0], [0,res] ]) )
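A quick check of `rotate_point` against its docstring: rotating (1, 0) by 90° counterclockwise about the origin lands on (0, 1).

```python
# Sanity check for rotate_point as defined above.
from core.mathlib import rotate_point

print(rotate_point((0, 0), (1, 0), 90))   # ~[0., 1.]
```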
diff --git a/doc/Alipay_donation.jpg b/doc/Alipay_donation.jpg
deleted file mode 100644
index 48781e1..0000000
Binary files a/doc/Alipay_donation.jpg and /dev/null differ
diff --git a/doc/dfl_cover.png b/doc/dfl_cover.png
new file mode 100644
index 0000000..53f2b7f
Binary files /dev/null and b/doc/dfl_cover.png differ
diff --git a/doc/features/background-power/README.md b/doc/features/background-power/README.md
new file mode 100644
index 0000000..0c83e52
--- /dev/null
+++ b/doc/features/background-power/README.md
@@ -0,0 +1,32 @@
+# Background Power option
+
+Allows you to train the model to include the background, which may help with areas around the mask.
+Unlike **Background Style Power**, this does not use any additional VRAM, and does not require lowering the batch size.
+
+- [DESCRIPTION](#description)
+- [USAGE](#usage)
+- [DIFFERENCE WITH BACKGROUND STYLE POWER](#difference-with-background-style-power)
+
+*Examples trained with background power `0.3`:*
+
+
+
+## DESCRIPTION
+
+Applies the same loss calculation used for the area *inside* the mask, to the area *outside* the mask, multiplied with
+the chosen background power value.
+
+E.g. (simplified): Source Loss = Masked area image difference + Background Power * Non-masked area image difference
+
+## USAGE
+
+`[0.0] Background power ( 0.0..1.0 ?:help ) : 0.3`
+
+## DIFFERENCE WITH BACKGROUND STYLE POWER
+
+**Background Style Power** applies a loss to the source by comparing the background of the dest to that of the
+predicted src/dest (5th column). This operation requires additional VRAM, due to the fact that the predicted src/dest
+outputs are not normally used in training (other than being viewable in the preview window).
+
+**Background Power** does *not* use the src/dest images whatsoever, instead comparing the background of the predicted
+source to that of the original source, and the same for the background of the dest images.
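A hedged sketch of the simplified formula above (illustrative, not the trainer's exact code):

```python
# Per-pixel squared error, weighted by the mask inside and by
# background power outside; bg_power=0.3 matches the usage example.
import numpy as np

def src_loss(pred, target, mask, bg_power=0.3):
    diff = np.square(target - pred)
    return (diff * mask).mean() + bg_power * (diff * (1.0 - mask)).mean()
```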
diff --git a/doc/features/background-power/example.jpeg b/doc/features/background-power/example.jpeg
new file mode 100644
index 0000000..004d149
Binary files /dev/null and b/doc/features/background-power/example.jpeg differ
diff --git a/doc/features/background-power/example2.jpeg b/doc/features/background-power/example2.jpeg
new file mode 100644
index 0000000..7d00de9
Binary files /dev/null and b/doc/features/background-power/example2.jpeg differ
diff --git a/doc/features/gan-options/README.md b/doc/features/gan-options/README.md
new file mode 100644
index 0000000..45d1adb
--- /dev/null
+++ b/doc/features/gan-options/README.md
@@ -0,0 +1,50 @@
+# GAN Options
+
+Allows you to use one-sided label smoothing and noisy labels when training the discriminator.
+
+- [ONE-SIDED LABEL SMOOTHING](#one-sided-label-smoothing)
+- [NOISY LABELS](#noisy-labels)
+
+## ONE-SIDED LABEL SMOOTHING
+
+
+
+> Deep networks may suffer from overconfidence. For example, it uses very few features to classify an object. To
+> mitigate the problem, deep learning uses regulation and dropout to avoid overconfidence.
+>
+> In GAN, if the discriminator depends on a small set of features to detect real images, the generator may just produce
+> these features only to exploit the discriminator. The optimization may turn too greedy and produces no long term
+> benefit. In GAN, overconfidence hurts badly. To avoid the problem, we penalize the discriminator when the prediction
+> for any real images go beyond 0.9 (D(real image)>0.9). This is done by setting our target label value to be 0.9
+> instead of 1.0.
+ - [GAN — Ways to improve GAN performance](https://towardsdatascience.com/gan-ways-to-improve-gan-performance-acf37f9f59b)
+
+By setting the label smoothing value to any value > 0, the target label value used with the discriminator will be:
+```
+target label value = 1 - (label smoothing value)
+```
+### USAGE
+
+```
+[0.1] GAN label smoothing ( 0 - 0.5 ?:help ) : 0.1
+```
+
+## NOISY LABELS
+
+> make the labels the noisy for the discriminator: occasionally flip the labels when training the discriminator
+ - [How to Train a GAN? Tips and tricks to make GANs work](https://github.com/soumith/ganhacks/blob/master/README.md#6-use-soft-and-noisy-labels)
+
+By setting the noisy labels value to any value > 0, then the target labels used with the discriminator will be flipped
+("fake" => "real" / "real" => "fake") with probability p (where p is the noisy label value).
+
+E.g., if the value is 0.05, then ~5% of the labels will be flipped when training the discriminator.
+
+### USAGE
+```
+[0.05] GAN noisy labels ( 0 - 0.5 ?:help ) : 0.05
+```
+
+
+
+
+
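A hedged sketch of how the two options combine when building discriminator targets (illustrative, not the trainer's exact code):

```python
# One-sided smoothing lowers the "real" target; noisy labels flip a
# fraction p of real/fake targets. Values mirror the usage examples.
import numpy as np

def disc_labels(batch, smoothing=0.1, noisy=0.05, rnd=np.random):
    real = np.full((batch, 1), 1.0 - smoothing, np.float32)
    fake = np.zeros((batch, 1), np.float32)
    flip = rnd.rand(batch, 1) < noisy
    return np.where(flip, fake, real), np.where(flip, real, fake)
```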
diff --git a/doc/features/gan-options/tutorial-on-theory-and-application-of-generative-adversarial-networks-54-638.jpg b/doc/features/gan-options/tutorial-on-theory-and-application-of-generative-adversarial-networks-54-638.jpg
new file mode 100644
index 0000000..e3698e9
Binary files /dev/null and b/doc/features/gan-options/tutorial-on-theory-and-application-of-generative-adversarial-networks-54-638.jpg differ
diff --git a/doc/features/ms-ssim/README.md b/doc/features/ms-ssim/README.md
new file mode 100644
index 0000000..41039d3
--- /dev/null
+++ b/doc/features/ms-ssim/README.md
@@ -0,0 +1,43 @@
+# Multiscale SSIM (MS-SSIM)
+
+Allows you to train using the MS-SSIM (multiscale structural similarity index measure) as the main loss metric,
+a perceptually more accurate measure of image quality than MSE (mean squared error).
+
+As an added benefit, you may see a decrease in ms/iteration (when using the same batch size) with Multiscale loss
+enabled. You may also be able to train with a larger batch size with it enabled.
+
+- [DESCRIPTION](#description)
+- [USAGE](#usage)
+
+## DESCRIPTION
+
+[SSIM](https://en.wikipedia.org/wiki/Structural_similarity) is a metric for comparing the perceptual quality of an image:
+> SSIM is a perception-based model that considers image degradation as perceived change in structural information,
+> while also incorporating important perceptual phenomena, including both luminance masking and contrast masking terms.
+> [...]
+> Structural information is the idea that the pixels have strong inter-dependencies especially when they are spatially
+> close. These dependencies carry important information about the structure of the objects in the visual scene.
+> Luminance masking is a phenomenon whereby image distortions (in this context) tend to be less visible in bright
+> regions, while contrast masking is a phenomenon whereby distortions become less visible where there is significant
+> activity or "texture" in the image.
+
+The current loss metric is a combination of SSIM (structural similarity index measure) and
+[MSE](https://en.wikipedia.org/wiki/Mean_squared_error) (mean squared error).
+
+[Multiscale SSIM](https://en.wikipedia.org/wiki/Structural_similarity#Multi-Scale_SSIM) is a variant of SSIM that
+improves upon SSIM by comparing the similarity at multiple scales (e.g. full-size, half-size, 1/4 size, etc.).
+By using MS-SSIM as our main loss metric, we should expect the image similarity to improve across each scale, improving
+both the large-scale and small-scale detail of the predicted images.
+
+Original paper: [Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik.
+"Multiscale structural similarity for image quality assessment."
+Signals, Systems and Computers, 2004.](https://www.cns.nyu.edu/pub/eero/wang03b.pdf)
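+
+As an illustration (not necessarily this trainer's exact implementation), TensorFlow ships a
+multiscale SSIM op that can be turned into a loss:
+
+```
+import tensorflow as tf
+
+def ms_ssim_loss(target, pred):
+    # target/pred: float32 in [0, 1], shape (batch, height, width, channels);
+    # inputs must be large enough for the default 5 scales (roughly >= 176 px).
+    # MS-SSIM is a similarity in [0, 1], so the loss is its complement.
+    return 1.0 - tf.image.ssim_multiscale(target, pred, max_val=1.0)
+```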
+
+## USAGE
+
+```
+[n] Use multiscale loss? ( y/n ?:help ) : y
+```
+
+
+
diff --git a/doc/features/random-color/README.md b/doc/features/random-color/README.md
new file mode 100644
index 0000000..d1aeac1
--- /dev/null
+++ b/doc/features/random-color/README.md
@@ -0,0 +1,25 @@
+# Random Color option
+
+Helps train the model to generalize perceptual color and lightness, and improves color transfer between src and dst.
+
+- [DESCRIPTION](#description)
+- [USAGE](#usage)
+
+![](example.jpeg)
+
+## DESCRIPTION
+
+Converts images to [CIE L\*a\*b* colorspace](https://en.wikipedia.org/wiki/CIELAB_color_space),
+then randomly rotates the color values around the `L*` axis: the perceptual lightness stays constant, and only
+the `a*` and `b*` color channels are modified. After rotation, the image is converted back to BGR
+(blue/green/red) colorspace.
+
+If visualized using the [CIE L\*a\*b* cylindrical model](https://en.wikipedia.org/wiki/CIELAB_color_space#Cylindrical_model),
+this is a random rotation of `h°` (hue angle, the angle of the hue in the CIELAB color wheel),
+maintaining the same `C*` (chroma, relative saturation).
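+
+A minimal sketch of the rotation (hypothetical helper; uses OpenCV's 8-bit L\*a\*b* conversion,
+where `a*`/`b*` are stored offset by 128):
+
+```
+import cv2
+import numpy as np
+
+def random_lab_rotation(img_bgr, rnd=np.random):
+    # img_bgr: uint8 BGR image
+    angle = rnd.uniform(0, 2 * np.pi)  # random hue angle h
+    lab = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2LAB).astype(np.float32)
+    a = lab[..., 1] - 128.0
+    b = lab[..., 2] - 128.0
+    # rotate (a*, b*) around the L* axis; L* (lightness) is untouched
+    lab[..., 1] = a * np.cos(angle) - b * np.sin(angle) + 128.0
+    lab[..., 2] = a * np.sin(angle) + b * np.cos(angle) + 128.0
+    return cv2.cvtColor(np.clip(lab, 0, 255).astype(np.uint8), cv2.COLOR_LAB2BGR)
+```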
+
+## USAGE
+
+```
+[n] Random color ( y/n ?:help ) : y
+```
+
diff --git a/doc/features/random-color/example.jpeg b/doc/features/random-color/example.jpeg
new file mode 100644
index 0000000..2a69632
Binary files /dev/null and b/doc/features/random-color/example.jpeg differ
diff --git a/doc/features/webui/README.md b/doc/features/webui/README.md
new file mode 100644
index 0000000..c181fca
--- /dev/null
+++ b/doc/features/webui/README.md
@@ -0,0 +1,45 @@
+# Web UI
+
+View and interact with the training preview window with your web browser.
+Allows you to view and control the preview remotely, and train on headless machines.
+
+- [INSTALLATION](#installation)
+- [DESCRIPTION](#description)
+- [USAGE](#usage)
+- [SSH PORT FORWARDING](#ssh-port-forwarding)
+
+![](example.png)
+
+## INSTALLATION
+
+Requires additional Python dependencies to be installed:
+- [Flask](https://palletsprojects.com/p/flask/),
+ version [1.1.1](https://pypi.org/project/Flask/1.1.1/)
+- [Flask-SocketIO](https://github.com/miguelgrinberg/Flask-SocketIO/),
+ version [4.2.1](https://pypi.org/project/Flask-SocketIO/4.2.1/)
+
+```
+pip install Flask==1.1.1
+pip install Flask-SocketIO==4.2.1
+```
+
+## DESCRIPTION
+
+Launches a Flask web application which sends commands to the training thread
+(save/exit/fetch new preview, etc.), displays live updates of the log output
+(e.g. `[09:50:53][#106913][0503ms][0.3109][0.2476]`), and updates the graph/preview image.
+
+## USAGE
+
+Enable the Web UI by appending `--flask-preview` to the `train` command.
+Once training begins, the Web UI will start and can be accessed at http://localhost:5000/
+
+## SSH PORT FORWARDING
+
+When running on a remote/headless box, you can view the Web UI in your local browser simply by
+adding the SSH option `-L 5000:localhost:5000`. Once connected, the Web UI can be viewed
+locally at http://localhost:5000/
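+
+E.g. (`user@remote-host` is a placeholder for your own login):
+
+```
+ssh -L 5000:localhost:5000 user@remote-host
+```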
+
+Several Android/iOS SSH apps (such as [JuiceSSH](https://juicessh.com/))
+support port forwarding, allowing you to interact with the preview pane
+from anywhere with your phone.
diff --git a/doc/features/webui/example.png b/doc/features/webui/example.png
new file mode 100644
index 0000000..4cc7c05
Binary files /dev/null and b/doc/features/webui/example.png differ
diff --git a/doc/fixes/predicted_src_mask/README.md b/doc/fixes/predicted_src_mask/README.md
new file mode 100644
index 0000000..1247442
--- /dev/null
+++ b/doc/fixes/predicted_src_mask/README.md
@@ -0,0 +1,5 @@
+# Example of bug:
+
+![](preview_image_bug.jpeg)
+
+# Demonstration of fix:
+
+![](preview_image_fix.jpeg)
diff --git a/doc/fixes/predicted_src_mask/preview_image_bug.jpeg b/doc/fixes/predicted_src_mask/preview_image_bug.jpeg
new file mode 100644
index 0000000..52767fa
Binary files /dev/null and b/doc/fixes/predicted_src_mask/preview_image_bug.jpeg differ
diff --git a/doc/fixes/predicted_src_mask/preview_image_fix.jpeg b/doc/fixes/predicted_src_mask/preview_image_fix.jpeg
new file mode 100644
index 0000000..43a8b19
Binary files /dev/null and b/doc/fixes/predicted_src_mask/preview_image_fix.jpeg differ
diff --git a/doc/logo_directx.png b/doc/logo_directx.png
new file mode 100644
index 0000000..f9fb10a
Binary files /dev/null and b/doc/logo_directx.png differ
diff --git a/facelib/FaceEnhancer.py b/facelib/FaceEnhancer.py
index 0b5ced3..1dc0dd9 100644
--- a/facelib/FaceEnhancer.py
+++ b/facelib/FaceEnhancer.py
@@ -161,11 +161,11 @@ class FaceEnhancer(object):
if not model_path.exists():
raise Exception("Unable to load FaceEnhancer.npy")
- with tf.device ('/CPU:0' if place_model_on_cpu else '/GPU:0'):
+ with tf.device ('/CPU:0' if place_model_on_cpu else nn.tf_default_device_name):
self.model = FaceEnhancer()
self.model.load_weights (model_path)
- with tf.device ('/CPU:0' if run_on_cpu else '/GPU:0'):
+ with tf.device ('/CPU:0' if run_on_cpu else nn.tf_default_device_name):
self.model.build_for_run ([ (tf.float32, nn.get4Dshape (192,192,3) ),
(tf.float32, (None,1,) ),
(tf.float32, (None,1,) ),
diff --git a/facelib/XSegNet.py b/facelib/XSegNet.py
index 761ab94..5621a65 100644
--- a/facelib/XSegNet.py
+++ b/facelib/XSegNet.py
@@ -39,7 +39,7 @@ class XSegNet(object):
self.target_t = tf.placeholder (nn.floatx, nn.get4Dshape(resolution,resolution,1) )
# Initializing model classes
- with tf.device ('/CPU:0' if place_model_on_cpu else '/GPU:0'):
+ with tf.device ('/CPU:0' if place_model_on_cpu else nn.tf_default_device_name):
self.model = nn.XSeg(3, 32, 1, name=name)
self.model_weights = self.model.get_weights()
if training:
@@ -53,7 +53,7 @@ class XSegNet(object):
self.model_filename_list += [ [self.model, f'{model_name}.npy'] ]
if not training:
- with tf.device ('/CPU:0' if run_on_cpu else '/GPU:0'):
+ with tf.device ('/CPU:0' if run_on_cpu else nn.tf_default_device_name):
_, pred = self.model(self.input_t)
def net_run(input_np):
diff --git a/flaskr/__init__.py b/flaskr/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/flaskr/app.py b/flaskr/app.py
new file mode 100644
index 0000000..b56943e
--- /dev/null
+++ b/flaskr/app.py
@@ -0,0 +1,102 @@
+from pathlib import Path
+
+from flask import Flask, send_file, Response, render_template, render_template_string, request, g
+from flask_socketio import SocketIO, emit
+import logging
+
+
+def create_flask_app(s2c, c2s, s2flask, kwargs):
+    app = Flask(__name__, template_folder="templates", static_folder="static")
+    log = logging.getLogger('werkzeug')
+    log.disabled = True
+    model_path = Path(kwargs.get('saved_models_path', ''))
+    filename = 'preview.png'
+    preview_file = str(model_path / filename)
+
+    def gen():
+        # multipart/x-mixed-replace generator: re-read the preview file and
+        # stream each version as a new frame
+        frame = open(preview_file, 'rb').read()
+        while True:
+            try:
+                frame = open(preview_file, 'rb').read()
+            except OSError:
+                # the preview may be mid-write; keep streaming the last good frame
+                pass
+            yield b'--frame\r\nContent-Type: image/png\r\n\r\n'
+            yield frame
+            yield b'\r\n\r\n'
+
+    def send(queue, op):
+        # fire-and-forget command to the training thread
+        queue.put({'op': op})
+
+    def send_and_wait(queue, op):
+        # drain stale replies, send the command, then busy-wait until the
+        # training thread acknowledges on s2flask
+        while not s2flask.empty():
+            s2flask.get()
+        queue.put({'op': op})
+        while s2flask.empty():
+            pass
+        s2flask.get()
+
+    @app.route('/save', methods=['POST'])
+    def save():
+        send(s2c, 'save')
+        return '', 204
+
+    @app.route('/exit', methods=['POST'])
+    def exit():
+        send(c2s, 'close')
+        request.environ.get('werkzeug.server.shutdown')()
+        return '', 204
+
+    @app.route('/update', methods=['POST'])
+    def update():
+        send(c2s, 'update')
+        return '', 204
+
+    @app.route('/next_preview', methods=['POST'])
+    def next_preview():
+        send(c2s, 'next_preview')
+        return '', 204
+
+    @app.route('/change_history_range', methods=['POST'])
+    def change_history_range():
+        send(c2s, 'change_history_range')
+        return '', 204
+
+    @app.route('/zoom_prev', methods=['POST'])
+    def zoom_prev():
+        send(c2s, 'zoom_prev')
+        return '', 204
+
+    @app.route('/zoom_next', methods=['POST'])
+    def zoom_next():
+        send(c2s, 'zoom_next')
+        return '', 204
+
+    @app.route('/')
+    def index():
+        return render_template('index.html')
+
+    # @app.route('/preview_image')
+    # def preview_image():
+    #     return Response(gen(), mimetype='multipart/x-mixed-replace;boundary=frame')
+
+    @app.route('/preview_image')
+    def preview_image():
+        return send_file(preview_file, mimetype='image/png', cache_timeout=-1)
+
+    socketio = SocketIO(app)
+
+    @socketio.on('connect', namespace='/')
+    def test_connect():
+        emit('my response', {'data': 'Connected'})
+
+    # disconnect handler on the same namespace as the connect handler
+    @socketio.on('disconnect', namespace='/')
+    def test_disconnect():
+        print('Client disconnected')
+
+    return socketio, app
+
+
+
+
+
+
diff --git a/flaskr/static/favicon.ico b/flaskr/static/favicon.ico
new file mode 100644
index 0000000..46aec07
Binary files /dev/null and b/flaskr/static/favicon.ico differ
diff --git a/flaskr/templates/index.html b/flaskr/templates/index.html
new file mode 100644
index 0000000..4ab78dd
--- /dev/null
+++ b/flaskr/templates/index.html
@@ -0,0 +1,95 @@
+<!-- [template markup lost in extraction] index.html renders the
+     "Training Preview" page, with control buttons: Save, Exit, Update,
+     Next preview, Change History Range, Zoom -, Zoom + -->