mirror of
https://github.com/iperov/DeepFaceLab.git
synced 2025-07-06 21:12:07 -07:00
We cannot use the GPU for the Lab converter in converter multiprocesses, because almost all VRAM is consumed by the model process, so even 300 MB of free VRAM is not enough for the TensorFlow Lab converter. Removed the skimage dependency. Refactorings.
378 lines
No EOL
15 KiB
Python
378 lines
No EOL
15 KiB
Python
import sys
|
|
from utils import random_utils
|
|
import numpy as np
|
|
import cv2
|
|
import localization
|
|
from scipy.spatial import Delaunay
|
|
from PIL import Image, ImageDraw, ImageFont
|
|
|
|
def channel_hist_match(source, template, mask=None):
    """Match the value histogram of single-channel `source` to `template`.

    Returns an array of `source`'s shape whose value distribution follows
    `template`'s.  Based on:
    https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x

    NOTE(review): `mask` has never had an effect — the previous code built
    masked copies of both images and a second set of np.unique() results but
    used neither; that dead work has been removed.  Behavior is unchanged:
    `mask` is accepted and ignored.  TODO: implement or drop masked matching.
    """
    oldshape = source.shape
    source = source.ravel()
    template = template.ravel()

    # Sorted unique values of the source, the bin index of every source
    # pixel, and per-value counts (for the empirical CDF).
    s_values, bin_idx, s_counts = np.unique(source, return_inverse=True,
                                            return_counts=True)
    t_values, t_counts = np.unique(template, return_counts=True)

    # Empirical CDFs (quantiles) of both images.
    s_quantiles = np.cumsum(s_counts).astype(np.float64)
    s_quantiles /= s_quantiles[-1]
    t_quantiles = np.cumsum(t_counts).astype(np.float64)
    t_quantiles /= t_quantiles[-1]

    # For each source quantile, take the template value at the same quantile.
    interp_t_values = np.interp(s_quantiles, t_quantiles, t_values)

    return interp_t_values[bin_idx].reshape(oldshape)
|
|
|
|
def color_hist_match(src_im, tar_im, mask=None):
    """Histogram-match the first three channels of `src_im` to `tar_im`.

    Channels beyond the third (e.g. alpha) are carried through unchanged.
    Returns an array of `src_im`'s shape and dtype.
    """
    h, w, c = src_im.shape

    # Match each of the first three planes independently.
    planes = [channel_hist_match(src_im[:, :, ch], tar_im[:, :, ch], mask)
              for ch in range(3)]

    # Pass any extra planes (alpha, ...) through untouched.
    planes += [src_im[:, :, ch] for ch in range(3, c)]

    return np.stack(planes, axis=-1).astype(src_im.dtype)
|
|
|
|
|
|
# Cache of loaded PIL fonts, keyed by "<font-name>_<size>".
pil_fonts = {}

def _get_pil_font (font, size):
    """Return a cached PIL truetype font of the given name and pixel size.

    Falls back to PIL's built-in default font if the ``<font>.ttf`` file
    cannot be loaded (best-effort: text rendering should never fail hard).
    """
    global pil_fonts
    try:
        font_str_id = '%s_%d' % (font, size)
        # Idiom fix: membership test directly on the dict, not on .keys().
        if font_str_id not in pil_fonts:
            pil_fonts[font_str_id] = ImageFont.truetype(font + ".ttf", size=size, encoding="unic")
        return pil_fonts[font_str_id]
    except Exception:
        # BUGFIX: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        return ImageFont.load_default()
|
|
|
|
def get_text_image( shape, text, color=(1,1,1), border=0.2, font=None):
    """Render `text` onto a black canvas.

    `shape` is (width, height, channels); the returned float array is
    (height, width, channels) with values in [0..1].  Channels beyond the
    third are filled with ones.  On any rendering failure a black image of
    the requested size is returned.  `border` and `font` are currently
    unused — kept for interface compatibility with callers.
    """
    try:
        # Font size follows the requested height.
        size = shape[1]
        pil_font = _get_pil_font( localization.get_default_ttf_font_name() , size)
        text_width, text_height = pil_font.getsize(text)

        # PIL's Image.new takes (width, height), matching shape[0:2].
        canvas = Image.new('RGB', shape[0:2], (0,0,0) )
        draw = ImageDraw.Draw(canvas)
        offset = ( 0, 0)
        # BUGFIX: np.int was removed in NumPy 1.24+; the builtin int is the
        # documented replacement and is what the alias always meant.
        draw.text(offset, text, font=pil_font, fill=tuple((np.array(color)*255).astype(int)) )

        result = np.asarray(canvas) / 255
        if shape[2] != 3:
            # Pad extra channels (e.g. alpha) with ones.
            result = np.concatenate ( (result, np.ones ( (shape[1],) + (shape[0],) + (shape[2]-3,)) ), axis=2 )

        return result
    except Exception:
        # BUGFIX: was a bare `except:`.  Best-effort fallback: black image.
        return np.zeros ( (shape[1], shape[0], shape[2]), dtype=np.float32 )
|
|
|
|
def draw_text( image, rect, text, color=(1,1,1), border=0.2, font=None):
    """Additively blit rendered `text` into `image` inside `rect` (l,t,r,b).

    The rectangle is clamped to the image bounds; the text image is added
    in place onto the clamped region.
    """
    h, w, c = image.shape

    left, top, right, bottom = rect
    # Clamp every edge of the rectangle to the valid pixel range.
    left = np.clip(left, 0, w - 1)
    right = np.clip(right, 0, w - 1)
    top = np.clip(top, 0, h - 1)
    bottom = np.clip(bottom, 0, h - 1)

    image[top:bottom, left:right] += get_text_image(
        (right - left, bottom - top, c), text, color, border, font)
|
|
|
|
def draw_text_lines (image, rect, text_lines, color=(1,1,1), border=0.2, font=None):
    """Draw each string of `text_lines` on its own row of `image`.

    The height of `rect` (l,t,r,b) is split evenly between the lines.
    Rows are placed starting at y=0 (the top offset of `rect` is not
    applied), matching historical behavior.  No-op for an empty list.
    """
    n_lines = len(text_lines)
    if n_lines == 0:
        return

    l, t, r, b = rect
    line_h = (b - t) // n_lines

    for idx, line in enumerate(text_lines):
        draw_text(image, (l, idx * line_h, r, (idx + 1) * line_h),
                  line, color, border, font)
|
|
|
|
def get_draw_text_lines ( image, rect, text_lines, color=(1,1,1), border=0.2, font=None):
    """Like draw_text_lines, but draws onto a fresh black canvas of the same
    shape as `image` (the input pixels themselves are not used) and returns it.
    """
    # BUGFIX: np.float was removed in NumPy 1.24+; it was an alias for the
    # builtin float, i.e. float64, which np.float64 preserves exactly.
    canvas = np.zeros ( image.shape, dtype=np.float64 )
    draw_text_lines ( canvas, rect, text_lines, color, border, font)
    return canvas
|
|
|
|
|
|
def draw_polygon (image, points, color, thickness = 1):
    """Draw a closed polygon on `image` by connecting consecutive `points`;
    the last point is linked back to the first.  No-op for an empty list."""
    n = len(points)
    for idx in range(n):
        start = tuple(points[idx])
        end = tuple(points[(idx + 1) % n])
        cv2.line(image, start, end, color, thickness=thickness)
|
|
|
|
def draw_rect(image, rect, color, thickness=1):
    """Outline the (l,t,r,b) rectangle `rect` on `image`."""
    left, top, right, bottom = rect
    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
    draw_polygon(image, corners, color, thickness)
|
|
|
|
def rectContains(rect, point) :
    """Return True when `point` (x, y) lies inside `rect` (l, t, r, b).

    Left/top edges are inclusive; right/bottom edges are exclusive.
    """
    x, y = point[0], point[1]
    return rect[0] <= x < rect[2] and rect[1] <= y < rect[3]
|
|
|
|
def applyAffineTransform(src, srcTri, dstTri, size) :
    """Warp `src` with the affine map that carries triangle `srcTri` onto
    `dstTri`.  `size` is (width, height); borders are mirror-reflected."""
    warp_mat = cv2.getAffineTransform(np.float32(srcTri), np.float32(dstTri))
    out_w, out_h = size[0], size[1]
    return cv2.warpAffine(src, warp_mat, (out_w, out_h), None,
                          flags=cv2.INTER_LINEAR,
                          borderMode=cv2.BORDER_REFLECT_101)
|
|
|
|
def morphTriangle(dst_img, src_img, st, dt) :
    """Warp triangle `st` of `src_img` onto triangle `dt` of `dst_img`,
    blending the warped patch in place through a filled-triangle mask."""
    (h, w, c) = dst_img.shape
    src_box = np.array(cv2.boundingRect(np.float32(st)))
    dst_box = np.array(cv2.boundingRect(np.float32(dt)))

    # Triangle vertices relative to their bounding boxes.
    src_tri = st - src_box[0:2]
    dst_tri = dt - dst_box[0:2]

    # 1.0 inside the destination triangle, 0.0 outside.
    mask = np.zeros((dst_box[3], dst_box[2], c), dtype=np.float32)
    cv2.fillConvexPoly(mask, np.int32(dst_tri), (1.0,) * c, 8, 0)

    src_patch = src_img[src_box[1]:src_box[1] + src_box[3],
                        src_box[0]:src_box[0] + src_box[2]]
    warped = applyAffineTransform(src_patch, src_tri, dst_tri,
                                  (dst_box[2], dst_box[3]))

    rows = slice(dst_box[1], dst_box[1] + dst_box[3])
    cols = slice(dst_box[0], dst_box[0] + dst_box[2])
    dst_img[rows, cols] = dst_img[rows, cols] * (1 - mask) + warped * mask
|
|
|
|
def morph_by_points (image, sp, dp):
    """Morph `image` by moving source landmarks `sp` to destinations `dp`,
    warping one triangle at a time over a Delaunay triangulation of `dp`.

    Raises ValueError when the two point arrays differ in shape.
    """
    if sp.shape != dp.shape:
        raise ValueError ('morph_by_points() sp.shape != dp.shape')

    (h, w, c) = image.shape

    result = np.zeros(image.shape, dtype=image.dtype)
    for simplex in Delaunay(dp).simplices:
        morphTriangle(result, image, sp[simplex], dp[simplex])
    return result
|
|
|
|
def equalize_and_stack_square (images, axis=1):
    """Bring every image to a common square size and channel count, then
    concatenate them along `axis` (default 1 = side by side).

    Grayscale (2D) and gray+alpha (2-channel) images are expanded to the
    widest channel count found; images with 3+ channels are padded with an
    alpha plane of ones.  Every image is resized to the smallest height or
    width found over all inputs.  NOTE: the input list is modified in place.
    """
    # Widest channel count among the inputs (a 2D image counts as 1).
    max_c = max ([ 1 if len(image.shape) == 2 else image.shape[2] for image in images ] )

    # Smallest height or width over all inputs becomes the target side.
    target_wh = 99999
    for i,image in enumerate(images):
        if len(image.shape) == 2:
            h,w = image.shape
            c = 1
        else:
            h,w,c = image.shape

        if h < target_wh:
            target_wh = h
        if w < target_wh:
            target_wh = w

    for i,image in enumerate(images):
        if len(image.shape) == 2:
            h,w = image.shape
            c = 1
        else:
            h,w,c = image.shape

        if c < max_c:
            if c == 1:
                # Grayscale: replicate the single plane max_c times.
                if len(image.shape) == 2:
                    image = np.expand_dims ( image, -1 )
                image = np.concatenate ( (image,)*max_c, -1 )
            elif c == 2: #GA: drop the alpha plane, replicate the gray plane.
                image = np.expand_dims ( image[...,0], -1 )
                image = np.concatenate ( (image,)*max_c, -1 )
            else:
                # Pad missing channels with an opaque alpha plane of ones.
                image = np.concatenate ( (image, np.ones((h,w,max_c - c))), -1 )

        if h != target_wh or w != target_wh:
            image = cv2.resize ( image, (target_wh, target_wh) )
            h,w,c = image.shape

        images[i] = image

    # BUGFIX: `axis` was previously ignored — concatenation was hard-coded
    # to axis=1 regardless of the argument.
    return np.concatenate ( images, axis=axis )
|
|
|
|
def bgr2hsv (img):
    """Convert a BGR image to HSV."""
    converted = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    return converted
|
|
|
|
def hsv2bgr (img):
    """Convert an HSV image back to BGR."""
    converted = cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
    return converted
|
|
|
|
def bgra2hsva (img):
    """Convert a BGRA image to HSVA: BGR->HSV on the color planes, with the
    alpha plane carried through unchanged."""
    hsv = cv2.cvtColor(img[..., 0:3], cv2.COLOR_BGR2HSV)
    alpha = np.expand_dims(img[..., 3], -1)
    return np.concatenate((hsv, alpha), -1)
|
|
|
|
def bgra2hsva_list (imgs):
    """Apply bgra2hsva to every image in `imgs`, returning a new list."""
    return list(map(bgra2hsva, imgs))
|
|
|
|
def hsva2bgra (img):
    """Convert an HSVA image to BGRA: HSV->BGR on the color planes, with the
    alpha plane carried through unchanged."""
    bgr = cv2.cvtColor(img[..., 0:3], cv2.COLOR_HSV2BGR)
    alpha = np.expand_dims(img[..., 3], -1)
    return np.concatenate((bgr, alpha), -1)
|
|
|
|
def hsva2bgra_list (imgs):
    """Apply hsva2bgra to every image in `imgs`, returning a new list."""
    return list(map(hsva2bgra, imgs))
|
|
|
|
def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ):
    """Sample a random augmentation parameter set for `source`.

    Returns a dict with 'mapx'/'mapy' (cv2.remap grids for a random grid
    warp), 'rmat' (a random rotation/scale/translation affine matrix),
    'w' (image side length) and 'flip' (bool).  Consumed by warp_by_params.

    Raises ValueError unless the image is square with side 64/128/256/512/1024.
    """
    h,w,c = source.shape
    if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024):
        raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.')

    # Random rotation (degrees), scale factor and translation (fractions of w).
    rotation = np.random.uniform( rotation_range[0], rotation_range[1] )
    scale = np.random.uniform(1 +scale_range[0], 1 +scale_range[1])
    tx = np.random.uniform( tx_range[0], tx_range[1] )
    ty = np.random.uniform( ty_range[0], ty_range[1] )

    #random warp by grid
    # Cell size is randomly w/2, w/4 or w/8.
    cell_size = [ w // (2**i) for i in range(1,4) ] [ np.random.randint(3) ]
    cell_count = w // cell_size + 1

    # Regular grid of control points covering [0, w].
    grid_points = np.linspace( 0, w, cell_count)
    mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy()
    # NOTE(review): mapy is a transposed VIEW of mapx's buffer, so the two
    # in-place perturbations below alias the same memory and affect each
    # other — possibly unintended; confirm before changing, since existing
    # behavior may be relied upon.
    mapy = mapx.T

    # Jitter the interior control points; the border rows/cols stay fixed.
    mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24)
    mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24)

    half_cell_size = cell_size // 2

    # Upsample the coarse grids to per-pixel remap fields, cropping the
    # half-cell border introduced by the resize.
    mapx = cv2.resize(mapx, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32)
    mapy = cv2.resize(mapy, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32)

    #random transform
    random_transform_mat = cv2.getRotationMatrix2D((w // 2, w // 2), rotation, scale)
    random_transform_mat[:, 2] += (tx*w, ty*w)

    params = dict()
    params['mapx'] = mapx
    params['mapy'] = mapy
    params['rmat'] = random_transform_mat
    params['w'] = w
    # Flip with probability 0.4, and only when the caller allows it.
    params['flip'] = flip and np.random.randint(10) < 4

    return params
|
|
|
|
def warp_by_params (params, img, warp, transform, flip):
    """Apply the augmentations described by `params` (see gen_warp_params).

    Each stage is optional: `warp` applies the random grid remap,
    `transform` the random affine matrix, and `flip` mirrors horizontally
    (only when the sampled params also enabled flipping).
    """
    result = img
    if warp:
        result = cv2.remap(result, params['mapx'], params['mapy'],
                           cv2.INTER_LANCZOS4)
    if transform:
        result = cv2.warpAffine(result, params['rmat'],
                                (params['w'], params['w']),
                                borderMode=cv2.BORDER_CONSTANT,
                                flags=cv2.INTER_LANCZOS4)
    if flip and params['flip']:
        result = result[:, ::-1, :]
    return result
|
|
|
|
#n_colors = [0..256]
def reduce_colors (img_bgr, n_colors):
    """Quantize a float BGR image (values in [0..1]) down to `n_colors`
    colors using PIL's adaptive palette; returns a float32 BGR image."""
    rgb_u8 = (cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) * 255.0).astype(np.uint8)
    pil_img = Image.fromarray(rgb_u8)

    # Adaptive-palette quantization, then expand back to full RGB.
    quantized = pil_img.convert('P', palette=Image.ADAPTIVE, colors=n_colors)
    rgb_img = quantized.convert('RGB')

    return cv2.cvtColor(np.array(rgb_img, dtype=np.float32) / 255.0,
                        cv2.COLOR_RGB2BGR)
|
|
|
|
|
|
class TFLabConverter():
    """BGR <-> CIELAB converter built on a TensorFlow (TF1) graph.

    Both conversion graphs are constructed once in __init__ and re-executed
    through the shared session provided by gpufmkmgr for every call to
    bgr2lab / lab2bgr.

    NOTE(review): despite the rgb_to_lab/lab_to_rgb method names, the input
    placeholder is named bgr_input_tensor and bgr2lab feeds its argument
    straight into the "rgb" pipeline — confirm which channel order callers
    actually pass.
    """

    def __init__(self,):
        # Imported lazily so importing this module does not pull in the
        # TensorFlow framework manager.
        import gpufmkmgr

        self.tf_module = gpufmkmgr.import_tf()
        self.tf_session = gpufmkmgr.get_tf_session()

        # H x W x 3 float placeholders; spatial size resolved at run time.
        self.bgr_input_tensor = self.tf_module.placeholder("float", [None, None, 3])
        self.lab_input_tensor = self.tf_module.placeholder("float", [None, None, 3])

        # Build both conversion graphs once.
        self.lab_output_tensor = self.rgb_to_lab(self.tf_module, self.bgr_input_tensor)
        self.bgr_output_tensor = self.lab_to_rgb(self.tf_module, self.lab_input_tensor)

    def bgr2lab(self, bgr):
        """Run the prebuilt graph converting an HxWx3 image to CIELAB."""
        return self.tf_session.run(self.lab_output_tensor, feed_dict={self.bgr_input_tensor: bgr})

    def lab2bgr(self, lab):
        """Run the prebuilt graph converting an HxWx3 CIELAB image back."""
        return self.tf_session.run(self.bgr_output_tensor, feed_dict={self.lab_input_tensor: lab})

    def rgb_to_lab(self, tf, rgb_input):
        """Build graph ops converting sRGB values (assumed in [0,1] — TODO
        confirm with callers) to CIELAB.  Returns a tensor shaped like
        `rgb_input`."""
        with tf.name_scope("rgb_to_lab"):
            srgb_pixels = tf.reshape(rgb_input, [-1, 3])

            with tf.name_scope("srgb_to_xyz"):
                # Undo the sRGB transfer curve: linear segment below 0.04045,
                # gamma-2.4 segment above.
                linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32)
                exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32)
                rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask
                rgb_to_xyz = tf.constant([
                    # X Y Z
                    [0.412453, 0.212671, 0.019334], # R
                    [0.357580, 0.715160, 0.119193], # G
                    [0.180423, 0.072169, 0.950227], # B
                ])
                xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz)

            # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
            with tf.name_scope("xyz_to_cielab"):
                # convert to fx = f(X/Xn), fy = f(Y/Yn), fz = f(Z/Zn)

                # normalize for D65 white point
                xyz_normalized_pixels = tf.multiply(xyz_pixels, [1/0.950456, 1.0, 1/1.088754])

                epsilon = 6/29
                # Piecewise f(): linear below (6/29)^3, cube root above.
                linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32)
                exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32)
                fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4/29) * linear_mask + (xyz_normalized_pixels ** (1/3)) * exponential_mask

                # convert to lab
                fxfyfz_to_lab = tf.constant([
                    # l a b
                    [ 0.0, 500.0, 0.0], # fx
                    [116.0, -500.0, 200.0], # fy
                    [ 0.0, 0.0, -200.0], # fz
                ])
                lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0])

            return tf.reshape(lab_pixels, tf.shape(rgb_input))

    def lab_to_rgb(self, tf, lab):
        """Build graph ops converting CIELAB back to sRGB (clipped to [0,1]).
        Returns a tensor shaped like `lab`."""
        with tf.name_scope("lab_to_rgb"):
            lab_pixels = tf.reshape(lab, [-1, 3])

            # https://en.wikipedia.org/wiki/Lab_color_space#CIELAB-CIEXYZ_conversions
            with tf.name_scope("cielab_to_xyz"):
                # convert to fxfyfz
                lab_to_fxfyfz = tf.constant([
                    # fx fy fz
                    [1/116.0, 1/116.0, 1/116.0], # l
                    [1/500.0, 0.0, 0.0], # a
                    [ 0.0, 0.0, -1/200.0], # b
                ])
                fxfyfz_pixels = tf.matmul(lab_pixels + tf.constant([16.0, 0.0, 0.0]), lab_to_fxfyfz)

                # convert to xyz
                epsilon = 6/29
                # Inverse of the piecewise f() used in rgb_to_lab.
                linear_mask = tf.cast(fxfyfz_pixels <= epsilon, dtype=tf.float32)
                exponential_mask = tf.cast(fxfyfz_pixels > epsilon, dtype=tf.float32)
                xyz_pixels = (3 * epsilon**2 * (fxfyfz_pixels - 4/29)) * linear_mask + (fxfyfz_pixels ** 3) * exponential_mask

                # denormalize for D65 white point
                xyz_pixels = tf.multiply(xyz_pixels, [0.950456, 1.0, 1.088754])

            with tf.name_scope("xyz_to_srgb"):
                xyz_to_rgb = tf.constant([
                    # r g b
                    [ 3.2404542, -0.9692660, 0.0556434], # x
                    [-1.5371385, 1.8760108, -0.2040259], # y
                    [-0.4985314, 0.0415560, 1.0572252], # z
                ])
                rgb_pixels = tf.matmul(xyz_pixels, xyz_to_rgb)
                # avoid a slightly negative number messing up the conversion
                rgb_pixels = tf.clip_by_value(rgb_pixels, 0.0, 1.0)
                # Re-apply the sRGB transfer curve (inverse of srgb_to_xyz).
                linear_mask = tf.cast(rgb_pixels <= 0.0031308, dtype=tf.float32)
                exponential_mask = tf.cast(rgb_pixels > 0.0031308, dtype=tf.float32)
                srgb_pixels = (rgb_pixels * 12.92 * linear_mask) + ((rgb_pixels ** (1/2.4) * 1.055) - 0.055) * exponential_mask

            return tf.reshape(srgb_pixels, tf.shape(lab))