DeepFaceLab/utils/image_utils.py
iperov 2bd983703e ConverterMasked: removed default transfercolor,
added Apply color transfer to predicted face - modes rct / lct
2019-02-13 16:27:57 +04:00

446 lines
No EOL
17 KiB
Python

import sys
from utils import random_utils
import numpy as np
import cv2
import localization
from scipy.spatial import Delaunay
from PIL import Image, ImageDraw, ImageFont
from nnlib import nnlib
def reinhard_color_transfer(target, source, clip=False, preserve_paper=False, source_mask=None, target_mask=None):
"""
Transfers the color distribution from the source to the target
image using the mean and standard deviations of the L*a*b*
color space.
This implementation is (loosely) based on to the "Color Transfer
between Images" paper by Reinhard et al., 2001.
Parameters:
-------
source: NumPy array
OpenCV image in BGR color space (the source image)
target: NumPy array
OpenCV image in BGR color space (the target image)
clip: Should components of L*a*b* image be scaled by np.clip before
converting back to BGR color space?
If False then components will be min-max scaled appropriately.
Clipping will keep target image brightness truer to the input.
Scaling will adjust image brightness to avoid washed out portions
in the resulting color transfer that can be caused by clipping.
preserve_paper: Should color transfer strictly follow methodology
layed out in original paper? The method does not always produce
aesthetically pleasing results.
If False then L*a*b* components will scaled using the reciprocal of
the scaling factor proposed in the paper. This method seems to produce
more consistently aesthetically pleasing results
Returns:
-------
transfer: NumPy array
OpenCV image (w, h, 3) NumPy array (uint8)
"""
# convert the images from the RGB to L*ab* color space, being
# sure to utilizing the floating point data type (note: OpenCV
# expects floats to be 32-bit, so use that instead of 64-bit)
source = cv2.cvtColor(source, cv2.COLOR_BGR2LAB).astype(np.float32)
target = cv2.cvtColor(target, cv2.COLOR_BGR2LAB).astype(np.float32)
# compute color statistics for the source and target images
src_input = source if source_mask is None else source*source_mask
tgt_input = target if target_mask is None else target*target_mask
(lMeanSrc, lStdSrc, aMeanSrc, aStdSrc, bMeanSrc, bStdSrc) = lab_image_stats(src_input)
(lMeanTar, lStdTar, aMeanTar, aStdTar, bMeanTar, bStdTar) = lab_image_stats(tgt_input)
# subtract the means from the target image
(l, a, b) = cv2.split(target)
l -= lMeanTar
a -= aMeanTar
b -= bMeanTar
if preserve_paper:
# scale by the standard deviations using paper proposed factor
l = (lStdTar / lStdSrc) * l
a = (aStdTar / aStdSrc) * a
b = (bStdTar / bStdSrc) * b
else:
# scale by the standard deviations using reciprocal of paper proposed factor
l = (lStdSrc / lStdTar) * l
a = (aStdSrc / aStdTar) * a
b = (bStdSrc / bStdTar) * b
# add in the source mean
l += lMeanSrc
a += aMeanSrc
b += bMeanSrc
# clip/scale the pixel intensities to [0, 255] if they fall
# outside this range
l = _scale_array(l, clip=clip)
a = _scale_array(a, clip=clip)
b = _scale_array(b, clip=clip)
# merge the channels together and convert back to the RGB color
# space, being sure to utilize the 8-bit unsigned integer data
# type
transfer = cv2.merge([l, a, b])
transfer = cv2.cvtColor(transfer.astype(np.uint8), cv2.COLOR_LAB2BGR)
# return the color transferred image
return transfer
def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5):
'''
Matches the colour distribution of the target image to that of the source image
using a linear transform.
Images are expected to be of form (w,h,c) and float in [0,1].
Modes are chol, pca or sym for different choices of basis.
'''
mu_t = target_img.mean(0).mean(0)
t = target_img - mu_t
t = t.transpose(2,0,1).reshape(3,-1)
Ct = t.dot(t.T) / t.shape[1] + eps * np.eye(t.shape[0])
mu_s = source_img.mean(0).mean(0)
s = source_img - mu_s
s = s.transpose(2,0,1).reshape(3,-1)
Cs = s.dot(s.T) / s.shape[1] + eps * np.eye(s.shape[0])
if mode == 'chol':
chol_t = np.linalg.cholesky(Ct)
chol_s = np.linalg.cholesky(Cs)
ts = chol_s.dot(np.linalg.inv(chol_t)).dot(t)
if mode == 'pca':
eva_t, eve_t = np.linalg.eigh(Ct)
Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T)
eva_s, eve_s = np.linalg.eigh(Cs)
Qs = eve_s.dot(np.sqrt(np.diag(eva_s))).dot(eve_s.T)
ts = Qs.dot(np.linalg.inv(Qt)).dot(t)
if mode == 'sym':
eva_t, eve_t = np.linalg.eigh(Ct)
Qt = eve_t.dot(np.sqrt(np.diag(eva_t))).dot(eve_t.T)
Qt_Cs_Qt = Qt.dot(Cs).dot(Qt)
eva_QtCsQt, eve_QtCsQt = np.linalg.eigh(Qt_Cs_Qt)
QtCsQt = eve_QtCsQt.dot(np.sqrt(np.diag(eva_QtCsQt))).dot(eve_QtCsQt.T)
ts = np.linalg.inv(Qt).dot(QtCsQt).dot(np.linalg.inv(Qt)).dot(t)
matched_img = ts.reshape(*target_img.transpose(2,0,1).shape).transpose(1,2,0)
matched_img += mu_s
matched_img[matched_img>1] = 1
matched_img[matched_img<0] = 0
return matched_img
def lab_image_stats(image):
# compute the mean and standard deviation of each channel
(l, a, b) = cv2.split(image)
(lMean, lStd) = (l.mean(), l.std())
(aMean, aStd) = (a.mean(), a.std())
(bMean, bStd) = (b.mean(), b.std())
# return the color statistics
return (lMean, lStd, aMean, aStd, bMean, bStd)
def _scale_array(arr, clip=True):
if clip:
return np.clip(arr, 0, 255)
mn = arr.min()
mx = arr.max()
scale_range = (max([mn, 0]), min([mx, 255]))
if mn < scale_range[0] or mx > scale_range[1]:
return (scale_range[1] - scale_range[0]) * (arr - mn) / (mx - mn) + scale_range[0]
return arr
def channel_hist_match(source, template, hist_match_threshold=255, mask=None):
# Code borrowed from:
# https://stackoverflow.com/questions/32655686/histogram-matching-of-two-images-in-python-2-x
masked_source = source
masked_template = template
if mask is not None:
masked_source = source * mask
masked_template = template * mask
oldshape = source.shape
source = source.ravel()
template = template.ravel()
masked_source = masked_source.ravel()
masked_template = masked_template.ravel()
s_values, bin_idx, s_counts = np.unique(source, return_inverse=True,
return_counts=True)
t_values, t_counts = np.unique(template, return_counts=True)
ms_values, mbin_idx, ms_counts = np.unique(source, return_inverse=True,
return_counts=True)
mt_values, mt_counts = np.unique(template, return_counts=True)
s_quantiles = np.cumsum(s_counts).astype(np.float64)
s_quantiles = hist_match_threshold * s_quantiles / s_quantiles[-1]
t_quantiles = np.cumsum(t_counts).astype(np.float64)
t_quantiles = 255 * t_quantiles / t_quantiles[-1]
interp_t_values = np.interp(s_quantiles, t_quantiles, t_values)
return interp_t_values[bin_idx].reshape(oldshape)
def color_hist_match(src_im, tar_im, hist_match_threshold=255):
h,w,c = src_im.shape
matched_R = channel_hist_match(src_im[:,:,0], tar_im[:,:,0], hist_match_threshold, None)
matched_G = channel_hist_match(src_im[:,:,1], tar_im[:,:,1], hist_match_threshold, None)
matched_B = channel_hist_match(src_im[:,:,2], tar_im[:,:,2], hist_match_threshold, None)
to_stack = (matched_R, matched_G, matched_B)
for i in range(3, c):
to_stack += ( src_im[:,:,i],)
matched = np.stack(to_stack, axis=-1).astype(src_im.dtype)
return matched
pil_fonts = {}
def _get_pil_font (font, size):
global pil_fonts
try:
font_str_id = '%s_%d' % (font, size)
if font_str_id not in pil_fonts.keys():
pil_fonts[font_str_id] = ImageFont.truetype(font + ".ttf", size=size, encoding="unic")
pil_font = pil_fonts[font_str_id]
return pil_font
except:
return ImageFont.load_default()
def get_text_image( shape, text, color=(1,1,1), border=0.2, font=None):
try:
size = shape[1]
pil_font = _get_pil_font( localization.get_default_ttf_font_name() , size)
text_width, text_height = pil_font.getsize(text)
canvas = Image.new('RGB', shape[0:2], (0,0,0) )
draw = ImageDraw.Draw(canvas)
offset = ( 0, 0)
draw.text(offset, text, font=pil_font, fill=tuple((np.array(color)*255).astype(np.int)) )
result = np.asarray(canvas) / 255
if shape[2] != 3:
result = np.concatenate ( (result, np.ones ( (shape[1],) + (shape[0],) + (shape[2]-3,)) ), axis=2 )
return result
except:
return np.zeros ( (shape[1], shape[0], shape[2]), dtype=np.float32 )
def draw_text( image, rect, text, color=(1,1,1), border=0.2, font=None):
h,w,c = image.shape
l,t,r,b = rect
l = np.clip (l, 0, w-1)
r = np.clip (r, 0, w-1)
t = np.clip (t, 0, h-1)
b = np.clip (b, 0, h-1)
image[t:b, l:r] += get_text_image ( (r-l,b-t,c) , text, color, border, font )
def draw_text_lines (image, rect, text_lines, color=(1,1,1), border=0.2, font=None):
text_lines_len = len(text_lines)
if text_lines_len == 0:
return
l,t,r,b = rect
h = b-t
h_per_line = h // text_lines_len
for i in range(0, text_lines_len):
draw_text (image, (l, i*h_per_line, r, (i+1)*h_per_line), text_lines[i], color, border, font)
def get_draw_text_lines ( image, rect, text_lines, color=(1,1,1), border=0.2, font=None):
image = np.zeros ( image.shape, dtype=np.float )
draw_text_lines ( image, rect, text_lines, color, border, font)
return image
def draw_polygon (image, points, color, thickness = 1):
points_len = len(points)
for i in range (0, points_len):
p0 = tuple( points[i] )
p1 = tuple( points[ (i+1) % points_len] )
cv2.line (image, p0, p1, color, thickness=thickness)
def draw_rect(image, rect, color, thickness=1):
l,t,r,b = rect
draw_polygon (image, [ (l,t), (r,t), (r,b), (l,b ) ], color, thickness)
def rectContains(rect, point) :
return not (point[0] < rect[0] or point[0] >= rect[2] or point[1] < rect[1] or point[1] >= rect[3])
def applyAffineTransform(src, srcTri, dstTri, size) :
warpMat = cv2.getAffineTransform( np.float32(srcTri), np.float32(dstTri) )
return cv2.warpAffine( src, warpMat, (size[0], size[1]), None, flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_REFLECT_101 )
def morphTriangle(dst_img, src_img, st, dt) :
(h,w,c) = dst_img.shape
sr = np.array( cv2.boundingRect(np.float32(st)) )
dr = np.array( cv2.boundingRect(np.float32(dt)) )
sRect = st - sr[0:2]
dRect = dt - dr[0:2]
d_mask = np.zeros((dr[3], dr[2], c), dtype = np.float32)
cv2.fillConvexPoly(d_mask, np.int32(dRect), (1.0,)*c, 8, 0);
imgRect = src_img[sr[1]:sr[1] + sr[3], sr[0]:sr[0] + sr[2]]
size = (dr[2], dr[3])
warpImage1 = applyAffineTransform(imgRect, sRect, dRect, size)
if c == 1:
warpImage1 = np.expand_dims( warpImage1, -1 )
dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]] = dst_img[dr[1]:dr[1]+dr[3], dr[0]:dr[0]+dr[2]]*(1-d_mask) + warpImage1 * d_mask
def morph_by_points (image, sp, dp):
if sp.shape != dp.shape:
raise ValueError ('morph_by_points() sp.shape != dp.shape')
(h,w,c) = image.shape
result_image = np.zeros(image.shape, dtype = image.dtype)
for tri in Delaunay(dp).simplices:
morphTriangle(result_image, image, sp[tri], dp[tri])
return result_image
def equalize_and_stack_square (images, axis=1):
max_c = max ([ 1 if len(image.shape) == 2 else image.shape[2] for image in images ] )
target_wh = 99999
for i,image in enumerate(images):
if len(image.shape) == 2:
h,w = image.shape
c = 1
else:
h,w,c = image.shape
if h < target_wh:
target_wh = h
if w < target_wh:
target_wh = w
for i,image in enumerate(images):
if len(image.shape) == 2:
h,w = image.shape
c = 1
else:
h,w,c = image.shape
if c < max_c:
if c == 1:
if len(image.shape) == 2:
image = np.expand_dims ( image, -1 )
image = np.concatenate ( (image,)*max_c, -1 )
elif c == 2: #GA
image = np.expand_dims ( image[...,0], -1 )
image = np.concatenate ( (image,)*max_c, -1 )
else:
image = np.concatenate ( (image, np.ones((h,w,max_c - c))), -1 )
if h != target_wh or w != target_wh:
image = cv2.resize ( image, (target_wh, target_wh) )
h,w,c = image.shape
images[i] = image
return np.concatenate ( images, axis = 1 )
def bgr2hsv (img):
return cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
def hsv2bgr (img):
return cv2.cvtColor(img, cv2.COLOR_HSV2BGR)
def bgra2hsva (img):
return np.concatenate ( (cv2.cvtColor(img[...,0:3], cv2.COLOR_BGR2HSV ), np.expand_dims (img[...,3], -1)), -1 )
def bgra2hsva_list (imgs):
return [ bgra2hsva(img) for img in imgs ]
def hsva2bgra (img):
return np.concatenate ( (cv2.cvtColor(img[...,0:3], cv2.COLOR_HSV2BGR ), np.expand_dims (img[...,3], -1)), -1 )
def hsva2bgra_list (imgs):
return [ hsva2bgra(img) for img in imgs ]
def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ):
h,w,c = source.shape
if (h != w) or (w != 64 and w != 128 and w != 256 and w != 512 and w != 1024):
raise ValueError ('TrainingDataGenerator accepts only square power of 2 images.')
rotation = np.random.uniform( rotation_range[0], rotation_range[1] )
scale = np.random.uniform(1 +scale_range[0], 1 +scale_range[1])
tx = np.random.uniform( tx_range[0], tx_range[1] )
ty = np.random.uniform( ty_range[0], ty_range[1] )
#random warp by grid
cell_size = [ w // (2**i) for i in range(1,4) ] [ np.random.randint(3) ]
cell_count = w // cell_size + 1
grid_points = np.linspace( 0, w, cell_count)
mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy()
mapy = mapx.T
mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24)
mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24)
half_cell_size = cell_size // 2
mapx = cv2.resize(mapx, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32)
mapy = cv2.resize(mapy, (w+cell_size,)*2 )[half_cell_size:-half_cell_size-1,half_cell_size:-half_cell_size-1].astype(np.float32)
#random transform
random_transform_mat = cv2.getRotationMatrix2D((w // 2, w // 2), rotation, scale)
random_transform_mat[:, 2] += (tx*w, ty*w)
params = dict()
params['mapx'] = mapx
params['mapy'] = mapy
params['rmat'] = random_transform_mat
params['w'] = w
params['flip'] = flip and np.random.randint(10) < 4
return params
def warp_by_params (params, img, warp, transform, flip, is_border_replicate):
if warp:
img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC )
if transform:
img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if is_border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
if flip and params['flip']:
img = img[:,::-1,:]
return img
#n_colors = [0..256]
def reduce_colors (img_bgr, n_colors):
img_rgb = (cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) * 255.0).astype(np.uint8)
img_rgb_pil = Image.fromarray(img_rgb)
img_rgb_pil_p = img_rgb_pil.convert('P', palette=Image.ADAPTIVE, colors=n_colors)
img_rgb_p = img_rgb_pil_p.convert('RGB')
img_bgr = cv2.cvtColor( np.array(img_rgb_p, dtype=np.float32) / 255.0, cv2.COLOR_RGB2BGR )
return img_bgr
class TFLabConverter():
def __init__(self):
exec (nnlib.import_tf(), locals(), globals())
self.tf_sess = tf_sess
self.bgr_input_tensor = tf.placeholder("float", [None, None, 3])
self.lab_input_tensor = tf.placeholder("float", [None, None, 3])
self.lab_output_tensor = tf_rgb_to_lab()(self.bgr_input_tensor)
self.bgr_output_tensor = tf_lab_to_rgb()(self.lab_input_tensor)
def bgr2lab(self, bgr):
return self.tf_sess.run(self.lab_output_tensor, feed_dict={self.bgr_input_tensor: bgr})
def lab2bgr(self, lab):
return self.tf_sess.run(self.bgr_output_tensor, feed_dict={self.lab_input_tensor: lab})