DeepFaceLab/samplelib/SampleProcessor.py
Colombo 7386a9d6fd optimized face sample generator, CPU load is significantly reduced
SAEHD:

added a new option
GAN power 0.0 .. 10.0
	Trains the network in a generative adversarial manner,
	forcing it to learn fine details of the face.
	You can enable/disable this option at any time,
	but it is better to enable it once the network is sufficiently trained.
	The typical value is 1.0.
	GAN power has no effect in pretrain mode.
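
A minimal sketch of how a GAN-power weighting of the loss typically works
(illustrative only, not the actual SAEHD implementation; generator_loss,
recon_loss and d_fake_logits are assumed names):

import numpy as np

def generator_loss(recon_loss, d_fake_logits, gan_power=1.0):
    # Non-saturating adversarial term: -log(sigmoid(D(fake))) == softplus(-D(fake)).
    adv_loss = np.mean(np.log1p(np.exp(-d_fake_logits)))
    # gan_power scales the adversarial term; 0.0 disables it entirely.
    return recon_loss + gan_power * adv_loss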

Example of enabling GAN at 81k iterations, then training for 5k more iterations:
https://i.imgur.com/OdXHLhU.jpg
https://i.imgur.com/CYAJmJx.jpg

dfhd: default Decoder dimensions are now 48
the preview for 256 resolution is now displayed correctly

fixed model naming/renaming/removing

Improvements for those involved in post-processing in AfterEffects:

The codec is reverted back to x264 so that the output can be used properly in AfterEffects and video players.

Merger now always outputs the mask to workspace\data_dst\merged_mask

removed all raw modes except raw-rgb
raw-rgb mode now outputs the selected face mask_mode (previously a square mask)

the 'export alpha mask' button is replaced by 'show alpha mask'.
You can now view the alpha mask without recomputing the frames.

'merged *.bat' scripts now also output a 'result_mask' video file.
'merged lossless' now uses the x264 lossless codec (previously the PNG codec).
The result_mask video file is always lossless.

Thus you can use the result_mask video file as a mask layer in AfterEffects.
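
For example, the same compositing can be reproduced per frame outside of
AfterEffects (an illustrative Python sketch, not part of DeepFaceLab; the
function name and arguments are assumptions):

import numpy as np

def composite(original_bgr, merged_bgr, mask_gray):
    # Frames can be read per-stream with cv2.VideoCapture; mask_gray is a
    # single-channel 0..255 frame taken from the result_mask video.
    a = (mask_gray.astype(np.float32) / 255.0)[..., None]
    out = merged_bgr.astype(np.float32) * a + original_bgr.astype(np.float32) * (1.0 - a)
    return out.astype(np.uint8)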

import collections
import math
from enum import IntEnum

import cv2
import numpy as np

from core import imagelib
from facelib import FaceType, LandmarksProcessor
"""
output_sample_types = [
{} opts,
...
]
opts:
'types' : (S,S,...,S)
where S:
'IMG_SOURCE'
'IMG_WARPED'
'IMG_WARPED_TRANSFORMED''
'IMG_TRANSFORMED'
'IMG_LANDMARKS_ARRAY' #currently unused
'IMG_PITCH_YAW_ROLL'
'FACE_TYPE_HALF'
'FACE_TYPE_FULL'
'FACE_TYPE_HEAD' #currently unused
'FACE_TYPE_AVATAR' #currently unused
'MODE_BGR' #BGR
'MODE_G' #Grayscale
'MODE_GGG' #3xGrayscale
'MODE_M' #mask only
'MODE_BGR_SHUFFLE' #BGR shuffle
'resolution' : N
'motion_blur' : (chance_int, range) - chance 0..100 to apply to face (not mask), and max_size of motion blur
'ct_mode' :
'normalize_tanh' : bool
"""
class SampleProcessor(object):
    class Types(IntEnum):
        NONE                       = 0

        IMG_TYPE_BEGIN             = 1
        IMG_SOURCE                 = 1
        IMG_WARPED                 = 2
        IMG_WARPED_TRANSFORMED     = 3
        IMG_TRANSFORMED            = 4
        IMG_LANDMARKS_ARRAY        = 5 #currently unused
        IMG_PITCH_YAW_ROLL         = 6
        IMG_PITCH_YAW_ROLL_SIGMOID = 7
        IMG_TYPE_END               = 10

        FACE_TYPE_BEGIN            = 10
        FACE_TYPE_HALF             = 10
        FACE_TYPE_MID_FULL         = 11
        FACE_TYPE_FULL             = 12
        FACE_TYPE_HEAD             = 13 #currently unused
        FACE_TYPE_AVATAR           = 14 #currently unused
        FACE_TYPE_FULL_NO_ALIGN    = 15
        FACE_TYPE_HEAD_NO_ALIGN    = 16
        FACE_TYPE_END              = 20

        MODE_BEGIN                 = 40
        MODE_BGR                   = 40 #BGR
        MODE_G                     = 41 #Grayscale
        MODE_GGG                   = 42 #3x Grayscale
        MODE_M                     = 43 #mask only
        MODE_BGR_SHUFFLE           = 44 #BGR with randomly shuffled channels
        MODE_BGR_RANDOM_HSV_SHIFT  = 45
        MODE_END                   = 50

    class Options(object):
        def __init__(self, random_flip = True, rotation_range=[-10,10], scale_range=[-0.05, 0.05], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05] ):
            self.random_flip = random_flip
            self.rotation_range = rotation_range
            self.scale_range = scale_range
            self.tx_range = tx_range
            self.ty_range = ty_range

    SPTF_FACETYPE_TO_FACETYPE = { Types.FACE_TYPE_HALF          : FaceType.HALF,
                                  Types.FACE_TYPE_MID_FULL      : FaceType.MID_FULL,
                                  Types.FACE_TYPE_FULL          : FaceType.FULL,
                                  Types.FACE_TYPE_HEAD          : FaceType.HEAD,
                                  Types.FACE_TYPE_FULL_NO_ALIGN : FaceType.FULL_NO_ALIGN,
                                  Types.FACE_TYPE_HEAD_NO_ALIGN : FaceType.HEAD_NO_ALIGN,
                                }
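
    # Type values are grouped into disjoint numeric ranges (IMG_*, FACE_TYPE_*,
    # MODE_*), so process() can classify each requested type with range checks.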
    @staticmethod
    def process (samples, sample_process_options, output_sample_types, debug, ct_sample=None):
        SPTF = SampleProcessor.Types

        # One seed per call, so warp params and per-channel randomization stay
        # consistent across all outputs generated from the same batch.
        sample_rnd_seed = np.random.randint(0x80000000)

        outputs = []
        for sample in samples:
            sample_bgr = sample.load_bgr()
            ct_sample_bgr = None
            h,w,c = sample_bgr.shape

            is_face_sample = sample.landmarks is not None

            if debug and is_face_sample:
                LandmarksProcessor.draw_landmarks (sample_bgr, sample.landmarks, (0, 1, 0))

            params = imagelib.gen_warp_params(sample_bgr, sample_process_options.random_flip, rotation_range=sample_process_options.rotation_range, scale_range=sample_process_options.scale_range, tx_range=sample_process_options.tx_range, ty_range=sample_process_options.ty_range, rnd_seed=sample_rnd_seed )
            outputs_sample = []
            for opts in output_sample_types:
                resolution     = opts.get('resolution', 0)
                types          = opts.get('types', [] )
                motion_blur    = opts.get('motion_blur', None)
                gaussian_blur  = opts.get('gaussian_blur', None)
                ct_mode        = opts.get('ct_mode', None) # fixed: default was the string 'None', which is truthy
                normalize_tanh = opts.get('normalize_tanh', False)
                data_format    = opts.get('data_format', 'NHWC')

                img_type         = SPTF.NONE
                target_face_type = SPTF.NONE
                mode_type        = SPTF.NONE
                for t in types:
                    if t >= SPTF.IMG_TYPE_BEGIN and t < SPTF.IMG_TYPE_END:
                        img_type = t
                    elif t >= SPTF.FACE_TYPE_BEGIN and t < SPTF.FACE_TYPE_END:
                        target_face_type = t
                    elif t >= SPTF.MODE_BEGIN and t < SPTF.MODE_END:
                        mode_type = t

                if mode_type == SPTF.MODE_M and not is_face_sample:
                    raise ValueError("MODE_M is applicable only to face samples")

                can_warp      = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
                can_transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED)

                if img_type == SPTF.NONE:
                    raise ValueError ('expected IMG_ type')
                if img_type == SPTF.IMG_LANDMARKS_ARRAY:
                    l = sample.landmarks
                    l = np.concatenate ( [ np.expand_dims(l[:,0] / w,-1), np.expand_dims(l[:,1] / h,-1) ], -1 )
                    l = np.clip(l, 0.0, 1.0)
                    out_sample = l
                elif img_type == SPTF.IMG_PITCH_YAW_ROLL or img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID:
                    pitch, yaw, roll = sample.get_pitch_yaw_roll() # fixed: the tuple was never unpacked

                    if params['flip']:
                        yaw = -yaw

                    if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID:
                        # Map angles from [-pi, pi] to [0, 1].
                        pitch = np.clip( (pitch / math.pi) / 2.0 + 0.5, 0, 1)
                        yaw   = np.clip( (yaw   / math.pi) / 2.0 + 0.5, 0, 1)
                        roll  = np.clip( (roll  / math.pi) / 2.0 + 0.5, 0, 1)

                    out_sample = (pitch, yaw, roll)
                else:
                    if mode_type == SPTF.NONE:
                        raise ValueError ('expected MODE_ type')

                    need_img  = mode_type != SPTF.MODE_M
                    need_mask = mode_type == SPTF.MODE_M

                    if need_mask:
                        # Build a convex-hull mask from the landmarks, optionally
                        # expanded around the eyebrows and edited by ie_polys.
                        if sample.eyebrows_expand_mod is not None:
                            mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
                        else:
                            mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks)

                        if sample.ie_polys is not None:
                            sample.ie_polys.overlay_mask(mask)

                    if need_img:
                        img = sample_bgr
                        if motion_blur is not None:
                            chance, mb_max_size = motion_blur
                            chance = np.clip(chance, 0, 100)
                            if np.random.randint(100) < chance:
                                img = imagelib.LinearMotionBlur (img, np.random.randint( mb_max_size )+1, np.random.randint(360) )

                        if gaussian_blur is not None:
                            chance, kernel_max_size = gaussian_blur
                            chance = np.clip(chance, 0, 100)
                            if np.random.randint(100) < chance:
                                # Kernel size must be odd: 2k+1 for k in [0, kernel_max_size).
                                img = cv2.GaussianBlur(img, ( np.random.randint( kernel_max_size )*2+1 ,) *2 , 0)
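
                    # Geometry stage: align to the requested face type (if any), apply
                    # the shared random warp/transform/flip from `params`, then bring
                    # the result to the requested `resolution`.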
                    if is_face_sample and target_face_type != SPTF.NONE:
                        target_ft = SampleProcessor.SPTF_FACETYPE_TO_FACETYPE[target_face_type]
                        if target_ft > sample.face_type:
                            raise Exception ('sample %s type %s does not match model requirement %s. Consider extracting the necessary type of faces.' % (sample.filename, sample.face_type, target_ft) )

                        if sample.face_type == FaceType.MARK_ONLY:
                            # MARK_ONLY samples are not pre-aligned: warp to the target
                            # face type at the sample's own size, then resize.
                            mat = LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft) # fixed: a stray trailing tuple made mat unusable

                            if need_img:
                                img = cv2.warpAffine( img, mat, (sample.shape[0], sample.shape[0]), flags=cv2.INTER_CUBIC ) # fixed: dsize was missing
                                img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
                                img = cv2.resize( img, (resolution,resolution), interpolation=cv2.INTER_CUBIC ) # fixed: interpolation was passed positionally as dst

                            if need_mask:
                                mask = cv2.warpAffine( mask, mat, (sample.shape[0], sample.shape[0]), flags=cv2.INTER_CUBIC )
                                mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
                                mask = cv2.resize( mask, (resolution,resolution), interpolation=cv2.INTER_CUBIC )[...,None]
                        else:
                            mat = LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, target_ft)

                            if need_img:
                                img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
                                img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC )

                            if need_mask:
                                mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
                                mask = cv2.warpAffine( mask, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )[...,None]
                    else:
                        if need_img:
                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
                            img = cv2.resize( img, (resolution,resolution), interpolation=cv2.INTER_CUBIC )

                        if need_mask:
                            mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
                            mask = cv2.resize( mask, (resolution,resolution), interpolation=cv2.INTER_CUBIC )[...,None]
                    if mode_type == SPTF.MODE_M:
                        out_sample = np.clip(mask, 0, 1).astype(np.float32)
                    else:
                        img = np.clip(img, 0, 1).astype(np.float32)

                        if ct_mode is not None and ct_sample is not None:
                            if ct_sample_bgr is None:
                                ct_sample_bgr = ct_sample.load_bgr()
                            img = imagelib.color_transfer (ct_mode,
                                                           img,
                                                           cv2.resize( ct_sample_bgr, (resolution,resolution), interpolation=cv2.INTER_LINEAR ) )

                        if mode_type == SPTF.MODE_BGR:
                            out_sample = img
                        elif mode_type == SPTF.MODE_BGR_SHUFFLE:
                            rnd_state = np.random.RandomState (sample_rnd_seed)
                            out_sample = np.take (img, rnd_state.permutation(img.shape[-1]), axis=-1)
                        elif mode_type == SPTF.MODE_BGR_RANDOM_HSV_SHIFT:
                            rnd_state = np.random.RandomState (sample_rnd_seed)
                            hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
                            # For float32 input, OpenCV keeps H in [0,360) and S,V in [0,1].
                            hue, s, v = cv2.split(hsv) # renamed from h, to avoid shadowing the image height above
                            hue = (hue + rnd_state.randint(360) ) % 360
                            s = np.clip ( s + rnd_state.random()-0.5, 0, 1 )
                            v = np.clip ( v + rnd_state.random()-0.5, 0, 1 )
                            hsv = cv2.merge([hue, s, v])
                            out_sample = np.clip( cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) , 0, 1 )
                        elif mode_type == SPTF.MODE_G:
                            out_sample = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)[...,None]
                        elif mode_type == SPTF.MODE_GGG:
                            out_sample = np.repeat ( np.expand_dims(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY),-1), (3,), -1)

                if not debug:
                    if normalize_tanh:
                        out_sample = np.clip (out_sample * 2.0 - 1.0, -1.0, 1.0)

                    if data_format == "NCHW":
                        out_sample = np.transpose(out_sample, (2,0,1) )

                outputs_sample.append ( out_sample )
            outputs += [outputs_sample]

        return outputs
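
# Usage sketch (illustrative; `samples` is produced elsewhere in samplelib, and
# the option values here are hypothetical):
#
#   outputs = SampleProcessor.process (samples,
#                                      SampleProcessor.Options(random_flip=True),
#                                      [ {'types': (SampleProcessor.Types.IMG_WARPED_TRANSFORMED,
#                                                   SampleProcessor.Types.FACE_TYPE_FULL,
#                                                   SampleProcessor.Types.MODE_BGR),
#                                         'resolution': 128},
#                                        {'types': (SampleProcessor.Types.IMG_TRANSFORMED,
#                                                   SampleProcessor.Types.FACE_TYPE_FULL,
#                                                   SampleProcessor.Types.MODE_M),
#                                         'resolution': 128} ],
#                                      debug=False )
#   # outputs[i][j] is the j-th requested output of the i-th sample.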
"""
close_sample = sample.close_target_list[ np.random.randint(0, len(sample.close_target_list)) ] if sample.close_target_list is not None else None
close_sample_bgr = close_sample.load_bgr() if close_sample is not None else None
if debug and close_sample_bgr is not None:
LandmarksProcessor.draw_landmarks (close_sample_bgr, close_sample.landmarks, (0, 1, 0))
RANDOM_CLOSE = 0x00000040, #currently unused
MORPH_TO_RANDOM_CLOSE = 0x00000080, #currently unused
if f & SPTF.RANDOM_CLOSE != 0:
img_type += 10
elif f & SPTF.MORPH_TO_RANDOM_CLOSE != 0:
img_type += 20
if img_type >= 10 and img_type <= 19: #RANDOM_CLOSE
img_type -= 10
img = close_sample_bgr
cur_sample = close_sample
elif img_type >= 20 and img_type <= 29: #MORPH_TO_RANDOM_CLOSE
img_type -= 20
res = sample.shape[0]
s_landmarks = sample.landmarks.copy()
d_landmarks = close_sample.landmarks.copy()
idxs = list(range(len(s_landmarks)))
#remove landmarks near boundaries
for i in idxs[:]:
s_l = s_landmarks[i]
d_l = d_landmarks[i]
if s_l[0] < 5 or s_l[1] < 5 or s_l[0] >= res-5 or s_l[1] >= res-5 or \
d_l[0] < 5 or d_l[1] < 5 or d_l[0] >= res-5 or d_l[1] >= res-5:
idxs.remove(i)
#remove landmarks that close to each other in 5 dist
for landmarks in [s_landmarks, d_landmarks]:
for i in idxs[:]:
s_l = landmarks[i]
for j in idxs[:]:
if i == j:
continue
s_l_2 = landmarks[j]
diff_l = np.abs(s_l - s_l_2)
if np.sqrt(diff_l.dot(diff_l)) < 5:
idxs.remove(i)
break
s_landmarks = s_landmarks[idxs]
d_landmarks = d_landmarks[idxs]
s_landmarks = np.concatenate ( [s_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] )
d_landmarks = np.concatenate ( [d_landmarks, [ [0,0], [ res // 2, 0], [ res-1, 0], [0, res//2], [res-1, res//2] ,[0,res-1] ,[res//2, res-1] ,[res-1,res-1] ] ] )
img = imagelib.morph_by_points (sample_bgr, s_landmarks, d_landmarks)
cur_sample = close_sample
else:
"""