Mirror of https://github.com/iperov/DeepFaceLab.git (synced 2025-08-22 06:23:20 -07:00)

Commit c01b528d1e: 28 changed files with 472 additions and 215 deletions
@@ -18,8 +18,11 @@ More than 95% of deepfake videos are created with DeepFaceLab.

 DeepFaceLab is used by such popular youtube channels as

-| [Ctrl Shift Face](https://www.youtube.com/channel/UCKpH0CKltc73e4wh0_pgL3g)| [Sham00k](https://www.youtube.com/channel/UCZXbWcv7fSZFTAZV4beckyw/videos)| [Collider videos](https://www.youtube.com/watch?v=A91P2qtPT54&list=PLayt6616lBclvOprvrC8qKGCO-mAhPRux)| [iFake](https://www.youtube.com/channel/UCC0lK2Zo2BMXX-k1Ks0r7dg/videos)| [VFXChris Ume](https://www.youtube.com/channel/UCGf4OlX_aTt8DlrgiH3jN3g/videos)|
-|---|---|---|---|---|
+| [Ctrl Shift Face](https://www.youtube.com/channel/UCKpH0CKltc73e4wh0_pgL3g)| [VFXChris Ume](https://www.youtube.com/channel/UCGf4OlX_aTt8DlrgiH3jN3g/videos)|
+|---|---|
+
+| [Sham00k](https://www.youtube.com/channel/UCZXbWcv7fSZFTAZV4beckyw/videos)| [Collider videos](https://www.youtube.com/watch?v=A91P2qtPT54&list=PLayt6616lBclvOprvrC8qKGCO-mAhPRux)| [iFake](https://www.youtube.com/channel/UCC0lK2Zo2BMXX-k1Ks0r7dg/videos)|
+|---|---|---|

 </td></tr>
 <tr><td align="center" width="9999">
@@ -97,7 +100,7 @@ deepfake quality progress

 </td></tr>
 <tr><td align="center" width="9999">

-<sub>#deepfacelab #deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets</sub>
+<sub>#deepfacelab #deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets #tensorflow #cuda #nvidia</sub>

 </td></tr>
 </table>
@@ -366,8 +366,8 @@ def color_hist_match(src_im, tar_im, hist_match_threshold=255):
     return matched

 def color_transfer_mix(img_src,img_trg):
-    img_src = (img_src*255.0).astype(np.uint8)
-    img_trg = (img_trg*255.0).astype(np.uint8)
+    img_src = np.clip(img_src*255.0, 0, 255).astype(np.uint8)
+    img_trg = np.clip(img_trg*255.0, 0, 255).astype(np.uint8)

     img_src_lab = cv2.cvtColor(img_src, cv2.COLOR_BGR2LAB)
     img_trg_lab = cv2.cvtColor(img_trg, cv2.COLOR_BGR2LAB)
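Note (illustration, not part of the diff): float images routinely drift slightly outside [0,1] after blending, and a bare uint8 cast wraps around instead of saturating, which is the artifact the added `np.clip` prevents:

```python
import numpy as np

# Values just above 1.0 wrap when cast straight to uint8 (260.1 -> 4),
# while clipping first saturates them at 255.
x = np.float32([0.5, 1.02])
print((x * 255.0).astype(np.uint8))                  # [127   4] -- wrapped
print(np.clip(x * 255.0, 0, 255).astype(np.uint8))   # [127 255] -- saturated
```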
@@ -47,11 +47,11 @@ def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0

     return params

-def warp_by_params (params, img, can_warp, can_transform, can_flip, border_replicate):
+def warp_by_params (params, img, can_warp, can_transform, can_flip, border_replicate, cv2_inter=cv2.INTER_CUBIC):
     if can_warp:
-        img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC )
+        img = cv2.remap(img, params['mapx'], params['mapy'], cv2_inter )
     if can_transform:
-        img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
+        img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2_inter )
     if len(img.shape) == 2:
         img = img[...,None]
     if can_flip and params['flip']:
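The new `cv2_inter` parameter matters mostly for masks: cubic kernels have negative lobes and overshoot hard edges. A self-contained sketch (not repo code) of the effect:

```python
import cv2
import numpy as np

# Remap a hard-edged mask by half a pixel: INTER_CUBIC overshoots the
# step edge (values outside [0,1]), INTER_LINEAR stays bounded -- which
# is why callers may now pass a different cv2_inter for mask channels.
mask = np.zeros((8, 8), np.float32)
mask[:, 4:] = 1.0
grid = np.mgrid[0:8, 0:8].astype(np.float32)
mapy, mapx = grid[0], grid[1] + 0.5   # half-pixel horizontal shift

print(cv2.remap(mask, mapx, mapy, cv2.INTER_CUBIC).min())   # < 0.0
print(cv2.remap(mask, mapx, mapy, cv2.INTER_LINEAR).min())  # 0.0
```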
@@ -1,7 +1,28 @@
-import queue as Queue
 import multiprocessing
+import queue as Queue
+import threading
 import time


 class SubprocessGenerator(object):

+    @staticmethod
+    def launch_thread(generator):
+        generator._start()
+
+    @staticmethod
+    def start_in_parallel( generator_list ):
+        """
+        Start list of generators in parallel
+        """
+        for generator in generator_list:
+            thread = threading.Thread(target=SubprocessGenerator.launch_thread, args=(generator,) )
+            thread.daemon = True
+            thread.start()
+
+        while not all ([generator._is_started() for generator in generator_list]):
+            time.sleep(0.005)
+
     def __init__(self, generator_func, user_param=None, prefetch=2, start_now=True):
         super().__init__()
         self.prefetch = prefetch
@@ -17,10 +38,14 @@ class SubprocessGenerator(object):
         if self.p == None:
             user_param = self.user_param
             self.user_param = None
-            self.p = multiprocessing.Process(target=self.process_func, args=(user_param,) )
-            self.p.daemon = True
-            self.p.start()
+            p = multiprocessing.Process(target=self.process_func, args=(user_param,) )
+            p.daemon = True
+            p.start()
+            self.p = p
+
+    def _is_started(self):
+        return self.p is not None

     def process_func(self, user_param):
         self.generator_func = self.generator_func(user_param)
         while True:
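A hedged usage sketch of the new API (the `make_samples` function is hypothetical and the import path is an assumption): `start_now=False` defers the subprocess launch so `start_in_parallel` can bring a whole list up at once. Note the design choice in `_start`: `self.p` is assigned only after `p.start()` returns, so `_is_started()` polled from another thread never reports a half-launched worker.

```python
from core.joblib import SubprocessGenerator  # assumed import path

def make_samples(param):          # hypothetical generator function
    while True:
        yield param

# Launch several generators concurrently instead of one by one;
# start_in_parallel returns once _is_started() is True for all of them.
gens = [SubprocessGenerator(make_samples, user_param=i, start_now=False)
        for i in range(4)]
SubprocessGenerator.start_in_parallel(gens)
```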
@@ -289,7 +289,7 @@ class Subprocessor(object):
                     terminate_it = True
                     break

-                if self.no_response_time_sec != 0 and (time.time() - cli.sent_time) > self.no_response_time_sec:
+                if (time.time() - cli.sent_time) > 30:
                     terminate_it = True

             if terminate_it:
@@ -318,7 +318,12 @@ def initialize_layers(nn):

     class BlurPool(LayerBase):
         def __init__(self, filt_size=3, stride=2, **kwargs ):
-            self.strides = [1,stride,stride,1]
+            if nn.data_format == "NHWC":
+                self.strides = [1,stride,stride,1]
+            else:
+                self.strides = [1,1,stride,stride]
+
             self.filt_size = filt_size
             pad = [ int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ]
@@ -352,9 +357,9 @@ def initialize_layers(nn):
            self.k = tf.constant (self.a, dtype=nn.tf_floatx )

        def __call__(self, x):
-           k = tf.tile (self.k, (1,1,x.shape[-1],1) )
+           k = tf.tile (self.k, (1,1,x.shape[nn.conv2d_ch_axis],1) )
            x = tf.pad(x, self.padding )
-           x = tf.nn.depthwise_conv2d(x, k, self.strides, 'VALID')
+           x = tf.nn.depthwise_conv2d(x, k, self.strides, 'VALID', data_format=nn.data_format)
            return x
    nn.BlurPool = BlurPool
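For context (my illustration, not from the diff): BlurPool low-pass filters before its strided downsample, and the `data_format` branches only decide where the stride and channel axis land. The kernel itself is a normalized binomial outer product built from `filt_size`:

```python
import numpy as np

# The filt_size=3 case: outer product of the binomial row [1,2,1],
# normalized, then tiled across channels for the depthwise conv.
a = np.float32([1, 2, 1])
k = a[:, None] * a[None, :]
k /= k.sum()
print(k)   # 3x3 anti-aliasing kernel applied before the stride-2 step
```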
@@ -243,15 +243,15 @@ def initialize_models(nn):
    nn.ModelBase = ModelBase

    class PatchDiscriminator(nn.ModelBase):
-       def on_build(self, patch_size, in_ch, base_ch=256, kernel_initializer=None):
+       def on_build(self, patch_size, in_ch, base_ch=256, conv_kernel_initializer=None):
            prev_ch = in_ch
            self.convs = []
            for i, (kernel_size, strides) in enumerate(patch_discriminator_kernels[patch_size]):
                cur_ch = base_ch * min( (2**i), 8 )
-               self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=kernel_size, strides=strides, padding='SAME', kernel_initializer=kernel_initializer) )
+               self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=kernel_size, strides=strides, padding='SAME', kernel_initializer=conv_kernel_initializer) )
                prev_ch = cur_ch

-           self.out_conv = nn.Conv2D( prev_ch, 1, kernel_size=1, padding='VALID', kernel_initializer=kernel_initializer)
+           self.out_conv = nn.Conv2D( prev_ch, 1, kernel_size=1, padding='VALID', kernel_initializer=conv_kernel_initializer)

        def forward(self, x):
            for conv in self.convs:
@@ -260,7 +260,32 @@ def initialize_models(nn):

    nn.PatchDiscriminator = PatchDiscriminator

+   class IllumDiscriminator(nn.ModelBase):
+       def on_build(self, patch_size, in_ch, base_ch=256, conv_kernel_initializer=None):
+           prev_ch = in_ch
+           self.convs = []
+           for i, (kernel_size, strides) in enumerate(patch_discriminator_kernels[patch_size]):
+               cur_ch = base_ch * min( (2**i), 8 )
+               self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=kernel_size, strides=strides, padding='SAME', kernel_initializer=conv_kernel_initializer) )
+               prev_ch = cur_ch
+
+           self.out1 = nn.Conv2D( 1, 1024, kernel_size=1, strides=1, padding='SAME', kernel_initializer=conv_kernel_initializer)
+           self.out2 = nn.Conv2D( 1024, 1, kernel_size=1, strides=1, padding='SAME', kernel_initializer=conv_kernel_initializer)
+
+       def forward(self, x):
+           for conv in self.convs:
+               x = tf.nn.leaky_relu( conv(x), 0.1 )
+
+           x = tf.reduce_mean(x, axis=nn.conv2d_ch_axis, keep_dims=True)
+
+           x = self.out1(x)
+           x = tf.nn.leaky_relu(x, 0.1 )
+           x = self.out2(x)
+
+           return x
+
+   nn.IllumDiscriminator = IllumDiscriminator
+
    patch_discriminator_kernels = \
        { 1 : [ [1,1] ],
          2 : [ [2,1] ],
@@ -277,4 +302,8 @@ patch_discriminator_kernels = \
          13 : [ [3,2], [4,2], [2,1] ],
          14 : [ [4,2], [4,2], [2,1] ],
          15 : [ [3,2], [3,2], [3,1] ],
-         16 : [ [4,2], [3,2], [3,1] ] }
+         16 : [ [4,2], [3,2], [3,1] ],
+
+
+         28 : [ [4,2], [3,2], [4,2], [2,1] ]
+       }
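A quick sanity check (my reading of the table, not stated in the source): each dict key appears to be the discriminator's receptive field in pixels, composed from its `[kernel_size, stride]` stack. The standard receptive-field recurrence confirms the new 28-px entry:

```python
def receptive_field(layers):
    rf, jump = 1, 1
    for k, s in layers:
        rf += (k - 1) * jump   # each layer widens the field by (k-1)*jump
        jump *= s              # stride multiplies the effective step
    return rf

print(receptive_field([[4,2], [3,2], [4,2], [2,1]]))  # 28
print(receptive_field([[4,2], [3,2], [3,1]]))         # 16
```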
@@ -55,6 +55,7 @@ class nn():
    tf_upsample2d = None
    tf_upsample2d_bilinear = None
    tf_flatten = None
+   tf_max_pool = None
    tf_reshape_4D = None
    tf_random_binomial = None
    tf_gaussian_blur = None
@@ -82,7 +83,8 @@ class nn():

    # Models
    PatchDiscriminator = None
+   IllumDiscriminator = None

    @staticmethod
    def initialize(device_config=None, floatx="float32", data_format="NHWC"):
@@ -129,6 +129,14 @@ def initialize_tensor_ops(nn):

    nn.tf_flatten = tf_flatten

+   def tf_max_pool(x, kernel_size, strides):
+       if nn.data_format == "NHWC":
+           return tf.nn.max_pool(x, [1,kernel_size,kernel_size,1], [1,strides,strides,1], "VALID", data_format=nn.data_format)
+       else:
+           return tf.nn.max_pool(x, [1,1,kernel_size,kernel_size], [1,1,strides,strides], "VALID", data_format=nn.data_format)
+
+   nn.tf_max_pool = tf_max_pool
+
    def tf_reshape_4D(x, w,h,c):
        if nn.data_format == "NHWC":
            # match NCHW version in order to switch data_format without problems
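A sketch of the layout convention both branches follow (assuming a TF1-style graph environment, which is what this repo targets): the pooling window and strides must sit on the spatial axes, which NHWC and NCHW place differently.

```python
import tensorflow as tf  # TF1 graph-mode API

x_nhwc = tf.placeholder(tf.float32, (None, 64, 64, 3))  # H,W at axes 1,2
x_nchw = tf.placeholder(tf.float32, (None, 3, 64, 64))  # H,W at axes 2,3

y1 = tf.nn.max_pool(x_nhwc, [1,2,2,1], [1,2,2,1], "VALID", data_format="NHWC")
y2 = tf.nn.max_pool(x_nchw, [1,1,2,2], [1,1,2,2], "VALID", data_format="NCHW")
print(y1.shape)  # (?, 32, 32, 3)
print(y2.shape)  # (?, 3, 32, 32)
```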
Binary file not shown (before: 11 KiB, after: 10 KiB)
Binary file not shown (before: 41 KiB, after: 49 KiB)
@@ -216,7 +216,7 @@ class FANExtractor(object):
        for i, lmrks in enumerate(landmarks):
            try:
                if lmrks is not None:
-                   image_to_face_mat = LandmarksProcessor.get_transform_mat (lmrks, 256, FaceType.FULL, full_face_align_top=False)
+                   image_to_face_mat = LandmarksProcessor.get_transform_mat (lmrks, 256, FaceType.FULL)
                    face_image = cv2.warpAffine(input_image, image_to_face_mat, (256, 256), cv2.INTER_CUBIC )

                    rects2 = second_pass_extractor.extract(face_image, is_bgr=is_bgr)
@@ -6,8 +6,9 @@ class FaceType(IntEnum):
    MID_FULL = 1
    FULL = 2
    FULL_NO_ALIGN = 3
-   HEAD = 4
-   HEAD_NO_ALIGN = 5
+   WHOLE_FACE = 4
+   HEAD = 5
+   HEAD_NO_ALIGN = 6

    MARK_ONLY = 10, #no align at all, just embedded faceinfo
@@ -25,6 +26,7 @@ class FaceType(IntEnum):
from_string_dict = {'half_face': FaceType.HALF,
                    'midfull_face': FaceType.MID_FULL,
                    'full_face': FaceType.FULL,
+                   'whole_face': FaceType.WHOLE_FACE,
                    'head' : FaceType.HEAD,
                    'mark_only' : FaceType.MARK_ONLY,
                    'full_face_no_align' : FaceType.FULL_NO_ALIGN,
@@ -33,6 +35,7 @@ from_string_dict = {'half_face': FaceType.HALF,
to_string_dict = { FaceType.HALF : 'half_face',
                   FaceType.MID_FULL : 'midfull_face',
                   FaceType.FULL : 'full_face',
+                  FaceType.WHOLE_FACE : 'whole_face',
                   FaceType.HEAD : 'head',
                   FaceType.MARK_ONLY :'mark_only',
                   FaceType.FULL_NO_ALIGN : 'full_face_no_align',
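One side effect worth noting (my observation, not stated in the diff): inserting `WHOLE_FACE = 4` renumbers `HEAD` and `HEAD_NO_ALIGN`, so anything that serialized the raw integer values would decode differently after this commit. The string dicts above suggest face types are persisted as strings, which sidesteps this, but the hazard is easy to demonstrate:

```python
from enum import IntEnum

class FaceTypeOld(IntEnum):
    FULL_NO_ALIGN = 3
    HEAD = 4
    HEAD_NO_ALIGN = 5

class FaceTypeNew(IntEnum):
    FULL_NO_ALIGN = 3
    WHOLE_FACE = 4
    HEAD = 5
    HEAD_NO_ALIGN = 6

# An integer 4 written out under the old numbering now reads back as
# WHOLE_FACE rather than HEAD.
print(FaceTypeNew(int(FaceTypeOld.HEAD)))  # FaceTypeNew.WHOLE_FACE
```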
@@ -188,8 +188,9 @@ FaceType_to_padding_remove_align = {
    FaceType.MID_FULL: (0.0675, False),
    FaceType.FULL: (0.2109375, False),
    FaceType.FULL_NO_ALIGN: (0.2109375, True),
-   FaceType.HEAD: (0.369140625, False),
-   FaceType.HEAD_NO_ALIGN: (0.369140625, True),
+   FaceType.WHOLE_FACE: (0.40, False),
+   FaceType.HEAD: (1.0, False),
+   FaceType.HEAD_NO_ALIGN: (1.0, True),
}

def convert_98_to_68(lmrks):
@@ -249,77 +250,63 @@ def transform_points(points, mat, invert=False):
        points = np.squeeze(points)
    return points

-def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0, full_face_align_top=True):
+def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
    if not isinstance(image_landmarks, np.ndarray):
        image_landmarks = np.array (image_landmarks)

-   padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0)
-
+   # estimate landmarks transform from global space to local aligned space with bounds [0..1]
    mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2]
-   l_p = transform_points ( np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5)]) , mat, True)
-   l_c = l_p[4]

-   tb_diag_vec = (l_p[2]-l_p[0]).astype(np.float32)
+   # get corner points in global space
+   g_p = transform_points ( np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5) ]) , mat, True)
+   g_c = g_p[4]
+
+   # calc diagonal vectors between corners in global space
+   tb_diag_vec = (g_p[2]-g_p[0]).astype(np.float32)
    tb_diag_vec /= npla.norm(tb_diag_vec)
-   bt_diag_vec = (l_p[1]-l_p[3]).astype(np.float32)
+   bt_diag_vec = (g_p[1]-g_p[3]).astype(np.float32)
    bt_diag_vec /= npla.norm(bt_diag_vec)

-   mod = (1.0 / scale)* ( npla.norm(l_p[0]-l_p[2])*(padding*np.sqrt(2.0) + 0.5) )
-
+   # calc modifier of diagonal vectors for scale and padding value
+   padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0)
+   mod = (1.0 / scale)* ( npla.norm(g_p[0]-g_p[2])*(padding*np.sqrt(2.0) + 0.5) )
+
+   if face_type == FaceType.WHOLE_FACE:
+       vec = (g_p[0]-g_p[3]).astype(np.float32)
+       vec_len = npla.norm(vec)
+       vec /= vec_len
+
+       g_c += vec*vec_len*0.07
+
+   # calc 3 points in global space to estimate 2d affine transform
    if not remove_align:
-       l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
-                         np.round( l_c + bt_diag_vec*mod ),
-                         np.round( l_c + tb_diag_vec*mod ) ] )
+       l_t = np.array( [ np.round( g_c - tb_diag_vec*mod ),
+                         np.round( g_c + bt_diag_vec*mod ),
+                         np.round( g_c + tb_diag_vec*mod ) ] )
    else:
-       l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
-                         np.round( l_c + bt_diag_vec*mod ),
-                         np.round( l_c + tb_diag_vec*mod ),
-                         np.round( l_c - bt_diag_vec*mod ),
+       # remove_align - face will be centered in the frame but not aligned
+       l_t = np.array( [ np.round( g_c - tb_diag_vec*mod ),
+                         np.round( g_c + bt_diag_vec*mod ),
+                         np.round( g_c + tb_diag_vec*mod ),
+                         np.round( g_c - bt_diag_vec*mod ),
                          ] )

+       # get area of face square in global space
        area = mathlib.polygon_area(l_t[:,0], l_t[:,1] )
+
+       # calc side of square
        side = np.float32(math.sqrt(area) / 2)
-       l_t = np.array( [ np.round( l_c + [-side,-side] ),
-                         np.round( l_c + [ side,-side] ),
-                         np.round( l_c + [ side, side] ) ] )
+
+       # calc 3 points with unrotated square
+       l_t = np.array( [ np.round( g_c + [-side,-side] ),
+                         np.round( g_c + [ side,-side] ),
+                         np.round( g_c + [ side, side] ) ] )

+   # calc affine transform from 3 global space points to 3 local space points size of 'output_size'
    pts2 = np.float32(( (0,0),(output_size,0),(output_size,output_size) ))
    mat = cv2.getAffineTransform(l_t,pts2)

-   #if remove_align:
-   #    bbox = transform_points ( [ (0,0), (0,output_size), (output_size, output_size), (output_size,0) ], mat, True)
-   #    #import code
-   #    #code.interact(local=dict(globals(), **locals()))
-   #    area = mathlib.polygon_area(bbox[:,0], bbox[:,1] )
-   #    side = math.sqrt(area) / 2
-   #    center = transform_points ( [(output_size/2,output_size/2)], mat, True)
-   #    pts1 = np.float32(( center+[-side,-side], center+[side,-side], center+[side,-side] ))
-   #    pts2 = np.float32([[0,0],[output_size,0],[0,output_size]])
-   #    mat = cv2.getAffineTransform(pts1,pts2)
-
    return mat

-   #if full_face_align_top and (face_type == FaceType.FULL or face_type == FaceType.FULL_NO_ALIGN):
-   #    #lmrks2 = expand_eyebrows(image_landmarks)
-   #    #lmrks2_ = transform_points( [ lmrks2[19], lmrks2[24] ], mat, False )
-   #    #y_diff = np.float32( (0,np.min(lmrks2_[:,1])) )
-   #    #y_diff = transform_points( [ np.float32( (0,0) ), y_diff], mat, True)
-   #    #y_diff = y_diff[1]-y_diff[0]
-   #
-   #    x_diff = np.float32((0,0))
-   #
-   #    lmrks2_ = transform_points( [ image_landmarks[0], image_landmarks[16] ], mat, False )
-   #    if lmrks2_[0,0] < 0:
-   #        x_diff = lmrks2_[0,0]
-   #        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
-   #        x_diff = x_diff[1]-x_diff[0]
-   #    elif lmrks2_[1,0] >= output_size:
-   #        x_diff = lmrks2_[1,0]-(output_size-1)
-   #        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
-   #        x_diff = x_diff[1]-x_diff[0]
-   #
-   #    mat = cv2.getAffineTransform( l_t+y_diff+x_diff ,pts2)

def expand_eyebrows(lmrks, eyebrows_expand_mod=1.0):
    if len(lmrks) != 68:
        raise Exception('works only with 68 landmarks')
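The WHOLE_FACE branch nudges the crop center toward the forehead by 7% of the aligned square's edge before the corner points are laid out. A toy check (my illustration) with an identity alignment:

```python
import numpy as np

g_p = np.float32([(0,0), (1,0), (1,1), (0,1), (0.5,0.5)])  # corners + center
g_c = g_p[4].copy()

vec = g_p[0] - g_p[3]          # bottom-left -> top-left: "up" in image coords
vec_len = np.linalg.norm(vec)
vec /= vec_len

g_c += vec * vec_len * 0.07
print(g_c)                     # [0.5  0.43]: center moved up toward the forehead
```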
@@ -349,8 +336,8 @@ def expand_eyebrows(lmrks, eyebrows_expand_mod=1.0):


-def get_image_hull_mask (image_shape, image_landmarks, eyebrows_expand_mod=1.0, ie_polys=None, color=(1,) ):
-   hull_mask = np.zeros(image_shape[0:2]+( len(color),),dtype=np.float32)
+def get_image_hull_mask (image_shape, image_landmarks, eyebrows_expand_mod=1.0, ie_polys=None ):
+   hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32)

    lmrks = expand_eyebrows(image_landmarks, eyebrows_expand_mod)
@@ -366,7 +353,7 @@ def get_image_hull_mask (image_shape, image_landmarks, eyebrows_expand_mod=1.0,

    for item in parts:
        merged = np.concatenate(item)
-       cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), color )
+       cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), (1,) )

    if ie_polys is not None:
        ie_polys.overlay_mask(hull_mask)
@@ -411,7 +398,7 @@ def alpha_to_color (img_alpha, color):
def get_cmask (image_shape, lmrks, eyebrows_expand_mod=1.0):
    h,w,c = image_shape

-   hull = get_image_hull_mask (image_shape, lmrks, eyebrows_expand_mod, color=(1,) )
+   hull = get_image_hull_mask (image_shape, lmrks, eyebrows_expand_mod )

    result = np.zeros( (h,w,3), dtype=np.float32 )
@@ -671,26 +658,19 @@ def draw_rect_landmarks (image, rect, image_landmarks, face_size, face_type, tra

    points = transform_points ( [ ( int(face_size*0.05), 0), ( int(face_size*0.1), int(face_size*0.1) ), ( 0, int(face_size*0.1) ) ], image_to_face_mat, True)
    imagelib.draw_polygon (image, points, (0,0,255), 2)


def calc_face_pitch(landmarks):
    if not isinstance(landmarks, np.ndarray):
        landmarks = np.array (landmarks)
    t = ( (landmarks[6][1]-landmarks[8][1]) + (landmarks[10][1]-landmarks[8][1]) ) / 2.0
    b = landmarks[8][1]
    return float(b-t)

-def calc_face_yaw(landmarks):
-   if not isinstance(landmarks, np.ndarray):
-       landmarks = np.array (landmarks)
-   l = ( (landmarks[27][0]-landmarks[0][0]) + (landmarks[28][0]-landmarks[1][0]) + (landmarks[29][0]-landmarks[2][0]) ) / 3.0
-   r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0
-   return float(r-l)
-
-def estimate_pitch_yaw_roll(aligned_256px_landmarks):
+def estimate_pitch_yaw_roll(aligned_landmarks, size=256):
    """
    returns pitch,yaw,roll [-pi...+pi]
    """
-   shape = (256,256)
+   shape = (size,size)
    focal_length = shape[1]
    camera_center = (shape[1] / 2, shape[0] / 2)
    camera_matrix = np.array(
@@ -700,7 +680,7 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks):

    (_, rotation_vector, translation_vector) = cv2.solvePnP(
        landmarks_68_3D,
-       aligned_256px_landmarks.astype(np.float32),
+       aligned_landmarks.astype(np.float32),
        camera_matrix,
        np.zeros((4, 1)) )
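Why the new `size` parameter matters (a sketch mirroring the intrinsics built inside `estimate_pitch_yaw_roll`): solvePnP needs a camera matrix consistent with the landmark coordinates, so landmarks from a 512-px whole-face alignment cannot reuse the old hard-coded 256.

```python
import numpy as np

def camera_matrix_for(size):
    # same construction as in estimate_pitch_yaw_roll: focal length and
    # principal point both scale with the aligned image size
    focal_length = size
    cx = cy = size / 2
    return np.array([[focal_length, 0, cx],
                     [0, focal_length, cy],
                     [0, 0, 1]], dtype=np.float32)

print(camera_matrix_for(256))
print(camera_matrix_for(512))  # e.g. landmarks from a 512-px alignment
```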
@@ -710,3 +690,132 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks):
    roll = np.clip ( roll, -math.pi, math.pi )

    return -pitch, yaw, roll
+
+#if remove_align:
+#    bbox = transform_points ( [ (0,0), (0,output_size), (output_size, output_size), (output_size,0) ], mat, True)
+#    #import code
+#    #code.interact(local=dict(globals(), **locals()))
+#    area = mathlib.polygon_area(bbox[:,0], bbox[:,1] )
+#    side = math.sqrt(area) / 2
+#    center = transform_points ( [(output_size/2,output_size/2)], mat, True)
+#    pts1 = np.float32(( center+[-side,-side], center+[side,-side], center+[side,-side] ))
+#    pts2 = np.float32([[0,0],[output_size,0],[0,output_size]])
+#    mat = cv2.getAffineTransform(pts1,pts2)
+
+#if full_face_align_top and (face_type == FaceType.FULL or face_type == FaceType.FULL_NO_ALIGN):
+#    #lmrks2 = expand_eyebrows(image_landmarks)
+#    #lmrks2_ = transform_points( [ lmrks2[19], lmrks2[24] ], mat, False )
+#    #y_diff = np.float32( (0,np.min(lmrks2_[:,1])) )
+#    #y_diff = transform_points( [ np.float32( (0,0) ), y_diff], mat, True)
+#    #y_diff = y_diff[1]-y_diff[0]
+#
+#    x_diff = np.float32((0,0))
+#
+#    lmrks2_ = transform_points( [ image_landmarks[0], image_landmarks[16] ], mat, False )
+#    if lmrks2_[0,0] < 0:
+#        x_diff = lmrks2_[0,0]
+#        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
+#        x_diff = x_diff[1]-x_diff[0]
+#    elif lmrks2_[1,0] >= output_size:
+#        x_diff = lmrks2_[1,0]-(output_size-1)
+#        x_diff = transform_points( [ np.float32( (0,0) ), np.float32((x_diff,0)) ], mat, True)
+#        x_diff = x_diff[1]-x_diff[0]
+#
+#    mat = cv2.getAffineTransform( l_t+y_diff+x_diff ,pts2)
+
+
+"""
+def get_averaged_transform_mat (img_landmarks,
+                                img_landmarks_prev,
+                                img_landmarks_next,
+                                average_frame_count,
+                                average_center_frame_count,
+                                output_size, face_type, scale=1.0):
+
+    l_c_list = []
+    tb_diag_vec_list = []
+    bt_diag_vec_list = []
+    mod_list = []
+
+    count = max(average_frame_count,average_center_frame_count)
+    for i in range ( -count, count+1, 1 ):
+        if i < 0:
+            lmrks = img_landmarks_prev[i] if -i < len(img_landmarks_prev) else None
+        elif i > 0:
+            lmrks = img_landmarks_next[i] if i < len(img_landmarks_next) else None
+        else:
+            lmrks = img_landmarks
+
+        if lmrks is None:
+            continue
+
+        l_c, tb_diag_vec, bt_diag_vec, mod = get_transform_mat_data (lmrks, face_type, scale=scale)
+
+        if i >= -average_frame_count and i <= average_frame_count:
+            tb_diag_vec_list.append(tb_diag_vec)
+            bt_diag_vec_list.append(bt_diag_vec)
+            mod_list.append(mod)
+
+        if i >= -average_center_frame_count and i <= average_center_frame_count:
+            l_c_list.append(l_c)
+
+    tb_diag_vec = np.mean( np.array(tb_diag_vec_list), axis=0 )
+    bt_diag_vec = np.mean( np.array(bt_diag_vec_list), axis=0 )
+    mod = np.mean( np.array(mod_list), axis=0 )
+    l_c = np.mean( np.array(l_c_list), axis=0 )
+
+    return get_transform_mat_by_data (l_c, tb_diag_vec, bt_diag_vec, mod, output_size, face_type)
+
+
+def get_transform_mat (image_landmarks, output_size, face_type, scale=1.0):
+    if not isinstance(image_landmarks, np.ndarray):
+        image_landmarks = np.array (image_landmarks)
+
+    # get face padding value for FaceType
+    padding, remove_align = FaceType_to_padding_remove_align.get(face_type, 0.0)
+
+    # estimate landmarks transform from global space to local aligned space with bounds [0..1]
+    mat = umeyama( np.concatenate ( [ image_landmarks[17:49] , image_landmarks[54:55] ] ) , landmarks_2D_new, True)[0:2]
+
+    # get corner points in global space
+    l_p = transform_points ( np.float32([(0,0),(1,0),(1,1),(0,1),(0.5,0.5)]) , mat, True)
+    l_c = l_p[4]
+
+    # calc diagonal vectors between corners in global space
+    tb_diag_vec = (l_p[2]-l_p[0]).astype(np.float32)
+    tb_diag_vec /= npla.norm(tb_diag_vec)
+    bt_diag_vec = (l_p[1]-l_p[3]).astype(np.float32)
+    bt_diag_vec /= npla.norm(bt_diag_vec)
+
+    # calc modifier of diagonal vectors for scale and padding value
+    mod = (1.0 / scale)* ( npla.norm(l_p[0]-l_p[2])*(padding*np.sqrt(2.0) + 0.5) )
+
+    # calc 3 points in global space to estimate 2d affine transform
+    if not remove_align:
+        l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
+                          np.round( l_c + bt_diag_vec*mod ),
+                          np.round( l_c + tb_diag_vec*mod ) ] )
+    else:
+        # remove_align - face will be centered in the frame but not aligned
+        l_t = np.array( [ np.round( l_c - tb_diag_vec*mod ),
+                          np.round( l_c + bt_diag_vec*mod ),
+                          np.round( l_c + tb_diag_vec*mod ),
+                          np.round( l_c - bt_diag_vec*mod ),
+                          ] )
+
+        # get area of face square in global space
+        area = mathlib.polygon_area(l_t[:,0], l_t[:,1] )
+
+        # calc side of square
+        side = np.float32(math.sqrt(area) / 2)
+
+        # calc 3 points with unrotated square
+        l_t = np.array( [ np.round( l_c + [-side,-side] ),
+                          np.round( l_c + [ side,-side] ),
+                          np.round( l_c + [ side, side] ) ] )
+
+    # calc affine transform from 3 global space points to 3 local space points size of 'output_size'
+    pts2 = np.float32(( (0,0),(output_size,0),(output_size,output_size) ))
+    mat = cv2.getAffineTransform(l_t,pts2)
+
+    return mat
+"""
main.py
@@ -47,7 +47,7 @@ if __name__ == "__main__":
    p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.")
    p.add_argument('--output-debug', action="store_true", dest="output_debug", default=None, help="Writes debug images to <output-dir>_debug\ directory.")
    p.add_argument('--no-output-debug', action="store_false", dest="output_debug", default=None, help="Don't writes debug images to <output-dir>_debug\ directory.")
-   p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'full_face_no_align', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'")
+   p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'whole_face', 'head', 'full_face_no_align', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'")
    p.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.")
    p.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.")
    p.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=1368, help="Manual fix window size. Default: 1368.")
@@ -680,7 +680,6 @@ def main(detector=None,
         manual_fix=False,
         manual_output_debug_fix=False,
         manual_window_size=1368,
-        image_size=256,
         face_type='full_face',
         max_faces_from_image=0,
         cpu_only = False,
@@ -688,6 +687,8 @@ def main(detector=None,
         ):
    face_type = FaceType.fromString(face_type)

+   image_size = 512 if face_type == FaceType.WHOLE_FACE else 256
+
    if not input_path.exists():
        io.log_err ('Input directory not found. Please ensure it exists.')
        return
@@ -710,7 +711,7 @@ def main(detector=None,
    if not manual_output_debug_fix and input_path != output_path:
        output_images_paths = pathex.get_image_paths(output_path)
        if len(output_images_paths) > 0:
-           io.input(f"WARNING !!! \n {output_path} contains files! \n They will be deleted. \n Press enter to continue.")
+           io.input(f"\n WARNING !!! \n {output_path} contains files! \n They will be deleted. \n Press enter to continue.\n")
            for filename in output_images_paths:
                Path(filename).unlink()
    else:
@@ -180,7 +180,7 @@ class MergeSubprocessor(Subprocessor):
        if len (frames) == 0:
            raise ValueError ("len (frames) == 0")

-       super().__init__('Merger', MergeSubprocessor.Cli, 86400 if MERGER_DEBUG else 60, io_loop_sleep_time=0.001)
+       super().__init__('Merger', MergeSubprocessor.Cli, io_loop_sleep_time=0.001)

        self.is_interactive = is_interactive
        self.merger_session_filepath = Path(merger_session_filepath)
@@ -673,7 +673,8 @@ def main (model_class_name=None,
        cfg.ask_settings()

    input_path_image_paths = pathex.get_image_paths(input_path)
+

    if cfg.type == MergerConfig.TYPE_MASKED:
        if not aligned_path.exists():
            io.log_err('Aligned directory not found. Please ensure it exists.')
@@ -783,6 +784,8 @@ def main (model_class_name=None,


    elif cfg.type == MergerConfig.TYPE_FACE_AVATAR:
+       pass
+       """
        filesdata = []
        for filepath in io.progress_bar_generator(input_path_image_paths, "Collecting info"):
            filepath = Path(filepath)
@@ -812,7 +815,7 @@ def main (model_class_name=None,
            frames.append ( MergeSubprocessor.Frame(prev_temporal_frame_infos=prev_temporal_frame_infos,
                                                    frame_info=frame_info,
                                                    next_temporal_frame_infos=next_temporal_frame_infos) )
-
+       """
    if len(frames) == 0:
        io.log_info ("No frames to merge in input_dir.")
    else:
@@ -114,7 +114,7 @@ def sort_by_face_yaw(input_path):
            trash_img_list.append ( [str(filepath)] )
            continue

-       pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() )
+       pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks(), size=dflimg.get_shape()[1] )

        img_list.append( [str(filepath), yaw ] )
@@ -137,7 +137,7 @@ def sort_by_face_pitch(input_path):
            trash_img_list.append ( [str(filepath)] )
            continue

-       pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() )
+       pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks(), size=dflimg.get_shape()[1] )

        img_list.append( [str(filepath), pitch ] )
@@ -418,7 +418,7 @@ class FinalLoaderSubprocessor(Subprocessor):

                gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
                sharpness = estimate_sharpness(gray) if self.include_by_blur else 0
-               pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks() )
+               pitch, yaw, roll = LandmarksProcessor.estimate_pitch_yaw_roll ( dflimg.get_landmarks(), size=dflimg.get_shape()[1] )

                hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
            except Exception as e:
@@ -723,11 +723,11 @@ def sort_by_absdiff(input_path):
    image_paths = pathex.get_image_paths(input_path)
    image_paths_len = len(image_paths)

-   batch_size = 1024
+   batch_size = 512
    batch_size_remain = image_paths_len % batch_size

-   i_t = tf.placeholder (tf.float32, (None,256,256,3) )
-   j_t = tf.placeholder (tf.float32, (None,256,256,3) )
+   i_t = tf.placeholder (tf.float32, (None,None,None,None) )
+   j_t = tf.placeholder (tf.float32, (None,None,None,None) )

    outputs_full = []
    outputs_remain = []
@@ -394,7 +394,6 @@ def extract_fanseg(input_dir, device_args={} ):

#unused in end user workflow
def extract_umd_csv(input_file_csv,
-                   image_size=256,
                    face_type='full_face',
                    device_args={} ):
@@ -456,7 +455,7 @@ def extract_umd_csv(input_file_csv,
    data = ExtractSubprocessor (data, 'landmarks', multi_gpu=multi_gpu, cpu_only=cpu_only).run()

    io.log_info ('Performing 3rd pass...')
-   data = ExtractSubprocessor (data, 'final', image_size, face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
+   data = ExtractSubprocessor (data, 'final', face_type, None, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run()
    faces_detected += sum([d.faces_detected for d in data])
@@ -18,7 +18,6 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
    out_img = img_bgr.copy()
    out_merging_mask_a = None

-   mask_subres = 4
    input_size = predictor_input_shape[0]
    mask_subres_size = input_size*4
    output_size = input_size
@@ -26,12 +25,12 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
        output_size *= 4

    face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type)
-   face_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale )
+   face_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale)

    if mask_subres_size == output_size:
        face_mask_output_mat = face_output_mat
    else:
-       face_mask_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, mask_subres_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale )
+       face_mask_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, mask_subres_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale)

    dst_face_bgr = cv2.warpAffine( img_bgr , face_mat, (output_size, output_size), flags=cv2.INTER_CUBIC )
    dst_face_bgr = np.clip(dst_face_bgr, 0, 1)
@@ -56,11 +55,10 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
    if cfg.super_resolution_power != 0:
        prd_face_bgr_enhanced = cfg.superres_func(prd_face_bgr)
        mod = cfg.super_resolution_power / 100.0
-       prd_face_bgr = cv2.resize(prd_face_bgr, (output_size,output_size))*(1.0-mod) + \
-                      prd_face_bgr_enhanced*mod
+       prd_face_bgr = cv2.resize(prd_face_bgr, (output_size,output_size))*(1.0-mod) + prd_face_bgr_enhanced*mod
        prd_face_bgr = np.clip(prd_face_bgr, 0, 1)

    if cfg.super_resolution_power != 0:
        if predictor_masked:
            prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (output_size, output_size), cv2.INTER_CUBIC)
        else:
@@ -106,13 +104,12 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img

    prd_face_mask_a_0[ prd_face_mask_a_0 < (1.0/255.0) ] = 0.0 # get rid of noise

-   # resize to mask_subres_size
-   if prd_face_mask_a_0.shape[0] != mask_subres_size:
-       prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (mask_subres_size, mask_subres_size), cv2.INTER_CUBIC)
-
    # process mask in local predicted space
    if 'raw' not in cfg.mode:
+       # resize to mask_subres_size
+       if prd_face_mask_a_0.shape[0] != mask_subres_size:
+           prd_face_mask_a_0 = cv2.resize (prd_face_mask_a_0, (mask_subres_size, mask_subres_size), cv2.INTER_CUBIC)
+
        # add zero pad
        prd_face_mask_a_0 = np.pad (prd_face_mask_a_0, input_size)
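Note on the zero-pad step (illustration): `np.pad` with a scalar pad width grows every border, so the subsequent mask processing has room to erode and blur without being clipped at the crop edge.

```python
import numpy as np

mask = np.ones((8, 8), np.float32)
padded = np.pad(mask, 2)   # stand-in for np.pad(prd_face_mask_a_0, input_size)
print(padded.shape)        # (12, 12): 2 zero pixels added on every side
```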
@@ -176,9 +173,9 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img

    if 'seamless' not in cfg.mode and cfg.color_transfer_mode != 0:
        if cfg.color_transfer_mode == 1: #rct
-           prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( prd_face_bgr*255, 0, 255).astype(np.uint8),
-                                                             np.clip( dst_face_bgr*255, 0, 255).astype(np.uint8),
-                                                             source_mask=prd_face_mask_area_a, target_mask=prd_face_mask_area_a)
+           prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( prd_face_bgr*prd_face_mask_area_a*255, 0, 255).astype(np.uint8),
+                                                             np.clip( dst_face_bgr*prd_face_mask_area_a*255, 0, 255).astype(np.uint8), )
            prd_face_bgr = np.clip( prd_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
        elif cfg.color_transfer_mode == 2: #lct
            prd_face_bgr = imagelib.linear_color_transfer (prd_face_bgr, dst_face_bgr)
@@ -247,15 +244,12 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img

        out_img = img_bgr*(1-img_face_mask_a) + (out_img*img_face_mask_a)

-       out_face_bgr = cv2.warpAffine( out_img, face_mat, (output_size, output_size) )
+       out_face_bgr = cv2.warpAffine( out_img, face_mat, (output_size, output_size), flags=cv2.INTER_CUBIC )

        if 'seamless' in cfg.mode and cfg.color_transfer_mode != 0:
            if cfg.color_transfer_mode == 1:
-               face_mask_a = cv2.warpAffine( img_face_mask_a, face_mat, (output_size, output_size) )[...,None]
-
-               out_face_bgr = imagelib.reinhard_color_transfer ( (out_face_bgr*255).astype(np.uint8),
-                                                                 (dst_face_bgr*255).astype(np.uint8),
-                                                                 source_mask=face_mask_a, target_mask=face_mask_a)
+               out_face_bgr = imagelib.reinhard_color_transfer ( np.clip(out_face_bgr*prd_face_mask_area_a*255, 0, 255).astype(np.uint8),
+                                                                 np.clip(dst_face_bgr*prd_face_mask_area_a*255, 0, 255).astype(np.uint8) )
                out_face_bgr = np.clip( out_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
            elif cfg.color_transfer_mode == 2: #lct
                out_face_bgr = imagelib.linear_color_transfer (out_face_bgr, dst_face_bgr)
@@ -107,8 +107,6 @@ class MergerConfigMasked(MergerConfig):

    def __init__(self, face_type=FaceType.FULL,
                       default_mode = 'overlay',
-                      clip_hborder_mask_per = 0,
-
                       mode='overlay',
                       masked_hist_match=True,
                       hist_match_threshold = 238,
@@ -128,11 +126,10 @@ class MergerConfigMasked(MergerConfig):
        super().__init__(type=MergerConfig.TYPE_MASKED, **kwargs)

        self.face_type = face_type
-       if self.face_type not in [FaceType.HALF, FaceType.MID_FULL, FaceType.FULL ]:
+       if self.face_type not in [FaceType.HALF, FaceType.MID_FULL, FaceType.FULL, FaceType.WHOLE_FACE ]:
            raise ValueError("MergerConfigMasked does not support this type of face.")

        self.default_mode = default_mode
-       self.clip_hborder_mask_per = clip_hborder_mask_per

        #default changeable params
        if mode not in mode_str_dict:
@@ -242,7 +239,7 @@ class MergerConfigMasked(MergerConfig):
            self.color_transfer_mode = ctm_str_dict[self.color_transfer_mode]

        super().ask_settings()
-
+       self.super_resolution_power = np.clip ( io.input_int ("Choose super resolution power", 0, add_info="0..100", help_message="Enhance details by applying superresolution network."), 0, 100)

        if 'raw' not in self.mode:
@@ -298,11 +295,11 @@ class MergerConfigMasked(MergerConfig):
        r += f"""output_face_scale: {self.output_face_scale}\n"""

        if 'raw' not in self.mode:
-           r += f"""color_transfer_mode: { ctm_dict[self.color_transfer_mode]}\n"""
+           r += f"""color_transfer_mode: {ctm_dict[self.color_transfer_mode]}\n"""

        r += super().to_string(filename)
        r += f"""super_resolution_power: {self.super_resolution_power}\n"""

        if 'raw' not in self.mode:
            r += (f"""image_denoise_power: {self.image_denoise_power}\n"""
                  f"""bicubic_degrade_power: {self.bicubic_degrade_power}\n"""
@@ -372,14 +372,14 @@ class QModel(ModelBase):
                        sample_process_options=SampleProcessor.Options(random_flip=True if self.pretrain else False),
                        output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution':resolution, },
                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution, },
-                                               {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_HULL), 'data_format':nn.data_format, 'resolution': resolution } ],
+                                               {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_ALL_HULL), 'data_format':nn.data_format, 'resolution': resolution } ],
                        generators_count=src_generators_count ),

                    SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
                        sample_process_options=SampleProcessor.Options(random_flip=True if self.pretrain else False),
                        output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution':resolution},
                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution},
-                                               {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_HULL), 'data_format':nn.data_format, 'resolution': resolution} ],
+                                               {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_ALL_HULL), 'data_format':nn.data_format, 'resolution': resolution} ],
                        generators_count=dst_generators_count )
                    ])
@@ -454,7 +454,6 @@ class QModel(ModelBase):
        import merger
        return self.predictor_func, (self.resolution, self.resolution, 3), merger.MergerConfigMasked(face_type=face_type,
                                     default_mode = 'overlay',
-                                    clip_hborder_mask_per=0.0625 if (face_type != FaceType.HALF) else 0,
                                    )

Model = QModel
@@ -33,9 +33,11 @@ class SAEHDModel(ModelBase):
        default_archi = self.options['archi'] = self.load_or_def_option('archi', 'df')
        default_ae_dims = self.options['ae_dims'] = self.load_or_def_option('ae_dims', 256)
        default_e_dims = self.options['e_dims'] = self.load_or_def_option('e_dims', 64)
-       self.options['d_dims'] = None
-       self.options['d_mask_dims'] = None
+       default_d_dims = self.options['d_dims'] = self.options.get('d_dims', None)
+       default_d_mask_dims = self.options['d_mask_dims'] = self.options.get('d_mask_dims', None)
+       default_masked_training = self.options['masked_training'] = self.load_or_def_option('masked_training', True)
        default_learn_mask = self.options['learn_mask'] = self.load_or_def_option('learn_mask', True)
+       default_eyes_prio = self.options['eyes_prio'] = self.load_or_def_option('eyes_prio', False)
        default_lr_dropout = self.options['lr_dropout'] = self.load_or_def_option('lr_dropout', False)
        default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True)
        default_gan_power = self.options['gan_power'] = self.load_or_def_option('gan_power', 0.0)
@@ -58,12 +60,13 @@ class SAEHDModel(ModelBase):
            resolution = io.input_int("Resolution", default_resolution, add_info="64-256", help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.")
            resolution = np.clip ( (resolution // 16) * 16, 64, 256)
            self.options['resolution'] = resolution
-           self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f'], help_message="Half / mid face / full face. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face.").lower()
+           self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f','wf'], help_message="Half / mid face / full face / whole face. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face. 'Whole face' covers full area of face include forehead, but requires manual merge in Adobe After Effects.").lower()
            self.options['archi'] = io.input_str ("AE architecture", default_archi, ['dfhd','liaehd','df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'hd' is heavyweight version for the best quality.").lower()

        default_d_dims = 48 if self.options['archi'] == 'dfhd' else 64
        default_d_dims = self.options['d_dims'] = self.load_or_def_option('d_dims', default_d_dims)
+
        default_d_mask_dims = default_d_dims // 3
        default_d_mask_dims += default_d_mask_dims % 2
        default_d_mask_dims = self.options['d_mask_dims'] = self.load_or_def_option('d_mask_dims', default_d_mask_dims)
@@ -82,8 +85,12 @@ class SAEHDModel(ModelBase):
            self.options['d_mask_dims'] = d_mask_dims + d_mask_dims % 2

        if self.is_first_run() or ask_override:
-           self.options['learn_mask'] = io.input_bool ("Learn mask", default_learn_mask, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case merger forced to use 'not predicted mask' that is not smooth as predicted.")
+           if self.options['face_type'] == 'wf':
+               self.options['masked_training'] = io.input_bool ("Masked training", default_masked_training, help_message="This option is available only for 'whole_face' type. Masked training clips training area to full_face mask, thus network will train the faces properly. When the face is trained enough, disable this option to train all area of the frame. Merge with 'raw-rgb' mode, then use Adobe After Effects to manually mask and compose whole face include forehead.")
+
+           self.options['learn_mask'] = io.input_bool ("Learn mask", default_learn_mask, help_message="Learning mask will produce a smooth mask in the merger. Also it works as guide for neural network to recognize face directions.")
+           self.options['eyes_prio'] = io.input_bool ("Eyes priority", default_eyes_prio, help_message='Helps to fix eye problems during training like "alien eyes" and wrong eyes direction ( especially on HD architectures ) by forcing the neural network to train eyes with higher priority. before/after https://i.imgur.com/YQHOuSR.jpg ')

        if self.is_first_run() or ask_override:
            if len(device_config.devices) == 1:
                self.options['models_opt_on_gpu'] = io.input_bool ("Place models and optimizer on GPU", default_models_opt_on_gpu, help_message="When you train on one GPU, by default model and optimizer weights are placed on GPU to accelerate the process. You can place they on CPU to free up extra VRAM, thus set bigger dimensions.")
@@ -98,10 +105,13 @@ class SAEHDModel(ModelBase):
            else:
                self.options['true_face_power'] = 0.0

-           self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
-           self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse. Typical value is 2.0"), 0.0, 100.0 )
+           if self.options['face_type'] != 'wf':
+               self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.001 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
+               self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse. Typical value is 2.0"), 0.0, 100.0 )
+
            self.options['ct_mode'] = io.input_str (f"Color transfer for src faceset", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.")
            self.options['clipgrad'] = io.input_bool ("Enable gradient clipping", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.")

            self.options['pretrain'] = io.input_bool ("Enable pretraining mode", default_pretrain, help_message="Pretrain the model with large amount of various faces. After that, model can be used to train the fakes more quickly.")

        if self.options['pretrain'] and self.get_pretraining_data_path() is None:
@@ -333,6 +343,7 @@ class SAEHDModel(ModelBase):

        self.resolution = resolution = self.options['resolution']
        learn_mask = self.options['learn_mask']
+       eyes_prio = self.options['eyes_prio']
        archi = self.options['archi']
        ae_dims = self.options['ae_dims']
        e_dims = self.options['e_dims']
@@ -344,7 +355,7 @@ class SAEHDModel(ModelBase):

        self.gan_power = gan_power = self.options['gan_power'] if not self.pretrain else 0.0

-       masked_training = True
+       masked_training = self.options['masked_training']

        models_opt_on_gpu = False if len(devices) == 0 else True if len(devices) > 1 else self.options['models_opt_on_gpu']
        models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
@@ -367,9 +378,9 @@ class SAEHDModel(ModelBase):
            self.target_src = tf.placeholder (nn.tf_floatx, bgr_shape)
            self.target_dst = tf.placeholder (nn.tf_floatx, bgr_shape)

-           self.target_srcm = tf.placeholder (nn.tf_floatx, mask_shape)
-           self.target_dstm = tf.placeholder (nn.tf_floatx, mask_shape)
+           self.target_srcm_all = tf.placeholder (nn.tf_floatx, mask_shape)
+           self.target_dstm_all = tf.placeholder (nn.tf_floatx, mask_shape)

        # Initializing model classes
        with tf.device (models_opt_device):
            if 'df' in archi:
@@ -468,13 +479,13 @@ class SAEHDModel(ModelBase):
                    with tf.device(f'/CPU:0'):
                        # slice on CPU, otherwise all batch data will be transfered to GPU first
                        batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu )
-                       gpu_warped_src = self.warped_src [batch_slice,:,:,:]
-                       gpu_warped_dst = self.warped_dst [batch_slice,:,:,:]
-                       gpu_target_src = self.target_src [batch_slice,:,:,:]
-                       gpu_target_dst = self.target_dst [batch_slice,:,:,:]
-                       gpu_target_srcm = self.target_srcm[batch_slice,:,:,:]
-                       gpu_target_dstm = self.target_dstm[batch_slice,:,:,:]
+                       gpu_warped_src = self.warped_src [batch_slice,:,:,:]
+                       gpu_warped_dst = self.warped_dst [batch_slice,:,:,:]
+                       gpu_target_src = self.target_src [batch_slice,:,:,:]
+                       gpu_target_dst = self.target_dst [batch_slice,:,:,:]
+                       gpu_target_srcm_all = self.target_srcm_all[batch_slice,:,:,:]
+                       gpu_target_dstm_all = self.target_dstm_all[batch_slice,:,:,:]

                    # process model tensors
                    if 'df' in archi:
                        gpu_src_code = self.inter(self.encoder(gpu_warped_src))
@@ -504,7 +515,13 @@ class SAEHDModel(ModelBase):
                        gpu_pred_src_srcm_list.append(gpu_pred_src_srcm)
                        gpu_pred_dst_dstm_list.append(gpu_pred_dst_dstm)
                        gpu_pred_src_dstm_list.append(gpu_pred_src_dstm)

+                   # unpack masks from one combined mask
+                   gpu_target_srcm = tf.clip_by_value (gpu_target_srcm_all, 0, 1)
+                   gpu_target_dstm = tf.clip_by_value (gpu_target_dstm_all, 0, 1)
+                   gpu_target_srcm_eyes = tf.clip_by_value (gpu_target_srcm_all-1, 0, 1)
+                   gpu_target_dstm_eyes = tf.clip_by_value (gpu_target_dstm_all-1, 0, 1)
+
                    gpu_target_srcm_blur = nn.tf_gaussian_blur(gpu_target_srcm, max(1, resolution // 32) )
                    gpu_target_dstm_blur = nn.tf_gaussian_blur(gpu_target_dstm, max(1, resolution // 32) )
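A toy illustration of the combined-mask encoding (numpy stand-in for the `tf.clip_by_value` calls; my reading of the sample types, not stated in the diff): the generator packs two masks into one channel, with face pixels carrying 1 and eye pixels carrying 2 (face plus eyes), so clipping recovers each part.

```python
import numpy as np

mask_all = np.array([0., 1., 2.])        # background, face, eyes
face_mask = np.clip(mask_all, 0, 1)      # [0. 1. 1.]
eyes_mask = np.clip(mask_all - 1, 0, 1)  # [0. 0. 1.]
print(face_mask, eyes_mask)
```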
@@ -513,7 +530,7 @@ class SAEHDModel(ModelBase):

                    gpu_target_src_masked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
                    gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst

                    gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src
                    gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst
@@ -522,6 +539,10 @@ class SAEHDModel(ModelBase):

                    gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                    gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
+
+                   if eyes_prio:
+                       gpu_src_loss += tf.reduce_mean ( 300*tf.abs ( gpu_target_src*gpu_target_srcm_eyes - gpu_pred_src_src*gpu_target_srcm_eyes ), axis=[1,2,3])
+
                    if learn_mask:
                        gpu_src_loss += tf.reduce_mean ( 10*tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] )
@@ -534,8 +555,12 @@ class SAEHDModel(ModelBase):
                        gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*nn.tf_dssim(gpu_psd_target_dst_anti_masked, gpu_target_dst_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                        gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*tf.square( gpu_psd_target_dst_anti_masked - gpu_target_dst_anti_masked), axis=[1,2,3] )

                    gpu_dst_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
                    gpu_dst_loss += tf.reduce_mean ( 10*tf.square(  gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3])
+
+                   if eyes_prio:
+                       gpu_dst_loss += tf.reduce_mean ( 300*tf.abs ( gpu_target_dst*gpu_target_dstm_eyes - gpu_pred_dst_dst*gpu_target_dstm_eyes ), axis=[1,2,3])
+
                    if learn_mask:
                        gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] )
@@ -606,15 +631,15 @@ class SAEHDModel(ModelBase):


            # Initializing training and view functions
-           def src_dst_train(warped_src, target_src, target_srcm, \
-                             warped_dst, target_dst, target_dstm):
+           def src_dst_train(warped_src, target_src, target_srcm_all, \
+                             warped_dst, target_dst, target_dstm_all):
                s, d, _ = nn.tf_sess.run ( [ src_loss, dst_loss, src_dst_loss_gv_op],
                                            feed_dict={self.warped_src :warped_src,
                                                       self.target_src :target_src,
-                                                      self.target_srcm:target_srcm,
+                                                      self.target_srcm_all:target_srcm_all,
                                                       self.warped_dst :warped_dst,
                                                       self.target_dst :target_dst,
-                                                      self.target_dstm:target_dstm,
+                                                      self.target_dstm_all:target_dstm_all,
                                                       })
                s = np.mean(s)
                d = np.mean(d)
@@ -627,14 +652,14 @@ class SAEHDModel(ModelBase):

                self.D_train = D_train

            if gan_power != 0:
                def D_src_dst_train(warped_src, target_src, target_srcm, \
                                    warped_dst, target_dst, target_dstm):
                def D_src_dst_train(warped_src, target_src, target_srcm_all, \
                                    warped_dst, target_dst, target_dstm_all):
                    nn.tf_sess.run ([src_D_src_dst_loss_gv_op], feed_dict={self.warped_src :warped_src,
                                                                           self.target_src :target_src,
                                                                           self.target_srcm:target_srcm,
                                                                           self.target_srcm_all:target_srcm_all,
                                                                           self.warped_dst :warped_dst,
                                                                           self.target_dst :target_dst,
                                                                           self.target_dstm:target_dstm})
                                                                           self.target_dstm_all:target_dstm_all})
                self.D_src_dst_train = D_src_dst_train

            if learn_mask:
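When `gan_power` is non-zero, `D_src_dst_train` runs only the discriminator gradient op (`src_D_src_dst_loss_gv_op`) on the same batch; the generator side of the GAN term appears to be trained together with the main `src_dst_train` step, keeping the two updates decoupled.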
@@ -703,7 +728,9 @@ class SAEHDModel(ModelBase):

            face_type = t.FACE_TYPE_MID_FULL
        elif self.options['face_type'] == 'f':
            face_type = t.FACE_TYPE_FULL
        elif self.options['face_type'] == 'wf':
            face_type = t.FACE_TYPE_WHOLE_FACE

        training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path()
        training_data_dst_path = self.training_data_dst_path if not self.pretrain else self.get_pretraining_data_path()
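The growing `elif` chain is effectively a lookup table. A dict-based sketch of the same mapping, assuming the option keys visible in this hunk plus `mf` from the surrounding context (illustrative only, not how the model file is written):

```python
def resolve_face_type(t, option):
    # t is the SampleProcessor.Types namespace used above.
    mapping = {
        'mf': t.FACE_TYPE_MID_FULL,
        'f' : t.FACE_TYPE_FULL,
        'wf': t.FACE_TYPE_WHOLE_FACE,  # new whole-face type from this commit
    }
    return mapping[option]
```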
@@ -722,14 +749,16 @@ class SAEHDModel(ModelBase):

                        sample_process_options=SampleProcessor.Options(random_flip=self.random_flip),
                        output_sample_types = [ {'types' : (t_img_warped, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution, 'ct_mode': self.options['ct_mode'] },
                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution, 'ct_mode': self.options['ct_mode'] },
                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_HULL), 'data_format':nn.data_format, 'resolution': resolution } ],
                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_ALL_EYES_HULL), 'data_format':nn.data_format, 'resolution': resolution },
                                              ],
                        generators_count=src_generators_count ),

                    SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
                        sample_process_options=SampleProcessor.Options(random_flip=self.random_flip),
                        output_sample_types = [ {'types' : (t_img_warped, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution},
                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution},
                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_HULL), 'data_format':nn.data_format, 'resolution': resolution} ],
                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_ALL_EYES_HULL), 'data_format':nn.data_format, 'resolution': resolution},
                                              ],
                        generators_count=dst_generators_count )
                    ])
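Both the src and dst generators now request `MODE_FACE_MASK_ALL_EYES_HULL` as their third output, so every training batch delivers the combined face-plus-eyes mask that feeds `target_srcm_all` / `target_dstm_all` above.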
@@ -748,23 +777,23 @@ class SAEHDModel(ModelBase):

    #override
    def onTrainOneIter(self):
        ( (warped_src, target_src, target_srcm), \
          (warped_dst, target_dst, target_dstm) ) = self.generate_next_samples()
        ( (warped_src, target_src, target_srcm_all), \
          (warped_dst, target_dst, target_dstm_all) ) = self.generate_next_samples()

        src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm)
        src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm_all, warped_dst, target_dst, target_dstm_all)

        if self.options['true_face_power'] != 0 and not self.pretrain:
            self.D_train (warped_src, warped_dst)

        if self.gan_power != 0:
            self.D_src_dst_train (warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm)
            self.D_src_dst_train (warped_src, target_src, target_srcm_all, warped_dst, target_dst, target_dstm_all)

        return ( ('src_loss', src_loss), ('dst_loss', dst_loss), )

    #override
    def onGetPreview(self, samples):
        ( (warped_src, target_src, target_srcm),
          (warped_dst, target_dst, target_dstm) ) = samples
        ( (warped_src, target_src, target_srcm_all,),
          (warped_dst, target_dst, target_dstm_all,) ) = samples

        if self.options['learn_mask']:
            S, D, SS, DD, DDM, SD, SDM = [ np.clip( nn.to_data_format(x,"NHWC", self.model_data_format), 0.0, 1.0) for x in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ]
@@ -772,8 +801,11 @@ class SAEHDModel(ModelBase):

        else:
            S, D, SS, DD, SD, = [ np.clip( nn.to_data_format(x,"NHWC", self.model_data_format) , 0.0, 1.0) for x in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ]

        target_srcm, target_dstm = [ nn.to_data_format(x,"NHWC", self.model_data_format) for x in ([target_srcm, target_dstm] )]
        target_srcm_all, target_dstm_all = [ nn.to_data_format(x,"NHWC", self.model_data_format) for x in ([target_srcm_all, target_dstm_all] )]

        target_srcm = np.clip(target_srcm_all, 0, 1)
        target_dstm = np.clip(target_dstm_all, 0, 1)

        n_samples = min(4, self.get_batch_size(), 800 // self.resolution )

        result = []
@@ -815,11 +847,12 @@ class SAEHDModel(ModelBase):

            face_type = FaceType.MID_FULL
        elif self.options['face_type'] == 'f':
            face_type = FaceType.FULL
        elif self.options['face_type'] == 'wf':
            face_type = FaceType.WHOLE_FACE

        import merger
        return self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), merger.MergerConfigMasked(face_type=face_type,
                                     default_mode = 'overlay' if self.options['ct_mode'] != 'none' or self.options['face_style_power'] or self.options['bg_style_power'] else 'seamless',
                                     clip_hborder_mask_per=0.0625 if (face_type != FaceType.HALF) else 0,
                                    )

Model = SAEHDModel
@@ -61,7 +61,7 @@ class Sample(object):

    def get_pitch_yaw_roll(self):
        if self.pitch_yaw_roll is None:
            self.pitch_yaw_roll = LandmarksProcessor.estimate_pitch_yaw_roll(landmarks)
            self.pitch_yaw_roll = LandmarksProcessor.estimate_pitch_yaw_roll(landmarks, size=self.shape[1])
        return self.pitch_yaw_roll

    def set_filename_offset_size(self, filename, offset, size):
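`estimate_pitch_yaw_roll` now receives the image size, which a pose fit needs in order to build a camera matrix matched to the landmark coordinates. A rough sketch of what such an estimate involves (illustrative only, not `LandmarksProcessor`'s exact implementation):

```python
import cv2
import numpy as np

def estimate_pose(landmarks_2d, landmarks_3d_model, size):
    # Pinhole camera whose focal length and center come from the image size -
    # this is why the call site must pass `size` in.
    camera = np.array([[size, 0,    size / 2],
                       [0,    size, size / 2],
                       [0,    0,    1       ]], dtype=np.float32)
    ok, rvec, _ = cv2.solvePnP(landmarks_3d_model.astype(np.float32),
                               landmarks_2d.astype(np.float32),
                               camera, np.zeros((4, 1), np.float32))
    rmat, _ = cv2.Rodrigues(rvec)
    # Euler angles from the rotation matrix (one common convention).
    pitch = np.arctan2(rmat[2, 1], rmat[2, 2])
    yaw   = np.arctan2(-rmat[2, 0], np.hypot(rmat[2, 1], rmat[2, 2]))
    roll  = np.arctan2(rmat[1, 0], rmat[0, 0])
    return pitch, yaw, roll
```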
@@ -60,7 +60,10 @@ class SampleGeneratorFace(SampleGeneratorBase):

        if self.debug:
            self.generators = [ThisThreadGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, ct_index_host.create_cli() if ct_index_host is not None else None) )]
        else:
            self.generators = [SubprocessGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, ct_index_host.create_cli() if ct_index_host is not None else None), start_now=True ) for i in range(self.generators_count) ]
            self.generators = [SubprocessGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, ct_index_host.create_cli() if ct_index_host is not None else None), start_now=False ) \
                               for i in range(self.generators_count) ]

            SubprocessGenerator.start_in_parallel( self.generators )

        self.generator_counter = -1
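The generators are now constructed with `start_now=False` and launched together through `start_in_parallel`, presumably so that all workers are created before any of them begins its (potentially heavy) startup. A sketch of that pattern, under the assumption that each generator wraps a `multiprocessing.Process`:

```python
import multiprocessing

class WorkerSketch:
    # Illustrative stand-in for SubprocessGenerator: construction and launch
    # are separated so all workers can be created first, then started as a group.
    def __init__(self, fn, args, start_now=False):
        self.process = multiprocessing.Process(target=fn, args=args, daemon=True)
        if start_now:
            self.process.start()

    @staticmethod
    def start_in_parallel(workers):
        for w in workers:
            if not w.process.is_alive():
                w.process.start()

# Usage: build everything first, then launch as a batch.
#   workers = [WorkerSketch(job, (i,)) for i in range(4)]
#   WorkerSketch.start_in_parallel(workers)
```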
@@ -52,7 +52,7 @@ class SampleGeneratorFacePerson(SampleGeneratorBase):

            self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (samples_host.create_cli(), index2d_host.create_cli(),) )]
        else:
            self.generators_count = np.clip(multiprocessing.cpu_count(), 2, 4)
            self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (samples_host.create_cli(), index2d_host.create_cli(),), start_now=True ) for i in range(self.generators_count) ]
            self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (samples_host.create_cli(), index2d_host.create_cli(),) ) for i in range(self.generators_count) ]

        self.generator_counter = -1
@@ -44,7 +44,7 @@ class SampleGeneratorFaceTemporal(SampleGeneratorBase):

        if self.debug:
            self.generators = [ThisThreadGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(),) )]
        else:
            self.generators = [SubprocessGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(),), start_now=True ) for i in range(self.generators_count) ]
            self.generators = [SubprocessGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(),) ) for i in range(self.generators_count) ]

        self.generator_counter = -1
@@ -25,21 +25,24 @@ class SampleProcessor(object):

        FACE_TYPE_HALF = 10
        FACE_TYPE_MID_FULL = 11
        FACE_TYPE_FULL = 12
        FACE_TYPE_HEAD = 13 #currently unused
        FACE_TYPE_AVATAR = 14 #currently unused
        FACE_TYPE_FULL_NO_ALIGN = 15
        FACE_TYPE_HEAD_NO_ALIGN = 16
        FACE_TYPE_WHOLE_FACE = 13
        FACE_TYPE_HEAD = 14 #currently unused
        FACE_TYPE_AVATAR = 15 #currently unused
        FACE_TYPE_FULL_NO_ALIGN = 16
        FACE_TYPE_HEAD_NO_ALIGN = 17
        FACE_TYPE_END = 20

        MODE_BEGIN = 40
        MODE_BGR = 40 #BGR
        MODE_G = 41 #Grayscale
        MODE_GGG = 42 #3xGrayscale
        MODE_FACE_MASK_HULL = 43 #mask hull as grayscale
        MODE_FACE_MASK_ALL_HULL = 43 #mask all hull as grayscale
        MODE_FACE_MASK_EYES_HULL = 44 #mask eyes hull as grayscale
        MODE_FACE_MASK_STRUCT = 45 #mask structure as grayscale
        MODE_BGR_SHUFFLE = 46 #BGR shuffle
        MODE_BGR_RANDOM_HSV_SHIFT = 47
        MODE_FACE_MASK_ALL_EYES_HULL = 45 #combo all + eyes as grayscale
        MODE_FACE_MASK_STRUCT = 46 #mask structure as grayscale
        MODE_BGR_SHUFFLE = 47 #BGR shuffle
        MODE_BGR_RANDOM_HSV_SHIFT = 48
        MODE_BGR_RANDOM_RGB_LEVELS = 49
        MODE_END = 50

        class Options(object):
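Note that inserting `FACE_TYPE_WHOLE_FACE = 13` shifts every later face-type constant up by one, and the mask modes are renumbered to make room for `MODE_FACE_MASK_ALL_EYES_HULL` and the new `MODE_BGR_RANDOM_RGB_LEVELS`. These constants only live inside the sampler process, so the renumbering should be safe as long as nothing persists the raw values.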
@@ -53,6 +56,7 @@ class SampleProcessor(object):

    SPTF_FACETYPE_TO_FACETYPE = { Types.FACE_TYPE_HALF : FaceType.HALF,
                                  Types.FACE_TYPE_MID_FULL : FaceType.MID_FULL,
                                  Types.FACE_TYPE_FULL : FaceType.FULL,
                                  Types.FACE_TYPE_WHOLE_FACE : FaceType.WHOLE_FACE,
                                  Types.FACE_TYPE_HEAD : FaceType.HEAD,
                                  Types.FACE_TYPE_FULL_NO_ALIGN : FaceType.FULL_NO_ALIGN,
                                  Types.FACE_TYPE_HEAD_NO_ALIGN : FaceType.HEAD_NO_ALIGN,
@@ -107,11 +111,13 @@ class SampleProcessor(object):

                if target_face_type == SPTF.NONE:
                    raise ValueError("target face type must be defined for face samples")
            else:
                if mode_type == SPTF.MODE_FACE_MASK_HULL:
                    raise ValueError("MODE_FACE_MASK_HULL applicable only for face samples")
                if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL:
                    raise ValueError("MODE_FACE_MASK_ALL_HULL applicable only for face samples")
                if mode_type == SPTF.MODE_FACE_MASK_EYES_HULL:
                    raise ValueError("MODE_FACE_MASK_EYES_HULL applicable only for face samples")
                elif mode_type == SPTF.MODE_FACE_MASK_STRUCT:
                if mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:
                    raise ValueError("MODE_FACE_MASK_ALL_EYES_HULL applicable only for face samples")
                if mode_type == SPTF.MODE_FACE_MASK_STRUCT:
                    raise ValueError("MODE_FACE_MASK_STRUCT applicable only for face samples")

            can_warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
@@ -141,16 +147,33 @@ class SampleProcessor(object):

                if mode_type == SPTF.NONE:
                    raise ValueError ('expected MODE_ type')

                if mode_type == SPTF.MODE_FACE_MASK_HULL:
                    if sample.eyebrows_expand_mod is not None:
                        img = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
                    else:
                        img = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks)
                if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
                   mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
                   mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:

                    if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
                       mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:
                        if sample.eyebrows_expand_mod is not None:
                            all_mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
                        else:
                            all_mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks)

                        all_mask = np.clip(all_mask, 0, 1)

                    if mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
                       mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:
                        eyes_mask = LandmarksProcessor.get_image_eye_mask (sample_bgr.shape, sample.landmarks)
                        eyes_mask = np.clip(eyes_mask, 0, 1)

                    if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL:
                        img = all_mask
                    elif mode_type == SPTF.MODE_FACE_MASK_EYES_HULL:
                        img = eyes_mask
                    elif mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:
                        img = all_mask + eyes_mask

                    if sample.ie_polys is not None:
                        sample.ie_polys.overlay_mask(img)
                elif mode_type == SPTF.MODE_FACE_MASK_EYES_HULL:
                    img = LandmarksProcessor.get_image_eye_mask (sample_bgr.shape, sample.landmarks)

                elif mode_type == SPTF.MODE_FACE_MASK_STRUCT:
                    if sample.eyebrows_expand_mod is not None:
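The rewritten branch builds the two hulls independently, clips each to [0,1], and for the combined mode simply sums them, so eye pixels carry the value 2 on top of the value-1 face hull. A condensed restatement:

```python
import numpy as np

def build_mask(mode, all_mask=None, eyes_mask=None):
    # Condensed sketch of the branch above; masks are float arrays.
    if mode == 'all':
        return np.clip(all_mask, 0, 1)
    if mode == 'eyes':
        return np.clip(eyes_mask, 0, 1)
    if mode == 'all_eyes':
        # face hull -> 1, eye pixels -> 2 (1 from the hull + 1 from the eyes)
        return np.clip(all_mask, 0, 1) + np.clip(eyes_mask, 0, 1)
    raise ValueError(mode)
```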
@@ -174,9 +197,13 @@ class SampleProcessor(object):

                if gaussian_blur is not None:
                    chance, kernel_max_size = gaussian_blur
                    chance = np.clip(chance, 0, 100)

                    rnd_state = np.random.RandomState (sample_rnd_seed+1)
                    gblur_rnd_chance = rnd_state.randint(100)
                    gblur_rnd_kernel = rnd_state.randint(kernel_max_size)*2+1

                    if np.random.randint(100) < chance:
                        img = cv2.GaussianBlur(img, ( np.random.randint( kernel_max_size )*2+1 ,) *2 , 0)
                    if gblur_rnd_chance < chance:
                        img = cv2.GaussianBlur(img, (gblur_rnd_kernel,) *2 , 0)

                if is_face_sample:
                    target_ft = SampleProcessor.SPTF_FACETYPE_TO_FACETYPE[target_face_type]
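The blur decision moves from the global `np.random` to a `RandomState` seeded per sample (offset by +1, presumably to avoid colliding with the stream other augmentations draw from `sample_rnd_seed`), which makes the augmentation reproducible for a given sample. A sketch:

```python
import cv2
import numpy as np

def maybe_blur(img, chance, kernel_max_size, sample_rnd_seed):
    # Both values are drawn unconditionally so the random stream stays stable
    # regardless of whether the blur actually fires.
    rnd = np.random.RandomState(sample_rnd_seed + 1)
    do_blur = rnd.randint(100)
    kernel  = rnd.randint(kernel_max_size) * 2 + 1   # odd kernel size
    if do_blur < np.clip(chance, 0, 100):
        img = cv2.GaussianBlur(img, (kernel, kernel), 0)
    return img
```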
@@ -186,12 +213,13 @@ class SampleProcessor(object):

                    if sample.face_type == FaceType.MARK_ONLY:
                        mat = LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft)

                        if mode_type == SPTF.MODE_FACE_MASK_HULL or \
                        if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
                           mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
                           mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL or \
                           mode_type == SPTF.MODE_FACE_MASK_STRUCT:
                            img = cv2.warpAffine( img, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=False)
                            img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )[...,None]
                            img = cv2.warpAffine( img, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_LINEAR )
                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=False, cv2_inter=cv2.INTER_LINEAR)
                            img = cv2.resize( img, (resolution,resolution), cv2.INTER_LINEAR )[...,None]
                        else:
                            img = cv2.warpAffine( img, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
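Mask channels are now warped and resized with `INTER_LINEAR` instead of `INTER_CUBIC`. A plausible reading: cubic interpolation overshoots at hard edges, pushing a 0/1 mask outside its value range, while bilinear stays inside it. A quick check:

```python
import cv2
import numpy as np

mask = np.zeros((8, 8), np.float32)
mask[:, 4:] = 1.0
up_cubic  = cv2.resize(mask, (32, 32), interpolation=cv2.INTER_CUBIC)
up_linear = cv2.resize(mask, (32, 32), interpolation=cv2.INTER_LINEAR)
print(up_cubic.min(), up_cubic.max())    # typically < 0 and > 1 at the edge
print(up_linear.min(), up_linear.max())  # stays within [0.0, 1.0]
```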
@@ -200,11 +228,12 @@ class SampleProcessor(object):

                    else:
                        mat = LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, target_ft)

                        if mode_type == SPTF.MODE_FACE_MASK_HULL or \
                        if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
                           mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
                           mode_type == SPTF.MODE_FACE_MASK_STRUCT:
                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=False)
                            img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )[...,None]
                           mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL or \
                           mode_type == SPTF.MODE_FACE_MASK_STRUCT:
                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=False, cv2_inter=cv2.INTER_LINEAR)
                            img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_LINEAR )[...,None]
                        else:
                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
                            img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC )
@@ -213,10 +242,11 @@ class SampleProcessor(object):

                        img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )

                if mode_type == SPTF.MODE_FACE_MASK_HULL or \
                if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
                   mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
                   mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL or \
                   mode_type == SPTF.MODE_FACE_MASK_STRUCT:
                    out_sample = np.clip(img.astype(np.float32), 0, 1)
                    out_sample = img.astype(np.float32)
                else:
                    img = np.clip(img.astype(np.float32), 0, 1)
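The final clip on mask outputs is dropped deliberately: the combined all-plus-eyes mask is meant to reach 2 on eye pixels, and clipping here would collapse it back to a plain hull mask.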
@@ -242,6 +272,20 @@ class SampleProcessor(object):

                        v = np.clip ( v + rnd_state.random()-0.5, 0, 1 )
                        hsv = cv2.merge([h, s, v])
                        out_sample = np.clip( cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) , 0, 1 )

                    elif mode_type == SPTF.MODE_BGR_RANDOM_RGB_LEVELS:
                        rnd_state = np.random.RandomState (sample_rnd_seed)
                        np_rnd = rnd_state.rand

                        inBlack = np.array([np_rnd()*0.25 , np_rnd()*0.25 , np_rnd()*0.25], dtype=np.float32)
                        inWhite = np.array([1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25, 1.0-np_rnd()*0.25], dtype=np.float32)
                        inGamma = np.array([0.5+np_rnd(), 0.5+np_rnd(), 0.5+np_rnd()], dtype=np.float32)
                        outBlack = np.array([0.0, 0.0, 0.0], dtype=np.float32)
                        outWhite = np.array([1.0, 1.0, 1.0], dtype=np.float32)

                        out_sample = np.clip( (img - inBlack) / (inWhite - inBlack), 0, 1 )
                        out_sample = ( out_sample ** (1/inGamma) ) * (outWhite - outBlack) + outBlack
                        out_sample = np.clip(out_sample, 0, 1)
                    elif mode_type == SPTF.MODE_G:
                        out_sample = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)[...,None]
                    elif mode_type == SPTF.MODE_GGG:
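The new `MODE_BGR_RANDOM_RGB_LEVELS` augmentation is a random per-channel levels adjustment (black point, white point, gamma), seeded per sample. A compact sketch with the same ranges as the diff (the exact random-draw order differs, so it is not stream-identical):

```python
import numpy as np

def random_rgb_levels(img, sample_rnd_seed):
    rnd = np.random.RandomState(sample_rnd_seed)
    in_black = rnd.rand(3).astype(np.float32) * 0.25          # lift blacks up to 0.25
    in_white = 1.0 - rnd.rand(3).astype(np.float32) * 0.25    # pull whites down to 0.75
    in_gamma = 0.5 + rnd.rand(3).astype(np.float32)           # gamma in [0.5, 1.5)
    out = np.clip((img - in_black) / (in_white - in_black), 0, 1)
    out = out ** (1.0 / in_gamma)   # outBlack=0 / outWhite=1 leave the rest unchanged
    return np.clip(out, 0, 1)
```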