Added option 'Eyes priority' (y/n)

	Fixes eye problems during training (especially on HD architectures)
	by forcing the neural network to train the eyes with higher priority.
	Before/after: https://i.imgur.com/YQHOuSR.jpg

	It does not guarantee correct eye direction.
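
In the SAEHD loss (see models/Model_SAEHD/Model.py below), the option adds a heavily weighted L1 term restricted to the eye region on top of the usual DSSIM + MSE terms. A minimal standalone sketch of that term (TensorFlow 1.x style, names simplified from the diff):

    import tensorflow as tf

    def eyes_priority_loss(target, predicted, eyes_mask, weight=300.0):
        # eyes_mask is 1.0 inside the eye hulls and 0.0 elsewhere, so the
        # penalty acts only on eye pixels; the weight (300 vs. the 10 used
        # for the whole-face terms) is what gives the eyes their priority.
        diff = target * eyes_mask - predicted * eyes_mask
        return tf.reduce_mean(weight * tf.abs(diff), axis=[1, 2, 3])

Because the term only penalizes per-pixel error inside the eye hulls, it sharpens the eye region but cannot steer gaze, hence the disclaimer above.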
Colombo 2020-02-18 14:30:07 +04:00
parent 4f928074b9
commit 9598ba0141
5 changed files with 105 additions and 60 deletions

core/imagelib/warp.py

@@ -47,11 +47,11 @@ def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0
     return params

-def warp_by_params (params, img, can_warp, can_transform, can_flip, border_replicate):
+def warp_by_params (params, img, can_warp, can_transform, can_flip, border_replicate, cv2_inter=cv2.INTER_CUBIC):
     if can_warp:
-        img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC )
+        img = cv2.remap(img, params['mapx'], params['mapy'], cv2_inter )
     if can_transform:
-        img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
+        img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2_inter )

     if len(img.shape) == 2:
         img = img[...,None]
     if can_flip and params['flip']:
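
The new cv2_inter parameter lets callers pick the interpolation. Mask samples are warped with cv2.INTER_LINEAR (see samplelib/SampleProcessor.py below); a plausible reason is that INTER_CUBIC overshoots, producing values outside the input range and smearing the exact 0/1/2 levels of the combined mask this commit introduces. A usage sketch, with the core.imagelib import path assumed from the surrounding codebase:

    # Sketch: warping a float mask with linear interpolation so values stay
    # inside the original range (cubic interpolation can dip below 0 or
    # overshoot above 1 near mask edges).
    import cv2
    import numpy as np
    from core import imagelib

    mask = np.zeros((256, 256, 1), dtype=np.float32)   # e.g. a hull mask in [0,1]
    params = imagelib.gen_warp_params(mask, flip=False)
    warped = imagelib.warp_by_params(params, mask,
                                     can_warp=True, can_transform=True,
                                     can_flip=True, border_replicate=False,
                                     cv2_inter=cv2.INTER_LINEAR)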

facelib/LandmarksProcessor.py

@@ -328,8 +328,8 @@ def expand_eyebrows(lmrks, eyebrows_expand_mod=1.0):

-def get_image_hull_mask (image_shape, image_landmarks, eyebrows_expand_mod=1.0, ie_polys=None, color=(1,) ):
-    hull_mask = np.zeros(image_shape[0:2]+( len(color),),dtype=np.float32)
+def get_image_hull_mask (image_shape, image_landmarks, eyebrows_expand_mod=1.0, ie_polys=None ):
+    hull_mask = np.zeros(image_shape[0:2]+(1,),dtype=np.float32)

     lmrks = expand_eyebrows(image_landmarks, eyebrows_expand_mod)

@@ -345,7 +345,7 @@ def get_image_hull_mask (image_shape, image_landmarks, eyebrows_expand_mod=1.0,
     for item in parts:
         merged = np.concatenate(item)
-        cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), color )
+        cv2.fillConvexPoly(hull_mask, cv2.convexHull(merged), (1,) )

     if ie_polys is not None:
         ie_polys.overlay_mask(hull_mask)

@@ -390,7 +390,7 @@ def alpha_to_color (img_alpha, color):
 def get_cmask (image_shape, lmrks, eyebrows_expand_mod=1.0):
     h,w,c = image_shape

-    hull = get_image_hull_mask (image_shape, lmrks, eyebrows_expand_mod, color=(1,) )
+    hull = get_image_hull_mask (image_shape, lmrks, eyebrows_expand_mod )

     result = np.zeros( (h,w,3), dtype=np.float32 )

models/Model_Quick96/Model.py

@@ -372,14 +372,14 @@ class QModel(ModelBase):
                         sample_process_options=SampleProcessor.Options(random_flip=True if self.pretrain else False),
                         output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution':resolution, },
                                                 {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution, },
-                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_HULL), 'data_format':nn.data_format, 'resolution': resolution } ],
+                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_ALL_HULL), 'data_format':nn.data_format, 'resolution': resolution } ],
                         generators_count=src_generators_count ),

                    SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
                         sample_process_options=SampleProcessor.Options(random_flip=True if self.pretrain else False),
                         output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution':resolution},
                                                 {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution},
-                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_HULL), 'data_format':nn.data_format, 'resolution': resolution} ],
+                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_ALL_HULL), 'data_format':nn.data_format, 'resolution': resolution} ],
                         generators_count=dst_generators_count )
             ])

models/Model_SAEHD/Model.py

@@ -36,6 +36,7 @@ class SAEHDModel(ModelBase):
         self.options['d_dims'] = None
         self.options['d_mask_dims'] = None
         default_learn_mask  = self.options['learn_mask'] = self.load_or_def_option('learn_mask', True)
+        default_eyes_prio   = self.options['eyes_prio'] = self.load_or_def_option('eyes_prio', False)
         default_lr_dropout  = self.options['lr_dropout'] = self.load_or_def_option('lr_dropout', False)
         default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True)
         default_gan_power   = self.options['gan_power'] = self.load_or_def_option('gan_power', 0.0)

@@ -83,6 +84,7 @@ class SAEHDModel(ModelBase):
         if self.is_first_run() or ask_override:
             self.options['learn_mask'] = io.input_bool ("Learn mask", default_learn_mask, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case merger forced to use 'not predicted mask' that is not smooth as predicted.")
+            self.options['eyes_prio'] = io.input_bool ("Eyes priority", default_eyes_prio, help_message="fix eye problems during training ( especially on HD architectures ) by forcing the neural network to train eyes with higher priority. before/after https://i.imgur.com/YQHOuSR.jpg . It does not guarantee the right eye direction.")

         if self.is_first_run() or ask_override:
             if len(device_config.devices) == 1:

@@ -333,6 +335,7 @@ class SAEHDModel(ModelBase):
         self.resolution = resolution = self.options['resolution']
         learn_mask = self.options['learn_mask']
+        eyes_prio = self.options['eyes_prio']
         archi = self.options['archi']
         ae_dims = self.options['ae_dims']
         e_dims = self.options['e_dims']

@@ -367,8 +370,8 @@ class SAEHDModel(ModelBase):
             self.target_src = tf.placeholder (nn.tf_floatx, bgr_shape)
             self.target_dst = tf.placeholder (nn.tf_floatx, bgr_shape)

-            self.target_srcm = tf.placeholder (nn.tf_floatx, mask_shape)
-            self.target_dstm = tf.placeholder (nn.tf_floatx, mask_shape)
+            self.target_srcm_all = tf.placeholder (nn.tf_floatx, mask_shape)
+            self.target_dstm_all = tf.placeholder (nn.tf_floatx, mask_shape)

         # Initializing model classes
         with tf.device (models_opt_device):

@@ -472,8 +475,8 @@ class SAEHDModel(ModelBase):
                     gpu_warped_dst      = self.warped_dst [batch_slice,:,:,:]
                     gpu_target_src      = self.target_src [batch_slice,:,:,:]
                     gpu_target_dst      = self.target_dst [batch_slice,:,:,:]
-                    gpu_target_srcm     = self.target_srcm[batch_slice,:,:,:]
-                    gpu_target_dstm     = self.target_dstm[batch_slice,:,:,:]
+                    gpu_target_srcm_all = self.target_srcm_all[batch_slice,:,:,:]
+                    gpu_target_dstm_all = self.target_dstm_all[batch_slice,:,:,:]

                     # process model tensors
                     if 'df' in archi:

@@ -505,6 +508,12 @@ class SAEHDModel(ModelBase):
                         gpu_pred_dst_dstm_list.append(gpu_pred_dst_dstm)
                         gpu_pred_src_dstm_list.append(gpu_pred_src_dstm)

+                    # unpack masks from one combined mask
+                    gpu_target_srcm      = tf.clip_by_value (gpu_target_srcm_all, 0, 1)
+                    gpu_target_dstm      = tf.clip_by_value (gpu_target_dstm_all, 0, 1)
+                    gpu_target_srcm_eyes = tf.clip_by_value (gpu_target_srcm_all-1, 0, 1)
+                    gpu_target_dstm_eyes = tf.clip_by_value (gpu_target_dstm_all-1, 0, 1)
+
                     gpu_target_srcm_blur = nn.tf_gaussian_blur(gpu_target_srcm, max(1, resolution // 32) )
                     gpu_target_dstm_blur = nn.tf_gaussian_blur(gpu_target_dstm, max(1, resolution // 32) )
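
The `_all` masks use a simple additive encoding, which the four clip lines above decode. A toy NumPy round trip, assuming the 0/1/2 levels produced by SampleProcessor (see below):

    import numpy as np

    face = np.array([0., 1., 1., 1.])   # toy face-hull mask
    eyes = np.array([0., 0., 1., 0.])   # toy eye mask (inside the hull)
    combined = face + eyes              # sampler output: values in {0, 1, 2}

    face_back = np.clip(combined, 0, 1)      # -> [0., 1., 1., 1.]
    eyes_back = np.clip(combined - 1, 0, 1)  # -> [0., 0., 1., 0.]
    assert (face_back == face).all() and (eyes_back == eyes).all()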
@@ -522,6 +531,10 @@ class SAEHDModel(ModelBase):
                     gpu_src_loss =  tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
                     gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])

+                    if eyes_prio:
+                        gpu_src_loss += tf.reduce_mean ( 300*tf.abs ( gpu_target_src*gpu_target_srcm_eyes - gpu_pred_src_src*gpu_target_srcm_eyes ), axis=[1,2,3])
+
                     if learn_mask:
                         gpu_src_loss += tf.reduce_mean ( 10*tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] )

@@ -536,6 +549,10 @@ class SAEHDModel(ModelBase):
                     gpu_dst_loss =  tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
                     gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3])

+                    if eyes_prio:
+                        gpu_dst_loss += tf.reduce_mean ( 300*tf.abs ( gpu_target_dst*gpu_target_dstm_eyes - gpu_pred_dst_dst*gpu_target_dstm_eyes ), axis=[1,2,3])
+
                     if learn_mask:
                         gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] )

@@ -606,15 +623,15 @@ class SAEHDModel(ModelBase):
             # Initializing training and view functions
-            def src_dst_train(warped_src, target_src, target_srcm, \
-                              warped_dst, target_dst, target_dstm):
+            def src_dst_train(warped_src, target_src, target_srcm_all, \
+                              warped_dst, target_dst, target_dstm_all):
                 s, d, _ = nn.tf_sess.run ( [ src_loss, dst_loss, src_dst_loss_gv_op],
                                             feed_dict={self.warped_src :warped_src,
                                                        self.target_src :target_src,
-                                                       self.target_srcm:target_srcm,
+                                                       self.target_srcm_all:target_srcm_all,
                                                        self.warped_dst :warped_dst,
                                                        self.target_dst :target_dst,
-                                                       self.target_dstm:target_dstm,
+                                                       self.target_dstm_all:target_dstm_all,
                                                        })
                 s = np.mean(s)
                 d = np.mean(d)

@@ -722,14 +739,16 @@ class SAEHDModel(ModelBase):
                         sample_process_options=SampleProcessor.Options(random_flip=self.random_flip),
                         output_sample_types = [ {'types' : (t_img_warped, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution, 'ct_mode': self.options['ct_mode'] },
                                                 {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution, 'ct_mode': self.options['ct_mode'] },
-                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_HULL), 'data_format':nn.data_format, 'resolution': resolution } ],
+                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_ALL_EYES_HULL), 'data_format':nn.data_format, 'resolution': resolution },
+                                              ],
                         generators_count=src_generators_count ),

                    SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
                         sample_process_options=SampleProcessor.Options(random_flip=self.random_flip),
                         output_sample_types = [ {'types' : (t_img_warped, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution},
                                                 {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'data_format':nn.data_format, 'resolution': resolution},
-                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_HULL), 'data_format':nn.data_format, 'resolution': resolution} ],
+                                                {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_FACE_MASK_ALL_EYES_HULL), 'data_format':nn.data_format, 'resolution': resolution},
+                                              ],
                         generators_count=dst_generators_count )
             ])

@@ -748,23 +767,23 @@ class SAEHDModel(ModelBase):
     #override
     def onTrainOneIter(self):
-        ( (warped_src, target_src, target_srcm), \
-          (warped_dst, target_dst, target_dstm) ) = self.generate_next_samples()
+        ( (warped_src, target_src, target_srcm_all), \
+          (warped_dst, target_dst, target_dstm_all) ) = self.generate_next_samples()

-        src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm)
+        src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm_all, warped_dst, target_dst, target_dstm_all)

         if self.options['true_face_power'] != 0 and not self.pretrain:
             self.D_train (warped_src, warped_dst)

         if self.gan_power != 0:
-            self.D_src_dst_train (warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm)
+            self.D_src_dst_train (warped_src, target_src, target_srcm_all, warped_dst, target_dst, target_dstm_all)

         return ( ('src_loss', src_loss), ('dst_loss', dst_loss), )

     #override
     def onGetPreview(self, samples):
-        ( (warped_src, target_src, target_srcm),
-          (warped_dst, target_dst, target_dstm) ) = samples
+        ( (warped_src, target_src, target_srcm_all,),
+          (warped_dst, target_dst, target_dstm_all,) ) = samples

         if self.options['learn_mask']:
             S, D, SS, DD, DDM, SD, SDM = [ np.clip( nn.to_data_format(x,"NHWC", self.model_data_format), 0.0, 1.0) for x in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ]

@@ -772,7 +791,10 @@ class SAEHDModel(ModelBase):
         else:
             S, D, SS, DD, SD, = [ np.clip( nn.to_data_format(x,"NHWC", self.model_data_format) , 0.0, 1.0) for x in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ]

-        target_srcm, target_dstm = [ nn.to_data_format(x,"NHWC", self.model_data_format) for x in ([target_srcm, target_dstm] )]
+        target_srcm_all, target_dstm_all = [ nn.to_data_format(x,"NHWC", self.model_data_format) for x in ([target_srcm_all, target_dstm_all] )]
+
+        target_srcm = np.clip(target_srcm_all, 0, 1)
+        target_dstm = np.clip(target_dstm_all, 0, 1)

         n_samples = min(4, self.get_batch_size(), 800 // self.resolution )

samplelib/SampleProcessor.py

@@ -35,11 +35,12 @@ class SampleProcessor(object):
         MODE_BGR                     = 40  #BGR
         MODE_G                       = 41  #Grayscale
         MODE_GGG                     = 42  #3xGrayscale
-        MODE_FACE_MASK_HULL          = 43  #mask hull as grayscale
+        MODE_FACE_MASK_ALL_HULL      = 43  #mask all hull as grayscale
         MODE_FACE_MASK_EYES_HULL     = 44  #mask eyes hull as grayscale
-        MODE_FACE_MASK_STRUCT        = 45  #mask structure as grayscale
-        MODE_BGR_SHUFFLE             = 46  #BGR shuffle
-        MODE_BGR_RANDOM_HSV_SHIFT    = 47
+        MODE_FACE_MASK_ALL_EYES_HULL = 45  #combo all + eyes as grayscale
+        MODE_FACE_MASK_STRUCT        = 46  #mask structure as grayscale
+        MODE_BGR_SHUFFLE             = 47  #BGR shuffle
+        MODE_BGR_RANDOM_HSV_SHIFT    = 48
         MODE_END = 50

         class Options(object):

@@ -107,11 +108,13 @@ class SampleProcessor(object):
                     if target_face_type == SPTF.NONE:
                         raise ValueError("target face type must be defined for face samples")
                 else:
-                    if mode_type == SPTF.MODE_FACE_MASK_HULL:
-                        raise ValueError("MODE_FACE_MASK_HULL applicable only for face samples")
+                    if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL:
+                        raise ValueError("MODE_FACE_MASK_ALL_HULL applicable only for face samples")
                     if mode_type == SPTF.MODE_FACE_MASK_EYES_HULL:
                         raise ValueError("MODE_FACE_MASK_EYES_HULL applicable only for face samples")
-                    elif mode_type == SPTF.MODE_FACE_MASK_STRUCT:
+                    if mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:
+                        raise ValueError("MODE_FACE_MASK_ALL_EYES_HULL applicable only for face samples")
+                    if mode_type == SPTF.MODE_FACE_MASK_STRUCT:
                         raise ValueError("MODE_FACE_MASK_STRUCT applicable only for face samples")

                 can_warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
@@ -141,16 +144,33 @@ class SampleProcessor(object):
                     if mode_type == SPTF.NONE:
                         raise ValueError ('expected MODE_ type')

-                    if mode_type == SPTF.MODE_FACE_MASK_HULL:
+                    if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
+                       mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
+                       mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:
+
+                        if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
+                           mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:
                             if sample.eyebrows_expand_mod is not None:
-                                img = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
+                                all_mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
                             else:
-                                img = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks)
+                                all_mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks)
+                            all_mask = np.clip(all_mask, 0, 1)
+
+                        if mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
+                           mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:
+                            eyes_mask = LandmarksProcessor.get_image_eye_mask (sample_bgr.shape, sample.landmarks)
+                            eyes_mask = np.clip(eyes_mask, 0, 1)
+
+                        if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL:
+                            img = all_mask
+                        elif mode_type == SPTF.MODE_FACE_MASK_EYES_HULL:
+                            img = eyes_mask
+                        elif mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL:
+                            img = all_mask + eyes_mask

                         if sample.ie_polys is not None:
                             sample.ie_polys.overlay_mask(img)
-                    elif mode_type == SPTF.MODE_FACE_MASK_EYES_HULL:
-                        img = LandmarksProcessor.get_image_eye_mask (sample_bgr.shape, sample.landmarks)

                     elif mode_type == SPTF.MODE_FACE_MASK_STRUCT:
                         if sample.eyebrows_expand_mod is not None:
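
Both component masks are clipped to [0,1] before they are combined, so in MODE_FACE_MASK_ALL_EYES_HULL the summed mask is exactly 2 inside the eye hulls, 1 over the rest of the face, and 0 elsewhere: the whole face/eyes layout travels to the trainer in a single grayscale channel, which is what the "unpack masks from one combined mask" block in Model_SAEHD decodes.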
@@ -186,12 +206,13 @@ class SampleProcessor(object):
                     if sample.face_type == FaceType.MARK_ONLY:
                         mat = LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft)

-                        if mode_type == SPTF.MODE_FACE_MASK_HULL or \
+                        if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
+                           mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
+                           mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL or \
                            mode_type == SPTF.MODE_FACE_MASK_STRUCT:
-                            img = cv2.warpAffine( img, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
-                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=False)
-                            img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )[...,None]
+                            img = cv2.warpAffine( img, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_LINEAR )
+                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=False, cv2_inter=cv2.INTER_LINEAR)
+                            img = cv2.resize( img, (resolution,resolution), cv2.INTER_LINEAR )[...,None]
                         else:
                             img = cv2.warpAffine( img, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
                             img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)

@@ -200,11 +221,12 @@ class SampleProcessor(object):
                     else:
                         mat = LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, target_ft)

-                        if mode_type == SPTF.MODE_FACE_MASK_HULL or \
+                        if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
+                           mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
+                           mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL or \
                            mode_type == SPTF.MODE_FACE_MASK_STRUCT:
-                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=False)
-                            img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )[...,None]
+                            img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=False, cv2_inter=cv2.INTER_LINEAR)
+                            img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_LINEAR )[...,None]
                         else:
                             img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
                             img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC )

@@ -213,10 +235,11 @@ class SampleProcessor(object):
                         img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )

-                    if mode_type == SPTF.MODE_FACE_MASK_HULL or \
+                    if mode_type == SPTF.MODE_FACE_MASK_ALL_HULL or \
+                       mode_type == SPTF.MODE_FACE_MASK_EYES_HULL or \
+                       mode_type == SPTF.MODE_FACE_MASK_ALL_EYES_HULL or \
                        mode_type == SPTF.MODE_FACE_MASK_STRUCT:
-                        out_sample = np.clip(img.astype(np.float32), 0, 1)
+                        out_sample = img.astype(np.float32)
                    else:
                        img = np.clip(img.astype(np.float32), 0, 1)
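
Note the removed np.clip on out_sample for the mask modes: clipping to [0,1] would collapse the value-2 eye region of MODE_FACE_MASK_ALL_EYES_HULL back to 1, so the raw float mask is now passed through unchanged. The switch from INTER_CUBIC to INTER_LINEAR for these modes serves the same end, since cubic interpolation would overshoot the integer mask levels at region boundaries.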