optimized face sample generator; CPU load is significantly reduced

SAEHD:

added new option
GAN power 0.0 .. 10.0
	Trains the network in a Generative Adversarial manner.
	Forces the neural network to learn small details of the face.
	You can enable or disable this option at any time,
	but it is better to enable it once the network is already trained well enough.
	Typical value is 1.0.
	GAN power has no effect in pretrain mode.

Example of enabling GAN at 81k iterations, then +5k more iterations:
https://i.imgur.com/OdXHLhU.jpg
https://i.imgur.com/CYAJmJx.jpg
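
For illustration, a minimal NumPy sketch of how the GAN power value acts on the losses (this mirrors the DLoss / gan_power wiring in the SAEHD diff below; the logits and the reconstruction loss value are placeholders, not real model outputs):

import numpy as np

def DLoss(labels, logits):
    # sigmoid cross-entropy averaged over all elements
    # (same formula tf.nn.sigmoid_cross_entropy_with_logits uses)
    return np.mean(np.maximum(logits, 0) - logits * labels + np.log1p(np.exp(-np.abs(logits))))

# placeholder patch-discriminator logits for a generated face and a real training face
pred_logits   = np.random.randn(1, 8, 8, 1).astype(np.float32)
target_logits = np.random.randn(1, 8, 8, 1).astype(np.float32)

gan_power  = 1.0    # typical value; 0.0 disables the GAN term entirely
recon_loss = 0.37   # placeholder DSSIM + MSE reconstruction loss

# discriminator: push real faces toward 1 and generated faces toward 0
D_loss = 0.5 * (DLoss(1.0, target_logits) + DLoss(0.0, pred_logits))
# generator: reconstruction loss plus a gan_power-weighted term pushing fakes toward "real"
G_loss = recon_loss + gan_power * DLoss(1.0, pred_logits)
print(D_loss, G_loss)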

dfhd: default decoder dimensions (d_dims) are now 48
the preview for 256 resolution is now displayed correctly

fixed model naming/renaming/removing

Improvements for those involved in post-processing in AfterEffects:

The codec is reverted back to x264 so the output can be used properly in AfterEffects and video players.

Merger now always outputs the mask to workspace\data_dst\merged_mask

removed all raw modes except raw-rgb
raw-rgb mode now outputs the selected face mask_mode (previously a square mask)

The 'export alpha mask' button is replaced by 'show alpha mask'.
You can view the alpha mask without recomputing the frames.

'merged *.bat' scripts now also output a 'result_mask' video file.
'merged lossless' now uses the x264 lossless codec (previously the PNG codec).
The result_mask video file is always lossless.

Thus you can use the result_mask video file as a mask layer in AfterEffects.
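
A rough ffmpeg-python sketch of the new lossless output settings (the input pattern, frame rate and paths are illustrative; the codec kwargs mirror the VideoEd diff below):

import ffmpeg  # ffmpeg-python, as used by mainscripts/VideoEd.py

# encode a merged PNG sequence with lossless x264 (previously the PNG codec was used)
stream = ffmpeg.input('workspace/data_dst/merged/%05d.png', framerate=30)
stream = ffmpeg.output(stream, 'workspace/result.mp4',
                       **{'c:v': 'libx264',   # x264 instead of PNG
                          'crf': '0',         # crf 0 = lossless
                          'pix_fmt': 'yuv420p'})
ffmpeg.run(stream)
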
Colombo 2020-01-28 12:24:45 +04:00
parent 80f285067a
commit 7386a9d6fd
28 changed files with 455 additions and 363 deletions

View file

@ -4,8 +4,10 @@ from pathlib import Path
from core.interact import interact as io
import traceback
#allows to open non-english characters path
def cv2_imread(filename, flags=cv2.IMREAD_UNCHANGED, loader_func=None):
"""
allows to open non-english characters path
"""
try:
if loader_func is not None:
bytes = bytearray(loader_func(filename))

View file

@ -11,7 +11,7 @@ from .warp import gen_warp_params, warp_by_params
from .reduce_colors import reduce_colors
from .color_transfer import color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer, seamless_clone
from .color_transfer import color_transfer, color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer, seamless_clone
from .common import normalize_channels, cut_odd_image, overlay_alpha_image

View file

@ -299,7 +299,7 @@ def linear_color_transfer(target_img, source_img, mode='pca', eps=1e-5):
matched_img += mu_s
matched_img[matched_img>1] = 1
matched_img[matched_img<0] = 0
return matched_img.astype(source_img.dtype)
return np.clip(matched_img.astype(source_img.dtype), 0, 1)
def lab_image_stats(image):
# compute the mean and standard deviation of each channel
@ -391,3 +391,24 @@ def color_transfer_mix(img_src,img_trg):
return (img_rct / 255.0).astype(np.float32)
def color_transfer(ct_mode, img_src, img_trg):
"""
color transfer for [0,1] float inputs
"""
if ct_mode == 'lct':
out = linear_color_transfer (img_src, img_trg)
elif ct_mode == 'rct':
out = reinhard_color_transfer ( np.clip( img_src*255, 0, 255 ).astype(np.uint8),
np.clip( img_trg*255, 0, 255 ).astype(np.uint8) )
out = np.clip( out.astype(np.float32) / 255.0, 0.0, 1.0)
elif ct_mode == 'mkl':
out = color_transfer_mkl (img_src, img_trg)
elif ct_mode == 'idt':
out = color_transfer_idt (img_src, img_trg)
elif ct_mode == 'sot':
out = color_transfer_sot (img_src, img_trg)
out = np.clip( out, 0.0, 1.0)
else:
raise ValueError(f"unknown ct_mode {ct_mode}")
return out
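
A hedged usage sketch of the new color_transfer dispatcher (the random arrays stand in for real float32 BGR face crops in [0,1]):

import numpy as np
from core import imagelib

img_src = np.random.rand(128, 128, 3).astype(np.float32)  # face being color-matched
img_trg = np.random.rand(128, 128, 3).astype(np.float32)  # face providing the target color statistics

# ct_mode is one of 'lct', 'rct', 'mkl', 'idt', 'sot'; anything else raises ValueError
out = imagelib.color_transfer('rct', img_src, img_trg)
print(out.shape, out.dtype)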

View file

@ -47,11 +47,13 @@ def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0
return params
def warp_by_params (params, img, warp, transform, flip, is_border_replicate):
if warp:
def warp_by_params (params, img, can_warp, can_transform, can_flip, border_replicate):
if can_warp:
img = cv2.remap(img, params['mapx'], params['mapy'], cv2.INTER_CUBIC )
if transform:
img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if is_border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
if flip and params['flip']:
if can_transform:
img = cv2.warpAffine( img, params['rmat'], (params['w'], params['w']), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
if len(img.shape) == 2:
img = img[...,None]
if can_flip and params['flip']:
img = img[:,::-1,...]
return img

View file

@ -78,26 +78,7 @@ def initialize_layers(nn):
return True
def init_weights(self):
ops = []
ca_tuples_w = []
ca_tuples = []
for w in self.get_weights():
initializer = w.initializer
for input in initializer.inputs:
if "_cai_" in input.name:
ca_tuples_w.append (w)
ca_tuples.append ( (w.shape.as_list(), w.dtype.as_numpy_dtype) )
break
else:
ops.append (initializer)
if len(ops) != 0:
nn.tf_sess.run (ops)
if len(ca_tuples) != 0:
nn.tf_batch_set_value( [*zip(ca_tuples_w, nn.initializers.ca.generate_batch (ca_tuples))] )
nn.tf_init_weights(self.get_weights())
nn.Saveable = Saveable
class LayerBase():
@ -302,6 +283,7 @@ def initialize_layers(nn):
raise ValueError ("strides must be an int type")
if not isinstance(dilations, int):
raise ValueError ("dilations must be an int type")
kernel_size = int(kernel_size)
if dtype is None:
dtype = nn.tf_floatx
@ -405,6 +387,7 @@ def initialize_layers(nn):
def __init__(self, in_ch, out_ch, kernel_size, strides=2, padding='SAME', use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ):
if not isinstance(strides, int):
raise ValueError ("strides must be an int type")
kernel_size = int(kernel_size)
if dtype is None:
dtype = nn.tf_floatx

41
core/leras/models.py Normal file
View file

@ -0,0 +1,41 @@
def initialize_models(nn):
tf = nn.tf
class PatchDiscriminator(nn.ModelBase):
def on_build(self, patch_size, in_ch, base_ch=256, kernel_initializer=None):
prev_ch = in_ch
self.convs = []
for i, (kernel_size, strides) in enumerate(patch_discriminator_kernels[patch_size]):
cur_ch = base_ch * min( (2**i), 8 )
self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=kernel_size, strides=strides, padding='SAME', kernel_initializer=kernel_initializer) )
prev_ch = cur_ch
self.out_conv = nn.Conv2D( prev_ch, 1, kernel_size=1, padding='VALID', kernel_initializer=kernel_initializer)
def forward(self, x):
for conv in self.convs:
x = tf.nn.leaky_relu( conv(x), 0.1 )
return self.out_conv(x)
nn.PatchDiscriminator = PatchDiscriminator
patch_discriminator_kernels = \
{ 1 : [ [1,1] ],
2 : [ [2,1] ],
3 : [ [2,1], [2,1] ],
4 : [ [2,2], [2,2] ],
5 : [ [3,2], [2,2] ],
6 : [ [4,2], [2,2] ],
7 : [ [3,2], [3,2] ],
8 : [ [4,2], [3,2] ],
9 : [ [3,2], [4,2] ],
10 : [ [4,2], [4,2] ],
11 : [ [3,2], [3,2], [2,1] ],
12 : [ [4,2], [3,2], [2,1] ],
13 : [ [3,2], [4,2], [2,1] ],
14 : [ [4,2], [4,2], [2,1] ],
15 : [ [3,2], [3,2], [3,1] ],
16 : [ [4,2], [3,2], [3,1] ] }
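
A rough sketch of how the new PatchDiscriminator is wired up, following the D_src construction in the SAEHD diff further down (assumes the usual nn.initialize() flow; resolution and name are illustrative):

from core.leras import nn

nn.initialize()   # registers nn.PatchDiscriminator along with the other leras classes
tf = nn.tf

resolution, output_ch = 128, 3
# same construction SAEHD uses for its D_src / D_dst discriminators
D_src = nn.PatchDiscriminator(patch_size=resolution // 16, in_ch=output_ch, base_ch=512, name="D_src")

x = tf.placeholder(nn.tf_floatx, nn.get4Dshape(resolution, resolution, output_ch))
logits = D_src(x)   # per-patch real/fake logits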

View file

@ -46,6 +46,7 @@ class nn():
# Tensor ops
tf_get_value = None
tf_batch_set_value = None
tf_init_weights = None
tf_gradients = None
tf_average_gv_list = None
tf_average_tensor_list = None
@ -79,6 +80,9 @@ class nn():
TFBaseOptimizer = None
TFRMSpropOptimizer = None
# Models
PatchDiscriminator = None
@staticmethod
def initialize(device_config=None, floatx="float32", data_format="NHWC"):
@ -138,11 +142,13 @@ class nn():
from .layers import initialize_layers
from .initializers import initialize_initializers
from .optimizers import initialize_optimizers
from .models import initialize_models
initialize_tensor_ops(nn)
initialize_layers(nn)
initialize_initializers(nn)
initialize_optimizers(nn)
initialize_models(nn)
if nn.tf_sess is None:
nn.tf_sess = tf.Session(config=nn.tf_sess_config)

View file

@ -29,6 +29,27 @@ def initialize_tensor_ops(nn):
nn.tf_sess.run(assign_ops, feed_dict=feed_dict)
nn.tf_batch_set_value = tf_batch_set_value
def tf_init_weights(weights):
ops = []
ca_tuples_w = []
ca_tuples = []
for w in weights:
initializer = w.initializer
for input in initializer.inputs:
if "_cai_" in input.name:
ca_tuples_w.append (w)
ca_tuples.append ( (w.shape.as_list(), w.dtype.as_numpy_dtype) )
break
else:
ops.append (initializer)
if len(ops) != 0:
nn.tf_sess.run (ops)
if len(ca_tuples) != 0:
nn.tf_batch_set_value( [*zip(ca_tuples_w, nn.initializers.ca.generate_batch (ca_tuples))] )
nn.tf_init_weights = tf_init_weights
def tf_gradients ( loss, vars ):
grads = gradients.gradients(loss, vars, colocate_gradients_with_ops=True )

39
main.py
View file

@ -201,23 +201,23 @@ if __name__ == "__main__":
def process_merge(arguments):
osex.set_process_lowest_prio()
kwargs = {'model_class_name' : arguments.model_name,
'saved_models_path' : Path(arguments.model_dir),
'training_data_src_path' : Path(arguments.training_data_src_dir) if arguments.training_data_src_dir is not None else None,
'force_model_name' : arguments.force_model_name,
'input_path' : Path(arguments.input_dir),
'output_path' : Path(arguments.output_dir),
'aligned_path' : Path(arguments.aligned_dir) if arguments.aligned_dir is not None else None,
'cpu_only' : arguments.cpu_only,
'force_gpu_idxs' : arguments.force_gpu_idxs,
}
from mainscripts import Merger
Merger.main (**kwargs)
Merger.main ( model_class_name = arguments.model_name,
saved_models_path = Path(arguments.model_dir),
training_data_src_path = Path(arguments.training_data_src_dir) if arguments.training_data_src_dir is not None else None,
force_model_name = arguments.force_model_name,
input_path = Path(arguments.input_dir),
output_path = Path(arguments.output_dir),
output_mask_path = Path(arguments.output_mask_dir),
aligned_path = Path(arguments.aligned_dir) if arguments.aligned_dir is not None else None,
force_gpu_idxs = arguments.force_gpu_idxs,
cpu_only = arguments.cpu_only)
p = subparsers.add_parser( "merge", help="Merger")
p.add_argument('--training-data-src-dir', action=fixPathAction, dest="training_data_src_dir", default=None, help="(optional, may be required by some models) Dir of extracted SRC faceset.")
p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.")
p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the merged files will be stored.")
p.add_argument('--output-mask-dir', required=True, action=fixPathAction, dest="output_mask_dir", help="Output mask directory. This is where the mask files will be stored.")
p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", default=None, help="Aligned directory. This is where the extracted of dst faces stored.")
p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.")
p.add_argument('--model', required=True, dest="model_name", choices=pathex.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Model class name.")
@ -268,13 +268,14 @@ if __name__ == "__main__":
def process_videoed_video_from_sequence(arguments):
osex.set_process_lowest_prio()
from mainscripts import VideoEd
VideoEd.video_from_sequence (arguments.input_dir,
arguments.output_file,
arguments.reference_file,
arguments.ext,
arguments.fps,
arguments.bitrate,
arguments.lossless)
VideoEd.video_from_sequence (input_dir = arguments.input_dir,
output_file = arguments.output_file,
reference_file = arguments.reference_file,
ext = arguments.ext,
fps = arguments.fps,
bitrate = arguments.bitrate,
include_audio = arguments.include_audio,
lossless = arguments.lossless)
p = videoed_parser.add_parser( "video-from-sequence", help="Make video from image sequence.")
p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input file to be processed. Specify .*-extension to find first file.")
@ -283,7 +284,9 @@ if __name__ == "__main__":
p.add_argument('--ext', dest="ext", default='png', help="Image format (extension) of input files.")
p.add_argument('--fps', type=int, dest="fps", default=None, help="FPS of output file. Overwritten by reference-file.")
p.add_argument('--bitrate', type=int, dest="bitrate", default=None, help="Bitrate of output file in Megabits.")
p.add_argument('--include-audio', action="store_true", dest="include_audio", default=False, help="Include audio from reference file.")
p.add_argument('--lossless', action="store_true", dest="lossless", default=False, help="PNG codec.")
p.set_defaults(func=process_videoed_video_from_sequence)
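
For reference, a hedged example of calling the updated function directly with the new include_audio flag (all paths are illustrative):

from mainscripts import VideoEd

VideoEd.video_from_sequence(input_dir='workspace/data_dst/merged',
                            output_file='workspace/result.mp4',
                            reference_file='workspace/data_dst.mp4',
                            ext='png',
                            fps=None,            # taken from the reference file when None
                            bitrate=16,
                            include_audio=True,  # new flag: mux audio from the reference file
                            lossless=False)
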
def process_labelingtool_edit_mask(arguments):

View file

@ -297,7 +297,18 @@ class ExtractSubprocessor(Subprocessor):
if not cpu_only:
if type == 'landmarks-manual':
devices = [devices.get_best_device()]
result = [ (device.index, 'GPU', device.name, device.total_mem_gb) for device in devices ]
result = []
for device in devices:
count = 1
if count == 1:
result += [ (device.index, 'GPU', device.name, device.total_mem_gb) ]
else:
for i in range(count):
result += [ (device.index, 'GPU', f"{device.name} #{i}", device.total_mem_gb) ]
return result
else:
if type == 'landmarks-manual':

View file

@ -39,6 +39,7 @@ class MergeSubprocessor(Subprocessor):
self.frame_info = frame_info
self.next_temporal_frame_infos = next_temporal_frame_infos
self.output_filepath = None
self.output_mask_filepath = None
self.idx = None
self.cfg = None
@ -54,6 +55,7 @@ class MergeSubprocessor(Subprocessor):
frame_info=None,
next_temporal_frame_infos=None,
output_filepath=None,
output_mask_filepath=None,
need_return_image = False):
self.idx = idx
self.cfg = cfg
@ -61,6 +63,7 @@ class MergeSubprocessor(Subprocessor):
self.frame_info = frame_info
self.next_temporal_frame_infos = next_temporal_frame_infos
self.output_filepath = output_filepath
self.output_mask_filepath = output_mask_filepath
self.need_return_image = need_return_image
if self.need_return_image:
@ -123,35 +126,22 @@ class MergeSubprocessor(Subprocessor):
cfg.superres_func = self.superres_func
frame_info = pf.frame_info
filepath = frame_info.filepath
landmarks_list = frame_info.landmarks_list
output_filepath = pf.output_filepath
need_return_image = pf.need_return_image
if len(frame_info.landmarks_list) == 0:
self.log_info (f'no faces found for {filepath.name}, copying without faces')
if len(landmarks_list) == 0:
self.log_info ( 'no faces found for %s, copying without faces' % (filepath.name) )
if cfg.export_mask_alpha:
img_bgr = cv2_imread(filepath)
imagelib.normalize_channels(img_bgr, 3)
cv2_imwrite (pf.output_filepath, img_bgr)
h,w,c = img_bgr.shape
if c == 1:
img_bgr = np.repeat(img_bgr, 3, -1)
if c == 3:
img_bgr = np.concatenate ([img_bgr, np.zeros((h,w,1), dtype=img_bgr.dtype) ], axis=-1)
cv2_imwrite (output_filepath, img_bgr)
else:
if filepath.suffix == '.png':
shutil.copy ( str(filepath), str(output_filepath) )
else:
img_bgr = cv2_imread(filepath)
cv2_imwrite (output_filepath, img_bgr)
img_mask = np.zeros( (h,w,1), dtype=img_bgr.dtype)
cv2_imwrite (pf.output_mask_filepath, img_mask)
if pf.need_return_image:
pf.image = np.concatenate ([img_bgr, img_mask], axis=-1)
if need_return_image:
img_bgr = cv2_imread(filepath)
pf.image = img_bgr
else:
if cfg.type == MergerConfig.TYPE_MASKED:
cfg.fanseg_input_size = self.fanseg_input_size
@ -172,10 +162,10 @@ class MergeSubprocessor(Subprocessor):
pf.frame_info,
pf.next_temporal_frame_infos )
if output_filepath is not None and final_img is not None:
cv2_imwrite (output_filepath, final_img )
cv2_imwrite (pf.output_filepath, final_img[...,0:3] )
cv2_imwrite (pf.output_mask_filepath, final_img[...,3:4] )
if need_return_image:
if pf.need_return_image:
pf.image = final_img
return pf
@ -186,7 +176,7 @@ class MergeSubprocessor(Subprocessor):
return pf.frame_info.filepath
#override
def __init__(self, is_interactive, merger_session_filepath, predictor_func, predictor_input_shape, merger_config, frames, frames_root_path, output_path, model_iter):
def __init__(self, is_interactive, merger_session_filepath, predictor_func, predictor_input_shape, merger_config, frames, frames_root_path, output_path, output_mask_path, model_iter):
if len (frames) == 0:
raise ValueError ("len (frames) == 0")
@ -226,6 +216,7 @@ class MergeSubprocessor(Subprocessor):
self.frames_root_path = frames_root_path
self.output_path = output_path
self.output_mask_path = output_mask_path
self.model_iter = model_iter
self.prefetch_frame_count = self.process_count = min(6,multiprocessing.cpu_count())
@ -305,12 +296,17 @@ class MergeSubprocessor(Subprocessor):
for filename in pathex.get_image_paths(self.output_path): #remove all images in output_path
Path(filename).unlink()
for filename in pathex.get_image_paths(self.output_mask_path): #remove all images in output_mask_path
Path(filename).unlink()
frames[0].cfg = self.merger_config.copy()
for i in range( len(self.frames) ):
frame = self.frames[i]
frame.idx = i
frame.output_filepath = self.output_path / ( frame.frame_info.filepath.stem + '.png' )
frame.output_mask_filepath = self.output_mask_path / ( frame.frame_info.filepath.stem + '.png' )
#override
def process_info_generator(self):
@ -353,9 +349,6 @@ class MergeSubprocessor(Subprocessor):
'3' : lambda cfg,shift_pressed: cfg.set_mode(3),
'4' : lambda cfg,shift_pressed: cfg.set_mode(4),
'5' : lambda cfg,shift_pressed: cfg.set_mode(5),
'6' : lambda cfg,shift_pressed: cfg.set_mode(6),
'7' : lambda cfg,shift_pressed: cfg.set_mode(7),
'8' : lambda cfg,shift_pressed: cfg.set_mode(8),
'q' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(1 if not shift_pressed else 5),
'a' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(-1 if not shift_pressed else -5),
'w' : lambda cfg,shift_pressed: cfg.add_erode_mask_modifier(1 if not shift_pressed else 5),
@ -379,7 +372,6 @@ class MergeSubprocessor(Subprocessor):
'x' : lambda cfg,shift_pressed: cfg.toggle_mask_mode(),
'c' : lambda cfg,shift_pressed: cfg.toggle_color_transfer_mode(),
'v' : lambda cfg,shift_pressed: cfg.toggle_super_resolution_mode(),
'b' : lambda cfg,shift_pressed: cfg.toggle_export_mask_alpha(),
'n' : lambda cfg,shift_pressed: cfg.toggle_sharpen_mode(),
}
self.masked_keys = list(self.masked_keys_funcs.keys())
@ -393,6 +385,7 @@ class MergeSubprocessor(Subprocessor):
for frame in self.frames:
frame.output_filepath = None
frame.output_mask_filepath = None
frame.image = None
session_data = {
@ -435,12 +428,19 @@ class MergeSubprocessor(Subprocessor):
io.log_info (cur_frame.cfg.to_string( cur_frame.frame_info.filepath.name) )
if cur_frame.image is None:
cur_frame.image = cv2_imread ( cur_frame.output_filepath)
if cur_frame.image is None:
image = cv2_imread (cur_frame.output_filepath)
image_mask = cv2_imread (cur_frame.output_mask_filepath)
if image is None or image_mask is None:
# unable to read? recompute then
cur_frame.is_done = False
cur_frame.is_shown = False
else:
image_mask = imagelib.normalize_channels(image_mask, 1)
cur_frame.image = np.concatenate([image, image_mask], -1)
if cur_frame.is_done:
self.main_screen.set_image(cur_frame.image)
else:
self.main_screen.set_waiting_icon(True)
@ -510,6 +510,8 @@ class MergeSubprocessor(Subprocessor):
self.screen_manager.get_current().diff_scale(-0.1)
elif chr_key == '=':
self.screen_manager.get_current().diff_scale(0.1)
elif chr_key == 'b':
self.screen_manager.get_current().toggle_show_checker_board()
if go_prev_frame:
if cur_frame is None or cur_frame.is_done:
@ -607,6 +609,7 @@ class MergeSubprocessor(Subprocessor):
frame_info=frame.frame_info,
next_temporal_frame_infos=frame.next_temporal_frame_infos,
output_filepath=frame.output_filepath,
output_mask_filepath=frame.output_mask_filepath,
need_return_image=True )
return None
@ -621,6 +624,7 @@ def main (model_class_name=None,
force_model_name=None,
input_path=None,
output_path=None,
output_mask_path=None,
aligned_path=None,
force_gpu_idxs=None,
cpu_only=None):
@ -634,6 +638,9 @@ def main (model_class_name=None,
if not output_path.exists():
output_path.mkdir(parents=True, exist_ok=True)
if not output_mask_path.exists():
output_mask_path.mkdir(parents=True, exist_ok=True)
if not saved_models_path.exists():
io.log_err('Model directory not found. Please ensure it exists.')
return
@ -783,6 +790,7 @@ def main (model_class_name=None,
frames = frames,
frames_root_path = input_path,
output_path = output_path,
output_mask_path = output_mask_path,
model_iter = model.get_iter()
).run()

View file

@ -30,6 +30,7 @@ class Screen(object):
self.scale = 1
self.force_update = True
self.is_first_appear = True
self.show_checker_board = False
self.last_screen_shape = (480,640,3)
self.checkerboard_image = None
@ -39,6 +40,10 @@ class Screen(object):
def set_waiting_icon(self, b):
self.waiting_icon = b
def toggle_show_checker_board(self):
self.show_checker_board = not self.show_checker_board
self.force_update = True
def set_image(self, img):
if not img is self.image:
self.force_update = True
@ -85,6 +90,9 @@ class Screen(object):
screen = cv2.resize ( screen, ( int(w*self.scale), int(h*self.scale) ) )
if c == 4:
if not self.show_checker_board:
screen = screen[...,0:3]
else:
if self.checkerboard_image is None or self.checkerboard_image.shape[0:2] != screen.shape[0:2]:
self.checkerboard_image = ScreenAssets.build_checkerboard_a(screen.shape)

View file

@ -68,7 +68,7 @@ def cut_video ( input_file, from_time=None, to_time=None, audio_track_id=None, b
if bitrate is None:
bitrate = max (1, io.input_int ("Bitrate of output file in MB/s", 25) )
kwargs = {"c:v": "libx265",
kwargs = {"c:v": "libx264",
"b:v": "%dM" %(bitrate),
"pix_fmt": "yuv420p",
}
@ -113,7 +113,7 @@ def denoise_image_sequence( input_dir, ext=None, factor=None ):
except:
io.log_err ("ffmpeg fail, job commandline:" + str(job.compile()) )
def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, lossless=None ):
def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, fps=None, bitrate=None, include_audio=False, lossless=None ):
input_path = Path(input_dir)
output_file_path = Path(output_file)
reference_file_path = Path(reference_file) if reference_file is not None else None
@ -177,7 +177,7 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None,
output_args = [i_in]
if ref_in_a is not None:
if include_audio and ref_in_a is not None:
output_args += [ref_in_a]
output_args += [str (output_file_path)]
@ -185,14 +185,17 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None,
output_kwargs = {}
if lossless:
output_kwargs.update ({"c:v": "png"
output_kwargs.update ({"c:v": "libx264",
"crf": "0",
"pix_fmt": "yuv420p",
})
else:
output_kwargs.update ({"c:v": "libx265",
output_kwargs.update ({"c:v": "libx264",
"b:v": "%dM" %(bitrate),
"pix_fmt": "yuv420p",
})
if include_audio and ref_in_a is not None:
output_kwargs.update ({"c:a": "aac",
"b:a": "192k",
"ar" : "48000"

Binary image file not shown (317 KiB before, 306 KiB after).

View file

@ -13,8 +13,6 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks)
if cfg.mode == 'original':
if cfg.export_mask_alpha:
img_bgr = np.concatenate ( [img_bgr, img_face_mask_a], -1 )
return img_bgr, img_face_mask_a
out_img = img_bgr.copy()
@ -106,28 +104,9 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
img_face_mask_aaa [ img_face_mask_aaa <= 0.1 ] = 0.0 #get rid of noise
if 'raw' in cfg.mode:
face_corner_pts = np.array ([ [0,0], [output_size-1,0], [output_size-1,output_size-1], [0,output_size-1] ], dtype=np.float32)
square_mask = np.zeros(img_bgr.shape, dtype=np.float32)
cv2.fillConvexPoly(square_mask, \
LandmarksProcessor.transform_points (face_corner_pts, face_output_mat, invert=True ).astype(np.int), \
(1,1,1) )
if cfg.mode == 'raw-rgb':
out_merging_mask = square_mask
if cfg.mode == 'raw-rgb' or cfg.mode == 'raw-rgb-mask':
out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, out_img, cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC, cv2.BORDER_TRANSPARENT )
if cfg.mode == 'raw-rgb-mask':
out_img = np.concatenate ( [out_img, np.expand_dims (img_face_mask_aaa[:,:,0],-1)], -1 )
out_merging_mask = square_mask
elif cfg.mode == 'raw-mask-only':
out_img = img_face_mask_aaa
out_merging_mask = img_face_mask_aaa
elif cfg.mode == 'raw-predicted-only':
out_img = cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, np.zeros(img_bgr.shape, dtype=np.float32), cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC, cv2.BORDER_TRANSPARENT )
out_merging_mask = square_mask
out_img = np.clip (out_img, 0.0, 1.0 )
else:
@ -176,14 +155,12 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
if 'seamless' not in cfg.mode and cfg.color_transfer_mode != 0:
if cfg.color_transfer_mode == 1: #rct
prd_face_bgr = imagelib.reinhard_color_transfer ( (prd_face_bgr*255).astype(np.uint8),
(dst_face_bgr*255).astype(np.uint8),
prd_face_bgr = imagelib.reinhard_color_transfer ( np.clip( prd_face_bgr*255, 0, 255).astype(np.uint8),
np.clip( dst_face_bgr*255, 0, 255).astype(np.uint8),
source_mask=prd_face_mask_a, target_mask=prd_face_mask_a)
prd_face_bgr = np.clip( prd_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
elif cfg.color_transfer_mode == 2: #lct
prd_face_bgr = imagelib.linear_color_transfer (prd_face_bgr, dst_face_bgr)
prd_face_bgr = np.clip( prd_face_bgr, 0.0, 1.0)
elif cfg.color_transfer_mode == 3: #mkl
prd_face_bgr = imagelib.color_transfer_mkl (prd_face_bgr, dst_face_bgr)
elif cfg.color_transfer_mode == 4: #mkl-m
@ -270,7 +247,6 @@ def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img
out_face_bgr = np.clip( out_face_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
elif cfg.color_transfer_mode == 2: #lct
out_face_bgr = imagelib.linear_color_transfer (out_face_bgr, dst_face_bgr)
out_face_bgr = np.clip( out_face_bgr, 0.0, 1.0)
elif cfg.color_transfer_mode == 3: #mkl
out_face_bgr = imagelib.color_transfer_mkl (out_face_bgr, dst_face_bgr)
elif cfg.color_transfer_mode == 4: #mkl-m
@ -356,7 +332,6 @@ def MergeMasked (predictor_func, predictor_input_shape, cfg, frame_info):
final_img = final_img*(1-merging_mask) + img*merging_mask
final_mask = np.clip (final_mask + merging_mask, 0, 1 )
if cfg.export_mask_alpha:
final_img = np.concatenate ( [final_img, final_mask], -1)
return (final_img*255).astype(np.uint8)

View file

@ -101,10 +101,7 @@ mode_dict = {0:'original',
2:'hist-match',
3:'seamless',
4:'seamless-hist-match',
5:'raw-rgb',
6:'raw-rgb-mask',
7:'raw-mask-only',
8:'raw-predicted-only'}
5:'raw-rgb',}
mode_str_dict = {}
@ -144,7 +141,6 @@ class MergerConfigMasked(MergerConfig):
image_denoise_power = 0,
bicubic_degrade_power = 0,
color_degrade_power = 0,
export_mask_alpha = False,
**kwargs
):
@ -158,6 +154,9 @@ class MergerConfigMasked(MergerConfig):
self.clip_hborder_mask_per = clip_hborder_mask_per
#default changeable params
if mode not in mode_str_dict:
mode = mode_dict[1]
self.mode = mode
self.masked_hist_match = masked_hist_match
self.hist_match_threshold = hist_match_threshold
@ -170,7 +169,6 @@ class MergerConfigMasked(MergerConfig):
self.image_denoise_power = image_denoise_power
self.bicubic_degrade_power = bicubic_degrade_power
self.color_degrade_power = color_degrade_power
self.export_mask_alpha = export_mask_alpha
def copy(self):
return copy.copy(self)
@ -217,9 +215,6 @@ class MergerConfigMasked(MergerConfig):
def add_bicubic_degrade_power(self, diff):
self.bicubic_degrade_power = np.clip ( self.bicubic_degrade_power+diff, 0, 100)
def toggle_export_mask_alpha(self):
self.export_mask_alpha = not self.export_mask_alpha
def ask_settings(self):
s = """Choose mode: \n"""
for key in mode_dict.keys():
@ -267,7 +262,6 @@ class MergerConfigMasked(MergerConfig):
self.image_denoise_power = np.clip ( io.input_int ("Choose image degrade by denoise power", 0, add_info="0..500"), 0, 500)
self.bicubic_degrade_power = np.clip ( io.input_int ("Choose image degrade by bicubic rescale power", 0, add_info="0..100"), 0, 100)
self.color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image", 0, add_info="0..100"), 0, 100)
self.export_mask_alpha = io.input_bool("Export png with alpha channel of the mask?", False)
io.log_info ("")
@ -287,8 +281,7 @@ class MergerConfigMasked(MergerConfig):
self.color_transfer_mode == other.color_transfer_mode and \
self.image_denoise_power == other.image_denoise_power and \
self.bicubic_degrade_power == other.bicubic_degrade_power and \
self.color_degrade_power == other.color_degrade_power and \
self.export_mask_alpha == other.export_mask_alpha
self.color_degrade_power == other.color_degrade_power
return False
@ -324,8 +317,7 @@ class MergerConfigMasked(MergerConfig):
if 'raw' not in self.mode:
r += (f"""image_denoise_power: {self.image_denoise_power}\n"""
f"""bicubic_degrade_power: {self.bicubic_degrade_power}\n"""
f"""color_degrade_power: {self.color_degrade_power}\n"""
f"""export_mask_alpha: {self.export_mask_alpha}\n""")
f"""color_degrade_power: {self.color_degrade_power}\n""")
r += "================"

View file

@ -113,8 +113,8 @@ class ModelBase(object):
self.model_name = saved_models_names[model_idx]
else:
self.model_name = io.input_str(f"No saved models found. Enter a name of a new model", "noname")
self.model_name = io.input_str(f"No saved models found. Enter a name of a new model", "new")
self.model_name = self.model_name.replace('_', ' ')
break
self.model_name = self.model_name + '_' + self.model_class_name
@ -159,8 +159,8 @@ class ModelBase(object):
#####
io.input_skip_pending()
self.on_initialize_options()
if self.is_first_run():
# save as default options only for first run model initialize
self.default_options_path.write_bytes( pickle.dumps (self.options) )
@ -173,6 +173,8 @@ class ModelBase(object):
self.on_initialize()
self.options['batch_size'] = self.batch_size
if self.is_training:
self.preview_history_path = self.saved_models_path / ( f'{self.get_model_name()}_history' )
self.autobackups_path = self.saved_models_path / ( f'{self.get_model_name()}_autobackups' )
@ -275,7 +277,7 @@ class ModelBase(object):
def ask_batch_size(self, suggest_batch_size=None):
default_batch_size = self.load_or_def_option('batch_size', suggest_batch_size or self.batch_size)
self.batch_size = max(0, io.input_int("Batch_size", default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually."))
self.options['batch_size'] = self.batch_size = max(0, io.input_int("Batch_size", default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually."))
#overridable

View file

@ -14,7 +14,7 @@ class QModel(ModelBase):
#override
def on_initialize(self):
device_config = nn.getCurrentDeviceConfig()
self.model_data_format = "NCHW" if len(device_config.devices) != 0 else "NHWC"
self.model_data_format = "NCHW" if len(device_config.devices) != 0 and not self.is_debug() else "NHWC"
nn.initialize(data_format=self.model_data_format)
tf = nn.tf
@ -167,9 +167,9 @@ class QModel(ModelBase):
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
input_nc = 3
output_nc = 3
bgr_shape = nn.get4Dshape(resolution,resolution,input_nc)
input_ch = 3
output_ch = 3
bgr_shape = nn.get4Dshape(resolution,resolution,input_ch)
mask_shape = nn.get4Dshape(resolution,resolution,1)
lowest_dense_res = resolution // 16
@ -189,7 +189,7 @@ class QModel(ModelBase):
# Initializing model classes
with tf.device (models_opt_device):
self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, name='encoder')
self.encoder = Encoder(in_ch=input_ch, e_ch=e_dims, name='encoder')
encoder_out_ch = self.encoder.compute_output_channels ( (nn.tf_floatx, bgr_shape))
self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, d_ch=d_dims, name='inter')
@ -262,7 +262,7 @@ class QModel(ModelBase):
gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur
gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur)
gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
gpu_target_src_masked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst
gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src
@ -271,8 +271,8 @@ class QModel(ModelBase):
gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur
gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur)
gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
gpu_src_loss += tf.reduce_mean ( 10*tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] )
gpu_dst_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1])
@ -282,8 +282,8 @@ class QModel(ModelBase):
gpu_src_losses += [gpu_src_loss]
gpu_dst_losses += [gpu_dst_loss]
gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss
gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ]
gpu_G_loss = gpu_src_loss + gpu_dst_loss
gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_G_loss, self.src_dst_trainable_weights ) ]
# Average losses and gradients, and create optimizer update ops
@ -362,10 +362,9 @@ class QModel(ModelBase):
training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path()
training_data_dst_path = self.training_data_dst_path if not self.pretrain else self.get_pretraining_data_path()
cpu_count = multiprocessing.cpu_count()
cpu_count = min(multiprocessing.cpu_count(), 8)
src_generators_count = cpu_count // 2
dst_generators_count = cpu_count - src_generators_count
dst_generators_count = cpu_count // 2
self.set_training_data_generators ([
SampleGeneratorFace(training_data_src_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
@ -396,11 +395,12 @@ class QModel(ModelBase):
#override
def onTrainOneIter(self):
if self.get_iter() % 3 == 0 and self.last_samples is not None:
( (warped_src, target_src, target_srcm), \
(warped_dst, target_dst, target_dstm) ) = self.last_samples
src_loss, dst_loss = self.src_dst_train (target_src, target_src, target_srcm,
target_dst, target_dst, target_dstm)
warped_src = target_src
warped_dst = target_dst
else:
samples = self.last_samples = self.generate_next_samples()
( (warped_src, target_src, target_srcm), \
@ -440,8 +440,7 @@ class QModel(ModelBase):
return result
def predictor_func (self, face=None):
face = face[None,...]
face = nn.to_data_format(face, self.model_data_format, "NHWC")
face = nn.to_data_format(face[None,...], self.model_data_format, "NHWC")
bgr, mask_dst_dstm, mask_src_dstm = [ nn.to_data_format(x, "NHWC", self.model_data_format).astype(np.float32) for x in self.AE_merge (face) ]
mask = mask_dst_dstm[0] * mask_src_dstm[0]

View file

@ -33,7 +33,9 @@ class SAEHDModel(ModelBase):
default_archi = self.options['archi'] = self.load_or_def_option('archi', 'dfhd')
default_ae_dims = self.options['ae_dims'] = self.load_or_def_option('ae_dims', 256)
default_e_dims = self.options['e_dims'] = self.load_or_def_option('e_dims', 64)
default_d_dims = self.options['d_dims'] = self.load_or_def_option('d_dims', 64)
default_d_dims = 48 if self.options['archi'] == 'dfhd' else 64
default_d_dims = self.options['d_dims'] = self.load_or_def_option('d_dims', default_d_dims)
default_d_mask_dims = default_d_dims // 3
default_d_mask_dims += default_d_mask_dims % 2
@ -43,6 +45,7 @@ class SAEHDModel(ModelBase):
default_learn_mask = self.options['learn_mask'] = self.load_or_def_option('learn_mask', True)
default_lr_dropout = self.options['lr_dropout'] = self.load_or_def_option('lr_dropout', False)
default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True)
default_gan_power = self.options['gan_power'] = self.load_or_def_option('gan_power', 0.0)
default_true_face_power = self.options['true_face_power'] = self.load_or_def_option('true_face_power', 0.0)
default_face_style_power = self.options['face_style_power'] = self.load_or_def_option('face_style_power', 0.0)
default_bg_style_power = self.options['bg_style_power'] = self.load_or_def_option('bg_style_power', 0.0)
@ -87,13 +90,15 @@ class SAEHDModel(ModelBase):
self.options['lr_dropout'] = io.input_bool ("Use learning rate dropout", default_lr_dropout, help_message="When the face is trained enough, you can enable this option to get extra sharpness for less amount of iterations.")
self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness for less amount of iterations.")
self.options['gan_power'] = np.clip ( io.input_number ("GAN power", default_gan_power, add_info="0.0 .. 10.0", help_message="Train the network in Generative Adversarial manner. Accelerates the speed of training. Forces the neural network to learn small details of the face. You can enable/disable this option at any time. Typical value is 1.0"), 0.0, 10.0 )
if 'df' in self.options['archi']:
self.options['true_face_power'] = np.clip ( io.input_number (" 'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 )
self.options['true_face_power'] = np.clip ( io.input_number ("'True face' power.", default_true_face_power, add_info="0.0000 .. 1.0", help_message="Experimental option. Discriminates result face to be more like src face. Higher value - stronger discrimination. Typical value is 0.01 . Comparison - https://i.imgur.com/czScS9q.png"), 0.0, 1.0 )
else:
self.options['true_face_power'] = 0.0
self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 )
self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse. Typical value is 2.0"), 0.0, 100.0 )
self.options['ct_mode'] = io.input_str (f"Color transfer for src faceset", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.")
self.options['clipgrad'] = io.input_bool ("Enable gradient clipping", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.")
self.options['pretrain'] = io.input_bool ("Enable pretraining mode", default_pretrain, help_message="Pretrain the model with large amount of various faces. After that, model can be used to train the fakes more quickly.")
@ -110,7 +115,7 @@ class SAEHDModel(ModelBase):
#override
def on_initialize(self):
device_config = nn.getCurrentDeviceConfig()
self.model_data_format = "NCHW" if len(device_config.devices) != 0 else "NHWC"
self.model_data_format = "NCHW" if len(device_config.devices) != 0 and not self.is_debug() else "NHWC"
nn.initialize(floatx="float16" if self.options['use_float16'] else "float32",
data_format=self.model_data_format)
tf = nn.tf
@ -136,10 +141,8 @@ class SAEHDModel(ModelBase):
def forward(self, x):
x = self.conv1(x)
if self.subpixel:
x = nn.tf_space_to_depth(x, 2)
if self.use_activator:
x = tf.nn.leaky_relu(x, 0.1)
return x
@ -332,7 +335,7 @@ class SAEHDModel(ModelBase):
device_config = nn.getCurrentDeviceConfig()
devices = device_config.devices
resolution = self.options['resolution']
self.resolution = resolution = self.options['resolution']
learn_mask = self.options['learn_mask']
archi = self.options['archi']
ae_dims = self.options['ae_dims']
@ -341,15 +344,17 @@ class SAEHDModel(ModelBase):
d_mask_dims = self.options['d_mask_dims']
self.pretrain = self.options['pretrain']
self.gan_power = gan_power = self.options['gan_power'] if not self.pretrain else 0.0
masked_training = True
models_opt_on_gpu = False if len(devices) != 1 else self.options['models_opt_on_gpu']
models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0'
optimizer_vars_on_cpu = models_opt_device=='/CPU:0'
input_nc = 3
output_nc = 3
bgr_shape = nn.get4Dshape(resolution,resolution,input_nc)
input_ch = 3
output_ch = 3
bgr_shape = nn.get4Dshape(resolution,resolution,input_ch)
mask_shape = nn.get4Dshape(resolution,resolution,1)
lowest_dense_res = resolution // 16
@ -370,7 +375,7 @@ class SAEHDModel(ModelBase):
# Initializing model classes
with tf.device (models_opt_device):
if 'df' in archi:
self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder')
self.encoder = Encoder(in_ch=input_ch, e_ch=e_dims, is_hd='hd' in archi, name='encoder')
encoder_out_ch = self.encoder.compute_output_channels ( (nn.tf_floatx, bgr_shape))
self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, name='inter')
@ -386,11 +391,11 @@ class SAEHDModel(ModelBase):
if self.is_training:
if self.options['true_face_power'] != 0:
self.dis = CodeDiscriminator(ae_dims, code_res=lowest_dense_res*2, name='dis' )
self.model_filename_list += [ [self.dis, 'dis.npy'] ]
self.code_discriminator = CodeDiscriminator(ae_dims, code_res=lowest_dense_res*2, name='dis' )
self.model_filename_list += [ [self.code_discriminator, 'code_discriminator.npy'] ]
elif 'liae' in archi:
self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder')
self.encoder = Encoder(in_ch=input_ch, e_ch=e_dims, is_hd='hd' in archi, name='encoder')
encoder_out_ch = self.encoder.compute_output_channels ( (nn.tf_floatx, bgr_shape))
self.inter_AB = Inter(in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims*2, name='inter_AB')
@ -407,6 +412,12 @@ class SAEHDModel(ModelBase):
[self.decoder , 'decoder.npy'] ]
if self.is_training:
if gan_power != 0:
self.D_src = nn.PatchDiscriminator(patch_size=resolution//16, in_ch=output_ch, base_ch=512, name="D_src")
self.D_dst = nn.PatchDiscriminator(patch_size=resolution//16, in_ch=output_ch, base_ch=512, name="D_dst")
self.model_filename_list += [ [self.D_src, 'D_src.npy'] ]
self.model_filename_list += [ [self.D_dst, 'D_dst.npy'] ]
# Initialize optimizers
lr=5e-5
lr_dropout = 0.3 if self.options['lr_dropout'] else 1.0
@ -424,9 +435,14 @@ class SAEHDModel(ModelBase):
self.src_dst_opt.initialize_variables (self.src_dst_all_trainable_weights, vars_on_cpu=optimizer_vars_on_cpu)
if self.options['true_face_power'] != 0:
self.D_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_opt')
self.D_opt.initialize_variables ( self.dis.get_weights(), vars_on_cpu=optimizer_vars_on_cpu)
self.model_filename_list += [ (self.D_opt, 'D_opt.npy') ]
self.D_code_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_code_opt')
self.D_code_opt.initialize_variables ( self.code_discriminator.get_weights(), vars_on_cpu=optimizer_vars_on_cpu)
self.model_filename_list += [ (self.D_code_opt, 'D_code_opt.npy') ]
if gan_power != 0:
self.D_src_dst_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_src_dst_opt')
self.D_src_dst_opt.initialize_variables ( self.D_src.get_weights()+self.D_dst.get_weights(), vars_on_cpu=optimizer_vars_on_cpu)
self.model_filename_list += [ (self.D_src_dst_opt, 'D_src_dst_opt.npy') ]
if self.is_training:
# Adjust batch size for multiple GPU
@ -445,9 +461,9 @@ class SAEHDModel(ModelBase):
gpu_src_losses = []
gpu_dst_losses = []
gpu_src_dst_loss_gvs = []
gpu_D_loss_gvs = []
gpu_G_loss_gvs = []
gpu_D_code_loss_gvs = []
gpu_D_src_dst_loss_gvs = []
for gpu_id in range(gpu_count):
with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ):
@ -497,7 +513,7 @@ class SAEHDModel(ModelBase):
gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur
gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur)
gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
gpu_target_src_masked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src
gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst
gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src
@ -506,8 +522,8 @@ class SAEHDModel(ModelBase):
gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur
gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur)
gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_src_masked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1])
gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_src_masked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3])
if learn_mask:
gpu_src_loss += tf.reduce_mean ( 10*tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] )
@ -528,26 +544,48 @@ class SAEHDModel(ModelBase):
gpu_src_losses += [gpu_src_loss]
gpu_dst_losses += [gpu_dst_loss]
gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss
gpu_G_loss = gpu_src_loss + gpu_dst_loss
if self.options['true_face_power'] != 0:
def DLoss(labels,logits):
return tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits), axis=[1,2,3])
gpu_src_code_d = self.dis( gpu_src_code )
gpu_src_code_d_ones = tf.ones_like(gpu_src_code_d)
if self.options['true_face_power'] != 0:
gpu_src_code_d = self.code_discriminator( gpu_src_code )
gpu_src_code_d_ones = tf.ones_like (gpu_src_code_d)
gpu_src_code_d_zeros = tf.zeros_like(gpu_src_code_d)
gpu_dst_code_d = self.dis( gpu_dst_code )
gpu_dst_code_d = self.code_discriminator( gpu_dst_code )
gpu_dst_code_d_ones = tf.ones_like(gpu_dst_code_d)
gpu_src_dst_loss += self.options['true_face_power']*DLoss(gpu_src_code_d_ones, gpu_src_code_d)
gpu_G_loss += self.options['true_face_power']*DLoss(gpu_src_code_d_ones, gpu_src_code_d)
gpu_D_loss = (DLoss(gpu_src_code_d_ones , gpu_dst_code_d) + \
gpu_D_code_loss = (DLoss(gpu_src_code_d_ones , gpu_dst_code_d) + \
DLoss(gpu_src_code_d_zeros, gpu_src_code_d) ) * 0.5
gpu_D_loss_gvs += [ nn.tf_gradients (gpu_D_loss, self.dis.get_weights() ) ]
gpu_D_code_loss_gvs += [ nn.tf_gradients (gpu_D_code_loss, self.code_discriminator.get_weights() ) ]
gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ]
if gan_power != 0:
gpu_pred_src_src_d = self.D_src(gpu_pred_src_src_masked_opt)
gpu_pred_src_src_d_ones = tf.ones_like (gpu_pred_src_src_d)
gpu_pred_src_src_d_zeros = tf.zeros_like(gpu_pred_src_src_d)
gpu_target_src_d = self.D_src(gpu_target_src_masked_opt)
gpu_target_src_d_ones = tf.ones_like(gpu_target_src_d)
gpu_pred_dst_dst_d = self.D_dst(gpu_pred_dst_dst_masked_opt)
gpu_pred_dst_dst_d_ones = tf.ones_like (gpu_pred_dst_dst_d)
gpu_pred_dst_dst_d_zeros = tf.zeros_like(gpu_pred_dst_dst_d)
gpu_target_dst_d = self.D_dst(gpu_target_dst_masked_opt)
gpu_target_dst_d_ones = tf.ones_like(gpu_target_dst_d)
gpu_D_src_dst_loss = (DLoss(gpu_target_src_d_ones , gpu_target_src_d) + \
DLoss(gpu_pred_src_src_d_zeros, gpu_pred_src_src_d) ) * 0.5 + \
(DLoss(gpu_target_dst_d_ones , gpu_target_dst_d) + \
DLoss(gpu_pred_dst_dst_d_zeros, gpu_pred_dst_dst_d) ) * 0.5
gpu_D_src_dst_loss_gvs += [ nn.tf_gradients (gpu_D_src_dst_loss, self.D_src.get_weights()+self.D_dst.get_weights() ) ]
gpu_G_loss += gan_power*(DLoss(gpu_pred_src_src_d_ones, gpu_pred_src_src_d) + DLoss(gpu_pred_dst_dst_d_ones, gpu_pred_dst_dst_d))
gpu_G_loss_gvs += [ nn.tf_gradients ( gpu_G_loss, self.src_dst_trainable_weights ) ]
# Average losses and gradients, and create optimizer update ops
@ -558,15 +596,15 @@ class SAEHDModel(ModelBase):
pred_src_srcm = nn.tf_concat(gpu_pred_src_srcm_list, 0)
pred_dst_dstm = nn.tf_concat(gpu_pred_dst_dstm_list, 0)
pred_src_dstm = nn.tf_concat(gpu_pred_src_dstm_list, 0)
src_loss = nn.tf_average_tensor_list(gpu_src_losses)
dst_loss = nn.tf_average_tensor_list(gpu_dst_losses)
src_dst_loss_gv = nn.tf_average_gv_list (gpu_src_dst_loss_gvs)
src_dst_loss_gv_op = self.src_dst_opt.get_update_op (src_dst_loss_gv )
src_dst_loss_gv_op = self.src_dst_opt.get_update_op (nn.tf_average_gv_list (gpu_G_loss_gvs))
if self.options['true_face_power'] != 0:
D_loss_gv = nn.tf_average_gv_list(gpu_D_loss_gvs)
D_loss_gv_op = self.D_opt.get_update_op (D_loss_gv )
D_loss_gv_op = self.D_code_opt.get_update_op (nn.tf_average_gv_list(gpu_D_code_loss_gvs))
if gan_power != 0:
src_D_src_dst_loss_gv_op = self.D_src_dst_opt.get_update_op (nn.tf_average_gv_list(gpu_D_src_dst_loss_gvs) )
# Initializing training and view functions
@ -590,6 +628,17 @@ class SAEHDModel(ModelBase):
nn.tf_sess.run ([D_loss_gv_op], feed_dict={self.warped_src: warped_src, self.warped_dst: warped_dst})
self.D_train = D_train
if gan_power != 0:
def D_src_dst_train(warped_src, target_src, target_srcm, \
warped_dst, target_dst, target_dstm):
nn.tf_sess.run ([src_D_src_dst_loss_gv_op], feed_dict={self.warped_src :warped_src,
self.target_src :target_src,
self.target_srcm:target_srcm,
self.warped_dst :warped_dst,
self.target_dst :target_dst,
self.target_dstm:target_dstm})
self.D_src_dst_train = D_src_dst_train
if learn_mask:
def AE_view(warped_src, warped_dst):
return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm],
@ -663,12 +712,11 @@ class SAEHDModel(ModelBase):
t_img_warped = t.IMG_WARPED_TRANSFORMED if self.options['random_warp'] else t.IMG_TRANSFORMED
cpu_count = multiprocessing.cpu_count()
cpu_count = min(multiprocessing.cpu_count(), 8)
src_generators_count = cpu_count // 2
dst_generators_count = cpu_count // 2
if self.options['ct_mode'] != 'none':
src_generators_count = int(src_generators_count * 1.5)
dst_generators_count = cpu_count - src_generators_count
self.set_training_data_generators ([
SampleGeneratorFace(training_data_src_path, random_ct_samples_path=random_ct_samples_path, debug=self.is_debug(), batch_size=self.get_batch_size(),
@ -706,6 +754,9 @@ class SAEHDModel(ModelBase):
if self.options['true_face_power'] != 0 and not self.pretrain:
self.D_train (warped_src, warped_dst)
if self.gan_power != 0:
self.D_src_dst_train (warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm)
return ( ('src_loss', src_loss), ('dst_loss', dst_loss), )
#override
@ -721,7 +772,8 @@ class SAEHDModel(ModelBase):
target_srcm, target_dstm = [ nn.to_data_format(x,"NHWC", self.model_data_format) for x in ([target_srcm, target_dstm] )]
n_samples = min(4, self.get_batch_size() )
n_samples = min(4, self.get_batch_size(), 800 // self.resolution )
result = []
st = []
for i in range(n_samples):
@ -742,8 +794,7 @@ class SAEHDModel(ModelBase):
return result
def predictor_func (self, face=None):
face = face[None,...]
face = nn.to_data_format(face, self.model_data_format, "NHWC")
face = nn.to_data_format(face[None,...], self.model_data_format, "NHWC")
if self.options['learn_mask']:
bgr, mask_dst_dstm, mask_src_dstm = [ nn.to_data_format(x,"NHWC", self.model_data_format).astype(np.float32) for x in self.AE_merge (face) ]

View file

@ -3,7 +3,7 @@ import shutil
import struct
from pathlib import Path
import samplelib.SampleHost
import samplelib.SampleLoader
from core.interact import interact as io
from samplelib import Sample
from core import pathex
@ -34,7 +34,7 @@ class PackedFaceset():
else:
image_paths = pathex.get_image_paths(samples_path)
samples = samplelib.SampleHost.load_face_samples(image_paths)
samples = samplelib.SampleLoader.load_face_samples(image_paths)
samples_len = len(samples)
samples_configs = []

View file

@ -9,7 +9,7 @@ import numpy as np
from core import mplib
from core.joblib import SubprocessGenerator, ThisThreadGenerator
from facelib import LandmarksProcessor
from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
SampleType)
@ -39,7 +39,7 @@ class SampleGeneratorFace(SampleGeneratorBase):
else:
self.generators_count = max(1, generators_count)
samples = SampleHost.load (SampleType.FACE, self.samples_path)
samples = SampleLoader.load (SampleType.FACE, self.samples_path)
self.samples_len = len(samples)
if self.samples_len == 0:
@ -48,7 +48,7 @@ class SampleGeneratorFace(SampleGeneratorBase):
index_host = mplib.IndexHost(self.samples_len)
if random_ct_samples_path is not None:
ct_samples = SampleHost.load (SampleType.FACE, random_ct_samples_path)
ct_samples = SampleLoader.load (SampleType.FACE, random_ct_samples_path)
ct_index_host = mplib.IndexHost( len(ct_samples) )
else:
ct_samples = None

View file

@ -8,7 +8,7 @@ import numpy as np
from core import mplib
from core.joblib import SubprocessGenerator, ThisThreadGenerator
from facelib import LandmarksProcessor
from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
SampleType)
@ -33,7 +33,7 @@ class SampleGeneratorFacePerson(SampleGeneratorBase):
raise NotImplementedError("Currently SampleGeneratorFacePerson is not implemented.")
samples_host = SampleHost.mp_host (SampleType.FACE, self.samples_path)
samples_host = SampleLoader.mp_host (SampleType.FACE, self.samples_path)
samples = samples_host.get_list()
self.samples_len = len(samples)
@ -98,7 +98,7 @@ class SampleGeneratorFacePerson(SampleGeneratorBase):
@staticmethod
def get_person_id_max_count(samples_path):
return SampleHost.get_person_id_max_count(samples_path)
return SampleLoader.get_person_id_max_count(samples_path)
"""
if self.person_id_mode==1:

View file

@ -9,7 +9,7 @@ import numpy as np
from core import mplib
from core.joblib import SubprocessGenerator, ThisThreadGenerator
from facelib import LandmarksProcessor
from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
SampleType)
@ -31,7 +31,7 @@ class SampleGeneratorFaceTemporal(SampleGeneratorBase):
else:
self.generators_count = generators_count
samples = SampleHost.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path)
samples = SampleLoader.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path)
samples_len = len(samples)
if samples_len == 0:
raise ValueError('No training data provided.')

View file

@ -4,7 +4,7 @@ import cv2
import numpy as np
from core.joblib import SubprocessGenerator, ThisThreadGenerator
from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor,
from samplelib import (SampleGeneratorBase, SampleLoader, SampleProcessor,
SampleType)
@ -22,7 +22,7 @@ class SampleGeneratorImageTemporal(SampleGeneratorBase):
self.sample_process_options = sample_process_options
self.output_sample_types = output_sample_types
self.samples = SampleHost.load (SampleType.IMAGE, self.samples_path)
self.samples = SampleLoader.load (SampleType.IMAGE, self.samples_path)
self.generator_samples = [ self.samples ]
self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, 0 )] if self.debug else \

View file

@@ -14,7 +14,7 @@ from facelib import FaceType, LandmarksProcessor
from .Sample import Sample, SampleType
class SampleHost:
class SampleLoader:
samples_cache = dict()
@staticmethod
def get_person_id_max_count(samples_path):
@@ -33,7 +33,7 @@ class SampleHost:
@staticmethod
def load(sample_type, samples_path):
samples_cache = SampleHost.samples_cache
samples_cache = SampleLoader.samples_cache
if str(samples_path) not in samples_cache.keys():
samples_cache[str(samples_path)] = [None]*SampleType.QTY
@@ -55,12 +55,12 @@ class SampleHost:
io.log_info (f"Loaded {len(result)} packed faces from {samples_path}")
if result is None:
result = SampleHost.load_face_samples( pathex.get_image_paths(samples_path) )
result = SampleLoader.load_face_samples( pathex.get_image_paths(samples_path) )
samples[sample_type] = result
elif sample_type == SampleType.FACE_TEMPORAL_SORTED:
result = SampleHost.load (SampleType.FACE, samples_path)
result = SampleHost.upgradeToFaceTemporalSortedSamples(result)
result = SampleLoader.load (SampleType.FACE, samples_path)
result = SampleLoader.upgradeToFaceTemporalSortedSamples(result)
samples[sample_type] = result
return samples[sample_type]

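Loading is still cached per folder and per sample type, and the temporally sorted variant is derived from the plain FACE load; roughly, as a paraphrase of the code above (not a verbatim copy):

cache = SampleLoader.samples_cache            # dict: str(samples_path) -> [None] * SampleType.QTY
key = str(samples_path)
if key not in cache:
    cache[key] = [None] * SampleType.QTY

if cache[key][SampleType.FACE_TEMPORAL_SORTED] is None:
    faces = SampleLoader.load(SampleType.FACE, samples_path)    # hits the same cache
    cache[key][SampleType.FACE_TEMPORAL_SORTED] = SampleLoader.upgradeToFaceTemporalSortedSamples(faces)
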
View file

@@ -101,7 +101,6 @@ class SampleProcessor(object):
for sample in samples:
sample_bgr = sample.load_bgr()
ct_sample_bgr = None
ct_sample_mask = None
h,w,c = sample_bgr.shape
is_face_sample = sample.landmarks is not None
@@ -117,10 +116,6 @@ class SampleProcessor(object):
resolution = opts.get('resolution', 0)
types = opts.get('types', [] )
border_replicate = opts.get('border_replicate', True)
random_sub_res = opts.get('random_sub_res', 0)
normalize_std_dev = opts.get('normalize_std_dev', False)
normalize_vgg = opts.get('normalize_vgg', False)
motion_blur = opts.get('motion_blur', None)
gaussian_blur = opts.get('gaussian_blur', None)
@@ -131,7 +126,6 @@ class SampleProcessor(object):
img_type = SPTF.NONE
target_face_type = SPTF.NONE
face_mask_type = SPTF.NONE
mode_type = SPTF.NONE
for t in types:
if t >= SPTF.IMG_TYPE_BEGIN and t < SPTF.IMG_TYPE_END:
@@ -141,6 +135,12 @@ class SampleProcessor(object):
elif t >= SPTF.MODE_BEGIN and t < SPTF.MODE_END:
mode_type = t
if mode_type == SPTF.MODE_M and not is_face_sample:
raise ValueError("MODE_M applicable only for face samples")
can_warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
can_transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED)
if img_type == SPTF.NONE:
raise ValueError ('expected IMG_ type')
@@ -148,7 +148,7 @@ class SampleProcessor(object):
l = sample.landmarks
l = np.concatenate ( [ np.expand_dims(l[:,0] / w,-1), np.expand_dims(l[:,1] / h,-1) ], -1 )
l = np.clip(l, 0.0, 1.0)
img = l
out_sample = l
elif img_type == SPTF.IMG_PITCH_YAW_ROLL or img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID:
pitch_yaw_roll = sample.get_pitch_yaw_roll()
@@ -156,44 +156,29 @@ class SampleProcessor(object):
yaw = -yaw
if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID:
pitch = np.clip( (pitch / math.pi) / 2.0 + 1.0, 0, 1)
yaw = np.clip( (yaw / math.pi) / 2.0 + 1.0, 0, 1)
roll = np.clip( (roll / math.pi) / 2.0 + 1.0, 0, 1)
pitch = np.clip( (pitch / math.pi) / 2.0 + 0.5, 0, 1)
yaw = np.clip( (yaw / math.pi) / 2.0 + 0.5, 0, 1)
roll = np.clip( (roll / math.pi) / 2.0 + 0.5, 0, 1)
img = (pitch, yaw, roll)
out_sample = (pitch, yaw, roll)
else:
if mode_type == SPTF.NONE:
raise ValueError ('expected MODE_ type')
def do_transform(img, mask):
warp = (img_type==SPTF.IMG_WARPED or img_type==SPTF.IMG_WARPED_TRANSFORMED)
transform = (img_type==SPTF.IMG_WARPED_TRANSFORMED or img_type==SPTF.IMG_TRANSFORMED)
flip = img_type != SPTF.IMG_WARPED
need_img = mode_type != SPTF.MODE_M
need_mask = mode_type == SPTF.MODE_M
img = imagelib.warp_by_params (params, img, warp, transform, flip, border_replicate)
if mask is not None:
mask = imagelib.warp_by_params (params, mask, warp, transform, flip, False)
if len(mask.shape) == 2:
mask = mask[...,np.newaxis]
return img, mask
img = sample_bgr
### Prepare a mask
mask = None
if is_face_sample:
if need_mask:
if sample.eyebrows_expand_mod is not None:
mask = LandmarksProcessor.get_image_hull_mask (img.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks, eyebrows_expand_mod=sample.eyebrows_expand_mod )
else:
mask = LandmarksProcessor.get_image_hull_mask (img.shape, sample.landmarks)
mask = LandmarksProcessor.get_image_hull_mask (sample_bgr.shape, sample.landmarks)
if sample.ie_polys is not None:
sample.ie_polys.overlay_mask(mask)
##################
if need_img:
img = sample_bgr
if motion_blur is not None:
chance, mb_max_size = motion_blur
chance = np.clip(chance, 0, 100)
@@ -214,99 +199,78 @@ class SampleProcessor(object):
raise Exception ('sample %s type %s does not match model requirement %s. Consider extract necessary type of faces.' % (sample.filename, sample.face_type, target_ft) )
if sample.face_type == FaceType.MARK_ONLY:
#first warp to target facetype
img = cv2.warpAffine( img, LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft), (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
mask = cv2.warpAffine( mask, LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft), (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
#then apply transforms
img, mask = do_transform (img, mask)
img = np.concatenate( (img, mask ), -1 )
img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
else:
img, mask = do_transform (img, mask)
mat = LandmarksProcessor.get_transform_mat (sample.landmarks, sample.shape[0], target_ft)
if need_img:
img = cv2.warpAffine( img, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
if need_mask:
mask = cv2.warpAffine( mask, mat, (sample.shape[0],sample.shape[0]), flags=cv2.INTER_CUBIC )
mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
mask = cv2.resize( mask, (resolution,resolution), cv2.INTER_CUBIC )[...,None]
else:
mat = LandmarksProcessor.get_transform_mat (sample.landmarks, resolution, target_ft)
img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=(cv2.BORDER_REPLICATE if border_replicate else cv2.BORDER_CONSTANT), flags=cv2.INTER_CUBIC )
mask = cv2.warpAffine( mask, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )
img = np.concatenate( (img, mask[...,None] ), -1 )
if need_img:
img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
img = cv2.warpAffine( img, mat, (resolution,resolution), borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC )
if need_mask:
mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
mask = cv2.warpAffine( mask, mat, (resolution,resolution), borderMode=cv2.BORDER_CONSTANT, flags=cv2.INTER_CUBIC )[...,None]
else:
img, mask = do_transform (img, mask)
img = np.concatenate( (img, mask ), -1 )
if need_img:
img = imagelib.warp_by_params (params, img, can_warp, can_transform, can_flip=True, border_replicate=True)
img = cv2.resize( img, (resolution,resolution), cv2.INTER_CUBIC )
if random_sub_res != 0:
sub_size = resolution - random_sub_res
rnd_state = np.random.RandomState (sample_rnd_seed+random_sub_res)
start_x = rnd_state.randint(sub_size+1)
start_y = rnd_state.randint(sub_size+1)
img = img[start_y:start_y+sub_size,start_x:start_x+sub_size,:]
if need_mask:
mask = imagelib.warp_by_params (params, mask, can_warp, can_transform, can_flip=True, border_replicate=False)
mask = cv2.resize( mask, (resolution,resolution), cv2.INTER_CUBIC )[...,None]
if mode_type == SPTF.MODE_M:
out_sample = np.clip(mask, 0, 1).astype(np.float32)
else:
img = np.clip(img, 0, 1).astype(np.float32)
img_bgr = img[...,0:3]
img_mask = img[...,3:4]
if ct_mode is not None and ct_sample is not None:
if ct_sample_bgr is None:
ct_sample_bgr = ct_sample.load_bgr()
ct_sample_bgr_resized = cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR )
if ct_mode == 'lct':
img_bgr = imagelib.linear_color_transfer (img_bgr, ct_sample_bgr_resized)
img_bgr = np.clip( img_bgr, 0.0, 1.0)
elif ct_mode == 'rct':
img_bgr = imagelib.reinhard_color_transfer ( np.clip( (img_bgr*255).astype(np.uint8), 0, 255),
np.clip( (ct_sample_bgr_resized*255).astype(np.uint8), 0, 255) )
img_bgr = np.clip( img_bgr.astype(np.float32) / 255.0, 0.0, 1.0)
elif ct_mode == 'mkl':
img_bgr = imagelib.color_transfer_mkl (img_bgr, ct_sample_bgr_resized)
elif ct_mode == 'idt':
img_bgr = imagelib.color_transfer_idt (img_bgr, ct_sample_bgr_resized)
elif ct_mode == 'sot':
img_bgr = imagelib.color_transfer_sot (img_bgr, ct_sample_bgr_resized)
img_bgr = np.clip( img_bgr, 0.0, 1.0)
if normalize_std_dev:
img_bgr = (img_bgr - img_bgr.mean( (0,1)) ) / img_bgr.std( (0,1) )
elif normalize_vgg:
img_bgr = np.clip(img_bgr*255, 0, 255)
img_bgr[:,:,0] -= 103.939
img_bgr[:,:,1] -= 116.779
img_bgr[:,:,2] -= 123.68
img = imagelib.color_transfer (ct_mode,
img,
cv2.resize( ct_sample_bgr, (resolution,resolution), cv2.INTER_LINEAR ) )
if mode_type == SPTF.MODE_BGR:
img = img_bgr
out_sample = img
elif mode_type == SPTF.MODE_BGR_SHUFFLE:
rnd_state = np.random.RandomState (sample_rnd_seed)
img = np.take (img_bgr, rnd_state.permutation(img_bgr.shape[-1]), axis=-1)
out_sample = np.take (img, rnd_state.permutation(img.shape[-1]), axis=-1)
elif mode_type == SPTF.MODE_BGR_RANDOM_HSV_SHIFT:
rnd_state = np.random.RandomState (sample_rnd_seed)
hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
h, s, v = cv2.split(hsv)
h = (h + rnd_state.randint(360) ) % 360
s = np.clip ( s + rnd_state.random()-0.5, 0, 1 )
v = np.clip ( v + rnd_state.random()-0.5, 0, 1 )
hsv = cv2.merge([h, s, v])
img = np.clip( cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) , 0, 1 )
out_sample = np.clip( cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) , 0, 1 )
elif mode_type == SPTF.MODE_G:
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)[...,None]
out_sample = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)[...,None]
elif mode_type == SPTF.MODE_GGG:
img = np.repeat ( np.expand_dims(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY),-1), (3,), -1)
elif mode_type == SPTF.MODE_M and is_face_sample:
img = img_mask
out_sample = np.repeat ( np.expand_dims(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY),-1), (3,), -1)
if not debug:
if normalize_tanh:
img = np.clip (img * 2.0 - 1.0, -1.0, 1.0)
else:
img = np.clip (img, 0.0, 1.0)
out_sample = np.clip (out_sample * 2.0 - 1.0, -1.0, 1.0)
if data_format == "NCHW":
img = np.transpose(img, (2,0,1) )
out_sample = np.transpose(out_sample, (2,0,1) )
outputs_sample.append ( img )
outputs_sample.append ( out_sample )
outputs += [outputs_sample]
return outputs

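The per-mode color transfer branching in the generator is now a single dispatch through imagelib.color_transfer, called with (ct_mode, source, target) on BGR float images in [0,1] as shown above. A minimal sketch of calling it directly (file names are illustrative; ct_mode is one of 'lct', 'rct', 'mkl', 'idt', 'sot' from the removed branches):

import cv2
import numpy as np
from core import imagelib

# illustrative inputs, converted to float BGR in [0,1]
src = cv2.imread('face_src.jpg').astype(np.float32) / 255.0
trg = cv2.imread('face_trg.jpg').astype(np.float32) / 255.0

# resize the color reference to the source size, then transfer its color statistics
trg = cv2.resize(trg, (src.shape[1], src.shape[0]), interpolation=cv2.INTER_LINEAR)
out = imagelib.color_transfer('rct', src, trg)

cv2.imwrite('face_src_rct.jpg', np.clip(out * 255.0, 0, 255).astype(np.uint8))

In the generator itself the reference image is the randomly picked ct_sample resized to the training resolution, exactly as in the new call above.
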
View file

@@ -1,6 +1,6 @@
from .Sample import Sample
from .Sample import SampleType
from .SampleHost import SampleHost
from .SampleLoader import SampleLoader
from .SampleProcessor import SampleProcessor
from .SampleGeneratorBase import SampleGeneratorBase
from .SampleGeneratorFace import SampleGeneratorFace