diff --git a/DFLIMG/DFLJPG.py b/DFLIMG/DFLJPG.py index 2d2ba56..1ed8692 100644 --- a/DFLIMG/DFLJPG.py +++ b/DFLIMG/DFLJPG.py @@ -4,9 +4,10 @@ import struct import cv2 import numpy as np +from core.interact import interact as io +from core.structex import * from facelib import FaceType -from utils.struct_utils import * -from interact import interact as io + class DFLJPG(object): def __init__(self): @@ -243,6 +244,7 @@ class DFLJPG(object): source_landmarks=source_landmarks, image_to_face_mat=image_to_face_mat, fanseg_mask=fanseg_mask, + eyebrows_expand_mod=eyebrows_expand_mod, relighted=relighted) def remove_ie_polys(self): @@ -322,4 +324,3 @@ class DFLJPG(object): return self.dfl_dict.get ('eyebrows_expand_mod', None) def get_relighted(self): return self.dfl_dict.get ('relighted', False) - diff --git a/README.md b/README.md index 1e54c71..e04188a 100644 --- a/README.md +++ b/README.md @@ -1,60 +1,77 @@ -![](doc/DFL_welcome.jpg) + + -[На русском](doc/manual_ru.pdf) -- ### [Windows Desktop App](doc/doc_windows_desktop_app.md) + -- ### [Build and repository info](doc/doc_build_and_repository_info.md) + -[Donate via Paypal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=lepersorium@gmail.com&lc=US&no_note=0&item_name=Support+DeepFaceLab&cn=&curency_code=USD&bn=PP-DonationsBF:btn_donateCC_LG.gif:NonHosted) - -bitcoin:31mPd6DxPCzbpCMZk4k1koWAbErSyqkAXr + +
+ +

+ +# DeepFaceLab +### the leading software for creating deep fakes -![](doc/logo_cuda.jpg)![](doc/logo_opencl.jpg)![](doc/logo_keras.jpg)![](doc/logo_tensorflow.jpg)![](doc/logo_plaidml.jpg) -#deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets +
-## **DeepFaceLab** is a tool that utilizes machine learning to replace faces in videos. +More than 95% of deepfake videos are created with DeepFaceLab. -- ### [Gallery](doc/gallery/doc_gallery.md) +DeepFaceLab is used by such popular YouTube channels as -- ### Manuals: +|[Ctrl Shift Face](https://www.youtube.com/channel/UCKpH0CKltc73e4wh0_pgL3g)|[Sham00k](https://www.youtube.com/channel/UCZXbWcv7fSZFTAZV4beckyw/videos)|[Collider videos](https://www.youtube.com/watch?v=A91P2qtPT54&list=PLayt6616lBclvOprvrC8qKGCO-mAhPRux)|[VFXChris Ume](https://www.youtube.com/channel/UCGf4OlX_aTt8DlrgiH3jN3g/videos)| +|---|---|---|---| -[English (google translated)](doc/manual_en_google_translated.pdf) +
-- ### Forks +## Releases -[Google Colab fork](https://github.com/chervonij/DFL-Colab) by @chervonij -[Linux fork](https://github.com/lbfs/DeepFaceLab_Linux) by @lbfs - may be outdated +|||| +|---|---|---| +|Windows|[Google Drive](https://drive.google.com/open?id=1BCFK_L7lPNwMbEQ_kFPqPpDdFEOd_Dci)|If the download quota is exceeded, add the file to your own Google Drive and download from it.| +||[Torrent page](https://rutracker.org/forum/viewtopic.php?t=5558863)|Use a BitTorrent client.| +|Google Colab|[github](https://github.com/chervonij/DFL-Colab)|by @chervonij. You can train fakes for free using Google Colab.| +|Linux|[github](https://github.com/lbfs/DeepFaceLab_Linux)|by @lbfs. May be outdated.| +|||| -- ### [Ready to work facesets](doc/doc_ready_to_work_facesets.md) +
-- ### How I can help the project? +## Links -If you like this software, please consider a donation. -GOAL: next DeepFacelab update. +|||| +|---|---|---| +|Guides and tutorials|[mrdeepfakes](https://mrdeepfakes.com/forums/forum-guides-and-tutorials)|| +|||| +|Ready to work facesets|[mrdeepfakes](https://mrdeepfakes.com/forums/forum-celebrity-facesets)|| +|||| +|Communication groups|[telegram (English / Russian)](https://t.me/DeepFaceLab_official)|Don't forget to hide your phone number.| +||[mrdeepfakes](https://mrdeepfakes.com/forums/)|the biggest (Not) Safe For Work English community| +||QQ 951138799|Chinese QQ group for ML/AI experts| +||[deepfaker.xyz](https://www.deepfaker.xyz)|Chinese community localizing DeepFaceLab| +||[reddit r/GifFakes/](https://www.reddit.com/r/GifFakes/new/)|Post your deepfakes there!| +||[reddit r/SFWdeepfakes/](https://www.reddit.com/r/SFWdeepfakes/new/)|Post your deepfakes there!| -[Donate via Yandex.Money](https://money.yandex.ru/to/41001142318065) +
+ +## How can I help the project? -![](doc/example_faceset.jpg) +|||| +|---|---|---| +|Donate|If you like this software, please consider a donation. Current **GOAL**: next DeepFaceLab update.|| +||[Donate via Paypal](https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=lepersorium@gmail.com&lc=US&no_note=0&item_name=Support+DeepFaceLab&cn=&curency_code=USD&bn=PP-DonationsBF:btn_donateCC_LG.gif:NonHosted)|| +||[Donate via Yandex.Money](https://money.yandex.ru/to/41001142318065)|| +||bitcoin:31mPd6DxPCzbpCMZk4k1koWAbErSyqkAXr|| +|||| +|Collect facesets|You can collect a faceset of any celebrity that can be used in DeepFaceLab and share it [in the community](https://mrdeepfakes.com/forums/forum-celebrity-facesets)| -You can collect faceset of any celebrities that can be used in DeepFaceLab (described in manual) +
+ +

-and share it here [mrdeepfakes celebrity-facesets](https://mrdeepfakes.com/forums/forum-celebrity-facesets) - -- ### Communication groups: - -[telegram (English / Russian)](https://t.me/DeepFaceLab_official) - -[mrdeepfakes (English)](https://mrdeepfakes.com/forums/) - the biggest SFW and NSFW community - -(Chinese) QQ group 951138799 for ML/AI experts - -[deepfakes (Chinese)](https://deepfakescn.com) - -[reddit r/GifFakes/ (English)](https://www.reddit.com/r/GifFakes/new/) - -[reddit r/SFWdeepfakes/ (English)](https://www.reddit.com/r/SFWdeepfakes/new/) +#deepfacelab #deepfakes #faceswap #face-swap #deep-learning #deeplearning #deep-neural-networks #deepface #deep-face-swap #fakeapp #fake-app #neural-networks #neural-nets +
diff --git a/converters/__init__.py b/converters/__init__.py deleted file mode 100644 index 8aa1057..0000000 --- a/converters/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .FrameInfo import FrameInfo -from .ConverterConfig import ConverterConfig, ConverterConfigMasked, ConverterConfigFaceAvatar -from .ConvertMasked import ConvertMasked -from .ConvertAvatar import ConvertFaceAvatar diff --git a/utils/cv2_utils.py b/core/cv2ex.py similarity index 100% rename from utils/cv2_utils.py rename to core/cv2ex.py diff --git a/imagelib/IEPolys.py b/core/imagelib/IEPolys.py similarity index 100% rename from imagelib/IEPolys.py rename to core/imagelib/IEPolys.py diff --git a/imagelib/__init__.py b/core/imagelib/__init__.py similarity index 86% rename from imagelib/__init__.py rename to core/imagelib/__init__.py index 7a1ed9b..6c0236f 100644 --- a/imagelib/__init__.py +++ b/core/imagelib/__init__.py @@ -13,9 +13,7 @@ from .reduce_colors import reduce_colors from .color_transfer import color_transfer_mix, color_transfer_sot, color_transfer_mkl, color_transfer_idt, color_hist_match, reinhard_color_transfer, linear_color_transfer, seamless_clone -from .RankSRGAN import RankSRGAN - -from .common import normalize_channels, overlay_alpha_image +from .common import normalize_channels, cut_odd_image, overlay_alpha_image from .IEPolys import IEPolys diff --git a/imagelib/blur.py b/core/imagelib/blur.py similarity index 100% rename from imagelib/blur.py rename to core/imagelib/blur.py diff --git a/imagelib/color_transfer.py b/core/imagelib/color_transfer.py similarity index 100% rename from imagelib/color_transfer.py rename to core/imagelib/color_transfer.py diff --git a/imagelib/common.py b/core/imagelib/common.py similarity index 88% rename from imagelib/common.py rename to core/imagelib/common.py index 2e66441..d73df8b 100644 --- a/imagelib/common.py +++ b/core/imagelib/common.py @@ -23,7 +23,14 @@ def normalize_channels(img, target_channels): c = target_channels return img - + +def cut_odd_image(img): + h, w, c = img.shape + wm, hm = w % 2, h % 2 + if wm + hm != 0: + img = img[0:h-hm,0:w-wm,:] + return img + def overlay_alpha_image(img_target, img_source, xy_offset=(0,0) ): (h,w,c) = img_source.shape if c != 4: diff --git a/imagelib/draw.py b/core/imagelib/draw.py similarity index 100% rename from imagelib/draw.py rename to core/imagelib/draw.py diff --git a/imagelib/equalize_and_stack_square.py b/core/imagelib/equalize_and_stack_square.py similarity index 100% rename from imagelib/equalize_and_stack_square.py rename to core/imagelib/equalize_and_stack_square.py diff --git a/imagelib/estimate_sharpness.py b/core/imagelib/estimate_sharpness.py similarity index 100% rename from imagelib/estimate_sharpness.py rename to core/imagelib/estimate_sharpness.py diff --git a/imagelib/morph.py b/core/imagelib/morph.py similarity index 100% rename from imagelib/morph.py rename to core/imagelib/morph.py diff --git a/imagelib/reduce_colors.py b/core/imagelib/reduce_colors.py similarity index 100% rename from imagelib/reduce_colors.py rename to core/imagelib/reduce_colors.py diff --git a/imagelib/text.py b/core/imagelib/text.py similarity index 100% rename from imagelib/text.py rename to core/imagelib/text.py diff --git a/imagelib/warp.py b/core/imagelib/warp.py similarity index 88% rename from imagelib/warp.py rename to core/imagelib/warp.py index a3c9490..d5d79b4 100644 --- a/imagelib/warp.py +++ b/core/imagelib/warp.py @@ -1,6 +1,6 @@ import numpy as np import cv2 -from utils import random_utils +from core import 
randomex def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0.5], tx_range=[-0.05, 0.05], ty_range=[-0.05, 0.05], rnd_seed=None ): h,w,c = source.shape @@ -26,8 +26,8 @@ def gen_warp_params (source, flip, rotation_range=[-10,10], scale_range=[-0.5, 0 mapx = np.broadcast_to(grid_points, (cell_count, cell_count)).copy() mapy = mapx.T - mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) - mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + random_utils.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + mapx[1:-1,1:-1] = mapx[1:-1,1:-1] + randomex.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) + mapy[1:-1,1:-1] = mapy[1:-1,1:-1] + randomex.random_normal( size=(cell_count-2, cell_count-2) )*(cell_size*0.24) half_cell_size = cell_size // 2 diff --git a/interact/__init__.py b/core/interact/__init__.py similarity index 100% rename from interact/__init__.py rename to core/interact/__init__.py diff --git a/interact/interact.py b/core/interact/interact.py similarity index 74% rename from interact/interact.py rename to core/interact/interact.py index dce7c37..46fd450 100644 --- a/interact/interact.py +++ b/core/interact/interact.py @@ -4,6 +4,7 @@ import sys import time import types +import colorama import cv2 from tqdm import tqdm @@ -16,6 +17,8 @@ try: except: is_colab = False +yn_str = {True:'y',False:'n'} + class InteractBase(object): EVENT_LBUTTONDOWN = 1 EVENT_LBUTTONUP = 2 @@ -186,13 +189,35 @@ class InteractBase(object): ar = self.key_events.get(wnd_name, []) self.key_events[wnd_name] = [] return ar + + def input(self, s): + return input(s) - def input_number(self, s, default_value, valid_list=None, help_message=None): + def input_number(self, s, default_value, valid_list=None, show_default_value=True, add_info=None, help_message=None): + if show_default_value and default_value is not None: + s = f"[{default_value}] {s}" + + if add_info is not None or \ + help_message is not None: + s += " (" + + if add_info is not None: + s += f" {add_info}" + if help_message is not None: + s += " ?:help" + + if add_info is not None or \ + help_message is not None: + s += " )" + + s += " : " + while True: try: inp = input(s) if len(inp) == 0: - raise ValueError("") + result = default_value + break if help_message is not None and inp == '?': print (help_message) @@ -200,13 +225,39 @@ class InteractBase(object): i = float(inp) if (valid_list is not None) and (i not in valid_list): - return default_value - return i + result = default_value + break + result = i + break except: - print (default_value) - return default_value - - def input_int(self,s, default_value, valid_list=None, help_message=None): + result = default_value + break + + print(result) + return result + + def input_int(self, s, default_value, valid_list=None, add_info=None, show_default_value=True, help_message=None): + if show_default_value: + if len(s) != 0: + s = f"[{default_value}] {s}" + else: + s = f"[{default_value}]" + + if add_info is not None or \ + help_message is not None: + s += " (" + + if add_info is not None: + s += f" {add_info}" + if help_message is not None: + s += " ?:help" + + if add_info is not None or \ + help_message is not None: + s += " )" + + s += " : " + while True: try: inp = input(s) @@ -219,13 +270,23 @@ class InteractBase(object): i = int(inp) if (valid_list is not None) and (i not in valid_list): - return default_value - return i + result = default_value + break + result = i + break except: - print 
(default_value) - return default_value + result = default_value + break + print (result) + return result - def input_bool(self, s, default_value, help_message=None): + def input_bool(self, s, default_value, help_message=None): + s = f"[{yn_str[default_value]}] {s} ( y/n" + + if help_message is not None: + s += " ?:help" + s += " ) : " + while True: try: inp = input(s) @@ -236,35 +297,65 @@ class InteractBase(object): print (help_message) continue - return bool ( {"y":True,"n":False,"1":True,"0":False}.get(inp.lower(), default_value) ) + return bool ( {"y":True,"n":False}.get(inp.lower(), default_value) ) except: print ( "y" if default_value else "n" ) return default_value - def input_str(self, s, default_value, valid_list=None, help_message=None): + def input_str(self, s, default_value=None, valid_list=None, show_default_value=True, help_message=None): + if show_default_value and default_value is not None: + s = f"[{default_value}] {s}" + + if valid_list is not None or \ + help_message is not None: + s += " (" + + if valid_list is not None: + s += " " + "/".join(valid_list) + + if help_message is not None: + s += " ?:help" + + if valid_list is not None or \ + help_message is not None: + s += " )" + + s += " : " + + while True: try: inp = input(s) + if len(inp) == 0: - raise ValueError("") - + if default_value is None: + print("") + return None + result = default_value + break + if help_message is not None and inp == '?': - print (help_message) + print(help_message) continue - + if valid_list is not None: if inp.lower() in valid_list: - return inp.lower() + result = inp.lower() + break if inp in valid_list: - return inp - return default_value - - return inp + result = inp + break + continue + result = inp + break except: - print (default_value) - return default_value - + result = default_value + break + + print(result) + return result + def input_process(self, stdin_fd, sq, str): sys.stdin = os.fdopen(stdin_fd) try: @@ -292,7 +383,14 @@ class InteractBase(object): class InteractDesktop(InteractBase): - + def __init__(self): + colorama.init() + super().__init__() + + def color_red(self): + pass + + def is_support_windows(self): return True @@ -347,9 +445,20 @@ class InteractDesktop(InteractBase): ord_key = cv2.waitKey(wait_key_time) shift_pressed = False if ord_key != -1: - if chr(ord_key) >= 'A' and chr(ord_key) <= 'Z': + chr_key = chr(ord_key) + + if chr_key >= 'A' and chr_key <= 'Z': shift_pressed = True ord_key += 32 + elif chr_key == '?': + shift_pressed = True + ord_key = ord('/') + elif chr_key == '<': + shift_pressed = True + ord_key = ord(',') + elif chr_key == '>': + shift_pressed = True + ord_key = ord('.') else: if sleep_time != 0: time.sleep(sleep_time) diff --git a/joblib/SubprocessFunctionCaller.py b/core/joblib/SubprocessFunctionCaller.py similarity index 100% rename from joblib/SubprocessFunctionCaller.py rename to core/joblib/SubprocessFunctionCaller.py diff --git a/utils/iter_utils.py b/core/joblib/SubprocessGenerator.py similarity index 75% rename from utils/iter_utils.py rename to core/joblib/SubprocessGenerator.py index e690e3b..b0d893e 100644 --- a/utils/iter_utils.py +++ b/core/joblib/SubprocessGenerator.py @@ -1,28 +1,8 @@ -import threading import queue as Queue import multiprocessing -import time - - -class ThisThreadGenerator(object): - def __init__(self, generator_func, user_param=None): - super().__init__() - self.generator_func = generator_func - self.user_param = user_param - self.initialized = False - - def __iter__(self): - return self - - def __next__(self): 
- if not self.initialized: - self.initialized = True - self.generator_func = self.generator_func(self.user_param) - - return next(self.generator_func) class SubprocessGenerator(object): - def __init__(self, generator_func, user_param=None, prefetch=2, start_now=False): + def __init__(self, generator_func, user_param=None, prefetch=2, start_now=True): super().__init__() self.prefetch = prefetch self.generator_func = generator_func diff --git a/joblib/SubprocessorBase.py b/core/joblib/SubprocessorBase.py similarity index 99% rename from joblib/SubprocessorBase.py rename to core/joblib/SubprocessorBase.py index a9cbc36..993d5cb 100644 --- a/joblib/SubprocessorBase.py +++ b/core/joblib/SubprocessorBase.py @@ -2,7 +2,7 @@ import traceback import multiprocessing import time import sys -from interact import interact as io +from core.interact import interact as io class Subprocessor(object): @@ -87,7 +87,7 @@ class Subprocessor(object): c2s.put ( {'op': 'error', 'data' : data} ) #overridable - def __init__(self, name, SubprocessorCli_class, no_response_time_sec = 0, io_loop_sleep_time=0.005, initialize_subprocesses_in_serial=True): + def __init__(self, name, SubprocessorCli_class, no_response_time_sec = 0, io_loop_sleep_time=0.005, initialize_subprocesses_in_serial=False): if not issubclass(SubprocessorCli_class, Subprocessor.Cli): raise ValueError("SubprocessorCli_class must be subclass of Subprocessor.Cli") @@ -130,7 +130,7 @@ class Subprocessor(object): #overridable def get_result(self): #return result that will be returned in func run() - raise NotImplementedError + return None #overridable def on_tick(self): diff --git a/core/joblib/ThisThreadGenerator.py b/core/joblib/ThisThreadGenerator.py new file mode 100644 index 0000000..e6f77a4 --- /dev/null +++ b/core/joblib/ThisThreadGenerator.py @@ -0,0 +1,16 @@ +class ThisThreadGenerator(object): + def __init__(self, generator_func, user_param=None): + super().__init__() + self.generator_func = generator_func + self.user_param = user_param + self.initialized = False + + def __iter__(self): + return self + + def __next__(self): + if not self.initialized: + self.initialized = True + self.generator_func = self.generator_func(self.user_param) + + return next(self.generator_func) \ No newline at end of file diff --git a/joblib/__init__.py b/core/joblib/__init__.py similarity index 50% rename from joblib/__init__.py rename to core/joblib/__init__.py index fbbc20c..68b3101 100644 --- a/joblib/__init__.py +++ b/core/joblib/__init__.py @@ -1,2 +1,4 @@ from .SubprocessorBase import Subprocessor from .SubprocessFunctionCaller import SubprocessFunctionCaller +from .ThisThreadGenerator import ThisThreadGenerator +from .SubprocessGenerator import SubprocessGenerator \ No newline at end of file diff --git a/core/leras/__init__.py b/core/leras/__init__.py new file mode 100644 index 0000000..7d9fb2b --- /dev/null +++ b/core/leras/__init__.py @@ -0,0 +1 @@ +from .nn import nn \ No newline at end of file diff --git a/core/leras/device.py b/core/leras/device.py new file mode 100644 index 0000000..e18ea2a --- /dev/null +++ b/core/leras/device.py @@ -0,0 +1,205 @@ +import sys +import ctypes +import os + +class Device(object): + def __init__(self, index, name, total_mem, free_mem, cc=0): + self.index = index + self.name = name + self.cc = cc + self.total_mem = total_mem + self.total_mem_gb = total_mem / 1024**3 + self.free_mem = free_mem + self.free_mem_gb = free_mem / 1024**3 + + def __str__(self): + return 
f"[{self.index}]:[{self.name}][{self.free_mem_gb:.3}/{self.total_mem_gb :.3}]" + +class Devices(object): + all_devices = None + + def __init__(self, devices): + self.devices = devices + + def __len__(self): + return len(self.devices) + + def __getitem__(self, key): + result = self.devices[key] + if isinstance(key, slice): + return Devices(result) + return result + + def __iter__(self): + for device in self.devices: + yield device + + def get_best_device(self): + result = None + idx_mem = 0 + for device in self.devices: + mem = device.total_mem + if mem > idx_mem: + result = device + idx_mem = mem + return result + + def get_worst_device(self): + result = None + idx_mem = sys.maxsize + for device in self.devices: + mem = device.total_mem + if mem < idx_mem: + result = device + idx_mem = mem + return result + + def get_device_by_index(self, idx): + for device in self.devices: + if device.index == idx: + return device + return None + + def get_devices_from_index_list(self, idx_list): + result = [] + for device in self.devices: + if device.index in idx_list: + result += [device] + return Devices(result) + + def get_equal_devices(self, device): + device_name = device.name + result = [] + for device in self.devices: + if device.name == device_name: + result.append (device) + return Devices(result) + + def get_devices_at_least_mem(self, totalmemsize_gb): + result = [] + for device in self.devices: + if device.total_mem >= totalmemsize_gb*(1024**3): + result.append (device) + return Devices(result) + + @staticmethod + def initialize_main_env(): + min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35)) + libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll') + for libname in libnames: + try: + cuda = ctypes.CDLL(libname) + except: + continue + else: + break + else: + return Devices([]) + + nGpus = ctypes.c_int() + name = b' ' * 200 + cc_major = ctypes.c_int() + cc_minor = ctypes.c_int() + freeMem = ctypes.c_size_t() + totalMem = ctypes.c_size_t() + + result = ctypes.c_int() + device = ctypes.c_int() + context = ctypes.c_void_p() + error_str = ctypes.c_char_p() + + devices = [] + + if cuda.cuInit(0) == 0 and \ + cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0: + for i in range(nGpus.value): + if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \ + cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \ + cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0: + continue + + if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0: + if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0: + cc = cc_major.value * 10 + cc_minor.value + if cc >= min_cc: + devices.append ( {'name' : name.split(b'\0', 1)[0].decode(), + 'total_mem' : totalMem.value, + 'free_mem' : freeMem.value, + 'cc' : cc + }) + cuda.cuCtxDetach(context) + + os.environ['NN_DEVICES_INITIALIZED'] = '1' + os.environ['NN_DEVICES_COUNT'] = str(len(devices)) + for i, device in enumerate(devices): + os.environ[f'NN_DEVICE_{i}_NAME'] = device['name'] + os.environ[f'NN_DEVICE_{i}_TOTAL_MEM'] = str(device['total_mem']) + os.environ[f'NN_DEVICE_{i}_FREE_MEM'] = str(device['free_mem']) + os.environ[f'NN_DEVICE_{i}_CC'] = str(device['cc']) + + @staticmethod + def getDevices(): + if Devices.all_devices is None: + if int(os.environ.get("NN_DEVICES_INITIALIZED", 0)) != 1: + raise Exception("nn devices are not initialized. 
Run initialize_main_env() in main process.") + devices = [] + for i in range ( int(os.environ['NN_DEVICES_COUNT']) ): + devices.append ( Device(index=i, + name=os.environ[f'NN_DEVICE_{i}_NAME'], + total_mem=int(os.environ[f'NN_DEVICE_{i}_TOTAL_MEM']), + free_mem=int(os.environ[f'NN_DEVICE_{i}_FREE_MEM']), + cc=int(os.environ[f'NN_DEVICE_{i}_CC']) )) + Devices.all_devices = Devices(devices) + + return Devices.all_devices + +""" +if Devices.all_devices is None: + min_cc = int(os.environ.get("TF_MIN_REQ_CAP", 35)) + + libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll') + for libname in libnames: + try: + cuda = ctypes.CDLL(libname) + except: + continue + else: + break + else: + return Devices([]) + + nGpus = ctypes.c_int() + name = b' ' * 200 + cc_major = ctypes.c_int() + cc_minor = ctypes.c_int() + freeMem = ctypes.c_size_t() + totalMem = ctypes.c_size_t() + + result = ctypes.c_int() + device = ctypes.c_int() + context = ctypes.c_void_p() + error_str = ctypes.c_char_p() + + devices = [] + + if cuda.cuInit(0) == 0 and \ + cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0: + for i in range(nGpus.value): + if cuda.cuDeviceGet(ctypes.byref(device), i) != 0 or \ + cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device) != 0 or \ + cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device) != 0: + continue + + if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device) == 0: + if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0: + cc = cc_major.value * 10 + cc_minor.value + if cc >= min_cc: + devices.append ( Device(index=i, + name=name.split(b'\0', 1)[0].decode(), + total_mem=totalMem.value, + free_mem=freeMem.value, + cc=cc) ) + cuda.cuCtxDetach(context) + Devices.all_devices = Devices(devices) + return Devices.all_devices +""" \ No newline at end of file diff --git a/core/leras/initializers.py b/core/leras/initializers.py new file mode 100644 index 0000000..a3294cb --- /dev/null +++ b/core/leras/initializers.py @@ -0,0 +1,52 @@ +import numpy as np + +def initialize_initializers(nn): + tf = nn.tf + from tensorflow.python.ops import init_ops + + class initializers(): + class ca (init_ops.Initializer): + def __init__(self, dtype=None): + pass + + def __call__(self, shape, dtype=None, partition_info=None): + return tf.zeros( shape, name="_cai_") + + @staticmethod + def generate(shape, eps_std=0.05, dtype=np.float32): + """ + Super fast implementation of Convolution Aware Initialization for 4D shapes + Convolution Aware Initialization https://arxiv.org/abs/1702.06295 + """ + if len(shape) != 4: + raise ValueError("only shape with rank 4 supported.") + + row, column, stack_size, filters_size = shape + + fan_in = stack_size * (row * column) + + kernel_shape = (row, column) + + kernel_fft_shape = np.fft.rfft2(np.zeros(kernel_shape)).shape + + basis_size = np.prod(kernel_fft_shape) + if basis_size == 1: + x = np.random.normal( 0.0, eps_std, (filters_size, stack_size, basis_size) ) + else: + nbb = stack_size // basis_size + 1 + x = np.random.normal(0.0, 1.0, (filters_size, nbb, basis_size, basis_size)) + x = x + np.transpose(x, (0,1,3,2) ) * (1-np.eye(basis_size)) + u, _, v = np.linalg.svd(x) + x = np.transpose(u, (0,1,3,2) ) + x = np.reshape(x, (filters_size, -1, basis_size) ) + x = x[:,:stack_size,:] + + x = np.reshape(x, ( (filters_size,stack_size,) + kernel_fft_shape ) ) + + x = np.fft.irfft2( x, kernel_shape ) \ + + np.random.normal(0, eps_std, (filters_size,stack_size,)+kernel_shape) + + x = x * np.sqrt( (2/fan_in) / np.var(x) ) + x = 
np.transpose( x, (2, 3, 1, 0) ) + return x.astype(dtype) + nn.initializers = initializers \ No newline at end of file diff --git a/core/leras/layers.py b/core/leras/layers.py new file mode 100644 index 0000000..7597ccf --- /dev/null +++ b/core/leras/layers.py @@ -0,0 +1,591 @@ +import pickle +import types +from pathlib import Path +from core import pathex +from core.interact import interact as io +import numpy as np + + +def initialize_layers(nn): + tf = nn.tf + + class Saveable(): + def __init__(self, name=None): + self.name = name + + #override + def get_weights(self): + #return tf tensors that should be initialized/loaded/saved + pass + + def save_weights(self, filename, force_dtype=None): + d = {} + weights = self.get_weights() + + if self.name is None: + raise Exception("name must be defined.") + + name = self.name + for w, w_val in zip(weights, nn.tf_sess.run (weights)): + w_name_split = w.name.split('/', 1) + if name != w_name_split[0]: + raise Exception("weight first name != Saveable.name") + + if force_dtype is not None: + w_val = w_val.astype(force_dtype) + + d[ w_name_split[1] ] = w_val + + d_dumped = pickle.dumps (d, 4) + pathex.write_bytes_safe ( Path(filename), d_dumped ) + + def load_weights(self, filename): + """ + returns True if file exists + """ + filepath = Path(filename) + if filepath.exists(): + result = True + d_dumped = filepath.read_bytes() + d = pickle.loads(d_dumped) + else: + return False + + weights = self.get_weights() + + if self.name is None: + raise Exception("name must be defined.") + + tuples = [] + for w in weights: + w_name_split = w.name.split('/') + if self.name != w_name_split[0]: + raise Exception("weight first name != Saveable.name") + + sub_w_name = "/".join(w_name_split[1:]) + + w_val = d.get(sub_w_name, None) + if w_val is None: + io.log_err(f"Weight {w.name} was not loaded from file {filename}") + tuples.append ( (w, w.initializer) ) + else: + tuples.append ( (w, w_val) ) + + nn.tf_batch_set_value(tuples) + + return True + + def init_weights(self): + ops = [] + tuples = [] + for w in self.get_weights(): + initializer = w.initializer + for input in initializer.inputs: + if "_cai_" in input.name: + tuples.append ( (w, nn.initializers.ca.generate(w.shape.as_list(), dtype= w.dtype.as_numpy_dtype) ) ) + break + else: + ops.append (initializer) + + nn.tf_sess.run (ops) + nn.tf_batch_set_value(tuples) + nn.Saveable = Saveable + + class LayerBase(): + def __init__(self, name=None, **kwargs): + self.name = name + + #override + def build_weights(self): + pass + + #override + def get_weights(self): + return [] + + def set_weights(self, new_weights): + weights = self.get_weights() + if len(weights) != len(new_weights): + raise ValueError ('len of lists mismatch') + + tuples = [] + for w, new_w in zip(weights, new_weights): + if len(w.shape) != new_w.shape: + new_w = new_w.reshape(w.shape) + + tuples.append ( (w, new_w) ) + + nn.tf_batch_set_value (tuples) + nn.LayerBase = LayerBase + + class ModelBase(Saveable): + def __init__(self, *args, name=None, **kwargs): + super().__init__(name=name) + self.layers = [] + self.built = False + self.args = args + self.kwargs = kwargs + self.run_placeholders = None + + def _build_sub(self, layer, name): + if isinstance (layer, list): + for i,sublayer in enumerate(layer): + self._build_sub(sublayer, f"{name}_{i}") + elif isinstance (layer, LayerBase) or \ + isinstance (layer, ModelBase): + + if layer.name is None: + layer.name = name + + if isinstance (layer, LayerBase): + with tf.variable_scope(layer.name): + 
layer.build_weights() + elif isinstance (layer, ModelBase): + layer.build() + + self.layers.append (layer) + + def xor_list(self, lst1, lst2): + return [value for value in lst1+lst2 if (value not in lst1) or (value not in lst2) ] + + def build(self): + with tf.variable_scope(self.name): + + current_vars = [] + generator = None + while True: + + if generator is None: + generator = self.on_build(*self.args, **self.kwargs) + if not isinstance(generator, types.GeneratorType): + generator = None + + if generator is not None: + try: + next(generator) + except StopIteration: + generator = None + + v = vars(self) + new_vars = self.xor_list (current_vars, list(v.keys()) ) + + for name in new_vars: + self._build_sub(v[name],name) + + current_vars += new_vars + + if generator is None: + break + + self.built = True + + #override + def get_weights(self): + if not self.built: + self.build() + + weights = [] + for layer in self.layers: + weights += layer.get_weights() + return weights + + def get_layers(self): + if not self.built: + self.build() + layers = [] + for layer in self.layers: + if isinstance (layer, LayerBase): + layers.append(layer) + else: + layers += layer.get_layers() + return layers + + #override + def on_build(self, *args, **kwargs): + """ + init model layers here + + return 'yield' if build is not finished + therefore dependency models will be initialized + """ + pass + + #override + def forward(self, *args, **kwargs): + #flow layers/models/tensors here + pass + + def __call__(self, *args, **kwargs): + if not self.built: + self.build() + + return self.forward(*args, **kwargs) + + def compute_output_shape(self, shapes): + if not self.built: + self.build() + + not_list = False + if not isinstance(shapes, list): + not_list = True + shapes = [shapes] + + with tf.device('/CPU:0'): + # CPU tensors will not impact any performance, only slightly RAM "leakage" + phs = [] + for dtype,sh in shapes: + phs += [ tf.placeholder(dtype, sh) ] + + result = self.__call__(phs[0] if not_list else phs) + + if not isinstance(result, list): + result = [result] + + result_shapes = [] + + for t in result: + result_shapes += [ t.shape.as_list() ] + + return result_shapes[0] if not_list else result_shapes + + def build_for_run(self, shapes_list): + if not isinstance(shapes_list, list): + raise ValueError("shapes_list must be a list.") + + self.run_placeholders = [] + for dtype,sh in shapes_list: + self.run_placeholders.append ( tf.placeholder(dtype, (None,)+sh) ) + + self.run_output = self.__call__(self.run_placeholders) + + def run (self, inputs): + if self.run_placeholders is None: + raise Exception ("Model didn't build for run.") + + if len(inputs) != len(self.run_placeholders): + raise ValueError("len(inputs) != self.run_placeholders") + + feed_dict = {} + for ph, inp in zip(self.run_placeholders, inputs): + feed_dict[ph] = inp + + return nn.tf_sess.run ( self.run_output, feed_dict=feed_dict) + + nn.ModelBase = ModelBase + + class Conv2D(LayerBase): + """ + use_wscale bool enables equalized learning rate, kernel_initializer will be forced to random_normal + + + """ + def __init__(self, in_ch, out_ch, kernel_size, strides=1, padding='SAME', dilations=1, use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + if not isinstance(strides, int): + raise ValueError ("strides must be an int type") + if not isinstance(dilations, int): + raise ValueError ("dilations must be an int type") + + if isinstance(padding, str): + if padding == "SAME": + padding 
= ( (kernel_size - 1) * dilations + 1 ) // 2 + elif padding == "VALID": + padding = 0 + else: + raise ValueError ("Wrong padding type. Should be VALID SAME or INT or 4x INTs") + + if isinstance(padding, int): + if padding != 0: + padding = [ [0,0], [padding,padding], [padding,padding], [0,0] ] + else: + padding = None + + self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.strides = [1,strides,strides,1] + self.padding = padding + self.dilations = [1,dilations,dilations,1] + self.use_bias = use_bias + self.use_wscale = use_wscale + self.kernel_initializer = None if use_wscale else kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + if dtype is None: + dtype = nn.tf_floatx + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + kernel_initializer = self.kernel_initializer + if kernel_initializer is None: + if self.use_wscale: + gain = 1.0 if self.kernel_size == 1 else np.sqrt(2) + fan_in = self.kernel_size*self.kernel_size*self.in_ch + he_std = gain / np.sqrt(fan_in) # He init + self.wscale = tf.constant(he_std, dtype=self.dtype ) + kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + else: + kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype) + + self.weight = tf.get_variable("weight", (self.kernel_size,self.kernel_size,self.in_ch,self.out_ch), dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + + self.bias = tf.get_variable("bias", (1,1,1,self.out_ch), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def __call__(self, x): + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + if self.padding is not None: + x = tf.pad (x, self.padding, mode='CONSTANT') + + x = tf.nn.conv2d(x, weight, self.strides, 'VALID', dilations=self.dilations) + if self.use_bias: + x = x + self.bias + return x + + def __str__(self): + r = f"{self.__class__.__name__} : in_ch:{self.in_ch} out_ch:{self.out_ch} " + + return r + nn.Conv2D = Conv2D + + class Conv2DTranspose(LayerBase): + """ + use_wscale enables weight scale (equalized learning rate) + kernel_initializer will be forced to random_normal + """ + def __init__(self, in_ch, out_ch, kernel_size, strides=2, padding='SAME', use_bias=True, use_wscale=False, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + if not isinstance(strides, int): + raise ValueError ("strides must be an int type") + self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.strides = strides + self.padding = padding + self.use_bias = use_bias + self.use_wscale = use_wscale + self.kernel_initializer = None if use_wscale else kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + if dtype is None: + dtype = nn.tf_floatx + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + kernel_initializer = self.kernel_initializer + if kernel_initializer is None: + if self.use_wscale: + gain = 1.0 if self.kernel_size == 1 else np.sqrt(2) + fan_in = self.kernel_size*self.kernel_size*self.in_ch + he_std = gain / np.sqrt(fan_in) # He init + self.wscale = tf.constant(he_std, dtype=self.dtype ) + 
kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + else: + kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype) + + self.weight = tf.get_variable("weight", (self.kernel_size,self.kernel_size,self.out_ch,self.in_ch), dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + self.bias = tf.get_variable("bias", (1,1,1,self.out_ch), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def __call__(self, x): + shape = x.shape + + h,w,c = shape[1], shape[2], shape[3] + + output_shape = tf.stack ( (tf.shape(x)[0], + self.deconv_length(w, self.strides, self.kernel_size, self.padding), + self.deconv_length(h, self.strides, self.kernel_size, self.padding), + self.out_ch) ) + + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + x = tf.nn.conv2d_transpose(x, weight, output_shape, [1,self.strides,self.strides,1], padding=self.padding) + + if self.use_bias: + x = x + self.bias + return x + + def __str__(self): + r = f"{self.__class__.__name__} : in_ch:{self.in_ch} out_ch:{self.out_ch} " + + return r + + def deconv_length(self, dim_size, stride_size, kernel_size, padding): + assert padding in {'SAME', 'VALID', 'FULL'} + if dim_size is None: + return None + if padding == 'VALID': + dim_size = dim_size * stride_size + max(kernel_size - stride_size, 0) + elif padding == 'FULL': + dim_size = dim_size * stride_size - (stride_size + kernel_size - 2) + elif padding == 'SAME': + dim_size = dim_size * stride_size + return dim_size + nn.Conv2DTranspose = Conv2DTranspose + + class BlurPool(LayerBase): + def __init__(self, filt_size=3, stride=2, **kwargs ): + self.strides = [1,stride,stride,1] + self.filt_size = filt_size + self.padding = [ [0,0], + [ int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ], + [ int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ], + [0,0] ] + if(self.filt_size==1): + a = np.array([1.,]) + elif(self.filt_size==2): + a = np.array([1., 1.]) + elif(self.filt_size==3): + a = np.array([1., 2., 1.]) + elif(self.filt_size==4): + a = np.array([1., 3., 3., 1.]) + elif(self.filt_size==5): + a = np.array([1., 4., 6., 4., 1.]) + elif(self.filt_size==6): + a = np.array([1., 5., 10., 10., 5., 1.]) + elif(self.filt_size==7): + a = np.array([1., 6., 15., 20., 15., 6., 1.]) + + a = a[:,None]*a[None,:] + a = a / np.sum(a) + a = a[:,:,None,None] + self.a = a + super().__init__(**kwargs) + + def build_weights(self): + self.k = tf.constant (self.a, dtype=nn.tf_floatx ) + + def __call__(self, x): + k = tf.tile (self.k, (1,1,x.shape[-1],1) ) + x = tf.pad(x, self.padding ) + x = tf.nn.depthwise_conv2d(x, k, self.strides, 'VALID') + return x + nn.BlurPool = BlurPool + + class Dense(LayerBase): + def __init__(self, in_ch, out_ch, use_bias=True, use_wscale=False, maxout_ch=0, kernel_initializer=None, bias_initializer=None, trainable=True, dtype=None, **kwargs ): + """ + use_wscale enables weight scale (equalized learning rate) + kernel_initializer will be forced to random_normal + + maxout_ch https://link.springer.com/article/10.1186/s40537-019-0233-0 + typical 2-4 if you want to enable DenseMaxout behaviour + """ + self.in_ch = in_ch + self.out_ch = out_ch + self.use_bias = use_bias + self.use_wscale = use_wscale 
+ self.maxout_ch = maxout_ch + self.kernel_initializer = kernel_initializer + self.bias_initializer = bias_initializer + self.trainable = trainable + if dtype is None: + dtype = tf.float32 + self.dtype = dtype + super().__init__(**kwargs) + + def build_weights(self): + if self.maxout_ch > 1: + weight_shape = (self.in_ch,self.out_ch*self.maxout_ch) + else: + weight_shape = (self.in_ch,self.out_ch) + + kernel_initializer = self.kernel_initializer + if kernel_initializer is None: + if self.use_wscale: + gain = 1.0 + fan_in = np.prod( weight_shape[:-1] ) + he_std = gain / np.sqrt(fan_in) # He init + self.wscale = tf.constant(he_std, dtype=self.dtype ) + kernel_initializer = tf.initializers.random_normal(0, 1.0, dtype=self.dtype) + else: + kernel_initializer = tf.initializers.glorot_uniform(dtype=self.dtype) + + self.weight = tf.get_variable("weight", weight_shape, dtype=self.dtype, initializer=kernel_initializer, trainable=self.trainable ) + + if self.use_bias: + bias_initializer = self.bias_initializer + if bias_initializer is None: + bias_initializer = tf.initializers.zeros(dtype=self.dtype) + self.bias = tf.get_variable("bias", (1,self.out_ch), dtype=self.dtype, initializer=bias_initializer, trainable=self.trainable ) + + def get_weights(self): + weights = [self.weight] + if self.use_bias: + weights += [self.bias] + return weights + + def __call__(self, x): + weight = self.weight + if self.use_wscale: + weight = weight * self.wscale + + x = tf.matmul(x, weight) + + if self.maxout_ch > 1: + x = tf.reshape (x, (-1, self.out_ch, self.maxout_ch) ) + x = tf.reduce_max(x, axis=-1) + + if self.use_bias: + x = x + self.bias + + return x + nn.Dense = Dense + + class BatchNorm2D(LayerBase): + """ + currently not for training + """ + def __init__(self, dim, eps=1e-05, momentum=0.1, dtype=None, **kwargs ): + self.dim = dim + self.eps = eps + self.momentum = momentum + if dtype is None: + dtype = nn.tf_floatx + self.dtype = dtype + + self.shape = (1,1,1,dim) + + super().__init__(**kwargs) + + def build_weights(self): + self.weight = tf.get_variable("weight", self.shape, dtype=self.dtype, initializer=tf.initializers.ones() ) + self.bias = tf.get_variable("bias", self.shape, dtype=self.dtype, initializer=tf.initializers.zeros() ) + self.running_mean = tf.get_variable("running_mean", self.shape, dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False ) + self.running_var = tf.get_variable("running_var", self.shape, dtype=self.dtype, initializer=tf.initializers.zeros(), trainable=False ) + + def get_weights(self): + return [self.weight, self.bias, self.running_mean, self.running_var] + + def __call__(self, x): + x = (x - self.running_mean) / tf.sqrt( self.running_var + self.eps ) + x *= self.weight + x += self.bias + return x + + nn.BatchNorm2D = BatchNorm2D \ No newline at end of file diff --git a/core/leras/nn.py b/core/leras/nn.py new file mode 100644 index 0000000..280bcdb --- /dev/null +++ b/core/leras/nn.py @@ -0,0 +1,256 @@ +""" +Leras. + +like lighter keras. +This is my lightweight neural network library written from scratch +based on pure tensorflow without keras. 
+ +Provides: ++ full freedom of tensorflow operations without keras model's restrictions ++ easy model operations like in PyTorch, but in graph mode (no eager execution) ++ convenient and understandable logic + +Reasons why we cannot import tensorflow or any tensorflow.sub modules right here: +1) change env variables based on DeviceConfig before import tensorflow +2) multiprocesses will import tensorflow every spawn +""" + +import os +import sys +from pathlib import Path +from core.interact import interact as io +from .device import Devices + +class nn(): + current_DeviceConfig = None + + tf = None + tf_sess = None + tf_sess_config = None + + # Tensor ops + tf_get_value = None + tf_batch_set_value = None + tf_gradients = None + tf_average_gv_list = None + tf_average_tensor_list = None + tf_dot = None + tf_gelu = None + tf_upsample2d = None + tf_upsample2d_bilinear = None + tf_flatten = None + tf_random_binomial = None + tf_gaussian_blur = None + tf_style_loss = None + tf_dssim = None + + # Layers + Saveable = None + LayerBase = None + ModelBase = None + Conv2D = None + Conv2DTranspose = None + BlurPool = None + Dense = None + BatchNorm2D = None + + # Initializers + initializers = None + + # Optimizers + TFBaseOptimizer = None + TFRMSpropOptimizer = None + + @staticmethod + def initialize(device_config=None): + if nn.tf is None: + if device_config is None: + device_config = nn.getCurrentDeviceConfig() + else: + nn.setCurrentDeviceConfig(device_config) + + if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): + os.environ.pop('CUDA_VISIBLE_DEVICES') + + os.environ['CUDA_​CACHE_​MAXSIZE'] = '536870912' #512Mb (32mb default) + + first_run = False + + if sys.platform[0:3] == 'win': + devices_str = "" + for device in device_config.devices: + devices_str += "_" + device.name.replace(' ','_') + + compute_cache_path = Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache' + devices_str) + if not compute_cache_path.exists(): + first_run = True + os.environ['CUDA_CACHE_PATH'] = str(compute_cache_path) + + os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # tf log errors only + + import warnings + warnings.simplefilter(action='ignore', category=FutureWarning) + + if first_run: + io.log_info("Caching GPU kernels...") + + import tensorflow as tf + nn.tf = tf + + if device_config.cpu_only: + config = tf.ConfigProto(device_count={'GPU': 0}) + else: + config = tf.ConfigProto() + config.gpu_options.visible_device_list = ','.join([str(device.index) for device in device_config.devices]) + + config.gpu_options.force_gpu_compatible = True + config.gpu_options.allow_growth = True + nn.tf_sess_config = config + + nn.tf_floatx = nn.tf.float32 #nn.tf.float16 if device_config.use_fp16 else nn.tf.float32 + nn.np_floatx = nn.tf_floatx.as_numpy_dtype + + from .tensor_ops import initialize_tensor_ops + from .layers import initialize_layers + from .initializers import initialize_initializers + from .optimizers import initialize_optimizers + + initialize_tensor_ops(nn) + initialize_layers(nn) + initialize_initializers(nn) + initialize_optimizers(nn) + + if nn.tf_sess is None: + nn.tf_sess = tf.Session(config=nn.tf_sess_config) + + @staticmethod + def initialize_main_env(): + Devices.initialize_main_env() + + @staticmethod + def getCurrentDeviceConfig(): + if nn.current_DeviceConfig is None: + nn.current_DeviceConfig = DeviceConfig.BestGPU() + return nn.current_DeviceConfig + + @staticmethod + def setCurrentDeviceConfig(device_config): + nn.current_DeviceConfig = device_config + + 
@staticmethod + def tf_reset_session(): + if nn.tf is not None: + if nn.tf_sess is not None: + nn.tf.reset_default_graph() + nn.tf_sess.close() + nn.tf_sess = nn.tf.Session(config=nn.tf_sess_config) + + @staticmethod + def tf_close_session(): + if nn.tf_sess is not None: + nn.tf.reset_default_graph() + nn.tf_sess.close() + nn.tf_sess = None + + + @staticmethod + def ask_choose_device_idxs(choose_only_one=False, allow_cpu=True, suggest_best_multi_gpu=False, suggest_all_gpu=False, return_device_config=False): + devices = Devices.getDevices() + if len(devices) == 0: + return [] + + all_devices_indexes = [device.index for device in devices] + + if choose_only_one: + suggest_best_multi_gpu = False + suggest_all_gpu = False + + if suggest_all_gpu: + best_device_indexes = all_devices_indexes + elif suggest_best_multi_gpu: + best_device_indexes = [device.index for device in devices.get_equal_devices(devices.get_best_device()) ] + else: + best_device_indexes = [ devices.get_best_device().index ] + best_device_indexes = ",".join([str(x) for x in best_device_indexes]) + + io.log_info ("") + if choose_only_one: + io.log_info ("Choose one GPU idx.") + else: + io.log_info ("Choose one or several GPU idxs (separated by comma).") + io.log_info ("") + + if allow_cpu: + io.log_info ("[CPU] : CPU") + for device in devices: + io.log_info (f" [{device.index}] : {device.name}") + + io.log_info ("") + + while True: + try: + if choose_only_one: + choosed_idxs = io.input_str("Which GPU index to choose?", best_device_indexes) + else: + choosed_idxs = io.input_str("Which GPU indexes to choose?", best_device_indexes) + + if allow_cpu and choosed_idxs.lower() == "cpu": + choosed_idxs = [] + break + + choosed_idxs = [ int(x) for x in choosed_idxs.split(',') ] + + if choose_only_one: + if len(choosed_idxs) == 1: + break + else: + if all( [idx in all_devices_indexes for idx in choosed_idxs] ): + break + except: + pass + io.log_info ("") + + if return_device_config: + return nn.DeviceConfig.GPUIndexes(choosed_idxs) + else: + return choosed_idxs + + class DeviceConfig(): + def __init__ (self, devices=None): + devices = devices or [] + + if not isinstance(devices, Devices): + devices = Devices(devices) + + self.devices = devices + self.cpu_only = len(devices) == 0 + + @staticmethod + def BestGPU(): + devices = Devices.getDevices() + if len(devices) == 0: + return nn.DeviceConfig.CPU() + + return nn.DeviceConfig([devices.get_best_device()]) + + @staticmethod + def WorstGPU(): + devices = Devices.getDevices() + if len(devices) == 0: + return nn.DeviceConfig.CPU() + + return nn.DeviceConfig([devices.get_worst_device()]) + + @staticmethod + def GPUIndexes(indexes): + if len(indexes) != 0: + devices = Devices.getDevices().get_devices_from_index_list(indexes) + else: + devices = [] + + return nn.DeviceConfig(devices) + + @staticmethod + def CPU(): + return nn.DeviceConfig([]) diff --git a/core/leras/optimizers.py b/core/leras/optimizers.py new file mode 100644 index 0000000..0a6d477 --- /dev/null +++ b/core/leras/optimizers.py @@ -0,0 +1,108 @@ +def initialize_optimizers(nn): + tf = nn.tf + from tensorflow.python.ops import state_ops, control_flow_ops + + class TFBaseOptimizer(nn.Saveable): + def __init__(self, name=None): + super().__init__(name=name) + + def tf_clip_norm(self, g, c, n): + """Clip the gradient `g` if the L2 norm `n` exceeds `c`. + # Arguments + g: Tensor, the gradient tensor + c: float >= 0. Gradients will be clipped + when their L2 norm exceeds this value. + n: Tensor, actual norm of `g`. 
+ # Returns + Tensor, the gradient clipped if required. + """ + if c <= 0: # if clipnorm == 0 no need to add ops to the graph + return g + + condition = n >= c + then_expression = tf.scalar_mul(c / n, g) + else_expression = g + + # saving the shape to avoid converting sparse tensor to dense + if isinstance(then_expression, tf.Tensor): + g_shape = copy.copy(then_expression.get_shape()) + elif isinstance(then_expression, tf.IndexedSlices): + g_shape = copy.copy(then_expression.dense_shape) + if condition.dtype != tf.bool: + condition = tf.cast(condition, 'bool') + g = tf.cond(condition, + lambda: then_expression, + lambda: else_expression) + if isinstance(then_expression, tf.Tensor): + g.set_shape(g_shape) + elif isinstance(then_expression, tf.IndexedSlices): + g._dense_shape = g_shape + + return g + nn.TFBaseOptimizer = TFBaseOptimizer + + class TFRMSpropOptimizer(TFBaseOptimizer): + def __init__(self, lr=0.001, rho=0.9, lr_dropout=1.0, epsilon=1e-7, clipnorm=0.0, name=None): + super().__init__(name=name) + + if name is None: + raise ValueError('name must be defined.') + + self.lr_dropout = lr_dropout + self.clipnorm = clipnorm + + with tf.device('/CPU:0') : + with tf.variable_scope(self.name): + self.lr = tf.Variable (lr, name="lr") + self.rho = tf.Variable (rho, name="rho") + self.epsilon = tf.Variable (epsilon, name="epsilon") + self.iterations = tf.Variable(0, dtype=tf.int64, name='iters') + + self.accumulators = [] + self.accumulator_counter = 0 + self.accumulators_dict = {} + self.lr_rnds_dict = {} + + def get_weights(self): + return [self.lr, self.rho, self.epsilon, self.iterations] + self.accumulators + + def initialize_variables(self, trainable_weights, vars_on_cpu=True): + # Initialize here all trainable variables used in training + e = tf.device('/CPU:0') if vars_on_cpu else None + if e: e.__enter__() + with tf.variable_scope(self.name): + accumulators = [ tf.get_variable ( f'acc_{i+self.accumulator_counter}', v.shape, initializer=tf.initializers.constant(0.0), trainable=False) + for (i, v ) in enumerate(trainable_weights) ] + + self.accumulators_dict.update ( { v.name : acc for v,acc in zip(trainable_weights,accumulators) } ) + self.accumulators += accumulators + self.accumulator_counter += len(trainable_weights) + + if self.lr_dropout != 1.0: + lr_rnds = [ nn.tf_random_binomial( v.shape, p=self.lr_dropout) for v in trainable_weights ] + self.lr_rnds_dict.update ( { v.name : rnd for v,rnd in zip(trainable_weights,lr_rnds) } ) + if e: e.__exit__(None, None, None) + + def get_update_op(self, grads_vars): + updates = [] + lr = self.lr + if self.clipnorm > 0.0: + norm = tf.sqrt( sum([tf.reduce_sum(tf.square(g)) for g,v in grads_vars])) + updates += [ state_ops.assign_add( self.iterations, 1) ] + for i, (g,v) in enumerate(grads_vars): + if self.clipnorm > 0.0: + g = self.tf_clip_norm(g, self.clipnorm, norm) + + a = self.accumulators_dict[v.name] + new_a = self.rho * a + (1. 
- self.rho) * tf.square(g) + v_diff = - lr * g / (tf.sqrt(new_a) + self.epsilon) + if self.lr_dropout != 1.0: + lr_rnd = self.lr_rnds_dict[v.name] + v_diff *= lr_rnd + new_v = v + v_diff + + updates.append (state_ops.assign(a, new_a)) + updates.append (state_ops.assign(v, new_v)) + + return control_flow_ops.group ( *updates, name=self.name+'_updates') + nn.TFRMSpropOptimizer = TFRMSpropOptimizer \ No newline at end of file diff --git a/core/leras/tensor_ops.py b/core/leras/tensor_ops.py new file mode 100644 index 0000000..52aa5ab --- /dev/null +++ b/core/leras/tensor_ops.py @@ -0,0 +1,295 @@ +import numpy as np + +def initialize_tensor_ops(nn): + tf = nn.tf + from tensorflow.python.ops import array_ops, random_ops, math_ops, sparse_ops, gradients + from tensorflow.python.framework import sparse_tensor + + def tf_get_value(tensor): + return nn.tf_sess.run (tensor) + nn.tf_get_value = tf_get_value + + + def tf_batch_set_value(tuples): + if len(tuples) != 0: + with nn.tf.device('/CPU:0'): + assign_ops = [] + feed_dict = {} + + for x, value in tuples: + if isinstance(value, nn.tf.Operation): + assign_ops.append(value) + else: + value = np.asarray(value, dtype=x.dtype.as_numpy_dtype) + assign_placeholder = nn.tf.placeholder( x.dtype.base_dtype, shape=[None]*value.ndim ) + assign_op = nn.tf.assign (x, assign_placeholder ) + assign_ops.append(assign_op) + feed_dict[assign_placeholder] = value + + nn.tf_sess.run(assign_ops, feed_dict=feed_dict) + nn.tf_batch_set_value = tf_batch_set_value + + + def tf_gradients ( loss, vars ): + grads = gradients.gradients(loss, vars, colocate_gradients_with_ops=True ) + #todo none gradient for var + return [*zip(grads,vars)] + nn.tf_gradients = tf_gradients + + def tf_average_gv_list(grad_var_list, tf_device_string=None): + e = tf.device(tf_device_string) if tf_device_string is not None else None + if e is not None: e.__enter__() + result = [] + for i, (gv) in enumerate(grad_var_list): + for j,(g,v) in enumerate(gv): + g = tf.expand_dims(g, 0) + if i == 0: + result += [ [[g], v] ] + else: + result[j][0] += [g] + + for i,(gs,v) in enumerate(result): + result[i] = ( tf.reduce_mean( tf.concat (gs, 0), 0 ), v ) + if e is not None: e.__exit__(None,None,None) + return result + nn.tf_average_gv_list = tf_average_gv_list + + def tf_average_tensor_list(tensors_list, tf_device_string=None): + e = tf.device(tf_device_string) if tf_device_string is not None else None + if e is not None: e.__enter__() + result = tf.reduce_mean(tf.concat ([tf.expand_dims(t, 0) for t in tensors_list], 0), 0) + if e is not None: e.__exit__(None,None,None) + return result + nn.tf_average_tensor_list = tf_average_tensor_list + + def tf_dot(x, y): + if x.shape.ndims > 2 or y.shape.ndims > 2: + x_shape = [] + for i, s in zip( x.shape.as_list(), array_ops.unstack(array_ops.shape(x))): + if i is not None: + x_shape.append(i) + else: + x_shape.append(s) + x_shape = tuple(x_shape) + y_shape = [] + for i, s in zip( y.shape.as_list(), array_ops.unstack(array_ops.shape(y))): + if i is not None: + y_shape.append(i) + else: + y_shape.append(s) + y_shape = tuple(y_shape) + y_permute_dim = list(range(y.shape.ndims)) + y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim + xt = array_ops.reshape(x, [-1, x_shape[-1]]) + yt = array_ops.reshape(array_ops.transpose(y, perm=y_permute_dim), [y_shape[-2], -1]) + + import code + code.interact(local=dict(globals(), **locals())) + return array_ops.reshape(math_ops.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:]) + if isinstance(x, 
sparse_tensor.SparseTensor): + out = sparse_ops.sparse_tensor_dense_matmul(x, y) + else: + out = math_ops.matmul(x, y) + return out + nn.tf_dot = tf_dot + + def tf_gelu(x): + cdf = 0.5 * (1.0 + tf.nn.tanh((np.sqrt(2 / np.pi) * (x + 0.044715 * tf.pow(x, 3))))) + return x * cdf + nn.tf_gelu = tf_gelu + + def tf_upsample2d(x, size=2): + return tf.image.resize_nearest_neighbor(x, (x.shape[1]*size, x.shape[2]*size) ) + nn.tf_upsample2d = tf_upsample2d + + def tf_upsample2d_bilinear(x, size=2): + return tf.image.resize_images(x, (x.shape[1]*size, x.shape[2]*size) ) + nn.tf_upsample2d_bilinear = tf_upsample2d_bilinear + + def tf_flatten(x, dynamic_dims=False): + """ + dynamic_dims allows to flatten without knowing size on input dims + """ + if dynamic_dims: + sh = tf.shape(x) + return tf.reshape (x, (sh[0], tf.reduce_prod(sh[1:]) ) ) + else: + return tf.reshape (x, (-1, np.prod(x.shape[1:])) ) + + nn.tf_flatten = tf_flatten + + def tf_random_binomial(shape, p=0.0, dtype=None, seed=None): + if dtype is None: + dtype=tf.float32 + + if seed is None: + seed = np.random.randint(10e6) + return array_ops.where( + random_ops.random_uniform(shape, dtype=tf.float16, seed=seed) < p, + array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype)) + nn.tf_random_binomial = tf_random_binomial + + def tf_gaussian_blur(input, radius=2.0): + def gaussian(x, mu, sigma): + return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2)) + + def make_kernel(sigma): + kernel_size = max(3, int(2 * 2 * sigma + 1)) + mean = np.floor(0.5 * kernel_size) + kernel_1d = np.array([gaussian(x, mean, sigma) for x in range(kernel_size)]) + np_kernel = np.outer(kernel_1d, kernel_1d).astype(np.float32) + kernel = np_kernel / np.sum(np_kernel) + return kernel + + gauss_kernel = make_kernel(radius) + gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis] + kernel_size = gauss_kernel.shape[0] + + inputs = [ input[:,:,:,i:i+1] for i in range( input.shape[-1] ) ] + + outputs = [] + for i in range(len(inputs)): + x = inputs[i] + if kernel_size != 0: + padding = kernel_size//2 + x = tf.pad (x, [ [0,0], [padding,padding], [padding,padding], [0,0] ] ) + + outputs += [ tf.nn.conv2d(x, tf.constant(gauss_kernel, dtype=nn.tf_floatx ) , strides=[1,1,1,1], padding="VALID") ] + + return tf.concat (outputs, axis=-1) + nn.tf_gaussian_blur = tf_gaussian_blur + + def tf_style_loss(target, style, gaussian_blur_radius=0.0, loss_weight=1.0, step_size=1): + def sd(content, style, loss_weight): + content_nc = content.shape[-1] + style_nc = style.shape[-1] + if content_nc != style_nc: + raise Exception("style_loss() content_nc != style_nc") + + axes = [1,2] + c_mean, c_var = tf.nn.moments(content, axes=axes, keep_dims=True) + s_mean, s_var = tf.nn.moments(style, axes=axes, keep_dims=True) + c_std, s_std = tf.sqrt(c_var + 1e-5), tf.sqrt(s_var + 1e-5) + + mean_loss = tf.reduce_sum(tf.square(c_mean-s_mean), axis=[1,2,3]) + std_loss = tf.reduce_sum(tf.square(c_std-s_std), axis=[1,2,3]) + + return (mean_loss + std_loss) * ( loss_weight / content_nc.value ) + + if gaussian_blur_radius > 0.0: + target = tf_gaussian_blur(target, gaussian_blur_radius) + style = tf_gaussian_blur(style, gaussian_blur_radius) + + return sd( target, style, loss_weight=loss_weight ) + + nn.tf_style_loss = tf_style_loss + + def tf_dssim(img1,img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): + + ch = img2.shape[-1] + + def _fspecial_gauss(size, sigma): + #Function to mimic the 'fspecial' gaussian MATLAB function. 
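+            # Builds the Gaussian window used by the depthwise convolution below: squared distances
+            # from the window centre are scaled by -0.5/sigma**2, the row and column terms are summed
+            # into a 2-D log-kernel, and softmax exponentiates and normalizes it so the weights sum to 1.
+            # The result is reshaped to (size, size, 1, 1) and tiled across `ch` channels for depthwise_conv2d.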
+ coords = np.arange(0, size, dtype=nn.np_floatx) + coords -= (size - 1 ) / 2.0 + g = coords**2 + g *= ( -0.5 / (sigma**2) ) + g = np.reshape (g, (1,-1)) + np.reshape(g, (-1,1) ) + g = tf.constant ( np.reshape (g, (1,-1)), dtype=nn.tf_floatx ) + g = tf.nn.softmax(g) + g = tf.reshape (g, (size, size, 1, 1)) + g = tf.tile (g, (1,1,ch,1)) + return g + + kernel = _fspecial_gauss(filter_size,filter_sigma) + + def reducer(x): + return tf.nn.depthwise_conv2d(x, kernel, strides=[1,1,1,1], padding='VALID') + + c1 = (k1 * max_val) ** 2 + c2 = (k2 * max_val) ** 2 + + mean0 = reducer(img1) + mean1 = reducer(img2) + num0 = mean0 * mean1 * 2.0 + den0 = tf.square(mean0) + tf.square(mean1) + luminance = (num0 + c1) / (den0 + c1) + + num1 = reducer(img1 * img2) * 2.0 + den1 = reducer(tf.square(img1) + tf.square(img2)) + c2 *= 1.0 #compensation factor + cs = (num1 - num0 + c2) / (den1 - den0 + c2) + + ssim_val = tf.reduce_mean(luminance * cs, axis=(-3, -2) ) + return(1.0 - ssim_val ) / 2.0 + nn.tf_dssim = tf_dssim + + def tf_rgb_to_lab(srgb): + srgb_pixels = tf.reshape(srgb, [-1, 3]) + linear_mask = tf.cast(srgb_pixels <= 0.04045, dtype=tf.float32) + exponential_mask = tf.cast(srgb_pixels > 0.04045, dtype=tf.float32) + rgb_pixels = (srgb_pixels / 12.92 * linear_mask) + (((srgb_pixels + 0.055) / 1.055) ** 2.4) * exponential_mask + rgb_to_xyz = tf.constant([ + # X Y Z + [0.412453, 0.212671, 0.019334], # R + [0.357580, 0.715160, 0.119193], # G + [0.180423, 0.072169, 0.950227], # B + ]) + xyz_pixels = tf.matmul(rgb_pixels, rgb_to_xyz) + + xyz_normalized_pixels = tf.multiply(xyz_pixels, [1/0.950456, 1.0, 1/1.088754]) + + epsilon = 6/29 + linear_mask = tf.cast(xyz_normalized_pixels <= (epsilon**3), dtype=tf.float32) + exponential_mask = tf.cast(xyz_normalized_pixels > (epsilon**3), dtype=tf.float32) + fxfyfz_pixels = (xyz_normalized_pixels / (3 * epsilon**2) + 4/29) * linear_mask + (xyz_normalized_pixels ** (1/3)) * exponential_mask + + fxfyfz_to_lab = tf.constant([ + # l a b + [ 0.0, 500.0, 0.0], # fx + [116.0, -500.0, 200.0], # fy + [ 0.0, 0.0, -200.0], # fz + ]) + lab_pixels = tf.matmul(fxfyfz_pixels, fxfyfz_to_lab) + tf.constant([-16.0, 0.0, 0.0]) + return tf.reshape(lab_pixels, tf.shape(srgb)) + nn.tf_rgb_to_lab = tf_rgb_to_lab + + def tf_suppress_lower_mean(t, eps=0.00001): + if t.shape.ndims != 1: + raise ValueError("tf_suppress_lower_mean: t rank must be 1") + t_mean_eps = tf.reduce_mean(t) - eps + q = tf.clip_by_value(t, t_mean_eps, tf.reduce_max(t) ) + q = tf.clip_by_value(q-t_mean_eps, 0, eps) + q = q * (t/eps) + return q +""" +class GeLU(KL.Layer): + Gaussian Error Linear Unit. + A smoother version of ReLU generally used + in the BERT or BERT architecture based models. + Original paper: https://arxiv.org/abs/1606.08415 + Input shape: + Arbitrary. Use the keyword argument `input_shape` + (tuple of integers, does not include the samples axis) + when using this layer as the first layer in a model. + Output shape: + Same shape as the input. 
+ + def __init__(self, approximate=True, **kwargs): + super(GeLU, self).__init__(**kwargs) + self.approximate = approximate + self.supports_masking = True + + def call(self, inputs): + cdf = 0.5 * (1.0 + K.tanh((np.sqrt(2 / np.pi) * (inputs + 0.044715 * K.pow(inputs, 3))))) + return inputs * cdf + + def get_config(self): + config = {'approximate': self.approximate} + base_config = super(GeLU, self).get_config() + return dict(list(base_config.items()) + list(config.items())) + + def compute_output_shape(self, input_shape): + return input_shape + nn.GeLU = GeLU +""" \ No newline at end of file diff --git a/mathlib/__init__.py b/core/mathlib/__init__.py similarity index 100% rename from mathlib/__init__.py rename to core/mathlib/__init__.py diff --git a/mathlib/umeyama.py b/core/mathlib/umeyama.py similarity index 100% rename from mathlib/umeyama.py rename to core/mathlib/umeyama.py diff --git a/utils/mp_utils.py b/core/mplib/__init__.py similarity index 100% rename from utils/mp_utils.py rename to core/mplib/__init__.py diff --git a/utils/os_utils.py b/core/osex.py similarity index 100% rename from utils/os_utils.py rename to core/osex.py diff --git a/utils/Path_utils.py b/core/pathex.py similarity index 88% rename from utils/Path_utils.py rename to core/pathex.py index c609572..5c93eed 100644 --- a/utils/Path_utils.py +++ b/core/pathex.py @@ -3,6 +3,16 @@ from os import scandir image_extensions = [".jpg", ".jpeg", ".png", ".tif", ".tiff"] +def write_bytes_safe(p, bytes_data): + """ + writes to .tmp first and then rename to target filename + """ + p_tmp = p.parent / (p.name + '.tmp') + p_tmp.write_bytes(bytes_data) + if p.exists(): + p.unlink() + p_tmp.rename (p) + def scantree(path): """Recursively yield DirEntry objects for given directory.""" for entry in scandir(path): @@ -46,7 +56,7 @@ def get_file_paths(dir_path): dir_path = Path (dir_path) if dir_path.exists(): - return sorted([ x.path for x in list(scandir(str(dir_path))) if x.is_file() ]) + return [ Path(x) for x in sorted([ x.path for x in list(scandir(str(dir_path))) if x.is_file() ]) ] else: return [] diff --git a/utils/random_utils.py b/core/randomex.py similarity index 100% rename from utils/random_utils.py rename to core/randomex.py diff --git a/utils/std_utils.py b/core/stdex.py similarity index 100% rename from utils/std_utils.py rename to core/stdex.py diff --git a/utils/struct_utils.py b/core/structex.py similarity index 100% rename from utils/struct_utils.py rename to core/structex.py diff --git a/doc/DeepFaceLab is working.png b/doc/DeepFaceLab is working.png new file mode 100644 index 0000000..4d86d36 Binary files /dev/null and b/doc/DeepFaceLab is working.png differ diff --git a/doc/doc_build_and_repository_info.md b/doc/doc_build_and_repository_info.md deleted file mode 100644 index 89a740e..0000000 --- a/doc/doc_build_and_repository_info.md +++ /dev/null @@ -1,5 +0,0 @@ -#### **CPU mode** - -It is possible to run from script for all stages using the `--cpu-only` flag. To run from script, install the separate dependencies for CPU mode using `pip -r requirements-cpu.txt`. - -Please note that extraction and training will take much long without a GPU and performance will greatly suffer without one. In particular, do not use DLIB extractor in CPU mode, it's too slow to run without a GPU. Train only on 64px resolution models like H64 or SAE (with low settings) and the lightweight encoder. 
\ No newline at end of file diff --git a/doc/doc_ready_to_work_facesets.md b/doc/doc_ready_to_work_facesets.md deleted file mode 100644 index 75f3dee..0000000 --- a/doc/doc_ready_to_work_facesets.md +++ /dev/null @@ -1,11 +0,0 @@ -### **Example Face Sets**: - -Faces sets for the following have been pre-extracted, - -- Nicolas Cage -- Steve Jobs -- Putin -- Elon Musk -- Harrison Ford - -[Download from Google drive](https://drive.google.com/open?id=1LwMdfTxdOaNAHt_sGV76aQVn7XPseXJB) diff --git a/doc/doc_windows_desktop_app.md b/doc/doc_windows_desktop_app.md deleted file mode 100644 index e6c83ec..0000000 --- a/doc/doc_windows_desktop_app.md +++ /dev/null @@ -1,27 +0,0 @@ -### **Prebuilt Windows Releases** - -Windows builds with all dependencies included are released regularly. Only the NVIDIA GeForce display driver needs to be installed. Prebuilt DeepFaceLab, including GPU and CPU versions, can be downloaded from - -[Google drive](https://drive.google.com/open?id=1BCFK_L7lPNwMbEQ_kFPqPpDdFEOd_Dci) - -if the download qouta is exceeded, add the file to your own google drive and download from it - -[Torrent](https://rutracker.org/forum/viewtopic.php?t=5558863) - -Available builds: - -* DeepFaceLab_CUDA - for NVIDIA cards - -* DeepFaceLab_OpenCL - for NVIDIA/AMD/IntelHD cards - -Important: you don't need to install CUDA ! - -#### Video tutorials using prebuilt windows app - -* [Basic workflow](https://www.youtube.com/watch?v=K98nTNjXkq8) - -* [Basic workflow (thanks @derpfakes)](https://www.youtube.com/watch?v=cVcyghhmQSA) - -* [How To Make DeepFakes With DeepFaceLab - An Amatuer's Guide](https://www.youtube.com/watch?v=wBax7_UWXvc) - -* [Manual re-extract poorly aligned frames](https://www.youtube.com/watch?v=7z1ykVVCHhM) \ No newline at end of file diff --git a/doc/example_faceset.jpg b/doc/example_faceset.jpg deleted file mode 100644 index 6ee914f..0000000 Binary files a/doc/example_faceset.jpg and /dev/null differ diff --git a/doc/gallery/1.jpg b/doc/gallery/1.jpg deleted file mode 100644 index bb4895b..0000000 Binary files a/doc/gallery/1.jpg and /dev/null differ diff --git a/doc/gallery/2.jpg b/doc/gallery/2.jpg deleted file mode 100644 index b8e6138..0000000 Binary files a/doc/gallery/2.jpg and /dev/null differ diff --git a/doc/gallery/doc_gallery.md b/doc/gallery/doc_gallery.md deleted file mode 100644 index 5ba780f..0000000 --- a/doc/gallery/doc_gallery.md +++ /dev/null @@ -1,3 +0,0 @@ -![](1.jpg) - -![](2.jpg) \ No newline at end of file diff --git a/doc/logo_cuda.jpg b/doc/logo_cuda.jpg deleted file mode 100644 index 472571e..0000000 Binary files a/doc/logo_cuda.jpg and /dev/null differ diff --git a/doc/logo_cuda.png b/doc/logo_cuda.png new file mode 100644 index 0000000..0b928a6 Binary files /dev/null and b/doc/logo_cuda.png differ diff --git a/doc/logo_keras.jpg b/doc/logo_keras.jpg deleted file mode 100644 index 6082a5b..0000000 Binary files a/doc/logo_keras.jpg and /dev/null differ diff --git a/doc/logo_opencl.jpg b/doc/logo_opencl.jpg deleted file mode 100644 index 36ab395..0000000 Binary files a/doc/logo_opencl.jpg and /dev/null differ diff --git a/doc/logo_plaidml.jpg b/doc/logo_plaidml.jpg deleted file mode 100644 index c206915..0000000 Binary files a/doc/logo_plaidml.jpg and /dev/null differ diff --git a/doc/logo_tensorflow.jpg b/doc/logo_tensorflow.jpg deleted file mode 100644 index 19f44c9..0000000 Binary files a/doc/logo_tensorflow.jpg and /dev/null differ diff --git a/doc/logo_tensorflow.png b/doc/logo_tensorflow.png new file mode 100644 index 0000000..b06cdd6 Binary 
files /dev/null and b/doc/logo_tensorflow.png differ diff --git a/doc/manual_en_google_translated.docx b/doc/manual_en_google_translated.docx deleted file mode 100644 index ac64a7d..0000000 Binary files a/doc/manual_en_google_translated.docx and /dev/null differ diff --git a/doc/manual_en_google_translated.pdf b/doc/manual_en_google_translated.pdf deleted file mode 100644 index d6fade9..0000000 Binary files a/doc/manual_en_google_translated.pdf and /dev/null differ diff --git a/doc/manual_extractor_0.jpg b/doc/manual_extractor_0.jpg deleted file mode 100644 index b88c02c..0000000 Binary files a/doc/manual_extractor_0.jpg and /dev/null differ diff --git a/doc/manual_ru.pdf b/doc/manual_ru.pdf deleted file mode 100644 index 2c27e9f..0000000 Binary files a/doc/manual_ru.pdf and /dev/null differ diff --git a/doc/manual_ru_source.docx b/doc/manual_ru_source.docx deleted file mode 100644 index f37a425..0000000 Binary files a/doc/manual_ru_source.docx and /dev/null differ diff --git a/ebsynth/__init__.py b/ebsynth/__init__.py deleted file mode 100644 index ce31c32..0000000 --- a/ebsynth/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .ebsynth import color_transfer \ No newline at end of file diff --git a/ebsynth/ebsynth.dll b/ebsynth/ebsynth.dll deleted file mode 100644 index f6c085e..0000000 Binary files a/ebsynth/ebsynth.dll and /dev/null differ diff --git a/ebsynth/ebsynth.py b/ebsynth/ebsynth.py deleted file mode 100644 index ec2ec08..0000000 --- a/ebsynth/ebsynth.py +++ /dev/null @@ -1,201 +0,0 @@ -import os -import sys -from ctypes import * -from pathlib import Path - -import cv2 -import numpy as np - -libebsynth = None -cached_buffer = {} - -EBSYNTH_BACKEND_CPU = 0x0001 -EBSYNTH_BACKEND_CUDA = 0x0002 -EBSYNTH_BACKEND_AUTO = 0x0000 -EBSYNTH_MAX_STYLE_CHANNELS = 8 -EBSYNTH_MAX_GUIDE_CHANNELS = 24 -EBSYNTH_VOTEMODE_PLAIN = 0x0001 # weight = 1 -EBSYNTH_VOTEMODE_WEIGHTED = 0x0002 # weight = 1/(1+error) - - -def _normalize_img_shape (img): - img_len = len(img.shape) - if img_len == 2: - sh, sw = img.shape - sc = 0 - elif img_len == 3: - sh, sw, sc = img.shape - - if sc == 0: - sc = 1 - img = img [...,np.newaxis] - return img - -def run (img_style, guides, - patch_size=5, - num_pyramid_levels=-1, - num_search_vote_iters = 6, - num_patch_match_iters = 4, - stop_threshold = 5, - uniformity_weight = 3500.0, - extraPass3x3 = False, - ): - if patch_size < 3: - raise ValueError ("patch_size is too small") - if patch_size % 2 == 0: - raise ValueError ("patch_size must be an odd number") - if len(guides) == 0: - raise ValueError ("at least one guide must be specified") - - global libebsynth - if libebsynth is None: - if sys.platform[0:3] == 'win': - libebsynth_path = str ( Path(__file__).parent / 'ebsynth.dll' ) - libebsynth = CDLL(libebsynth_path) - else: - #todo: implement for linux - pass - - if libebsynth is not None: - libebsynth.ebsynthRun.argtypes = ( \ - c_int, - c_int, - c_int, - c_int, - c_int, - c_void_p, - c_void_p, - c_int, - c_int, - c_void_p, - c_void_p, - POINTER(c_float), - POINTER(c_float), - c_float, - c_int, - c_int, - c_int, - POINTER(c_int), - POINTER(c_int), - POINTER(c_int), - c_int, - c_void_p, - c_void_p - ) - - if libebsynth is None: - return img_style - - img_style = _normalize_img_shape (img_style) - sh, sw, sc = img_style.shape - t_h, t_w, t_c = 0,0,0 - - if sc > EBSYNTH_MAX_STYLE_CHANNELS: - raise ValueError (f"error: too many style channels {sc}, maximum number is {EBSYNTH_MAX_STYLE_CHANNELS}") - - guides_source = [] - guides_target = [] - guides_weights = [] - - for i in 
range(len(guides)): - source_guide, target_guide, guide_weight = guides[i] - source_guide = _normalize_img_shape(source_guide) - target_guide = _normalize_img_shape(target_guide) - s_h, s_w, s_c = source_guide.shape - nt_h, nt_w, nt_c = target_guide.shape - - if s_h != sh or s_w != sw: - raise ValueError ("guide source and style resolution must match style resolution.") - - if t_c == 0: - t_h, t_w, t_c = nt_h, nt_w, nt_c - elif nt_h != t_h or nt_w != t_w: - raise ValueError ("guides target resolutions must be equal") - - if s_c != nt_c: - raise ValueError ("guide source and target channels must match exactly.") - - guides_source.append (source_guide) - guides_target.append (target_guide) - - guides_weights += [ guide_weight / s_c ] * s_c - - guides_source = np.concatenate ( guides_source, axis=-1) - guides_target = np.concatenate ( guides_target, axis=-1) - guides_weights = (c_float*len(guides_weights) ) ( *guides_weights ) - - styleWeight = 1.0 - style_weights = [ styleWeight / sc for i in range(sc) ] - style_weights = (c_float*sc) ( *style_weights ) - - - maxPyramidLevels = 0 - for level in range(32,-1,-1): - if min( min(sh, t_h)*pow(2.0, -level), \ - min(sw, t_w)*pow(2.0, -level) ) >= (2*patch_size+1): - maxPyramidLevels = level+1 - break - - if num_pyramid_levels == -1: - num_pyramid_levels = maxPyramidLevels - num_pyramid_levels = min(num_pyramid_levels, maxPyramidLevels) - - num_search_vote_iters_per_level = (c_int*num_pyramid_levels) ( *[num_search_vote_iters]*num_pyramid_levels ) - num_patch_match_iters_per_level = (c_int*num_pyramid_levels) ( *[num_patch_match_iters]*num_pyramid_levels ) - stop_threshold_per_level = (c_int*num_pyramid_levels) ( *[stop_threshold]*num_pyramid_levels ) - - buffer = cached_buffer.get ( (t_h,t_w,sc), None ) - if buffer is None: - buffer = create_string_buffer (t_h*t_w*sc) - cached_buffer[(t_h,t_w,sc)] = buffer - - libebsynth.ebsynthRun (EBSYNTH_BACKEND_CPU, #backend - sc, #numStyleChannels - guides_source.shape[-1], #numGuideChannels - sw, #sourceWidth - sh, #sourceHeight - img_style.tobytes(), #sourceStyleData (width * height * numStyleChannels) bytes, scan-line order - guides_source.tobytes(), #sourceGuideData (width * height * numGuideChannels) bytes, scan-line order - t_w, #targetWidth - t_h, #targetHeight - guides_target.tobytes(), #targetGuideData (width * height * numGuideChannels) bytes, scan-line order - None, #targetModulationData (width * height * numGuideChannels) bytes, scan-line order; pass NULL to switch off the modulation - style_weights, #styleWeights (numStyleChannels) floats - guides_weights, #guideWeights (numGuideChannels) floats - uniformity_weight, #uniformityWeight reasonable values are between 500-15000, 3500 is a good default - patch_size, #patchSize odd sizes only, use 5 for 5x5 patch, 7 for 7x7, etc. 
- EBSYNTH_VOTEMODE_PLAIN, #voteMode use VOTEMODE_WEIGHTED for sharper result - num_pyramid_levels, #numPyramidLevels - - num_search_vote_iters_per_level, #numSearchVoteItersPerLevel how many search/vote iters to perform at each level (array of ints, coarse first, fine last) - num_patch_match_iters_per_level, #numPatchMatchItersPerLevel how many Patch-Match iters to perform at each level (array of ints, coarse first, fine last) - stop_threshold_per_level, #stopThresholdPerLevel stop improving pixel when its change since last iteration falls under this threshold - 1 if extraPass3x3 else 0, #extraPass3x3 perform additional polishing pass with 3x3 patches at the finest level, use 0 to disable - None, #outputNnfData (width * height * 2) ints, scan-line order; pass NULL to ignore - buffer #outputImageData (width * height * numStyleChannels) bytes, scan-line order - ) - - return np.frombuffer(buffer, dtype=np.uint8).reshape ( (t_h,t_w,sc) ).copy() - -#transfer color from source to target -def color_transfer(img_source, img_target): - guides = [( cv2.cvtColor(img_source, cv2.COLOR_BGR2GRAY), - cv2.cvtColor(img_target, cv2.COLOR_BGR2GRAY), - 1 ) ] - - h,w,c = img_source.shape - result = [] - for i in range(c): - result += [ - run( img_source[...,i:i+1] , guides=guides, - patch_size=11, - num_pyramid_levels=40, - num_search_vote_iters = 6, - num_patch_match_iters = 4, - stop_threshold = 5, - uniformity_weight=500.0, - extraPass3x3=True, - ) - ] - - return np.concatenate( result, axis=-1 ) diff --git a/facelib/DLIBExtractor.py b/facelib/DLIBExtractor.py deleted file mode 100644 index a91164d..0000000 --- a/facelib/DLIBExtractor.py +++ /dev/null @@ -1,40 +0,0 @@ -import numpy as np -import os -import cv2 - -from pathlib import Path - -class DLIBExtractor(object): - def __init__(self, dlib): - self.scale_to = 1850 - #3100 eats ~1.687GB VRAM on 2GB 730 desktop card, but >4Gb on 6GB card, - #but 3100 doesnt work on 2GB 850M notebook card, I cant understand this behaviour - #1850 works on 2GB 850M notebook card, works faster than 3100, produces good result - self.dlib = dlib - - def __enter__(self): - self.dlib_cnn_face_detector = self.dlib.cnn_face_detection_model_v1( str(Path(__file__).parent / "mmod_human_face_detector.dat") ) - self.dlib_cnn_face_detector ( np.zeros ( (self.scale_to, self.scale_to, 3), dtype=np.uint8), 0 ) - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - del self.dlib_cnn_face_detector - return False #pass exception between __enter__ and __exit__ to outter level - - def extract_from_bgr (self, input_image): - input_image = input_image[:,:,::-1].copy() - (h, w, ch) = input_image.shape - - detected_faces = [] - input_scale = self.scale_to / (w if w > h else h) - input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) - detected_faces = self.dlib_cnn_face_detector(input_image, 0) - - result = [] - for d_rect in detected_faces: - if type(d_rect) == self.dlib.mmod_rectangle: - d_rect = d_rect.rect - left, top, right, bottom = d_rect.left(), d_rect.top(), d_rect.right(), d_rect.bottom() - result.append ( (int(left/input_scale), int(top/input_scale), int(right/input_scale), int(bottom/input_scale)) ) - - return result diff --git a/facelib/2DFAN-4.h5 b/facelib/FAN.npy similarity index 98% rename from facelib/2DFAN-4.h5 rename to facelib/FAN.npy index ac782ad..ba55f15 100644 Binary files a/facelib/2DFAN-4.h5 and b/facelib/FAN.npy differ diff --git a/facelib/FANExtractor.py b/facelib/FANExtractor.py 
index 9fd9b02..3429172 100644 --- a/facelib/FANExtractor.py +++ b/facelib/FANExtractor.py @@ -7,29 +7,159 @@ import numpy as np from numpy import linalg as npla from facelib import FaceType, LandmarksProcessor -from nnlib import nnlib +from core.leras import nn """ ported from https://github.com/1adrianb/face-alignment """ class FANExtractor(object): - def __init__ (self): - pass + def __init__ (self, place_model_on_cpu=False): + model_path = Path(__file__).parent / "FAN.npy" + if not model_path.exists(): + raise Exception("Unable to load FANExtractor model") - def __enter__(self): - keras_model_path = Path(__file__).parent / "2DFAN-4.h5" - if not keras_model_path.exists(): - return None + nn.initialize() + tf = nn.tf - exec( nnlib.import_all(), locals(), globals() ) - self.model = FANExtractor.BuildModel() - self.model.load_weights(str(keras_model_path)) + class ConvBlock(nn.ModelBase): + def on_build(self, in_planes, out_planes): + self.in_planes = in_planes + self.out_planes = out_planes - return self + self.bn1 = nn.BatchNorm2D(in_planes) + self.conv1 = nn.Conv2D (in_planes, out_planes/2, kernel_size=3, strides=1, padding='SAME', use_bias=False ) - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - del self.model - return False #pass exception between __enter__ and __exit__ to outter level + self.bn2 = nn.BatchNorm2D(out_planes/2) + self.conv2 = nn.Conv2D (out_planes/2, out_planes/4, kernel_size=3, strides=1, padding='SAME', use_bias=False ) + + self.bn3 = nn.BatchNorm2D(out_planes/4) + self.conv3 = nn.Conv2D (out_planes/4, out_planes/4, kernel_size=3, strides=1, padding='SAME', use_bias=False ) + + if self.in_planes != self.out_planes: + self.down_bn1 = nn.BatchNorm2D(in_planes) + self.down_conv1 = nn.Conv2D (in_planes, out_planes, kernel_size=1, strides=1, padding='VALID', use_bias=False ) + else: + self.down_bn1 = None + self.down_conv1 = None + + def forward(self, input): + x = input + x = self.bn1(x) + x = tf.nn.relu(x) + x = out1 = self.conv1(x) + + x = self.bn2(x) + x = tf.nn.relu(x) + x = out2 = self.conv2(x) + + x = self.bn3(x) + x = tf.nn.relu(x) + x = out3 = self.conv3(x) + x = tf.concat ([out1, out2, out3], axis=-1) + + if self.in_planes != self.out_planes: + downsample = self.down_bn1(input) + downsample = tf.nn.relu (downsample) + downsample = self.down_conv1 (downsample) + x = x + downsample + else: + x = x + input + + return x + + class HourGlass (nn.ModelBase): + def on_build(self, in_planes, depth): + self.b1 = ConvBlock (in_planes, 256) + self.b2 = ConvBlock (in_planes, 256) + + if depth > 1: + self.b2_plus = HourGlass(256, depth-1) + else: + self.b2_plus = ConvBlock(256, 256) + + self.b3 = ConvBlock(256, 256) + + def forward(self, input): + up1 = self.b1(input) + + low1 = tf.nn.avg_pool(input, [1,2,2,1], [1,2,2,1], 'VALID') + low1 = self.b2 (low1) + + low2 = self.b2_plus(low1) + low3 = self.b3(low2) + + up2 = nn.tf_upsample2d(low3) + + return up1+up2 + + class FAN (nn.ModelBase): + def __init__(self): + super().__init__(name='FAN') + + def on_build(self): + self.conv1 = nn.Conv2D (3, 64, kernel_size=7, strides=2, padding='SAME') + self.bn1 = nn.BatchNorm2D(64) + + self.conv2 = ConvBlock(64, 128) + self.conv3 = ConvBlock(128, 128) + self.conv4 = ConvBlock(128, 256) + + self.m = [] + self.top_m = [] + self.conv_last = [] + self.bn_end = [] + self.l = [] + self.bl = [] + self.al = [] + for i in range(4): + self.m += [ HourGlass(256, 4) ] + self.top_m += [ ConvBlock(256, 256) ] + + self.conv_last += [ nn.Conv2D (256, 256, kernel_size=1, strides=1, 
padding='VALID') ] + self.bn_end += [ nn.BatchNorm2D(256) ] + + self.l += [ nn.Conv2D (256, 68, kernel_size=1, strides=1, padding='VALID') ] + + if i < 4-1: + self.bl += [ nn.Conv2D (256, 256, kernel_size=1, strides=1, padding='VALID') ] + self.al += [ nn.Conv2D (68, 256, kernel_size=1, strides=1, padding='VALID') ] + + def forward(self, inp) : + x, = inp + x = self.conv1(x) + x = self.bn1(x) + x = tf.nn.relu(x) + + x = self.conv2(x) + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], 'VALID') + x = self.conv3(x) + x = self.conv4(x) + + outputs = [] + previous = x + for i in range(4): + ll = self.m[i] (previous) + ll = self.top_m[i] (ll) + ll = self.conv_last[i] (ll) + ll = self.bn_end[i] (ll) + ll = tf.nn.relu(ll) + tmp_out = self.l[i](ll) + outputs.append(tmp_out) + if i < 4 - 1: + ll = self.bl[i](ll) + previous = previous + ll + self.al[i](tmp_out) + return outputs[-1] + + e = None + if place_model_on_cpu: + e = tf.device("/CPU:0") + + if e is not None: e.__enter__() + self.model = FAN() + self.model.load_weights(str(model_path)) + if e is not None: e.__exit__(None,None,None) + + self.model.build_for_run ([ ( tf.float32, (256,256,3) ) ]) def extract (self, input_image, rects, second_pass_extractor=None, is_bgr=True, multi_sample=False): if len(rects) == 0: @@ -63,13 +193,13 @@ class FANExtractor(object): images += [ self.crop(input_image, c, scale) ] images = np.stack (images) - images = images.astype(np.float32) / 255.0 + images = images.astype(np.float32) / 255.0 predicted = [] for i in range( len(images) ): - predicted += [ self.model.predict ( images[i][None,...] ).transpose (0,3,1,2)[0] ] + predicted += [ self.model.run ( [ images[i][None,...] ] ).transpose (0,3,1,2)[0] ] - predicted = np.stack(predicted) + predicted = np.stack(predicted) for i, pred in enumerate(predicted): ptss += [ self.get_pts_from_predict ( pred, centers[i], scale) ] @@ -144,81 +274,3 @@ class FANExtractor(object): c += 0.5 return np.array( [ self.transform (c[i], center, scale, a_w) for i in range(a_ch) ] ) - - @staticmethod - def BuildModel(): - def ConvBlock(out_planes, input): - in_planes = K.int_shape(input)[-1] - x = input - x = BatchNormalization(momentum=0.1, epsilon=1e-05)(x) - x = ReLU() (x) - x = out1 = Conv2D( int(out_planes/2), kernel_size=3, strides=1, padding='valid', use_bias = False) (ZeroPadding2D(1)(x)) - - x = BatchNormalization(momentum=0.1, epsilon=1e-05)(x) - x = ReLU() (x) - x = out2 = Conv2D( int(out_planes/4), kernel_size=3, strides=1, padding='valid', use_bias = False) (ZeroPadding2D(1)(x)) - - x = BatchNormalization(momentum=0.1, epsilon=1e-05)(x) - x = ReLU() (x) - x = out3 = Conv2D( int(out_planes/4), kernel_size=3, strides=1, padding='valid', use_bias = False) (ZeroPadding2D(1)(x)) - - x = Concatenate()([out1, out2, out3]) - - if in_planes != out_planes: - downsample = BatchNormalization(momentum=0.1, epsilon=1e-05)(input) - downsample = ReLU() (downsample) - downsample = Conv2D( out_planes, kernel_size=1, strides=1, padding='valid', use_bias = False) (downsample) - x = Add ()([x, downsample]) - else: - x = Add ()([x, input]) - - - return x - - def HourGlass (depth, input): - up1 = ConvBlock(256, input) - - low1 = AveragePooling2D (pool_size=2, strides=2, padding='valid' )(input) - low1 = ConvBlock (256, low1) - - if depth > 1: - low2 = HourGlass (depth-1, low1) - else: - low2 = ConvBlock(256, low1) - - low3 = ConvBlock(256, low2) - - up2 = UpSampling2D(size=2) (low3) - return Add() ( [up1, up2] ) - - FAN_Input = Input ( (256, 256, 3) ) - - x = FAN_Input - - x = Conv2D (64, 
kernel_size=7, strides=2, padding='valid')(ZeroPadding2D(3)(x)) - x = BatchNormalization(momentum=0.1, epsilon=1e-05)(x) - x = ReLU()(x) - - x = ConvBlock (128, x) - x = AveragePooling2D (pool_size=2, strides=2, padding='valid') (x) - x = ConvBlock (128, x) - x = ConvBlock (256, x) - - outputs = [] - previous = x - for i in range(4): - ll = HourGlass (4, previous) - ll = ConvBlock (256, ll) - - ll = Conv2D(256, kernel_size=1, strides=1, padding='valid') (ll) - ll = BatchNormalization(momentum=0.1, epsilon=1e-05)(ll) - ll = ReLU() (ll) - - tmp_out = Conv2D(68, kernel_size=1, strides=1, padding='valid') (ll) - outputs.append(tmp_out) - - if i < 4 - 1: - ll = Conv2D(256, kernel_size=1, strides=1, padding='valid') (ll) - previous = Add() ( [previous, ll, KL.Conv2D(256, kernel_size=1, strides=1, padding='valid') (tmp_out) ] ) - - return Model(FAN_Input, outputs[-1] ) diff --git a/nnlib/FANSeg_256_full_face.h5 b/facelib/FANSeg_256_full_face.npy similarity index 99% rename from nnlib/FANSeg_256_full_face.h5 rename to facelib/FANSeg_256_full_face.npy index 6886504..53a6664 100644 Binary files a/nnlib/FANSeg_256_full_face.h5 and b/facelib/FANSeg_256_full_face.npy differ diff --git a/facelib/FaceEnhancer.h5 b/facelib/FaceEnhancer.npy similarity index 99% rename from facelib/FaceEnhancer.h5 rename to facelib/FaceEnhancer.npy index 201105b..1890f42 100644 Binary files a/facelib/FaceEnhancer.h5 and b/facelib/FaceEnhancer.npy differ diff --git a/facelib/FaceEnhancer.py b/facelib/FaceEnhancer.py index c3b2016..88c4da4 100644 --- a/facelib/FaceEnhancer.py +++ b/facelib/FaceEnhancer.py @@ -4,151 +4,321 @@ from pathlib import Path import cv2 import numpy as np - +from core.leras import nn class FaceEnhancer(object): """ x4 face enhancer """ - def __init__(self): - from nnlib import nnlib - exec( nnlib.import_all(), locals(), globals() ) + def __init__(self, place_model_on_cpu=False): + nn.initialize() + tf = nn.tf - model_path = Path(__file__).parent / "FaceEnhancer.h5" + class FaceEnhancer (nn.ModelBase): + def __init__(self, name='FaceEnhancer'): + super().__init__(name=name) + + def on_build(self): + self.conv1 = nn.Conv2D (3, 64, kernel_size=3, strides=1, padding='SAME') + + self.dense1 = nn.Dense (1, 64, use_bias=False) + self.dense2 = nn.Dense (1, 64, use_bias=False) + + self.e0_conv0 = nn.Conv2D (64, 64, kernel_size=3, strides=1, padding='SAME') + self.e0_conv1 = nn.Conv2D (64, 64, kernel_size=3, strides=1, padding='SAME') + + self.e1_conv0 = nn.Conv2D (64, 112, kernel_size=3, strides=1, padding='SAME') + self.e1_conv1 = nn.Conv2D (112, 112, kernel_size=3, strides=1, padding='SAME') + + self.e2_conv0 = nn.Conv2D (112, 192, kernel_size=3, strides=1, padding='SAME') + self.e2_conv1 = nn.Conv2D (192, 192, kernel_size=3, strides=1, padding='SAME') + + self.e3_conv0 = nn.Conv2D (192, 336, kernel_size=3, strides=1, padding='SAME') + self.e3_conv1 = nn.Conv2D (336, 336, kernel_size=3, strides=1, padding='SAME') + + self.e4_conv0 = nn.Conv2D (336, 512, kernel_size=3, strides=1, padding='SAME') + self.e4_conv1 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + + self.center_conv0 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + self.center_conv1 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + self.center_conv2 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + self.center_conv3 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + + self.d4_conv0 = nn.Conv2D (1024, 512, kernel_size=3, strides=1, padding='SAME') + self.d4_conv1 = 
nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + + self.d3_conv0 = nn.Conv2D (848, 512, kernel_size=3, strides=1, padding='SAME') + self.d3_conv1 = nn.Conv2D (512, 512, kernel_size=3, strides=1, padding='SAME') + + self.d2_conv0 = nn.Conv2D (704, 288, kernel_size=3, strides=1, padding='SAME') + self.d2_conv1 = nn.Conv2D (288, 288, kernel_size=3, strides=1, padding='SAME') + + self.d1_conv0 = nn.Conv2D (400, 160, kernel_size=3, strides=1, padding='SAME') + self.d1_conv1 = nn.Conv2D (160, 160, kernel_size=3, strides=1, padding='SAME') + + self.d0_conv0 = nn.Conv2D (224, 96, kernel_size=3, strides=1, padding='SAME') + self.d0_conv1 = nn.Conv2D (96, 96, kernel_size=3, strides=1, padding='SAME') + + self.out1x_conv0 = nn.Conv2D (96, 48, kernel_size=3, strides=1, padding='SAME') + self.out1x_conv1 = nn.Conv2D (48, 3, kernel_size=3, strides=1, padding='SAME') + + self.dec2x_conv0 = nn.Conv2D (96, 96, kernel_size=3, strides=1, padding='SAME') + self.dec2x_conv1 = nn.Conv2D (96, 96, kernel_size=3, strides=1, padding='SAME') + + self.out2x_conv0 = nn.Conv2D (96, 48, kernel_size=3, strides=1, padding='SAME') + self.out2x_conv1 = nn.Conv2D (48, 3, kernel_size=3, strides=1, padding='SAME') + + self.dec4x_conv0 = nn.Conv2D (96, 72, kernel_size=3, strides=1, padding='SAME') + self.dec4x_conv1 = nn.Conv2D (72, 72, kernel_size=3, strides=1, padding='SAME') + + self.out4x_conv0 = nn.Conv2D (72, 36, kernel_size=3, strides=1, padding='SAME') + self.out4x_conv1 = nn.Conv2D (36, 3 , kernel_size=3, strides=1, padding='SAME') + + def forward(self, inp): + bgr, param, param1 = inp + + x = self.conv1(bgr) + a = self.dense1(param) + a = tf.reshape(a, (-1,1,1,64) ) + + b = self.dense2(param1) + b = tf.reshape(b, (-1,1,1,64) ) + + x = tf.nn.leaky_relu(x+a+b, 0.1) + + x = tf.nn.leaky_relu(self.e0_conv0(x), 0.1) + x = e0 = tf.nn.leaky_relu(self.e0_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.e1_conv0(x), 0.1) + x = e1 = tf.nn.leaky_relu(self.e1_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.e2_conv0(x), 0.1) + x = e2 = tf.nn.leaky_relu(self.e2_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.e3_conv0(x), 0.1) + x = e3 = tf.nn.leaky_relu(self.e3_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.e4_conv0(x), 0.1) + x = e4 = tf.nn.leaky_relu(self.e4_conv1(x), 0.1) + + x = tf.nn.avg_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + x = tf.nn.leaky_relu(self.center_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.center_conv1(x), 0.1) + x = tf.nn.leaky_relu(self.center_conv2(x), 0.1) + x = tf.nn.leaky_relu(self.center_conv3(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e4], -1 ) + x = tf.nn.leaky_relu(self.d4_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.d4_conv1(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e3], -1 ) + x = tf.nn.leaky_relu(self.d3_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.d3_conv1(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e2], -1 ) + x = tf.nn.leaky_relu(self.d2_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.d2_conv1(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e1], -1 ) + x = tf.nn.leaky_relu(self.d1_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.d1_conv1(x), 0.1) + + x = tf.concat( [nn.tf_upsample2d_bilinear(x), e0], -1 ) + x = tf.nn.leaky_relu(self.d0_conv0(x), 0.1) + x = d0 = tf.nn.leaky_relu(self.d0_conv1(x), 0.1) + + x = 
tf.nn.leaky_relu(self.out1x_conv0(x), 0.1) + x = self.out1x_conv1(x) + out1x = bgr + tf.nn.tanh(x) + + x = d0 + x = tf.nn.leaky_relu(self.dec2x_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.dec2x_conv1(x), 0.1) + x = d2x = nn.tf_upsample2d_bilinear(x) + + x = tf.nn.leaky_relu(self.out2x_conv0(x), 0.1) + x = self.out2x_conv1(x) + + out2x = nn.tf_upsample2d_bilinear(out1x) + tf.nn.tanh(x) + + x = d2x + x = tf.nn.leaky_relu(self.dec4x_conv0(x), 0.1) + x = tf.nn.leaky_relu(self.dec4x_conv1(x), 0.1) + x = d4x = nn.tf_upsample2d_bilinear(x) + + x = tf.nn.leaky_relu(self.out4x_conv0(x), 0.1) + x = self.out4x_conv1(x) + + out4x = nn.tf_upsample2d_bilinear(out2x) + tf.nn.tanh(x) + + return out4x + + model_path = Path(__file__).parent / "FaceEnhancer.npy" if not model_path.exists(): - return - - bgr_inp = Input ( (192,192,3) ) - t_param_inp = Input ( (1,) ) - t_param1_inp = Input ( (1,) ) - x = Conv2D (64, 3, strides=1, padding='same' )(bgr_inp) - - a = Dense (64, use_bias=False) ( t_param_inp ) - a = Reshape( (1,1,64) )(a) - b = Dense (64, use_bias=False ) ( t_param1_inp ) - b = Reshape( (1,1,64) )(b) - x = Add()([x,a,b]) - - x = LeakyReLU(0.1)(x) + raise Exception("Unable to load FaceEnhancer.npy") - x = LeakyReLU(0.1)(Conv2D (64, 3, strides=1, padding='same' )(x)) - x = e0 = LeakyReLU(0.1)(Conv2D (64, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (112, 3, strides=1, padding='same')(x)) - x = e1 = LeakyReLU(0.1)(Conv2D (112, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (192, 3, strides=1, padding='same')(x)) - x = e2 = LeakyReLU(0.1)(Conv2D (192, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (336, 3, strides=1, padding='same')(x)) - x = e3 = LeakyReLU(0.1)(Conv2D (336, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = e4 = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - - x = AveragePooling2D()(x) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) + e = tf.device("/CPU:0") if place_model_on_cpu else None + if e is not None: e.__enter__() + self.model = FaceEnhancer() + self.model.load_weights (model_path) + if e is not None: e.__exit__(None,None,None) - x = Concatenate()([ BilinearInterpolation()(x), e4 ]) - - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - - x = Concatenate()([ BilinearInterpolation()(x), e3 ]) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (512, 3, strides=1, padding='same')(x)) - - x = Concatenate()([ BilinearInterpolation()(x), e2 ]) - x = LeakyReLU(0.1)(Conv2D (288, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (288, 3, strides=1, padding='same')(x)) - - x = Concatenate()([ BilinearInterpolation()(x), e1 ]) - x = LeakyReLU(0.1)(Conv2D (160, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (160, 3, strides=1, padding='same')(x)) - - x = Concatenate()([ BilinearInterpolation()(x), e0 ]) - x = LeakyReLU(0.1)(Conv2D (96, 3, strides=1, padding='same')(x)) - x = d0 = LeakyReLU(0.1)(Conv2D (96, 3, strides=1, padding='same')(x)) - - x = LeakyReLU(0.1)(Conv2D (48, 3, 
strides=1, padding='same')(x)) - - x = Conv2D (3, 3, strides=1, padding='same', activation='tanh')(x) - out1x = Add()([bgr_inp, x]) - - x = d0 - x = LeakyReLU(0.1)(Conv2D (96, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (96, 3, strides=1, padding='same')(x)) - x = d2x = BilinearInterpolation()(x) - - x = LeakyReLU(0.1)(Conv2D (48, 3, strides=1, padding='same')(x)) - x = Conv2D (3, 3, strides=1, padding='same', activation='tanh')(x) - - out2x = Add()([BilinearInterpolation()(out1x), x]) - - x = d2x - x = LeakyReLU(0.1)(Conv2D (72, 3, strides=1, padding='same')(x)) - x = LeakyReLU(0.1)(Conv2D (72, 3, strides=1, padding='same')(x)) - x = d4x = BilinearInterpolation()(x) - - x = LeakyReLU(0.1)(Conv2D (36, 3, strides=1, padding='same')(x)) - x = Conv2D (3, 3, strides=1, padding='same', activation='tanh')(x) - out4x = Add()([BilinearInterpolation()(out2x), x ]) - - self.model = keras.models.Model ( [bgr_inp,t_param_inp,t_param1_inp], [out4x] ) - self.model.load_weights (str(model_path)) + self.model.build_for_run ([ (tf.float32, (192,192,3) ), + (tf.float32, (1,) ), + (tf.float32, (1,) ), + ]) def enhance (self, inp_img, is_tanh=False, preserve_size=True): if not is_tanh: inp_img = np.clip( inp_img * 2 -1, -1, 1 ) - + param = np.array([0.2]) - param1 = np.array([1.0]) + param1 = np.array([1.0]) up_res = 4 patch_size = 192 patch_size_half = patch_size // 2 - - h,w,c = inp_img.shape + + ih,iw,ic = inp_img.shape + h,w,c = ih,iw,ic + th,tw = h*up_res, w*up_res + + t_padding = 0 + b_padding = 0 + l_padding = 0 + r_padding = 0 + + if h < patch_size: + t_padding = (patch_size-h)//2 + b_padding = (patch_size-h) - t_padding + + if w < patch_size: + l_padding = (patch_size-w)//2 + r_padding = (patch_size-w) - l_padding + + if t_padding != 0: + inp_img = np.concatenate ([ np.zeros ( (t_padding,w,c), dtype=np.float32 ), inp_img ], axis=0 ) + h,w,c = inp_img.shape + + if b_padding != 0: + inp_img = np.concatenate ([ inp_img, np.zeros ( (b_padding,w,c), dtype=np.float32 ) ], axis=0 ) + h,w,c = inp_img.shape + + if l_padding != 0: + inp_img = np.concatenate ([ np.zeros ( (h,l_padding,c), dtype=np.float32 ), inp_img ], axis=1 ) + h,w,c = inp_img.shape + + if r_padding != 0: + inp_img = np.concatenate ([ inp_img, np.zeros ( (h,r_padding,c), dtype=np.float32 ) ], axis=1 ) + h,w,c = inp_img.shape + + i_max = w-patch_size+1 - j_max = h-patch_size+1 - + j_max = h-patch_size+1 + final_img = np.zeros ( (h*up_res,w*up_res,c), dtype=np.float32 ) final_img_div = np.zeros ( (h*up_res,w*up_res,1), dtype=np.float32 ) - + x = np.concatenate ( [ np.linspace (0,1,patch_size_half*up_res), np.linspace (1,0,patch_size_half*up_res) ] ) x,y = np.meshgrid(x,x) patch_mask = (x*y)[...,None] - + j=0 while j < j_max: i = 0 - while i < i_max: - patch_img = inp_img[j:j+patch_size, i:i+patch_size,:] - x = self.model.predict( [ patch_img[None,...], param, param1 ] )[0] + while i < i_max: + patch_img = inp_img[j:j+patch_size, i:i+patch_size,:] + x = self.model.run( [ patch_img[None,...], [param], [param1] ] )[0] final_img [j*up_res:(j+patch_size)*up_res, i*up_res:(i+patch_size)*up_res,:] += x*patch_mask final_img_div[j*up_res:(j+patch_size)*up_res, i*up_res:(i+patch_size)*up_res,:] += patch_mask if i == i_max-1: break - i = min( i+patch_size_half, i_max-1) + i = min( i+patch_size_half, i_max-1) if j == j_max-1: break j = min( j+patch_size_half, j_max-1) - + final_img_div[final_img_div==0] = 1.0 final_img /= final_img_div - + + if t_padding+b_padding+l_padding+r_padding != 0: + final_img = final_img 
[t_padding*up_res:(h-b_padding)*up_res, l_padding*up_res:(w-r_padding)*up_res,:] + if preserve_size: - final_img = cv2.resize (final_img, (w,h), cv2.INTER_LANCZOS4) - + final_img = cv2.resize (final_img, (iw,ih), cv2.INTER_LANCZOS4) + if not is_tanh: final_img = np.clip( final_img/2+0.5, 0, 1 ) - + return final_img + + +""" + + def enhance (self, inp_img, is_tanh=False, preserve_size=True): + if not is_tanh: + inp_img = np.clip( inp_img * 2 -1, -1, 1 ) + + param = np.array([0.2]) + param1 = np.array([1.0]) + up_res = 4 + patch_size = 192 + patch_size_half = patch_size // 2 + + h,w,c = inp_img.shape + + th,tw = h*up_res, w*up_res + + preupscale_rate = 1.0 + + if h < patch_size or w < patch_size: + preupscale_rate = 1.0 / ( max(h,w) / patch_size ) + + if preupscale_rate != 1.0: + inp_img = cv2.resize (inp_img, ( int(w*preupscale_rate), int(h*preupscale_rate) ), cv2.INTER_LANCZOS4) + h,w,c = inp_img.shape + + i_max = w-patch_size+1 + j_max = h-patch_size+1 + + final_img = np.zeros ( (h*up_res,w*up_res,c), dtype=np.float32 ) + final_img_div = np.zeros ( (h*up_res,w*up_res,1), dtype=np.float32 ) + + x = np.concatenate ( [ np.linspace (0,1,patch_size_half*up_res), np.linspace (1,0,patch_size_half*up_res) ] ) + x,y = np.meshgrid(x,x) + patch_mask = (x*y)[...,None] + + j=0 + while j < j_max: + i = 0 + while i < i_max: + patch_img = inp_img[j:j+patch_size, i:i+patch_size,:] + x = self.model.run( [ patch_img[None,...], [param], [param1] ] )[0] + final_img [j*up_res:(j+patch_size)*up_res, i*up_res:(i+patch_size)*up_res,:] += x*patch_mask + final_img_div[j*up_res:(j+patch_size)*up_res, i*up_res:(i+patch_size)*up_res,:] += patch_mask + if i == i_max-1: + break + i = min( i+patch_size_half, i_max-1) + if j == j_max-1: + break + j = min( j+patch_size_half, j_max-1) + + final_img_div[final_img_div==0] = 1.0 + final_img /= final_img_div + + if preserve_size: + final_img = cv2.resize (final_img, (w,h), cv2.INTER_LANCZOS4) + else: + if preupscale_rate != 1.0: + final_img = cv2.resize (final_img, (tw,th), cv2.INTER_LANCZOS4) + + if not is_tanh: + final_img = np.clip( final_img/2+0.5, 0, 1 ) + + return final_img +""" \ No newline at end of file diff --git a/facelib/LandmarksProcessor.py b/facelib/LandmarksProcessor.py index 7d9f706..560ae29 100644 --- a/facelib/LandmarksProcessor.py +++ b/facelib/LandmarksProcessor.py @@ -6,11 +6,11 @@ import cv2 import numpy as np import numpy.linalg as npla -import imagelib -import mathlib +from core import imagelib +from core import mathlib from facelib import FaceType -from imagelib import IEPolys -from mathlib.umeyama import umeyama +from core.imagelib import IEPolys +from core.mathlib.umeyama import umeyama landmarks_2D = np.array([ [ 0.000213256, 0.106454 ], #17 @@ -665,8 +665,10 @@ def calc_face_yaw(landmarks): r = ( (landmarks[16][0]-landmarks[27][0]) + (landmarks[15][0]-landmarks[28][0]) + (landmarks[14][0]-landmarks[29][0]) ) / 3.0 return float(r-l) -#returns pitch,yaw,roll [-1...+1] def estimate_pitch_yaw_roll(aligned_256px_landmarks): + """ + returns pitch,yaw,roll [-pi...+pi] + """ shape = (256,256) focal_length = shape[1] camera_center = (shape[1] / 2, shape[0] / 2) @@ -682,7 +684,8 @@ def estimate_pitch_yaw_roll(aligned_256px_landmarks): np.zeros((4, 1)) ) pitch, yaw, roll = mathlib.rotationMatrixToEulerAngles( cv2.Rodrigues(rotation_vector)[0] ) - pitch = np.clip ( pitch/1.30, -1.0, 1.0 ) - yaw = np.clip ( yaw / 1.11, -1.0, 1.0 ) - roll = np.clip ( roll/3.15, -1.0, 1.0 ) #todo radians + pitch = np.clip ( pitch, -math.pi, math.pi ) + yaw = np.clip ( yaw , 
-math.pi, math.pi ) + roll = np.clip ( roll, -math.pi, math.pi ) + return -pitch, yaw, roll diff --git a/facelib/MTCExtractor.py b/facelib/MTCExtractor.py deleted file mode 100644 index c524ab9..0000000 --- a/facelib/MTCExtractor.py +++ /dev/null @@ -1,350 +0,0 @@ -import numpy as np -import os -import cv2 - -from pathlib import Path -from nnlib import nnlib - -class MTCExtractor(object): - def __init__(self): - self.scale_to = 1920 - - self.min_face_size = self.scale_to * 0.042 - self.thresh1 = 0.7 - self.thresh2 = 0.85 - self.thresh3 = 0.6 - self.scale_factor = 0.95 - - exec( nnlib.import_all(), locals(), globals() ) - PNet_Input = Input ( (None, None,3) ) - x = PNet_Input - x = Conv2D (10, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="PReLU1" )(x) - x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) - x = Conv2D (16, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="PReLU2" )(x) - x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="PReLU3" )(x) - prob = Conv2D (2, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv41")(x) - prob = Softmax()(prob) - x = Conv2D (4, kernel_size=(1,1), strides=(1,1), padding='valid', name="conv42")(x) - - PNet_model = Model(PNet_Input, [x,prob] ) - PNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_pnet.h5').__str__() ) - - RNet_Input = Input ( (24, 24, 3) ) - x = RNet_Input - x = Conv2D (28, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="prelu1" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) - x = Conv2D (48, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="prelu2" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) - x = Conv2D (64, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="prelu3" )(x) - x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) - x = Dense (128, name='conv4')(x) - x = PReLU (name="prelu4" )(x) - prob = Dense (2, name='conv51')(x) - prob = Softmax()(prob) - x = Dense (4, name='conv52')(x) - RNet_model = Model(RNet_Input, [x,prob] ) - RNet_model.load_weights ( (Path(__file__).parent / 'mtcnn_rnet.h5').__str__() ) - - ONet_Input = Input ( (48, 48, 3) ) - x = ONet_Input - x = Conv2D (32, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv1")(x) - x = PReLU (shared_axes=[1,2], name="prelu1" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='same' ) (x) - x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv2")(x) - x = PReLU (shared_axes=[1,2], name="prelu2" )(x) - x = MaxPooling2D( pool_size=(3,3), strides=(2,2), padding='valid' ) (x) - x = Conv2D (64, kernel_size=(3,3), strides=(1,1), padding='valid', name="conv3")(x) - x = PReLU (shared_axes=[1,2], name="prelu3" )(x) - x = MaxPooling2D( pool_size=(2,2), strides=(2,2), padding='same' ) (x) - x = Conv2D (128, kernel_size=(2,2), strides=(1,1), padding='valid', name="conv4")(x) - x = PReLU (shared_axes=[1,2], name="prelu4" )(x) - x = Lambda ( lambda x: K.reshape (x, (-1, np.prod(K.int_shape(x)[1:]),) ), output_shape=(np.prod(K.int_shape(x)[1:]),) ) (x) - x = Dense (256, name='conv5')(x) - x = PReLU (name="prelu5" )(x) - prob = 
Dense (2, name='conv61')(x) - prob = Softmax()(prob) - x1 = Dense (4, name='conv62')(x) - x2 = Dense (10, name='conv63')(x) - ONet_model = Model(ONet_Input, [x1,x2,prob] ) - ONet_model.load_weights ( (Path(__file__).parent / 'mtcnn_onet.h5').__str__() ) - - self.pnet_fun = K.function ( PNet_model.inputs, PNet_model.outputs ) - self.rnet_fun = K.function ( RNet_model.inputs, RNet_model.outputs ) - self.onet_fun = K.function ( ONet_model.inputs, ONet_model.outputs ) - - def __enter__(self): - faces, pnts = detect_face ( np.zeros ( (self.scale_to, self.scale_to, 3)), self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) - - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def extract (self, input_image, is_bgr=True): - - if is_bgr: - input_image = input_image[:,:,::-1].copy() - is_bgr = False - - (h, w, ch) = input_image.shape - - input_scale = self.scale_to / max(w,h) - input_image = cv2.resize (input_image, ( int(w*input_scale), int(h*input_scale) ), interpolation=cv2.INTER_LINEAR) - - detected_faces, pnts = detect_face ( input_image, self.min_face_size, self.pnet_fun, self.rnet_fun, self.onet_fun, [ self.thresh1, self.thresh2, self.thresh3 ], self.scale_factor ) - detected_faces = [ ( int(face[0]/input_scale), int(face[1]/input_scale), int(face[2]/input_scale), int(face[3]/input_scale)) for face in detected_faces ] - - return detected_faces - -def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): - """Detects faces in an image, and returns bounding boxes and points for them. - img: input image - minsize: minimum faces' size - pnet, rnet, onet: caffemodel - threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold - factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 
- """ - factor_count=0 - total_boxes=np.empty((0,9)) - points=np.empty(0) - h=img.shape[0] - w=img.shape[1] - minl=np.amin([h, w]) - m=12.0/minsize - minl=minl*m - # create scale pyramid - scales=[] - while minl>=12: - scales += [m*np.power(factor, factor_count)] - minl = minl*factor - factor_count += 1 - # first stage - for scale in scales: - hs=int(np.ceil(h*scale)) - ws=int(np.ceil(w*scale)) - #print ('scale %f %d %d' % (scale, ws,hs)) - im_data = imresample(img, (hs, ws)) - im_data = (im_data-127.5)*0.0078125 - img_x = np.expand_dims(im_data, 0) - img_y = np.transpose(img_x, (0,2,1,3)) - out = pnet([img_y]) - out0 = np.transpose(out[0], (0,2,1,3)) - out1 = np.transpose(out[1], (0,2,1,3)) - - boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) - - # inter-scale nms - pick = nms(boxes.copy(), 0.5, 'Union') - if boxes.size>0 and pick.size>0: - boxes = boxes[pick,:] - total_boxes = np.append(total_boxes, boxes, axis=0) - - numbox = total_boxes.shape[0] - if numbox>0: - pick = nms(total_boxes.copy(), 0.7, 'Union') - total_boxes = total_boxes[pick,:] - regw = total_boxes[:,2]-total_boxes[:,0] - regh = total_boxes[:,3]-total_boxes[:,1] - qq1 = total_boxes[:,0]+total_boxes[:,5]*regw - qq2 = total_boxes[:,1]+total_boxes[:,6]*regh - qq3 = total_boxes[:,2]+total_boxes[:,7]*regw - qq4 = total_boxes[:,3]+total_boxes[:,8]*regh - total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) - total_boxes = rerec(total_boxes.copy()) - total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) - dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) - - numbox = total_boxes.shape[0] - if numbox>0: - # second stage - tempimg = np.zeros((24,24,3,numbox)) - for k in range(0,numbox): - tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) - tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] - if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: - tempimg[:,:,:,k] = imresample(tmp, (24, 24)) - else: - return np.empty() - tempimg = (tempimg-127.5)*0.0078125 - tempimg1 = np.transpose(tempimg, (3,1,0,2)) - out = rnet([tempimg1]) - out0 = np.transpose(out[0]) - out1 = np.transpose(out[1]) - score = out1[1,:] - ipass = np.where(score>threshold[1]) - total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) - mv = out0[:,ipass[0]] - if total_boxes.shape[0]>0: - pick = nms(total_boxes, 0.7, 'Union') - total_boxes = total_boxes[pick,:] - total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) - total_boxes = rerec(total_boxes.copy()) - - numbox = total_boxes.shape[0] - if numbox>0: - # third stage - total_boxes = np.fix(total_boxes).astype(np.int32) - dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) - tempimg = np.zeros((48,48,3,numbox)) - for k in range(0,numbox): - tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) - tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] - if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: - tempimg[:,:,:,k] = imresample(tmp, (48, 48)) - else: - return np.empty() - tempimg = (tempimg-127.5)*0.0078125 - tempimg1 = np.transpose(tempimg, (3,1,0,2)) - out = onet([tempimg1]) - out0 = np.transpose(out[0]) - out1 = np.transpose(out[1]) - out2 = np.transpose(out[2]) - score = out2[1,:] - points = out1 - ipass = np.where(score>threshold[2]) - points = points[:,ipass[0]] - total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), 
np.expand_dims(score[ipass].copy(),1)]) - mv = out0[:,ipass[0]] - - w = total_boxes[:,2]-total_boxes[:,0]+1 - h = total_boxes[:,3]-total_boxes[:,1]+1 - points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 - points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 - if total_boxes.shape[0]>0: - total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) - pick = nms(total_boxes.copy(), 0.7, 'Min') - total_boxes = total_boxes[pick,:] - points = points[:,pick] - - return total_boxes, points - - -# function [boundingbox] = bbreg(boundingbox,reg) -def bbreg(boundingbox,reg): - """Calibrate bounding boxes""" - if reg.shape[1]==1: - reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) - - w = boundingbox[:,2]-boundingbox[:,0]+1 - h = boundingbox[:,3]-boundingbox[:,1]+1 - b1 = boundingbox[:,0]+reg[:,0]*w - b2 = boundingbox[:,1]+reg[:,1]*h - b3 = boundingbox[:,2]+reg[:,2]*w - b4 = boundingbox[:,3]+reg[:,3]*h - boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) - return boundingbox - -def generateBoundingBox(imap, reg, scale, t): - """Use heatmap to generate bounding boxes""" - stride=2 - cellsize=12 - - imap = np.transpose(imap) - dx1 = np.transpose(reg[:,:,0]) - dy1 = np.transpose(reg[:,:,1]) - dx2 = np.transpose(reg[:,:,2]) - dy2 = np.transpose(reg[:,:,3]) - y, x = np.where(imap >= t) - if y.shape[0]==1: - dx1 = np.flipud(dx1) - dy1 = np.flipud(dy1) - dx2 = np.flipud(dx2) - dy2 = np.flipud(dy2) - score = imap[(y,x)] - reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) - if reg.size==0: - reg = np.empty((0,3)) - bb = np.transpose(np.vstack([y,x])) - q1 = np.fix((stride*bb+1)/scale) - q2 = np.fix((stride*bb+cellsize-1+1)/scale) - boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) - return boundingbox, reg - -# function pick = nms(boxes,threshold,type) -def nms(boxes, threshold, method): - if boxes.size==0: - return np.empty((0,3)) - x1 = boxes[:,0] - y1 = boxes[:,1] - x2 = boxes[:,2] - y2 = boxes[:,3] - s = boxes[:,4] - area = (x2-x1+1) * (y2-y1+1) - I = np.argsort(s) - pick = np.zeros_like(s, dtype=np.int16) - counter = 0 - while I.size>0: - i = I[-1] - pick[counter] = i - counter += 1 - idx = I[0:-1] - xx1 = np.maximum(x1[i], x1[idx]) - yy1 = np.maximum(y1[i], y1[idx]) - xx2 = np.minimum(x2[i], x2[idx]) - yy2 = np.minimum(y2[i], y2[idx]) - w = np.maximum(0.0, xx2-xx1+1) - h = np.maximum(0.0, yy2-yy1+1) - inter = w * h - if method == 'Min': - o = inter / np.minimum(area[i], area[idx]) - else: - o = inter / (area[i] + area[idx] - inter) - I = I[np.where(o<=threshold)] - pick = pick[0:counter] - return pick - -# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) -def pad(total_boxes, w, h): - """Compute the padding coordinates (pad the bounding boxes to square)""" - tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) - tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) - numbox = total_boxes.shape[0] - - dx = np.ones((numbox), dtype=np.int32) - dy = np.ones((numbox), dtype=np.int32) - edx = tmpw.copy().astype(np.int32) - edy = tmph.copy().astype(np.int32) - - x = total_boxes[:,0].copy().astype(np.int32) - y = total_boxes[:,1].copy().astype(np.int32) - ex = total_boxes[:,2].copy().astype(np.int32) - ey = total_boxes[:,3].copy().astype(np.int32) - - tmp = np.where(ex>w) - edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) - ex[tmp] = w - - tmp = np.where(ey>h) - edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) - ey[tmp] = h - - tmp = 
np.where(x<1) - dx.flat[tmp] = np.expand_dims(2-x[tmp],1) - x[tmp] = 1 - - tmp = np.where(y<1) - dy.flat[tmp] = np.expand_dims(2-y[tmp],1) - y[tmp] = 1 - - return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph - -# function [bboxA] = rerec(bboxA) -def rerec(bboxA): - """Convert bboxA to square.""" - h = bboxA[:,3]-bboxA[:,1] - w = bboxA[:,2]-bboxA[:,0] - l = np.maximum(w, h) - bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 - bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 - bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) - return bboxA - -def imresample(img, sz): - im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_LINEAR) #@UndefinedVariable - return im_data diff --git a/facelib/PoseEstimator.py b/facelib/PoseEstimator.py deleted file mode 100644 index 7939d90..0000000 --- a/facelib/PoseEstimator.py +++ /dev/null @@ -1,302 +0,0 @@ -import os -import pickle -from functools import partial -from pathlib import Path - -import cv2 -import numpy as np - -from interact import interact as io -from nnlib import nnlib - -""" -PoseEstimator estimates pitch, yaw, roll, from FAN aligned face. -trained on https://www.umdfaces.io -based on https://arxiv.org/pdf/1901.06778.pdf HYBRID COARSE-FINE CLASSIFICATION FOR HEAD POSE ESTIMATION -""" - -class PoseEstimator(object): - VERSION = 1 - def __init__ (self, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): - exec( nnlib.import_all(), locals(), globals() ) - self.resolution = resolution - - self.angles = [60, 45, 30, 10, 2] - self.alpha_cat_losses = [7,5,3,1,1] - self.class_nums = [ angle+1 for angle in self.angles ] - self.encoder, self.decoder, self.model_l = PoseEstimator.BuildModels(resolution, class_nums=self.class_nums) - - if weights_file_root is not None: - weights_file_root = Path(weights_file_root) - else: - weights_file_root = Path(__file__).parent - - self.encoder_weights_path = weights_file_root / ('PoseEst_%d_%s_enc.h5' % (resolution, face_type_str) ) - self.decoder_weights_path = weights_file_root / ('PoseEst_%d_%s_dec.h5' % (resolution, face_type_str) ) - self.l_weights_path = weights_file_root / ('PoseEst_%d_%s_l.h5' % (resolution, face_type_str) ) - - self.model_weights_path = weights_file_root / ('PoseEst_%d_%s.h5' % (resolution, face_type_str) ) - - self.input_bgr_shape = (resolution, resolution, 3) - - def ResamplerFunc(input): - mean_t, logvar_t = input - return mean_t + K.exp(0.5*logvar_t)*K.random_normal(K.shape(mean_t)) - - self.BVAEResampler = Lambda ( lambda x: x[0] + K.random_normal(K.shape(x[0])) * K.sqrt(K.exp(0.5*x[1])), - output_shape=K.int_shape(self.encoder.outputs[0])[1:] ) - - inp_t = Input (self.input_bgr_shape) - inp_real_t = Input (self.input_bgr_shape) - inp_pitch_t = Input ( (1,) ) - inp_yaw_t = Input ( (1,) ) - inp_roll_t = Input ( (1,) ) - - - mean_t, logvar_t = self.encoder(inp_t) - - latent_t = self.BVAEResampler([mean_t, logvar_t]) - - if training: - bgr_t = self.decoder (latent_t) - pyrs_t = self.model_l(latent_t) - else: - self.model = Model(inp_t, self.model_l(latent_t) ) - pyrs_t = self.model(inp_t) - - if load_weights: - if training: - self.encoder.load_weights (str(self.encoder_weights_path)) - self.decoder.load_weights (str(self.decoder_weights_path)) - self.model_l.load_weights (str(self.l_weights_path)) - else: - self.model.load_weights (str(self.model_weights_path)) - - else: - def gather_Conv2D_layers(models_list): - conv_weights_list = [] - for model in models_list: - for layer in model.layers: - layer_type = type(layer) - if layer_type == keras.layers.Conv2D: - 
conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - elif layer_type == keras.engine.training.Model: - conv_weights_list += gather_Conv2D_layers ([layer]) - return conv_weights_list - - CAInitializerMP ( gather_Conv2D_layers( [self.encoder, self.decoder] ) ) - - - if training: - inp_pyrs_t = [] - for class_num in self.class_nums: - inp_pyrs_t += [ Input ((3,)) ] - - pyr_loss = [] - - for i,class_num in enumerate(self.class_nums): - a = self.alpha_cat_losses[i] - pyr_loss += [ a*K.mean( K.square ( inp_pyrs_t[i] - pyrs_t[i]) ) ] - - def BVAELoss(beta=4): - def func(input): - mean_t, logvar_t = input - return beta * K.mean ( K.sum( 0.5*(K.exp(logvar_t)+ K.square(mean_t)-logvar_t-1), axis=1) ) - return func - - BVAE_loss = BVAELoss()([mean_t, logvar_t]) - - bgr_loss = K.mean(K.sum(K.abs(inp_real_t-bgr_t), axis=[1,2,3])) - - G_loss = BVAE_loss+bgr_loss - pyr_loss = sum(pyr_loss) - - - self.train = K.function ([inp_t, inp_real_t], - [ G_loss ], Adam(lr=0.0005, beta_1=0.9, beta_2=0.999).get_updates( G_loss, self.encoder.trainable_weights+self.decoder.trainable_weights ) ) - - self.train_l = K.function ([inp_t] + inp_pyrs_t, - [pyr_loss], Adam(lr=0.0001).get_updates( pyr_loss, self.model_l.trainable_weights) ) - - - self.view = K.function ([inp_t], [ bgr_t, pyrs_t[0] ] ) - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def save_weights(self): - self.encoder.save_weights (str(self.encoder_weights_path)) - self.decoder.save_weights (str(self.decoder_weights_path)) - self.model_l.save_weights (str(self.l_weights_path)) - - inp_t = Input (self.input_bgr_shape) - - Model(inp_t, self.model_l(self.BVAEResampler(self.encoder(inp_t))) ).save_weights (str(self.model_weights_path)) - - def train_on_batch(self, warps, imgs, pyr_tanh, skip_bgr_train=False): - if not skip_bgr_train: - bgr_loss, = self.train( [warps, imgs] ) - pyr_loss = 0 - else: - bgr_loss = 0 - - feed = [imgs] - for i, (angle, class_num) in enumerate(zip(self.angles, self.class_nums)): - a = angle / 2 - c = np.round( (pyr_tanh+1) * a ) / a -1 #.astype(K.floatx()) - feed += [c] - - pyr_loss, = self.train_l(feed) - - return bgr_loss, pyr_loss - - def extract (self, input_image, is_input_tanh=False): - if is_input_tanh: - raise NotImplemented("is_input_tanh") - - input_shape_len = len(input_image.shape) - if input_shape_len == 3: - input_image = input_image[np.newaxis,...] 
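# --- editor's sketch (not part of the original diff) ---------------------------
# NumPy restatement of the two formulas the (now removed) PoseEstimator relied on:
# the reparameterization trick from ResamplerFunc and the beta-VAE KL term from
# BVAELoss above. Illustrative only; beta=4 matches the default used above.
import numpy as np

def reparameterize(mean, logvar, rng=None):
    # z = mu + sigma * eps,  sigma = exp(0.5 * logvar),  eps ~ N(0, I)
    rng = rng if rng is not None else np.random.default_rng()
    return mean + np.exp(0.5 * logvar) * rng.standard_normal(mean.shape)

def bvae_kl(mean, logvar, beta=4.0):
    # beta * (batch mean of KL( N(mu, sigma^2) || N(0, I) ), summed over latent dims)
    return beta * np.mean(np.sum(0.5 * (np.exp(logvar) + mean ** 2 - logvar - 1.0), axis=1))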
- - bgr, result, = self.view( [input_image] ) - - - #result = np.clip ( result / (self.angles[0] / 2) - 1, 0.0, 1.0 ) - - if input_shape_len == 3: - bgr = bgr[0] - result = result[0] - - return bgr, result - - @staticmethod - def BuildModels ( resolution, class_nums, ae_dims=128): - exec( nnlib.import_all(), locals(), globals() ) - - x = inp = Input ( (resolution,resolution,3) ) - x = PoseEstimator.EncFlow(ae_dims)(x) - encoder = Model(inp,x) - - x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) - x = PoseEstimator.DecFlow(resolution, ae_dims)(x) - decoder = Model(inp,x) - - x = inp = Input ( K.int_shape(encoder.outputs[0][1:]) ) - x = PoseEstimator.LatentFlow(class_nums=class_nums)(x) - model_l = Model(inp, x ) - - return encoder, decoder, model_l - - @staticmethod - def EncFlow(ae_dims): - exec( nnlib.import_all(), locals(), globals() ) - - def downscale (dim, **kwargs): - def func(x): - return ReLU() ( Conv2D(dim, kernel_size=5, strides=2, padding='same')(x)) - return func - - - downscale = partial(downscale) - - ed_ch_dims = 128 - - def func(input): - x = input - x = downscale(64)(x) - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = Flatten()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - mean = Dense(ae_dims)(x) - logvar = Dense(ae_dims)(x) - - return mean, logvar - - return func - - @staticmethod - def DecFlow(resolution, ae_dims): - exec( nnlib.import_all(), locals(), globals() ) - - def upscale (dim, strides=2, **kwargs): - def func(x): - return ReLU()( ( Conv2DTranspose(dim, kernel_size=3, strides=strides, padding='same')(x)) ) - return func - - def to_bgr (output_nc, **kwargs): - def func(x): - return Conv2D(output_nc, kernel_size=5, padding='same', activation='sigmoid')(x) - return func - - upscale = partial(upscale) - lowest_dense_res = resolution // 16 - - def func(input): - x = input - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense(256)(x) - x = ReLU()(x) - - x = Dense( (lowest_dense_res*lowest_dense_res*256) ) (x) - x = ReLU()(x) - - x = Reshape( (lowest_dense_res,lowest_dense_res,256) )(x) - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - x = to_bgr(3)(x) - - return x - return func - - @staticmethod - def LatentFlow(class_nums): - exec( nnlib.import_all(), locals(), globals() ) - - def func(latent): - x = latent - - x = Dense(1024, activation='relu')(x) - x = Dropout(0.5)(x) - x = Dense(1024, activation='relu')(x) - # x = Dropout(0.5)(x) - # x = Dense(4096, activation='relu')(x) - - output = [] - for class_num in class_nums: - pyr = Dense(3, activation='tanh')(x) - output += [pyr] - - return output - - #y = Dropout(0.5)(y) - #y = Dense(1024, activation='relu')(y) - return func - - -# resnet50 = keras.applications.ResNet50(include_top=False, weights=None, input_shape=K.int_shape(x)[1:], pooling='avg') -# x = resnet50(x) -# output = [] -# for class_num in class_nums: -# pitch = Dense(class_num)(x) -# yaw = Dense(class_num)(x) -# roll = Dense(class_num)(x) -# output += [pitch,yaw,roll] - -# return output diff --git a/facelib/S3FD.h5 b/facelib/S3FD.npy similarity index 84% rename from facelib/S3FD.h5 rename to facelib/S3FD.npy index aed0938..66c054f 100644 Binary files a/facelib/S3FD.h5 and b/facelib/S3FD.npy differ diff --git a/facelib/S3FDExtractor.py b/facelib/S3FDExtractor.py index adafae9..c58d931 100644 --- a/facelib/S3FDExtractor.py +++ b/facelib/S3FDExtractor.py @@ -4,21 +4,171 @@ from pathlib import Path import cv2 import numpy as np -from nnlib import 
nnlib +from core.leras import nn class S3FDExtractor(object): - def __init__(self, do_dummy_predict=False): - exec( nnlib.import_all(), locals(), globals() ) - - model_path = Path(__file__).parent / "S3FD.h5" - if not model_path.exists(): - return None - - self.model = nnlib.keras.models.load_model ( str(model_path) ) + def __init__(self, place_model_on_cpu=False): + nn.initialize() + tf = nn.tf - if do_dummy_predict: - self.extract ( np.zeros( (640,640,3), dtype=np.uint8) ) + model_path = Path(__file__).parent / "S3FD.npy" + if not model_path.exists(): + raise Exception("Unable to load S3FD.npy") + class L2Norm(nn.LayerBase): + def __init__(self, n_channels, **kwargs): + self.n_channels = n_channels + super().__init__(**kwargs) + + def build_weights(self): + self.weight = tf.get_variable ("weight", (1, 1, 1, self.n_channels), dtype=nn.tf_floatx, initializer=tf.initializers.ones ) + + def get_weights(self): + return [self.weight] + + def __call__(self, inputs): + x = inputs + x = x / (tf.sqrt( tf.reduce_sum( tf.pow(x, 2), axis=-1, keepdims=True ) ) + 1e-10) * self.weight + return x + + class S3FD(nn.ModelBase): + def __init__(self): + super().__init__(name='S3FD') + + def on_build(self): + self.minus = tf.constant([104,117,123], dtype=nn.tf_floatx ) + self.conv1_1 = nn.Conv2D(3, 64, kernel_size=3, strides=1, padding='SAME') + self.conv1_2 = nn.Conv2D(64, 64, kernel_size=3, strides=1, padding='SAME') + + self.conv2_1 = nn.Conv2D(64, 128, kernel_size=3, strides=1, padding='SAME') + self.conv2_2 = nn.Conv2D(128, 128, kernel_size=3, strides=1, padding='SAME') + + self.conv3_1 = nn.Conv2D(128, 256, kernel_size=3, strides=1, padding='SAME') + self.conv3_2 = nn.Conv2D(256, 256, kernel_size=3, strides=1, padding='SAME') + self.conv3_3 = nn.Conv2D(256, 256, kernel_size=3, strides=1, padding='SAME') + + self.conv4_1 = nn.Conv2D(256, 512, kernel_size=3, strides=1, padding='SAME') + self.conv4_2 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + self.conv4_3 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + + self.conv5_1 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + self.conv5_2 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + self.conv5_3 = nn.Conv2D(512, 512, kernel_size=3, strides=1, padding='SAME') + + self.fc6 = nn.Conv2D(512, 1024, kernel_size=3, strides=1, padding=3) + self.fc7 = nn.Conv2D(1024, 1024, kernel_size=1, strides=1, padding='SAME') + + self.conv6_1 = nn.Conv2D(1024, 256, kernel_size=1, strides=1, padding='SAME') + self.conv6_2 = nn.Conv2D(256, 512, kernel_size=3, strides=2, padding='SAME') + + self.conv7_1 = nn.Conv2D(512, 128, kernel_size=1, strides=1, padding='SAME') + self.conv7_2 = nn.Conv2D(128, 256, kernel_size=3, strides=2, padding='SAME') + + self.conv3_3_norm = L2Norm(256) + self.conv4_3_norm = L2Norm(512) + self.conv5_3_norm = L2Norm(512) + + + self.conv3_3_norm_mbox_conf = nn.Conv2D(256, 4, kernel_size=3, strides=1, padding='SAME') + self.conv3_3_norm_mbox_loc = nn.Conv2D(256, 4, kernel_size=3, strides=1, padding='SAME') + + self.conv4_3_norm_mbox_conf = nn.Conv2D(512, 2, kernel_size=3, strides=1, padding='SAME') + self.conv4_3_norm_mbox_loc = nn.Conv2D(512, 4, kernel_size=3, strides=1, padding='SAME') + + self.conv5_3_norm_mbox_conf = nn.Conv2D(512, 2, kernel_size=3, strides=1, padding='SAME') + self.conv5_3_norm_mbox_loc = nn.Conv2D(512, 4, kernel_size=3, strides=1, padding='SAME') + + self.fc7_mbox_conf = nn.Conv2D(1024, 2, kernel_size=3, strides=1, padding='SAME') + self.fc7_mbox_loc = nn.Conv2D(1024, 
4, kernel_size=3, strides=1, padding='SAME') + + self.conv6_2_mbox_conf = nn.Conv2D(512, 2, kernel_size=3, strides=1, padding='SAME') + self.conv6_2_mbox_loc = nn.Conv2D(512, 4, kernel_size=3, strides=1, padding='SAME') + + self.conv7_2_mbox_conf = nn.Conv2D(256, 2, kernel_size=3, strides=1, padding='SAME') + self.conv7_2_mbox_loc = nn.Conv2D(256, 4, kernel_size=3, strides=1, padding='SAME') + + def forward(self, inp): + x, = inp + x = x - self.minus + x = tf.nn.relu(self.conv1_1(x)) + x = tf.nn.relu(self.conv1_2(x)) + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.conv2_1(x)) + x = tf.nn.relu(self.conv2_2(x)) + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.conv3_1(x)) + x = tf.nn.relu(self.conv3_2(x)) + x = tf.nn.relu(self.conv3_3(x)) + f3_3 = x + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.conv4_1(x)) + x = tf.nn.relu(self.conv4_2(x)) + x = tf.nn.relu(self.conv4_3(x)) + f4_3 = x + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.conv5_1(x)) + x = tf.nn.relu(self.conv5_2(x)) + x = tf.nn.relu(self.conv5_3(x)) + f5_3 = x + x = tf.nn.max_pool(x, [1,2,2,1], [1,2,2,1], "VALID") + + x = tf.nn.relu(self.fc6(x)) + x = tf.nn.relu(self.fc7(x)) + ffc7 = x + + x = tf.nn.relu(self.conv6_1(x)) + x = tf.nn.relu(self.conv6_2(x)) + f6_2 = x + + x = tf.nn.relu(self.conv7_1(x)) + x = tf.nn.relu(self.conv7_2(x)) + f7_2 = x + + f3_3 = self.conv3_3_norm(f3_3) + f4_3 = self.conv4_3_norm(f4_3) + f5_3 = self.conv5_3_norm(f5_3) + + cls1 = self.conv3_3_norm_mbox_conf(f3_3) + reg1 = self.conv3_3_norm_mbox_loc(f3_3) + + cls2 = tf.nn.softmax(self.conv4_3_norm_mbox_conf(f4_3)) + reg2 = self.conv4_3_norm_mbox_loc(f4_3) + + cls3 = tf.nn.softmax(self.conv5_3_norm_mbox_conf(f5_3)) + reg3 = self.conv5_3_norm_mbox_loc(f5_3) + + cls4 = tf.nn.softmax(self.fc7_mbox_conf(ffc7)) + reg4 = self.fc7_mbox_loc(ffc7) + + cls5 = tf.nn.softmax(self.conv6_2_mbox_conf(f6_2)) + reg5 = self.conv6_2_mbox_loc(f6_2) + + cls6 = tf.nn.softmax(self.conv7_2_mbox_conf(f7_2)) + reg6 = self.conv7_2_mbox_loc(f7_2) + + # max-out background label + bmax = tf.maximum(tf.maximum(cls1[:,:,:,0:1], cls1[:,:,:,1:2]), cls1[:,:,:,2:3]) + + cls1 = tf.concat ([bmax, cls1[:,:,:,3:4] ], axis=-1) + cls1 = tf.nn.softmax(cls1) + + return [cls1, reg1, cls2, reg2, cls3, reg3, cls4, reg4, cls5, reg5, cls6, reg6] + + e = None + if place_model_on_cpu: + e = tf.device("/CPU:0") + + if e is not None: e.__enter__() + self.model = S3FD() + self.model.load_weights (model_path) + if e is not None: e.__exit__(None,None,None) + + self.model.build_for_run ([ ( tf.float32, (None,None,3) ) ]) + def __enter__(self): return self @@ -40,7 +190,7 @@ class S3FDExtractor(object): input_scale = d / scale_to input_image = cv2.resize (input_image, ( int(w/input_scale), int(h/input_scale) ), interpolation=cv2.INTER_LINEAR) - olist = self.model.predict( np.expand_dims(input_image,0) ) + olist = self.model.run ([ input_image[None,...] 
] ) detected_faces = [] for ltrb in self.refine (olist): @@ -75,8 +225,8 @@ class S3FDExtractor(object): s_d2 = stride / 2 s_m4 = stride * 4 - for hindex, windex in zip(*np.where(ocls > 0.05)): - score = ocls[hindex, windex] + for hindex, windex in zip(*np.where(ocls[...,1] > 0.05)): + score = ocls[hindex, windex, 1] loc = oreg[hindex, windex, :] priors = np.array([windex * stride + s_d2, hindex * stride + s_d2, s_m4, s_m4]) priors_2p = priors[2:] diff --git a/facelib/TernausNet.py b/facelib/TernausNet.py new file mode 100644 index 0000000..a8fed6c --- /dev/null +++ b/facelib/TernausNet.py @@ -0,0 +1,318 @@ +import os +import pickle +from functools import partial +from pathlib import Path + +import cv2 +import numpy as np + +from core.interact import interact as io +from core.leras import nn + +""" +Dataset used to train located in official DFL mega.nz folder +https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg + +using https://github.com/ternaus/TernausNet +TernausNet: U-Net with VGG11 Encoder Pre-Trained on ImageNet for Image Segmentation +""" + +class TernausNet(object): + VERSION = 1 + def __init__ (self, name, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False, place_model_on_cpu=False): + nn.initialize() + tf = nn.tf + + class Ternaus(nn.ModelBase): + def on_build(self, in_ch, ch): + + self.features_0 = nn.Conv2D (in_ch, ch, kernel_size=3, padding='SAME') + self.blurpool_0 = nn.BlurPool (filt_size=3) + + self.features_3 = nn.Conv2D (ch, ch*2, kernel_size=3, padding='SAME') + self.blurpool_3 = nn.BlurPool (filt_size=3) + + self.features_6 = nn.Conv2D (ch*2, ch*4, kernel_size=3, padding='SAME') + self.features_8 = nn.Conv2D (ch*4, ch*4, kernel_size=3, padding='SAME') + self.blurpool_8 = nn.BlurPool (filt_size=3) + + self.features_11 = nn.Conv2D (ch*4, ch*8, kernel_size=3, padding='SAME') + self.features_13 = nn.Conv2D (ch*8, ch*8, kernel_size=3, padding='SAME') + self.blurpool_13 = nn.BlurPool (filt_size=3) + + self.features_16 = nn.Conv2D (ch*8, ch*8, kernel_size=3, padding='SAME') + self.features_18 = nn.Conv2D (ch*8, ch*8, kernel_size=3, padding='SAME') + self.blurpool_18 = nn.BlurPool (filt_size=3) + + self.conv_center = nn.Conv2D (ch*8, ch*8, kernel_size=3, padding='SAME') + + self.conv1_up = nn.Conv2DTranspose (ch*8, ch*4, kernel_size=3, padding='SAME') + self.conv1 = nn.Conv2D (ch*12, ch*8, kernel_size=3, padding='SAME') + + self.conv2_up = nn.Conv2DTranspose (ch*8, ch*4, kernel_size=3, padding='SAME') + self.conv2 = nn.Conv2D (ch*12, ch*8, kernel_size=3, padding='SAME') + + self.conv3_up = nn.Conv2DTranspose (ch*8, ch*2, kernel_size=3, padding='SAME') + self.conv3 = nn.Conv2D (ch*6, ch*4, kernel_size=3, padding='SAME') + + self.conv4_up = nn.Conv2DTranspose (ch*4, ch, kernel_size=3, padding='SAME') + self.conv4 = nn.Conv2D (ch*3, ch*2, kernel_size=3, padding='SAME') + + self.conv5_up = nn.Conv2DTranspose (ch*2, ch//2, kernel_size=3, padding='SAME') + self.conv5 = nn.Conv2D (ch//2+ch, ch, kernel_size=3, padding='SAME') + + self.out_conv = nn.Conv2D (ch, 1, kernel_size=3, padding='SAME') + + def forward(self, inp): + x, = inp + + x = x0 = tf.nn.relu(self.features_0(x)) + x = self.blurpool_0(x) + + x = x1 = tf.nn.relu(self.features_3(x)) + x = self.blurpool_3(x) + + x = tf.nn.relu(self.features_6(x)) + x = x2 = tf.nn.relu(self.features_8(x)) + x = self.blurpool_8(x) + + x = tf.nn.relu(self.features_11(x)) + x = x3 = tf.nn.relu(self.features_13(x)) + x = self.blurpool_13(x) + + x = tf.nn.relu(self.features_16(x)) + x = x4 = 
tf.nn.relu(self.features_18(x)) + x = self.blurpool_18(x) + + x = self.conv_center(x) + + x = tf.nn.relu(self.conv1_up(x)) + x = tf.concat( [x,x4], -1) + x = tf.nn.relu(self.conv1(x)) + + x = tf.nn.relu(self.conv2_up(x)) + x = tf.concat( [x,x3], -1) + x = tf.nn.relu(self.conv2(x)) + + x = tf.nn.relu(self.conv3_up(x)) + x = tf.concat( [x,x2], -1) + x = tf.nn.relu(self.conv3(x)) + + x = tf.nn.relu(self.conv4_up(x)) + x = tf.concat( [x,x1], -1) + x = tf.nn.relu(self.conv4(x)) + + x = tf.nn.relu(self.conv5_up(x)) + x = tf.concat( [x,x0], -1) + x = tf.nn.relu(self.conv5(x)) + + x = tf.nn.sigmoid(self.out_conv(x)) + return x + + if weights_file_root is not None: + weights_file_root = Path(weights_file_root) + else: + weights_file_root = Path(__file__).parent + self.weights_path = weights_file_root / ('%s_%d_%s.npy' % (name, resolution, face_type_str) ) + + e = tf.device('/CPU:0') if place_model_on_cpu else None + + if e is not None: e.__enter__() + self.net = Ternaus(3, 64, name='Ternaus') + if load_weights: + self.net.load_weights (self.weights_path) + else: + self.net.init_weights() + if e is not None: e.__exit__(None,None,None) + + self.net.build_for_run ( [(tf.float32, (resolution,resolution,3))] ) + + if training: + raise Exception("training not supported yet") + + + """ + if training: + try: + with open( Path(__file__).parent / 'vgg11_enc_weights.npy', 'rb' ) as f: + d = pickle.loads (f.read()) + + for i in [0,3,6,8,11,13,16,18]: + s = 'features.%d' % i + + self.model.get_layer (s).set_weights ( d[s] ) + except: + io.log_err("Unable to load VGG11 pretrained weights from vgg11_enc_weights.npy") + + conv_weights_list = [] + for layer in self.model.layers: + if 'CA.' in layer.name: + conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights + CAInitializerMP ( conv_weights_list ) + """ + + + """ + if training: + inp_t = Input ( (resolution, resolution, 3) ) + real_t = Input ( (resolution, resolution, 1) ) + out_t = self.model(inp_t) + + loss = K.mean(10*K.binary_crossentropy(real_t,out_t) ) + + out_t_diff1 = out_t[:, 1:, :, :] - out_t[:, :-1, :, :] + out_t_diff2 = out_t[:, :, 1:, :] - out_t[:, :, :-1, :] + + total_var_loss = K.mean( 0.1*K.abs(out_t_diff1), axis=[1, 2, 3] ) + K.mean( 0.1*K.abs(out_t_diff2), axis=[1, 2, 3] ) + + opt = Adam(lr=0.0001, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2) + + self.train_func = K.function ( [inp_t, real_t], [K.mean(loss)], opt.get_updates( [loss], self.model.trainable_weights) ) + """ + + def __enter__(self): + return self + + def __exit__(self, exc_type=None, exc_value=None, traceback=None): + return False #pass exception between __enter__ and __exit__ to outter level + + def save_weights(self): + self.net.save_weights (str(self.weights_path)) + + def train(self, inp, real): + loss, = self.train_func ([inp, real]) + return loss + + def extract (self, input_image): + input_shape_len = len(input_image.shape) + if input_shape_len == 3: + input_image = input_image[np.newaxis,...] 
+ + result = np.clip ( self.net.run([input_image]), 0, 1.0 ) + result[result < 0.1] = 0 #get rid of noise + + if input_shape_len == 3: + result = result[0] + + return result + + + + + + + +""" + self.weights_path = weights_file_root / ('%s_%d_%s.h5' % (name, resolution, face_type_str) ) + + + self.net.build() + + + self.net.features_0.set_weights ( self.model.get_layer('features.0').get_weights() ) + self.net.features_3.set_weights ( self.model.get_layer('features.3').get_weights() ) + self.net.features_6.set_weights ( self.model.get_layer('features.6').get_weights() ) + self.net.features_8.set_weights ( self.model.get_layer('features.8').get_weights() ) + self.net.features_11.set_weights ( self.model.get_layer('features.11').get_weights() ) + self.net.features_13.set_weights ( self.model.get_layer('features.13').get_weights() ) + self.net.features_16.set_weights ( self.model.get_layer('features.16').get_weights() ) + self.net.features_18.set_weights ( self.model.get_layer('features.18').get_weights() ) + + self.net.conv_center.set_weights ( self.model.get_layer('CA.1').get_weights() ) + + self.net.conv1_up.set_weights ( self.model.get_layer('CA.2').get_weights() ) + self.net.conv1.set_weights ( self.model.get_layer('CA.3').get_weights() ) + + self.net.conv2_up.set_weights ( self.model.get_layer('CA.4').get_weights() ) + self.net.conv2.set_weights ( self.model.get_layer('CA.5').get_weights() ) + + self.net.conv3_up.set_weights ( self.model.get_layer('CA.6').get_weights() ) + self.net.conv3.set_weights ( self.model.get_layer('CA.7').get_weights() ) + + self.net.conv4_up.set_weights ( self.model.get_layer('CA.8').get_weights() ) + self.net.conv4.set_weights ( self.model.get_layer('CA.9').get_weights() ) + + self.net.conv5_up.set_weights ( self.model.get_layer('CA.10').get_weights() ) + self.net.conv5.set_weights ( self.model.get_layer('CA.11').get_weights() ) + + self.net.out_conv.set_weights ( self.model.get_layer('CA.12').get_weights() ) + + self.net.build_for_run ( [ (tf.float32, (resolution,resolution,3)) ]) + self.net.save_weights (self.weights_path2) + + + def extract (self, input_image): + input_shape_len = len(input_image.shape) + if input_shape_len == 3: + input_image = input_image[np.newaxis,...] 
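# --- editor's sketch (not part of the original diff) ---------------------------
# Hedged usage example for the TernausNet segmenter added in this file. The
# "FANSeg" model name, the 256px resolution and the 0..1 float BGR input are
# assumptions made for illustration; weights are looked up as
# facelib/<name>_<resolution>_<face_type>.npy, as built above.
import cv2
import numpy as np
from facelib import TernausNet

seg = TernausNet("FANSeg", 256, "full_face", place_model_on_cpu=True)

face = cv2.imread("aligned_face.jpg").astype(np.float32) / 255.0
face = cv2.resize(face, (256, 256), interpolation=cv2.INTER_LANCZOS4)

mask = seg.extract(face)                      # (256, 256, 1) float mask in [0, 1], small values zeroed
cv2.imwrite("face_mask.png", (mask * 255).astype(np.uint8))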
+ + result = np.clip ( self.model.predict( [input_image] ), 0, 1.0 ) + result[result < 0.1] = 0 #get rid of noise + + if input_shape_len == 3: + result = result[0] + + return result + + + @staticmethod + def BuildModel ( resolution, ngf=64): + exec( nn.initialize(), locals(), globals() ) + inp = Input ( (resolution,resolution,3) ) + x = inp + x = TernausNet.Flow(ngf=ngf)(x) + model = Model(inp,x) + return model + + @staticmethod + def Flow(ngf=64): + exec( nn.initialize(), locals(), globals() ) + + def func(input): + x = input + + x0 = x = Conv2D(ngf, kernel_size=3, strides=1, padding='same', activation='relu', name='features.0')(x) + x = BlurPool(filt_size=3)(x) + + x1 = x = Conv2D(ngf*2, kernel_size=3, strides=1, padding='same', activation='relu', name='features.3')(x) + x = BlurPool(filt_size=3)(x) + + x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.6')(x) + x2 = x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.8')(x) + x = BlurPool(filt_size=3)(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.11')(x) + x3 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.13')(x) + x = BlurPool(filt_size=3)(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.16')(x) + x4 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.18')(x) + x = BlurPool(filt_size=3)(x) + + x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', name='CA.1')(x) + + x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu', name='CA.2') (x) + x = Concatenate(axis=3)([ x, x4]) + x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu', name='CA.3') (x) + + x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu', name='CA.4') (x) + x = Concatenate(axis=3)([ x, x3]) + x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu', name='CA.5') (x) + + x = Conv2DTranspose (ngf*2, 3, strides=2, padding='same', activation='relu', name='CA.6') (x) + x = Concatenate(axis=3)([ x, x2]) + x = Conv2D (ngf*4, 3, strides=1, padding='same', activation='relu', name='CA.7') (x) + + x = Conv2DTranspose (ngf, 3, strides=2, padding='same', activation='relu', name='CA.8') (x) + x = Concatenate(axis=3)([ x, x1]) + x = Conv2D (ngf*2, 3, strides=1, padding='same', activation='relu', name='CA.9') (x) + + x = Conv2DTranspose (ngf // 2, 3, strides=2, padding='same', activation='relu', name='CA.10') (x) + x = Concatenate(axis=3)([ x, x0]) + x = Conv2D (ngf, 3, strides=1, padding='same', activation='relu', name='CA.11') (x) + + return Conv2D(1, 3, strides=1, padding='same', activation='sigmoid', name='CA.12')(x) + + + return func +""" diff --git a/facelib/__init__.py b/facelib/__init__.py index cde2ab5..ca3292e 100644 --- a/facelib/__init__.py +++ b/facelib/__init__.py @@ -1,7 +1,5 @@ from .FaceType import FaceType -from .DLIBExtractor import DLIBExtractor -from .MTCExtractor import MTCExtractor from .S3FDExtractor import S3FDExtractor from .FANExtractor import FANExtractor -from .PoseEstimator import PoseEstimator -from .FaceEnhancer import FaceEnhancer \ No newline at end of file +from .FaceEnhancer import FaceEnhancer +from .TernausNet import TernausNet \ No newline at end of file diff --git a/facelib/mmod_human_face_detector.dat b/facelib/mmod_human_face_detector.dat deleted file mode 100644 index f1f73a5..0000000 Binary 
files a/facelib/mmod_human_face_detector.dat and /dev/null differ diff --git a/facelib/mtcnn_onet.h5 b/facelib/mtcnn_onet.h5 deleted file mode 100644 index bd615de..0000000 Binary files a/facelib/mtcnn_onet.h5 and /dev/null differ diff --git a/facelib/mtcnn_pnet.h5 b/facelib/mtcnn_pnet.h5 deleted file mode 100644 index e13f81b..0000000 Binary files a/facelib/mtcnn_pnet.h5 and /dev/null differ diff --git a/facelib/mtcnn_rnet.h5 b/facelib/mtcnn_rnet.h5 deleted file mode 100644 index 798a807..0000000 Binary files a/facelib/mtcnn_rnet.h5 and /dev/null differ diff --git a/nnlib/vgg11_enc_weights.npy b/facelib/vgg11_enc_weights.npy similarity index 100% rename from nnlib/vgg11_enc_weights.npy rename to facelib/vgg11_enc_weights.npy diff --git a/imagelib/RankSRGAN.h5 b/imagelib/RankSRGAN.h5 deleted file mode 100644 index 765ae2e..0000000 Binary files a/imagelib/RankSRGAN.h5 and /dev/null differ diff --git a/imagelib/RankSRGAN.py b/imagelib/RankSRGAN.py deleted file mode 100644 index 529dea8..0000000 --- a/imagelib/RankSRGAN.py +++ /dev/null @@ -1,109 +0,0 @@ -import numpy as np -import cv2 -from pathlib import Path -from nnlib import nnlib -from interact import interact as io - -class RankSRGAN(): - def __init__(self): - exec( nnlib.import_all(), locals(), globals() ) - - class PixelShufflerTorch(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShufflerTorch, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc = c // (rh * rw) - - out = inputs - out = K.permute_dimensions(out, (0, 3, 1, 2)) #NCHW - - out = K.reshape(out, (batch_size, oc, rh, rw, h, w)) - out = K.permute_dimensions(out, (0, 1, 4, 2, 5, 3)) - out = K.reshape(out, (batch_size, oc, oh, ow)) - - out = K.permute_dimensions(out, (0, 2, 3, 1)) - return out - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + str(4) + '; Received input shape:', str(input_shape)) - - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShufflerTorch, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - - def res_block(inp, name_prefix): - x = inp - x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', activation="relu", name=name_prefix+"0")(x) - x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', name=name_prefix+"2")(x) - return Add()([inp,x]) - - ndf = 64 - nb = 16 - inp = Input ( (None, None,3) ) - x = inp - - x = x0 = Conv2D (ndf, kernel_size=3, strides=1, padding='same', name="model0")(x) - for i in range(nb): - x = res_block(x, "model1%.2d" %i ) - x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', name="model1160")(x) - x = Add()([x0,x]) - - x = 
ReLU() ( PixelShufflerTorch() ( Conv2D (ndf*4, kernel_size=3, strides=1, padding='same', name="model2")(x) ) ) - x = ReLU() ( PixelShufflerTorch() ( Conv2D (ndf*4, kernel_size=3, strides=1, padding='same', name="model5")(x) ) ) - - x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', activation="relu", name="model8")(x) - x = Conv2D (3, kernel_size=3, strides=1, padding='same', name="model10")(x) - self.model = Model(inp, x ) - self.model.load_weights ( Path(__file__).parent / 'RankSRGAN.h5') - - def upscale(self, img, scale=2, is_bgr=True, is_float=True): - if scale not in [2,4]: - raise ValueError ("RankSRGAN: supported scale are 2 or 4.") - - if not is_bgr: - img = img[...,::-1] - - if not is_float: - img /= 255.0 - - h, w = img.shape[:2] - ch = img.shape[2] if len(img.shape) >= 3 else 1 - - output = self.model.predict([img[None,...]])[0] - - if scale == 2: - output = cv2.resize (output, (w*scale, h*scale), cv2.INTER_CUBIC) - - if not is_float: - output = np.clip (output * 255.0, 0, 255.0) - - if not is_bgr: - output = output[...,::-1] - - return output \ No newline at end of file diff --git a/main.py b/main.py index 2166817..f7a78ba 100644 --- a/main.py +++ b/main.py @@ -1,14 +1,17 @@ if __name__ == "__main__": + from core.leras import nn + nn.initialize_main_env() + import os import sys import time import argparse import multiprocessing multiprocessing.set_start_method("spawn") - from utils import Path_utils - from utils import os_utils + from core import pathex + from core import osex from pathlib import Path - from interact import interact as io + from core.interact import interact as io if sys.version_info[0] < 3 or (sys.version_info[0] == 3 and sys.version_info[1] < 6): raise Exception("This program requires at least Python 3.6") @@ -21,36 +24,37 @@ if __name__ == "__main__": subparsers = parser.add_subparsers() def process_extract(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import Extractor - Extractor.main( arguments.input_dir, - arguments.output_dir, - arguments.debug_dir, - arguments.detector, - arguments.manual_fix, - arguments.manual_output_debug_fix, - arguments.manual_window_size, - face_type=arguments.face_type, - device_args={'cpu_only' : arguments.cpu_only, - 'multi_gpu' : arguments.multi_gpu, - } + Extractor.main( detector = arguments.detector, + input_path = Path(arguments.input_dir), + output_path = Path(arguments.output_dir), + output_debug = arguments.output_debug, + manual_fix = arguments.manual_fix, + manual_output_debug_fix = arguments.manual_output_debug_fix, + manual_window_size = arguments.manual_window_size, + face_type = arguments.face_type, + cpu_only = arguments.cpu_only, + force_gpu_idxs = [ int(x) for x in arguments.force_gpu_idxs.split(',') ] if arguments.force_gpu_idxs is not None else None, ) p = subparsers.add_parser( "extract", help="Extract the faces from a pictures.") + p.add_argument('--detector', dest="detector", choices=['s3fd','manual'], default=None, help="Type of detector.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. 
This is where the extracted files will be stored.") - p.add_argument('--debug-dir', action=fixPathAction, dest="debug_dir", help="Writes debug images to this directory.") + p.add_argument('--output-debug', action="store_true", dest="output_debug", default=None, help="Writes debug images to _debug\ directory.") + p.add_argument('--no-output-debug', action="store_false", dest="output_debug", default=None, help="Don't writes debug images to _debug\ directory.") p.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'full_face_no_align', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'") - p.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") p.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.") p.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.") p.add_argument('--manual-window-size', type=int, dest="manual_window_size", default=1368, help="Manual fix window size. Default: 1368.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU. 
Forces to use MT extractor.") + p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Extract on CPU..") + p.add_argument('--force-gpu-idxs', dest="force_gpu_idxs", default=None, help="Force to choose GPU indexes separated by comma.") + p.set_defaults (func=process_extract) def process_dev_extract_vggface2_dataset(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import dev_misc dev_misc.extract_vggface2_dataset( arguments.input_dir, device_args={'cpu_only' : arguments.cpu_only, @@ -65,7 +69,7 @@ if __name__ == "__main__": p.set_defaults (func=process_dev_extract_vggface2_dataset) def process_dev_extract_umd_csv(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import dev_misc dev_misc.extract_umd_csv( arguments.input_csv_file, device_args={'cpu_only' : arguments.cpu_only, @@ -81,7 +85,7 @@ if __name__ == "__main__": def process_dev_apply_celebamaskhq(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import dev_misc dev_misc.apply_celebamaskhq( arguments.input_dir ) @@ -90,7 +94,7 @@ if __name__ == "__main__": p.set_defaults (func=process_dev_apply_celebamaskhq) def process_dev_test(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import dev_misc dev_misc.dev_test( arguments.input_dir ) @@ -99,17 +103,17 @@ if __name__ == "__main__": p.set_defaults (func=process_dev_test) def process_sort(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import Sorter - Sorter.main (input_path=arguments.input_dir, sort_by_method=arguments.sort_by_method) + Sorter.main (input_path=Path(arguments.input_dir), sort_by_method=arguments.sort_by_method) p = subparsers.add_parser( "sort", help="Sort faces in a directory.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--by', required=True, dest="sort_by_method", choices=("blur", "face", "face-dissim", "face-yaw", "face-pitch", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "oneface", "final", "final-no-blur", "vggface", "absdiff", "test"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." ) + p.add_argument('--by', dest="sort_by_method", default=None, choices=("blur", "face-yaw", "face-pitch", "hist", "hist-dissim", "brightness", "hue", "black", "origname", "oneface", "final", "absdiff"), help="Method of sorting. 'origname' sort by original filename to recover original sequence." 
) p.set_defaults (func=process_sort) def process_util(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import Util if arguments.convert_png_to_jpg: @@ -158,65 +162,71 @@ if __name__ == "__main__": p.set_defaults (func=process_util) def process_train(arguments): - os_utils.set_process_lowest_prio() - args = {'training_data_src_dir' : arguments.training_data_src_dir, - 'training_data_dst_dir' : arguments.training_data_dst_dir, - 'pretraining_data_dir' : arguments.pretraining_data_dir, - 'model_path' : arguments.model_dir, - 'model_name' : arguments.model_name, - 'no_preview' : arguments.no_preview, - 'debug' : arguments.debug, - 'execute_programs' : [ [int(x[0]), x[1] ] for x in arguments.execute_program ] - } - device_args = {'cpu_only' : arguments.cpu_only, - 'force_gpu_idx' : arguments.force_gpu_idx, - } + osex.set_process_lowest_prio() + + + kwargs = {'model_class_name' : arguments.model_name, + 'saved_models_path' : Path(arguments.model_dir), + 'training_data_src_path' : Path(arguments.training_data_src_dir), + 'training_data_dst_path' : Path(arguments.training_data_dst_dir), + 'pretraining_data_path' : Path(arguments.pretraining_data_dir) if arguments.pretraining_data_dir is not None else None, + 'pretrained_model_path' : Path(arguments.pretrained_model_dir) if arguments.pretrained_model_dir is not None else None, + 'no_preview' : arguments.no_preview, + 'force_model_name' : arguments.force_model_name, + 'force_gpu_idxs' : arguments.force_gpu_idxs, + 'cpu_only' : arguments.cpu_only, + 'execute_programs' : [ [int(x[0]), x[1] ] for x in arguments.execute_program ], + 'debug' : arguments.debug, + } from mainscripts import Trainer - Trainer.main(args, device_args) + Trainer.main(**kwargs) p = subparsers.add_parser( "train", help="Trainer") p.add_argument('--training-data-src-dir', required=True, action=fixPathAction, dest="training_data_src_dir", help="Dir of extracted SRC faceset.") p.add_argument('--training-data-dst-dir', required=True, action=fixPathAction, dest="training_data_dst_dir", help="Dir of extracted DST faceset.") p.add_argument('--pretraining-data-dir', action=fixPathAction, dest="pretraining_data_dir", default=None, help="Optional dir of extracted faceset that will be used in pretraining mode.") - p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") - p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") - p.add_argument('--no-preview', action="store_true", dest="no_preview", default=False, help="Disable preview window.") + p.add_argument('--pretrained-model-dir', action=fixPathAction, dest="pretrained_model_dir", default=None, help="Optional dir of pretrain model files. 
(Currently only for Quick96).") + p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Saved models dir.") + p.add_argument('--model', required=True, dest="model_name", choices=pathex.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Model class name.") p.add_argument('--debug', action="store_true", dest="debug", default=False, help="Debug samples.") + p.add_argument('--no-preview', action="store_true", dest="no_preview", default=False, help="Disable preview window.") + p.add_argument('--force-model-name', dest="force_model_name", default=None, help="Forcing to choose model name from model/ folder.") p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Train on CPU.") - p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") + p.add_argument('--force-gpu-idxs', dest="force_gpu_idxs", default=None, help="Force to choose GPU indexes separated by comma.") p.add_argument('--execute-program', dest="execute_program", default=[], action='append', nargs='+') p.set_defaults (func=process_train) - def process_convert(arguments): - os_utils.set_process_lowest_prio() - args = {'training_data_src_dir' : arguments.training_data_src_dir, - 'input_dir' : arguments.input_dir, - 'output_dir' : arguments.output_dir, - 'aligned_dir' : arguments.aligned_dir, - 'model_dir' : arguments.model_dir, - 'model_name' : arguments.model_name, + def process_merge(arguments): + osex.set_process_lowest_prio() + kwargs = {'model_class_name' : arguments.model_name, + 'saved_models_path' : Path(arguments.model_dir), + 'training_data_src_path' : Path(arguments.training_data_src_dir) if arguments.training_data_src_dir is not None else None, + 'force_model_name' : arguments.force_model_name, + 'input_path' : Path(arguments.input_dir), + 'output_path' : Path(arguments.output_dir), + 'aligned_path' : Path(arguments.aligned_dir) if arguments.aligned_dir is not None else None, + 'cpu_only' : arguments.cpu_only, + 'force_gpu_idxs' : arguments.force_gpu_idxs, } - device_args = {'cpu_only' : arguments.cpu_only, - 'force_gpu_idx' : arguments.force_gpu_idx, - } - from mainscripts import Converter - Converter.main (args, device_args) + from mainscripts import Merger + Merger.main (**kwargs) - p = subparsers.add_parser( "convert", help="Converter") - p.add_argument('--training-data-src-dir', action=fixPathAction, dest="training_data_src_dir", help="(optional, may be required by some models) Dir of extracted SRC faceset.") + p = subparsers.add_parser( "merge", help="Merger") + p.add_argument('--training-data-src-dir', action=fixPathAction, dest="training_data_src_dir", default=None, help="(optional, may be required by some models) Dir of extracted SRC faceset.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory. A directory containing the files you wish to process.") - p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the converted files will be stored.") - p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", help="Aligned directory. This is where the extracted of dst faces stored.") + p.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. 
This is where the merged files will be stored.") + p.add_argument('--aligned-dir', action=fixPathAction, dest="aligned_dir", default=None, help="Aligned directory. This is where the extracted of dst faces stored.") p.add_argument('--model-dir', required=True, action=fixPathAction, dest="model_dir", help="Model dir.") - p.add_argument('--model', required=True, dest="model_name", choices=Path_utils.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Type of model") - p.add_argument('--force-gpu-idx', type=int, dest="force_gpu_idx", default=-1, help="Force to choose this GPU idx.") - p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Convert on CPU.") - p.set_defaults(func=process_convert) + p.add_argument('--model', required=True, dest="model_name", choices=pathex.get_all_dir_names_startswith ( Path(__file__).parent / 'models' , 'Model_'), help="Model class name.") + p.add_argument('--force-model-name', dest="force_model_name", default=None, help="Forcing to choose model name from model/ folder.") + p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Merge on CPU.") + p.add_argument('--force-gpu-idxs', dest="force_gpu_idxs", default=None, help="Force to choose GPU indexes separated by comma.") + p.set_defaults(func=process_merge) videoed_parser = subparsers.add_parser( "videoed", help="Video processing.").add_subparsers() def process_videoed_extract_video(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import VideoEd VideoEd.extract_video (arguments.input_file, arguments.output_dir, arguments.output_ext, arguments.fps) p = videoed_parser.add_parser( "extract-video", help="Extract images from video file.") @@ -227,7 +237,7 @@ if __name__ == "__main__": p.set_defaults(func=process_videoed_extract_video) def process_videoed_cut_video(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import VideoEd VideoEd.cut_video (arguments.input_file, arguments.from_time, @@ -243,7 +253,7 @@ if __name__ == "__main__": p.set_defaults(func=process_videoed_cut_video) def process_videoed_denoise_image_sequence(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import VideoEd VideoEd.denoise_image_sequence (arguments.input_dir, arguments.ext, arguments.factor) p = videoed_parser.add_parser( "denoise-image-sequence", help="Denoise sequence of images, keeping sharp edges. This allows you to make the final fake more believable, since the neural network is not able to make a detailed skin texture, but it makes the edges quite clear. Therefore, if the whole frame is more `blurred`, then a fake will seem more believable. 
Especially true for scenes of the film, which are usually very clear.") @@ -253,7 +263,7 @@ if __name__ == "__main__": p.set_defaults(func=process_videoed_denoise_image_sequence) def process_videoed_video_from_sequence(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import VideoEd VideoEd.video_from_sequence (arguments.input_dir, arguments.output_file, @@ -289,25 +299,28 @@ if __name__ == "__main__": facesettool_parser = subparsers.add_parser( "facesettool", help="Faceset tools.").add_subparsers() def process_faceset_enhancer(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import FacesetEnhancer - FacesetEnhancer.process_folder ( Path(arguments.input_dir), multi_gpu=arguments.multi_gpu, cpu_only=arguments.cpu_only ) + FacesetEnhancer.process_folder ( Path(arguments.input_dir), + cpu_only=arguments.cpu_only, + force_gpu_idxs=arguments.force_gpu_idxs + ) p = facesettool_parser.add_parser ("enhance", help="Enhance details in DFL faceset.") p.add_argument('--input-dir', required=True, action=fixPathAction, dest="input_dir", help="Input directory of aligned faces.") - p.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.") p.add_argument('--cpu-only', action="store_true", dest="cpu_only", default=False, help="Process on CPU.") + p.add_argument('--force-gpu-idxs', dest="force_gpu_idxs", default=None, help="Force to choose GPU indexes separated by comma.") p.set_defaults(func=process_faceset_enhancer) """ def process_relight_faceset(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import FacesetRelighter FacesetRelighter.relight (arguments.input_dir, arguments.lighten, arguments.random_one) def process_delete_relighted(arguments): - os_utils.set_process_lowest_prio() + osex.set_process_lowest_prio() from mainscripts import FacesetRelighter FacesetRelighter.delete_relighted (arguments.input_dir) @@ -332,21 +345,6 @@ if __name__ == "__main__": print ("Done.") - """ - Suppressing error with keras 2.2.4+ on python exit: - - Exception ignored in: > - Traceback (most recent call last): - File "D:\DeepFaceLab\_internal\bin\lib\site-packages\tensorflow\python\client\session.py", line 1413, in __del__ - AttributeError: 'NoneType' object has no attribute 'raise_exception_on_not_ok_status' - - reproduce: https://github.com/keras-team/keras/issues/11751 ( still no solution ) - """ - outnull_file = open(os.devnull, 'w') - os.dup2 ( outnull_file.fileno(), sys.stderr.fileno() ) - sys.stderr = outnull_file - - ''' import code code.interact(local=dict(globals(), **locals())) diff --git a/mainscripts/ConverterScreen/__init__.py b/mainscripts/ConverterScreen/__init__.py deleted file mode 100644 index 5103fc4..0000000 --- a/mainscripts/ConverterScreen/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .ConverterScreen import Screen, ScreenManager \ No newline at end of file diff --git a/mainscripts/Extractor.py b/mainscripts/Extractor.py index 76f55c1..7d41684 100644 --- a/mainscripts/Extractor.py +++ b/mainscripts/Extractor.py @@ -12,22 +12,22 @@ import cv2 import numpy as np import facelib -import imagelib -import mathlib -from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from joblib import Subprocessor -from nnlib import TernausNet, nnlib -from utils import Path_utils -from utils.cv2_utils import * +from core import imagelib +from core import mathlib +from facelib import 
FaceType, LandmarksProcessor, TernausNet +from core.interact import interact as io +from core.joblib import Subprocessor +from core.leras import nn +from core import pathex +from core.cv2ex import * from DFLIMG import * DEBUG = False class ExtractSubprocessor(Subprocessor): class Data(object): - def __init__(self, filename=None, rects=None, landmarks = None, landmarks_accurate=True, manual=False, force_output_path=None, final_output_files = None): - self.filename = filename + def __init__(self, filepath=None, rects=None, landmarks = None, landmarks_accurate=True, manual=False, force_output_path=None, final_output_files = None): + self.filepath = filepath self.rects = rects or [] self.rects_rotation = 0 self.landmarks_accurate = landmarks_accurate @@ -41,289 +41,295 @@ class ExtractSubprocessor(Subprocessor): #override def on_initialize(self, client_dict): - self.type = client_dict['type'] - self.image_size = client_dict['image_size'] - self.face_type = client_dict['face_type'] + self.type = client_dict['type'] + self.image_size = client_dict['image_size'] + self.face_type = client_dict['face_type'] self.max_faces_from_image = client_dict['max_faces_from_image'] - self.device_idx = client_dict['device_idx'] - self.cpu_only = client_dict['device_type'] == 'CPU' - self.final_output_path = Path(client_dict['final_output_dir']) if 'final_output_dir' in client_dict.keys() else None - self.debug_dir = client_dict['debug_dir'] - + self.device_idx = client_dict['device_idx'] + self.cpu_only = client_dict['device_type'] == 'CPU' + self.final_output_path = client_dict['final_output_path'] + self.output_debug_path = client_dict['output_debug_path'] + #transfer and set stdin in order to work code.interact in debug subprocess stdin_fd = client_dict['stdin_fd'] if stdin_fd is not None and DEBUG: sys.stdin = os.fdopen(stdin_fd) + self.log_info (f"Running on {client_dict['device_name'] }") + + if self.cpu_only: + device_config = nn.DeviceConfig.CPU() + place_model_on_cpu = True + else: + device_config = nn.DeviceConfig.GPUIndexes ([self.device_idx]) + place_model_on_cpu = device_config.devices[0].total_mem_gb < 4 + + if self.type == 'all' or 'rects' in self.type or 'landmarks' in self.type: + nn.initialize (device_config) + + if self.type == 'all' or self.type == 'rects-s3fd' or 'landmarks' in self.type: + self.rects_extractor = facelib.S3FDExtractor(place_model_on_cpu=place_model_on_cpu) + + if self.type == 'all' or 'landmarks' in self.type: + self.landmarks_extractor = facelib.FANExtractor(place_model_on_cpu=place_model_on_cpu) + self.cached_image = (None, None) - self.e = None - device_config = nnlib.DeviceConfig ( cpu_only=self.cpu_only, force_gpu_idx=self.device_idx, allow_growth=True) - self.device_vram = device_config.gpu_vram_gb[0] - - intro_str = 'Running on %s.' % (client_dict['device_name']) - if not self.cpu_only and self.device_vram <= 2: - intro_str += " Recommended to close all programs using this device." 
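# --- editor's sketch (not part of the original diff) ---------------------------
# The rewritten on_initialize() above picks a leras DeviceConfig per subprocess
# and pushes the detector/landmark models onto the CPU when the selected card
# has little VRAM. Condensed restatement of that rule; `nn` is core.leras.nn,
# assumed to be already initialized for the chosen config.
def pick_device_config(nn, cpu_only, device_idx):
    if cpu_only:
        return nn.DeviceConfig.CPU(), True                 # models always live on CPU
    cfg = nn.DeviceConfig.GPUIndexes([device_idx])
    place_model_on_cpu = cfg.devices[0].total_mem_gb < 4   # low-memory cards fall back to CPU
    return cfg, place_model_on_cpu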
- - self.log_info (intro_str) - - if 'rects' in self.type: - if self.type == 'rects-mt': - nnlib.import_all (device_config) - self.e = facelib.MTCExtractor() - elif self.type == 'rects-dlib': - nnlib.import_dlib (device_config) - self.e = facelib.DLIBExtractor(nnlib.dlib) - elif self.type == 'rects-s3fd': - nnlib.import_all (device_config) - self.e = facelib.S3FDExtractor(do_dummy_predict=True) - else: - raise ValueError ("Wrong type.") - - if self.e is not None: - self.e.__enter__() - - elif self.type == 'landmarks': - nnlib.import_all (device_config) - self.e = facelib.FANExtractor() - self.e.__enter__() - if self.device_vram >= 2: - self.second_pass_e = facelib.S3FDExtractor(do_dummy_predict=False) - self.second_pass_e.__enter__() - else: - self.second_pass_e = None - - elif self.type == 'fanseg': - nnlib.import_all (device_config) - self.e = TernausNet(256, FaceType.toString(FaceType.FULL) ) - self.e.__enter__() - - elif self.type == 'final': - pass - - #override - def on_finalize(self): - if self.e is not None: - self.e.__exit__() - #override def process_data(self, data): - filename_path = Path( data.filename ) - filename_path_str = str(filename_path) - - if self.type == 'landmarks' and len(data.rects) == 0: - return data - - if self.cached_image[0] == filename_path_str: - image = self.cached_image[1] #cached image for manual extractor - else: - image = cv2_imread( filename_path_str ) - - if image is None: - self.log_err ( 'Failed to extract %s, reason: cv2_imread() fail.' % ( str(filename_path) ) ) - return data - - image = imagelib.normalize_channels(image, 3) - h, w, ch = image.shape - - wm, hm = w % 2, h % 2 - if wm + hm != 0: #fix odd image - image = image[0:h-hm,0:w-wm,:] - self.cached_image = ( filename_path_str, image ) - - src_dflimg = None - h, w, ch = image.shape - if h == w: - #extracting from already extracted jpg image? 
- src_dflimg = DFLIMG.load (filename_path) - - if 'rects' in self.type: - if min(w,h) < 128: - self.log_err ( 'Image is too small %s : [%d, %d]' % ( str(filename_path), w, h ) ) - data.rects = [] - else: - for rot in ([0, 90, 270, 180]): - data.rects_rotation = rot - if rot == 0: - rotated_image = image - elif rot == 90: - rotated_image = image.swapaxes( 0,1 )[:,::-1,:] - elif rot == 180: - rotated_image = image[::-1,::-1,:] - elif rot == 270: - rotated_image = image.swapaxes( 0,1 )[::-1,:,:] - - rects = data.rects = self.e.extract (rotated_image, is_bgr=True) - if len(rects) != 0: - break - - if self.max_faces_from_image != 0 and len(data.rects) > 1: - data.rects = data.rects[0:self.max_faces_from_image] - - return data - - elif self.type == 'landmarks': - if data.rects_rotation == 0: - rotated_image = image - elif data.rects_rotation == 90: - rotated_image = image.swapaxes( 0,1 )[:,::-1,:] - elif data.rects_rotation == 180: - rotated_image = image[::-1,::-1,:] - elif data.rects_rotation == 270: - rotated_image = image.swapaxes( 0,1 )[::-1,:,:] - - data.landmarks = self.e.extract (rotated_image, data.rects, self.second_pass_e if (src_dflimg is None and data.landmarks_accurate) else None, is_bgr=True) - if data.rects_rotation != 0: - for i, (rect, lmrks) in enumerate(zip(data.rects, data.landmarks)): - new_rect, new_lmrks = rect, lmrks - (l,t,r,b) = rect - if data.rects_rotation == 90: - new_rect = ( t, h-l, b, h-r) - if lmrks is not None: - new_lmrks = lmrks[:,::-1].copy() - new_lmrks[:,1] = h - new_lmrks[:,1] - elif data.rects_rotation == 180: - if lmrks is not None: - new_rect = ( w-l, h-t, w-r, h-b) - new_lmrks = lmrks.copy() - new_lmrks[:,0] = w - new_lmrks[:,0] - new_lmrks[:,1] = h - new_lmrks[:,1] - elif data.rects_rotation == 270: - new_rect = ( w-b, l, w-t, r ) - if lmrks is not None: - new_lmrks = lmrks[:,::-1].copy() - new_lmrks[:,0] = w - new_lmrks[:,0] - data.rects[i], data.landmarks[i] = new_rect, new_lmrks - - return data - - elif self.type == 'final': - data.final_output_files = [] - rects = data.rects - landmarks = data.landmarks - - if self.debug_dir is not None: - debug_output_file = str( Path(self.debug_dir) / (filename_path.stem+'.jpg') ) - debug_image = image.copy() - - if src_dflimg is not None and len(rects) != 1: - #if re-extracting from dflimg and more than 1 or zero faces detected - dont process and just copy it - print("src_dflimg is not None and len(rects) != 1", str(filename_path) ) - output_file = str(self.final_output_path / filename_path.name) - if str(filename_path) != str(output_file): - shutil.copy ( str(filename_path), str(output_file) ) - data.final_output_files.append (output_file) - else: - face_idx = 0 - for rect, image_landmarks in zip( rects, landmarks ): - - if src_dflimg is not None and face_idx > 1: - #cannot extract more than 1 face from dflimg - break - - if image_landmarks is None: - continue - - rect = np.array(rect) - - if self.face_type == FaceType.MARK_ONLY: - image_to_face_mat = None - face_image = image - face_image_landmarks = image_landmarks - else: - image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type) - - face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4) - face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat) - - landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], 
image_to_face_mat, True) - - rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]])) - landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] ) - - if not data.manual and self.face_type <= FaceType.FULL_NO_ALIGN and landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area - continue - - if self.debug_dir is not None: - LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type, transparent_mask=True) - - final_output_path = self.final_output_path - if data.force_output_path is not None: - final_output_path = data.force_output_path - - if src_dflimg is not None and filename_path.suffix == '.jpg': - #if extracting from dflimg and jpg copy it in order not to lose quality - output_file = str(final_output_path / filename_path.name) - if str(filename_path) != str(output_file): - shutil.copy ( str(filename_path), str(output_file) ) - else: - - output_file = '{}_{}{}'.format(str(final_output_path / filename_path.stem), str(face_idx), '.jpg') - cv2_imwrite(output_file, face_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100] ) - - DFLJPG.embed_data(output_file, face_type=FaceType.toString(self.face_type), - landmarks=face_image_landmarks.tolist(), - source_filename=filename_path.name, - source_rect=rect, - source_landmarks=image_landmarks.tolist(), - image_to_face_mat=image_to_face_mat - ) - - data.final_output_files.append (output_file) - face_idx += 1 - data.faces_detected = face_idx - - if self.debug_dir is not None: - cv2_imwrite(debug_output_file, debug_image, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) - + if 'landmarks' in self.type and len(data.rects) == 0: return data - elif self.type == 'fanseg': - if src_dflimg is not None: - fanseg_mask = self.e.extract( image / 255.0 ) - src_dflimg.embed_and_set( filename_path_str, - fanseg_mask=fanseg_mask, - ) - + filepath = data.filepath + cached_filepath, image = self.cached_image + if cached_filepath != filepath: + image = cv2_imread( filepath ) + if image is None: + self.log_err (f'Failed to open {filepath}, reason: cv2_imread() fail.') + return data + image = imagelib.normalize_channels(image, 3) + image = imagelib.cut_odd_image(image) + self.cached_image = ( filepath, image ) + + h, w, c = image.shape + extract_from_dflimg = (h == w and DFLIMG.load (filepath) is not None) + + if 'rects' in self.type or self.type == 'all': + data = ExtractSubprocessor.Cli.rects_stage (data=data, + image=image, + max_faces_from_image=self.max_faces_from_image, + rects_extractor=self.rects_extractor, + ) + + if 'landmarks' in self.type or self.type == 'all': + data = ExtractSubprocessor.Cli.landmarks_stage (data=data, + image=image, + extract_from_dflimg=extract_from_dflimg, + landmarks_extractor=self.landmarks_extractor, + rects_extractor=self.rects_extractor, + ) + + if self.type == 'final' or self.type == 'all': + data = ExtractSubprocessor.Cli.final_stage(data=data, + image=image, + face_type=self.face_type, + image_size=self.image_size, + extract_from_dflimg=extract_from_dflimg, + output_debug_path=self.output_debug_path, + final_output_path=self.final_output_path, + ) + return data + + @staticmethod + def rects_stage(data, + image, + max_faces_from_image, + rects_extractor, + ): + h,w,c = image.shape + if min(h,w) < 128: + # Image is too small + data.rects = [] + else: + for rot in ([0, 90, 270, 180]): + if rot == 0: + rotated_image = image + elif rot == 90: + rotated_image = image.swapaxes( 0,1 )[:,::-1,:] + elif rot == 180: + 
rotated_image = image[::-1,::-1,:]
+                    elif rot == 270:
+                        rotated_image = image.swapaxes( 0,1 )[::-1,:,:]
+                    rects = data.rects = rects_extractor.extract (rotated_image, is_bgr=True)
+                    if len(rects) != 0:
+                        data.rects_rotation = rot
+                        break
+            if max_faces_from_image != 0 and len(data.rects) > 1:
+                data.rects = data.rects[0:max_faces_from_image]
+            return data
+
+
+        @staticmethod
+        def landmarks_stage(data,
+                            image,
+                            extract_from_dflimg,
+                            landmarks_extractor,
+                            rects_extractor,
+                            ):
+            h, w, c = image.shape  # original image size, needed below to map rects/landmarks back from the rotated image
+
+            if data.rects_rotation == 0:
+                rotated_image = image
+            elif data.rects_rotation == 90:
+                rotated_image = image.swapaxes( 0,1 )[:,::-1,:]
+            elif data.rects_rotation == 180:
+                rotated_image = image[::-1,::-1,:]
+            elif data.rects_rotation == 270:
+                rotated_image = image.swapaxes( 0,1 )[::-1,:,:]
+
+            data.landmarks = landmarks_extractor.extract (rotated_image, data.rects, rects_extractor if (not extract_from_dflimg and data.landmarks_accurate) else None, is_bgr=True)
+            if data.rects_rotation != 0:
+                for i, (rect, lmrks) in enumerate(zip(data.rects, data.landmarks)):
+                    new_rect, new_lmrks = rect, lmrks
+                    (l,t,r,b) = rect
+                    if data.rects_rotation == 90:
+                        new_rect = ( t, h-l, b, h-r)
+                        if lmrks is not None:
+                            new_lmrks = lmrks[:,::-1].copy()
+                            new_lmrks[:,1] = h - new_lmrks[:,1]
+                    elif data.rects_rotation == 180:
+                        if lmrks is not None:
+                            new_rect = ( w-l, h-t, w-r, h-b)
+                            new_lmrks = lmrks.copy()
+                            new_lmrks[:,0] = w - new_lmrks[:,0]
+                            new_lmrks[:,1] = h - new_lmrks[:,1]
+                    elif data.rects_rotation == 270:
+                        new_rect = ( w-b, l, w-t, r )
+                        if lmrks is not None:
+                            new_lmrks = lmrks[:,::-1].copy()
+                            new_lmrks[:,0] = w - new_lmrks[:,0]
+                    data.rects[i], data.landmarks[i] = new_rect, new_lmrks
+
+            return data
+
+        @staticmethod
+        def final_stage(data,
+                        image,
+                        face_type,
+                        image_size,
+                        extract_from_dflimg = False,
+                        output_debug_path=None,
+                        final_output_path=None,
+                        ):
+            data.final_output_files = []
+            filepath = data.filepath
+            rects = data.rects
+            landmarks = data.landmarks
+
+            if output_debug_path is not None:
+                debug_image = image.copy()
+
+            if extract_from_dflimg and len(rects) != 1:
+                #if re-extracting from dflimg and more than one or zero faces detected - don't process, just copy it
+                print("extract_from_dflimg and len(rects) != 1", filepath )
+                output_filepath = final_output_path / filepath.name
+                if filepath != output_filepath:
+                    shutil.copy ( str(filepath), str(output_filepath) )
+                data.final_output_files.append (output_filepath)
+            else:
+                face_idx = 0
+                for rect, image_landmarks in zip( rects, landmarks ):
+
+                    if extract_from_dflimg and face_idx > 1:
+                        #cannot extract more than 1 face from dflimg
+                        break
+
+                    if image_landmarks is None:
+                        continue
+
+                    rect = np.array(rect)
+
+                    if face_type == FaceType.MARK_ONLY:
+                        image_to_face_mat = None
+                        face_image = image
+                        face_image_landmarks = image_landmarks
+                    else:
+                        image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, image_size, face_type)
+
+                        face_image = cv2.warpAffine(image, image_to_face_mat, (image_size, image_size), cv2.INTER_LANCZOS4)
+                        face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat)
+
+                        landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,image_size-1), (image_size-1, image_size-1), (image_size-1,0) ], image_to_face_mat, True)
+
+                        rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]]))
+                        landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] )
+
+                        if not data.manual and face_type <= FaceType.FULL_NO_ALIGN and 
landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area + continue + + if output_debug_path is not None: + LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, image_size, face_type, transparent_mask=True) + + output_path = final_output_path + if data.force_output_path is not None: + output_path = data.force_output_path + + if extract_from_dflimg and filepath.suffix == '.jpg': + #if extracting from dflimg and jpg copy it in order not to lose quality + output_filepath = output_path / filepath.name + if filepath != output_filepath: + shutil.copy ( str(filepath), str(output_filepath) ) + else: + output_filepath = output_path / f"{filepath.stem}_{face_idx}.jpg" + cv2_imwrite(output_filepath, face_image, [int(cv2.IMWRITE_JPEG_QUALITY), 100] ) + + DFLJPG.embed_data(output_filepath, face_type=FaceType.toString(face_type), + landmarks=face_image_landmarks.tolist(), + source_filename=filepath.name, + source_rect=rect, + source_landmarks=image_landmarks.tolist(), + image_to_face_mat=image_to_face_mat + ) + + data.final_output_files.append (output_filepath) + face_idx += 1 + data.faces_detected = face_idx + + if output_debug_path is not None: + cv2_imwrite( output_debug_path / (filepath.stem+'.jpg'), debug_image, [int(cv2.IMWRITE_JPEG_QUALITY), 50] ) + + return data + #overridable def get_data_name (self, data): #return string identificator of your data - return data.filename + return data.filepath + + @staticmethod + def get_devices_for_config (type, device_config): + devices = device_config.devices + cpu_only = len(devices) == 0 + + if 'rects' in type or \ + 'landmarks' in type or \ + 'all' in type: + + if not cpu_only: + if type == 'landmarks-manual': + devices = [devices.get_best_device()] + result = [ (device.index, 'GPU', device.name, device.total_mem_gb) for device in devices ] + return result + else: + if type == 'landmarks-manual': + return [ (0, 'CPU', 'CPU', 0 ) ] + else: + return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] + + elif type == 'final': + return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in (range(min(8, multiprocessing.cpu_count())) if not DEBUG else [0]) ] + + def __init__(self, input_data, type, image_size=None, face_type=None, output_debug_path=None, manual_window_size=0, max_faces_from_image=0, final_output_path=None, device_config=None): + if type == 'landmarks-manual': + for x in input_data: + x.manual = True - #override - def __init__(self, input_data, type, image_size=None, face_type=None, debug_dir=None, multi_gpu=False, cpu_only=False, manual=False, manual_window_size=0, max_faces_from_image=0, final_output_path=None): self.input_data = input_data + self.type = type self.image_size = image_size self.face_type = face_type - self.debug_dir = debug_dir + self.output_debug_path = output_debug_path self.final_output_path = final_output_path - self.manual = manual self.manual_window_size = manual_window_size self.max_faces_from_image = max_faces_from_image self.result = [] - - self.devices = ExtractSubprocessor.get_devices_for_config(self.manual, self.type, multi_gpu, cpu_only) - if self.manual or DEBUG: - no_response_time_sec = 999999 - elif nnlib.device.backend == 'plaidML': - no_response_time_sec = 600 - else: - no_response_time_sec = 60 - - super().__init__('Extractor', ExtractSubprocessor.Cli, no_response_time_sec) + self.devices = ExtractSubprocessor.get_devices_for_config(self.type, device_config) - #override - def on_check_run(self): - if len(self.devices) 
== 0: - io.log_err("No devices found to start subprocessor.") - return False - return True + super().__init__('Extractor', ExtractSubprocessor.Cli, + 999999 if type == 'landmarks-manual' or DEBUG else 120) #override def on_clients_initialized(self): - if self.manual == True: + if self.type == 'landmarks-manual': self.wnd_name = 'Manual pass' io.named_window(self.wnd_name) io.capture_mouse(self.wnd_name) @@ -346,7 +352,7 @@ class ExtractSubprocessor(Subprocessor): #override def on_clients_finalized(self): - if self.manual == True: + if self.type == 'landmarks-manual': io.destroy_all_windows() io.progress_bar_close() @@ -357,8 +363,8 @@ class ExtractSubprocessor(Subprocessor): 'image_size': self.image_size, 'face_type': self.face_type, 'max_faces_from_image':self.max_faces_from_image, - 'debug_dir': self.debug_dir, - 'final_output_dir': str(self.final_output_path), + 'output_debug_path': self.output_debug_path, + 'final_output_path': self.final_output_path, 'stdin_fd': sys.stdin.fileno() } @@ -371,15 +377,12 @@ class ExtractSubprocessor(Subprocessor): #override def get_data(self, host_dict): - if not self.manual: - if len (self.input_data) > 0: - return self.input_data.pop(0) - else: + if self.type == 'landmarks-manual': need_remark_face = False redraw_needed = False while len (self.input_data) > 0: data = self.input_data[0] - filename, data_rects, data_landmarks = data.filename, data.rects, data.landmarks + filepath, data_rects, data_landmarks = data.filepath, data.rects, data.landmarks is_frame_done = False if need_remark_face: # need remark image from input data that already has a marked face? @@ -396,21 +399,21 @@ class ExtractSubprocessor(Subprocessor): self.y = ( self.rect[1] + self.rect[3] ) / 2 if len(data_rects) == 0: - if self.cache_original_image[0] == filename: + if self.cache_original_image[0] == filepath: self.original_image = self.cache_original_image[1] else: - self.original_image = imagelib.normalize_channels( cv2_imread( filename ), 3 ) - - self.cache_original_image = (filename, self.original_image ) + self.original_image = imagelib.normalize_channels( cv2_imread( filepath ), 3 ) + + self.cache_original_image = (filepath, self.original_image ) (h,w,c) = self.original_image.shape self.view_scale = 1.0 if self.manual_window_size == 0 else self.manual_window_size / ( h * (16.0/9.0) ) - if self.cache_image[0] == (h,w,c) + (self.view_scale,filename): + if self.cache_image[0] == (h,w,c) + (self.view_scale,filepath): self.image = self.cache_image[1] else: self.image = cv2.resize (self.original_image, ( int(w*self.view_scale), int(h*self.view_scale) ), interpolation=cv2.INTER_LINEAR) - self.cache_image = ( (h,w,c) + (self.view_scale,filename), self.image ) + self.cache_image = ( (h,w,c) + (self.view_scale,filepath), self.image ) (h,w,c) = self.image.shape @@ -526,9 +529,9 @@ class ExtractSubprocessor(Subprocessor): if redraw_needed: redraw_needed = False - return ExtractSubprocessor.Data (filename, landmarks_accurate=self.landmarks_accurate) + return ExtractSubprocessor.Data (filepath, landmarks_accurate=self.landmarks_accurate) else: - return ExtractSubprocessor.Data (filename, rects=[self.rect], landmarks_accurate=self.landmarks_accurate) + return ExtractSubprocessor.Data (filepath, rects=[self.rect], landmarks_accurate=self.landmarks_accurate) else: is_frame_done = True @@ -539,19 +542,22 @@ class ExtractSubprocessor(Subprocessor): io.progress_bar_inc(1) self.extract_needed = True self.rect_locked = False + else: + if len (self.input_data) > 0: + return self.input_data.pop(0) 
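+        # queue drained: returning None signals the Subprocessor host that there is
+        # nothing more to dispatch for now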
return None #override def on_data_return (self, host_dict, data): - if not self.manual: + if not self.type != 'landmarks-manual': self.input_data.insert(0, data) #override def on_result (self, host_dict, data, result): - if self.manual == True: - filename, landmarks = result.filename, result.landmarks - + if self.type == 'landmarks-manual': + filepath, landmarks = result.filepath, result.landmarks + if len(landmarks) != 0 and landmarks[0] is not None: self.landmarks = landmarks[0] @@ -596,56 +602,6 @@ class ExtractSubprocessor(Subprocessor): def get_result(self): return self.result - @staticmethod - def get_devices_for_config (manual, type, multi_gpu, cpu_only): - backend = nnlib.device.backend - if 'cpu' in backend: - cpu_only = True - - if 'rects' in type or type == 'landmarks' or type == 'fanseg': - if not cpu_only and type == 'rects-mt' and backend == "plaidML": #plaidML works with MT very slowly - cpu_only = True - - if not cpu_only: - devices = [] - if not manual and multi_gpu: - devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2) - - if len(devices) == 0: - idx = nnlib.device.getBestValidDeviceIdx() - if idx != -1: - devices = [idx] - - if len(devices) == 0: - cpu_only = True - - result = [] - for idx in devices: - dev_name = nnlib.device.getDeviceName(idx) - dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx) - - count = 1 - - if not manual: - if (type == 'rects-mt' ): - count = int (max (1, dev_vram / 2) ) - - if count == 1: - result += [ (idx, 'GPU', dev_name, dev_vram) ] - else: - for i in range (count): - result += [ (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram) ] - - return result - - if cpu_only: - if manual: - return [ (0, 'CPU', 'CPU', 0 ) ] - else: - return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] - - elif type == 'final': - return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in (range(min(8, multiprocessing.cpu_count())) if not DEBUG else [0]) ] class DeletedFilesSearcherSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): @@ -704,87 +660,100 @@ class DeletedFilesSearcherSubprocessor(Subprocessor): def get_result(self): return self.result -def main(input_dir, - output_dir, - debug_dir=None, - detector='mt', +def main(detector=None, + input_path=None, + output_path=None, + output_debug=None, manual_fix=False, manual_output_debug_fix=False, manual_window_size=1368, image_size=256, face_type='full_face', max_faces_from_image=0, - device_args={}): - - input_path = Path(input_dir) - output_path = Path(output_dir) + cpu_only = False, + force_gpu_idxs = None, + ): face_type = FaceType.fromString(face_type) - multi_gpu = device_args.get('multi_gpu', False) - cpu_only = device_args.get('cpu_only', False) - if not input_path.exists(): - raise ValueError('Input directory not found. Please ensure it exists.') + io.log_err ('Input directory not found. 
Please ensure it exists.') + return + + if detector is None: + io.log_info ("Choose detector type.") + io.log_info ("[0] S3FD") + io.log_info ("[1] manual") + detector = {0:'s3fd', 1:'manual'}[ io.input_int("", 0, [0,1]) ] + + device_config = nn.DeviceConfig.GPUIndexes( force_gpu_idxs or nn.ask_choose_device_idxs(choose_only_one=detector=='manual', suggest_all_gpu=True) ) \ + if not cpu_only else nn.DeviceConfig.CPU() + + output_debug_path = output_path.parent / (output_path.name + '_debug') + + if output_debug is None: + output_debug = io.input_bool (f"Write debug images to {output_debug_path.name}?", False) if output_path.exists(): if not manual_output_debug_fix and input_path != output_path: - output_images_paths = Path_utils.get_image_paths(output_path) + output_images_paths = pathex.get_image_paths(output_path) if len(output_images_paths) > 0: - io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." % (str(output_path)), False ) + io.input(f"WARNING !!! \n {output_path} contains files! \n They will be deleted. \n Press enter to continue.") for filename in output_images_paths: Path(filename).unlink() else: output_path.mkdir(parents=True, exist_ok=True) + input_path_image_paths = pathex.get_image_unique_filestem_paths(input_path, verbose_print_func=io.log_info) + if manual_output_debug_fix: - if debug_dir is None: - raise ValueError('debug-dir must be specified') - detector = 'manual' - io.log_info('Performing re-extract frames which were deleted from _debug directory.') + if not output_debug_path.exists(): + io.log_err(f'{output_debug_path} not found. Re-extract faces with "Write debug images" option.') + return + else: + detector = 'manual' + io.log_info('Performing re-extract frames which were deleted from _debug directory.') - input_path_image_paths = Path_utils.get_image_unique_filestem_paths(input_path, verbose_print_func=io.log_info) - if debug_dir is not None: - debug_output_path = Path(debug_dir) - - if manual_output_debug_fix: - if not debug_output_path.exists(): - raise ValueError("%s not found " % ( str(debug_output_path) )) - - input_path_image_paths = DeletedFilesSearcherSubprocessor (input_path_image_paths, Path_utils.get_image_paths(debug_output_path) ).run() + input_path_image_paths = DeletedFilesSearcherSubprocessor (input_path_image_paths, pathex.get_image_paths(output_debug_path) ).run() input_path_image_paths = sorted (input_path_image_paths) io.log_info('Found %d images.' 
% (len(input_path_image_paths))) + else: + if output_debug_path.exists(): + for filename in pathex.get_image_paths(output_debug_path): + Path(filename).unlink() else: - if debug_output_path.exists(): - for filename in Path_utils.get_image_paths(debug_output_path): - Path(filename).unlink() - else: - debug_output_path.mkdir(parents=True, exist_ok=True) + output_debug_path.mkdir(parents=True, exist_ok=True) images_found = len(input_path_image_paths) faces_detected = 0 if images_found != 0: if detector == 'manual': io.log_info ('Performing manual extract...') - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename, manual=True) for filename in input_path_image_paths ], 'landmarks', image_size, face_type, debug_dir, cpu_only=cpu_only, manual=True, manual_window_size=manual_window_size).run() + data = ExtractSubprocessor ([ ExtractSubprocessor.Data(Path(filename)) for filename in input_path_image_paths ], 'landmarks-manual', image_size, face_type, output_debug_path if output_debug else None, manual_window_size=manual_window_size, device_config=device_config).run() + + io.log_info ('Performing 3rd pass...') + data = ExtractSubprocessor (data, 'final', image_size, face_type, output_debug_path if output_debug else None, final_output_path=output_path, device_config=device_config).run() + else: - io.log_info ('Performing 1st pass...') - data = ExtractSubprocessor ([ ExtractSubprocessor.Data(filename) for filename in input_path_image_paths ], 'rects-'+detector, image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, max_faces_from_image=max_faces_from_image).run() - - io.log_info ('Performing 2nd pass...') - data = ExtractSubprocessor (data, 'landmarks', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False).run() - - io.log_info ('Performing 3rd pass...') - data = ExtractSubprocessor (data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() + io.log_info ('Extracting faces...') + data = ExtractSubprocessor ([ ExtractSubprocessor.Data(Path(filename)) for filename in input_path_image_paths ], + 'all', + image_size, + face_type, + output_debug_path if output_debug else None, + max_faces_from_image=max_faces_from_image, + final_output_path=output_path, + device_config=device_config).run() + faces_detected += sum([d.faces_detected for d in data]) if manual_fix: if all ( np.array ( [ d.faces_detected > 0 for d in data] ) == True ): io.log_info ('All faces are detected, manual fix not needed.') else: - fix_data = [ ExtractSubprocessor.Data(d.filename, manual=True) for d in data if d.faces_detected == 0 ] + fix_data = [ ExtractSubprocessor.Data(d.filepath) for d in data if d.faces_detected == 0 ] io.log_info ('Performing manual fix for %d images...' 
% (len(fix_data)) ) - fix_data = ExtractSubprocessor (fix_data, 'landmarks', image_size, face_type, debug_dir, manual=True, manual_window_size=manual_window_size).run() - fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, debug_dir, multi_gpu=multi_gpu, cpu_only=cpu_only, manual=False, final_output_path=output_path).run() + fix_data = ExtractSubprocessor (fix_data, 'landmarks-manual', image_size, face_type, output_debug_path if output_debug else None, manual_window_size=manual_window_size, device_config=device_config).run() + fix_data = ExtractSubprocessor (fix_data, 'final', image_size, face_type, output_debug_path if output_debug else None, final_output_path=output_path, device_config=device_config).run() faces_detected += sum([d.faces_detected for d in fix_data]) diff --git a/mainscripts/FacesetEnhancer.py b/mainscripts/FacesetEnhancer.py index a63161a..eb4dc20 100644 --- a/mainscripts/FacesetEnhancer.py +++ b/mainscripts/FacesetEnhancer.py @@ -2,22 +2,23 @@ import multiprocessing import shutil from DFLIMG import * -from interact import interact as io -from joblib import Subprocessor -from nnlib import nnlib -from utils import Path_utils -from utils.cv2_utils import * +from core.interact import interact as io +from core.joblib import Subprocessor +from core.leras import nn +from core import pathex +from core.cv2ex import * class FacesetEnhancerSubprocessor(Subprocessor): #override - def __init__(self, image_paths, output_dirpath, multi_gpu=False, cpu_only=False): + def __init__(self, image_paths, output_dirpath, device_config): self.image_paths = image_paths self.output_dirpath = output_dirpath self.result = [] - self.devices = FacesetEnhancerSubprocessor.get_devices_for_config(multi_gpu, cpu_only) - + self.nn_initialize_mp_lock = multiprocessing.Lock() + self.devices = FacesetEnhancerSubprocessor.get_devices_for_config(device_config) + super().__init__('FacesetEnhancer', FacesetEnhancerSubprocessor.Cli, 600) #override @@ -30,7 +31,8 @@ class FacesetEnhancerSubprocessor(Subprocessor): #override def process_info_generator(self): - base_dict = {'output_dirpath':self.output_dirpath} + base_dict = {'output_dirpath':self.output_dirpath, + 'nn_initialize_mp_lock': self.nn_initialize_mp_lock,} for (device_idx, device_type, device_name, device_total_vram_gb) in self.devices: client_dict = base_dict.copy() @@ -59,37 +61,13 @@ class FacesetEnhancerSubprocessor(Subprocessor): return self.result @staticmethod - def get_devices_for_config (multi_gpu, cpu_only): - backend = nnlib.device.backend - if 'cpu' in backend: - cpu_only = True - - if not cpu_only and backend == "plaidML": - cpu_only = True - - if not cpu_only: - devices = [] - if multi_gpu: - devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2) - - if len(devices) == 0: - idx = nnlib.device.getBestValidDeviceIdx() - if idx != -1: - devices = [idx] - - if len(devices) == 0: - cpu_only = True - - result = [] - for idx in devices: - dev_name = nnlib.device.getDeviceName(idx) - dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx) - - result += [ (idx, 'GPU', dev_name, dev_vram) ] - - return result - - if cpu_only: + def get_devices_for_config (device_config): + devices = device_config.devices + cpu_only = len(devices) == 0 + + if not cpu_only: + return [ (device.index, 'GPU', device.name, device.total_mem_gb) for device in devices ] + else: return [ (i, 'CPU', 'CPU%d' % (i), 0 ) for i in range( min(8, multiprocessing.cpu_count() // 2) ) ] class Cli(Subprocessor.Cli): @@ -99,20 +77,23 @@ class 
FacesetEnhancerSubprocessor(Subprocessor): device_idx = client_dict['device_idx'] cpu_only = client_dict['device_type'] == 'CPU' self.output_dirpath = client_dict['output_dirpath'] - - device_config = nnlib.DeviceConfig ( cpu_only=cpu_only, force_gpu_idx=device_idx, allow_growth=True) - nnlib.import_all (device_config) - - device_vram = device_config.gpu_vram_gb[0] + nn_initialize_mp_lock = client_dict['nn_initialize_mp_lock'] + + if cpu_only: + device_config = nn.DeviceConfig.CPU() + device_vram = 99 + else: + device_config = nn.DeviceConfig.GPUIndexes ([device_idx]) + device_vram = device_config.devices[0].total_mem_gb + + nn.initialize (device_config) intro_str = 'Running on %s.' % (client_dict['device_name']) - if not cpu_only and device_vram <= 2: - intro_str += " Recommended to close all programs using this device." - + self.log_info (intro_str) - from facelib import FaceEnhancer - self.fe = FaceEnhancer() + from facelib import FaceEnhancer + self.fe = FaceEnhancer( place_model_on_cpu=(device_vram<=2) ) #override def process_data(self, filepath): @@ -137,7 +118,10 @@ class FacesetEnhancerSubprocessor(Subprocessor): return (0, filepath, None) -def process_folder ( dirpath, multi_gpu=False, cpu_only=False ): +def process_folder ( dirpath, cpu_only=False, force_gpu_idxs=None ): + device_config = nn.DeviceConfig.GPUIndexes( force_gpu_idxs or nn.ask_choose_device_idxs(suggest_all_gpu=True) ) \ + if not cpu_only else nn.DeviceConfig.CPU() + output_dirpath = dirpath.parent / (dirpath.name + '_enhanced') output_dirpath.mkdir (exist_ok=True, parents=True) @@ -146,15 +130,15 @@ def process_folder ( dirpath, multi_gpu=False, cpu_only=False ): io.log_info (f"Enhancing faceset in {dirpath_parts}") io.log_info ( f"Processing to {output_dirpath_parts}") - output_images_paths = Path_utils.get_image_paths(output_dirpath) + output_images_paths = pathex.get_image_paths(output_dirpath) if len(output_images_paths) > 0: for filename in output_images_paths: Path(filename).unlink() - image_paths = [Path(x) for x in Path_utils.get_image_paths( dirpath )] - result = FacesetEnhancerSubprocessor ( image_paths, output_dirpath, multi_gpu=multi_gpu, cpu_only=cpu_only).run() + image_paths = [Path(x) for x in pathex.get_image_paths( dirpath )] + result = FacesetEnhancerSubprocessor ( image_paths, output_dirpath, device_config=device_config).run() - is_merge = io.input_bool (f"\r\nMerge {output_dirpath_parts} to {dirpath_parts} ? 
(y/n skip:y) : ", True) + is_merge = io.input_bool (f"\r\nMerge {output_dirpath_parts} to {dirpath_parts} ?", True) if is_merge: io.log_info (f"Copying processed files to {dirpath_parts}") diff --git a/mainscripts/FacesetRelighter.py b/mainscripts/FacesetRelighter.py deleted file mode 100644 index 238ab74..0000000 --- a/mainscripts/FacesetRelighter.py +++ /dev/null @@ -1,263 +0,0 @@ -import traceback -from pathlib import Path - -import imagelib -from interact import interact as io -from nnlib import DeepPortraitRelighting -from utils import Path_utils -from utils.cv2_utils import * -from DFLIMG import * - -class RelightEditor: - def __init__(self, image_paths, dpr, lighten): - self.image_paths = image_paths - self.dpr = dpr - self.lighten = lighten - - self.current_img_path = None - self.current_img = None - self.current_img_shape = None - self.pick_new_face() - - self.alt_azi_ar = [ [0,0,1.0] ] - self.alt_azi_cur = 0 - - self.mouse_x = self.mouse_y = 9999 - self.screen_status_block = None - self.screen_status_block_dirty = True - self.screen_changed = True - - def pick_new_face(self): - self.current_img_path = self.image_paths[ np.random.randint(len(self.image_paths)) ] - self.current_img = cv2_imread (str(self.current_img_path)) - self.current_img_shape = self.current_img.shape - self.set_screen_changed() - - def set_screen_changed(self): - self.screen_changed = True - - def switch_screen_changed(self): - result = self.screen_changed - self.screen_changed = False - return result - - def make_screen(self): - alt,azi,inten=self.alt_azi_ar[self.alt_azi_cur] - - img = self.dpr.relight (self.current_img, alt, azi, inten, self.lighten) - - h,w,c = img.shape - - lines = ['Pick light directions for whole faceset.', - '[q]-new test face', - '[w][e]-navigate', - '[a][s]-intensity', - '[r]-new [t]-delete [enter]-process', - ''] - - for i, (alt,azi,inten) in enumerate(self.alt_azi_ar): - s = '>:' if self.alt_azi_cur == i else ' :' - s += f'alt=[{ int(alt):03}] azi=[{ int(azi):03}] int=[{inten:01.1f}]' - lines += [ s ] - - lines_count = len(lines) - h_line = 16 - - sh = lines_count * h_line - sw = 400 - sc = c - status_img = np.ones ( (sh,sw,sc) ) * 0.1 - - for i in range(lines_count): - status_img[ i*h_line:(i+1)*h_line, 0:sw] += \ - imagelib.get_text_image ( (h_line,sw,c), lines[i], color=[0.8]*c ) - - status_img = np.clip(status_img*255, 0, 255).astype(np.uint8) - - #combine screens - if sh > h: - img = np.concatenate ([img, np.zeros( (sh-h,w,c), dtype=img.dtype ) ], axis=0) - elif h > sh: - status_img = np.concatenate ([status_img, np.zeros( (h-sh,sw,sc), dtype=img.dtype ) ], axis=0) - - img = np.concatenate ([img, status_img], axis=1) - - return img - - def run(self): - wnd_name = "Relighter" - io.named_window(wnd_name) - io.capture_keys(wnd_name) - io.capture_mouse(wnd_name) - - zoom_factor = 1.0 - - is_angle_editing = False - - is_exit = False - while not is_exit: - io.process_messages(0.0001) - - mouse_events = io.get_mouse_events(wnd_name) - for ev in mouse_events: - (x, y, ev, flags) = ev - if ev == io.EVENT_LBUTTONDOWN: - is_angle_editing = True - - if ev == io.EVENT_LBUTTONUP: - is_angle_editing = False - - if is_angle_editing: - h,w,c = self.current_img_shape - - alt,azi,inten = self.alt_azi_ar[self.alt_azi_cur] - alt = np.clip ( ( 0.5-y/w )*2.0, -1, 1)*90 - azi = np.clip ( (x / h - 0.5)*2.0, -1, 1)*90 - self.alt_azi_ar[self.alt_azi_cur] = (alt,azi,inten) - - - self.set_screen_changed() - - key_events = io.get_key_events(wnd_name) - key, chr_key, ctrl_pressed, alt_pressed, shift_pressed 
= key_events[-1] if len(key_events) > 0 else (0,0,False,False,False) - - if key != 0: - if chr_key == 'q': - self.pick_new_face() - elif chr_key == 'w': - self.alt_azi_cur = np.clip (self.alt_azi_cur-1, 0, len(self.alt_azi_ar)-1) - self.set_screen_changed() - elif chr_key == 'e': - self.alt_azi_cur = np.clip (self.alt_azi_cur+1, 0, len(self.alt_azi_ar)-1) - self.set_screen_changed() - elif chr_key == 'r': - #add direction - self.alt_azi_ar += [ [0,0,1.0] ] - self.alt_azi_cur +=1 - self.set_screen_changed() - elif chr_key == 't': - if len(self.alt_azi_ar) > 1: - self.alt_azi_ar.pop(self.alt_azi_cur) - self.alt_azi_cur = np.clip (self.alt_azi_cur, 0, len(self.alt_azi_ar)-1) - self.set_screen_changed() - elif chr_key == 'a': - alt,azi,inten = self.alt_azi_ar[self.alt_azi_cur] - inten = np.clip ( inten-0.1, 0.0, 1.0) - self.alt_azi_ar[self.alt_azi_cur] = (alt,azi,inten) - self.set_screen_changed() - elif chr_key == 's': - alt,azi,inten = self.alt_azi_ar[self.alt_azi_cur] - inten = np.clip ( inten+0.1, 0.0, 1.0) - self.alt_azi_ar[self.alt_azi_cur] = (alt,azi,inten) - self.set_screen_changed() - elif key == 27 or chr_key == '\r' or chr_key == '\n': #esc - is_exit = True - - if self.switch_screen_changed(): - screen = self.make_screen() - if zoom_factor != 1.0: - h,w,c = screen.shape - screen = cv2.resize ( screen, ( int(w*zoom_factor), int(h*zoom_factor) ) ) - io.show_image (wnd_name, screen ) - - io.destroy_window(wnd_name) - - return self.alt_azi_ar - -def relight(input_dir, lighten=None, random_one=None): - if lighten is None: - lighten = io.input_bool ("Lighten the faces? ( y/n default:n ?:help ) : ", False, help_message="Lighten the faces instead of shadow. May produce artifacts." ) - - if io.is_colab(): - io.log_info("In colab version you cannot choose light directions manually.") - manual = False - else: - manual = io.input_bool ("Choose light directions manually? ( y/n default:y ) : ", True) - - if not manual: - if random_one is None: - random_one = io.input_bool ("Relight the faces only with one random direction and random intensity? ( y/n default:y ?:help) : ", True, help_message="Otherwise faceset will be relighted with predefined 7 light directions but with random intensity.") - - image_paths = [Path(x) for x in Path_utils.get_image_paths(input_dir)] - filtered_image_paths = [] - for filepath in io.progress_bar_generator(image_paths, "Collecting fileinfo"): - try: - dflimg = DFLIMG.load (Path(filepath)) - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - else: - if not dflimg.get_relighted(): - filtered_image_paths += [filepath] - except: - io.log_err (f"Exception occured while processing file {filepath.name}. 
Error: {traceback.format_exc()}") - image_paths = filtered_image_paths - - if len(image_paths) == 0: - io.log_info("No files to process.") - return - - dpr = DeepPortraitRelighting() - - if manual: - alt_azi_ar = RelightEditor(image_paths, dpr, lighten).run() - - for filepath in io.progress_bar_generator(image_paths, "Relighting"): - try: - dflimg = DFLIMG.load ( Path(filepath) ) - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - else: - if dflimg.get_relighted(): - continue - img = cv2_imread (str(filepath)) - - if random_one: - alt = np.random.randint(-90,91) - azi = np.random.randint(-90,91) - inten = np.random.random()*0.3+0.3 - relighted_imgs = [dpr.relight(img,alt=alt,azi=azi,intensity=inten,lighten=lighten)] - else: - if not manual and not random_one: - inten = np.random.random()*0.3+0.3 - alt_azi_ar = [(60,0,inten), (60,60,inten), (0,60,inten), (-60,60,inten), (-60,0,inten), (-60,-60,inten), (0,-60,inten), (60,-60,inten)] - - relighted_imgs = [dpr.relight(img,alt=alt,azi=azi,intensity=inten,lighten=lighten) for (alt,azi,inten) in alt_azi_ar ] - - i = 0 - for i,relighted_img in enumerate(relighted_imgs): - im_flags = [] - if filepath.suffix == '.jpg': - im_flags += [int(cv2.IMWRITE_JPEG_QUALITY), 100] - - while True: - relighted_filepath = filepath.parent / (filepath.stem+f'_relighted_{i}'+filepath.suffix) - if not relighted_filepath.exists(): - break - i += 1 - - cv2_imwrite (relighted_filepath, relighted_img ) - - dflimg.remove_source_filename() - dflimg.embed_and_set (relighted_filepath, relighted=True ) - except: - io.log_err (f"Exception occured while processing file {filepath.name}. Error: {traceback.format_exc()}") - -def delete_relighted(input_dir): - input_path = Path(input_dir) - image_paths = [Path(x) for x in Path_utils.get_image_paths(input_path)] - - files_to_delete = [] - for filepath in io.progress_bar_generator(image_paths, "Loading"): - dflimg = DFLIMG.load ( Path(filepath) ) - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - continue - else: - if dflimg.get_relighted(): - files_to_delete += [filepath] - - for file in io.progress_bar_generator(files_to_delete, "Deleting"): - file.unlink() diff --git a/mainscripts/MaskEditorTool.py b/mainscripts/MaskEditorTool.py index 125926f..4eee26f 100644 --- a/mainscripts/MaskEditorTool.py +++ b/mainscripts/MaskEditorTool.py @@ -8,13 +8,13 @@ import cv2 import numpy as np import numpy.linalg as npl -import imagelib +from core import imagelib from DFLIMG import * from facelib import LandmarksProcessor -from imagelib import IEPolys -from interact import interact as io -from utils import Path_utils -from utils.cv2_utils import * +from core.imagelib import IEPolys +from core.interact import interact as io +from core import pathex +from core.cv2ex import * class MaskEditor: @@ -343,7 +343,7 @@ def mask_editor_main(input_dir, confirmed_dir=None, skipped_dir=None, no_default skipped_path.mkdir(parents=True) if not no_default_mask: - eyebrows_expand_mod = np.clip ( io.input_int ("Default eyebrows expand modifier? 
(0..400, skip:100) : ", 100), 0, 400 ) / 100.0 + eyebrows_expand_mod = np.clip ( io.input_int ("Default eyebrows expand modifier?", 100, add_info="0..400"), 0, 400 ) / 100.0 else: eyebrows_expand_mod = None @@ -354,7 +354,7 @@ def mask_editor_main(input_dir, confirmed_dir=None, skipped_dir=None, no_default cached_images = {} - image_paths = [ Path(x) for x in Path_utils.get_image_paths(input_path)] + image_paths = [ Path(x) for x in pathex.get_image_paths(input_path)] done_paths = [] done_images_types = {} image_paths_total = len(image_paths) diff --git a/mainscripts/Converter.py b/mainscripts/Merger.py similarity index 63% rename from mainscripts/Converter.py rename to mainscripts/Merger.py index 614a5a0..ba6af29 100644 --- a/mainscripts/Converter.py +++ b/mainscripts/Merger.py @@ -13,23 +13,23 @@ import cv2 import numpy as np import numpy.linalg as npla -import imagelib +from core import imagelib import samplelib -from converters import (ConverterConfig, ConvertFaceAvatar, ConvertMasked, - FrameInfo) -from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from joblib import SubprocessFunctionCaller, Subprocessor -from nnlib import TernausNet -from utils import Path_utils -from utils.cv2_utils import * +from merger import (MergerConfig, MergeFaceAvatar, MergeMasked, + FrameInfo) from DFLIMG import DFLIMG +from facelib import FaceEnhancer, FaceType, LandmarksProcessor, TernausNet +from core.interact import interact as io +from core.joblib import SubprocessFunctionCaller, Subprocessor +from core.leras import nn +from core import pathex +from core.cv2ex import * -from .ConverterScreen import Screen, ScreenManager +from .MergerScreen import Screen, ScreenManager -CONVERTER_DEBUG = False +MERGER_DEBUG = False -class ConvertSubprocessor(Subprocessor): +class MergeSubprocessor(Subprocessor): class Frame(object): def __init__(self, prev_temporal_frame_infos=None, @@ -76,18 +76,14 @@ class ConvertSubprocessor(Subprocessor): self.predictor_func = client_dict['predictor_func'] self.predictor_input_shape = client_dict['predictor_input_shape'] self.superres_func = client_dict['superres_func'] + self.fanseg_input_size = client_dict['fanseg_input_size'] + self.fanseg_extract_func = client_dict['fanseg_extract_func'] #transfer and set stdin in order to work code.interact in debug subprocess stdin_fd = client_dict['stdin_fd'] if stdin_fd is not None: sys.stdin = os.fdopen(stdin_fd) - from nnlib import nnlib - #model process ate all GPU mem, - #so we cannot use GPU for any TF operations in converter processes - #therefore forcing active_DeviceConfig to CPU only - nnlib.active_DeviceConfig = nnlib.DeviceConfig (cpu_only=True) - def blursharpen_func (img, sharpen_mode=0, kernel_size=3, amount=100): if kernel_size % 2 == 0: kernel_size += 1 @@ -118,37 +114,6 @@ class ConvertSubprocessor(Subprocessor): return img self.blursharpen_func = blursharpen_func - self.fanseg_by_face_type = {} - self.fanseg_input_size = 256 - - def fanseg_extract(face_type, *args, **kwargs): - fanseg = self.fanseg_by_face_type.get(face_type, None) - if self.fanseg_by_face_type.get(face_type, None) is None: - fanseg = TernausNet("FANSeg", self.fanseg_input_size , FaceType.toString( face_type ) ) - self.fanseg_by_face_type[face_type] = fanseg - - return fanseg.extract(*args, **kwargs) - - self.fanseg_extract_func = fanseg_extract - - self.fanchq_by_face_type = {} - self.fanchq_input_size = 256 - def fanchq_extract(face_type, *args, **kwargs): - fanchq = self.fanchq_by_face_type.get(face_type, None) - if 
self.fanchq_by_face_type.get(face_type, None) is None: - fanchq = TernausNet("FANCHQ", self.fanchq_input_size , FaceType.toString( face_type ) ) - self.fanchq_by_face_type[face_type] = fanchq - - return fanchq.extract(*args, **kwargs) - - self.fanchq_extract_func = fanchq_extract - - import ebsynth - def ebs_ct(*args, **kwargs): - return ebsynth.color_transfer(*args, **kwargs) - - self.ebs_ct_func = ebs_ct - return None #override @@ -156,7 +121,6 @@ class ConvertSubprocessor(Subprocessor): cfg = pf.cfg.copy() cfg.blursharpen_func = self.blursharpen_func cfg.superres_func = self.superres_func - cfg.ebs_ct_func = self.ebs_ct_func frame_info = pf.frame_info @@ -169,15 +133,15 @@ class ConvertSubprocessor(Subprocessor): if len(landmarks_list) == 0: self.log_info ( 'no faces found for %s, copying without faces' % (filename_path.name) ) - + if cfg.export_mask_alpha: img_bgr = cv2_imread(filename) h,w,c = img_bgr.shape if c == 1: - img_bgr = np.repeat(img_bgr, 3, -1) + img_bgr = np.repeat(img_bgr, 3, -1) if c == 3: img_bgr = np.concatenate ([img_bgr, np.zeros((h,w,1), dtype=img_bgr.dtype) ], axis=-1) - + cv2_imwrite (output_filename, img_bgr) else: if filename_path.suffix == '.png': @@ -190,23 +154,21 @@ class ConvertSubprocessor(Subprocessor): img_bgr = cv2_imread(filename) pf.image = img_bgr else: - if cfg.type == ConverterConfig.TYPE_MASKED: + if cfg.type == MergerConfig.TYPE_MASKED: cfg.fanseg_input_size = self.fanseg_input_size cfg.fanseg_extract_func = self.fanseg_extract_func - cfg.fanchq_input_size = self.fanchq_input_size - cfg.fanchq_extract_func = self.fanchq_extract_func try: - final_img = ConvertMasked (self.predictor_func, self.predictor_input_shape, cfg, frame_info) + final_img = MergeMasked (self.predictor_func, self.predictor_input_shape, cfg, frame_info) except Exception as e: e_str = traceback.format_exc() if 'MemoryError' in e_str: raise Subprocessor.SilenceException else: - raise Exception( 'Error while converting file [%s]: %s' % (filename, e_str) ) + raise Exception( 'Error while merging file [%s]: %s' % (filename, e_str) ) - elif cfg.type == ConverterConfig.TYPE_FACE_AVATAR: - final_img = ConvertFaceAvatar (self.predictor_func, self.predictor_input_shape, + elif cfg.type == MergerConfig.TYPE_FACE_AVATAR: + final_img = MergeFaceAvatar (self.predictor_func, self.predictor_input_shape, cfg, pf.prev_temporal_frame_infos, pf.frame_info, pf.next_temporal_frame_infos ) @@ -225,32 +187,43 @@ class ConvertSubprocessor(Subprocessor): return pf.frame_info.filename #override - def __init__(self, is_interactive, converter_session_filepath, predictor_func, predictor_input_shape, converter_config, frames, output_path, model_iter): + def __init__(self, is_interactive, merger_session_filepath, predictor_func, predictor_input_shape, merger_config, frames, output_path, model_iter): if len (frames) == 0: raise ValueError ("len (frames) == 0") - super().__init__('Converter', ConvertSubprocessor.Cli, 86400 if CONVERTER_DEBUG else 60, io_loop_sleep_time=0.001, initialize_subprocesses_in_serial=False) + super().__init__('Merger', MergeSubprocessor.Cli, 86400 if MERGER_DEBUG else 60, io_loop_sleep_time=0.001) self.is_interactive = is_interactive - self.converter_session_filepath = Path(converter_session_filepath) - self.converter_config = converter_config - - #dummy predict and sleep, tensorflow caching kernels. 
If remove it, sometime conversion speed can be x2 slower - predictor_func (dummy_predict=True) - time.sleep(2) + self.merger_session_filepath = Path(merger_session_filepath) + self.merger_config = merger_config self.predictor_func_host, self.predictor_func = SubprocessFunctionCaller.make_pair(predictor_func) self.predictor_input_shape = predictor_input_shape - self.dcscn = None - self.ranksrgan = None - def superres_func(mode, *args, **kwargs): + self.face_enhancer = None + def superres_func(mode, face_bgr): if mode == 1: - if self.ranksrgan is None: - self.ranksrgan = imagelib.RankSRGAN() - return self.ranksrgan.upscale(*args, **kwargs) + if self.face_enhancer is None: + self.face_enhancer = FaceEnhancer(place_model_on_cpu=True) - self.dcscn_host, self.superres_func = SubprocessFunctionCaller.make_pair(superres_func) + return self.face_enhancer.enhance (face_bgr, is_tanh=True, preserve_size=False) + + self.superres_host, self.superres_func = SubprocessFunctionCaller.make_pair(superres_func) + + self.fanseg_by_face_type = {} + self.fanseg_input_size = 256 + def fanseg_extract_func(face_type, *args, **kwargs): + fanseg = self.fanseg_by_face_type.get(face_type, None) + if self.fanseg_by_face_type.get(face_type, None) is None: + cpu_only = len(nn.getCurrentDeviceConfig().devices) == 0 + + with nn.tf.device('/CPU:0' if cpu_only else '/GPU:0'): + fanseg = TernausNet("FANSeg", self.fanseg_input_size , FaceType.toString( face_type ), place_model_on_cpu=True ) + + self.fanseg_by_face_type[face_type] = fanseg + return fanseg.extract(*args, **kwargs) + + self.fanseg_host, self.fanseg_extract_func = SubprocessFunctionCaller.make_pair(fanseg_extract_func) self.output_path = output_path self.model_iter = model_iter @@ -258,11 +231,11 @@ class ConvertSubprocessor(Subprocessor): self.prefetch_frame_count = self.process_count = min(6,multiprocessing.cpu_count()) session_data = None - if self.is_interactive and self.converter_session_filepath.exists(): + if self.is_interactive and self.merger_session_filepath.exists(): - if io.input_bool ("Use saved session? 
(y/n skip:y) : ", True): + if io.input_bool ("Use saved session?", True): try: - with open( str(self.converter_session_filepath), "rb") as f: + with open( str(self.merger_session_filepath), "rb") as f: session_data = pickle.loads(f.read()) except Exception as e: pass @@ -293,12 +266,12 @@ class ConvertSubprocessor(Subprocessor): break if frames_equal: - io.log_info ('Using saved session from ' + '/'.join (self.converter_session_filepath.parts[-2:]) ) + io.log_info ('Using saved session from ' + '/'.join (self.merger_session_filepath.parts[-2:]) ) for frame in s_frames: if frame.cfg is not None: - #recreate ConverterConfig class using constructor with get_config() as dict params - #so if any new param will be added, old converter session will work properly + #recreate MergerConfig class using constructor with get_config() as dict params + #so if any new param will be added, old merger session will work properly frame.cfg = frame.cfg.__class__( **frame.cfg.get_config() ) self.frames = s_frames @@ -328,10 +301,10 @@ class ConvertSubprocessor(Subprocessor): session_data = None if session_data is None: - for filename in Path_utils.get_image_paths(self.output_path): #remove all images in output_path + for filename in pathex.get_image_paths(self.output_path): #remove all images in output_path Path(filename).unlink() - frames[0].cfg = self.converter_config.copy() + frames[0].cfg = self.merger_config.copy() for i in range( len(self.frames) ): frame = self.frames[i] @@ -342,7 +315,7 @@ class ConvertSubprocessor(Subprocessor): #override def process_info_generator(self): - r = [0] if CONVERTER_DEBUG else range(self.process_count) + r = [0] if MERGER_DEBUG else range(self.process_count) for i in r: yield 'CPU%d' % (i), {}, {'device_idx': i, @@ -350,28 +323,68 @@ class ConvertSubprocessor(Subprocessor): 'predictor_func': self.predictor_func, 'predictor_input_shape' : self.predictor_input_shape, 'superres_func': self.superres_func, - 'stdin_fd': sys.stdin.fileno() if CONVERTER_DEBUG else None + 'fanseg_input_size' : self.fanseg_input_size, + 'fanseg_extract_func' : self.fanseg_extract_func, + 'stdin_fd': sys.stdin.fileno() if MERGER_DEBUG else None } #overridable optional def on_clients_initialized(self): - io.progress_bar ("Converting", len (self.frames_idxs), initial=len(self.frames_done_idxs) ) + io.progress_bar ("Merging", len (self.frames_idxs), initial=len(self.frames_done_idxs) ) self.process_remain_frames = not self.is_interactive self.is_interactive_quitting = not self.is_interactive if self.is_interactive: help_images = { - ConverterConfig.TYPE_MASKED : cv2_imread ( str(Path(__file__).parent / 'gfx' / 'help_converter_masked.jpg') ), - ConverterConfig.TYPE_FACE_AVATAR : cv2_imread ( str(Path(__file__).parent / 'gfx' / 'help_converter_face_avatar.jpg') ), + MergerConfig.TYPE_MASKED : cv2_imread ( str(Path(__file__).parent / 'gfx' / 'help_merger_masked.jpg') ), + MergerConfig.TYPE_FACE_AVATAR : cv2_imread ( str(Path(__file__).parent / 'gfx' / 'help_merger_face_avatar.jpg') ), } self.main_screen = Screen(initial_scale_to_width=1368, image=None, waiting_icon=True) - self.help_screen = Screen(initial_scale_to_height=768, image=help_images[self.converter_config.type], waiting_icon=False) - self.screen_manager = ScreenManager( "Converter", [self.main_screen, self.help_screen], capture_keys=True ) + self.help_screen = Screen(initial_scale_to_height=768, image=help_images[self.merger_config.type], waiting_icon=False) + self.screen_manager = ScreenManager( "Merger", [self.main_screen, self.help_screen], 
capture_keys=True ) self.screen_manager.set_current (self.help_screen) self.screen_manager.show_current() + self.masked_keys_funcs = { + '`' : lambda cfg,shift_pressed: cfg.set_mode(0), + '1' : lambda cfg,shift_pressed: cfg.set_mode(1), + '2' : lambda cfg,shift_pressed: cfg.set_mode(2), + '3' : lambda cfg,shift_pressed: cfg.set_mode(3), + '4' : lambda cfg,shift_pressed: cfg.set_mode(4), + '5' : lambda cfg,shift_pressed: cfg.set_mode(5), + '6' : lambda cfg,shift_pressed: cfg.set_mode(6), + '7' : lambda cfg,shift_pressed: cfg.set_mode(7), + '8' : lambda cfg,shift_pressed: cfg.set_mode(8), + 'q' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(1 if not shift_pressed else 5), + 'a' : lambda cfg,shift_pressed: cfg.add_hist_match_threshold(-1 if not shift_pressed else -5), + 'w' : lambda cfg,shift_pressed: cfg.add_erode_mask_modifier(1 if not shift_pressed else 5), + 's' : lambda cfg,shift_pressed: cfg.add_erode_mask_modifier(-1 if not shift_pressed else -5), + 'e' : lambda cfg,shift_pressed: cfg.add_blur_mask_modifier(1 if not shift_pressed else 5), + 'd' : lambda cfg,shift_pressed: cfg.add_blur_mask_modifier(-1 if not shift_pressed else -5), + 'r' : lambda cfg,shift_pressed: cfg.add_motion_blur_power(1 if not shift_pressed else 5), + 'f' : lambda cfg,shift_pressed: cfg.add_motion_blur_power(-1 if not shift_pressed else -5), + 'y' : lambda cfg,shift_pressed: cfg.add_blursharpen_amount(1 if not shift_pressed else 5), + 'h' : lambda cfg,shift_pressed: cfg.add_blursharpen_amount(-1 if not shift_pressed else -5), + 'u' : lambda cfg,shift_pressed: cfg.add_output_face_scale(1 if not shift_pressed else 5), + 'j' : lambda cfg,shift_pressed: cfg.add_output_face_scale(-1 if not shift_pressed else -5), + 'i' : lambda cfg,shift_pressed: cfg.add_image_denoise_power(1 if not shift_pressed else 5), + 'k' : lambda cfg,shift_pressed: cfg.add_image_denoise_power(-1 if not shift_pressed else -5), + 'o' : lambda cfg,shift_pressed: cfg.add_bicubic_degrade_power(1 if not shift_pressed else 5), + 'l' : lambda cfg,shift_pressed: cfg.add_bicubic_degrade_power(-1 if not shift_pressed else -5), + 'p' : lambda cfg,shift_pressed: cfg.add_color_degrade_power(1 if not shift_pressed else 5), + ';' : lambda cfg,shift_pressed: cfg.add_color_degrade_power(-1), + ':' : lambda cfg,shift_pressed: cfg.add_color_degrade_power(-5), + 'z' : lambda cfg,shift_pressed: cfg.toggle_masked_hist_match(), + 'x' : lambda cfg,shift_pressed: cfg.toggle_mask_mode(), + 'c' : lambda cfg,shift_pressed: cfg.toggle_color_transfer_mode(), + 'v' : lambda cfg,shift_pressed: cfg.toggle_super_resolution_mode(), + 'b' : lambda cfg,shift_pressed: cfg.toggle_export_mask_alpha(), + 'n' : lambda cfg,shift_pressed: cfg.toggle_sharpen_mode(), + } + self.masked_keys = list(self.masked_keys_funcs.keys()) + #overridable optional def on_clients_finalized(self): io.progress_bar_close() @@ -389,22 +402,24 @@ class ConvertSubprocessor(Subprocessor): 'frames_done_idxs': self.frames_done_idxs, 'model_iter' : self.model_iter, } - self.converter_session_filepath.write_bytes( pickle.dumps(session_data) ) + self.merger_session_filepath.write_bytes( pickle.dumps(session_data) ) - io.log_info ("Session is saved to " + '/'.join (self.converter_session_filepath.parts[-2:]) ) + io.log_info ("Session is saved to " + '/'.join (self.merger_session_filepath.parts[-2:]) ) - cfg_change_keys = ['`','1', '2', '3', '4', '5', '6', '7', '8', - 'q', 'a', 'w', 's', 'e', 'd', 'r', 'f', 'y','h','u','j','i','k','o','l','p', ';',':',#'t', 'g', - 'z', 'x', 'c', 'v', 'b','n' ] #override def 
on_tick(self): self.predictor_func_host.process_messages() - self.dcscn_host.process_messages() + self.superres_host.process_messages() + self.fanseg_host.process_messages() go_prev_frame = False + go_first_frame = False go_prev_frame_overriding_cfg = False + go_first_frame_overriding_cfg = False + go_next_frame = self.process_remain_frames go_next_frame_overriding_cfg = False + go_last_frame_overriding_cfg = False cur_frame = None if len(self.frames_idxs) != 0: @@ -423,7 +438,8 @@ class ConvertSubprocessor(Subprocessor): if cur_frame.image is None: cur_frame.image = cv2_imread ( cur_frame.output_filename) if cur_frame.image is None: - cur_frame.is_done = False #unable to read? recompute then + # unable to read? recompute then + cur_frame.is_done = False cur_frame.is_shown = False self.main_screen.set_image(cur_frame.image) else: @@ -446,119 +462,79 @@ class ConvertSubprocessor(Subprocessor): if key == 27: #esc self.is_interactive_quitting = True elif self.screen_manager.get_current() is self.main_screen: - if chr_key in self.cfg_change_keys: + + if self.merger_config.type == MergerConfig.TYPE_MASKED and chr_key in self.masked_keys: self.process_remain_frames = False if cur_frame is not None: cfg = cur_frame.cfg prev_cfg = cfg.copy() - if cfg.type == ConverterConfig.TYPE_MASKED: - if chr_key == '`': - cfg.set_mode(0) - elif key >= ord('1') and key <= ord('8'): - cfg.set_mode( key - ord('0') ) - elif chr_key == 'q': - cfg.add_hist_match_threshold(1 if not shift_pressed else 5) - elif chr_key == 'a': - cfg.add_hist_match_threshold(-1 if not shift_pressed else -5) - elif chr_key == 'w': - cfg.add_erode_mask_modifier(1 if not shift_pressed else 5) - elif chr_key == 's': - cfg.add_erode_mask_modifier(-1 if not shift_pressed else -5) - elif chr_key == 'e': - cfg.add_blur_mask_modifier(1 if not shift_pressed else 5) - elif chr_key == 'd': - cfg.add_blur_mask_modifier(-1 if not shift_pressed else -5) - elif chr_key == 'r': - cfg.add_motion_blur_power(1 if not shift_pressed else 5) - elif chr_key == 'f': - cfg.add_motion_blur_power(-1 if not shift_pressed else -5) - elif chr_key == 'y': - cfg.add_blursharpen_amount(1 if not shift_pressed else 5) - elif chr_key == 'h': - cfg.add_blursharpen_amount(-1 if not shift_pressed else -5) - elif chr_key == 'u': - cfg.add_output_face_scale(1 if not shift_pressed else 5) - elif chr_key == 'j': - cfg.add_output_face_scale(-1 if not shift_pressed else -5) - elif chr_key == 'i': - cfg.add_image_denoise_power(1 if not shift_pressed else 5) - elif chr_key == 'k': - cfg.add_image_denoise_power(-1 if not shift_pressed else -5) - elif chr_key == 'o': - cfg.add_bicubic_degrade_power(1 if not shift_pressed else 5) - elif chr_key == 'l': - cfg.add_bicubic_degrade_power(-1 if not shift_pressed else -5) - - elif chr_key == 'p': - cfg.add_color_degrade_power(1 if not shift_pressed else 5) - elif chr_key == ';': - cfg.add_color_degrade_power(-1) - elif chr_key == ':': - cfg.add_color_degrade_power(-5) - - elif chr_key == 'z': - cfg.toggle_masked_hist_match() - elif chr_key == 'x': - cfg.toggle_mask_mode() - elif chr_key == 'c': - cfg.toggle_color_transfer_mode() - elif chr_key == 'v': - cfg.toggle_super_resolution_mode() - elif chr_key == 'b': - cfg.toggle_export_mask_alpha() - elif chr_key == 'n': - cfg.toggle_sharpen_mode() - - else: - if chr_key == 'y': - cfg.add_blursharpen_amount(1 if not shift_pressed else 5) - elif chr_key == 'h': - cfg.add_blursharpen_amount(-1 if not shift_pressed else -5) - elif chr_key == 's': - cfg.toggle_add_source_image() - elif chr_key == 'v': 
- cfg.toggle_super_resolution_mode() - elif chr_key == 'n': - cfg.toggle_sharpen_mode() + if cfg.type == MergerConfig.TYPE_MASKED: + self.masked_keys_funcs[chr_key](cfg, shift_pressed) if prev_cfg != cfg: io.log_info ( cfg.to_string(cur_frame.frame_info.filename_short) ) cur_frame.is_done = False cur_frame.is_shown = False else: + if chr_key == ',' or chr_key == 'm': self.process_remain_frames = False go_prev_frame = True - go_prev_frame_overriding_cfg = chr_key == 'm' + + if chr_key == ',': + if shift_pressed: + go_first_frame = True + + elif chr_key == 'm': + if not shift_pressed: + go_prev_frame_overriding_cfg = True + else: + go_first_frame_overriding_cfg = True + elif chr_key == '.' or chr_key == '/': self.process_remain_frames = False go_next_frame = True - go_next_frame_overriding_cfg = chr_key == '/' - elif chr_key == '\r' or chr_key == '\n': - self.process_remain_frames = not self.process_remain_frames + + if chr_key == '.': + if shift_pressed: + self.process_remain_frames = not self.process_remain_frames + + elif chr_key == '/': + if not shift_pressed: + go_next_frame_overriding_cfg = True + else: + go_last_frame_overriding_cfg = True + elif chr_key == '-': self.screen_manager.get_current().diff_scale(-0.1) elif chr_key == '=': self.screen_manager.get_current().diff_scale(0.1) - if go_prev_frame: if cur_frame is None or cur_frame.is_done: if cur_frame is not None: cur_frame.image = None - if len(self.frames_done_idxs) > 0: - prev_frame = self.frames[self.frames_done_idxs.pop()] - self.frames_idxs.insert(0, prev_frame.idx) - prev_frame.is_shown = False - io.progress_bar_inc(-1) + while True: + if len(self.frames_done_idxs) > 0: + prev_frame = self.frames[self.frames_done_idxs.pop()] + self.frames_idxs.insert(0, prev_frame.idx) + prev_frame.is_shown = False + io.progress_bar_inc(-1) - if cur_frame is not None and go_prev_frame_overriding_cfg: - if prev_frame.cfg != cur_frame.cfg: - prev_frame.cfg = cur_frame.cfg.copy() - prev_frame.is_done = False + if cur_frame is not None and (go_prev_frame_overriding_cfg or go_first_frame_overriding_cfg): + if prev_frame.cfg != cur_frame.cfg: + prev_frame.cfg = cur_frame.cfg.copy() + prev_frame.is_done = False + + cur_frame = prev_frame + + if go_first_frame_overriding_cfg or go_first_frame: + if len(self.frames_done_idxs) > 0: + continue + break elif go_next_frame: if cur_frame is not None and cur_frame.is_done: @@ -568,26 +544,33 @@ class ConvertSubprocessor(Subprocessor): self.frames_idxs.pop(0) io.progress_bar_inc(1) + f = self.frames + if len(self.frames_idxs) != 0: - next_frame = self.frames[ self.frames_idxs[0] ] - - if go_next_frame_overriding_cfg: - f = self.frames - for i in range( next_frame.idx, len(self.frames) ): - f[i].cfg = None - f[i].is_shown = False - - if next_frame.cfg is None or next_frame.is_shown == False: #next frame is never shown or override current cfg to next frames and the prefetches - for i in range( min(len(self.frames_idxs), self.prefetch_frame_count) ): - frame = self.frames[ self.frames_idxs[i] ] - - if frame.cfg is None or frame.cfg != cur_frame.cfg: - frame.cfg = cur_frame.cfg.copy() - frame.is_done = False #initiate solve again - - + next_frame = f[ self.frames_idxs[0] ] next_frame.is_shown = False + if go_next_frame_overriding_cfg or go_last_frame_overriding_cfg: + + if go_next_frame_overriding_cfg: + to_frames = next_frame.idx+1 + else: + to_frames = len(f) + + for i in range( next_frame.idx, to_frames ): + f[i].cfg = None + + for i in range( min(len(self.frames_idxs), self.prefetch_frame_count) ): + frame 
= f[ self.frames_idxs[i] ] + if frame.cfg is None: + if i == 0: + frame.cfg = cur_frame.cfg.copy() + else: + frame.cfg = f[ self.frames_idxs[i-1] ].cfg.copy() + + frame.is_done = False #initiate solve again + frame.is_shown = False + if len(self.frames_idxs) == 0: self.process_remain_frames = False @@ -619,7 +602,7 @@ class ConvertSubprocessor(Subprocessor): if not frame.is_done and not frame.is_processing and frame.cfg is not None: frame.is_processing = True - return ConvertSubprocessor.ProcessingFrame(idx=frame.idx, + return MergeSubprocessor.ProcessingFrame(idx=frame.idx, cfg=frame.cfg.copy(), prev_temporal_frame_infos=frame.prev_temporal_frame_infos, frame_info=frame.frame_info, @@ -633,19 +616,18 @@ class ConvertSubprocessor(Subprocessor): def get_result(self): return 0 -def main (args, device_args): - io.log_info ("Running converter.\r\n") - - training_data_src_dir = args.get('training_data_src_dir', None) - training_data_src_path = Path(training_data_src_dir) if training_data_src_dir is not None else None - aligned_dir = args.get('aligned_dir', None) - avaperator_aligned_dir = args.get('avaperator_aligned_dir', None) +def main (model_class_name=None, + saved_models_path=None, + training_data_src_path=None, + force_model_name=None, + input_path=None, + output_path=None, + aligned_path=None, + force_gpu_idxs=None, + cpu_only=None): + io.log_info ("Running merger.\r\n") try: - input_path = Path(args['input_dir']) - output_path = Path(args['output_dir']) - model_path = Path(args['model_dir']) - if not input_path.exists(): io.log_err('Input directory not found. Please ensure it exists.') return @@ -653,54 +635,53 @@ def main (args, device_args): if not output_path.exists(): output_path.mkdir(parents=True, exist_ok=True) - if not model_path.exists(): + if not saved_models_path.exists(): io.log_err('Model directory not found. Please ensure it exists.') return - is_interactive = io.input_bool ("Use interactive converter? (y/n skip:y) : ", True) if not io.is_colab() else False + is_interactive = io.input_bool ("Use interactive merger?", True) if not io.is_colab() else False import models - model = models.import_model( args['model_name'])(model_path, device_args=device_args, training_data_src_path=training_data_src_path) - converter_session_filepath = model.get_strpath_storage_for_file('converter_session.dat') - predictor_func, predictor_input_shape, cfg = model.get_ConverterConfig() + model = models.import_model(model_class_name)(is_training=False, + saved_models_path=saved_models_path, + training_data_src_path=training_data_src_path, + force_gpu_idxs=force_gpu_idxs, + cpu_only=cpu_only) + merger_session_filepath = model.get_strpath_storage_for_file('merger_session.dat') + predictor_func, predictor_input_shape, cfg = model.get_MergerConfig() if not is_interactive: cfg.ask_settings() - input_path_image_paths = Path_utils.get_image_paths(input_path) + input_path_image_paths = pathex.get_image_paths(input_path) - if cfg.type == ConverterConfig.TYPE_MASKED: - if aligned_dir is None: - io.log_err('Aligned directory not found. Please ensure it exists.') - return - - aligned_path = Path(aligned_dir) + if cfg.type == MergerConfig.TYPE_MASKED: if not aligned_path.exists(): io.log_err('Aligned directory not found. 
Please ensure it exists.') return packed_samples = None try: - packed_samples = samplelib.PackedFaceset.load(aligned_path) + packed_samples = samplelib.PackedFaceset.load(aligned_path) except: io.log_err(f"Error occured while loading samplelib.PackedFaceset.load {str(aligned_path)}, {traceback.format_exc()}") - - if packed_samples is not None: - io.log_info ("Using packed faceset.") + + if packed_samples is not None: + io.log_info ("Using packed faceset.") def generator(): - for sample in io.progress_bar_generator( packed_samples, "Collecting alignments"): - filepath = Path(sample.filename) + for sample in io.progress_bar_generator( packed_samples, "Collecting alignments"): + filepath = Path(sample.filename) yield DFLIMG.load(filepath, loader_func=lambda x: sample.read_raw_file() ) else: def generator(): - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(aligned_path), "Collecting alignments"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(aligned_path), "Collecting alignments"): filepath = Path(filepath) yield DFLIMG.load(filepath) - + alignments = {} multiple_faces_detected = False - + for dflimg in generator(): if dflimg is None: io.log_err ("%s is not a dfl image file" % (filepath.name) ) @@ -709,10 +690,10 @@ def main (args, device_args): source_filename = dflimg.get_source_filename() if source_filename is None or source_filename == "_": continue - + source_filename = Path(source_filename) source_filename_stem = source_filename.stem - + if source_filename_stem not in alignments.keys(): alignments[ source_filename_stem ] = [] @@ -724,7 +705,7 @@ def main (args, device_args): if multiple_faces_detected: io.log_info ("Warning: multiple faces detected. Strongly recommended to process them separately.") - frames = [ ConvertSubprocessor.Frame( frame_info=FrameInfo(filename=p, landmarks_list=alignments.get(Path(p).stem, None))) for p in input_path_image_paths ] + frames = [ MergeSubprocessor.Frame( frame_info=FrameInfo(filename=p, landmarks_list=alignments.get(Path(p).stem, None))) for p in input_path_image_paths ] if multiple_faces_detected: io.log_info ("Warning: multiple faces detected. 
Motion blur will not be used.") @@ -760,11 +741,11 @@ def main (args, device_args): fi.motion_deg = -math.atan2(motion_vector[1],motion_vector[0])*180 / math.pi - elif cfg.type == ConverterConfig.TYPE_FACE_AVATAR: + elif cfg.type == MergerConfig.TYPE_FACE_AVATAR: filesdata = [] for filepath in io.progress_bar_generator(input_path_image_paths, "Collecting info"): filepath = Path(filepath) - + dflimg = DFLIMG.load(filepath) if dflimg is None: io.log_err ("%s is not a dfl image file" % (filepath.name) ) @@ -787,19 +768,19 @@ def main (args, device_args): prev_temporal_frame_infos.insert (0, prev_frame_info ) next_temporal_frame_infos.append ( next_frame_info ) - frames.append ( ConvertSubprocessor.Frame(prev_temporal_frame_infos=prev_temporal_frame_infos, + frames.append ( MergeSubprocessor.Frame(prev_temporal_frame_infos=prev_temporal_frame_infos, frame_info=frame_info, next_temporal_frame_infos=next_temporal_frame_infos) ) if len(frames) == 0: - io.log_info ("No frames to convert in input_dir.") + io.log_info ("No frames to merge in input_dir.") else: - ConvertSubprocessor ( + MergeSubprocessor ( is_interactive = is_interactive, - converter_session_filepath = converter_session_filepath, + merger_session_filepath = merger_session_filepath, predictor_func = predictor_func, predictor_input_shape = predictor_input_shape, - converter_config = cfg, + merger_config = cfg, frames = frames, output_path = output_path, model_iter = model.get_iter() diff --git a/mainscripts/ConverterScreen/ConverterScreen.py b/mainscripts/MergerScreen/MergerScreen.py similarity index 96% rename from mainscripts/ConverterScreen/ConverterScreen.py rename to mainscripts/MergerScreen/MergerScreen.py index 78ed45d..d105a86 100644 --- a/mainscripts/ConverterScreen/ConverterScreen.py +++ b/mainscripts/MergerScreen/MergerScreen.py @@ -3,10 +3,10 @@ from pathlib import Path import numpy as np -import imagelib -from interact import interact as io -from utils.cv2_utils import * -from utils.os_utils import get_screen_size +from core import imagelib +from core.interact import interact as io +from core.cv2ex import * +from core import osex class ScreenAssets(object): @@ -96,7 +96,7 @@ class Screen(object): if self.is_first_appear: self.is_first_appear = False #center window - desktop_w, desktop_h = get_screen_size() + desktop_w, desktop_h = osex.get_screen_size() h,w,c = screen.shape cv2.moveWindow(self.scrn_manager.wnd_name, max(0,(desktop_w-w) // 2), max(0, (desktop_h-h) // 2) ) diff --git a/mainscripts/MergerScreen/__init__.py b/mainscripts/MergerScreen/__init__.py new file mode 100644 index 0000000..ea3e320 --- /dev/null +++ b/mainscripts/MergerScreen/__init__.py @@ -0,0 +1 @@ +from .MergerScreen import Screen, ScreenManager \ No newline at end of file diff --git a/mainscripts/ConverterScreen/gfx/sand_clock_64.png b/mainscripts/MergerScreen/gfx/sand_clock_64.png similarity index 100% rename from mainscripts/ConverterScreen/gfx/sand_clock_64.png rename to mainscripts/MergerScreen/gfx/sand_clock_64.png diff --git a/mainscripts/Sorter.py b/mainscripts/Sorter.py index 9666ca9..e5d5d06 100644 --- a/mainscripts/Sorter.py +++ b/mainscripts/Sorter.py @@ -1,4 +1,5 @@ import multiprocessing +import math import operator import os import sys @@ -11,23 +12,18 @@ import cv2 import numpy as np from numpy import linalg as npla -import imagelib -from facelib import LandmarksProcessor -from imagelib import estimate_sharpness -from interact import interact as io -from joblib import Subprocessor -from nnlib import VGGFace, nnlib -from utils import 
Path_utils -from utils.cv2_utils import * +from core import imagelib, pathex +from core.cv2ex import * +from core.imagelib import estimate_sharpness +from core.interact import interact as io +from core.joblib import Subprocessor +from core.leras import nn from DFLIMG import * +from facelib import LandmarksProcessor + class BlurEstimatorSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.log_info('Running on %s.' % (client_dict['device_name']) ) - #override def process_data(self, data): filepath = Path( data[0] ) @@ -62,10 +58,11 @@ class BlurEstimatorSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range(0, multiprocessing.cpu_count() ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - } + cpu_count = multiprocessing.cpu_count() + io.log_info(f'Running on {cpu_count} CPUs') + + for i in range(cpu_count): + yield 'CPU%d' % (i), {}, {} #override def get_data(self, host_dict): @@ -95,7 +92,7 @@ class BlurEstimatorSubprocessor(Subprocessor): def sort_by_blur(input_path): io.log_info ("Sorting by blur...") - img_list = [ (filename,[]) for filename in Path_utils.get_image_paths(input_path) ] + img_list = [ (filename,[]) for filename in pathex.get_image_paths(input_path) ] img_list, trash_img_list = BlurEstimatorSubprocessor (img_list).run() io.log_info ("Sorting...") @@ -103,81 +100,11 @@ def sort_by_blur(input_path): return img_list, trash_img_list -def sort_by_face(input_path): - io.log_info ("Sorting by face similarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - dflimg = DFLIMG.load (filepath) - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_landmarks()] ) - - - img_list_len = len(img_list) - for i in io.progress_bar_generator ( range(0, img_list_len-1), "Sorting"): - min_score = float("inf") - j_min_score = i+1 - for j in range(i+1,len(img_list)): - - fl1 = img_list[i][1] - fl2 = img_list[j][1] - score = np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) - - if score < min_score: - min_score = score - j_min_score = j - img_list[i+1], img_list[j_min_score] = img_list[j_min_score], img_list[i+1] - - return img_list, trash_img_list - -def sort_by_face_dissim(input_path): - - io.log_info ("Sorting by face dissimilarity...") - - img_list = [] - trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): - filepath = Path(filepath) - - dflimg = DFLIMG.load (filepath) - - if dflimg is None: - io.log_err ("%s is not a dfl image file" % (filepath.name) ) - trash_img_list.append ( [str(filepath)] ) - continue - - img_list.append( [str(filepath), dflimg.get_landmarks(), 0 ] ) - - img_list_len = len(img_list) - for i in io.progress_bar_generator( range(img_list_len-1), "Sorting"): - score_total = 0 - for j in range(i+1,len(img_list)): - if i == j: - continue - fl1 = img_list[i][1] - fl2 = img_list[j][1] - score_total += np.sum ( np.absolute ( (fl2 - fl1).flatten() ) ) - - img_list[i][2] = score_total - - io.log_info ("Sorting...") - img_list = sorted(img_list, key=operator.itemgetter(2), reverse=True) - - return img_list, trash_img_list - def sort_by_face_yaw(input_path): io.log_info ("Sorting by face yaw...") img_list = [] trash_img_list = [] - for 
filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -200,7 +127,7 @@ def sort_by_face_pitch(input_path): io.log_info ("Sorting by face pitch...") img_list = [] trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -221,10 +148,6 @@ def sort_by_face_pitch(input_path): class HistSsimSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' % (client_dict['device_name']) ) - #override def process_data(self, data): img_list = [] @@ -277,10 +200,11 @@ class HistSsimSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range( len(self.img_chunks_list) ): - yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, - 'device_name': 'CPU%d' % (i) - } + cpu_count = len(self.img_chunks_list) + io.log_info(f'Running on {cpu_count} threads') + for i in range(cpu_count): + yield 'CPU%d' % (i), {'i':i}, {} + #override def on_clients_initialized(self): io.progress_bar ("Sorting", len(self.img_list)) @@ -311,14 +235,13 @@ class HistSsimSubprocessor(Subprocessor): def sort_by_hist(input_path): io.log_info ("Sorting by histogram similarity...") - img_list = HistSsimSubprocessor(Path_utils.get_image_paths(input_path)).run() - return img_list + img_list = HistSsimSubprocessor(pathex.get_image_paths(input_path)).run() + return img_list, [] class HistDissimSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): #override def on_initialize(self, client_dict): - self.log_info ('Running on %s.' 
% (client_dict['device_name']) ) self.img_list = client_dict['img_list'] self.img_list_len = len(self.img_list) @@ -355,11 +278,11 @@ class HistDissimSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range(0, min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'img_list' : self.img_list - } + cpu_count = min(multiprocessing.cpu_count(), 8) + io.log_info(f'Running on {cpu_count} CPUs') + for i in range(cpu_count): + yield 'CPU%d' % (i), {}, {'img_list' : self.img_list} + #override def get_data(self, host_dict): if len (self.img_list_range) > 0: @@ -385,7 +308,7 @@ def sort_by_hist_dissim(input_path): img_list = [] trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -407,37 +330,37 @@ def sort_by_hist_dissim(input_path): def sort_by_brightness(input_path): io.log_info ("Sorting by brightness...") - img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,2].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] + img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,2].flatten() )] for x in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading") ] io.log_info ("Sorting...") img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - return img_list + return img_list, [] def sort_by_hue(input_path): io.log_info ("Sorting by hue...") - img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,0].flatten() )] for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading") ] + img_list = [ [x, np.mean ( cv2.cvtColor(cv2_imread(x), cv2.COLOR_BGR2HSV)[...,0].flatten() )] for x in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading") ] io.log_info ("Sorting...") img_list = sorted(img_list, key=operator.itemgetter(1), reverse=True) - return img_list + return img_list, [] def sort_by_black(input_path): io.log_info ("Sorting by amount of black pixels...") img_list = [] - for x in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for x in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): img = cv2_imread(x) img_list.append ([x, img[(img == 0)].size ]) io.log_info ("Sorting...") img_list = sorted(img_list, key=operator.itemgetter(1), reverse=False) - return img_list + return img_list, [] def sort_by_origname(input_path): io.log_info ("Sort by original filename...") img_list = [] trash_img_list = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Loading"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Loading"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -455,7 +378,7 @@ def sort_by_origname(input_path): def sort_by_oneface_in_image(input_path): io.log_info ("Sort by one face in images...") - image_paths = Path_utils.get_image_paths(input_path) + image_paths = pathex.get_image_paths(input_path) a = np.array ([ ( int(x[0]), int(x[1]) ) \ for x in [ Path(filepath).stem.split('_') for filepath in image_paths ] if len(x) == 2 ]) @@ -468,13 +391,14 @@ def sort_by_oneface_in_image(input_path): img_list = [ (path,) for i,path in enumerate(image_paths) if i not in idxs ] 
trash_img_list = [ (image_paths[x],) for x in idxs ] return img_list, trash_img_list + + io.log_info ("Nothing found. Possible recover original filenames first.") return [], [] class FinalLoaderSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): #override def on_initialize(self, client_dict): - self.log_info ('Running on %s.' % (client_dict['device_name']) ) self.include_by_blur = client_dict['include_by_blur'] #override @@ -528,11 +452,11 @@ class FinalLoaderSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range(0, min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'include_by_blur': self.include_by_blur - } + cpu_count = min(multiprocessing.cpu_count(), 8) + io.log_info(f'Running on {cpu_count} CPUs') + + for i in range(cpu_count): + yield 'CPU%d' % (i), {}, {'include_by_blur': self.include_by_blur} #override def get_data(self, host_dict): @@ -559,10 +483,6 @@ class FinalLoaderSubprocessor(Subprocessor): class FinalHistDissimSubprocessor(Subprocessor): class Cli(Subprocessor.Cli): - #override - def on_initialize(self, client_dict): - self.log_info ('Running on %s.' % (client_dict['device_name']) ) - #override def process_data(self, data): idx, pitch_yaw_img_list = data @@ -598,10 +518,11 @@ class FinalHistDissimSubprocessor(Subprocessor): #override def process_info_generator(self): - for i in range(min(multiprocessing.cpu_count(), 8) ): - yield 'CPU%d' % (i), {'i':i}, {'device_idx': i, - 'device_name': 'CPU%d' % (i) - } + cpu_count = min(multiprocessing.cpu_count(), 8) + io.log_info(f'Running on {cpu_count} CPUs') + for i in range(cpu_count): + yield 'CPU%d' % (i), {}, {} + #override def on_clients_initialized(self): io.progress_bar ("Sort by hist-dissim", len(self.pitch_yaw_sample_list_idxs) ) @@ -632,18 +553,18 @@ class FinalHistDissimSubprocessor(Subprocessor): def get_result(self): return self.result -def sort_final(input_path, include_by_blur=True): - io.log_info ("Performing final sort.") +def sort_best(input_path, include_by_blur=True): + io.log_info ("Performing sort by best faces.") - target_count = io.input_int ("Target number of images? 
(default:2000) : ", 2000) + target_count = io.input_int ("Target number of faces?", 2000) - img_list, trash_img_list = FinalLoaderSubprocessor( Path_utils.get_image_paths(input_path), include_by_blur ).run() + img_list, trash_img_list = FinalLoaderSubprocessor( pathex.get_image_paths(input_path), include_by_blur ).run() final_img_list = [] grads = 128 imgs_per_grad = round (target_count / grads) - grads_space = np.linspace (-1.0,1.0,grads) + grads_space = np.linspace (-math.pi / 2, math.pi / 2,grads) yaws_sample_list = [None]*grads for g in io.progress_bar_generator ( range(grads), "Sort by yaw"): @@ -696,7 +617,7 @@ def sort_final(input_path, include_by_blur=True): pitch_sample_list = [None]*pitch_grads - grads_space = np.linspace (-1.0,1.0, pitch_grads ) + grads_space = np.linspace (-math.pi / 2,math.pi / 2, pitch_grads ) for pg in range (pitch_grads): @@ -747,7 +668,7 @@ def sort_final(input_path, include_by_blur=True): return final_img_list, trash_img_list - +""" def sort_by_vggface(input_path): io.log_info ("Sorting by face similarity using VGGFace model...") @@ -756,7 +677,7 @@ def sort_by_vggface(input_path): final_img_list = [] trash_img_list = [] - image_paths = Path_utils.get_image_paths(input_path) + image_paths = pathex.get_image_paths(input_path) img_list = [ (x,) for x in image_paths ] img_list_len = len(img_list) img_list_range = [*range(img_list_len)] @@ -786,42 +707,48 @@ def sort_by_vggface(input_path): feats[i+1], feats[idx] = feats[idx], feats[i+1] return img_list, trash_img_list +""" def sort_by_absdiff(input_path): io.log_info ("Sorting by absolute difference...") - is_sim = io.input_bool ("Sort by similar? ( y/n ?:help skip:y ) : ", True, help_message="Otherwise sort by dissimilar.") + is_sim = io.input_bool ("Sort by similar?", True, help_message="Otherwise sort by dissimilar.") - from nnlib import nnlib - exec( nnlib.import_all( device_config=nnlib.device.Config() ), locals(), globals() ) + from core.leras import nn - image_paths = Path_utils.get_image_paths(input_path) + device_config = nn.ask_choose_device_idxs(choose_only_one=True, return_device_config=True) + nn.initialize( device_config=device_config ) + tf = nn.tf + + image_paths = pathex.get_image_paths(input_path) image_paths_len = len(image_paths) batch_size = 1024 batch_size_remain = image_paths_len % batch_size - i_t = Input ( (256,256,3) ) - j_t = Input ( (256,256,3) ) + i_t = tf.placeholder (tf.float32, (None,256,256,3) ) + j_t = tf.placeholder (tf.float32, (None,256,256,3) ) - outputs = [] + outputs_full = [] + outputs_remain = [] + for i in range(batch_size): - outputs += [ K.sum( K.abs(i_t-j_t[i]), axis=[1,2,3] ) ] + diff_t = tf.reduce_sum( tf.abs(i_t-j_t[i]), axis=[1,2,3] ) + outputs_full.append(diff_t) + if i < batch_size_remain: + outputs_remain.append(diff_t) - func_bs_full = K.function ( [i_t,j_t], outputs) + def func_bs_full(i,j): + return nn.tf_sess.run (outputs_full, feed_dict={i_t:i,j_t:j}) - outputs = [] - for i in range(batch_size_remain): - outputs += [ K.sum( K.abs(i_t-j_t[i]), axis=[1,2,3] ) ] - - func_bs_remain = K.function ( [i_t,j_t], outputs) + def func_bs_remain(i,j): + return nn.tf_sess.run (outputs_remain, feed_dict={i_t:i,j_t:j}) import h5py db_file_path = Path(tempfile.gettempdir()) / 'sort_cache.hdf5' db_file = h5py.File( str(db_file_path), "w") db = db_file.create_dataset("results", (image_paths_len,image_paths_len), compression="gzip") - pg_len = image_paths_len // batch_size if batch_size_remain != 0: pg_len += 1 @@ -841,7 +768,7 @@ def sort_by_absdiff(input_path): if i >= 
j:
                i_images = [ cv2_imread(x) for x in image_paths[i:i+batch_size] ]
                i_images_len = len(i_images)

-                result = func ([i_images,j_images])
+                result = func (i_images,j_images)
                db[j:j+j_images_len,i:i+i_images_len] = np.array(result)

            io.progress_bar_inc(1)

@@ -874,7 +801,7 @@ def final_process(input_path, img_list, trash_img_list):

        io.log_info ("Trashing %d items to %s" % ( len(trash_img_list), str(trash_path) ) )

-        for filename in Path_utils.get_image_paths(trash_path):
+        for filename in pathex.get_image_paths(trash_path):
            Path(filename).unlink()

        for i in io.progress_bar_generator( range(len(trash_img_list)), "Moving trash", leave=False):
@@ -905,29 +832,40 @@ def final_process(input_path, img_list, trash_img_list):
            except:
                io.log_info ('fail to rename %s' % (src.name) )

-def main (input_path, sort_by_method):
-    input_path = Path(input_path)
-    sort_by_method = sort_by_method.lower()
+sort_func_methods = {
+    'blur': ("blur", sort_by_blur),
+    'face-yaw': ("face yaw direction", sort_by_face_yaw),
+    'face-pitch': ("face pitch direction", sort_by_face_pitch),
+    'hist': ("histogram similarity", sort_by_hist),
+    'hist-dissim': ("histogram dissimilarity", sort_by_hist_dissim),
+    'brightness': ("brightness", sort_by_brightness),
+    'hue': ("hue", sort_by_hue),
+    'black': ("amount of black pixels", sort_by_black),
+    'origname': ("original filename", sort_by_origname),
+    'oneface': ("one face in image", sort_by_oneface_in_image),
+    'absdiff': ("absolute pixel difference", sort_by_absdiff),
+    'final': ("best faces", sort_best),
+}
+def main (input_path, sort_by_method=None):
     io.log_info ("Running sort tool.\r\n")

-    img_list = []
-    trash_img_list = []
-    if sort_by_method == 'blur':             img_list, trash_img_list = sort_by_blur (input_path)
-    elif sort_by_method == 'face':           img_list, trash_img_list = sort_by_face (input_path)
-    elif sort_by_method == 'face-dissim':    img_list, trash_img_list = sort_by_face_dissim (input_path)
-    elif sort_by_method == 'face-yaw':       img_list, trash_img_list = sort_by_face_yaw (input_path)
-    elif sort_by_method == 'face-pitch':     img_list, trash_img_list = sort_by_face_pitch (input_path)
-    elif sort_by_method == 'hist':           img_list = sort_by_hist (input_path)
-    elif sort_by_method == 'hist-dissim':    img_list, trash_img_list = sort_by_hist_dissim (input_path)
-    elif sort_by_method == 'brightness':     img_list = sort_by_brightness (input_path)
-    elif sort_by_method == 'hue':            img_list = sort_by_hue (input_path)
-    elif sort_by_method == 'black':          img_list = sort_by_black (input_path)
-    elif sort_by_method == 'origname':       img_list, trash_img_list = sort_by_origname (input_path)
-    elif sort_by_method == 'oneface':        img_list, trash_img_list = sort_by_oneface_in_image (input_path)
-    elif sort_by_method == 'vggface':        img_list, trash_img_list = sort_by_vggface (input_path)
-    elif sort_by_method == 'absdiff':        img_list, trash_img_list = sort_by_absdiff (input_path)
-    elif sort_by_method == 'final':          img_list, trash_img_list = sort_final (input_path)
-    elif sort_by_method == 'final-no-blur':  img_list, trash_img_list = sort_final (input_path, include_by_blur=False)
+    if sort_by_method is None:
+        io.log_info(f"Choose sorting method:")
+
+        key_list = list(sort_func_methods.keys())
+        for i, key in enumerate(key_list):
+            desc, func = sort_func_methods[key]
+            io.log_info(f"[{i}] {desc}")
+
+        io.log_info("")
+        id = io.input_int("", 3, valid_list=[*range(len(key_list))] )
+
+        sort_by_method = key_list[id]
+    else:
+        sort_by_method = sort_by_method.lower()
+
+    desc, func = sort_func_methods[sort_by_method]
+    img_list, trash_img_list = func(input_path)

     final_process (input_path, img_list, trash_img_list)
diff --git a/mainscripts/Trainer.py b/mainscripts/Trainer.py
index ce9c1df..b3e174c 100644
--- a/mainscripts/Trainer.py
+++ b/mainscripts/Trainer.py
@@ -6,29 +6,31 @@ import time
 import numpy as np
 import itertools
 from pathlib import Path
-from utils import Path_utils
-import imagelib
+from core import pathex
+from core import imagelib
 import cv2
 import models
-from interact import interact as io
+from core.interact import interact as io

-def trainerThread (s2c, c2s, e, args, device_args):
+def trainerThread (s2c, c2s, e,
+                    model_class_name = None,
+                    saved_models_path = None,
+                    training_data_src_path = None,
+                    training_data_dst_path = None,
+                    pretraining_data_path = None,
+                    pretrained_model_path = None,
+                    no_preview=False,
+                    force_model_name=None,
+                    force_gpu_idxs=None,
+                    cpu_only=None,
+                    execute_programs = None,
+                    debug=False,
+                    **kwargs):
     while True:
         try:
             start_time = time.time()

-            training_data_src_path = Path( args.get('training_data_src_dir', '') )
-            training_data_dst_path = Path( args.get('training_data_dst_dir', '') )
-
-            pretraining_data_path = args.get('pretraining_data_dir', '')
-            pretraining_data_path = Path(pretraining_data_path) if pretraining_data_path is not None else None
-
-            model_path = Path( args.get('model_path', '') )
-            model_name = args.get('model_name', '')
             save_interval_min = 15
-            debug = args.get('debug', '')
-            execute_programs = args.get('execute_programs', [])
-            no_preview = args.get('no_preview', False)

             if not training_data_src_path.exists():
                 io.log_err('Training data src directory does not exist.')
@@ -38,18 +40,22 @@ def trainerThread (s2c, c2s, e, args, device_args):
                 io.log_err('Training data dst directory does not exist.')
                 break

-            if not model_path.exists():
-                model_path.mkdir(exist_ok=True)
+            if not saved_models_path.exists():
+                saved_models_path.mkdir(exist_ok=True)

-            model = models.import_model(model_name)(
-                        model_path,
+            model = models.import_model(model_class_name)(
+                        is_training=True,
+                        saved_models_path=saved_models_path,
                         training_data_src_path=training_data_src_path,
                         training_data_dst_path=training_data_dst_path,
                         pretraining_data_path=pretraining_data_path,
-                        is_training=True,
-                        debug=debug,
+                        pretrained_model_path=pretrained_model_path,
                         no_preview=no_preview,
-                        device_args=device_args)
+                        force_model_name=force_model_name,
+                        force_gpu_idxs=force_gpu_idxs,
+                        cpu_only=cpu_only,
+                        debug=debug,
+                        )

             is_reached_goal = model.is_reached_iter_goal()

@@ -71,10 +77,6 @@ def trainerThread (s2c, c2s, e, args, device_args):
                 c2s.put ( {'op':'show', 'previews': previews} )
                 e.set() #Set the GUI Thread as Ready

-
-            if model.is_first_run():
-                model_save()
-
             if model.get_target_iter() != 0:
                 if is_reached_goal:
                     io.log_info('Model already trained to target iteration. You can use preview.')
@@ -108,6 +110,12 @@ def trainerThread (s2c, c2s, e, args, device_args):
                         print("Unable to execute program: %s" % (prog) )

                 if not is_reached_goal:
+
+                    if model.get_iter() == 0:
+                        io.log_info("")
+                        io.log_info("Trying to do the first iteration. 
If an error occurs, reduce the model parameters.") + io.log_info("") + iter, iter_time = model.train_one_iter() loss_history = model.get_loss_history() @@ -119,8 +127,8 @@ def trainerThread (s2c, c2s, e, args, device_args): if shared_state['after_save']: shared_state['after_save'] = False - last_save_time = time.time() #upd last_save_time only after save+one_iter, because plaidML rebuilds programs after save https://github.com/plaidml/plaidml/issues/274 - + last_save_time = time.time() + mean_loss = np.mean ( [ np.array(loss_history[i]) for i in range(save_iter, iter) ], axis=0) for loss_value in mean_loss: @@ -137,7 +145,10 @@ def trainerThread (s2c, c2s, e, args, device_args): io.log_info ('\r' + loss_string, end='') else: io.log_info (loss_string, end='\r') - + + if model.get_iter() == 1: + model_save() + if model.get_target_iter() != 0 and model.is_reached_iter_goal(): io.log_info ('Reached target iteration.') model_save() @@ -185,16 +196,16 @@ def trainerThread (s2c, c2s, e, args, device_args): -def main(args, device_args): +def main(**kwargs): io.log_info ("Running trainer.\r\n") - no_preview = args.get('no_preview', False) + no_preview = kwargs.get('no_preview', False) s2c = queue.Queue() c2s = queue.Queue() e = threading.Event() - thread = threading.Thread(target=trainerThread, args=(s2c, c2s, e, args, device_args) ) + thread = threading.Thread(target=trainerThread, args=(s2c, c2s, e), kwargs=kwargs ) thread.start() e.wait() #Wait for inital load to occur. diff --git a/mainscripts/Util.py b/mainscripts/Util.py index ba2ad97..bc65b90 100644 --- a/mainscripts/Util.py +++ b/mainscripts/Util.py @@ -5,10 +5,10 @@ import cv2 from DFLIMG import * from facelib import LandmarksProcessor -from imagelib import IEPolys -from interact import interact as io -from utils import Path_utils -from utils.cv2_utils import * +from core.imagelib import IEPolys +from core.interact import interact as io +from core import pathex +from core.cv2ex import * def save_faceset_metadata_folder(input_path): @@ -19,7 +19,7 @@ def save_faceset_metadata_folder(input_path): io.log_info (f"Saving metadata to {str(metadata_filepath)}\r\n") d = {} - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Processing"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) @@ -52,7 +52,7 @@ def restore_faceset_metadata_folder(input_path): except: raise FileNotFoundError(filename) - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Processing"): filepath = Path(filepath) shape, dfl_dict = d.get(filepath.name, None) @@ -92,7 +92,7 @@ def remove_ie_polys_folder(input_path): io.log_info ("Removing ie_polys...\r\n") - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Removing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Removing"): filepath = Path(filepath) remove_ie_polys_file(filepath) @@ -114,7 +114,7 @@ def remove_fanseg_folder(input_path): io.log_info ("Removing fanseg mask...\r\n") - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Removing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Removing"): filepath = Path(filepath) remove_fanseg_file(filepath) @@ -150,14 +150,14 @@ def convert_png_to_jpg_folder (input_path): 
io.log_info ("Converting PNG to JPG...\r\n") - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Converting"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Converting"): filepath = Path(filepath) convert_png_to_jpg_file(filepath) def add_landmarks_debug_images(input_path): io.log_info ("Adding landmarks debug images...") - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Processing"): filepath = Path(filepath) img = cv2_imread(str(filepath)) @@ -179,7 +179,7 @@ def recover_original_aligned_filename(input_path): io.log_info ("Recovering original aligned filename...") files = [] - for filepath in io.progress_bar_generator( Path_utils.get_image_paths(input_path), "Processing"): + for filepath in io.progress_bar_generator( pathex.get_image_paths(input_path), "Processing"): filepath = Path(filepath) dflimg = DFLIMG.load (filepath) diff --git a/mainscripts/VideoEd.py b/mainscripts/VideoEd.py index ac7ebc5..cab6fcc 100644 --- a/mainscripts/VideoEd.py +++ b/mainscripts/VideoEd.py @@ -2,8 +2,8 @@ import subprocess import numpy as np import ffmpeg from pathlib import Path -from utils import Path_utils -from interact import interact as io +from core import pathex +from core.interact import interact as io def extract_video(input_file, output_dir, output_ext=None, fps=None): input_file_path = Path(input_file) @@ -14,7 +14,7 @@ def extract_video(input_file, output_dir, output_ext=None, fps=None): if input_file_path.suffix == '.*': - input_file_path = Path_utils.get_first_file_by_stem (input_file_path.parent, input_file_path.stem) + input_file_path = pathex.get_first_file_by_stem (input_file_path.parent, input_file_path.stem) else: if not input_file_path.exists(): input_file_path = None @@ -24,12 +24,12 @@ def extract_video(input_file, output_dir, output_ext=None, fps=None): return if fps is None: - fps = io.input_int ("Enter FPS ( ?:help skip:fullfps ) : ", 0, help_message="How many frames of every second of the video will be extracted.") + fps = io.input_int ("Enter FPS", 0, help_message="How many frames of every second of the video will be extracted. 0 - full fps") if output_ext is None: - output_ext = io.input_str ("Output image format? ( jpg png ?:help skip:png ) : ", "png", ["png","jpg"], help_message="png is lossless, but extraction is x10 slower for HDD, requires x10 more disk space than jpg.") + output_ext = io.input_str ("Output image format", "png", ["png","jpg"], help_message="png is lossless, but extraction is x10 slower for HDD, requires x10 more disk space than jpg.") - for filename in Path_utils.get_image_paths (output_path, ['.'+output_ext]): + for filename in pathex.get_image_paths (output_path, ['.'+output_ext]): Path(filename).unlink() job = ffmpeg.input(str(input_file_path)) @@ -57,16 +57,16 @@ def cut_video ( input_file, from_time=None, to_time=None, audio_track_id=None, b output_file_path = input_file_path.parent / (input_file_path.stem + "_cut" + input_file_path.suffix) if from_time is None: - from_time = io.input_str ("From time (skip: 00:00:00.000) : ", "00:00:00.000") + from_time = io.input_str ("From time", "00:00:00.000") if to_time is None: - to_time = io.input_str ("To time (skip: 00:00:00.000) : ", "00:00:00.000") + to_time = io.input_str ("To time", "00:00:00.000") if audio_track_id is None: - audio_track_id = io.input_int ("Specify audio track id. 
( skip:0 ) : ", 0) + audio_track_id = io.input_int ("Specify audio track id.", 0) if bitrate is None: - bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:25) : ", 25) ) + bitrate = max (1, io.input_int ("Bitrate of output file in MB/s", 25) ) kwargs = {"c:v": "libx264", "b:v": "%dM" %(bitrate), @@ -93,10 +93,10 @@ def denoise_image_sequence( input_dir, ext=None, factor=None ): return if ext is None: - ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") + ext = io.input_str ("Input image format (extension)", "png") if factor is None: - factor = np.clip ( io.input_int ("Denoise factor? (1-20 default:5) : ", 5), 1, 20 ) + factor = np.clip ( io.input_int ("Denoise factor?", 5, add_info="1-20"), 1, 20 ) kwargs = {} if ext == 'jpg': @@ -129,17 +129,17 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, out_ext = output_file_path.suffix if ext is None: - ext = io.input_str ("Input image format (extension)? ( default:png ) : ", "png") + ext = io.input_str ("Input image format (extension)", "png") if lossless is None: - lossless = io.input_bool ("Use lossless codec ? ( default:no ) : ", False) + lossless = io.input_bool ("Use lossless codec", False) video_id = None audio_id = None ref_in_a = None if reference_file_path is not None: if reference_file_path.suffix == '.*': - reference_file_path = Path_utils.get_first_file_by_stem (reference_file_path.parent, reference_file_path.stem) + reference_file_path = pathex.get_first_file_by_stem (reference_file_path.parent, reference_file_path.stem) else: if not reference_file_path.exists(): reference_file_path = None @@ -166,12 +166,12 @@ def video_from_sequence( input_dir, output_file, reference_file=None, ext=None, if fps is None: #if fps not specified and not overwritten by reference-file - fps = max (1, io.input_int ("FPS ? (default:25) : ", 25) ) + fps = max (1, io.input_int ("Enter FPS", 25) ) if not lossless and bitrate is None: - bitrate = max (1, io.input_int ("Bitrate of output file in MB/s ? (default:16) : ", 16) ) + bitrate = max (1, io.input_int ("Bitrate of output file in MB/s", 16) ) - input_image_paths = Path_utils.get_image_paths(input_path) + input_image_paths = pathex.get_image_paths(input_path) i_in = ffmpeg.input('pipe:', format='image2pipe', r=fps) diff --git a/mainscripts/dev_misc.py b/mainscripts/dev_misc.py index 27c44e4..cb12ae9 100644 --- a/mainscripts/dev_misc.py +++ b/mainscripts/dev_misc.py @@ -7,10 +7,10 @@ import numpy as np from DFLIMG import * from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from joblib import Subprocessor -from utils import Path_utils -from utils.cv2_utils import * +from core.interact import interact as io +from core.joblib import Subprocessor +from core import pathex +from core.cv2ex import * from . 
import Extractor, Sorter from .Extractor import ExtractSubprocessor @@ -41,7 +41,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): output_path = input_path.parent / (input_path.name + '_out') - dir_names = Path_utils.get_all_dir_names(input_path) + dir_names = pathex.get_all_dir_names(input_path) if not output_path.exists(): output_path.mkdir(parents=True, exist_ok=True) @@ -54,7 +54,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): if not cur_output_path.exists(): cur_output_path.mkdir(parents=True, exist_ok=True) - input_path_image_paths = Path_utils.get_image_paths(cur_input_path) + input_path_image_paths = pathex.get_image_paths(cur_input_path) for filename in input_path_image_paths: filename_path = Path(filename) @@ -116,7 +116,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): cur_input_path = input_path / dir_name cur_output_path = output_path / dir_name - input_path_image_paths = Path_utils.get_image_paths(cur_input_path) + input_path_image_paths = pathex.get_image_paths(cur_input_path) l = len(input_path_image_paths) #if l < 250 or l > 350: # continue @@ -176,7 +176,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): output_path = input_path.parent / (input_path.name + '_out') - dir_names = Path_utils.get_all_dir_names(input_path) + dir_names = pathex.get_all_dir_names(input_path) if not output_path.exists(): output_path.mkdir(parents=True, exist_ok=True) @@ -188,7 +188,7 @@ def extract_vggface2_dataset(input_dir, device_args={} ): cur_input_path = input_path / dir_name cur_output_path = output_path / dir_name - l = len(Path_utils.get_image_paths(cur_input_path)) + l = len(pathex.get_image_paths(cur_input_path)) if l < 250 or l > 350: continue @@ -316,13 +316,13 @@ def apply_celebamaskhq(input_dir ): if not img_path.exists(): raise ValueError(f'{str(img_path)} directory not found. Please ensure it exists.') - CelebAMASKHQSubprocessor(Path_utils.get_image_paths(img_path), - Path_utils.get_image_paths(mask_path, subdirs=True) ).run() + CelebAMASKHQSubprocessor(pathex.get_image_paths(img_path), + pathex.get_image_paths(mask_path, subdirs=True) ).run() return paths_to_extract = [] - for filename in io.progress_bar_generator(Path_utils.get_image_paths(img_path), desc="Processing"): + for filename in io.progress_bar_generator(pathex.get_image_paths(img_path), desc="Processing"): filepath = Path(filename) dflimg = DFLIMG.load(filepath) @@ -381,7 +381,7 @@ def extract_fanseg(input_dir, device_args={} ): raise ValueError('Input directory not found. Please ensure it exists.') paths_to_extract = [] - for filename in Path_utils.get_image_paths(input_path) : + for filename in pathex.get_image_paths(input_path) : filepath = Path(filename) dflimg = DFLIMG.load ( filepath ) if dflimg is not None: @@ -413,7 +413,7 @@ def extract_umd_csv(input_file_csv, io.log_info("Output dir is %s." % (str(output_path)) ) if output_path.exists(): - output_images_paths = Path_utils.get_image_paths(output_path) + output_images_paths = pathex.get_image_paths(output_path) if len(output_images_paths) > 0: io.input_bool("WARNING !!! \n %s contains files! \n They will be deleted. \n Press enter to continue." 
% (str(output_path)), False ) for filename in output_images_paths: @@ -443,12 +443,7 @@ def extract_umd_csv(input_file_csv, data = [] for d in csv_data: filename = input_file_csv_root_path / d['FILE'] - - #pitch, yaw, roll = float(d['PITCH']), float(d['YAW']), float(d['ROLL']) - #if pitch < -90 or pitch > 90 or yaw < -90 or yaw > 90 or roll < -90 or roll > 90: - # continue - # - #pitch_yaw_roll = pitch/90.0, yaw/90.0, roll/90.0 + x,y,w,h = float(d['FACE_X']), float(d['FACE_Y']), float(d['FACE_WIDTH']), float(d['FACE_HEIGHT']) @@ -473,11 +468,11 @@ def extract_umd_csv(input_file_csv, def dev_test(input_dir): input_path = Path(input_dir) - dir_names = Path_utils.get_all_dir_names(input_path) + dir_names = pathex.get_all_dir_names(input_path) for dir_name in io.progress_bar_generator(dir_names, desc="Processing"): - img_paths = Path_utils.get_image_paths (input_path / dir_name) + img_paths = pathex.get_image_paths (input_path / dir_name) for filename in img_paths: filepath = Path(filename) diff --git a/mainscripts/gfx/help_converter_masked.jpg b/mainscripts/gfx/help_converter_masked.jpg deleted file mode 100644 index e737f54..0000000 Binary files a/mainscripts/gfx/help_converter_masked.jpg and /dev/null differ diff --git a/mainscripts/gfx/help_converter_face_avatar.jpg b/mainscripts/gfx/help_merger_face_avatar.jpg similarity index 100% rename from mainscripts/gfx/help_converter_face_avatar.jpg rename to mainscripts/gfx/help_merger_face_avatar.jpg diff --git a/mainscripts/gfx/help_converter_face_avatar_source.psd b/mainscripts/gfx/help_merger_face_avatar_source.psd similarity index 100% rename from mainscripts/gfx/help_converter_face_avatar_source.psd rename to mainscripts/gfx/help_merger_face_avatar_source.psd diff --git a/mainscripts/gfx/help_merger_masked.jpg b/mainscripts/gfx/help_merger_masked.jpg new file mode 100644 index 0000000..d7598d2 Binary files /dev/null and b/mainscripts/gfx/help_merger_masked.jpg differ diff --git a/mainscripts/gfx/help_converter_masked_source.psd b/mainscripts/gfx/help_merger_masked_source.psd similarity index 94% rename from mainscripts/gfx/help_converter_masked_source.psd rename to mainscripts/gfx/help_merger_masked_source.psd index ebb3b0d..6b1e387 100644 Binary files a/mainscripts/gfx/help_converter_masked_source.psd and b/mainscripts/gfx/help_merger_masked_source.psd differ diff --git a/converters/FrameInfo.py b/merger/FrameInfo.py similarity index 100% rename from converters/FrameInfo.py rename to merger/FrameInfo.py diff --git a/converters/ConvertAvatar.py b/merger/MergeAvatar.py similarity index 88% rename from converters/ConvertAvatar.py rename to merger/MergeAvatar.py index e5264a0..07ada6d 100644 --- a/converters/ConvertAvatar.py +++ b/merger/MergeAvatar.py @@ -1,9 +1,9 @@ import cv2 import numpy as np -import imagelib +from core import imagelib from facelib import FaceType, LandmarksProcessor -from utils.cv2_utils import * +from core.cv2ex import * def process_frame_info(frame_info, inp_sh): img_uint8 = cv2_imread (frame_info.filename) @@ -14,7 +14,7 @@ def process_frame_info(frame_info, inp_sh): img = cv2.warpAffine( img, img_mat, inp_sh[0:2], borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC ) return img -def ConvertFaceAvatar (predictor_func, predictor_input_shape, cfg, prev_temporal_frame_infos, frame_info, next_temporal_frame_infos): +def MergeFaceAvatar (predictor_func, predictor_input_shape, cfg, prev_temporal_frame_infos, frame_info, next_temporal_frame_infos): inp_sh = predictor_input_shape prev_imgs=[] diff --git 
a/converters/ConvertMasked.py b/merger/MergeMasked.py similarity index 90% rename from converters/ConvertMasked.py rename to merger/MergeMasked.py index 67769cd..8aa3067 100644 --- a/converters/ConvertMasked.py +++ b/merger/MergeMasked.py @@ -3,12 +3,12 @@ import traceback import cv2 import numpy as np -import imagelib +from core import imagelib from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from utils.cv2_utils import * +from core.interact import interact as io +from core.cv2ex import * -def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_face_landmarks): +def MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_face_landmarks): img_size = img_bgr.shape[1], img_bgr.shape[0] img_face_mask_a = LandmarksProcessor.get_image_hull_mask (img_bgr.shape, img_face_landmarks) @@ -22,7 +22,7 @@ def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, i output_size = predictor_input_shape[0] if cfg.super_resolution_mode != 0: - output_size *= 2 + output_size *= 4 face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type) face_output_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type, scale= 1.0 + 0.01*cfg.output_face_scale ) @@ -37,12 +37,12 @@ def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, i predicted = predictor_func (predictor_input_bgr) if isinstance(predicted, tuple): - #converter return bgr,mask + #merger return bgr,mask prd_face_bgr = np.clip (predicted[0], 0, 1.0) prd_face_mask_a_0 = np.clip (predicted[1], 0, 1.0) predictor_masked = True else: - #converter return bgr only, using dst mask + #merger return bgr only, using dst mask prd_face_bgr = np.clip (predicted, 0, 1.0 ) prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, predictor_input_shape[0:2] ) predictor_masked = False @@ -82,24 +82,7 @@ def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, i m = cv2.getAffineTransform(b, fanseg_rect_corner_pts) FAN_dst_face_mask_a_0 = cv2.warpAffine(dst_face_fanseg_mask, m, (cfg.fanseg_input_size,)*2, flags=cv2.INTER_CUBIC ) FAN_dst_face_mask_a_0 = cv2.resize (FAN_dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) - """ - if cfg.mask_mode == 8: #FANCHQ-dst - full_face_fanchq_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, cfg.fanchq_input_size, face_type=FaceType.FULL) - dst_face_fanchq_bgr = cv2.warpAffine(img_bgr, full_face_fanchq_mat, (cfg.fanchq_input_size,)*2, flags=cv2.INTER_CUBIC ) - dst_face_fanchq_mask = cfg.fanchq_extract_func( FaceType.FULL, dst_face_fanchq_bgr ) - if cfg.face_type == FaceType.FULL: - FANCHQ_dst_face_mask_a_0 = cv2.resize (dst_face_fanchq_mask, (output_size,output_size), cv2.INTER_CUBIC) - else: - face_fanchq_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, cfg.fanchq_input_size, face_type=cfg.face_type) - - fanchq_rect_corner_pts = np.array ( [ [0,0], [cfg.fanchq_input_size-1,0], [0,cfg.fanchq_input_size-1] ], dtype=np.float32 ) - a = LandmarksProcessor.transform_points (fanchq_rect_corner_pts, face_fanchq_mat, invert=True ) - b = LandmarksProcessor.transform_points (a, full_face_fanchq_mat ) - m = cv2.getAffineTransform(b, fanchq_rect_corner_pts) - FAN_dst_face_mask_a_0 = cv2.warpAffine(dst_face_fanchq_mask, m, (cfg.fanchq_input_size,)*2, flags=cv2.INTER_CUBIC ) - FAN_dst_face_mask_a_0 = cv2.resize 
(FAN_dst_face_mask_a_0, (output_size,output_size), cv2.INTER_CUBIC) - """ if cfg.mask_mode == 3: #FAN-prd prd_face_mask_a_0 = FAN_prd_face_mask_a_0 elif cfg.mask_mode == 4: #FAN-dst @@ -350,14 +333,14 @@ def ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, i return out_img, out_merging_mask[...,0:1] -def ConvertMasked (predictor_func, predictor_input_shape, cfg, frame_info): +def MergeMasked (predictor_func, predictor_input_shape, cfg, frame_info): img_bgr_uint8 = cv2_imread(frame_info.filename) img_bgr_uint8 = imagelib.normalize_channels (img_bgr_uint8, 3) img_bgr = img_bgr_uint8.astype(np.float32) / 255.0 outs = [] for face_num, img_landmarks in enumerate( frame_info.landmarks_list ): - out_img, out_img_merging_mask = ConvertMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_landmarks) + out_img, out_img_merging_mask = MergeMaskedFace (predictor_func, predictor_input_shape, cfg, frame_info, img_bgr_uint8, img_bgr, img_landmarks) outs += [ (out_img, out_img_merging_mask) ] #Combining multiple face outputs diff --git a/converters/ConverterConfig.py b/merger/MergerConfig.py similarity index 79% rename from converters/ConverterConfig.py rename to merger/MergerConfig.py index b7fb095..7f3984e 100644 --- a/converters/ConverterConfig.py +++ b/merger/MergerConfig.py @@ -2,10 +2,10 @@ import numpy as np import copy from facelib import FaceType -from interact import interact as io +from core.interact import interact as io -class ConverterConfig(object): +class MergerConfig(object): TYPE_NONE = 0 TYPE_MASKED = 1 TYPE_FACE_AVATAR = 2 @@ -28,11 +28,7 @@ class ConverterConfig(object): self.fanseg_input_size = None self.fanseg_extract_func = None - self.fanchq_input_size = None - self.fanchq_extract_func = None - self.ebs_ct_func = None - - self.super_res_dict = {0:"None", 1:'RankSRGAN'} + self.super_res_dict = {0:"None", 1:'FaceEnhancer'} self.sharpen_dict = {0:"None", 1:'box', 2:'gaussian'} #default changeable params @@ -47,18 +43,18 @@ class ConverterConfig(object): def ask_settings(self): s = """Choose sharpen mode: \n""" for key in self.sharpen_dict.keys(): - s += f"""({key}) {self.sharpen_dict[key]}\n""" - s += f"""?:help Default: {list(self.sharpen_dict.keys())[0]} : """ - self.sharpen_mode = io.input_int (s, 0, valid_list=self.sharpen_dict.keys(), help_message="Enhance details by applying sharpen filter.") + s += f"""({key}) {self.sharpen_dict[key]}\n""" + io.log_info(s) + self.sharpen_mode = io.input_int ("", 0, valid_list=self.sharpen_dict.keys(), help_message="Enhance details by applying sharpen filter.") if self.sharpen_mode != 0: - self.blursharpen_amount = np.clip ( io.input_int ("Choose blur/sharpen amount [-100..100] (skip:0) : ", 0), -100, 100 ) + self.blursharpen_amount = np.clip ( io.input_int ("Choose blur/sharpen amount", 0, add_info="-100..100"), -100, 100 ) s = """Choose super resolution mode: \n""" for key in self.super_res_dict.keys(): s += f"""({key}) {self.super_res_dict[key]}\n""" - s += f"""?:help Default: {list(self.super_res_dict.keys())[0]} : """ - self.super_resolution_mode = io.input_int (s, 0, valid_list=self.super_res_dict.keys(), help_message="Enhance details by applying superresolution network.") + io.log_info(s) + self.super_resolution_mode = io.input_int ("", 0, valid_list=self.super_res_dict.keys(), help_message="Enhance details by applying superresolution network.") def toggle_sharpen_mode(self): a = list( self.sharpen_dict.keys() ) @@ -85,7 +81,7 @@ class ConverterConfig(object): def 
__eq__(self, other): #check equality of changeable params - if isinstance(other, ConverterConfig): + if isinstance(other, MergerConfig): return self.sharpen_mode == other.sharpen_mode and \ self.blursharpen_amount == other.blursharpen_amount and \ self.super_resolution_mode == other.super_resolution_mode @@ -130,7 +126,7 @@ half_face_mask_mode_dict = {1:'learned', ctm_dict = { 0: "None", 1:"rct", 2:"lct", 3:"mkl", 4:"mkl-m", 5:"idt", 6:"idt-m", 7:"sot-m", 8:"mix-m" } ctm_str_dict = {None:0, "rct":1, "lct":2, "mkl":3, "mkl-m":4, "idt":5, "idt-m":6, "sot-m":7, "mix-m":8 } -class ConverterConfigMasked(ConverterConfig): +class MergerConfigMasked(MergerConfig): def __init__(self, face_type=FaceType.FULL, default_mode = 'overlay', @@ -152,11 +148,11 @@ class ConverterConfigMasked(ConverterConfig): **kwargs ): - super().__init__(type=ConverterConfig.TYPE_MASKED, **kwargs) + super().__init__(type=MergerConfig.TYPE_MASKED, **kwargs) self.face_type = face_type if self.face_type not in [FaceType.HALF, FaceType.MID_FULL, FaceType.FULL ]: - raise ValueError("ConverterConfigMasked does not support this type of face.") + raise ValueError("MergerConfigMasked does not support this type of face.") self.default_mode = default_mode self.clip_hborder_mask_per = clip_hborder_mask_per @@ -228,58 +224,57 @@ class ConverterConfigMasked(ConverterConfig): s = """Choose mode: \n""" for key in mode_dict.keys(): s += f"""({key}) {mode_dict[key]}\n""" - s += f"""Default: { mode_str_dict.get(self.default_mode, 1) } : """ - - mode = io.input_int (s, mode_str_dict.get(self.default_mode, 1) ) + io.log_info(s) + mode = io.input_int ("", mode_str_dict.get(self.default_mode, 1) ) self.mode = mode_dict.get (mode, self.default_mode ) if 'raw' not in self.mode: if self.mode == 'hist-match' or self.mode == 'hist-match-bw': - self.masked_hist_match = io.input_bool("Masked hist match? (y/n skip:y) : ", True) + self.masked_hist_match = io.input_bool("Masked hist match?", True) if self.mode == 'hist-match' or self.mode == 'hist-match-bw' or self.mode == 'seamless-hist-match': - self.hist_match_threshold = np.clip ( io.input_int("Hist match threshold [0..255] (skip:255) : ", 255), 0, 255) + self.hist_match_threshold = np.clip ( io.input_int("Hist match threshold", 255, add_info="0..255"), 0, 255) if self.face_type == FaceType.FULL: s = """Choose mask mode: \n""" for key in full_face_mask_mode_dict.keys(): s += f"""({key}) {full_face_mask_mode_dict[key]}\n""" - s += f"""?:help Default: 1 : """ + io.log_info(s) - self.mask_mode = io.input_int (s, 1, valid_list=full_face_mask_mode_dict.keys(), help_message="If you learned the mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images. 'FAN-prd' - using super smooth mask by pretrained FAN-model from predicted face. 'FAN-dst' - using super smooth mask by pretrained FAN-model from dst face. 'FAN-prd*FAN-dst' or 'learned*FAN-prd*FAN-dst' - using multiplied masks.") + self.mask_mode = io.input_int ("", 1, valid_list=full_face_mask_mode_dict.keys(), help_message="If you learned the mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images. 'FAN-prd' - using super smooth mask by pretrained FAN-model from predicted face. 'FAN-dst' - using super smooth mask by pretrained FAN-model from dst face. 
'FAN-prd*FAN-dst' or 'learned*FAN-prd*FAN-dst' - using multiplied masks.") else: s = """Choose mask mode: \n""" for key in half_face_mask_mode_dict.keys(): s += f"""({key}) {half_face_mask_mode_dict[key]}\n""" - s += f"""?:help , Default: 1 : """ - self.mask_mode = io.input_int (s, 1, valid_list=half_face_mask_mode_dict.keys(), help_message="If you learned the mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images.") + io.log_info(s) + self.mask_mode = io.input_int ("", 1, valid_list=half_face_mask_mode_dict.keys(), help_message="If you learned the mask, then option 1 should be choosed. 'dst' mask is raw shaky mask from dst aligned images.") if 'raw' not in self.mode: - self.erode_mask_modifier = np.clip ( io.input_int ("Choose erode mask modifier [-400..400] (skip:%d) : " % 0, 0), -400, 400) - self.blur_mask_modifier = np.clip ( io.input_int ("Choose blur mask modifier [-400..400] (skip:%d) : " % 0, 0), -400, 400) - self.motion_blur_power = np.clip ( io.input_int ("Choose motion blur power [0..100] (skip:%d) : " % (0), 0), 0, 100) + self.erode_mask_modifier = np.clip ( io.input_int ("Choose erode mask modifier", 0, add_info="-400..400"), -400, 400) + self.blur_mask_modifier = np.clip ( io.input_int ("Choose blur mask modifier", 0, add_info="-400..400"), -400, 400) + self.motion_blur_power = np.clip ( io.input_int ("Choose motion blur power", 0, add_info="0..100"), 0, 100) - self.output_face_scale = np.clip (io.input_int ("Choose output face scale modifier [-50..50] (skip:0) : ", 0), -50, 50) + self.output_face_scale = np.clip (io.input_int ("Choose output face scale modifier", 0, add_info="-50..50" ), -50, 50) if 'raw' not in self.mode: - self.color_transfer_mode = io.input_str ( f"Apply color transfer to predicted face? Choose mode ( {' / '.join ([str(x) for x in list(ctm_str_dict.keys())])} skip:None ) : ", None, ctm_str_dict.keys() ) + self.color_transfer_mode = io.input_str ( "Color transfer to predicted face", None, valid_list=list(ctm_str_dict.keys())[1:] ) self.color_transfer_mode = ctm_str_dict[self.color_transfer_mode] super().ask_settings() if 'raw' not in self.mode: - self.image_denoise_power = np.clip ( io.input_int ("Choose image degrade by denoise power [0..500] (skip:%d) : " % (0), 0), 0, 500) - self.bicubic_degrade_power = np.clip ( io.input_int ("Choose image degrade by bicubic rescale power [0..100] (skip:%d) : " % (0), 0), 0, 100) - self.color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image [0..100] (skip:0) : ", 0), 0, 100) - self.export_mask_alpha = io.input_bool("Export png with alpha channel of the mask? 
(y/n skip:n) : ", False) + self.image_denoise_power = np.clip ( io.input_int ("Choose image degrade by denoise power", 0, add_info="0..500"), 0, 500) + self.bicubic_degrade_power = np.clip ( io.input_int ("Choose image degrade by bicubic rescale power", 0, add_info="0..100"), 0, 100) + self.color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image", 0, add_info="0..100"), 0, 100) + self.export_mask_alpha = io.input_bool("Export png with alpha channel of the mask?", False) io.log_info ("") def __eq__(self, other): #check equality of changeable params - if isinstance(other, ConverterConfigMasked): + if isinstance(other, MergerConfigMasked): return super().__eq__(other) and \ self.mode == other.mode and \ self.masked_hist_match == other.masked_hist_match and \ @@ -299,7 +294,7 @@ class ConverterConfigMasked(ConverterConfig): def to_string(self, filename): r = ( - f"""ConverterConfig {filename}:\n""" + f"""MergerConfig {filename}:\n""" f"""Mode: {self.mode}\n""" ) @@ -337,11 +332,11 @@ class ConverterConfigMasked(ConverterConfig): return r -class ConverterConfigFaceAvatar(ConverterConfig): +class MergerConfigFaceAvatar(MergerConfig): def __init__(self, temporal_face_count=0, add_source_image=False): - super().__init__(type=ConverterConfig.TYPE_FACE_AVATAR) + super().__init__(type=MergerConfig.TYPE_FACE_AVATAR) self.temporal_face_count = temporal_face_count #changeable params @@ -352,7 +347,7 @@ class ConverterConfigFaceAvatar(ConverterConfig): #override def ask_settings(self): - self.add_source_image = io.input_bool("Add source image? (y/n ?:help skip:n) : ", False, help_message="Add source image for comparison.") + self.add_source_image = io.input_bool("Add source image?", False, help_message="Add source image for comparison.") super().ask_settings() def toggle_add_source_image(self): @@ -362,7 +357,7 @@ class ConverterConfigFaceAvatar(ConverterConfig): def __eq__(self, other): #check equality of changeable params - if isinstance(other, ConverterConfigFaceAvatar): + if isinstance(other, MergerConfigFaceAvatar): return super().__eq__(other) and \ self.add_source_image == other.add_source_image @@ -370,7 +365,7 @@ class ConverterConfigFaceAvatar(ConverterConfig): #override def to_string(self, filename): - return (f"ConverterConfig {filename}:\n" + return (f"MergerConfig {filename}:\n" f"add_source_image : {self.add_source_image}\n") + \ super().to_string(filename) + "================" diff --git a/merger/__init__.py b/merger/__init__.py new file mode 100644 index 0000000..e21651e --- /dev/null +++ b/merger/__init__.py @@ -0,0 +1,4 @@ +from .FrameInfo import FrameInfo +from .MergerConfig import MergerConfig, MergerConfigMasked, MergerConfigFaceAvatar +from .MergeMasked import MergeMasked +from .MergeAvatar import MergeFaceAvatar diff --git a/models/ModelBase.py b/models/ModelBase.py index c7027d1..1b43099 100644 --- a/models/ModelBase.py +++ b/models/ModelBase.py @@ -1,162 +1,179 @@ import colorsys import inspect import json +import operator import os import pickle import shutil +import tempfile import time from pathlib import Path import cv2 import numpy as np -import imagelib -from interact import interact as io -from nnlib import nnlib +from core import imagelib +from core.interact import interact as io +from core.leras import nn from samplelib import SampleGeneratorBase -from utils import Path_utils, std_utils -from utils.cv2_utils import * +from core import pathex +from core.cv2ex import * + -''' -You can implement your own model. Check examples. 
-''' class ModelBase(object): - - - def __init__(self, model_path, training_data_src_path=None, training_data_dst_path=None, pretraining_data_path=None, is_training=False, debug = False, no_preview=False, device_args = None, - ask_enable_autobackup=True, - ask_write_preview_history=True, - ask_target_iter=True, - ask_batch_size=True, - ask_random_flip=True, **kwargs): - - device_args['force_gpu_idx'] = device_args.get('force_gpu_idx',-1) - device_args['cpu_only'] = True if debug else device_args.get('cpu_only',False) - - if device_args['force_gpu_idx'] == -1 and not device_args['cpu_only']: - idxs_names_list = nnlib.device.getValidDevicesIdxsWithNamesList() - if len(idxs_names_list) > 1: - io.log_info ("You have multi GPUs in a system: ") - for idx, name in idxs_names_list: - io.log_info ("[%d] : %s" % (idx, name) ) - - device_args['force_gpu_idx'] = io.input_int("Which GPU idx to choose? ( skip: best GPU ) : ", -1, [ x[0] for x in idxs_names_list] ) - self.device_args = device_args - - self.device_config = nnlib.DeviceConfig(allow_growth=True, **self.device_args) - - io.log_info ("Loading model...") - - self.model_path = model_path - self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat') ) - + def __init__(self, is_training=False, + saved_models_path=None, + training_data_src_path=None, + training_data_dst_path=None, + pretraining_data_path=None, + pretrained_model_path=None, + no_preview=False, + force_model_name=None, + force_gpu_idxs=None, + cpu_only=False, + debug=False, + **kwargs): + self.is_training = is_training + self.saved_models_path = saved_models_path self.training_data_src_path = training_data_src_path self.training_data_dst_path = training_data_dst_path self.pretraining_data_path = pretraining_data_path - - self.debug = debug + self.pretrained_model_path = pretrained_model_path self.no_preview = no_preview - self.is_training_mode = is_training + self.debug = debug + + self.model_class_name = model_class_name = Path(inspect.getmodule(self).__file__).parent.name.rsplit("_", 1)[1] + + if force_model_name is not None: + self.model_name = force_model_name + else: + while True: + # gather all model dat files + saved_models_names = [] + for filepath in pathex.get_file_paths(saved_models_path): + filepath_name = filepath.name + if filepath_name.endswith(f'{model_class_name}_data.dat'): + saved_models_names += [ (filepath_name.split('_')[0], os.path.getmtime(filepath)) ] + + # sort by modified datetime + saved_models_names = sorted(saved_models_names, key=operator.itemgetter(1), reverse=True ) + saved_models_names = [ x[0] for x in saved_models_names ] + + if len(saved_models_names) != 0: + io.log_info ("Choose one of saved models, or enter a name to create a new model.") + io.log_info ("[r] : rename") + io.log_info ("[d] : delete") + io.log_info ("") + for i, model_name in enumerate(saved_models_names): + s = f"[{i}] : {model_name} " + if i == 0: + s += "- latest" + io.log_info (s) + + inp = io.input_str(f"", "0", show_default_value=False ) + model_idx = -1 + try: + model_idx = np.clip ( int(inp), 0, len(saved_models_names)-1 ) + except: + pass + + if model_idx == -1: + if len(inp) == 1: + is_rename = inp[0] == 'r' + is_delete = inp[0] == 'd' + + if is_rename or is_delete: + if len(saved_models_names) != 0: + + if is_rename: + name = io.input_str(f"Enter the name of the model you want to rename") + elif is_delete: + name = io.input_str(f"Enter the name of the model you want to delete") + + if name in saved_models_names: + + if is_rename: + new_model_name = 
io.input_str(f"Enter new name of the model") + + for filepath in pathex.get_file_paths(saved_models_path): + filepath_name = filepath.name + + model_filename, remain_filename = filepath_name.split('_', 1) + if model_filename == name: + + if is_rename: + new_filepath = filepath.parent / ( new_model_name + '_' + remain_filename ) + filepath.rename (new_filepath) + elif is_delete: + filepath.unlink() + continue + + self.model_name = inp + else: + self.model_name = saved_models_names[model_idx] + + else: + self.model_name = io.input_str(f"No saved models found. Enter a name of a new model", "noname") + + break + + self.model_name = self.model_name + '_' + self.model_class_name self.iter = 0 self.options = {} self.loss_history = [] self.sample_for_preview = None + self.choosed_gpu_indexes = None model_data = {} + self.model_data_path = Path( self.get_strpath_storage_for_file('data.dat') ) if self.model_data_path.exists(): + io.log_info (f"Loading {self.model_name} model...") model_data = pickle.loads ( self.model_data_path.read_bytes() ) - self.iter = max( model_data.get('iter',0), model_data.get('epoch',0) ) - if 'epoch' in self.options: - self.options.pop('epoch') + self.iter = model_data.get('iter',0) if self.iter != 0: self.options = model_data['options'] self.loss_history = model_data.get('loss_history', []) self.sample_for_preview = model_data.get('sample_for_preview', None) + self.choosed_gpu_indexes = model_data.get('choosed_gpu_indexes', None) - ask_override = self.is_training_mode and self.iter != 0 and io.input_in_time ("Press enter in 2 seconds to override model settings.", 5 if io.is_colab() else 2 ) - - yn_str = {True:'y',False:'n'} - - if self.iter == 0: + if self.is_first_run(): io.log_info ("\nModel first run.") - if ask_enable_autobackup and (self.iter == 0 or ask_override): - default_autobackup = False if self.iter == 0 else self.options.get('autobackup',False) - self.options['autobackup'] = io.input_bool("Enable autobackup? (y/n ?:help skip:%s) : " % (yn_str[default_autobackup]) , default_autobackup, help_message="Autobackup model files with preview every hour for last 15 hours. Latest backup located in model/<>_autobackups/01") - else: - self.options['autobackup'] = self.options.get('autobackup', False) + self.device_config = nn.DeviceConfig.GPUIndexes( force_gpu_idxs or nn.ask_choose_device_idxs(suggest_best_multi_gpu=True)) \ + if not cpu_only else nn.DeviceConfig.CPU() - if ask_write_preview_history and (self.iter == 0 or ask_override): - default_write_preview_history = False if self.iter == 0 else self.options.get('write_preview_history',False) - self.options['write_preview_history'] = io.input_bool("Write preview history? (y/n ?:help skip:%s) : " % (yn_str[default_write_preview_history]) , default_write_preview_history, help_message="Preview history will be writed to _history folder.") - else: - self.options['write_preview_history'] = self.options.get('write_preview_history', False) + nn.initialize(self.device_config) - if (self.iter == 0 or ask_override) and self.options['write_preview_history'] and io.is_support_windows(): - choose_preview_history = io.input_bool("Choose image for the preview history? (y/n skip:%s) : " % (yn_str[False]) , False) - elif (self.iter == 0 or ask_override) and self.options['write_preview_history'] and io.is_colab(): - choose_preview_history = io.input_bool("Randomly choose new image for preview history? 
(y/n ?:help skip:%s) : " % (yn_str[False]), False, help_message="Preview image history will stay stuck with old faces if you reuse the same model on different celebs. Choose no unless you are changing src/dst to a new person") - else: - choose_preview_history = False + #### + self.default_options_path = saved_models_path / f'{self.model_class_name}_default_options.dat' + self.default_options = {} + if self.default_options_path.exists(): + try: + self.default_options = pickle.loads ( self.default_options_path.read_bytes() ) + except: + pass - if ask_target_iter: - if (self.iter == 0 or ask_override): - self.options['target_iter'] = max(0, io.input_int("Target iteration (skip:unlimited/default) : ", 0)) - else: - self.options['target_iter'] = max(model_data.get('target_iter',0), self.options.get('target_epoch',0)) - if 'target_epoch' in self.options: - self.options.pop('target_epoch') + self.choose_preview_history = False + self.batch_size = self.load_or_def_option('batch_size', 1) + ##### - if ask_batch_size and (self.iter == 0 or ask_override): - default_batch_size = 0 if self.iter == 0 else self.options.get('batch_size',0) - self.batch_size = max(0, io.input_int("Batch_size (?:help skip:%d) : " % (default_batch_size), default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually.")) - else: - self.batch_size = self.options.get('batch_size', 0) - - if ask_random_flip: - default_random_flip = self.options.get('random_flip', True) - if (self.iter == 0 or ask_override): - self.options['random_flip'] = io.input_bool(f"Flip faces randomly? (y/n ?:help skip:{yn_str[default_random_flip]}) : ", default_random_flip, help_message="Predicted face will look more naturally without this option, but src faceset should cover all face directions as dst faceset.") - else: - self.options['random_flip'] = self.options.get('random_flip', default_random_flip) + self.on_initialize_options() + if self.is_first_run(): + # save as default options only for first run model initialize + self.default_options_path.write_bytes( pickle.dumps (self.options) ) self.autobackup = self.options.get('autobackup', False) - if not self.autobackup and 'autobackup' in self.options: - self.options.pop('autobackup') - self.write_preview_history = self.options.get('write_preview_history', False) - if not self.write_preview_history and 'write_preview_history' in self.options: - self.options.pop('write_preview_history') - self.target_iter = self.options.get('target_iter',0) - if self.target_iter == 0 and 'target_iter' in self.options: - self.options.pop('target_iter') - - #self.batch_size = self.options.get('batch_size',0) - self.sort_by_yaw = self.options.get('sort_by_yaw',False) self.random_flip = self.options.get('random_flip',True) - self.onInitializeOptions(self.iter == 0, ask_override) - - nnlib.import_all(self.device_config) - self.keras = nnlib.keras - self.K = nnlib.keras.backend - - self.onInitialize() - + self.on_initialize() self.options['batch_size'] = self.batch_size - - if self.debug or self.batch_size == 0: - self.batch_size = 1 - - if self.is_training_mode: - if self.device_args['force_gpu_idx'] == -1: - self.preview_history_path = self.model_path / ( '%s_history' % (self.get_model_name()) ) - self.autobackups_path = self.model_path / ( '%s_autobackups' % (self.get_model_name()) ) - else: - self.preview_history_path = self.model_path / ( '%d_%s_history' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) 
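
Note on the ModelBase hunk above: option prompting moves out of `ModelBase.__init__` into the overridable `on_initialize_options()` plus the new `ask_*` helpers (`ask_enable_autobackup`, `ask_write_preview_history`, `ask_target_iter`, `ask_random_flip`, `ask_batch_size`), and per-model-class defaults are persisted to `<class>_default_options.dat` and resolved through `load_or_def_option()`. A minimal sketch of how a subclass might use this flow, assuming the class name and the `resolution` option are hypothetical and not part of this diff:

```python
# Illustrative sketch only -- "MyModel" and the 'resolution' option are assumptions;
# ask_*, load_or_def_option and is_first_run are the ModelBase methods from this diff.
from core.interact import interact as io
from models import ModelBase

class MyModel(ModelBase):
    #override
    def on_initialize_options(self):
        ask_override = self.ask_override()   # short window to change settings when resuming

        if self.is_first_run() or ask_override:
            self.ask_enable_autobackup()
            self.ask_write_preview_history()
            self.ask_target_iter()
            self.ask_random_flip()
            self.ask_batch_size(suggest_batch_size=4)

        # per-model option: saved value -> per-class default -> hardcoded default
        default_resolution = self.load_or_def_option('resolution', 128)
        if self.is_first_run() or ask_override:
            self.options['resolution'] = io.input_int("Resolution", default_resolution)
        else:
            self.options['resolution'] = default_resolution
```

On first run the collected `self.options` are also written back as the class-wide defaults, so the next new model of the same class starts from these answers rather than the hardcoded ones.
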
- self.autobackups_path = self.model_path / ( '%d_%s_autobackups' % (self.device_args['force_gpu_idx'], self.get_model_name()) ) + + if self.is_training: + self.preview_history_path = self.saved_models_path / ( f'{self.get_model_name()}_history' ) + self.autobackups_path = self.saved_models_path / ( f'{self.get_model_name()}_autobackups' ) if self.autobackup: self.autobackup_current_hour = time.localtime().tm_hour @@ -169,7 +186,7 @@ class ModelBase(object): self.preview_history_path.mkdir(exist_ok=True) else: if self.iter == 0: - for filename in Path_utils.get_image_paths(self.preview_history_path): + for filename in pathex.get_image_paths(self.preview_history_path): Path(filename).unlink() if self.generator_list is None: @@ -179,15 +196,15 @@ class ModelBase(object): if not isinstance(generator, SampleGeneratorBase): raise ValueError('training data generator is not subclass of SampleGeneratorBase') - if self.sample_for_preview is None or choose_preview_history: - if choose_preview_history and io.is_support_windows(): + if self.sample_for_preview is None or self.choose_preview_history: + if self.choose_preview_history and io.is_support_windows(): io.log_info ("Choose image for the preview history. [p] - next. [enter] - confirm.") wnd_name = "[p] - next. [enter] - confirm." io.named_window(wnd_name) io.capture_keys(wnd_name) choosed = False while not choosed: - self.sample_for_preview = self.generate_next_sample() + self.sample_for_preview = self.generate_next_samples() preview = self.get_static_preview() io.show_image( wnd_name, (preview*255).astype(np.uint8) ) @@ -207,73 +224,66 @@ class ModelBase(object): io.destroy_window(wnd_name) else: - self.sample_for_preview = self.generate_next_sample() + self.sample_for_preview = self.generate_next_samples() try: self.get_static_preview() except: - self.sample_for_preview = self.generate_next_sample() + self.sample_for_preview = self.generate_next_samples() self.last_sample = self.sample_for_preview - ###Generate text summary of model hyperparameters - #Find the longest key name and value string. Used as column widths. - width_name = max([len(k) for k in self.options.keys()] + [17]) + 1 # Single space buffer to left edge. 
Minimum of 17, the length of the longest static string used "Current iteration" - width_value = max([len(str(x)) for x in self.options.values()] + [len(str(self.iter)), len(self.get_model_name())]) + 1 # Single space buffer to right edge - if not self.device_config.cpu_only: #Check length of GPU names - width_value = max([len(nnlib.device.getDeviceName(idx))+1 for idx in self.device_config.gpu_idxs] + [width_value]) - width_total = width_name + width_value + 2 #Plus 2 for ": " + io.log_info( self.get_summary_text() ) - model_summary_text = [] - model_summary_text += [f'=={" Model Summary ":=^{width_total}}=='] # Model/status summary - model_summary_text += [f'=={" "*width_total}=='] - model_summary_text += [f'=={"Model name": >{width_name}}: {self.get_model_name(): <{width_value}}=='] # Name - model_summary_text += [f'=={" "*width_total}=='] - model_summary_text += [f'=={"Current iteration": >{width_name}}: {str(self.iter): <{width_value}}=='] # Iter - model_summary_text += [f'=={" "*width_total}=='] + def load_or_def_option(self, name, def_value): + options_val = self.options.get(name, None) + if options_val is not None: + return options_val - model_summary_text += [f'=={" Model Options ":-^{width_total}}=='] # Model options - model_summary_text += [f'=={" "*width_total}=='] - for key in self.options.keys(): - model_summary_text += [f'=={key: >{width_name}}: {str(self.options[key]): <{width_value}}=='] # self.options key/value pairs - model_summary_text += [f'=={" "*width_total}=='] + def_opt_val = self.default_options.get(name, None) + if def_opt_val is not None: + return def_opt_val - model_summary_text += [f'=={" Running On ":-^{width_total}}=='] # Training hardware info - model_summary_text += [f'=={" "*width_total}=='] - if self.device_config.multi_gpu: - model_summary_text += [f'=={"Using multi_gpu": >{width_name}}: {"True": <{width_value}}=='] # multi_gpu - model_summary_text += [f'=={" "*width_total}=='] - if self.device_config.cpu_only: - model_summary_text += [f'=={"Using device": >{width_name}}: {"CPU": <{width_value}}=='] # cpu_only - else: - for idx in self.device_config.gpu_idxs: - model_summary_text += [f'=={"Device index": >{width_name}}: {idx: <{width_value}}=='] # GPU hardware device index - model_summary_text += [f'=={"Name": >{width_name}}: {nnlib.device.getDeviceName(idx): <{width_value}}=='] # GPU name - vram_str = f'{nnlib.device.getDeviceVRAMTotalGb(idx):.2f}GB' # GPU VRAM - Formated as #.## (or ##.##) - model_summary_text += [f'=={"VRAM": >{width_name}}: {vram_str: <{width_value}}=='] - model_summary_text += [f'=={" "*width_total}=='] - model_summary_text += [f'=={"="*width_total}=='] + return def_value - if not self.device_config.cpu_only and self.device_config.gpu_vram_gb[0] <= 2: # Low VRAM warning - model_summary_text += ["/!\\"] - model_summary_text += ["/!\\ WARNING:"] - model_summary_text += ["/!\\ You are using a GPU with 2GB or less VRAM. 
This may significantly reduce the quality of your result!"] - model_summary_text += ["/!\\ If training does not start, close all programs and try again."] - model_summary_text += ["/!\\ Also you can disable Windows Aero Desktop to increase available VRAM."] - model_summary_text += ["/!\\"] + def ask_override(self): + return self.is_training and self.iter != 0 and io.input_in_time ("Press enter in 2 seconds to override model settings.", 5 if io.is_colab() else 2 ) + + def ask_enable_autobackup(self): + default_autobackup = self.options['autobackup'] = self.load_or_def_option('autobackup', False) + self.options['autobackup'] = io.input_bool(f"Enable autobackup", default_autobackup, help_message="Autobackup model files with preview every hour for last 15 hours. Latest backup located in model/<>_autobackups/01") + + def ask_write_preview_history(self): + default_write_preview_history = self.load_or_def_option('write_preview_history', False) + self.options['write_preview_history'] = io.input_bool(f"Write preview history", default_write_preview_history, help_message="Preview history will be writed to _history folder.") + + if self.options['write_preview_history']: + if io.is_support_windows(): + self.choose_preview_history = io.input_bool("Choose image for the preview history", False) + elif io.is_colab(): + self.choose_preview_history = io.input_bool("Randomly choose new image for preview history", False, help_message="Preview image history will stay stuck with old faces if you reuse the same model on different celebs. Choose no unless you are changing src/dst to a new person") + + def ask_target_iter(self): + default_target_iter = self.load_or_def_option('target_iter', 0) + self.options['target_iter'] = max(0, io.input_int("Target iteration", default_target_iter)) + + def ask_random_flip(self): + default_random_flip = self.load_or_def_option('random_flip', True) + self.options['random_flip'] = io.input_bool("Flip faces randomly", default_random_flip, help_message="Predicted face will look more naturally without this option, but src faceset should cover all face directions as dst faceset.") + + def ask_batch_size(self, suggest_batch_size=None): + default_batch_size = self.load_or_def_option('batch_size', suggest_batch_size or self.batch_size) + self.batch_size = max(0, io.input_int("Batch_size", default_batch_size, help_message="Larger batch size is better for NN's generalization, but it can cause Out of Memory error. Tune this value for your videocard manually.")) - model_summary_text = "\n".join (model_summary_text) - self.model_summary_text = model_summary_text - io.log_info(model_summary_text) #overridable - def onInitializeOptions(self, is_first_run, ask_override): + def on_initialize_options(self): pass #overridable - def onInitialize(self): + def on_initialize(self): ''' - initialize your keras models + initialize your models store and retrieve your model options in self.options[''] @@ -283,12 +293,12 @@ class ModelBase(object): #overridable def onSave(self): - #save your keras models here + #save your models here pass #overridable def onTrainOneIter(self, sample, generator_list): - #train your keras models here + #train your models here #return array of losses return ( ('loss_src', 0), ('loss_dst', 0) ) @@ -301,42 +311,26 @@ class ModelBase(object): #overridable if you want model name differs from folder name def get_model_name(self): - return Path(inspect.getmodule(self).__file__).parent.name.rsplit("_", 1)[1] + return self.model_name #overridable , return [ [model, filename],... 
] list def get_model_filename_list(self): return [] #overridable - def get_ConverterConfig(self): - #return predictor_func, predictor_input_shape, ConverterConfig() for the model + def get_MergerConfig(self): + #return predictor_func, predictor_input_shape, MergerConfig() for the model raise NotImplementedError + def get_pretraining_data_path(self): + return self.pretraining_data_path + def get_target_iter(self): return self.target_iter def is_reached_iter_goal(self): return self.target_iter != 0 and self.iter >= self.target_iter - #multi gpu in keras actually is fake and doesn't work for training https://github.com/keras-team/keras/issues/11976 - #def to_multi_gpu_model_if_possible (self, models_list): - # if len(self.device_config.gpu_idxs) > 1: - # #make batch_size to divide on GPU count without remainder - # self.batch_size = int( self.batch_size / len(self.device_config.gpu_idxs) ) - # if self.batch_size == 0: - # self.batch_size = 1 - # self.batch_size *= len(self.device_config.gpu_idxs) - # - # result = [] - # for model in models_list: - # for i in range( len(model.output_names) ): - # model.output_names = 'output_%d' % (i) - # result += [ nnlib.keras.utils.multi_gpu_model( model, self.device_config.gpu_idxs ) ] - # - # return result - # else: - # return models_list - def get_previews(self): return self.onGetPreview ( self.last_sample ) @@ -345,21 +339,23 @@ class ModelBase(object): def save(self): summary_path = self.get_strpath_storage_for_file('summary.txt') - Path( summary_path ).write_text(self.model_summary_text) + Path( summary_path ).write_text( self.get_summary_text() ) + self.onSave() model_data = { 'iter': self.iter, 'options': self.options, 'loss_history': self.loss_history, - 'sample_for_preview' : self.sample_for_preview + 'sample_for_preview' : self.sample_for_preview, + 'choosed_gpu_indexes' : self.choosed_gpu_indexes, } - self.model_data_path.write_bytes( pickle.dumps(model_data) ) - - bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ] - bckp_filename_list += [ str(summary_path), str(self.model_data_path) ] + pathex.write_bytes_safe (self.model_data_path, pickle.dumps(model_data) ) if self.autobackup: + bckp_filename_list = [ self.get_strpath_storage_for_file(filename) for _, filename in self.get_model_filename_list() ] + bckp_filename_list += [ str(summary_path), str(self.model_data_path) ] + current_hour = time.localtime().tm_hour if self.autobackup_current_hour != current_hour: self.autobackup_current_hour = current_hour @@ -373,10 +369,10 @@ class ModelBase(object): if idx_backup_path.exists(): if i == 15: - Path_utils.delete_all_files(idx_backup_path) + pathex.delete_all_files(idx_backup_path) else: next_idx_packup_path.mkdir(exist_ok=True) - Path_utils.move_all_files (idx_backup_path, next_idx_packup_path) + pathex.move_all_files (idx_backup_path, next_idx_packup_path) if i == 1: idx_backup_path.mkdir(exist_ok=True) @@ -394,97 +390,6 @@ class ModelBase(object): img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) cv2_imwrite (filepath, img ) - def load_weights_safe(self, model_filename_list, optimizer_filename_list=[]): - exec(nnlib.code_import_all, locals(), globals()) - - loaded = [] - not_loaded = [] - for mf in model_filename_list: - model, filename = mf - filename = self.get_strpath_storage_for_file(filename) - - if Path(filename).exists(): - loaded += [ mf ] - - if issubclass(model.__class__, keras.optimizers.Optimizer): - opt = model - - try: - with 
open(filename, "rb") as f: - fd = pickle.loads(f.read()) - - weights = fd.get('weights', None) - if weights is not None: - opt.set_weights(weights) - - except Exception as e: - print ("Unable to load ", filename) - - else: - model.load_weights(filename) - else: - not_loaded += [ mf ] - - - return loaded, not_loaded - - def save_weights_safe(self, model_filename_list): - exec(nnlib.code_import_all, locals(), globals()) - - for model, filename in model_filename_list: - filename = self.get_strpath_storage_for_file(filename) + '.tmp' - - if issubclass(model.__class__, keras.optimizers.Optimizer): - opt = model - - try: - fd = {} - symbolic_weights = getattr(opt, 'weights') - if symbolic_weights: - fd['weights'] = self.K.batch_get_value(symbolic_weights) - - with open(filename, 'wb') as f: - f.write( pickle.dumps(fd) ) - except Exception as e: - print ("Unable to save ", filename) - else: - model.save_weights( filename) - - rename_list = model_filename_list - - """ - #unused - , optimizer_filename_list=[] - if len(optimizer_filename_list) != 0: - opt_filename = self.get_strpath_storage_for_file('opt.h5') - - try: - d = {} - for opt, filename in optimizer_filename_list: - fd = {} - symbolic_weights = getattr(opt, 'weights') - if symbolic_weights: - fd['weights'] = self.K.batch_get_value(symbolic_weights) - - d[filename] = fd - - with open(opt_filename+'.tmp', 'wb') as f: - f.write( pickle.dumps(d) ) - - rename_list += [('', 'opt.h5')] - except Exception as e: - print ("Unable to save ", opt_filename) - """ - - for _, filename in rename_list: - filename = self.get_strpath_storage_for_file(filename) - source_filename = Path(filename+'.tmp') - if source_filename.exists(): - target_filename = Path(filename) - if target_filename.exists(): - target_filename.unlink() - source_filename.rename ( str(target_filename) ) - def debug_one_iter(self): images = [] for generator in self.generator_list: @@ -494,19 +399,15 @@ class ModelBase(object): return imagelib.equalize_and_stack_square (images) - def generate_next_sample(self): - return [ generator.generate_next() for generator in self.generator_list] - - #overridable - def on_success_train_one_iter(self): - pass + def generate_next_samples(self): + self.last_sample = sample = [ generator.generate_next() for generator in self.generator_list] + return sample def train_one_iter(self): - sample = self.generate_next_sample() + iter_time = time.time() - losses = self.onTrainOneIter(sample, self.generator_list) + losses = self.onTrainOneIter() iter_time = time.time() - iter_time - self.last_sample = sample self.loss_history.append ( [float(loss[1]) for loss in losses] ) @@ -527,17 +428,15 @@ class ModelBase(object): img = (np.concatenate ( [preview_lh, preview], axis=0 ) * 255).astype(np.uint8) cv2_imwrite (filepath, img ) - self.on_success_train_one_iter() - self.iter += 1 return self.iter, iter_time def pass_one_iter(self): - self.last_sample = self.generate_next_sample() + self.generate_next_samples() def finalize(self): - nnlib.finalize_all() + nn.tf_close_session() def is_first_run(self): return self.iter == 0 @@ -554,6 +453,10 @@ class ModelBase(object): def get_iter(self): return self.iter + def set_iter(self, iter): + self.iter = iter + self.loss_history = self.loss_history[:iter] + def get_loss_history(self): return self.loss_history @@ -564,30 +467,48 @@ class ModelBase(object): return self.generator_list def get_model_root_path(self): - return self.model_path + return self.saved_models_path def get_strpath_storage_for_file(self, filename): - if 
self.device_args['force_gpu_idx'] == -1: - return str( self.model_path / ( self.get_model_name() + '_' + filename) ) - else: - return str( self.model_path / ( str(self.device_args['force_gpu_idx']) + '_' + self.get_model_name() + '_' + filename) ) + return str( self.saved_models_path / ( self.get_model_name() + '_' + filename) ) - def set_vram_batch_requirements (self, d): - #example d = {2:2,3:4,4:8,5:16,6:32,7:32,8:32,9:48} - keys = [x for x in d.keys()] + def get_summary_text(self): + ###Generate text summary of model hyperparameters + #Find the longest key name and value string. Used as column widths. + width_name = max([len(k) for k in self.options.keys()] + [17]) + 1 # Single space buffer to left edge. Minimum of 17, the length of the longest static string used "Current iteration" + width_value = max([len(str(x)) for x in self.options.values()] + [len(str(self.get_iter())), len(self.get_model_name())]) + 1 # Single space buffer to right edge + if not self.device_config.cpu_only: #Check length of GPU names + width_value = max([len(device.name)+1 for device in self.device_config.devices] + [width_value]) + width_total = width_name + width_value + 2 #Plus 2 for ": " + summary_text = [] + summary_text += [f'=={" Model Summary ":=^{width_total}}=='] # Model/status summary + summary_text += [f'=={" "*width_total}=='] + summary_text += [f'=={"Model name": >{width_name}}: {self.get_model_name(): <{width_value}}=='] # Name + summary_text += [f'=={" "*width_total}=='] + summary_text += [f'=={"Current iteration": >{width_name}}: {str(self.get_iter()): <{width_value}}=='] # Iter + summary_text += [f'=={" "*width_total}=='] + + summary_text += [f'=={" Model Options ":-^{width_total}}=='] # Model options + summary_text += [f'=={" "*width_total}=='] + for key in self.options.keys(): + summary_text += [f'=={key: >{width_name}}: {str(self.options[key]): <{width_value}}=='] # self.options key/value pairs + summary_text += [f'=={" "*width_total}=='] + + summary_text += [f'=={" Running On ":-^{width_total}}=='] # Training hardware info + summary_text += [f'=={" "*width_total}=='] if self.device_config.cpu_only: - if self.batch_size == 0: - self.batch_size = 2 + summary_text += [f'=={"Using device": >{width_name}}: {"CPU": <{width_value}}=='] # cpu_only else: - if self.batch_size == 0: - for x in keys: - if self.device_config.gpu_vram_gb[0] <= x: - self.batch_size = d[x] - break - - if self.batch_size == 0: - self.batch_size = d[ keys[-1] ] + for device in self.device_config.devices: + summary_text += [f'=={"Device index": >{width_name}}: {device.index: <{width_value}}=='] # GPU hardware device index + summary_text += [f'=={"Name": >{width_name}}: {device.name: <{width_value}}=='] # GPU name + vram_str = f'{device.total_mem_gb:.2f}GB' # GPU VRAM - Formated as #.## (or ##.##) + summary_text += [f'=={"VRAM": >{width_name}}: {vram_str: <{width_value}}=='] + summary_text += [f'=={" "*width_total}=='] + summary_text += [f'=={"="*width_total}=='] + summary_text = "\n".join (summary_text) + return summary_text @staticmethod def get_loss_history_preview(loss_history, iter, w, c): diff --git a/models/Model_AVATAR/Model.py b/models/Model_AVATAR/Model.py deleted file mode 100644 index 3aef73d..0000000 --- a/models/Model_AVATAR/Model.py +++ /dev/null @@ -1,490 +0,0 @@ -from functools import partial - -import cv2 -import numpy as np - -from facelib import FaceType -from interact import interact as io -from mathlib import get_power_of_two -from models import ModelBase -from nnlib import nnlib -from samplelib import * 
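
The summary table is now built by `get_summary_text()` (moved out of `__init__` above) and reads device rows from `self.device_config.devices` instead of `nnlib.device`. The column alignment relies on nested f-string width specifiers computed from the longest option key and value; a small standalone sketch of that formatting trick, with illustrative values that are not taken from the diff:

```python
# Minimal sketch of the aligned two-column layout used by get_summary_text().
# 'options' and the widths below are illustrative, not copied from the diff.
options = {'resolution': 128, 'batch_size': 8, 'random_flip': True}

width_name  = max(len(k) for k in options) + 1                 # left column width
width_value = max(len(str(v)) for v in options.values()) + 1   # right column width
width_total = width_name + width_value + 2                     # plus 2 for ": "

lines = [f'=={" Model Options ":-^{width_total}}==']
for key, value in options.items():
    # nested width specifiers: right-align the key, left-align the value
    lines += [f'=={key: >{width_name}}: {str(value): <{width_value}}==']
print("\n".join(lines))
```
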
- -from facelib import PoseEstimator - -class AVATARModel(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_random_flip=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - #avatar_type = io.input_int("Avatar type ( 0:source, 1:head, 2:full_face ?:help skip:1) : ", 1, [0,1,2], - # help_message="Training target for the model. Source is direct untouched images. Full_face or head are centered nose unaligned faces.") - #avatar_type = {0:'source', - # 1:'head', - # 2:'full_face'}[avatar_type] - - self.options['avatar_type'] = 'head' - else: - self.options['avatar_type'] = self.options.get('avatar_type', 'head') - - if is_first_run or ask_override: - def_stage = self.options.get('stage', 1) - self.options['stage'] = io.input_int("Stage (0, 1, 2 ?:help skip:%d) : " % def_stage, def_stage, [0,1,2], help_message="Train first stage, then second. Tune batch size to maximum possible for both stages.") - else: - self.options['stage'] = self.options.get('stage', 1) - - #override - def onInitialize(self, batch_size=-1, **in_options): - exec(nnlib.code_import_all, locals(), globals()) - self.set_vram_batch_requirements({6:4}) - - resolution = self.resolution = 224 - avatar_type = self.options['avatar_type'] - stage = self.stage = self.options['stage'] - df_res = self.df_res = 128 - df_bgr_shape = (df_res, df_res, 3) - df_mask_shape = (df_res, df_res, 1) - res_bgr_shape = (resolution, resolution, 3) - res_bgr_t_shape = (resolution, resolution, 9) - - self.enc = modelify(AVATARModel.EncFlow())( [Input(df_bgr_shape),] ) - - self.decA64 = modelify(AVATARModel.DecFlow()) ( [ Input(K.int_shape(self.enc.outputs[0])[1:]) ] ) - self.decB64 = modelify(AVATARModel.DecFlow()) ( [ Input(K.int_shape(self.enc.outputs[0])[1:]) ] ) - self.D = modelify(AVATARModel.Discriminator() ) (Input(df_bgr_shape)) - self.C = modelify(AVATARModel.ResNet (9, n_blocks=6, ngf=128, use_dropout=False))( Input(res_bgr_t_shape)) - - self.CA_conv_weights_list = [] - if self.is_first_run(): - for model, _ in self.get_model_filename_list(): - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - self.CA_conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - - if not self.is_first_run(): - self.load_weights_safe( self.get_model_filename_list() ) - - def DLoss(labels,logits): - return K.mean(K.binary_crossentropy(labels,logits)) - - warped_A64 = Input(df_bgr_shape) - real_A64 = Input(df_bgr_shape) - real_A64m = Input(df_mask_shape) - - real_B64_t0 = Input(df_bgr_shape) - real_B64_t1 = Input(df_bgr_shape) - real_B64_t2 = Input(df_bgr_shape) - - real_A64_t0 = Input(df_bgr_shape) - real_A64m_t0 = Input(df_mask_shape) - real_A_t0 = Input(res_bgr_shape) - real_A64_t1 = Input(df_bgr_shape) - real_A64m_t1 = Input(df_mask_shape) - real_A_t1 = Input(res_bgr_shape) - real_A64_t2 = Input(df_bgr_shape) - real_A64m_t2 = Input(df_mask_shape) - real_A_t2 = Input(res_bgr_shape) - - warped_B64 = Input(df_bgr_shape) - real_B64 = Input(df_bgr_shape) - real_B64m = Input(df_mask_shape) - - warped_A_code = self.enc (warped_A64) - warped_B_code = self.enc (warped_B64) - - rec_A64 = self.decA64(warped_A_code) - rec_B64 = self.decB64(warped_B_code) - rec_AB64 = self.decA64(warped_B_code) - - def Lambda_grey_mask (x,m): - return Lambda (lambda x: x[0]*m+(1-m)*0.5, output_shape= K.int_shape(x)[1:3] + (3,)) ([x, m]) - - def Lambda_gray_pad(x): - a = np.ones((resolution,resolution,3))*0.5 - pad = ( resolution - df_res ) // 2 - a[pad:-pad:,pad:-pad:,:] = 0 
- - return Lambda ( lambda x: K.spatial_2d_padding(x, padding=((pad, pad), (pad, pad)) ) + K.constant(a, dtype=K.floatx() ), - output_shape=(resolution,resolution,3) ) (x) - - def Lambda_concat ( x ): - c = sum ( [ K.int_shape(l)[-1] for l in x ] ) - return Lambda ( lambda x: K.concatenate (x, axis=-1), output_shape=K.int_shape(x[0])[1:3] + (c,) ) (x) - - def Lambda_Cto3t(x): - return Lambda ( lambda x: x[...,0:3], output_shape= K.int_shape(x)[1:3] + (3,) ) (x), \ - Lambda ( lambda x: x[...,3:6], output_shape= K.int_shape(x)[1:3] + (3,) ) (x), \ - Lambda ( lambda x: x[...,6:9], output_shape= K.int_shape(x)[1:3] + (3,) ) (x) - - real_A64_d = self.D( Lambda_grey_mask(real_A64, real_A64m) ) - - real_A64_d_ones = K.ones_like(real_A64_d) - fake_A64_d = self.D(rec_AB64) - fake_A64_d_ones = K.ones_like(fake_A64_d) - fake_A64_d_zeros = K.zeros_like(fake_A64_d) - - rec_AB_t0 = Lambda_gray_pad( self.decA64 (self.enc (real_B64_t0)) ) - rec_AB_t1 = Lambda_gray_pad( self.decA64 (self.enc (real_B64_t1)) ) - rec_AB_t2 = Lambda_gray_pad( self.decA64 (self.enc (real_B64_t2)) ) - - C_in_A_t0 = Lambda_gray_pad( Lambda_grey_mask (real_A64_t0, real_A64m_t0) ) - C_in_A_t1 = Lambda_gray_pad( Lambda_grey_mask (real_A64_t1, real_A64m_t1) ) - C_in_A_t2 = Lambda_gray_pad( Lambda_grey_mask (real_A64_t2, real_A64m_t2) ) - - rec_C_A_t0, rec_C_A_t1, rec_C_A_t2 = Lambda_Cto3t ( self.C ( Lambda_concat ( [C_in_A_t0, C_in_A_t1, C_in_A_t2]) ) ) - rec_C_AB_t0, rec_C_AB_t1, rec_C_AB_t2 = Lambda_Cto3t( self.C ( Lambda_concat ( [rec_AB_t0, rec_AB_t1, rec_AB_t2]) ) ) - - #real_A_t012_d = self.CD ( K.concatenate ( [real_A_t0, real_A_t1,real_A_t2], axis=-1) ) - #real_A_t012_d_ones = K.ones_like(real_A_t012_d) - #rec_C_AB_t012_d = self.CD ( K.concatenate ( [rec_C_AB_t0,rec_C_AB_t1, rec_C_AB_t2], axis=-1) ) - #rec_C_AB_t012_d_ones = K.ones_like(rec_C_AB_t012_d) - #rec_C_AB_t012_d_zeros = K.zeros_like(rec_C_AB_t012_d) - - self.G64_view = K.function([warped_A64, warped_B64],[rec_A64, rec_B64, rec_AB64]) - self.G_view = K.function([real_A64_t0, real_A64m_t0, real_A64_t1, real_A64m_t1, real_A64_t2, real_A64m_t2, real_B64_t0, real_B64_t1, real_B64_t2], [rec_C_A_t0, rec_C_A_t1, rec_C_A_t2, rec_C_AB_t0, rec_C_AB_t1, rec_C_AB_t2]) - - if self.is_training_mode: - loss_AB64 = K.mean(10 * dssim(kernel_size=int(df_res/11.6),max_value=1.0) ( rec_A64, real_A64*real_A64m + (1-real_A64m)*0.5) ) + \ - K.mean(10 * dssim(kernel_size=int(df_res/11.6),max_value=1.0) ( rec_B64, real_B64*real_B64m + (1-real_B64m)*0.5) ) + 0.1*DLoss(fake_A64_d_ones, fake_A64_d ) - - weights_AB64 = self.enc.trainable_weights + self.decA64.trainable_weights + self.decB64.trainable_weights - - loss_C = K.mean( 10 * dssim(kernel_size=int(resolution/11.6),max_value=1.0) ( real_A_t0, rec_C_A_t0 ) ) + \ - K.mean( 10 * dssim(kernel_size=int(resolution/11.6),max_value=1.0) ( real_A_t1, rec_C_A_t1 ) ) + \ - K.mean( 10 * dssim(kernel_size=int(resolution/11.6),max_value=1.0) ( real_A_t2, rec_C_A_t2 ) ) - #0.1*DLoss(rec_C_AB_t012_d_ones, rec_C_AB_t012_d ) - - weights_C = self.C.trainable_weights - - loss_D = (DLoss(real_A64_d_ones, real_A64_d ) + \ - DLoss(fake_A64_d_zeros, fake_A64_d ) ) * 0.5 - - #loss_CD = ( DLoss(real_A_t012_d_ones, real_A_t012_d) + \ - # DLoss(rec_C_AB_t012_d_zeros, rec_C_AB_t012_d) ) * 0.5 - # - #weights_CD = self.CD.trainable_weights - - def opt(lr=5e-5): - return Adam(lr=lr, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2 if 'tensorflow' in self.device_config.backend else 0 ) - - self.AB64_train = K.function ([warped_A64, real_A64, real_A64m, warped_B64, real_B64, 
real_B64m], [loss_AB64], opt().get_updates(loss_AB64, weights_AB64) ) - self.C_train = K.function ([real_A64_t0, real_A64m_t0, real_A_t0, - real_A64_t1, real_A64m_t1, real_A_t1, - real_A64_t2, real_A64m_t2, real_A_t2, - real_B64_t0, real_B64_t1, real_B64_t2],[ loss_C ], opt().get_updates(loss_C, weights_C) ) - - self.D_train = K.function ([warped_A64, real_A64, real_A64m, warped_B64, real_B64, real_B64m],[loss_D], opt().get_updates(loss_D, self.D.trainable_weights) ) - - - #self.CD_train = K.function ([real_A64_t0, real_A64m_t0, real_A_t0, - # real_A64_t1, real_A64m_t1, real_A_t1, - # real_A64_t2, real_A64m_t2, real_A_t2, - # real_B64_t0, real_B64_t1, real_B64_t2 ],[ loss_CD ], opt().get_updates(loss_CD, weights_CD) ) - - ########### - t = SampleProcessor.Types - - training_target = {'source' : t.NONE, - 'full_face' : t.FACE_TYPE_FULL_NO_ALIGN, - 'head' : t.FACE_TYPE_HEAD_NO_ALIGN}[avatar_type] - - generators = [ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=False), - output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_M), 'resolution':df_res} - ] ), - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=False), - output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_M), 'resolution':df_res} - ] ), - - SampleGeneratorFaceTemporal(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - temporal_image_count=3, - sample_process_options=SampleProcessor.Options(random_flip=False), - output_sample_types=[{'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res},#IMG_WARPED_TRANSFORMED - {'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_M), 'resolution':df_res}, - {'types': (t.IMG_SOURCE, training_target, t.MODE_BGR), 'resolution':resolution}, - ] ), - - SampleGeneratorFaceTemporal(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - temporal_image_count=3, - sample_process_options=SampleProcessor.Options(random_flip=False), - output_sample_types=[{'types': (t.IMG_SOURCE, t.FACE_TYPE_FULL_NO_ALIGN, t.MODE_BGR), 'resolution':df_res}, - {'types': (t.IMG_SOURCE, t.NONE, t.MODE_BGR), 'resolution':resolution}, - ] ), - ] - - if self.stage == 1: - generators[2].set_active(False) - generators[3].set_active(False) - elif self.stage == 2: - generators[0].set_active(False) - generators[1].set_active(False) - - self.set_training_data_generators (generators) - else: - self.G_convert = K.function([real_B64_t0, real_B64_t1, real_B64_t2],[rec_C_AB_t1]) - - #override , return [ [model, filename],... 
] list - def get_model_filename_list(self): - return [ [self.enc, 'enc.h5'], - [self.decA64, 'decA64.h5'], - [self.decB64, 'decB64.h5'], - [self.C, 'C.h5'], - [self.D, 'D.h5'], - #[self.CD, 'CD.h5'], - ] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def on_success_train_one_iter(self): - if len(self.CA_conv_weights_list) != 0: - exec(nnlib.import_all(), locals(), globals()) - CAInitializerMP ( self.CA_conv_weights_list ) - self.CA_conv_weights_list = [] - - #override - def onTrainOneIter(self, generators_samples, generators_list): - warped_src64, src64, src64m = generators_samples[0] - warped_dst64, dst64, dst64m = generators_samples[1] - - real_A64_t0, real_A64m_t0, real_A_t0, real_A64_t1, real_A64m_t1, real_A_t1, real_A64_t2, real_A64m_t2, real_A_t2 = generators_samples[2] - real_B64_t0, _, real_B64_t1, _, real_B64_t2, _ = generators_samples[3] - - if self.stage == 0 or self.stage == 1: - loss, = self.AB64_train ( [warped_src64, src64, src64m, warped_dst64, dst64, dst64m] ) - loss_D, = self.D_train ( [warped_src64, src64, src64m, warped_dst64, dst64, dst64m] ) - if self.stage != 0: - loss_C = loss_CD = 0 - - if self.stage == 0 or self.stage == 2: - loss_C1, = self.C_train ( [real_A64_t0, real_A64m_t0, real_A_t0, - real_A64_t1, real_A64m_t1, real_A_t1, - real_A64_t2, real_A64m_t2, real_A_t2, - real_B64_t0, real_B64_t1, real_B64_t2] ) - - loss_C2, = self.C_train ( [real_A64_t2, real_A64m_t2, real_A_t2, - real_A64_t1, real_A64m_t1, real_A_t1, - real_A64_t0, real_A64m_t0, real_A_t0, - real_B64_t0, real_B64_t1, real_B64_t2] ) - - #loss_CD1, = self.CD_train ( [real_A64_t0, real_A64m_t0, real_A_t0, - # real_A64_t1, real_A64m_t1, real_A_t1, - # real_A64_t2, real_A64m_t2, real_A_t2, - # real_B64_t0, real_B64_t1, real_B64_t2] ) - # - #loss_CD2, = self.CD_train ( [real_A64_t2, real_A64m_t2, real_A_t2, - # real_A64_t1, real_A64m_t1, real_A_t1, - # real_A64_t0, real_A64m_t0, real_A_t0, - # real_B64_t0, real_B64_t1, real_B64_t2] ) - - loss_C = (loss_C1 + loss_C2) / 2 - #loss_CD = (loss_CD1 + loss_CD2) / 2 - if self.stage != 0: - loss = loss_D = 0 - - return ( ('loss', loss), ('D', loss_D), ('C', loss_C), ) #('CD', loss_CD) ) - - #override - def onGetPreview(self, sample): - test_A064w = sample[0][0][0:4] - test_A064r = sample[0][1][0:4] - test_A064m = sample[0][2][0:4] - - test_B064w = sample[1][0][0:4] - test_B064r = sample[1][1][0:4] - test_B064m = sample[1][2][0:4] - - t_src64_0 = sample[2][0][0:4] - t_src64m_0 = sample[2][1][0:4] - t_src_0 = sample[2][2][0:4] - t_src64_1 = sample[2][3][0:4] - t_src64m_1 = sample[2][4][0:4] - t_src_1 = sample[2][5][0:4] - t_src64_2 = sample[2][6][0:4] - t_src64m_2 = sample[2][7][0:4] - t_src_2 = sample[2][8][0:4] - - t_dst64_0 = sample[3][0][0:4] - t_dst_0 = sample[3][1][0:4] - t_dst64_1 = sample[3][2][0:4] - t_dst_1 = sample[3][3][0:4] - t_dst64_2 = sample[3][4][0:4] - t_dst_2 = sample[3][5][0:4] - - G64_view_result = self.G64_view ([test_A064r, test_B064r]) - test_A064r, test_B064r, rec_A64, rec_B64, rec_AB64 = [ x[0] for x in ([test_A064r, test_B064r] + G64_view_result) ] - - sample64x4 = np.concatenate ([ np.concatenate ( [rec_B64, rec_A64], axis=1 ), - np.concatenate ( [test_B064r, rec_AB64], axis=1) ], axis=0 ) - - sample64x4 = cv2.resize (sample64x4, (self.resolution, self.resolution) ) - - G_view_result = self.G_view([t_src64_0, t_src64m_0, t_src64_1, t_src64m_1, t_src64_2, t_src64m_2, t_dst64_0, t_dst64_1, t_dst64_2 ]) - - t_dst_0, t_dst_1, t_dst_2, rec_C_A_t0, rec_C_A_t1, rec_C_A_t2, 
rec_C_AB_t0, rec_C_AB_t1, rec_C_AB_t2 = [ x[0] for x in ([t_dst_0, t_dst_1, t_dst_2, ] + G_view_result) ] - - c1 = np.concatenate ( (sample64x4, rec_C_A_t0, t_dst_0, rec_C_AB_t0 ), axis=1 ) - c2 = np.concatenate ( (sample64x4, rec_C_A_t1, t_dst_1, rec_C_AB_t1 ), axis=1 ) - c3 = np.concatenate ( (sample64x4, rec_C_A_t2, t_dst_2, rec_C_AB_t2 ), axis=1 ) - - r = np.concatenate ( [c1,c2,c3], axis=0 ) - - return [ ('AVATAR', r ) ] - - def predictor_func (self, prev_imgs=None, img=None, next_imgs=None, dummy_predict=False): - if dummy_predict: - z = np.zeros ( (1, self.df_res, self.df_res, 3), dtype=np.float32 ) - self.G_convert ([z,z,z]) - else: - feed = [ prev_imgs[-1][np.newaxis,...], img[np.newaxis,...], next_imgs[0][np.newaxis,...] ] - x = self.G_convert (feed)[0] - return np.clip ( x[0], 0, 1) - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (self.df_res, self.df_res, 3), converters.ConverterConfigFaceAvatar(temporal_face_count=1) - - @staticmethod - def Discriminator(ndf=128): - exec (nnlib.import_all(), locals(), globals()) - - def func(input): - b,h,w,c = K.int_shape(input) - - x = input - - x = Conv2D( ndf, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.2)(x) - - x = Conv2D( ndf*2, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = InstanceNormalization (axis=-1)(x) - x = LeakyReLU(0.2)(x) - - x = Conv2D( ndf*4, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = InstanceNormalization (axis=-1)(x) - x = LeakyReLU(0.2)(x) - - x = Conv2D( ndf*8, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = InstanceNormalization (axis=-1)(x) - x = LeakyReLU(0.2)(x) - - return Conv2D( 1, 4, strides=1, padding='valid', activation='sigmoid')( ZeroPadding2D(3)(x) ) - return func - - @staticmethod - def EncFlow(): - exec (nnlib.import_all(), locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)( Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return SubpixelUpscaler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - - def func(input): - x, = input - b,h,w,c = K.int_shape(x) - - dim_res = w // 16 - - x = downscale(64)(x) - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - - x = Dense(512)(Flatten()(x)) - x = Dense(dim_res * dim_res * 512)(x) - x = Reshape((dim_res, dim_res, 512))(x) - x = upscale(512)(x) - return x - - return func - - @staticmethod - def DecFlow(output_nc=3, **kwargs): - exec (nnlib.import_all(), locals(), globals()) - - def upscale (dim): - def func(x): - return SubpixelUpscaler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def to_bgr (output_nc, **kwargs): - def func(x): - return Conv2D(output_nc, kernel_size=5, strides=1, padding='same', activation='sigmoid')(x) - return func - - def func(input): - x = input[0] - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - return to_bgr(output_nc) (x) - - return func - - @staticmethod - def ResNet(output_nc, ngf=64, n_blocks=6, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - def func(input): - def ResnetBlock(dim, use_dropout=False): - def func(input): - x = input - - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = InstanceNormalization (axis=-1)(x) - x = ReLU()(x) - - if use_dropout: - x = Dropout(0.5)(x) - - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = InstanceNormalization (axis=-1)(x) - x = ReLU()(x) - return 
Add()([x,input]) - return func - - x = input - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2D(ngf, 7, strides=1, padding='same')(x))) - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2D(ngf*2, 3, strides=2, padding='same')(x))) - x = ReLU()(InstanceNormalization (axis=-1)(Conv2D(ngf*4, 3, strides=2, padding='same')(x))) - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2D(ngf*4, 3, strides=2, padding='same')(x))) - - for i in range(n_blocks): - x = ResnetBlock(ngf*4, use_dropout=use_dropout)(x) - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2DTranspose(ngf*4, 3, strides=2, padding='same')(x))) - - x = ReLU()(InstanceNormalization (axis=-1)(Conv2DTranspose(ngf*2, 3, strides=2, padding='same')(x))) - x = ReLU()(InstanceNormalization (axis=-1)(Conv2DTranspose(ngf , 3, strides=2, padding='same')(x))) - - x = Conv2D(output_nc, 7, strides=1, activation='sigmoid', padding='same')(x) - - return x - - return func - -Model = AVATARModel \ No newline at end of file diff --git a/models/Model_AVATAR/__init__.py b/models/Model_AVATAR/__init__.py deleted file mode 100644 index cdb3fe7..0000000 --- a/models/Model_AVATAR/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model \ No newline at end of file diff --git a/models/Model_DEV_FANSEG/Model.py b/models/Model_DEV_FANSEG/Model.py deleted file mode 100644 index afe32fb..0000000 --- a/models/Model_DEV_FANSEG/Model.py +++ /dev/null @@ -1,103 +0,0 @@ -import numpy as np - -from nnlib import nnlib, TernausNet -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=False, - ask_random_flip=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - default_face_type = 'f' - if is_first_run: - self.options['face_type'] = io.input_str ("Half or Full face? 
(h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {1.5:4, 11:48} ) - - self.resolution = 256 - self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - model_name = 'FANSeg' - self.fan_seg = TernausNet(model_name, self.resolution, - FaceType.toString(self.face_type), - load_weights=not self.is_first_run(), - weights_file_root=self.get_model_root_path(), - training=True) - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True), - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_RANDOM_HSV_SHIFT), 'resolution' : self.resolution, 'motion_blur':(25, 5), 'gaussian_blur':(25,5), 'border_replicate':False}, - { 'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_M), 'resolution': self.resolution }, - ]), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True ), - output_sample_types=[ { 'types': (t.IMG_TRANSFORMED , face_type, t.MODE_BGR_RANDOM_HSV_SHIFT), 'resolution' : self.resolution}, - ]) - ]) - - #override - def onSave(self): - self.fan_seg.save_weights() - - #override - def onTrainOneIter(self, generators_samples, generators_list): - target_src, target_src_mask = generators_samples[0] - - loss = self.fan_seg.train( target_src, target_src_mask ) - - return ( ('loss', loss), ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][0][0:4] #first 4 samples - test_Am = sample[0][1][0:4] #first 4 samples - test_B = sample[1][0][0:4] #first 4 samples - - - mAA = self.fan_seg.extract(test_A) - mBB = self.fan_seg.extract(test_B) - - test_Am = np.repeat ( test_Am, (3,), -1) - mAA = np.repeat ( mAA, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - test_Am[i], - mAA[i], - test_A[i,:,:,0:3]*mAA[i], - ), axis=1) ) - - st2 = [] - for i in range(0, len(test_B)): - st2.append ( np.concatenate ( ( - test_B[i,:,:,0:3], - mBB[i], - test_B[i,:,:,0:3]*mBB[i], - ), axis=1) ) - - return [ ('training data', np.concatenate ( st, axis=0 ) ), - ('evaluating data', np.concatenate ( st2, axis=0 ) ), - ] diff --git a/models/Model_DEV_FANSEG/__init__.py b/models/Model_DEV_FANSEG/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_DEV_FANSEG/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_DEV_FUNIT/Model.py b/models/Model_DEV_FUNIT/Model.py deleted file mode 100644 index 4a0788f..0000000 --- a/models/Model_DEV_FUNIT/Model.py +++ /dev/null @@ -1,178 +0,0 @@ -from functools import partial - -import cv2 -import numpy as np - -from facelib import FaceType -from interact import interact as io -from mathlib import get_power_of_two -from models import ModelBase -from nnlib import nnlib, FUNIT -from samplelib import * - - - -class FUNITModel(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - 
ask_random_flip=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - - default_resolution = 64 - if is_first_run: - self.options['resolution'] = io.input_int(f"Resolution ( 64,96,128,224 ?:help skip:{default_resolution}) : ", default_resolution, [64,96,128,224]) - else: - self.options['resolution'] = self.options.get('resolution', default_resolution) - - default_face_type = 'mf' - if is_first_run: - self.options['face_type'] = io.input_str (f"Half or Full face? (h/mf/f, ?:help skip:{default_face_type}) : ", default_face_type, ['h','mf','f'], help_message="").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend: - def_optimizer_mode = self.options.get('optimizer_mode', 1) - self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train x2 bigger network consuming RAM. 3 - allows you to train x3 bigger network consuming huge amount of RAM and slower, depends on CPU power.") - else: - self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1) - - #override - def onInitialize(self, batch_size=-1, **in_options): - exec(nnlib.code_import_all, locals(), globals()) - self.set_vram_batch_requirements({4:16,11:24}) - - resolution = self.options['resolution'] - face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - person_id_max_count = SampleGeneratorFacePerson.get_person_id_max_count(self.training_data_src_path) - - - self.model = FUNIT( face_type_str=FaceType.toString(face_type), - batch_size=self.batch_size, - encoder_nf=64, - encoder_downs=2, - encoder_res_blk=2, - class_downs=4, - class_nf=64, - class_latent=64, - mlp_blks=2, - dis_nf=64, - dis_res_blks=8,#10 - num_classes=person_id_max_count, - subpixel_decoder=True, - initialize_weights=self.is_first_run(), - is_training=self.is_training_mode, - tf_cpu_mode=self.options['optimizer_mode']-1 - ) - - if not self.is_first_run(): - self.load_weights_safe(self.model.get_model_filename_list()) - - if self.is_training_mode: - t = SampleProcessor.Types - if self.options['face_type'] == 'h': - face_type = t.FACE_TYPE_HALF - elif self.options['face_type'] == 'mf': - face_type = t.FACE_TYPE_MID_FULL - elif self.options['face_type'] == 'f': - face_type = t.FACE_TYPE_FULL - - output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':resolution, 'normalize_tanh':True} ] - output_sample_types1=[ {'types': (t.IMG_SOURCE, face_type, t.MODE_BGR), 'resolution':resolution, 'normalize_tanh':True} ] - - self.set_training_data_generators ([ - SampleGeneratorFacePerson(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True, rotation_range=[0,0] ), - output_sample_types=output_sample_types, person_id_mode=1, ), - - SampleGeneratorFacePerson(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True, rotation_range=[0,0] ), - output_sample_types=output_sample_types, person_id_mode=1, ), - - SampleGeneratorFacePerson(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True, rotation_range=[0,0]), - output_sample_types=output_sample_types1, person_id_mode=1, 
), - - SampleGeneratorFacePerson(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=True, rotation_range=[0,0]), - output_sample_types=output_sample_types1, person_id_mode=1, ), - ]) - - #override - def get_model_filename_list(self): - return self.model.get_model_filename_list() - - #override - def onSave(self): - self.save_weights_safe(self.model.get_model_filename_list()) - - #override - def onTrainOneIter(self, generators_samples, generators_list): - xa,la = generators_samples[0] - xb,lb = generators_samples[1] - - G_loss, D_loss = self.model.train(xa,la,xb,lb) - - return ( ('G_loss', G_loss), ('D_loss', D_loss), ) - - #override - def onGetPreview(self, generators_samples): - xa = generators_samples[0][0] - xb = generators_samples[1][0] - ta = generators_samples[2][0] - tb = generators_samples[3][0] - - view_samples = min(4, xa.shape[0]) - - lines_train = [] - lines_test = [] - - for i in range(view_samples): - - s_xa = self.model.get_average_class_code([ xa[i:i+1] ])[0][None,...] - s_xb = self.model.get_average_class_code([ xb[i:i+1] ])[0][None,...] - - s_ta = self.model.get_average_class_code([ ta[i:i+1] ])[0][None,...] - s_tb = self.model.get_average_class_code([ tb[i:i+1] ])[0][None,...] - - xaxa = self.model.convert ([ xa[i:i+1], s_xa ] )[0][0] - xbxb = self.model.convert ([ xb[i:i+1], s_xb ] )[0][0] - xaxb = self.model.convert ([ xa[i:i+1], s_xb ] )[0][0] - xbxa = self.model.convert ([ xb[i:i+1], s_xa ] )[0][0] - - tata = self.model.convert ([ ta[i:i+1], s_ta ] )[0][0] - tbtb = self.model.convert ([ tb[i:i+1], s_tb ] )[0][0] - tatb = self.model.convert ([ ta[i:i+1], s_tb ] )[0][0] - tbta = self.model.convert ([ tb[i:i+1], s_ta ] )[0][0] - - line_train = [ xa[i], xaxa, xb[i], xbxb, xaxb, xbxa ] - line_test = [ ta[i], tata, tb[i], tbtb, tatb, tbta ] - - lines_train += [ np.concatenate([ np.clip(x/2+0.5,0,1) for x in line_train], axis=1) ] - lines_test += [ np.concatenate([ np.clip(x/2+0.5,0,1) for x in line_test ], axis=1) ] - - lines_train = np.concatenate ( lines_train, axis=0 ) - lines_test = np.concatenate ( lines_test, axis=0 ) - return [ ('TRAIN', lines_train ), ('TEST', lines_test) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.model.convert ([ np.zeros ( (1, self.options['resolution'], self.options['resolution'], 3), dtype=np.float32 ), self.average_class_code ]) - else: - bgr, = self.model.convert ([ face[np.newaxis,...]*2-1, self.average_class_code ]) - return bgr[0] / 2 + 0.5 - - #override - def get_ConverterConfig(self): - face_type = FaceType.FULL - - import converters - return self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), converters.ConverterConfigMasked(face_type=face_type, - default_mode = 1, - clip_hborder_mask_per=0.0625 if (face_type == FaceType.FULL) else 0, - ) - - -Model = FUNITModel diff --git a/models/Model_DEV_FUNIT/__init__.py b/models/Model_DEV_FUNIT/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_DEV_FUNIT/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_DEV_POSEEST/Model.py b/models/Model_DEV_POSEEST/Model.py deleted file mode 100644 index 68114d9..0000000 --- a/models/Model_DEV_POSEEST/Model.py +++ /dev/null @@ -1,120 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from facelib import PoseEstimator -from samplelib import * -from interact import 
interact as io -import imagelib - -class Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=False, - ask_random_flip=False) - - #override - def onInitializeOptions(self, is_first_run, ask_override): - yn_str = {True:'y',False:'n'} - - default_face_type = 'f' - if is_first_run: - self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() - else: - self.options['face_type'] = self.options.get('face_type', default_face_type) - - def_train_bgr = self.options.get('train_bgr', True) - if is_first_run or ask_override: - self.options['train_bgr'] = io.input_bool ("Train bgr? (y/n, ?:help skip: %s) : " % (yn_str[def_train_bgr]), def_train_bgr) - else: - self.options['train_bgr'] = self.options.get('train_bgr', def_train_bgr) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4:64} ) - - self.resolution = 128 - self.face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - - self.pose_est = PoseEstimator(self.resolution, - FaceType.toString(self.face_type), - load_weights=not self.is_first_run(), - weights_file_root=self.get_model_root_path(), - training=True) - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, - sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, - output_sample_types=[ {'types': (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution, 'motion_blur':(25, 1) }, - {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR_SHUFFLE), 'resolution':self.resolution }, - {'types': (t.IMG_PITCH_YAW_ROLL,)} - ]), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, generators_count=4, - sample_process_options=SampleProcessor.Options( rotation_range=[0,0] ), #random_flip=True, - output_sample_types=[ {'types': (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':self.resolution }, - {'types': (t.IMG_PITCH_YAW_ROLL,)} - ]) - ]) - - #override - def onSave(self): - self.pose_est.save_weights() - - #override - def onTrainOneIter(self, generators_samples, generators_list): - target_srcw, target_src, pitch_yaw_roll = generators_samples[0] - - bgr_loss, pyr_loss = self.pose_est.train_on_batch( target_srcw, target_src, pitch_yaw_roll, skip_bgr_train=not self.options['train_bgr'] ) - - return ( ('bgr_loss', bgr_loss), ('pyr_loss', pyr_loss), ) - - #override - def onGetPreview(self, generators_samples): - test_src = generators_samples[0][1][0:4] #first 4 samples - test_pyr_src = generators_samples[0][2][0:4] - test_dst = generators_samples[1][0][0:4] - test_pyr_dst = generators_samples[1][1][0:4] - - h,w,c = self.resolution,self.resolution,3 - h_line = 13 - - result = [] - for name, img, pyr in [ ['training data', test_src, test_pyr_src], \ - ['evaluating data',test_dst, test_pyr_dst] ]: - bgr_pred, pyr_pred = self.pose_est.extract(img) - - hor_imgs = [] - for i in range(len(img)): - img_info = np.ones ( (h,w,c) ) * 0.1 - - i_pyr = pyr[i] - i_pyr_pred = 
pyr_pred[i] - lines = ["%.4f %.4f %.4f" % (i_pyr[0],i_pyr[1],i_pyr[2]), - "%.4f %.4f %.4f" % (i_pyr_pred[0],i_pyr_pred[1],i_pyr_pred[2]) ] - - lines_count = len(lines) - for ln in range(lines_count): - img_info[ ln*h_line:(ln+1)*h_line, 0:w] += \ - imagelib.get_text_image ( (h_line,w,c), lines[ln], color=[0.8]*c ) - - hor_imgs.append ( np.concatenate ( ( - img[i,:,:,0:3], - bgr_pred[i], - img_info - ), axis=1) ) - - - result += [ (name, np.concatenate (hor_imgs, axis=0)) ] - - return result \ No newline at end of file diff --git a/models/Model_DEV_POSEEST/__init__.py b/models/Model_DEV_POSEEST/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_DEV_POSEEST/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_DF/Model.py b/models/Model_DF/Model.py deleted file mode 100644 index 3164a93..0000000 --- a/models/Model_DF/Model.py +++ /dev/null @@ -1,169 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoder_src, self.decoder_dst = self.Build(ae_input_layer) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - rec_src = self.decoder_src(self.encoder(ae_input_layer)) - rec_dst = self.decoder_dst(self.encoder(ae_input_layer)) - self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - - self.convert = K.function([ae_input_layer], rec_src) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - 
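# Aside: a minimal tf.keras sketch of the shared-encoder / two-decoder layout this DF model
# (and the H64/H128/LIAEF128 variants removed below) is built around: one encoder, a decoder
# per identity, each autoencoder trained only on its own faces, and the src decoder applied
# to the dst face's code at convert time. Layer sizes here are arbitrary illustrations, not
# the deleted model's exact dimensions.
from tensorflow.keras import layers, Model

inp = layers.Input((128, 128, 3))
x = layers.Conv2D(32, 5, strides=2, padding='same', activation='relu')(inp)
x = layers.Conv2D(64, 5, strides=2, padding='same', activation='relu')(x)
encoder = Model(inp, x, name='encoder')

def make_decoder(name):
    d_in = layers.Input(encoder.output_shape[1:])
    y = layers.Conv2DTranspose(64, 3, strides=2, padding='same', activation='relu')(d_in)
    y = layers.Conv2DTranspose(32, 3, strides=2, padding='same', activation='relu')(y)
    y = layers.Conv2D(3, 5, padding='same', activation='sigmoid')(y)
    return Model(d_in, y, name=name)

decoder_src = make_decoder('decoder_src')
decoder_dst = make_decoder('decoder_dst')
ae_src = Model(inp, decoder_src(encoder(inp)))   # trained on src faces only
ae_dst = Model(inp, decoder_dst(encoder(inp)))   # trained on dst faces only
ae_src.compile('adam', 'mae')
ae_dst.compile('adam', 'mae')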
sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) - loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = self.autoencoder_src.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('DF', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.convert ([ np.zeros ( (1, 128, 128, 3), dtype=np.float32 ) ]) - else: - x, mx = self.convert ( [ face[np.newaxis,...] 
] ) - return x[0], mx[0][...,0] - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (128,128,3), converters.ConverterConfigMasked(face_type=FaceType.FULL, default_mode='seamless') - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_layer): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - - x = Dense(512)(Flatten()(x)) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - - return Model(input_layer, x) - - def Decoder(): - input_ = Input(shape=(16, 16, 512)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - return Model(input_, [x,y]) - - return Encoder(input_layer), Decoder(), Decoder() diff --git a/models/Model_DF/__init__.py b/models/Model_DF/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_DF/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_H128/Model.py b/models/Model_H128/Model.py deleted file mode 100644 index 870780d..0000000 --- a/models/Model_H128/Model.py +++ /dev/null @@ -1,203 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. If your GPU VRAM <= 4, you should to choose this option.") - else: - default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future - if 'created_vram_gb' in self.options.keys(): - self.options.pop ('created_vram_gb') - self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) - - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. 
Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {2.5:4} ) - - bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build( self.options['lighter_ae'] ) - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - input_src_bgr = Input(bgr_shape) - input_src_mask = Input(mask_shape) - input_dst_bgr = Input(bgr_shape) - input_dst_mask = Input(mask_shape) - - rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) - rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) - - self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) - - self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), - loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) - - self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) - self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types=output_sample_types ), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types ) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - total, loss_src_bgr, loss_src_mask, loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_mask, warped_dst, target_dst_mask], [target_src, target_src_mask, target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.src_view([test_A]) - AB, mAB = self.src_view([test_B]) - BB, mBB = self.dst_view([test_B]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], 
- test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('H128', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.src_view ([ np.zeros ( (1, 128, 128, 3), dtype=np.float32 ) ]) - else: - x, mx = self.src_view ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (128,128,3), converters.ConverterConfigMasked(face_type=FaceType.HALF, default_mode='seamless') - - def Build(self, lighter_ae): - exec(nnlib.code_import_all, locals(), globals()) - - bgr_shape = (128, 128, 3) - mask_shape = (128, 128, 1) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_shape): - input_layer = Input(input_shape) - x = input_layer - if not lighter_ae: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(512)(Flatten()(x)) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - else: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(256)(Flatten()(x)) - x = Dense(8 * 8 * 256)(x) - x = Reshape((8, 8, 256))(x) - x = upscale(256)(x) - - return Model(input_layer, x) - - def Decoder(): - if not lighter_ae: - input_ = Input(shape=(16, 16, 512)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - else: - input_ = Input(shape=(16, 16, 256)) - x = input_ - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - - y = input_ #mask decoder - y = upscale(256)(y) - y = upscale(128)(y) - y = upscale(64)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - - return Model(input_, [x,y]) - - return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() diff --git a/models/Model_H128/__init__.py b/models/Model_H128/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_H128/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_H64/Model.py b/models/Model_H64/Model.py deleted file mode 100644 index 2e7142c..0000000 --- a/models/Model_H64/Model.py +++ /dev/null @@ -1,200 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run: - self.options['lighter_ae'] = io.input_bool ("Use lightweight autoencoder? (y/n, ?:help skip:n) : ", False, help_message="Lightweight autoencoder is faster, requires less VRAM, sacrificing overall quality. 
If your GPU VRAM <= 4, you should to choose this option.") - else: - default_lighter_ae = self.options.get('created_vram_gb', 99) <= 4 #temporally support old models, deprecate in future - if 'created_vram_gb' in self.options.keys(): - self.options.pop ('created_vram_gb') - self.options['lighter_ae'] = self.options.get('lighter_ae', default_lighter_ae) - - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {1.5:4} ) - - - bgr_shape, mask_shape, self.encoder, self.decoder_src, self.decoder_dst = self.Build(self.options['lighter_ae']) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder , 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] - ] - self.load_weights_safe(weights_to_load) - - input_src_bgr = Input(bgr_shape) - input_src_mask = Input(mask_shape) - input_dst_bgr = Input(bgr_shape) - input_dst_mask = Input(mask_shape) - - rec_src_bgr, rec_src_mask = self.decoder_src( self.encoder(input_src_bgr) ) - rec_dst_bgr, rec_dst_mask = self.decoder_dst( self.encoder(input_dst_bgr) ) - - self.ae = Model([input_src_bgr,input_src_mask,input_dst_bgr,input_dst_mask], [rec_src_bgr, rec_src_mask, rec_dst_bgr, rec_dst_mask] ) - - self.ae.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[ DSSIMMSEMaskLoss(input_src_mask, is_mse=self.options['pixel_loss']), 'mae', DSSIMMSEMaskLoss(input_dst_mask, is_mse=self.options['pixel_loss']), 'mae' ] ) - - self.src_view = K.function([input_src_bgr],[rec_src_bgr, rec_src_mask]) - self.dst_view = K.function([input_dst_bgr],[rec_dst_bgr, rec_dst_mask]) - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_BGR), 'resolution':64}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_HALF, t.MODE_M), 'resolution':64} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_full_mask = sample[0] - warped_dst, target_dst, target_dst_full_mask = sample[1] - - total, loss_src_bgr, loss_src_mask, loss_dst_bgr, loss_dst_mask = self.ae.train_on_batch( [warped_src, target_src_full_mask, warped_dst, target_dst_full_mask], 
[target_src, target_src_full_mask, target_dst, target_dst_full_mask] ) - - return ( ('loss_src', loss_src_bgr), ('loss_dst', loss_dst_bgr) ) - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.src_view([test_A]) - AB, mAB = self.src_view([test_B]) - BB, mBB = self.dst_view([test_B]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('H64', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.src_view ([ np.zeros ( (1, 64, 64, 3), dtype=np.float32 ) ]) - else: - x, mx = self.src_view ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (64,64,3), converters.ConverterConfigMasked(face_type=FaceType.HALF, default_mode='seamless') - - def Build(self, lighter_ae): - exec(nnlib.code_import_all, locals(), globals()) - - bgr_shape = (64, 64, 3) - mask_shape = (64, 64, 1) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(input_shape): - input_layer = Input(input_shape) - x = input_layer - if not lighter_ae: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Dense(1024)(Flatten()(x)) - x = Dense(4 * 4 * 1024)(x) - x = Reshape((4, 4, 1024))(x) - x = upscale(512)(x) - else: - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(768)(x) - x = Dense(512)(Flatten()(x)) - x = Dense(4 * 4 * 512)(x) - x = Reshape((4, 4, 512))(x) - x = upscale(256)(x) - return Model(input_layer, x) - - def Decoder(): - if not lighter_ae: - input_ = Input(shape=(8, 8, 512)) - x = input_ - - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - - else: - input_ = Input(shape=(8, 8, 256)) - - x = input_ - x = upscale(256)(x) - x = upscale(128)(x) - x = upscale(64)(x) - - y = input_ #mask decoder - y = upscale(256)(y) - y = upscale(128)(y) - y = upscale(64)(y) - - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid')(y) - - return Model(input_, [x,y]) - - return bgr_shape, mask_shape, Encoder(bgr_shape), Decoder(), Decoder() diff --git a/models/Model_H64/__init__.py b/models/Model_H64/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_H64/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_LIAEF128/Model.py b/models/Model_LIAEF128/Model.py deleted file mode 100644 index fc81c93..0000000 --- a/models/Model_LIAEF128/Model.py +++ /dev/null @@ -1,178 +0,0 @@ -import numpy as np - -from nnlib import nnlib -from models import ModelBase -from facelib import FaceType -from samplelib import * -from interact import interact as io - -class Model(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - if is_first_run or ask_override: - def_pixel_loss = 
self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool ("Use pixel loss? (y/n, ?:help skip: n/default ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time.") - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements( {4.5:4} ) - - ae_input_layer = Input(shape=(128, 128, 3)) - mask_layer = Input(shape=(128, 128, 1)) #same as output - - self.encoder, self.decoder, self.inter_B, self.inter_AB = self.Build(ae_input_layer) - - if not self.is_first_run(): - weights_to_load = [ [self.encoder, 'encoder.h5'], - [self.decoder, 'decoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5'] - ] - self.load_weights_safe(weights_to_load) - - code = self.encoder(ae_input_layer) - AB = self.inter_AB(code) - B = self.inter_B(code) - rec_src = self.decoder(Concatenate()([AB, AB])) - rec_dst = self.decoder(Concatenate()([B, AB])) - self.autoencoder_src = Model([ae_input_layer,mask_layer], rec_src ) - self.autoencoder_dst = Model([ae_input_layer,mask_layer], rec_dst ) - - self.autoencoder_src.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - self.autoencoder_dst.compile(optimizer=Adam(lr=5e-5, beta_1=0.5, beta_2=0.999), loss=[DSSIMMSEMaskLoss(mask_layer, is_mse=self.options['pixel_loss']), 'mse'] ) - - self.convert = K.function([ae_input_layer],rec_src) - - - if self.is_training_mode: - t = SampleProcessor.Types - output_sample_types=[ { 'types': (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution':128}, - { 'types': (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution':128} ] - - self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types=output_sample_types), - - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), - output_sample_types=output_sample_types) - ]) - - #override - def get_model_filename_list(self): - return [[self.encoder, 'encoder.h5'], - [self.decoder, 'decoder.h5'], - [self.inter_B, 'inter_B.h5'], - [self.inter_AB, 'inter_AB.h5']] - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, sample, generators_list): - warped_src, target_src, target_src_mask = sample[0] - warped_dst, target_dst, target_dst_mask = sample[1] - - loss_src = self.autoencoder_src.train_on_batch( [warped_src, target_src_mask], [target_src, target_src_mask] ) - loss_dst = self.autoencoder_dst.train_on_batch( [warped_dst, target_dst_mask], [target_dst, target_dst_mask] ) - - return ( ('loss_src', loss_src[0]), ('loss_dst', loss_dst[0]) ) - - - #override - def onGetPreview(self, sample): - test_A = sample[0][1][0:4] #first 4 samples - test_A_m = sample[0][2][0:4] #first 4 samples - test_B = sample[1][1][0:4] - test_B_m = sample[1][2][0:4] - - AA, mAA = self.autoencoder_src.predict([test_A, test_A_m]) - AB, mAB = 
self.autoencoder_src.predict([test_B, test_B_m]) - BB, mBB = self.autoencoder_dst.predict([test_B, test_B_m]) - - mAA = np.repeat ( mAA, (3,), -1) - mAB = np.repeat ( mAB, (3,), -1) - mBB = np.repeat ( mBB, (3,), -1) - - st = [] - for i in range(0, len(test_A)): - st.append ( np.concatenate ( ( - test_A[i,:,:,0:3], - AA[i], - #mAA[i], - test_B[i,:,:,0:3], - BB[i], - #mBB[i], - AB[i], - #mAB[i] - ), axis=1) ) - - return [ ('LIAEF128', np.concatenate ( st, axis=0 ) ) ] - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.convert ([ np.zeros ( (1, 128, 128, 3), dtype=np.float32 ) ]) - else: - x, mx = self.convert ( [ face[np.newaxis,...] ] ) - return x[0], mx[0][...,0] - - #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (128,128,3), converters.ConverterConfigMasked(face_type=FaceType.FULL, default_mode='seamless') - - def Build(self, input_layer): - exec(nnlib.code_import_all, locals(), globals()) - - def downscale (dim): - def func(x): - return LeakyReLU(0.1)(Conv2D(dim, 5, strides=2, padding='same')(x)) - return func - - def upscale (dim): - def func(x): - return PixelShuffler()(LeakyReLU(0.1)(Conv2D(dim * 4, 3, strides=1, padding='same')(x))) - return func - - def Encoder(): - x = input_layer - x = downscale(128)(x) - x = downscale(256)(x) - x = downscale(512)(x) - x = downscale(1024)(x) - x = Flatten()(x) - return Model(input_layer, x) - - def Intermediate(): - input_layer = Input(shape=(None, 8 * 8 * 1024)) - x = input_layer - x = Dense(256)(x) - x = Dense(8 * 8 * 512)(x) - x = Reshape((8, 8, 512))(x) - x = upscale(512)(x) - return Model(input_layer, x) - - def Decoder(): - input_ = Input(shape=(16, 16, 1024)) - x = input_ - x = upscale(512)(x) - x = upscale(256)(x) - x = upscale(128)(x) - x = Conv2D(3, kernel_size=5, padding='same', activation='sigmoid')(x) - - y = input_ #mask decoder - y = upscale(512)(y) - y = upscale(256)(y) - y = upscale(128)(y) - y = Conv2D(1, kernel_size=5, padding='same', activation='sigmoid' )(y) - - return Model(input_, [x,y]) - - return Encoder(), Decoder(), Intermediate(), Intermediate() diff --git a/models/Model_LIAEF128/__init__.py b/models/Model_LIAEF128/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_LIAEF128/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_Quick96/Model.py b/models/Model_Quick96/Model.py index 6b66041..4d18710 100644 --- a/models/Model_Quick96/Model.py +++ b/models/Model_Quick96/Model.py @@ -1,261 +1,503 @@ +import multiprocessing from functools import partial import numpy as np -import mathlib +from core import mathlib +from core.interact import interact as io +from core.leras import nn from facelib import FaceType -from interact import interact as io from models import ModelBase -from nnlib import nnlib from samplelib import * - -class Quick96Model(ModelBase): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs, - ask_enable_autobackup=False, - ask_write_preview_history=False, - ask_target_iter=True, - ask_batch_size=False, - ask_random_flip=False) - +class QModel(ModelBase): #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements({1.5:2,2:4}) + def on_initialize(self): + nn.initialize() + tf = nn.tf + + conv_kernel_initializer = nn.initializers.ca + + class Downscale(nn.ModelBase): + def __init__(self, in_ch, out_ch, kernel_size=5, dilations=1, subpixel=True, use_activator=True, *kwargs ): + 
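# Aside: a small NumPy sketch of the subpixel downscaling performed by the Downscale block
# being defined here (in its forward below: a stride-1 conv to out_ch//4 channels, then
# tf.nn.space_to_depth with block size 2). Each 2x2 spatial tile is folded into the channel
# axis, halving H and W and multiplying the channel count by 4. The helper below is a
# hypothetical NHWC re-implementation written only for this illustration.
import numpy as np

def space_to_depth(x, block=2):
    n, h, w, c = x.shape
    x = x.reshape(n, h // block, block, w // block, block, c)
    x = x.transpose(0, 1, 3, 2, 4, 5)
    return x.reshape(n, h // block, w // block, c * block * block)

x = np.arange(1 * 4 * 4 * 3, dtype=np.float32).reshape(1, 4, 4, 3)
y = space_to_depth(x)
print(x.shape, y.shape)   # (1, 4, 4, 3) -> (1, 2, 2, 12)
# The Upscale block goes the other way with tf.nn.depth_to_space, trading channels for resolution.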
self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.dilations = dilations + self.subpixel = subpixel + self.use_activator = use_activator + super().__init__(*kwargs) + + def on_build(self, *args, **kwargs ): + self.conv1 = nn.Conv2D( self.in_ch, + self.out_ch // (4 if self.subpixel else 1), + kernel_size=self.kernel_size, + strides=1 if self.subpixel else 2, + padding='SAME', dilations=self.dilations, kernel_initializer=conv_kernel_initializer ) + + def forward(self, x): + x = self.conv1(x) + + if self.subpixel: + x = tf.nn.space_to_depth(x, 2) + + if self.use_activator: + x = tf.nn.leaky_relu(x, 0.2) + return x + + def get_out_ch(self): + return (self.out_ch // 4) * 4 + + class DownscaleBlock(nn.ModelBase): + def on_build(self, in_ch, ch, n_downscales, kernel_size, dilations=1, subpixel=True): + self.downs = [] + + last_ch = in_ch + for i in range(n_downscales): + cur_ch = ch*( min(2**i, 8) ) + self.downs.append ( Downscale(last_ch, cur_ch, kernel_size=kernel_size, dilations=dilations, subpixel=subpixel) ) + last_ch = self.downs[-1].get_out_ch() + + def forward(self, inp): + x = inp + for down in self.downs: + x = down(x) + return x + + class Upscale(nn.ModelBase): + def on_build(self, in_ch, out_ch, kernel_size=3 ): + self.conv1 = nn.Conv2D( in_ch, out_ch*4, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, x): + x = self.conv1(x) + x = tf.nn.leaky_relu(x, 0.2) + x = tf.nn.depth_to_space(x, 2) + return x + + class UpdownResidualBlock(nn.ModelBase): + def on_build(self, ch, inner_ch, kernel_size=3 ): + self.up = Upscale (ch, inner_ch, kernel_size=kernel_size) + self.res = ResidualBlock (inner_ch, kernel_size=kernel_size) + self.down = Downscale (inner_ch, ch, kernel_size=kernel_size, use_activator=False) + + def forward(self, inp): + x = self.up(inp) + x = upx = self.res(x) + x = self.down(x) + x = x + inp + x = tf.nn.leaky_relu(x, 0.2) + return x, upx + + class ResidualBlock(nn.ModelBase): + def on_build(self, ch, kernel_size=3 ): + self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, inp): + x = self.conv1(inp) + x = tf.nn.leaky_relu(x, 0.2) + x = self.conv2(x) + x = inp + x + x = tf.nn.leaky_relu(x, 0.2) + return x + + class Encoder(nn.ModelBase): + def on_build(self, in_ch, e_ch): + self.down1 = DownscaleBlock(in_ch, e_ch, n_downscales=4, kernel_size=5) + def forward(self, inp): + return nn.tf_flatten(self.down1(inp)) + + class Inter(nn.ModelBase): + def __init__(self, in_ch, lowest_dense_res, ae_ch, ae_out_ch, d_ch, **kwargs): + self.in_ch, self.lowest_dense_res, self.ae_ch, self.ae_out_ch, self.d_ch = in_ch, lowest_dense_res, ae_ch, ae_out_ch, d_ch + super().__init__(**kwargs) + + def on_build(self): + in_ch, lowest_dense_res, ae_ch, ae_out_ch, d_ch = self.in_ch, self.lowest_dense_res, self.ae_ch, self.ae_out_ch, self.d_ch + + self.dense1 = nn.Dense( in_ch, ae_ch, kernel_initializer=tf.initializers.orthogonal ) + self.dense2 = nn.Dense( ae_ch, lowest_dense_res * lowest_dense_res * ae_out_ch, maxout_features=2, kernel_initializer=tf.initializers.orthogonal ) + self.upscale1 = Upscale(ae_out_ch, d_ch*8) + self.res1 = ResidualBlock(d_ch*8) + + def forward(self, inp): + x = self.dense1(inp) + x = self.dense2(x) + x = tf.reshape (x, (-1, lowest_dense_res, lowest_dense_res, self.ae_out_ch)) + x = 
self.upscale1(x) + x = self.res1(x) + return x + + def get_out_ch(self): + return self.ae_out_ch + + class Decoder(nn.ModelBase): + def on_build(self, in_ch, d_ch): + self.upscale1 = Upscale(in_ch, d_ch*4) + + self.res1 = UpdownResidualBlock(d_ch*4, d_ch*2) + self.upscale2 = Upscale(d_ch*4, d_ch*2) + self.res2 = UpdownResidualBlock(d_ch*2, d_ch) + self.upscale3 = Upscale(d_ch*2, d_ch*1) + self.res3 = UpdownResidualBlock(d_ch, d_ch//2) + + self.upscalem1 = Upscale(in_ch, d_ch) + self.upscalem2 = Upscale(d_ch, d_ch//2) + self.upscalem3 = Upscale(d_ch//2, d_ch//2) + + self.out_conv = nn.Conv2D( d_ch*1, 3, kernel_size=1, padding='SAME', kernel_initializer=conv_kernel_initializer) + self.out_convm = nn.Conv2D( d_ch//2, 1, kernel_size=1, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, inp): + z = inp + + x = self.upscale1(z) + x, upx = self.res1(x) + + x = self.upscale2(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res2(x) + + x = self.upscale3(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res3(x) + + """ + x = self.upscale1 (z) + x = self.res1 (x) + x = self.upscale2 (x) + x = self.res2 (x) + x = self.upscale3 (x) + x = self.res3 (x) + """ + + y = self.upscalem1 (z) + y = self.upscalem2 (y) + y = self.upscalem3 (y) + + return tf.nn.sigmoid(self.out_conv(x)), \ + tf.nn.sigmoid(self.out_convm(y)) + + device_config = nn.getCurrentDeviceConfig() + devices = device_config.devices resolution = self.resolution = 96 + ae_dims = 128 + e_dims = 128 + d_dims = 64 + self.pretrain = True + self.pretrain_just_disabled = False - class CommonModel(object): - def downscale (self, dim, kernel_size=5, dilation_rate=1): - def func(x): - return SubpixelDownscaler()(ELU()(Conv2D(dim // 4, kernel_size=kernel_size, strides=1, dilation_rate=dilation_rate, padding='same')(x))) - return func + masked_training = True - def upscale (self, dim, size=(2,2)): - def func(x): - return SubpixelUpscaler(size=size)(ELU()(Conv2D(dim * np.prod(size) , kernel_size=3, strides=1, padding='same')(x))) - return func + models_opt_on_gpu = len(devices) == 1 and devices[0].total_mem_gb >= 4 + models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0' + optimizer_vars_on_cpu = models_opt_device=='/CPU:0' - def ResidualBlock(self, dim): - def func(inp): - x = Conv2D(dim, kernel_size=3, padding='same')(inp) - x = LeakyReLU(0.2)(x) - x = Conv2D(dim, kernel_size=3, padding='same')(x) - x = Add()([x, inp]) - x = LeakyReLU(0.2)(x) - return x - return func + input_nc = 3 + output_nc = 3 + bgr_shape = (resolution, resolution, output_nc) + mask_shape = (resolution, resolution, 1) + lowest_dense_res = resolution // 16 - class QModel(CommonModel): - def __init__(self, resolution, ae_dims, e_dims, d_dims): - super().__init__() - bgr_shape = (resolution, resolution, 3) - mask_shape = (resolution, resolution, 1) - lowest_dense_res = resolution // 16 + self.model_filename_list = [] - def enc_flow(): - def func(inp): - x = self.downscale(e_dims, 3, 1 )(inp) - x = self.downscale(e_dims*2, 3, 1 )(x) - x = self.downscale(e_dims*4, 3, 1 )(x) - x0 = self.downscale(e_dims*8, 3, 1 )(x) - - x = self.downscale(e_dims, 3, 2 )(inp) - x = self.downscale(e_dims*2, 3, 2 )(x) - x = self.downscale(e_dims*4, 3, 2 )(x) - x1 = self.downscale(e_dims*8, 3, 2 )(x) - - x = Concatenate()([x0,x1]) - - x = DenseMaxout(ae_dims, kernel_initializer='orthogonal')(Flatten()(x)) - x = DenseMaxout(lowest_dense_res * lowest_dense_res * ae_dims, kernel_initializer='orthogonal')(x) - x = Reshape((lowest_dense_res, 
lowest_dense_res, ae_dims))(x) - - x = self.ResidualBlock(ae_dims)(x) - x = self.upscale(d_dims*8)(x) - x = self.ResidualBlock(d_dims*8)(x) - return x - return func - def dec_flow(): - def func(inp): - x = self.upscale(d_dims*4)(inp) - x = self.ResidualBlock(d_dims*4)(x) - x = self.upscale(d_dims*2)(x) - x = self.ResidualBlock(d_dims*2)(x) - x = self.upscale(d_dims)(x) - x = self.ResidualBlock(d_dims)(x) - - y = self.upscale(d_dims)(inp) - y = self.upscale(d_dims//2)(y) - y = self.upscale(d_dims//4)(y) - - return Conv2D(3, kernel_size=1, padding='same', activation='tanh')(x), \ - Conv2D(1, kernel_size=1, padding='same', activation='sigmoid')(y) + with tf.device ('/CPU:0'): + #Place holders on CPU + self.warped_src = tf.placeholder (tf.float32, (None,)+bgr_shape) + self.warped_dst = tf.placeholder (tf.float32, (None,)+bgr_shape) - return func + self.target_src = tf.placeholder (tf.float32, (None,)+bgr_shape) + self.target_dst = tf.placeholder (tf.float32, (None,)+bgr_shape) - self.encoder = modelify(enc_flow()) ( Input(bgr_shape) ) + self.target_srcm = tf.placeholder (tf.float32, (None,)+mask_shape) + self.target_dstm = tf.placeholder (tf.float32, (None,)+mask_shape) - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.decoder_src = modelify(dec_flow()) ( Input(sh) ) - self.decoder_dst = modelify(dec_flow()) ( Input(sh) ) + # Initializing model classes + with tf.device (models_opt_device): + self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, name='encoder') + encoder_out_ch = self.encoder.compute_output_shape ( (tf.float32, (None,resolution,resolution,input_nc)))[-1] - self.src_trainable_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights - self.dst_trainable_weights = self.encoder.trainable_weights + self.decoder_dst.trainable_weights + self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, d_ch=d_dims, name='inter') + inter_out_ch = self.inter.compute_output_shape ( (tf.float32, (None,encoder_out_ch)))[-1] - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - self.target_src, self.target_dst = Input(bgr_shape), Input(bgr_shape) - self.target_srcm, self.target_dstm = Input(mask_shape), Input(mask_shape) - - self.src_code = self.encoder(self.warped_src) - self.dst_code = self.encoder(self.warped_dst) + self.decoder_src = Decoder(in_ch=inter_out_ch, d_ch=d_dims, name='decoder_src') + self.decoder_dst = Decoder(in_ch=inter_out_ch, d_ch=d_dims, name='decoder_dst') - self.pred_src_src, self.pred_src_srcm = self.decoder_src(self.src_code) - self.pred_dst_dst, self.pred_dst_dstm = self.decoder_dst(self.dst_code) - self.pred_src_dst, self.pred_src_dstm = self.decoder_src(self.dst_code) + self.model_filename_list += [ [self.encoder, 'encoder.npy' ], + [self.inter, 'inter.npy' ], + [self.decoder_src, 'decoder_src.npy'], + [self.decoder_dst, 'decoder_dst.npy'] ] - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [] - if not exclude_for_pretrain: - ar += [ [self.encoder, 'encoder.h5'] ] - ar += [ [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] ] - - return ar + if self.is_training: + self.src_dst_trainable_weights = self.encoder.get_weights() + self.decoder_src.get_weights() + self.decoder_dst.get_weights() - self.model = QModel (resolution, 128, 64, 64) + # Initialize optimizers + self.src_dst_opt = nn.TFRMSpropOptimizer(lr=2e-4, lr_dropout=0.3, name='src_dst_opt') + self.src_dst_opt.initialize_variables(self.src_dst_trainable_weights, 
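# Aside: plain-Python shape bookkeeping for the encoder -> inter chain instantiated above,
# using the hyperparameters declared in this patch (resolution=96, e_dims=128, ae_dims=128,
# four stride-2 downscales in DownscaleBlock); no framework needed, just arithmetic.
resolution, e_dims, ae_dims = 96, 128, 128
lowest_dense_res = resolution // 16            # 6, matching four halvings: 96 -> 48 -> 24 -> 12 -> 6
side = resolution
channels = None
for i in range(4):
    side //= 2
    channels = e_dims * min(2 ** i, 8)         # 128, 256, 512, 1024 per DownscaleBlock
assert side == lowest_dense_res
encoder_flat = side * side * channels          # 6 * 6 * 1024 = 36864 features into Inter.dense1
inter_dense2 = lowest_dense_res * lowest_dense_res * ae_dims   # 4608, reshaped to (6, 6, 128)
print(encoder_flat, inter_dense2)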
vars_on_cpu=optimizer_vars_on_cpu ) + self.model_filename_list += [ (self.src_dst_opt, 'src_dst_opt.npy') ] - loaded, not_loaded = [], self.model.get_model_filename_list() - if not self.is_first_run(): - loaded, not_loaded = self.load_weights_safe(not_loaded) + if self.is_training: + # Adjust batch size for multiple GPU + gpu_count = max(1, len(devices) ) + bs_per_gpu = max(1, 4 // gpu_count) + self.set_batch_size( gpu_count*bs_per_gpu) - CA_models = [ model for model, _ in not_loaded ] - - self.CA_conv_weights_list = [] - for model in CA_models: - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - self.CA_conv_weights_list += [layer.weights[0]] #- is Conv2D kernel_weights - - if self.is_training_mode: - lr_dropout = 0.3 if nnlib.device.backend != 'plaidML' else 0.0 - self.src_dst_opt = RMSprop(lr=2e-4, lr_dropout=lr_dropout) - self.src_dst_mask_opt = RMSprop(lr=2e-4, lr_dropout=lr_dropout) - - target_src_masked = self.model.target_src*self.model.target_srcm - target_dst_masked = self.model.target_dst*self.model.target_dstm - - pred_src_src_masked = self.model.pred_src_src*self.model.target_srcm - pred_dst_dst_masked = self.model.pred_dst_dst*self.model.target_dstm + # Compute losses per GPU + gpu_pred_src_src_list = [] + gpu_pred_dst_dst_list = [] + gpu_pred_src_dst_list = [] + gpu_pred_src_srcm_list = [] + gpu_pred_dst_dstm_list = [] + gpu_pred_src_dstm_list = [] - src_loss = K.mean ( 10*dssim(kernel_size=int(resolution/11.6),max_value=2.0)( target_src_masked+1, pred_src_src_masked+1) ) - src_loss += K.mean ( 10*K.square( target_src_masked - pred_src_src_masked ) ) - src_loss += K.mean(K.square(self.model.target_srcm-self.model.pred_src_srcm)) + gpu_src_losses = [] + gpu_dst_losses = [] + gpu_src_dst_loss_gvs = [] - dst_loss = K.mean( 10*dssim(kernel_size=int(resolution/11.6),max_value=2.0)(target_dst_masked+1, pred_dst_dst_masked+1) ) - dst_loss += K.mean( 10*K.square( target_dst_masked - pred_dst_dst_masked ) ) - dst_loss += K.mean(K.square(self.model.target_dstm-self.model.pred_dst_dstm)) + for gpu_id in range(gpu_count): + with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ): + batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu ) + with tf.device(f'/CPU:0'): + # slice on CPU, otherwise all batch data will be transfered to GPU first + gpu_warped_src = self.warped_src [batch_slice,:,:,:] + gpu_warped_dst = self.warped_dst [batch_slice,:,:,:] + gpu_target_src = self.target_src [batch_slice,:,:,:] + gpu_target_dst = self.target_dst [batch_slice,:,:,:] + gpu_target_srcm = self.target_srcm[batch_slice,:,:,:] + gpu_target_dstm = self.target_dstm[batch_slice,:,:,:] - self.src_train = K.function ([self.model.warped_src, self.model.target_src, self.model.target_srcm], [src_loss], self.src_dst_opt.get_updates( src_loss, self.model.src_trainable_weights) ) - self.dst_train = K.function ([self.model.warped_dst, self.model.target_dst, self.model.target_dstm], [dst_loss], self.src_dst_opt.get_updates( dst_loss, self.model.dst_trainable_weights) ) - self.AE_view = K.function ([self.model.warped_src, self.model.warped_dst], [self.model.pred_src_src, self.model.pred_dst_dst, self.model.pred_dst_dstm, self.model.pred_src_dst, self.model.pred_src_dstm]) + # process model tensors + gpu_src_code = self.inter(self.encoder(gpu_warped_src)) + gpu_dst_code = self.inter(self.encoder(gpu_warped_dst)) + gpu_pred_src_src, gpu_pred_src_srcm = self.decoder_src(gpu_src_code) + gpu_pred_dst_dst, gpu_pred_dst_dstm = self.decoder_dst(gpu_dst_code) + gpu_pred_src_dst, 
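# Aside: a toy NumPy sketch of the data-parallel pattern used in this per-GPU loop: the batch
# is sliced per device (on the CPU, as above), each device computes its own gradient, and the
# per-device gradients are averaged further down before a single optimizer step. All names
# below are hypothetical; with equal slice sizes the averaged gradient equals the full-batch one.
import numpy as np

def slice_gradient(w, x_slice, y_slice):
    # Toy linear model with squared error: d/dw mean((w*x - y)^2).
    return np.mean(2.0 * (w * x_slice - y_slice) * x_slice)

w = 0.5
x = np.linspace(-1.0, 1.0, 8)                 # full batch of 8 samples
y = 3.0 * x
n_devices, bs_per_device = 2, 4

grads = []
for d in range(n_devices):
    sl = slice(d * bs_per_device, (d + 1) * bs_per_device)
    grads.append(slice_gradient(w, x[sl], y[sl]))

avg_grad = np.mean(grads)                     # average across devices
full_grad = slice_gradient(w, x, y)           # same value, computed on the whole batch
w -= 0.1 * avg_grad                           # one shared update
print(avg_grad, full_grad)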
gpu_pred_src_dstm = self.decoder_src(gpu_dst_code) + + gpu_pred_src_src_list.append(gpu_pred_src_src) + gpu_pred_dst_dst_list.append(gpu_pred_dst_dst) + gpu_pred_src_dst_list.append(gpu_pred_src_dst) + + gpu_pred_src_srcm_list.append(gpu_pred_src_srcm) + gpu_pred_dst_dstm_list.append(gpu_pred_dst_dstm) + gpu_pred_src_dstm_list.append(gpu_pred_src_dstm) + + gpu_target_srcm_blur = nn.tf_gaussian_blur(gpu_target_srcm, max(1, resolution // 32) ) + gpu_target_dstm_blur = nn.tf_gaussian_blur(gpu_target_dstm, max(1, resolution // 32) ) + + gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur + gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur) + + gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src + gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst + + gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src + gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst + + gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur + gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur) + + gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) + gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3]) + gpu_src_loss += tf.reduce_mean ( tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] ) + + gpu_dst_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1]) + gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3]) + gpu_dst_loss += tf.reduce_mean ( tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] ) + + gpu_src_losses += [gpu_src_loss] + gpu_dst_losses += [gpu_dst_loss] + + gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss + gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ] + + + # Average losses and gradients, and create optimizer update ops + with tf.device (models_opt_device): + if gpu_count == 1: + pred_src_src = gpu_pred_src_src_list[0] + pred_dst_dst = gpu_pred_dst_dst_list[0] + pred_src_dst = gpu_pred_src_dst_list[0] + pred_src_srcm = gpu_pred_src_srcm_list[0] + pred_dst_dstm = gpu_pred_dst_dstm_list[0] + pred_src_dstm = gpu_pred_src_dstm_list[0] + + src_loss = gpu_src_losses[0] + dst_loss = gpu_dst_losses[0] + src_dst_loss_gv = gpu_src_dst_loss_gvs[0] + else: + pred_src_src = tf.concat(gpu_pred_src_src_list, 0) + pred_dst_dst = tf.concat(gpu_pred_dst_dst_list, 0) + pred_src_dst = tf.concat(gpu_pred_src_dst_list, 0) + pred_src_srcm = tf.concat(gpu_pred_src_srcm_list, 0) + pred_dst_dstm = tf.concat(gpu_pred_dst_dstm_list, 0) + pred_src_dstm = tf.concat(gpu_pred_src_dstm_list, 0) + + src_loss = nn.tf_average_tensor_list(gpu_src_losses) + dst_loss = nn.tf_average_tensor_list(gpu_dst_losses) + src_dst_loss_gv = nn.tf_average_gv_list (gpu_src_dst_loss_gvs) + + src_dst_loss_gv_op = self.src_dst_opt.get_update_op (src_dst_loss_gv) + + # Initializing training and view functions + def src_dst_train(warped_src, target_src, target_srcm, \ + warped_dst, target_dst, target_dstm): + s, d, _ = nn.tf_sess.run ( [ src_loss, dst_loss, src_dst_loss_gv_op], + 
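For reference, the per-sample loss built in each tower combines a structural term and two L2 terms. A hedged TensorFlow sketch: masked_face_loss is a hypothetical name, tf.image.ssim stands in for the repo's nn.tf_dssim (with DSSIM taken as (1 - SSIM) / 2), mask_blur is the gaussian-blurred mask from the patch, and masked_training is assumed on.

import tensorflow as tf

def masked_face_loss(target, pred, target_mask, pred_mask, mask_blur, resolution):
    # 10 * DSSIM + 10 * L2 on the mask-weighted faces, plus a plain L2 on the mask channel.
    t = target * mask_blur
    p = pred * mask_blur
    dssim = (1.0 - tf.image.ssim(t, p, max_val=1.0,
                                 filter_size=int(resolution / 11.6))) / 2.0
    loss  = 10.0 * dssim                                             # per-sample structural term
    loss += tf.reduce_mean(10.0 * tf.square(t - p), axis=[1, 2, 3])  # per-sample L2 on the face
    loss += tf.reduce_mean(tf.square(target_mask - pred_mask), axis=[1, 2, 3])  # mask L2
    return loss  # shape (batch,); towers are averaged afterwards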
feed_dict={self.warped_src :warped_src, + self.target_src :target_src, + self.target_srcm:target_srcm, + self.warped_dst :warped_dst, + self.target_dst :target_dst, + self.target_dstm:target_dstm, + }) + s = np.mean(s) + d = np.mean(d) + return s, d + self.src_dst_train = src_dst_train + + def AE_view(warped_src, warped_dst): + return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm], + feed_dict={self.warped_src:warped_src, + self.warped_dst:warped_dst}) + + self.AE_view = AE_view else: - self.AE_convert = K.function ([self.model.warped_dst],[ self.model.pred_src_dst, self.model.pred_dst_dstm, self.model.pred_src_dstm ]) + # Initializing merge function + with tf.device( f'/GPU:0' if len(devices) != 0 else f'/CPU:0'): + gpu_dst_code = self.inter(self.encoder(self.warped_dst)) + gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder_src(gpu_dst_code) + _, gpu_pred_dst_dstm = self.decoder_dst(gpu_dst_code) - if self.is_training_mode: + def AE_merge( warped_dst): + return nn.tf_sess.run ( [gpu_pred_src_dst, gpu_pred_dst_dstm, gpu_pred_src_dstm], feed_dict={self.warped_dst:warped_dst}) + + self.AE_merge = AE_merge + + + + + # Loading/initializing all models/optimizers weights + for model, filename in io.progress_bar_generator(self.model_filename_list, "Initializing models"): + do_init = self.is_first_run() + + if self.pretrain_just_disabled: + if model == self.inter: + do_init = True + + if not do_init: + do_init = not model.load_weights( self.get_strpath_storage_for_file(filename) ) + + if do_init and self.pretrained_model_path is not None: + pretrained_filepath = self.pretrained_model_path / filename + if pretrained_filepath.exists(): + do_init = not model.load_weights(pretrained_filepath) + + if do_init: + model.init_weights() + + # initializing sample generators + + if self.is_training: t = SampleProcessor.Types + face_type = t.FACE_TYPE_FULL + + training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path() + training_data_dst_path = self.training_data_dst_path if not self.pretrain else self.get_pretraining_data_path() + + cpu_count = multiprocessing.cpu_count() + + src_generators_count = cpu_count // 2 + dst_generators_count = cpu_count - src_generators_count self.set_training_data_generators ([ - SampleGeneratorFace(self.training_data_src_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=False, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution': resolution, 'normalize_tanh':True }, - {'types' : (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution': resolution, 'normalize_tanh':True }, - {'types' : (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution': resolution } ] - ), + SampleGeneratorFace(training_data_src_path, debug=self.is_debug(), batch_size=self.get_batch_size(), + sample_process_options=SampleProcessor.Options(random_flip=True if self.pretrain else False), + output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':resolution, }, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution': resolution, }, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution } ], + generators_count=src_generators_count ), - SampleGeneratorFace(self.training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - 
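The "Initializing models" loop a few lines above resolves weights in a fixed order. A compact restatement of that cascade (load_or_init is a hypothetical wrapper; load_weights and init_weights are the model methods the patch calls):

from pathlib import Path

def load_or_init(model, filename, storage_dir, pretrained_dir=None, force_init=False):
    # Order used by the patch: a freshly created (or deliberately reset, e.g. inter after
    # pretrain is disabled) model -> try this model's saved weights -> try the pretrained
    # model path -> fall back to random initialization.
    do_init = force_init
    if not do_init:
        do_init = not model.load_weights(str(Path(storage_dir) / filename))
    if do_init and pretrained_dir is not None:
        pretrained_filepath = Path(pretrained_dir) / filename
        if pretrained_filepath.exists():
            do_init = not model.load_weights(str(pretrained_filepath))
    if do_init:
        model.init_weights()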
sample_process_options=SampleProcessor.Options(random_flip=False, ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution': resolution, 'normalize_tanh':True }, - {'types' : (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_BGR), 'resolution': resolution, 'normalize_tanh':True }, - {'types' : (t.IMG_TRANSFORMED, t.FACE_TYPE_FULL, t.MODE_M), 'resolution': resolution} ]) + SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.get_batch_size(), + sample_process_options=SampleProcessor.Options(random_flip=True if self.pretrain else False), + output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t.MODE_BGR), 'resolution':resolution}, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution': resolution}, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution} ], + generators_count=dst_generators_count ) ]) - self.counter = 0 - + + self.last_samples = None + #override def get_model_filename_list(self): - return self.model.get_model_filename_list () + return self.model_filename_list #override def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) + for model, filename in io.progress_bar_generator(self.get_model_filename_list(), "Saving", leave=False): + model.save_weights ( self.get_strpath_storage_for_file(filename) ) + #override - def on_success_train_one_iter(self): - if len(self.CA_conv_weights_list) != 0: - exec(nnlib.import_all(), locals(), globals()) - CAInitializerMP ( self.CA_conv_weights_list ) - self.CA_conv_weights_list = [] - - #override - def onTrainOneIter(self, generators_samples, generators_list): - warped_src, target_src, target_srcm = generators_samples[0] - warped_dst, target_dst, target_dstm = generators_samples[1] - - self.counter += 1 - if self.counter % 3 == 0: - src_loss, = self.src_train ([warped_src, target_src, target_srcm]) - dst_loss, = self.dst_train ([warped_dst, target_dst, target_dstm]) + def onTrainOneIter(self): + if self.get_iter() % 3 == 0 and self.last_samples is not None: + ( (warped_src, target_src, target_srcm), \ + (warped_dst, target_dst, target_dstm) ) = self.last_samples + src_loss, dst_loss = self.src_dst_train (target_src, target_src, target_srcm, + target_dst, target_dst, target_dstm) else: - src_loss, = self.src_train ([target_src, target_src, target_srcm]) - dst_loss, = self.dst_train ([target_dst, target_dst, target_dstm]) + samples = self.last_samples = self.generate_next_samples() + ( (warped_src, target_src, target_srcm), \ + (warped_dst, target_dst, target_dstm) ) = samples + src_loss, dst_loss = self.src_dst_train (warped_src, target_src, target_srcm, + warped_dst, target_dst, target_dstm) + return ( ('src_loss', src_loss), ('dst_loss', dst_loss), ) #override - def onGetPreview(self, sample): - test_S = sample[0][1][0:4] #first 4 samples - test_S_m = sample[0][2][0:4] #first 4 samples - test_D = sample[1][1][0:4] - test_D_m = sample[1][2][0:4] + def onGetPreview(self, samples): + n_samples = min(4, self.get_batch_size() ) - S, D, SS, DD, DDM, SD, SDM = [test_S,test_D] + self.AE_view ([test_S, test_D]) - S, D, SS, DD, SD, = [ np.clip(x/2+0.5, 0.0, 1.0) for x in [S, D, SS, DD, SD] ] - DDM, SDM, = [ np.clip( np.repeat (x, (3,), -1), 0, 1) for x in [DDM, SDM] ] + ( (warped_src, target_src, target_srcm), + (warped_dst, target_dst, target_dstm) ) = \ + [ [sample[0:n_samples] for sample in sample_list ] + for sample_list in samples ] + + S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x 
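The Quick96 onTrainOneIter above alternates between ordinary iterations and every third iteration that reuses the previous batch with the unwarped targets as input. The same schedule extracted into a standalone sketch; the wrapper signature (generate_next_samples, src_dst_train passed as callables) is an assumption made only for self-containment:

def train_one_iter(iter_n, last_samples, generate_next_samples, src_dst_train):
    # Every third iteration: reuse the last batch and feed the *unwarped* targets as input,
    # nudging the autoencoder toward identity reconstruction.
    if iter_n % 3 == 0 and last_samples is not None:
        (warped_src, target_src, target_srcm), (warped_dst, target_dst, target_dstm) = last_samples
        src_loss, dst_loss = src_dst_train(target_src, target_src, target_srcm,
                                           target_dst, target_dst, target_dstm)
    else:
        last_samples = generate_next_samples()
        (warped_src, target_src, target_srcm), (warped_dst, target_dst, target_dstm) = last_samples
        src_loss, dst_loss = src_dst_train(warped_src, target_src, target_srcm,
                                           warped_dst, target_dst, target_dstm)
    return src_loss, dst_loss, last_samples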
in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ] + DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] result = [] st = [] - for i in range(len(test_S)): + for i in range(n_samples): ar = S[i], SS[i], D[i], DD[i], SD[i] st.append ( np.concatenate ( ar, axis=1) ) result += [ ('Quick96', np.concatenate (st, axis=0 )), ] - + st_m = [] - for i in range(len(test_S)): - ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) + for i in range(n_samples): + ar = S[i]*target_srcm[i], SS[i], D[i]*target_dstm[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) st_m.append ( np.concatenate ( ar, axis=1) ) result += [ ('Quick96 masked', np.concatenate (st_m, axis=0 )), ] return result - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.AE_convert ([ np.zeros ( (1, self.resolution, self.resolution, 3), dtype=np.float32 ) ]) - else: - face = face * 2 - 1 - bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) - bgr = bgr /2 + 0.5 - mask = mask_dst_dstm[0] * mask_src_dstm[0] - return bgr[0], mask[...,0] + def predictor_func (self, face=None): + + bgr, mask_dst_dstm, mask_src_dstm = self.AE_merge (face[np.newaxis,...]) + mask = mask_dst_dstm[0] * mask_src_dstm[0] + return bgr[0], mask[...,0] #override - def get_ConverterConfig(self): - import converters - return self.predictor_func, (self.resolution, self.resolution, 3), converters.ConverterConfigMasked(face_type=FaceType.FULL, - default_mode='seamless', clip_hborder_mask_per=0.0625) + def get_MergerConfig(self): + face_type = FaceType.FULL -Model = Quick96Model + import merger + return self.predictor_func, (self.resolution, self.resolution, 3), merger.MergerConfigMasked(face_type=face_type, + default_mode = 'overlay', + clip_hborder_mask_per=0.0625 if (face_type != FaceType.HALF) else 0, + ) + +Model = QModel diff --git a/models/Model_SAE/Model.py b/models/Model_SAE/Model.py deleted file mode 100644 index 28f5904..0000000 --- a/models/Model_SAE/Model.py +++ /dev/null @@ -1,568 +0,0 @@ -from functools import partial - -import numpy as np - -import mathlib -from facelib import FaceType -from interact import interact as io -from models import ModelBase -from nnlib import nnlib -from samplelib import * - - -#SAE - Styled AutoEncoder -class SAEModel(ModelBase): - - #override - def onInitializeOptions(self, is_first_run, ask_override): - yn_str = {True:'y',False:'n'} - - default_resolution = 128 - default_archi = 'df' - default_face_type = 'f' - - - if is_first_run: - resolution = io.input_int("Resolution ( 64-256 ?:help skip:128) : ", default_resolution, help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") - resolution = np.clip (resolution, 64, 256) - while np.modf(resolution / 16)[0] != 0.0: - resolution -= 1 - self.options['resolution'] = resolution - - self.options['face_type'] = io.input_str ("Half or Full face? (h/f, ?:help skip:f) : ", default_face_type, ['h','f'], help_message="Half face has better resolution, but covers less area of cheeks.").lower() - else: - self.options['resolution'] = self.options.get('resolution', default_resolution) - self.options['face_type'] = self.options.get('face_type', default_face_type) - - default_learn_mask = self.options.get('learn_mask', True) - if is_first_run or ask_override: - self.options['learn_mask'] = io.input_bool ( f"Learn mask? 
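onGetPreview assembles its images with plain NumPy concatenation. A minimal sketch of the same grid layout (preview_grid is a hypothetical helper name):

import numpy as np

def preview_grid(S, SS, D, DD, SD, n_samples=4):
    # One row per sample: [src, src->src, dst, dst->dst, src->dst] side by side,
    # rows stacked vertically, as in the 'Quick96' preview.
    rows = [np.concatenate((S[i], SS[i], D[i], DD[i], SD[i]), axis=1)
            for i in range(n_samples)]
    return np.concatenate(rows, axis=0)

# Single-channel masks are expanded to 3 channels first, e.g. np.repeat(mask, 3, axis=-1).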
(y/n, ?:help skip:{yn_str[default_learn_mask]} ) : " , default_learn_mask, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case converter forced to use 'not predicted mask' that is not smooth as predicted. Model with style values can be learned without mask and produce same quality result.") - else: - self.options['learn_mask'] = self.options.get('learn_mask', default_learn_mask) - - if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend: - def_optimizer_mode = self.options.get('optimizer_mode', 1) - self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train x2 bigger network consuming RAM. 3 - allows you to train x3 bigger network consuming huge amount of RAM and slower, depends on CPU power.") - else: - self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1) - - if is_first_run: - self.options['archi'] = io.input_str ("AE architecture (df, liae ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes.").lower() #-s version is slower, but has decreased change to collapse. - else: - self.options['archi'] = self.options.get('archi', default_archi) - - default_ae_dims = 256 if 'liae' in self.options['archi'] else 512 - default_e_ch_dims = 42 - default_d_ch_dims = default_e_ch_dims // 2 - def_ca_weights = False - - if is_first_run: - self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dims (32-1024 ?:help skip:%d) : " % (default_ae_dims) , default_ae_dims, help_message="All face information will packed to AE dims. If amount of AE dims are not enough, then for example closed eyes will not be recognized. More dims are better, but require more VRAM. You can fine-tune model size to fit your GPU." ), 32, 1024 ) - self.options['e_ch_dims'] = np.clip ( io.input_int("Encoder dims per channel (21-85 ?:help skip:%d) : " % (default_e_ch_dims) , default_e_ch_dims, help_message="More encoder dims help to recognize more facial features, but require more VRAM. You can fine-tune model size to fit your GPU." ), 21, 85 ) - default_d_ch_dims = self.options['e_ch_dims'] // 2 - self.options['d_ch_dims'] = np.clip ( io.input_int("Decoder dims per channel (10-85 ?:help skip:%d) : " % (default_d_ch_dims) , default_d_ch_dims, help_message="More decoder dims help to get better details, but require more VRAM. You can fine-tune model size to fit your GPU." ), 10, 85 ) - self.options['ca_weights'] = io.input_bool (f"Use CA weights? (y/n, ?:help skip:{yn_str[def_ca_weights]} ) : ", def_ca_weights, help_message="Initialize network with 'Convolution Aware' weights. This may help to achieve a higher accuracy model, but consumes a time at first run.") - else: - self.options['ae_dims'] = self.options.get('ae_dims', default_ae_dims) - self.options['e_ch_dims'] = self.options.get('e_ch_dims', default_e_ch_dims) - self.options['d_ch_dims'] = self.options.get('d_ch_dims', default_d_ch_dims) - self.options['ca_weights'] = self.options.get('ca_weights', def_ca_weights) - - default_face_style_power = 0.0 - default_bg_style_power = 0.0 - if is_first_run or ask_override: - def_pixel_loss = self.options.get('pixel_loss', False) - self.options['pixel_loss'] = io.input_bool (f"Use pixel loss? 
(y/n, ?:help skip:{yn_str[def_pixel_loss]} ) : ", def_pixel_loss, help_message="Pixel loss may help to enhance fine details and stabilize face color. Use it only if quality does not improve over time. Enabling this option too early increases the chance of model collapse.") - - default_face_style_power = default_face_style_power if is_first_run else self.options.get('face_style_power', default_face_style_power) - self.options['face_style_power'] = np.clip ( io.input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power, - help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_bg_style_power = default_bg_style_power if is_first_run else self.options.get('bg_style_power', default_bg_style_power) - self.options['bg_style_power'] = np.clip ( io.input_number("Background style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power, - help_message="Learn to transfer image around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_ct_mode = self.options.get('ct_mode', 'none') - self.options['ct_mode'] = io.input_str (f"Color transfer mode apply to src faceset. ( none/rct/lct/mkl/idt/sot, ?:help skip:{default_ct_mode}) : ", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.") - - if nnlib.device.backend != 'plaidML': # todo https://github.com/plaidml/plaidml/issues/301 - default_clipgrad = False if is_first_run else self.options.get('clipgrad', False) - self.options['clipgrad'] = io.input_bool (f"Enable gradient clipping? (y/n, ?:help skip:{yn_str[default_clipgrad]}) : ", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.") - else: - self.options['clipgrad'] = False - - else: - self.options['pixel_loss'] = self.options.get('pixel_loss', False) - self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power) - self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power) - self.options['ct_mode'] = self.options.get('ct_mode', 'none') - self.options['clipgrad'] = self.options.get('clipgrad', False) - - if is_first_run: - self.options['pretrain'] = io.input_bool ("Pretrain the model? (y/n, ?:help skip:n) : ", False, help_message="Pretrain the model with large amount of various faces. This technique may help to train the fake with overly different face shapes and light conditions of src/dst data. Face will be look more like a morphed. To reduce the morph effect, some model files will be initialized but not be updated after pretrain: LIAE: inter_AB.h5 DF: encoder.h5. The longer you pretrain the model the more morphed face will look. 
After that, save and run the training again.") - else: - self.options['pretrain'] = False - - #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements({1.5:4}) - - resolution = self.options['resolution'] - learn_mask = self.options['learn_mask'] - - ae_dims = self.options['ae_dims'] - e_ch_dims = self.options['e_ch_dims'] - d_ch_dims = self.options['d_ch_dims'] - self.pretrain = self.options['pretrain'] = self.options.get('pretrain', False) - if not self.pretrain: - self.options.pop('pretrain') - - bgr_shape = (resolution, resolution, 3) - mask_shape = (resolution, resolution, 1) - - masked_training = True - - class SAEDFModel(object): - def __init__(self, resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask): - super().__init__() - self.learn_mask = learn_mask - - output_nc = 3 - bgr_shape = (resolution, resolution, output_nc) - mask_shape = (resolution, resolution, 1) - lowest_dense_res = resolution // 16 - e_dims = output_nc*e_ch_dims - - def upscale (dim): - def func(x): - return SubpixelUpscaler()(LeakyReLU(0.1)(Conv2D(dim * 4, kernel_size=3, strides=1, padding='valid')(ZeroPadding2D(1)(x)))) - return func - - def enc_flow(e_dims, ae_dims, lowest_dense_res): - def func(x): - x = LeakyReLU(0.1)(Conv2D(e_dims, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*2, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*4, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*8, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - - x = Dense(ae_dims)(Flatten()(x)) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims))(x) - x = upscale(ae_dims)(x) - return x - return func - - def dec_flow(output_nc, d_ch_dims, add_residual_blocks=True): - dims = output_nc * d_ch_dims - def ResidualBlock(dim): - def func(inp): - x = Conv2D(dim, kernel_size=3, padding='valid')(ZeroPadding2D(1)(inp)) - x = LeakyReLU(0.2)(x) - x = Conv2D(dim, kernel_size=3, padding='valid')(ZeroPadding2D(1)(x)) - x = Add()([x, inp]) - x = LeakyReLU(0.2)(x) - return x - return func - - def func(x): - x = upscale(dims*8)(x) - - if add_residual_blocks: - x = ResidualBlock(dims*8)(x) - x = ResidualBlock(dims*8)(x) - - x = upscale(dims*4)(x) - - if add_residual_blocks: - x = ResidualBlock(dims*4)(x) - x = ResidualBlock(dims*4)(x) - - x = upscale(dims*2)(x) - - if add_residual_blocks: - x = ResidualBlock(dims*2)(x) - x = ResidualBlock(dims*2)(x) - - return Conv2D(output_nc, kernel_size=5, padding='valid', activation='sigmoid')(ZeroPadding2D(2)(x)) - return func - - self.encoder = modelify(enc_flow(e_dims, ae_dims, lowest_dense_res)) ( Input(bgr_shape) ) - - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.decoder_src = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - self.decoder_dst = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - - if learn_mask: - self.decoder_srcm = modelify(dec_flow(1, d_ch_dims, add_residual_blocks=False)) ( Input(sh) ) - self.decoder_dstm = modelify(dec_flow(1, d_ch_dims, add_residual_blocks=False)) ( Input(sh) ) - - self.src_dst_trainable_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights + self.decoder_dst.trainable_weights - - if learn_mask: - self.src_dst_mask_trainable_weights = self.encoder.trainable_weights + self.decoder_srcm.trainable_weights + 
self.decoder_dstm.trainable_weights - - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - src_code, dst_code = self.encoder(self.warped_src), self.encoder(self.warped_dst) - - self.pred_src_src = self.decoder_src(src_code) - self.pred_dst_dst = self.decoder_dst(dst_code) - self.pred_src_dst = self.decoder_src(dst_code) - - if learn_mask: - self.pred_src_srcm = self.decoder_srcm(src_code) - self.pred_dst_dstm = self.decoder_dstm(dst_code) - self.pred_src_dstm = self.decoder_srcm(dst_code) - - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [] - if not exclude_for_pretrain: - ar += [ [self.encoder, 'encoder.h5'] ] - ar += [ [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] ] - if self.learn_mask: - ar += [ [self.decoder_srcm, 'decoder_srcm.h5'], - [self.decoder_dstm, 'decoder_dstm.h5'] ] - return ar - - class SAELIAEModel(object): - def __init__(self, resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask): - super().__init__() - self.learn_mask = learn_mask - - output_nc = 3 - bgr_shape = (resolution, resolution, output_nc) - mask_shape = (resolution, resolution, 1) - - e_dims = output_nc*e_ch_dims - - lowest_dense_res = resolution // 16 - - def upscale (dim): - def func(x): - return SubpixelUpscaler()(LeakyReLU(0.1)(Conv2D(dim * 4, kernel_size=3, strides=1, padding='valid')(ZeroPadding2D(1)(x)))) - return func - - def enc_flow(e_dims): - def func(x): - x = LeakyReLU(0.1)(Conv2D(e_dims, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*2, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*4, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = LeakyReLU(0.1)(Conv2D(e_dims*8, kernel_size=5, strides=2, padding='valid')(ZeroPadding2D(2)(x))) - x = Flatten()(x) - return x - return func - - def inter_flow(lowest_dense_res, ae_dims): - def func(x): - x = Dense(ae_dims)(x) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims*2)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims*2))(x) - x = upscale(ae_dims*2)(x) - return x - return func - - def dec_flow(output_nc, d_ch_dims, add_residual_blocks=True): - d_dims = output_nc*d_ch_dims - def ResidualBlock(dim): - def func(inp): - x = Conv2D(dim, kernel_size=3, padding='valid')(ZeroPadding2D(1)(inp)) - x = LeakyReLU(0.2)(x) - x = Conv2D(dim, kernel_size=3, padding='valid')(ZeroPadding2D(1)(inp)) - x = Add()([x, inp]) - x = LeakyReLU(0.2)(x) - return x - return func - - def func(x): - x = upscale(d_dims*8)(x) - - if add_residual_blocks: - x = ResidualBlock(d_dims*8)(x) - x = ResidualBlock(d_dims*8)(x) - - x = upscale(d_dims*4)(x) - - if add_residual_blocks: - x = ResidualBlock(d_dims*4)(x) - x = ResidualBlock(d_dims*4)(x) - - x = upscale(d_dims*2)(x) - - if add_residual_blocks: - x = ResidualBlock(d_dims*2)(x) - x = ResidualBlock(d_dims*2)(x) - - return Conv2D(output_nc, kernel_size=5, padding='valid', activation='sigmoid')(ZeroPadding2D(2)(x)) - return func - - self.encoder = modelify(enc_flow(e_dims)) ( Input(bgr_shape) ) - - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.inter_B = modelify(inter_flow(lowest_dense_res, ae_dims)) ( Input(sh) ) - self.inter_AB = modelify(inter_flow(lowest_dense_res, ae_dims)) ( Input(sh) ) - - sh = np.array(K.int_shape( self.inter_B.outputs[0] )[1:])*(1,1,2) - self.decoder = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - - if learn_mask: - self.decoderm = modelify(dec_flow(1, d_ch_dims, 
add_residual_blocks=False)) ( Input(sh) ) - - self.src_dst_trainable_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights - - if learn_mask: - self.src_dst_mask_trainable_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoderm.trainable_weights - - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - - warped_src_code = self.encoder (self.warped_src) - warped_src_inter_AB_code = self.inter_AB (warped_src_code) - warped_src_inter_code = Concatenate()([warped_src_inter_AB_code,warped_src_inter_AB_code]) - - warped_dst_code = self.encoder (self.warped_dst) - warped_dst_inter_B_code = self.inter_B (warped_dst_code) - warped_dst_inter_AB_code = self.inter_AB (warped_dst_code) - warped_dst_inter_code = Concatenate()([warped_dst_inter_B_code,warped_dst_inter_AB_code]) - - warped_src_dst_inter_code = Concatenate()([warped_dst_inter_AB_code,warped_dst_inter_AB_code]) - - self.pred_src_src = self.decoder(warped_src_inter_code) - self.pred_dst_dst = self.decoder(warped_dst_inter_code) - self.pred_src_dst = self.decoder(warped_src_dst_inter_code) - - if learn_mask: - self.pred_src_srcm = self.decoderm(warped_src_inter_code) - self.pred_dst_dstm = self.decoderm(warped_dst_inter_code) - self.pred_src_dstm = self.decoderm(warped_src_dst_inter_code) - - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [ [self.encoder, 'encoder.h5'], - [self.inter_B, 'inter_B.h5'] ] - - if not exclude_for_pretrain: - ar += [ [self.inter_AB, 'inter_AB.h5'] ] - - ar += [ [self.decoder, 'decoder.h5'] ] - - if self.learn_mask: - ar += [ [self.decoderm, 'decoderm.h5'] ] - - return ar - - if 'df' in self.options['archi']: - self.model = SAEDFModel (resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask) - elif 'liae' in self.options['archi']: - self.model = SAELIAEModel (resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask) - - loaded, not_loaded = [], self.model.get_model_filename_list() - if not self.is_first_run(): - loaded, not_loaded = self.load_weights_safe(not_loaded) - - CA_models = [] - if self.options.get('ca_weights', False): - CA_models += [ model for model, _ in not_loaded ] - - CA_conv_weights_list = [] - for model in CA_models: - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - CA_conv_weights_list += [layer.weights[0]] #- is Conv2D kernel_weights - - if len(CA_conv_weights_list) != 0: - CAInitializerMP ( CA_conv_weights_list ) - - warped_src = self.model.warped_src - target_src = Input ( (resolution, resolution, 3) ) - target_srcm = Input ( (resolution, resolution, 1) ) - - warped_dst = self.model.warped_dst - target_dst = Input ( (resolution, resolution, 3) ) - target_dstm = Input ( (resolution, resolution, 1) ) - - target_src_sigm = target_src - target_dst_sigm = target_dst - - target_srcm_sigm = gaussian_blur( max(1, K.int_shape(target_srcm)[1] // 32) )(target_srcm) - target_dstm_sigm = gaussian_blur( max(1, K.int_shape(target_dstm)[1] // 32) )(target_dstm) - target_dstm_anti_sigm = 1.0 - target_dstm_sigm - - target_src_masked = target_src_sigm*target_srcm_sigm - target_dst_masked = target_dst_sigm*target_dstm_sigm - target_dst_anti_masked = target_dst_sigm*target_dstm_anti_sigm - - target_src_masked_opt = target_src_masked if masked_training else target_src_sigm - target_dst_masked_opt = target_dst_masked if masked_training else target_dst_sigm - - pred_src_src = self.model.pred_src_src - 
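The removed LIAE variant routes codes through two intermediate networks before one shared decoder. A NumPy sketch of that routing (liae_codes is a hypothetical name; encoder, inter_B and inter_AB stand for the corresponding sub-models and are assumed to return arrays):

import numpy as np

def liae_codes(encoder, inter_B, inter_AB, warped_src, warped_dst):
    # LIAE routing from the removed SAE model: the src path uses inter_AB twice, the dst
    # path uses inter_B + inter_AB, and the face swap reuses dst's inter_AB code twice so
    # the shared decoder renders it "as src".
    ab_src = inter_AB(encoder(warped_src))
    dst_enc = encoder(warped_dst)
    b_dst, ab_dst = inter_B(dst_enc), inter_AB(dst_enc)
    src_code  = np.concatenate([ab_src, ab_src], axis=-1)
    dst_code  = np.concatenate([b_dst, ab_dst], axis=-1)
    swap_code = np.concatenate([ab_dst, ab_dst], axis=-1)
    return src_code, dst_code, swap_code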
pred_dst_dst = self.model.pred_dst_dst - pred_src_dst = self.model.pred_src_dst - if learn_mask: - pred_src_srcm = self.model.pred_src_srcm - pred_dst_dstm = self.model.pred_dst_dstm - pred_src_dstm = self.model.pred_src_dstm - - pred_src_src_sigm = self.model.pred_src_src - pred_dst_dst_sigm = self.model.pred_dst_dst - pred_src_dst_sigm = self.model.pred_src_dst - - pred_src_src_masked = pred_src_src_sigm*target_srcm_sigm - pred_dst_dst_masked = pred_dst_dst_sigm*target_dstm_sigm - - pred_src_src_masked_opt = pred_src_src_masked if masked_training else pred_src_src_sigm - pred_dst_dst_masked_opt = pred_dst_dst_masked if masked_training else pred_dst_dst_sigm - - psd_target_dst_masked = pred_src_dst_sigm*target_dstm_sigm - psd_target_dst_anti_masked = pred_src_dst_sigm*target_dstm_anti_sigm - - if self.is_training_mode: - self.src_dst_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - self.src_dst_mask_opt = Adam(lr=5e-5, beta_1=0.5, beta_2=0.999, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - - if not self.options['pixel_loss']: - src_loss = K.mean ( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_opt, pred_src_src_masked_opt) ) - else: - src_loss = K.mean ( 50*K.square( target_src_masked_opt - pred_src_src_masked_opt ) ) - - face_style_power = self.options['face_style_power'] / 100.0 - if face_style_power != 0: - src_loss += style_loss(gaussian_blur_radius=resolution//16, loss_weight=face_style_power, wnd_size=0)( psd_target_dst_masked, target_dst_masked ) - - bg_style_power = self.options['bg_style_power'] / 100.0 - if bg_style_power != 0: - if not self.options['pixel_loss']: - src_loss += K.mean( (10*bg_style_power)*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked, target_dst_anti_masked )) - else: - src_loss += K.mean( (50*bg_style_power)*K.square( psd_target_dst_anti_masked - target_dst_anti_masked )) - - if not self.options['pixel_loss']: - dst_loss = K.mean( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)(target_dst_masked_opt, pred_dst_dst_masked_opt) ) - else: - dst_loss = K.mean( 50*K.square( target_dst_masked_opt - pred_dst_dst_masked_opt ) ) - - self.src_dst_train = K.function ([warped_src, warped_dst, target_src, target_srcm, target_dst, target_dstm],[src_loss,dst_loss], self.src_dst_opt.get_updates(src_loss+dst_loss, self.model.src_dst_trainable_weights) ) - - if self.options['learn_mask']: - src_mask_loss = K.mean(K.square(target_srcm-pred_src_srcm)) - dst_mask_loss = K.mean(K.square(target_dstm-pred_dst_dstm)) - self.src_dst_mask_train = K.function ([warped_src, warped_dst, target_srcm, target_dstm],[src_mask_loss, dst_mask_loss], self.src_dst_mask_opt.get_updates(src_mask_loss+dst_mask_loss, self.model.src_dst_mask_trainable_weights ) ) - - if self.options['learn_mask']: - self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm]) - else: - self.AE_view = K.function ([warped_src, warped_dst], [pred_src_src, pred_dst_dst, pred_src_dst ]) - - else: - if self.options['learn_mask']: - self.AE_convert = K.function ([warped_dst],[ pred_src_dst, pred_dst_dstm, pred_src_dstm ]) - else: - self.AE_convert = K.function ([warped_dst],[ pred_src_dst ]) - - - if self.is_training_mode: - t = SampleProcessor.Types - face_type = t.FACE_TYPE_FULL if self.options['face_type'] == 'f' else t.FACE_TYPE_HALF - - 
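The background style term in the removed SAE loss compares the swapped face with the real dst only outside the blurred mask. A hedged sketch, with tf.image.ssim again standing in for the repo's dssim helper and bg_style_power assumed to be the option value already divided by 100:

import tensorflow as tf

def bg_style_term(pred_src_dst, target_dst, dst_mask_blur, bg_style_power, resolution):
    # Penalize differences between the src->dst prediction and the real dst in the
    # anti-masked (background) region; the 10 * bg_style_power weight follows the patch.
    anti = 1.0 - dst_mask_blur
    a, b = pred_src_dst * anti, target_dst * anti
    dssim = (1.0 - tf.image.ssim(a, b, max_val=1.0,
                                 filter_size=int(resolution / 11.6))) / 2.0
    return tf.reduce_mean(10.0 * bg_style_power * dssim)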
t_mode_bgr = t.MODE_BGR if not self.pretrain else t.MODE_BGR_SHUFFLE - - training_data_src_path = self.training_data_src_path - training_data_dst_path = self.training_data_dst_path - - if self.pretrain and self.pretraining_data_path is not None: - training_data_src_path = self.pretraining_data_path - training_data_dst_path = self.pretraining_data_path - - self.set_training_data_generators ([ - SampleGeneratorFace(training_data_src_path, random_ct_samples_path=training_data_dst_path if self.options['ct_mode'] != 'none' else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution, 'ct_mode': self.options['ct_mode'] }, - {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution, 'ct_mode': self.options['ct_mode'] }, - {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution } ] - ), - - SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, ), - output_sample_types = [ {'types' : (t.IMG_WARPED_TRANSFORMED, face_type, t_mode_bgr), 'resolution':resolution}, - {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution}, - {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution} ]) - ]) - - #override - def get_model_filename_list(self): - ar = self.model.get_model_filename_list ( exclude_for_pretrain=(self.pretrain and self.iter != 0) ) - return ar - - #override - def onSave(self): - self.save_weights_safe( self.get_model_filename_list() ) - - #override - def onTrainOneIter(self, generators_samples, generators_list): - warped_src, target_src, target_srcm = generators_samples[0] - warped_dst, target_dst, target_dstm = generators_samples[1] - - feed = [warped_src, warped_dst, target_src, target_srcm, target_dst, target_dstm] - - src_loss, dst_loss, = self.src_dst_train (feed) - - if self.options['learn_mask']: - feed = [ warped_src, warped_dst, target_srcm, target_dstm ] - src_mask_loss, dst_mask_loss, = self.src_dst_mask_train (feed) - - return ( ('src_loss', src_loss), ('dst_loss', dst_loss), ) - - #override - def onGetPreview(self, sample): - test_S = sample[0][1][0:4] #first 4 samples - test_S_m = sample[0][2][0:4] #first 4 samples - test_D = sample[1][1][0:4] - test_D_m = sample[1][2][0:4] - - if self.options['learn_mask']: - S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] - DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] - else: - S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] - - result = [] - st = [] - for i in range(len(test_S)): - ar = S[i], SS[i], D[i], DD[i], SD[i] - - st.append ( np.concatenate ( ar, axis=1) ) - - result += [ ('SAE', np.concatenate (st, axis=0 )), ] - - if self.options['learn_mask']: - st_m = [] - for i in range(len(test_S)): - ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) - st_m.append ( np.concatenate ( ar, axis=1) ) - - result += [ ('SAE masked', np.concatenate (st_m, axis=0 )), ] - - return result - - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.AE_convert ([ np.zeros ( (1, self.options['resolution'], 
self.options['resolution'], 3), dtype=np.float32 ) ]) - else: - if self.options['learn_mask']: - bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) - mask = mask_dst_dstm[0] * mask_src_dstm[0] - return bgr[0], mask[...,0] - else: - bgr, = self.AE_convert ([face[np.newaxis,...]]) - return bgr[0] - - #override - def get_ConverterConfig(self): - face_type = FaceType.FULL if self.options['face_type'] == 'f' else FaceType.HALF - - import converters - return self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), converters.ConverterConfigMasked(face_type=face_type, - default_mode = 'overlay' if self.options['ct_mode'] != 'none' or self.options['face_style_power'] or self.options['bg_style_power'] else 'seamless', - clip_hborder_mask_per=0.0625 if (self.options['face_type'] == 'f') else 0, - ) - -Model = SAEModel diff --git a/models/Model_SAE/__init__.py b/models/Model_SAE/__init__.py deleted file mode 100644 index 0188f11..0000000 --- a/models/Model_SAE/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .Model import Model diff --git a/models/Model_SAEHD/Model.py b/models/Model_SAEHD/Model.py index 180b2a1..67a19f3 100644 --- a/models/Model_SAEHD/Model.py +++ b/models/Model_SAEHD/Model.py @@ -1,524 +1,666 @@ +import multiprocessing from functools import partial import numpy as np -import mathlib +from core import mathlib +from core.interact import interact as io +from core.leras import nn from facelib import FaceType -from interact import interact as io from models import ModelBase -from nnlib import nnlib from samplelib import * - -#SAE - Styled AutoEncoder class SAEHDModel(ModelBase): #override - def onInitializeOptions(self, is_first_run, ask_override): + def on_initialize_options(self): + device_config = nn.getCurrentDeviceConfig() + + lowest_vram = 2 + if len(device_config.devices) != 0: + lowest_vram = device_config.devices.get_worst_device().total_mem_gb + + if lowest_vram >= 4: + suggest_batch_size = 8 + else: + suggest_batch_size = 4 + yn_str = {True:'y',False:'n'} + ask_override = self.ask_override() - default_resolution = 128 - default_archi = 'df' - default_face_type = 'f' + if self.is_first_run() or ask_override: + self.ask_enable_autobackup() + self.ask_write_preview_history() + self.ask_target_iter() + self.ask_random_flip() + self.ask_batch_size(suggest_batch_size) + default_resolution = self.options['resolution'] = self.load_or_def_option('resolution', 128) + default_face_type = self.options['face_type'] = self.load_or_def_option('face_type', 'f') + default_models_opt_on_gpu = self.options['models_opt_on_gpu'] = self.load_or_def_option('models_opt_on_gpu', True) + default_archi = self.options['archi'] = self.load_or_def_option('archi', 'dfhd') + default_ae_dims = self.options['ae_dims'] = self.load_or_def_option('ae_dims', 256) + default_e_dims = self.options['e_dims'] = self.load_or_def_option('e_dims', 64) + default_d_dims = self.options['d_dims'] = self.load_or_def_option('d_dims', 64) + + default_d_mask_dims = default_d_dims // 3 + default_d_mask_dims += default_d_mask_dims % 2 + default_d_mask_dims = self.options['d_mask_dims'] = self.load_or_def_option('d_mask_dims', default_d_mask_dims) + + default_learn_mask = self.options['learn_mask'] = self.load_or_def_option('learn_mask', True) + default_lr_dropout = self.options['lr_dropout'] = self.load_or_def_option('lr_dropout', False) + default_random_warp = self.options['random_warp'] = self.load_or_def_option('random_warp', True) + default_true_face_training = 
self.options['true_face_training'] = self.load_or_def_option('true_face_training', False) + default_face_style_power = self.options['face_style_power'] = self.load_or_def_option('face_style_power', 0.0) + default_bg_style_power = self.options['bg_style_power'] = self.load_or_def_option('bg_style_power', 0.0) + default_ct_mode = self.options['ct_mode'] = self.load_or_def_option('ct_mode', 'none') + default_clipgrad = self.options['clipgrad'] = self.load_or_def_option('clipgrad', False) + default_pretrain = self.options['pretrain'] = self.load_or_def_option('pretrain', False) - if is_first_run: - resolution = io.input_int("Resolution ( 64-256 ?:help skip:128) : ", default_resolution, help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") - resolution = np.clip (resolution, 64, 256) - while np.modf(resolution / 16)[0] != 0.0: - resolution -= 1 + if self.is_first_run(): + resolution = io.input_int("Resolution", default_resolution, add_info="64-256", help_message="More resolution requires more VRAM and time to train. Value will be adjusted to multiple of 16.") + resolution = np.clip ( (resolution // 16) * 16, 64, 256) self.options['resolution'] = resolution - self.options['face_type'] = io.input_str ("Half, mid full, or full face? (h/mf/f, ?:help skip:f) : ", default_face_type, ['h','mf','f'], help_message="Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face.").lower() - else: - self.options['resolution'] = self.options.get('resolution', default_resolution) - self.options['face_type'] = self.options.get('face_type', default_face_type) + self.options['face_type'] = io.input_str ("Face type", default_face_type, ['h','mf','f'], help_message="Half / mid face / full face. Half face has better resolution, but covers less area of cheeks. Mid face is 30% wider than half face.").lower() - default_learn_mask = self.options.get('learn_mask', True) - if is_first_run or ask_override: - self.options['learn_mask'] = io.input_bool ( f"Learn mask? (y/n, ?:help skip:{yn_str[default_learn_mask]} ) : " , default_learn_mask, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case converter forced to use 'not predicted mask' that is not smooth as predicted.") - else: - self.options['learn_mask'] = self.options.get('learn_mask', default_learn_mask) + if (self.is_first_run() or ask_override) and len(device_config.devices) == 1: + self.options['models_opt_on_gpu'] = io.input_bool ("Place models and optimizer on GPU", default_models_opt_on_gpu, help_message="When you train on one GPU, by default model and optimizer weights are placed on GPU to accelerate the process. You can place they on CPU to free up extra VRAM, thus set bigger dimensions.") - if (is_first_run or ask_override) and 'tensorflow' in self.device_config.backend: - def_optimizer_mode = self.options.get('optimizer_mode', 1) - self.options['optimizer_mode'] = io.input_int ("Optimizer mode? ( 1,2,3 ?:help skip:%d) : " % (def_optimizer_mode), def_optimizer_mode, help_message="1 - no changes. 2 - allows you to train x2 bigger network consuming RAM. 
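Resolution handling changes above from the old decrement-until-divisible loop to direct arithmetic. A one-line restatement (snap_resolution is a hypothetical name):

import numpy as np

def snap_resolution(resolution):
    # New behaviour: floor to the nearest multiple of 16, then clamp to [64, 256].
    return int(np.clip((resolution // 16) * 16, 64, 256))

assert snap_resolution(130) == 128 and snap_resolution(300) == 256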
3 - allows you to train x3 bigger network consuming huge amount of RAM and slower, depends on CPU power.") - else: - self.options['optimizer_mode'] = self.options.get('optimizer_mode', 1) + if self.is_first_run(): + self.options['archi'] = io.input_str ("AE architecture", default_archi, ['dfhd','liaehd','df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes. 'hd' is heavyweight version for the best quality.").lower() #-s version is slower, but has decreased change to collapse. + self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dimensions", default_ae_dims, add_info="32-1024", help_message="All face information will packed to AE dims. If amount of AE dims are not enough, then for example closed eyes will not be recognized. More dims are better, but require more VRAM. You can fine-tune model size to fit your GPU." ), 32, 1024 ) + + e_dims = np.clip ( io.input_int("Encoder dimensions", default_e_dims, add_info="16-256", help_message="More dims help to recognize more facial features and achieve sharper result, but require more VRAM. You can fine-tune model size to fit your GPU." ), 16, 256 ) + self.options['e_dims'] = e_dims + e_dims % 2 + + d_dims = np.clip ( io.input_int("Decoder dimensions", default_d_dims, add_info="16-256", help_message="More dims help to recognize more facial features and achieve sharper result, but require more VRAM. You can fine-tune model size to fit your GPU." ), 16, 256 ) + self.options['d_dims'] = d_dims + d_dims % 2 + + d_mask_dims = np.clip ( io.input_int("Decoder mask dimensions", default_d_mask_dims, add_info="16-256", help_message="Typical mask dimensions = decoder dimensions / 3. If you manually cut out obstacles from the dst mask, you can increase this parameter to achieve better quality." ), 16, 256 ) + self.options['d_mask_dims'] = d_mask_dims + d_mask_dims % 2 + + if self.is_first_run() or ask_override: + self.options['learn_mask'] = io.input_bool ("Learn mask", default_learn_mask, help_message="Learning mask can help model to recognize face directions. Learn without mask can reduce model size, in this case merger forced to use 'not predicted mask' that is not smooth as predicted.") + self.options['lr_dropout'] = io.input_bool ("Use learning rate dropout", default_lr_dropout, help_message="When the face is trained enough, you can enable this option to get extra sharpness for less amount of iterations.") + self.options['random_warp'] = io.input_bool ("Enable random warp of samples", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness for less amount of iterations.") - if is_first_run: - self.options['archi'] = io.input_str ("AE architecture (df, liae ?:help skip:%s) : " % (default_archi) , default_archi, ['df','liae'], help_message="'df' keeps faces more natural. 'liae' can fix overly different face shapes.").lower() #-s version is slower, but has decreased change to collapse. - else: - self.options['archi'] = self.options.get('archi', default_archi) - - default_ae_dims = 256 - default_ed_ch_dims = 21 - - if is_first_run: - self.options['ae_dims'] = np.clip ( io.input_int("AutoEncoder dims (32-1024 ?:help skip:%d) : " % (default_ae_dims) , default_ae_dims, help_message="All face information will packed to AE dims. If amount of AE dims are not enough, then for example closed eyes will not be recognized. More dims are better, but require more VRAM. 
You can fine-tune model size to fit your GPU." ), 32, 1024 ) - self.options['ed_ch_dims'] = np.clip ( io.input_int("Encoder/Decoder dims per channel (10-85 ?:help skip:%d) : " % (default_ed_ch_dims) , default_ed_ch_dims, help_message="More dims help to recognize more facial features and achieve sharper result, but require more VRAM. You can fine-tune model size to fit your GPU." ), 10, 85 ) - else: - self.options['ae_dims'] = self.options.get('ae_dims', default_ae_dims) - self.options['ed_ch_dims'] = self.options.get('ed_ch_dims', default_ed_ch_dims) - - default_true_face_training = self.options.get('true_face_training', False) - default_face_style_power = self.options.get('face_style_power', 0.0) - default_bg_style_power = self.options.get('bg_style_power', 0.0) - - if is_first_run or ask_override: - if nnlib.device.backend != 'plaidML': - default_lr_dropout = self.options.get('lr_dropout', False) - self.options['lr_dropout'] = io.input_bool ( f"Use learning rate dropout? (y/n, ?:help skip:{yn_str[default_lr_dropout]} ) : ", default_lr_dropout, help_message="When the face is trained enough, you can enable this option to get extra sharpness for less amount of iterations.") + if 'df' in self.options['archi']: + self.options['true_face_training'] = io.input_bool ("Enable 'true face' training", default_true_face_training, help_message="The result face will be more like src and will get extra sharpness. Enable it for last 10-20k iterations before conversion.") else: - self.options['lr_dropout'] = False - - default_random_warp = self.options.get('random_warp', True) - self.options['random_warp'] = io.input_bool (f"Enable random warp of samples? ( y/n, ?:help skip:{yn_str[default_random_warp]}) : ", default_random_warp, help_message="Random warp is required to generalize facial expressions of both faces. When the face is trained enough, you can disable it to get extra sharpness for less amount of iterations.") + self.options['true_face_training'] = False - self.options['true_face_training'] = io.input_bool (f"Enable 'true face' training? (y/n, ?:help skip:{yn_str[default_true_face_training]}) : ", default_true_face_training, help_message="The result face will be more like src and will get extra sharpness. Enable it for last 10-20k iterations before conversion.") + self.options['face_style_power'] = np.clip ( io.input_number("Face style power", default_face_style_power, add_info="0.0..100.0", help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) + self.options['bg_style_power'] = np.clip ( io.input_number("Background style power", default_bg_style_power, add_info="0.0..100.0", help_message="Learn to transfer background around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) + self.options['ct_mode'] = io.input_str (f"Color transfer for src faceset", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. 
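The dimension options above are kept even and the mask decoder size is derived from the main decoder. The same arithmetic in isolation (make_even is a hypothetical helper name):

def make_even(x):
    # e_dims, d_dims and d_mask_dims all get "+ x % 2" in the patch so they stay even.
    return x + x % 2

d_dims = 64                           # the patch's default decoder dimensions
d_mask_dims = make_even(d_dims // 3)  # default mask decoder dims = d_dims / 3, rounded up to even
assert d_mask_dims == 22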
Try all modes to find the best.") + self.options['clipgrad'] = io.input_bool ("Enable gradient clipping", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.") + self.options['pretrain'] = io.input_bool ("Enable pretraining mode", default_pretrain, help_message="Pretrain the model with large amount of various faces. After that, model can be used to train the fakes more quickly.") - self.options['face_style_power'] = np.clip ( io.input_number("Face style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_face_style_power), default_face_style_power, - help_message="Learn to transfer face style details such as light and color conditions. Warning: Enable it only after 10k iters, when predicted face is clear enough to start learn style. Start from 0.1 value and check history changes. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) + if self.options['pretrain'] and self.get_pretraining_data_path() is None: + raise Exception("pretraining_data_path is not defined") - self.options['bg_style_power'] = np.clip ( io.input_number("Background style power ( 0.0 .. 100.0 ?:help skip:%.2f) : " % (default_bg_style_power), default_bg_style_power, - help_message="Learn to transfer image around face. This can make face more like dst. Enabling this option increases the chance of model collapse."), 0.0, 100.0 ) - - default_ct_mode = self.options.get('ct_mode', 'none') - self.options['ct_mode'] = io.input_str (f"Color transfer mode apply to src faceset. ( none/rct/lct/mkl/idt/sot, ?:help skip:{default_ct_mode}) : ", default_ct_mode, ['none','rct','lct','mkl','idt','sot'], help_message="Change color distribution of src samples close to dst samples. Try all modes to find the best.") - - if nnlib.device.backend != 'plaidML': # todo https://github.com/plaidml/plaidml/issues/301 - default_clipgrad = False if is_first_run else self.options.get('clipgrad', False) - self.options['clipgrad'] = io.input_bool (f"Enable gradient clipping? (y/n, ?:help skip:{yn_str[default_clipgrad]}) : ", default_clipgrad, help_message="Gradient clipping reduces chance of model collapse, sacrificing speed of training.") - else: - self.options['clipgrad'] = False - else: - self.options['lr_dropout'] = self.options.get('lr_dropout', False) - self.options['random_warp'] = self.options.get('random_warp', True) - self.options['true_face_training'] = self.options.get('true_face_training', default_true_face_training) - self.options['face_style_power'] = self.options.get('face_style_power', default_face_style_power) - self.options['bg_style_power'] = self.options.get('bg_style_power', default_bg_style_power) - self.options['ct_mode'] = self.options.get('ct_mode', 'none') - self.options['clipgrad'] = self.options.get('clipgrad', False) - - if is_first_run: - self.options['pretrain'] = io.input_bool ("Pretrain the model? (y/n, ?:help skip:n) : ", False, help_message="Pretrain the model with large amount of various faces. This technique may help to train the fake with overly different face shapes and light conditions of src/dst data. Face will be look more like a morphed. To reduce the morph effect, some model files will be initialized but not be updated after pretrain: LIAE: inter_AB.h5 DF: encoder.h5. The longer you pretrain the model the more morphed face will look. 
After that, save and run the training again.") - else: - self.options['pretrain'] = False + self.pretrain_just_disabled = (default_pretrain == True and self.options['pretrain'] == False) + + if self.pretrain_just_disabled: + self.set_iter(1) #override - def onInitialize(self): - exec(nnlib.import_all(), locals(), globals()) - self.set_vram_batch_requirements({1.5:4,4:8}) + def on_initialize(self): + nn.initialize() + tf = nn.tf + + conv_kernel_initializer = nn.initializers.ca + + class Downscale(nn.ModelBase): + def __init__(self, in_ch, out_ch, kernel_size=5, dilations=1, subpixel=True, use_activator=True, *kwargs ): + self.in_ch = in_ch + self.out_ch = out_ch + self.kernel_size = kernel_size + self.dilations = dilations + self.subpixel = subpixel + self.use_activator = use_activator + super().__init__(*kwargs) + + def on_build(self, *args, **kwargs ): + self.conv1 = nn.Conv2D( self.in_ch, + self.out_ch // (4 if self.subpixel else 1), + kernel_size=self.kernel_size, + strides=1 if self.subpixel else 2, + padding='SAME', dilations=self.dilations, kernel_initializer=conv_kernel_initializer ) + + def forward(self, x): + x = self.conv1(x) + + if self.subpixel: + x = tf.nn.space_to_depth(x, 2) + + if self.use_activator: + x = tf.nn.leaky_relu(x, 0.1) + return x + + def get_out_ch(self): + return (self.out_ch // 4) * 4 + + class DownscaleBlock(nn.ModelBase): + def on_build(self, in_ch, ch, n_downscales, kernel_size, dilations=1, subpixel=True): + self.downs = [] + + last_ch = in_ch + for i in range(n_downscales): + cur_ch = ch*( min(2**i, 8) ) + self.downs.append ( Downscale(last_ch, cur_ch, kernel_size=kernel_size, dilations=dilations, subpixel=subpixel) ) + last_ch = self.downs[-1].get_out_ch() + + def forward(self, inp): + x = inp + for down in self.downs: + x = down(x) + return x + + class Upscale(nn.ModelBase): + def on_build(self, in_ch, out_ch, kernel_size=3 ): + self.conv1 = nn.Conv2D( in_ch, out_ch*4, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, x): + x = self.conv1(x) + x = tf.nn.leaky_relu(x, 0.1) + x = tf.nn.depth_to_space(x, 2) + return x + + class ResidualBlock(nn.ModelBase): + def on_build(self, ch, kernel_size=3 ): + self.conv1 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + self.conv2 = nn.Conv2D( ch, ch, kernel_size=kernel_size, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def forward(self, inp): + x = self.conv1(inp) + x = tf.nn.leaky_relu(x, 0.2) + x = self.conv2(x) + x = tf.nn.leaky_relu(inp + x, 0.2) + return x + + class UpdownResidualBlock(nn.ModelBase): + def on_build(self, ch, inner_ch, kernel_size=3 ): + self.up = Upscale (ch, inner_ch, kernel_size=kernel_size) + self.res = ResidualBlock (inner_ch, kernel_size=kernel_size) + self.down = Downscale (inner_ch, ch, kernel_size=kernel_size, use_activator=False) + + def forward(self, inp): + x = self.up(inp) + x = upx = self.res(x) + x = self.down(x) + x = x + inp + x = tf.nn.leaky_relu(x, 0.2) + return x, upx + + class Encoder(nn.ModelBase): + def on_build(self, in_ch, e_ch, is_hd): + self.is_hd=is_hd + if self.is_hd: + self.down1 = DownscaleBlock(in_ch, e_ch*2, n_downscales=4, kernel_size=3, dilations=1) + self.down2 = DownscaleBlock(in_ch, e_ch*2, n_downscales=4, kernel_size=5, dilations=1) + self.down3 = DownscaleBlock(in_ch, e_ch//2, n_downscales=4, kernel_size=5, dilations=2) + self.down4 = DownscaleBlock(in_ch, e_ch//2, n_downscales=4, kernel_size=7, dilations=2) + else: + 
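The new Downscale/Upscale blocks replace the Keras SubpixelDownscaler/SubpixelUpscaler with stride-1 convolutions plus space_to_depth / depth_to_space. A hedged functional sketch, using tf.keras.layers.Conv2D as a stand-in for the repo's nn.Conv2D:

import tensorflow as tf

def subpixel_downscale(x, out_ch, kernel_size=5):
    # Conv to out_ch // 4 channels, then space_to_depth(2): halves H and W and
    # multiplies channels by 4, so the output carries (out_ch // 4) * 4 channels.
    x = tf.keras.layers.Conv2D(out_ch // 4, kernel_size, padding='same')(x)
    x = tf.nn.space_to_depth(x, 2)
    return tf.nn.leaky_relu(x, 0.1)

def subpixel_upscale(x, out_ch, kernel_size=3):
    # Conv to out_ch * 4 channels, then depth_to_space(2): doubles H and W.
    x = tf.keras.layers.Conv2D(out_ch * 4, kernel_size, padding='same')(x)
    x = tf.nn.leaky_relu(x, 0.1)
    return tf.nn.depth_to_space(x, 2)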
self.down1 = DownscaleBlock(in_ch, e_ch, n_downscales=4, kernel_size=5, dilations=1, subpixel=False) + + def forward(self, inp): + if self.is_hd: + x = tf.concat([ nn.tf_flatten(self.down1(inp)), + nn.tf_flatten(self.down2(inp)), + nn.tf_flatten(self.down3(inp)), + nn.tf_flatten(self.down4(inp)) ], -1 ) + else: + x = nn.tf_flatten(self.down1(inp)) + + return x + + class Inter(nn.ModelBase): + def __init__(self, in_ch, lowest_dense_res, ae_ch, ae_out_ch, **kwargs): + self.in_ch, self.lowest_dense_res, self.ae_ch, self.ae_out_ch = in_ch, lowest_dense_res, ae_ch, ae_out_ch + super().__init__(**kwargs) + + def on_build(self): + in_ch, lowest_dense_res, ae_ch, ae_out_ch = self.in_ch, self.lowest_dense_res, self.ae_ch, self.ae_out_ch + + self.dense1 = nn.Dense( in_ch, ae_ch, kernel_initializer=tf.initializers.orthogonal ) + self.dense2 = nn.Dense( ae_ch, lowest_dense_res * lowest_dense_res * ae_out_ch, kernel_initializer=tf.initializers.orthogonal ) + self.upscale1 = Upscale(ae_out_ch, ae_out_ch) + + def forward(self, inp): + x = self.dense1(inp) + x = self.dense2(x) + x = tf.reshape (x, (-1, lowest_dense_res, lowest_dense_res, self.ae_out_ch)) + x = self.upscale1(x) + return x + + def get_out_ch(self): + return self.ae_out_ch + + class Decoder(nn.ModelBase): + def on_build(self, in_ch, d_ch, d_mask_ch, is_hd ): + self.is_hd = is_hd + + self.upscale0 = Upscale(in_ch, d_ch*8, kernel_size=3) + self.upscale1 = Upscale(d_ch*8, d_ch*4, kernel_size=3) + self.upscale2 = Upscale(d_ch*4, d_ch*2, kernel_size=3) + + if is_hd: + self.res0 = UpdownResidualBlock(in_ch, d_ch*8, kernel_size=3) + self.res1 = UpdownResidualBlock(d_ch*8, d_ch*4, kernel_size=3) + self.res2 = UpdownResidualBlock(d_ch*4, d_ch*2, kernel_size=3) + self.res3 = UpdownResidualBlock(d_ch*2, d_ch, kernel_size=3) + else: + self.res0 = ResidualBlock(d_ch*8, kernel_size=3) + self.res1 = ResidualBlock(d_ch*4, kernel_size=3) + self.res2 = ResidualBlock(d_ch*2, kernel_size=3) + + self.out_conv = nn.Conv2D( d_ch*2, 3, kernel_size=1, padding='SAME', kernel_initializer=conv_kernel_initializer) + + self.upscalem0 = Upscale(in_ch, d_mask_ch*8, kernel_size=3) + self.upscalem1 = Upscale(d_mask_ch*8, d_mask_ch*4, kernel_size=3) + self.upscalem2 = Upscale(d_mask_ch*4, d_mask_ch*2, kernel_size=3) + self.out_convm = nn.Conv2D( d_mask_ch*2, 1, kernel_size=1, padding='SAME', kernel_initializer=conv_kernel_initializer) + + def get_weights_ex(self, include_mask): + # Call internal get_weights in order to initialize inner logic + self.get_weights() + + weights = self.upscale0.get_weights() + self.upscale1.get_weights() + self.upscale2.get_weights() \ + + self.res0.get_weights() + self.res1.get_weights() + self.res2.get_weights() + self.out_conv.get_weights() + + if include_mask: + weights += self.upscalem0.get_weights() + self.upscalem1.get_weights() + self.upscalem2.get_weights() \ + + self.out_convm.get_weights() + return weights + + + def forward(self, inp): + z = inp + + if self.is_hd: + x, upx = self.res0(z) + + x = self.upscale0(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res1(x) + + x = self.upscale1(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res2(x) + + x = self.upscale2(x) + x = tf.nn.leaky_relu(x + upx, 0.2) + x, upx = self.res3(x) + else: + x = self.upscale0(z) + x = self.res0(x) + x = self.upscale1(x) + x = self.res1(x) + x = self.upscale2(x) + x = self.res2(x) + + m = self.upscalem0(z) + m = self.upscalem1(m) + m = self.upscalem2(m) + + return tf.nn.sigmoid(self.out_conv(x)), \ + tf.nn.sigmoid(self.out_convm(m)) + + class 
CodeDiscriminator(nn.ModelBase): + def on_build(self, in_ch, code_res, ch=256): + n_downscales = 2 + code_res // 8 + + self.convs = [] + prev_ch = in_ch + for i in range(n_downscales): + cur_ch = ch * min( (2**i), 8 ) + self.convs.append ( nn.Conv2D( prev_ch, cur_ch, kernel_size=4 if i == 0 else 3, strides=2, padding='SAME', kernel_initializer=conv_kernel_initializer) ) + prev_ch = cur_ch + + self.out_conv = nn.Conv2D( prev_ch, 1, kernel_size=1, padding='VALID', kernel_initializer=conv_kernel_initializer) + + def forward(self, x): + for conv in self.convs: + x = tf.nn.leaky_relu( conv(x), 0.1 ) + return self.out_conv(x) + + device_config = nn.getCurrentDeviceConfig() + devices = device_config.devices resolution = self.options['resolution'] learn_mask = self.options['learn_mask'] - - ae_dims = self.options['ae_dims'] - ed_ch_dims = self.options['ed_ch_dims'] - self.pretrain = self.options['pretrain'] = self.options.get('pretrain', False) - if not self.pretrain: - self.options.pop('pretrain') - - bgr_shape = (resolution, resolution, 3) - mask_shape = (resolution, resolution, 1) - - self.true_face_training = self.options.get('true_face_training', False) + archi = self.options['archi'] + ae_dims = self.options['ae_dims'] + e_dims = self.options['e_dims'] + d_dims = self.options['d_dims'] + d_mask_dims = self.options['d_mask_dims'] + self.pretrain = self.options['pretrain'] + masked_training = True - class CommonModel(object): - def downscale (self, dim, kernel_size=5, dilation_rate=1, use_activator=True): - def func(x): - if not use_activator: - return SubpixelDownscaler()(Conv2D(dim // 4, kernel_size=kernel_size, strides=1, dilation_rate=dilation_rate, padding='same')(x)) - else: - return SubpixelDownscaler()(LeakyReLU(0.1)(Conv2D(dim // 4, kernel_size=kernel_size, strides=1, dilation_rate=dilation_rate, padding='same')(x))) - return func - - def upscale (self, dim, size=(2,2)): - def func(x): - return SubpixelUpscaler(size=size)(LeakyReLU(0.1)(Conv2D(dim * np.prod(size) , kernel_size=3, strides=1, padding='same')(x))) - return func - - def ResidualBlock(self, dim): - def func(inp): - x = Conv2D(dim, kernel_size=3, padding='same')(inp) - x = LeakyReLU(0.2)(x) - x = Conv2D(dim, kernel_size=3, padding='same')(x) - x = Add()([x, inp]) - x = LeakyReLU(0.2)(x) - return x - return func - - class SAEDFModel(CommonModel): - def __init__(self, resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask): - super().__init__() - self.learn_mask = learn_mask - - output_nc = 3 - bgr_shape = (resolution, resolution, output_nc) - mask_shape = (resolution, resolution, 1) - lowest_dense_res = resolution // 16 - e_dims = output_nc*e_ch_dims - - - - def enc_flow(e_ch_dims, ae_dims, lowest_dense_res): - dims = output_nc * e_ch_dims - if dims % 2 != 0: - dims += 1 - - def func(inp): - x = self.downscale(dims , 3, 1 )(inp) - x = self.downscale(dims*2, 3, 1 )(x) - x = self.downscale(dims*4, 3, 1 )(x) - x0 = self.downscale(dims*8, 3, 1 )(x) - - x = self.downscale(dims , 5, 1 )(inp) - x = self.downscale(dims*2, 5, 1 )(x) - x = self.downscale(dims*4, 5, 1 )(x) - x1 = self.downscale(dims*8, 5, 1 )(x) - - x = self.downscale(dims , 5, 2 )(inp) - x = self.downscale(dims*2, 5, 2 )(x) - x = self.downscale(dims*4, 5, 2 )(x) - x2 = self.downscale(dims*8, 5, 2 )(x) - - x = self.downscale(dims , 7, 2 )(inp) - x = self.downscale(dims*2, 7, 2 )(x) - x = self.downscale(dims*4, 7, 2 )(x) - x3 = self.downscale(dims*8, 7, 2 )(x) - - x = Concatenate()([x0,x1,x2,x3]) - - x = Dense(ae_dims)(Flatten()(x)) - x = Dense(lowest_dense_res * 
lowest_dense_res * ae_dims)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims))(x) - x = self.upscale(ae_dims)(x) - return x - return func - - def dec_flow(output_nc, d_ch_dims, is_mask=False): - dims = output_nc * d_ch_dims - if dims % 2 != 0: - dims += 1 - - def func(x): - - for i in [8,4,2]: - x = self.upscale(dims*i)(x) - - if not is_mask: - x0 = x - x = self.upscale( (dims*i)//2 )(x) - x = self.ResidualBlock( (dims*i)//2 )(x) - x = self.downscale( dims*i, use_activator=False ) (x) - x = Add()([x, x0]) - x = LeakyReLU(0.2)(x) - - return Conv2D(output_nc, kernel_size=1, padding='same', activation='sigmoid')(x) - - return func - - self.encoder = modelify(enc_flow(e_ch_dims, ae_dims, lowest_dense_res)) ( Input(bgr_shape) ) - - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.decoder_src = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - self.decoder_dst = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - - if learn_mask: - self.decoder_srcm = modelify(dec_flow(1, d_ch_dims, is_mask=True)) ( Input(sh) ) - self.decoder_dstm = modelify(dec_flow(1, d_ch_dims, is_mask=True)) ( Input(sh) ) - - self.src_dst_trainable_weights = self.encoder.trainable_weights + self.decoder_src.trainable_weights + self.decoder_dst.trainable_weights - - if learn_mask: - self.src_dst_mask_trainable_weights = self.encoder.trainable_weights + self.decoder_srcm.trainable_weights + self.decoder_dstm.trainable_weights - - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - self.target_src, self.target_dst = Input(bgr_shape), Input(bgr_shape) - self.target_srcm, self.target_dstm = Input(mask_shape), Input(mask_shape) - self.src_code, self.dst_code = self.encoder(self.warped_src), self.encoder(self.warped_dst) - - self.pred_src_src = self.decoder_src(self.src_code) - self.pred_dst_dst = self.decoder_dst(self.dst_code) - self.pred_src_dst = self.decoder_src(self.dst_code) - - if learn_mask: - self.pred_src_srcm = self.decoder_srcm(self.src_code) - self.pred_dst_dstm = self.decoder_dstm(self.dst_code) - self.pred_src_dstm = self.decoder_srcm(self.dst_code) - - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [] - if not exclude_for_pretrain: - ar += [ [self.encoder, 'encoder.h5'] ] - ar += [ [self.decoder_src, 'decoder_src.h5'], - [self.decoder_dst, 'decoder_dst.h5'] ] - if self.learn_mask: - ar += [ [self.decoder_srcm, 'decoder_srcm.h5'], - [self.decoder_dstm, 'decoder_dstm.h5'] ] - return ar - - class SAELIAEModel(CommonModel): - def __init__(self, resolution, ae_dims, e_ch_dims, d_ch_dims, learn_mask): - super().__init__() - self.learn_mask = learn_mask - - output_nc = 3 - bgr_shape = (resolution, resolution, output_nc) - mask_shape = (resolution, resolution, 1) - - lowest_dense_res = resolution // 16 - - def enc_flow(e_ch_dims): - dims = output_nc*e_ch_dims - if dims % 2 != 0: - dims += 1 - - def func(inp): - x = self.downscale(dims , 3, 1 )(inp) - x = self.downscale(dims*2, 3, 1 )(x) - x = self.downscale(dims*4, 3, 1 )(x) - x0 = self.downscale(dims*8, 3, 1 )(x) - - x = self.downscale(dims , 5, 1 )(inp) - x = self.downscale(dims*2, 5, 1 )(x) - x = self.downscale(dims*4, 5, 1 )(x) - x1 = self.downscale(dims*8, 5, 1 )(x) - - x = self.downscale(dims , 5, 2 )(inp) - x = self.downscale(dims*2, 5, 2 )(x) - x = self.downscale(dims*4, 5, 2 )(x) - x2 = self.downscale(dims*8, 5, 2 )(x) - - x = self.downscale(dims , 7, 2 )(inp) - x = self.downscale(dims*2, 7, 2 )(x) - x = self.downscale(dims*4, 7, 2 )(x) - x3 = self.downscale(dims*8, 7, 2 )(x) - - x = 
Concatenate()([x0,x1,x2,x3]) - - x = Flatten()(x) - return x - return func - - def inter_flow(lowest_dense_res, ae_dims): - def func(x): - x = Dense(ae_dims)(x) - x = Dense(lowest_dense_res * lowest_dense_res * ae_dims*2)(x) - x = Reshape((lowest_dense_res, lowest_dense_res, ae_dims*2))(x) - x = self.upscale(ae_dims*2)(x) - return x - return func - - def dec_flow(output_nc, d_ch_dims, is_mask=False): - dims = output_nc * d_ch_dims - if dims % 2 != 0: - dims += 1 - - def func(x): - - for i in [8,4,2]: - x = self.upscale(dims*i)(x) - - if not is_mask: - x0 = x - x = self.upscale( (dims*i)//2 )(x) - x = self.ResidualBlock( (dims*i)//2 )(x) - x = self.downscale( dims*i, use_activator=False ) (x) - x = Add()([x, x0]) - x = LeakyReLU(0.2)(x) - - return Conv2D(output_nc, kernel_size=1, padding='same', activation='sigmoid')(x) - - return func - - self.encoder = modelify(enc_flow(e_ch_dims)) ( Input(bgr_shape) ) - - sh = K.int_shape( self.encoder.outputs[0] )[1:] - self.inter_B = modelify(inter_flow(lowest_dense_res, ae_dims)) ( Input(sh) ) - self.inter_AB = modelify(inter_flow(lowest_dense_res, ae_dims)) ( Input(sh) ) - - sh = np.array(K.int_shape( self.inter_B.outputs[0] )[1:])*(1,1,2) - self.decoder = modelify(dec_flow(output_nc, d_ch_dims)) ( Input(sh) ) - - if learn_mask: - self.decoderm = modelify(dec_flow(1, d_ch_dims, is_mask=True)) ( Input(sh) ) - - self.src_dst_trainable_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoder.trainable_weights - - if learn_mask: - self.src_dst_mask_trainable_weights = self.encoder.trainable_weights + self.inter_B.trainable_weights + self.inter_AB.trainable_weights + self.decoderm.trainable_weights - - self.warped_src, self.warped_dst = Input(bgr_shape), Input(bgr_shape) - self.target_src, self.target_dst = Input(bgr_shape), Input(bgr_shape) - self.target_srcm, self.target_dstm = Input(mask_shape), Input(mask_shape) - - warped_src_code = self.encoder (self.warped_src) - warped_src_inter_AB_code = self.inter_AB (warped_src_code) - self.src_code = Concatenate()([warped_src_inter_AB_code,warped_src_inter_AB_code]) - - warped_dst_code = self.encoder (self.warped_dst) - warped_dst_inter_B_code = self.inter_B (warped_dst_code) - warped_dst_inter_AB_code = self.inter_AB (warped_dst_code) - self.dst_code = Concatenate()([warped_dst_inter_B_code,warped_dst_inter_AB_code]) - - src_dst_code = Concatenate()([warped_dst_inter_AB_code,warped_dst_inter_AB_code]) - - self.pred_src_src = self.decoder(self.src_code) - self.pred_dst_dst = self.decoder(self.dst_code) - self.pred_src_dst = self.decoder(src_dst_code) - - if learn_mask: - self.pred_src_srcm = self.decoderm(self.src_code) - self.pred_dst_dstm = self.decoderm(self.dst_code) - self.pred_src_dstm = self.decoderm(src_dst_code) - - def get_model_filename_list(self, exclude_for_pretrain=False): - ar = [ [self.encoder, 'encoder.h5'], - [self.inter_B, 'inter_B.h5'] ] - - if not exclude_for_pretrain: - ar += [ [self.inter_AB, 'inter_AB.h5'] ] - - ar += [ [self.decoder, 'decoder.h5'] ] - - if self.learn_mask: - ar += [ [self.decoderm, 'decoderm.h5'] ] - - return ar - - if 'df' in self.options['archi']: - self.model = SAEDFModel (resolution, ae_dims, ed_ch_dims, ed_ch_dims, learn_mask) - elif 'liae' in self.options['archi']: - self.model = SAELIAEModel (resolution, ae_dims, ed_ch_dims, ed_ch_dims, learn_mask) - - self.opt_dis_model = [] - - if self.true_face_training: - def dis_flow(ndf=256): - def func(x): - x, = x - - code_res = K.int_shape(x)[1] - - x = 
Conv2D( ndf, 4, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - x = Conv2D( ndf*2, 3, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - if code_res > 8: - x = Conv2D( ndf*4, 3, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - if code_res > 16: - x = Conv2D( ndf*8, 3, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - if code_res > 32: - x = Conv2D( ndf*8, 3, strides=2, padding='valid')( ZeroPadding2D(1)(x) ) - x = LeakyReLU(0.1)(x) - - return Conv2D( 1, 1, strides=1, padding='valid', activation='sigmoid')(x) - return func - - sh = [ Input( K.int_shape(self.model.src_code)[1:] ) ] - self.dis = modelify(dis_flow()) (sh) - - self.opt_dis_model = [ (self.dis, 'dis.h5') ] - - loaded, not_loaded = [], self.model.get_model_filename_list()+self.opt_dis_model - if not self.is_first_run(): - loaded, not_loaded = self.load_weights_safe(not_loaded) - - CA_models = [ model for model, _ in not_loaded ] - - self.CA_conv_weights_list = [] - for model in CA_models: - for layer in model.layers: - if type(layer) == keras.layers.Conv2D: - self.CA_conv_weights_list += [layer.weights[0]] #- is Conv2D kernel_weights - - target_srcm = gaussian_blur( max(1, resolution // 32) )(self.model.target_srcm) - target_dstm = gaussian_blur( max(1, resolution // 32) )(self.model.target_dstm) - - target_src_masked = self.model.target_src*target_srcm - target_dst_masked = self.model.target_dst*target_dstm - target_dst_anti_masked = self.model.target_dst*(1.0 - target_dstm) - - target_src_masked_opt = target_src_masked if masked_training else self.model.target_src - target_dst_masked_opt = target_dst_masked if masked_training else self.model.target_dst - - pred_src_src_masked_opt = self.model.pred_src_src*target_srcm if masked_training else self.model.pred_src_src - pred_dst_dst_masked_opt = self.model.pred_dst_dst*target_dstm if masked_training else self.model.pred_dst_dst - - psd_target_dst_masked = self.model.pred_src_dst*target_dstm - psd_target_dst_anti_masked = self.model.pred_src_dst*(1.0 - target_dstm) - - if self.is_training_mode: - lr_dropout = 0.3 if self.options['lr_dropout'] else 0.0 - self.src_dst_opt = RMSprop(lr=5e-5, lr_dropout=lr_dropout, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - self.src_dst_mask_opt = RMSprop(lr=5e-5, lr_dropout=lr_dropout, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - self.D_opt = RMSprop(lr=5e-5, lr_dropout=lr_dropout, clipnorm=1.0 if self.options['clipgrad'] else 0.0, tf_cpu_mode=self.options['optimizer_mode']-1) - - src_loss = K.mean ( 10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( target_src_masked_opt, pred_src_src_masked_opt) ) - src_loss += K.mean ( 10*K.square( target_src_masked_opt - pred_src_src_masked_opt ) ) - - face_style_power = self.options['face_style_power'] / 100.0 - if face_style_power != 0: - src_loss += style_loss(gaussian_blur_radius=resolution//16, loss_weight=face_style_power, wnd_size=0)( psd_target_dst_masked, target_dst_masked ) - - bg_style_power = self.options['bg_style_power'] / 100.0 - if bg_style_power != 0: - src_loss += K.mean( (10*bg_style_power)*dssim(kernel_size=int(resolution/11.6),max_value=1.0)( psd_target_dst_anti_masked, target_dst_anti_masked )) - src_loss += K.mean( (10*bg_style_power)*K.square( psd_target_dst_anti_masked - target_dst_anti_masked )) - - dst_loss = K.mean( 
10*dssim(kernel_size=int(resolution/11.6),max_value=1.0)(target_dst_masked_opt, pred_dst_dst_masked_opt) ) - dst_loss += K.mean( 10*K.square( target_dst_masked_opt - pred_dst_dst_masked_opt ) ) - - G_loss = src_loss+dst_loss - - if self.true_face_training: - def DLoss(labels,logits): - return K.mean(K.binary_crossentropy(labels,logits)) - - src_code_d = self.dis( self.model.src_code ) - src_code_d_ones = K.ones_like(src_code_d) - src_code_d_zeros = K.zeros_like(src_code_d) - dst_code_d = self.dis( self.model.dst_code ) - dst_code_d_ones = K.ones_like(dst_code_d) - G_loss += 0.01*DLoss(src_code_d_ones, src_code_d) - - loss_D = (DLoss(dst_code_d_ones , dst_code_d) + \ - DLoss(src_code_d_zeros, src_code_d) ) * 0.5 - - self.D_train = K.function ([self.model.warped_src, self.model.warped_dst],[loss_D], self.D_opt.get_updates(loss_D, self.dis.trainable_weights) ) - - self.src_dst_train = K.function ([self.model.warped_src, self.model.warped_dst, self.model.target_src, self.model.target_srcm, self.model.target_dst, self.model.target_dstm], - [src_loss,dst_loss], - self.src_dst_opt.get_updates( G_loss, self.model.src_dst_trainable_weights) - ) - - if self.options['learn_mask']: - src_mask_loss = K.mean(K.square(self.model.target_srcm-self.model.pred_src_srcm)) - dst_mask_loss = K.mean(K.square(self.model.target_dstm-self.model.pred_dst_dstm)) - self.src_dst_mask_train = K.function ([self.model.warped_src, self.model.warped_dst, self.model.target_srcm, self.model.target_dstm],[src_mask_loss, dst_mask_loss], self.src_dst_mask_opt.get_updates(src_mask_loss+dst_mask_loss, self.model.src_dst_mask_trainable_weights ) ) - - if self.options['learn_mask']: - self.AE_view = K.function ([self.model.warped_src, self.model.warped_dst], [self.model.pred_src_src, self.model.pred_dst_dst, self.model.pred_dst_dstm, self.model.pred_src_dst, self.model.pred_src_dstm]) + models_opt_on_gpu = False if len(devices) != 1 else self.options['models_opt_on_gpu'] + models_opt_device = '/GPU:0' if models_opt_on_gpu and self.is_training else '/CPU:0' + optimizer_vars_on_cpu = models_opt_device=='/CPU:0' + + input_nc = 3 + output_nc = 3 + bgr_shape = (resolution, resolution, output_nc) + mask_shape = (resolution, resolution, 1) + lowest_dense_res = resolution // 16 + + self.model_filename_list = [] + + + with tf.device ('/CPU:0'): + #Place holders on CPU + self.warped_src = tf.placeholder (tf.float32, (None,)+bgr_shape) + self.warped_dst = tf.placeholder (tf.float32, (None,)+bgr_shape) + + self.target_src = tf.placeholder (tf.float32, (None,)+bgr_shape) + self.target_dst = tf.placeholder (tf.float32, (None,)+bgr_shape) + + self.target_srcm = tf.placeholder (tf.float32, (None,)+mask_shape) + self.target_dstm = tf.placeholder (tf.float32, (None,)+mask_shape) + + # Initializing model classes + with tf.device (models_opt_device): + if 'df' in archi: + self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder') + encoder_out_ch = self.encoder.compute_output_shape ( (tf.float32, (None,resolution,resolution,input_nc)))[-1] + + self.inter = Inter (in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims, name='inter') + inter_out_ch = self.inter.compute_output_shape ( (tf.float32, (None,encoder_out_ch)))[-1] + + self.decoder_src = Decoder(in_ch=inter_out_ch, d_ch=d_dims, d_mask_ch=d_mask_dims, is_hd='hd' in archi, name='decoder_src') + self.decoder_dst = Decoder(in_ch=inter_out_ch, d_ch=d_dims, d_mask_ch=d_mask_dims, is_hd='hd' in archi, name='decoder_dst') + + 
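For orientation, here is a rough shape-bookkeeping sketch of the encoder/inter/decoder stack assembled at this point, assuming illustrative values resolution=128 and ae_dims=256 (the real values are prompted for elsewhere). It only reproduces the arithmetic implied by `lowest_dense_res = resolution // 16`, the reshape in `Inter.forward`, and the doubling `Upscale` stages; it is not the model graph itself.

```python
# Hedged sketch of the spatial-size arithmetic, assuming resolution=128 and ae_dims=256.
resolution = 128
ae_dims = 256

lowest_dense_res = resolution // 16               # 8, as computed in on_initialize
dense2_units = lowest_dense_res ** 2 * ae_dims    # 8*8*256 units produced by Inter.dense2

side = lowest_dense_res
side *= 2                                         # Inter.upscale1: depth_to_space by 2 -> 16
for _ in range(3):                                # Decoder.upscale0..2 -> 32, 64, 128
    side *= 2
assert side == resolution                         # decoder output matches the training resolution

print(lowest_dense_res, dense2_units, side)       # 8 16384 128
```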
self.model_filename_list += [ [self.encoder, 'encoder.npy' ], + [self.inter, 'inter.npy' ], + [self.decoder_src, 'decoder_src.npy'], + [self.decoder_dst, 'decoder_dst.npy'] ] + + if self.is_training: + if self.options['true_face_training']: + self.dis = CodeDiscriminator(ae_dims, code_res=lowest_dense_res*2, name='dis' ) + self.model_filename_list += [ [self.dis, 'dis.npy'] ] + + elif 'liae' in archi: + self.encoder = Encoder(in_ch=input_nc, e_ch=e_dims, is_hd='hd' in archi, name='encoder') + encoder_out_ch = self.encoder.compute_output_shape ( (tf.float32, (None,resolution,resolution,input_nc)))[-1] + + self.inter_AB = Inter(in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims*2, name='inter_AB') + self.inter_B = Inter(in_ch=encoder_out_ch, lowest_dense_res=lowest_dense_res, ae_ch=ae_dims, ae_out_ch=ae_dims*2, name='inter_B') + + inter_AB_out_ch = self.inter_AB.compute_output_shape ( (tf.float32, (None,encoder_out_ch)))[-1] + inter_B_out_ch = self.inter_B.compute_output_shape ( (tf.float32, (None,encoder_out_ch)))[-1] + inters_out_ch = inter_AB_out_ch+inter_B_out_ch + + self.decoder = Decoder(in_ch=inters_out_ch, d_ch=d_dims, d_mask_ch=d_mask_dims, is_hd='hd' in archi, name='decoder') + + self.model_filename_list += [ [self.encoder, 'encoder.npy'], + [self.inter_AB, 'inter_AB.npy'], + [self.inter_B , 'inter_B.npy'], + [self.decoder , 'decoder.npy'] ] + + if self.is_training: + # Initialize optimizers + lr=5e-5 + lr_dropout = 0.3 if self.options['lr_dropout'] else 1.0 + clipnorm = 1.0 if self.options['clipgrad'] else 0.0 + self.src_dst_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='src_dst_opt') + self.model_filename_list += [ (self.src_dst_opt, 'src_dst_opt.npy') ] + if 'df' in archi: + self.src_dst_all_trainable_weights = self.encoder.get_weights() + self.decoder_src.get_weights() + self.decoder_dst.get_weights() + self.src_dst_trainable_weights = self.encoder.get_weights() + self.decoder_src.get_weights_ex(learn_mask) + self.decoder_dst.get_weights_ex(learn_mask) + + elif 'liae' in archi: + self.src_dst_all_trainable_weights = self.encoder.get_weights() + self.inter_AB.get_weights() + self.inter_B.get_weights() + self.decoder.get_weights() + self.src_dst_trainable_weights = self.encoder.get_weights() + self.inter_AB.get_weights() + self.inter_B.get_weights() + self.decoder.get_weights_ex(learn_mask) + + self.src_dst_opt.initialize_variables (self.src_dst_all_trainable_weights, vars_on_cpu=optimizer_vars_on_cpu) + + if self.options['true_face_training']: + self.D_opt = nn.TFRMSpropOptimizer(lr=lr, lr_dropout=lr_dropout, clipnorm=clipnorm, name='D_opt') + self.D_opt.initialize_variables ( self.dis.get_weights(), vars_on_cpu=optimizer_vars_on_cpu) + self.model_filename_list += [ (self.D_opt, 'D_opt.npy') ] + + if self.is_training: + # Adjust batch size for multiple GPU + gpu_count = max(1, len(devices) ) + bs_per_gpu = max(1, self.get_batch_size() // gpu_count) + self.set_batch_size( gpu_count*bs_per_gpu) + + + # Compute losses per GPU + gpu_pred_src_src_list = [] + gpu_pred_dst_dst_list = [] + gpu_pred_src_dst_list = [] + gpu_pred_src_srcm_list = [] + gpu_pred_dst_dstm_list = [] + gpu_pred_src_dstm_list = [] + + gpu_src_losses = [] + gpu_dst_losses = [] + gpu_src_dst_loss_gvs = [] + gpu_D_loss_gvs = [] + + for gpu_id in range(gpu_count): + with tf.device( f'/GPU:{gpu_id}' if len(devices) != 0 else f'/CPU:0' ): + batch_slice = slice( gpu_id*bs_per_gpu, (gpu_id+1)*bs_per_gpu ) + with tf.device(f'/CPU:0'): + # slice on 
CPU, otherwise all batch data will be transfered to GPU first + gpu_warped_src = self.warped_src [batch_slice,:,:,:] + gpu_warped_dst = self.warped_dst [batch_slice,:,:,:] + gpu_target_src = self.target_src [batch_slice,:,:,:] + gpu_target_dst = self.target_dst [batch_slice,:,:,:] + gpu_target_srcm = self.target_srcm[batch_slice,:,:,:] + gpu_target_dstm = self.target_dstm[batch_slice,:,:,:] + + # process model tensors + if 'df' in archi: + gpu_src_code = self.inter(self.encoder(gpu_warped_src)) + gpu_dst_code = self.inter(self.encoder(gpu_warped_dst)) + gpu_pred_src_src, gpu_pred_src_srcm = self.decoder_src(gpu_src_code) + gpu_pred_dst_dst, gpu_pred_dst_dstm = self.decoder_dst(gpu_dst_code) + gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder_src(gpu_dst_code) + + elif 'liae' in archi: + gpu_src_code = self.encoder (gpu_warped_src) + gpu_src_inter_AB_code = self.inter_AB (gpu_src_code) + gpu_src_code = tf.concat([gpu_src_inter_AB_code,gpu_src_inter_AB_code],-1) + gpu_dst_code = self.encoder (gpu_warped_dst) + gpu_dst_inter_B_code = self.inter_B (gpu_dst_code) + gpu_dst_inter_AB_code = self.inter_AB (gpu_dst_code) + gpu_dst_code = tf.concat([gpu_dst_inter_B_code,gpu_dst_inter_AB_code],-1) + gpu_src_dst_code = tf.concat([gpu_dst_inter_AB_code,gpu_dst_inter_AB_code],-1) + + gpu_pred_src_src, gpu_pred_src_srcm = self.decoder(gpu_src_code) + gpu_pred_dst_dst, gpu_pred_dst_dstm = self.decoder(gpu_dst_code) + gpu_pred_src_dst, gpu_pred_src_dstm = self.decoder(gpu_src_dst_code) + + gpu_pred_src_src_list.append(gpu_pred_src_src) + gpu_pred_dst_dst_list.append(gpu_pred_dst_dst) + gpu_pred_src_dst_list.append(gpu_pred_src_dst) + + gpu_pred_src_srcm_list.append(gpu_pred_src_srcm) + gpu_pred_dst_dstm_list.append(gpu_pred_dst_dstm) + gpu_pred_src_dstm_list.append(gpu_pred_src_dstm) + + gpu_target_srcm_blur = nn.tf_gaussian_blur(gpu_target_srcm, max(1, resolution // 32) ) + gpu_target_dstm_blur = nn.tf_gaussian_blur(gpu_target_dstm, max(1, resolution // 32) ) + + gpu_target_dst_masked = gpu_target_dst*gpu_target_dstm_blur + gpu_target_dst_anti_masked = gpu_target_dst*(1.0 - gpu_target_dstm_blur) + + gpu_target_srcmasked_opt = gpu_target_src*gpu_target_srcm_blur if masked_training else gpu_target_src + gpu_target_dst_masked_opt = gpu_target_dst_masked if masked_training else gpu_target_dst + + gpu_pred_src_src_masked_opt = gpu_pred_src_src*gpu_target_srcm_blur if masked_training else gpu_pred_src_src + gpu_pred_dst_dst_masked_opt = gpu_pred_dst_dst*gpu_target_dstm_blur if masked_training else gpu_pred_dst_dst + + gpu_psd_target_dst_masked = gpu_pred_src_dst*gpu_target_dstm_blur + gpu_psd_target_dst_anti_masked = gpu_pred_src_dst*(1.0 - gpu_target_dstm_blur) + + gpu_src_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_srcmasked_opt, gpu_pred_src_src_masked_opt, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) + gpu_src_loss += tf.reduce_mean ( 10*tf.square ( gpu_target_srcmasked_opt - gpu_pred_src_src_masked_opt ), axis=[1,2,3]) + if learn_mask: + gpu_src_loss += tf.reduce_mean ( tf.square( gpu_target_srcm - gpu_pred_src_srcm ),axis=[1,2,3] ) + + face_style_power = self.options['face_style_power'] / 100.0 + if face_style_power != 0 and not self.pretrain: + gpu_src_loss += nn.tf_style_loss(gpu_psd_target_dst_masked, gpu_target_dst_masked, gaussian_blur_radius=resolution//16, loss_weight=10000*face_style_power) + + bg_style_power = self.options['bg_style_power'] / 100.0 + if bg_style_power != 0 and not self.pretrain: + gpu_src_loss += tf.reduce_mean( 
(10*bg_style_power)*nn.tf_dssim(gpu_psd_target_dst_anti_masked, gpu_target_dst_anti_masked, max_val=1.0, filter_size=int(resolution/11.6)), axis=[1]) + gpu_src_loss += tf.reduce_mean( (10*bg_style_power)*tf.square( gpu_psd_target_dst_anti_masked - gpu_target_dst_anti_masked), axis=[1,2,3] ) + + gpu_dst_loss = tf.reduce_mean ( 10*nn.tf_dssim(gpu_target_dst_masked_opt, gpu_pred_dst_dst_masked_opt, max_val=1.0, filter_size=int(resolution/11.6) ), axis=[1]) + gpu_dst_loss += tf.reduce_mean ( 10*tf.square( gpu_target_dst_masked_opt- gpu_pred_dst_dst_masked_opt ), axis=[1,2,3]) + if learn_mask: + gpu_dst_loss += tf.reduce_mean ( tf.square( gpu_target_dstm - gpu_pred_dst_dstm ),axis=[1,2,3] ) + + gpu_src_losses += [gpu_src_loss] + gpu_dst_losses += [gpu_dst_loss] + + gpu_src_dst_loss = gpu_src_loss + gpu_dst_loss + + if self.options['true_face_training']: + def DLoss(labels,logits): + return tf.reduce_mean( tf.nn.sigmoid_cross_entropy_with_logits(labels=labels, logits=logits), axis=[1,2,3]) + + gpu_src_code_d = self.dis( gpu_src_code ) + gpu_src_code_d_ones = tf.ones_like(gpu_src_code_d) + gpu_src_code_d_zeros = tf.zeros_like(gpu_src_code_d) + gpu_dst_code_d = self.dis( gpu_dst_code ) + gpu_dst_code_d_ones = tf.ones_like(gpu_dst_code_d) + + gpu_src_dst_loss += 0.01*DLoss(gpu_src_code_d_ones, gpu_src_code_d) + + gpu_D_loss = (DLoss(gpu_src_code_d_ones , gpu_dst_code_d) + \ + DLoss(gpu_src_code_d_zeros, gpu_src_code_d) ) * 0.5 + + gpu_D_loss_gvs += [ nn.tf_gradients (gpu_D_loss, self.dis.get_weights() ) ] + + gpu_src_dst_loss_gvs += [ nn.tf_gradients ( gpu_src_dst_loss, self.src_dst_trainable_weights ) ] + + + # Average losses and gradients, and create optimizer update ops + with tf.device (models_opt_device): + if gpu_count == 1: + pred_src_src = gpu_pred_src_src_list[0] + pred_dst_dst = gpu_pred_dst_dst_list[0] + pred_src_dst = gpu_pred_src_dst_list[0] + pred_src_srcm = gpu_pred_src_srcm_list[0] + pred_dst_dstm = gpu_pred_dst_dstm_list[0] + pred_src_dstm = gpu_pred_src_dstm_list[0] + + src_loss = gpu_src_losses[0] + dst_loss = gpu_dst_losses[0] + src_dst_loss_gv = gpu_src_dst_loss_gvs[0] + else: + pred_src_src = tf.concat(gpu_pred_src_src_list, 0) + pred_dst_dst = tf.concat(gpu_pred_dst_dst_list, 0) + pred_src_dst = tf.concat(gpu_pred_src_dst_list, 0) + pred_src_srcm = tf.concat(gpu_pred_src_srcm_list, 0) + pred_dst_dstm = tf.concat(gpu_pred_dst_dstm_list, 0) + pred_src_dstm = tf.concat(gpu_pred_src_dstm_list, 0) + + src_loss = nn.tf_average_tensor_list(gpu_src_losses) + dst_loss = nn.tf_average_tensor_list(gpu_dst_losses) + src_dst_loss_gv = nn.tf_average_gv_list (gpu_src_dst_loss_gvs) + + if self.options['true_face_training']: + D_loss_gv = nn.tf_average_gv_list(gpu_D_loss_gvs) + + src_dst_loss_gv_op = self.src_dst_opt.get_update_op (src_dst_loss_gv ) + + if self.options['true_face_training']: + D_loss_gv_op = self.D_opt.get_update_op (D_loss_gv ) + + + # Initializing training and view functions + def src_dst_train(warped_src, target_src, target_srcm, \ + warped_dst, target_dst, target_dstm): + s, d, _ = nn.tf_sess.run ( [ src_loss, dst_loss, src_dst_loss_gv_op], + feed_dict={self.warped_src :warped_src, + self.target_src :target_src, + self.target_srcm:target_srcm, + self.warped_dst :warped_dst, + self.target_dst :target_dst, + self.target_dstm:target_dstm, + }) + s = np.mean(s) + d = np.mean(d) + return s, d + self.src_dst_train = src_dst_train + + if self.options['true_face_training']: + def D_train(warped_src, warped_dst): + nn.tf_sess.run ([D_loss_gv_op], feed_dict={self.warped_src: 
warped_src, self.warped_dst: warped_dst}) + self.D_train = D_train + + if learn_mask: + def AE_view(warped_src, warped_dst): + return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_dst_dstm, pred_src_dst, pred_src_dstm], + feed_dict={self.warped_src:warped_src, + self.warped_dst:warped_dst}) else: - self.AE_view = K.function ([self.model.warped_src, self.model.warped_dst], [self.model.pred_src_src, self.model.pred_dst_dst, self.model.pred_src_dst ]) - + def AE_view(warped_src, warped_dst): + return nn.tf_sess.run ( [pred_src_src, pred_dst_dst, pred_src_dst], + feed_dict={self.warped_src:warped_src, + self.warped_dst:warped_dst}) + self.AE_view = AE_view else: - if self.options['learn_mask']: - self.AE_convert = K.function ([self.model.warped_dst],[ self.model.pred_src_dst, self.model.pred_dst_dstm, self.model.pred_src_dstm ]) + # Initializing merge function + with tf.device( f'/GPU:0' if len(devices) != 0 else f'/CPU:0'): + if 'df' in archi: + gpu_dst_code = self.inter(self.encoder(self.warped_dst)) + gpu_pred_src_dst = self.decoder_src(gpu_dst_code) + gpu_pred_dst_dstm = self.decoder_dstm(gpu_dst_code) + gpu_pred_src_dstm = self.decoder_srcm(gpu_dst_code) + elif 'liae' in archi: + gpu_dst_code = self.encoder (self.warped_dst) + gpu_dst_inter_B_code = self.inter_B (gpu_dst_code) + gpu_dst_inter_AB_code = self.inter_AB (gpu_dst_code) + gpu_dst_code = tf.concat([gpu_dst_inter_B_code,gpu_dst_inter_AB_code],-1) + gpu_src_dst_code = tf.concat([gpu_dst_inter_AB_code,gpu_dst_inter_AB_code],-1) + + gpu_pred_src_dst = self.decoder(gpu_src_dst_code) + gpu_pred_dst_dstm = self.decoderm(gpu_dst_code) + gpu_pred_src_dstm = self.decoderm(gpu_src_dst_code) + + if learn_mask: + def AE_merge( warped_dst): + return nn.tf_sess.run ( [gpu_pred_src_dst, gpu_pred_dst_dstm, gpu_pred_src_dstm], feed_dict={self.warped_dst:warped_dst}) else: - self.AE_convert = K.function ([self.model.warped_dst],[ self.model.pred_src_dst ]) + def AE_merge( warped_dst): + return nn.tf_sess.run ( [gpu_pred_src_dst], feed_dict={self.warped_dst:warped_dst}) + self.AE_merge = AE_merge - if self.is_training_mode: + # Loading/initializing all models/optimizers weights + for model, filename in io.progress_bar_generator(self.model_filename_list, "Initializing models"): + do_init = self.is_first_run() + + if self.pretrain_just_disabled: + if 'df' in archi: + if model == self.inter: + do_init = True + elif 'liae' in archi: + if model == self.inter_AB: + do_init = True + + if not do_init: + do_init = not model.load_weights( self.get_strpath_storage_for_file(filename) ) + + if do_init: + model.init_weights() + + # initializing sample generators + + if self.is_training: t = SampleProcessor.Types - if self.options['face_type'] == 'h': face_type = t.FACE_TYPE_HALF elif self.options['face_type'] == 'mf': @@ -526,82 +668,76 @@ class SAEHDModel(ModelBase): elif self.options['face_type'] == 'f': face_type = t.FACE_TYPE_FULL - t_mode_bgr = t.MODE_BGR if not self.pretrain else t.MODE_BGR_SHUFFLE - - training_data_src_path = self.training_data_src_path - training_data_dst_path = self.training_data_dst_path - - if self.pretrain and self.pretraining_data_path is not None: - training_data_src_path = self.pretraining_data_path - training_data_dst_path = self.pretraining_data_path + training_data_src_path = self.training_data_src_path if not self.pretrain else self.get_pretraining_data_path() + training_data_dst_path = self.training_data_dst_path if not self.pretrain else self.get_pretraining_data_path() + random_ct_samples_path=training_data_dst_path if 
self.options['ct_mode'] != 'none' and not self.pretrain else None + t_img_warped = t.IMG_WARPED_TRANSFORMED if self.options['random_warp'] else t.IMG_TRANSFORMED + + cpu_count = multiprocessing.cpu_count() + + src_generators_count = cpu_count // 2 + if self.options['ct_mode'] != 'none': + src_generators_count = int(src_generators_count * 1.5) + dst_generators_count = cpu_count - src_generators_count self.set_training_data_generators ([ - SampleGeneratorFace(training_data_src_path, random_ct_samples_path=training_data_dst_path if self.options['ct_mode'] != 'none' else None, - debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, scale_range=np.array([-0.05, 0.05]) ), - output_sample_types = [ {'types' : (t_img_warped, face_type, t_mode_bgr), 'resolution':resolution, 'ct_mode': self.options['ct_mode'] }, - {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution, 'ct_mode': self.options['ct_mode'] }, - {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution } ] - ), + SampleGeneratorFace(training_data_src_path, random_ct_samples_path=random_ct_samples_path, debug=self.is_debug(), batch_size=self.get_batch_size(), + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types = [ {'types' : (t_img_warped, face_type, t.MODE_BGR), 'resolution':resolution, 'ct_mode': self.options['ct_mode'] }, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution': resolution, 'ct_mode': self.options['ct_mode'] }, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution } ], + generators_count=src_generators_count ), - SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.batch_size, - sample_process_options=SampleProcessor.Options(random_flip=self.random_flip, ), - output_sample_types = [ {'types' : (t_img_warped, face_type, t_mode_bgr), 'resolution':resolution}, - {'types' : (t.IMG_TRANSFORMED, face_type, t_mode_bgr), 'resolution': resolution}, - {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution} ]) + SampleGeneratorFace(training_data_dst_path, debug=self.is_debug(), batch_size=self.get_batch_size(), + sample_process_options=SampleProcessor.Options(random_flip=self.random_flip), + output_sample_types = [ {'types' : (t_img_warped, face_type, t.MODE_BGR), 'resolution':resolution}, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_BGR), 'resolution': resolution}, + {'types' : (t.IMG_TRANSFORMED, face_type, t.MODE_M), 'resolution': resolution} ], + generators_count=dst_generators_count ) ]) #override def get_model_filename_list(self): - return self.model.get_model_filename_list ( exclude_for_pretrain=(self.pretrain and self.iter != 0) ) +self.opt_dis_model + return self.model_filename_list #override def onSave(self): - self.save_weights_safe( self.get_model_filename_list()+self.opt_dis_model ) + for model, filename in io.progress_bar_generator(self.get_model_filename_list(), "Saving", leave=False): + model.save_weights ( self.get_strpath_storage_for_file(filename) ) + #override - def on_success_train_one_iter(self): - if len(self.CA_conv_weights_list) != 0: - exec(nnlib.import_all(), locals(), globals()) - CAInitializerMP ( self.CA_conv_weights_list ) - self.CA_conv_weights_list = [] + def onTrainOneIter(self): + ( (warped_src, target_src, target_srcm), \ + (warped_dst, target_dst, target_dstm) ) = self.generate_next_samples() + + src_loss, dst_loss = 
self.src_dst_train (warped_src, target_src, target_srcm, warped_dst, target_dst, target_dstm) - #override - def onTrainOneIter(self, generators_samples, generators_list): - warped_src, target_src, target_srcm = generators_samples[0] - warped_dst, target_dst, target_dstm = generators_samples[1] - - feed = [warped_src, warped_dst, target_src, target_srcm, target_dst, target_dstm] - - src_loss, dst_loss, = self.src_dst_train (feed) - - if self.true_face_training: - self.D_train([warped_src, warped_dst]) - - if self.options['learn_mask']: - feed = [ warped_src, warped_dst, target_srcm, target_dstm ] - src_mask_loss, dst_mask_loss, = self.src_dst_mask_train (feed) + if self.options['true_face_training'] and not self.pretrain: + self.D_train (warped_src, warped_dst) return ( ('src_loss', src_loss), ('dst_loss', dst_loss), ) #override - def onGetPreview(self, sample): - test_S = sample[0][1][0:4] #first 4 samples - test_S_m = sample[0][2][0:4] #first 4 samples - test_D = sample[1][1][0:4] - test_D_m = sample[1][2][0:4] + def onGetPreview(self, samples): + n_samples = min(4, self.get_batch_size() ) + + ( (warped_src, target_src, target_srcm), + (warped_dst, target_dst, target_dstm) ) = \ + [ [sample[0:n_samples] for sample in sample_list ] + for sample_list in samples ] if self.options['learn_mask']: - S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] + S, D, SS, DD, DDM, SD, SDM = [ np.clip(x, 0.0, 1.0) for x in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ] DDM, SDM, = [ np.repeat (x, (3,), -1) for x in [DDM, SDM] ] else: - S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([test_S,test_D] + self.AE_view ([test_S, test_D]) ) ] + S, D, SS, DD, SD, = [ np.clip(x, 0.0, 1.0) for x in ([target_src,target_dst] + self.AE_view (target_src, target_dst) ) ] result = [] st = [] - for i in range(len(test_S)): + for i in range(n_samples): ar = S[i], SS[i], D[i], DD[i], SD[i] st.append ( np.concatenate ( ar, axis=1) ) @@ -610,28 +746,25 @@ class SAEHDModel(ModelBase): if self.options['learn_mask']: st_m = [] - for i in range(len(test_S)): - ar = S[i]*test_S_m[i], SS[i], D[i]*test_D_m[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) + for i in range(n_samples): + ar = S[i]*target_srcm[i], SS[i], D[i]*target_dstm[i], DD[i]*DDM[i], SD[i]*(DDM[i]*SDM[i]) st_m.append ( np.concatenate ( ar, axis=1) ) result += [ ('SAEHD masked', np.concatenate (st_m, axis=0 )), ] return result - def predictor_func (self, face=None, dummy_predict=False): - if dummy_predict: - self.AE_convert ([ np.zeros ( (1, self.options['resolution'], self.options['resolution'], 3), dtype=np.float32 ) ]) + def predictor_func (self, face=None): + if self.options['learn_mask']: + bgr, mask_dst_dstm, mask_src_dstm = self.AE_merge (face[np.newaxis,...]) + mask = mask_dst_dstm[0] * mask_src_dstm[0] + return bgr[0], mask[...,0] else: - if self.options['learn_mask']: - bgr, mask_dst_dstm, mask_src_dstm = self.AE_convert ([face[np.newaxis,...]]) - mask = mask_dst_dstm[0] * mask_src_dstm[0] - return bgr[0], mask[...,0] - else: - bgr, = self.AE_convert ([face[np.newaxis,...]]) - return bgr[0] + bgr, = self.AE_merge (face[np.newaxis,...]) + return bgr[0] #override - def get_ConverterConfig(self): + def get_MergerConfig(self): if self.options['face_type'] == 'h': face_type = FaceType.HALF elif self.options['face_type'] == 'mf': @@ -639,8 +772,8 @@ class SAEHDModel(ModelBase): elif self.options['face_type'] == 'f': face_type = FaceType.FULL - import converters - return 
self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), converters.ConverterConfigMasked(face_type=face_type, + import merger + return self.predictor_func, (self.options['resolution'], self.options['resolution'], 3), merger.MergerConfigMasked(face_type=face_type, default_mode = 'overlay' if self.options['ct_mode'] != 'none' or self.options['face_style_power'] or self.options['bg_style_power'] else 'seamless', clip_hborder_mask_per=0.0625 if (face_type != FaceType.HALF) else 0, ) diff --git a/models/__init__.py b/models/__init__.py index 971091d..490e9c8 100644 --- a/models/__init__.py +++ b/models/__init__.py @@ -1,5 +1,5 @@ from .ModelBase import ModelBase -def import_model(name): - module = __import__('Model_'+name, globals(), locals(), [], 1) +def import_model(model_class_name): + module = __import__('Model_'+model_class_name, globals(), locals(), [], 1) return getattr(module, 'Model') diff --git a/models/archived_models.zip b/models/archived_models.zip deleted file mode 100644 index 02b7a0b..0000000 Binary files a/models/archived_models.zip and /dev/null differ diff --git a/nnlib/CAInitializer.py b/nnlib/CAInitializer.py deleted file mode 100644 index f81dd06..0000000 --- a/nnlib/CAInitializer.py +++ /dev/null @@ -1,112 +0,0 @@ -import numpy as np - -def _compute_fans(shape, data_format='channels_last'): - """Computes the number of input and output units for a weight shape. - # Arguments - shape: Integer shape tuple. - data_format: Image data format to use for convolution kernels. - Note that all kernels in Keras are standardized on the - `channels_last` ordering (even when inputs are set - to `channels_first`). - # Returns - A tuple of scalars, `(fan_in, fan_out)`. - # Raises - ValueError: in case of invalid `data_format` argument. - """ - if len(shape) == 2: - fan_in = shape[0] - fan_out = shape[1] - elif len(shape) in {3, 4, 5}: - # Assuming convolution kernels (1D, 2D or 3D). - # TH kernel shape: (depth, input_depth, ...) - # TF kernel shape: (..., input_depth, depth) - if data_format == 'channels_first': - receptive_field_size = np.prod(shape[2:]) - fan_in = shape[1] * receptive_field_size - fan_out = shape[0] * receptive_field_size - elif data_format == 'channels_last': - receptive_field_size = np.prod(shape[:-2]) - fan_in = shape[-2] * receptive_field_size - fan_out = shape[-1] * receptive_field_size - else: - raise ValueError('Invalid data_format: ' + data_format) - else: - # No specific assumptions. 
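As a quick sanity check of the fan arithmetic in this removed helper, here is a worked example for a hypothetical channels_last 2D conv kernel; the kernel shape is illustrative and not taken from the repository.

```python
# Worked example mirroring the channels_last branch of the deleted _compute_fans helper.
import numpy as np

shape = (3, 3, 64, 128)                       # (rows, cols, in_ch, out_ch); example values only
receptive_field_size = np.prod(shape[:-2])    # 3*3 = 9
fan_in = shape[-2] * receptive_field_size     # 64 * 9  = 576
fan_out = shape[-1] * receptive_field_size    # 128 * 9 = 1152
variance = 2 / fan_in                         # the 2/fan_in scaling used by CAGenerateWeights
print(fan_in, fan_out, round(variance, 5))    # 576 1152 0.00347
```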
- fan_in = np.sqrt(np.prod(shape)) - fan_out = np.sqrt(np.prod(shape)) - return fan_in, fan_out - -def _create_basis(filters, size, floatx, eps_std): - if size == 1: - return np.random.normal(0.0, eps_std, (filters, size)) - - nbb = filters // size + 1 - li = [] - for i in range(nbb): - a = np.random.normal(0.0, 1.0, (size, size)) - a = _symmetrize(a) - u, _, v = np.linalg.svd(a) - li.extend(u.T.tolist()) - p = np.array(li[:filters], dtype=floatx) - return p - -def _symmetrize(a): - return a + a.T - np.diag(a.diagonal()) - -def _scale_filters(filters, variance): - c_var = np.var(filters) - p = np.sqrt(variance / c_var) - return filters * p - -def CAGenerateWeights ( shape, floatx, data_format, eps_std=0.05, seed=None ): - if seed is not None: - np.random.seed(seed) - - fan_in, fan_out = _compute_fans(shape, data_format) - variance = 2 / fan_in - - rank = len(shape) - if rank == 3: - row, stack_size, filters_size = shape - - transpose_dimensions = (2, 1, 0) - kernel_shape = (row,) - correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0]) - correct_fft = np.fft.rfft - - elif rank == 4: - row, column, stack_size, filters_size = shape - - transpose_dimensions = (2, 3, 1, 0) - kernel_shape = (row, column) - correct_ifft = np.fft.irfft2 - correct_fft = np.fft.rfft2 - - elif rank == 5: - x, y, z, stack_size, filters_size = shape - - transpose_dimensions = (3, 4, 0, 1, 2) - kernel_shape = (x, y, z) - correct_fft = np.fft.rfftn - correct_ifft = np.fft.irfftn - else: - raise ValueError('rank unsupported') - - kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape - - init = [] - for i in range(filters_size): - basis = _create_basis( - stack_size, np.prod(kernel_fourier_shape), floatx, eps_std) - basis = basis.reshape((stack_size,) + kernel_fourier_shape) - - filters = [correct_ifft(x, kernel_shape) + - np.random.normal(0, eps_std, kernel_shape) for - x in basis] - - init.append(filters) - - # Format of array is now: filters, stack, row, column - init = np.array(init) - init = _scale_filters(init, variance) - return init.transpose(transpose_dimensions) diff --git a/nnlib/DeepPortraitRelighting.py b/nnlib/DeepPortraitRelighting.py deleted file mode 100644 index 8fc6176..0000000 --- a/nnlib/DeepPortraitRelighting.py +++ /dev/null @@ -1,241 +0,0 @@ -import math -from pathlib import Path - -import cv2 -import numpy as np -import numpy.linalg as npla - - -class DeepPortraitRelighting(object): - - def __init__(self): - from nnlib import nnlib - nnlib.import_torch() - self.torch = nnlib.torch - self.torch_device = nnlib.torch_device - self.model = DeepPortraitRelighting.build_model(self.torch, self.torch_device) - - def SH_basis(self, alt, azi): - alt = alt * math.pi / 180.0 - azi = azi * math.pi / 180.0 - - x = math.cos(alt)*math.sin(azi) - y = -math.cos(alt)*math.cos(azi) - z = math.sin(alt) - - normal = np.array([x,y,z]) - - norm_X = normal[0] - norm_Y = normal[1] - norm_Z = normal[2] - - sh_basis = np.zeros((9)) - att= np.pi*np.array([1, 2.0/3.0, 1/4.0]) - sh_basis[0] = 0.5/np.sqrt(np.pi)*att[0] - - sh_basis[1] = np.sqrt(3)/2/np.sqrt(np.pi)*norm_Y*att[1] - sh_basis[2] = np.sqrt(3)/2/np.sqrt(np.pi)*norm_Z*att[1] - sh_basis[3] = np.sqrt(3)/2/np.sqrt(np.pi)*norm_X*att[1] - - sh_basis[4] = np.sqrt(15)/2/np.sqrt(np.pi)*norm_Y*norm_X*att[2] - sh_basis[5] = np.sqrt(15)/2/np.sqrt(np.pi)*norm_Y*norm_Z*att[2] - sh_basis[6] = np.sqrt(5)/4/np.sqrt(np.pi)*(3*norm_Z**2-1)*att[2] - sh_basis[7] = np.sqrt(15)/2/np.sqrt(np.pi)*norm_X*norm_Z*att[2] - sh_basis[8] = 
np.sqrt(15)/4/np.sqrt(np.pi)*(norm_X**2-norm_Y**2)*att[2] - return sh_basis - - #n = [0..8] - def relight(self, img, alt, azi, intensity=1.0, lighten=False): - torch = self.torch - - sh = self.SH_basis (alt, azi) - sh = (sh.reshape( (1,9,1,1) ) ).astype(np.float32) - #sh *= 0.1 - sh = torch.autograd.Variable(torch.from_numpy(sh).to(self.torch_device)) - - row, col, _ = img.shape - img = cv2.resize(img, (512, 512)) - Lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB) - - inputL = Lab[:,:,0] - outputImg, outputSH = self.model(torch.autograd.Variable(torch.from_numpy(inputL[None,None,...].astype(np.float32)/255.0).to(self.torch_device)), - sh, 0) - - outputImg = outputImg[0].cpu().data.numpy() - outputImg = outputImg.transpose((1,2,0)) - outputImg = np.squeeze(outputImg) - outputImg = np.clip (outputImg, 0.0, 1.0) - outputImg = cv2.blur(outputImg, (3,3) ) - - if not lighten: - outputImg = inputL*(1.0-intensity) + (inputL*outputImg)*intensity - else: - outputImg = inputL*(1.0-intensity) + (outputImg*255.0)*intensity - - outputImg = np.clip(outputImg, 0,255).astype(np.uint8) - - Lab[:,:,0] = outputImg - result = cv2.cvtColor(Lab, cv2.COLOR_LAB2BGR) - result = cv2.resize(result, (col, row)) - return result - - @staticmethod - def build_model(torch, torch_device): - nn = torch.nn - F = torch.nn.functional - - def conv3X3(in_planes, out_planes, stride=1): - return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) - - # define the network - class BasicBlock(nn.Module): - def __init__(self, inplanes, outplanes, batchNorm_type=0, stride=1, downsample=None): - super(BasicBlock, self).__init__() - # batchNorm_type 0 means batchnormalization - # 1 means instance normalization - self.inplanes = inplanes - self.outplanes = outplanes - self.conv1 = conv3X3(inplanes, outplanes, 1) - self.conv2 = conv3X3(outplanes, outplanes, 1) - if batchNorm_type == 0: - self.bn1 = nn.BatchNorm2d(outplanes) - self.bn2 = nn.BatchNorm2d(outplanes) - else: - self.bn1 = nn.InstanceNorm2d(outplanes) - self.bn2 = nn.InstanceNorm2d(outplanes) - - self.shortcuts = nn.Conv2d(inplanes, outplanes, kernel_size=1, stride=1, bias=False) - - def forward(self, x): - out = self.conv1(x) - out = self.bn1(out) - out = F.relu(out) - out = self.conv2(out) - out = self.bn2(out) - - if self.inplanes != self.outplanes: - out += self.shortcuts(x) - else: - out += x - - out = F.relu(out) - return out - - class HourglassBlock(nn.Module): - def __init__(self, inplane, mid_plane, middleNet, skipLayer=True): - super(HourglassBlock, self).__init__() - # upper branch - self.skipLayer = True - self.upper = BasicBlock(inplane, inplane, batchNorm_type=1) - - # lower branch - self.downSample = nn.MaxPool2d(kernel_size=2, stride=2) - self.upSample = nn.Upsample(scale_factor=2, mode='nearest') - self.low1 = BasicBlock(inplane, mid_plane) - self.middle = middleNet - self.low2 = BasicBlock(mid_plane, inplane, batchNorm_type=1) - - def forward(self, x, light, count, skip_count): - # we use count to indicate wich layer we are in - # max_count indicates the from which layer, we would use skip connections - out_upper = self.upper(x) - out_lower = self.downSample(x) - out_lower = self.low1(out_lower) - out_lower, out_middle = self.middle(out_lower, light, count+1, skip_count) - out_lower = self.low2(out_lower) - out_lower = self.upSample(out_lower) - if count >= skip_count and self.skipLayer: - out = out_lower + out_upper - else: - out = out_lower - return out, out_middle - - class lightingNet(nn.Module): - def __init__(self, ncInput, 
ncOutput, ncMiddle): - super(lightingNet, self).__init__() - self.ncInput = ncInput - self.ncOutput = ncOutput - self.ncMiddle = ncMiddle - self.predict_FC1 = nn.Conv2d(self.ncInput, self.ncMiddle, kernel_size=1, stride=1, bias=False) - self.predict_relu1 = nn.PReLU() - self.predict_FC2 = nn.Conv2d(self.ncMiddle, self.ncOutput, kernel_size=1, stride=1, bias=False) - - self.post_FC1 = nn.Conv2d(self.ncOutput, self.ncMiddle, kernel_size=1, stride=1, bias=False) - self.post_relu1 = nn.PReLU() - self.post_FC2 = nn.Conv2d(self.ncMiddle, self.ncInput, kernel_size=1, stride=1, bias=False) - self.post_relu2 = nn.ReLU() # to be consistance with the original feature - - def forward(self, innerFeat, target_light, count, skip_count): - x = innerFeat[:,0:self.ncInput,:,:] # lighting feature - _, _, row, col = x.shape - # predict lighting - feat = x.mean(dim=(2,3), keepdim=True) - light = self.predict_relu1(self.predict_FC1(feat)) - light = self.predict_FC2(light) - upFeat = self.post_relu1(self.post_FC1(target_light)) - upFeat = self.post_relu2(self.post_FC2(upFeat)) - upFeat = upFeat.repeat((1,1,row, col)) - innerFeat[:,0:self.ncInput,:,:] = upFeat - return innerFeat, light#light - - - class HourglassNet(nn.Module): - def __init__(self, baseFilter = 16, gray=True): - super(HourglassNet, self).__init__() - - self.ncLight = 27 # number of channels for input to lighting network - self.baseFilter = baseFilter - - # number of channles for output of lighting network - if gray: - self.ncOutLight = 9 # gray: channel is 1 - else: - self.ncOutLight = 27 # color: channel is 3 - - self.ncPre = self.baseFilter # number of channels for pre-convolution - - # number of channels - self.ncHG3 = self.baseFilter - self.ncHG2 = 2*self.baseFilter - self.ncHG1 = 4*self.baseFilter - self.ncHG0 = 8*self.baseFilter + self.ncLight - - self.pre_conv = nn.Conv2d(1, self.ncPre, kernel_size=5, stride=1, padding=2) - self.pre_bn = nn.BatchNorm2d(self.ncPre) - - self.light = lightingNet(self.ncLight, self.ncOutLight, 128) - self.HG0 = HourglassBlock(self.ncHG1, self.ncHG0, self.light) - self.HG1 = HourglassBlock(self.ncHG2, self.ncHG1, self.HG0) - self.HG2 = HourglassBlock(self.ncHG3, self.ncHG2, self.HG1) - self.HG3 = HourglassBlock(self.ncPre, self.ncHG3, self.HG2) - - self.conv_1 = nn.Conv2d(self.ncPre, self.ncPre, kernel_size=3, stride=1, padding=1) - self.bn_1 = nn.BatchNorm2d(self.ncPre) - self.conv_2 = nn.Conv2d(self.ncPre, self.ncPre, kernel_size=1, stride=1, padding=0) - self.bn_2 = nn.BatchNorm2d(self.ncPre) - self.conv_3 = nn.Conv2d(self.ncPre, self.ncPre, kernel_size=1, stride=1, padding=0) - self.bn_3 = nn.BatchNorm2d(self.ncPre) - - self.output = nn.Conv2d(self.ncPre, 1, kernel_size=1, stride=1, padding=0) - - def forward(self, x, target_light, skip_count): - feat = self.pre_conv(x) - - feat = F.relu(self.pre_bn(feat)) - # get the inner most features - feat, out_light = self.HG3(feat, target_light, 0, skip_count) - #return feat, out_light - - feat = F.relu(self.bn_1(self.conv_1(feat))) - feat = F.relu(self.bn_2(self.conv_2(feat))) - feat = F.relu(self.bn_3(self.conv_3(feat))) - out_img = self.output(feat) - out_img = torch.sigmoid(out_img) - return out_img, out_light - - model = HourglassNet() - t_dict = torch.load( Path(__file__).parent / 'DeepPortraitRelighting.t7' ) - model.load_state_dict(t_dict) - model.to( torch_device ) - model.train(False) - return model diff --git a/nnlib/DeepPortraitRelighting.t7 b/nnlib/DeepPortraitRelighting.t7 deleted file mode 100644 index 943b172..0000000 Binary files 
a/nnlib/DeepPortraitRelighting.t7 and /dev/null differ diff --git a/nnlib/FUNIT.py b/nnlib/FUNIT.py deleted file mode 100644 index 0bd5006..0000000 --- a/nnlib/FUNIT.py +++ /dev/null @@ -1,333 +0,0 @@ -from pathlib import Path - -import numpy as np - -from interact import interact as io -from nnlib import nnlib - -""" -My port of FUNIT: Few-Shot Unsupervised Image-to-Image Translation to pure keras. -original repo: https://github.com/NVlabs/FUNIT/ -""" -class FUNIT(object): - VERSION = 1 - def __init__ (self, face_type_str, - batch_size, - encoder_nf=64, - encoder_downs=2, - encoder_res_blk=2, - class_downs=4, - class_nf=64, - class_latent=64, - mlp_blks=2, - dis_nf=64, - dis_res_blks=10, - num_classes=2, - subpixel_decoder=True, - initialize_weights=True, - - load_weights_locally=False, - weights_file_root=None, - - is_training=True, - tf_cpu_mode=0, - ): - exec( nnlib.import_all(), locals(), globals() ) - - self.batch_size = batch_size - bgr_shape = (None, None, 3) - label_shape = (1,) - - self.enc_content = modelify ( FUNIT.ContentEncoderFlow(downs=encoder_downs, nf=encoder_nf, n_res_blks=encoder_res_blk) ) ( Input(bgr_shape) ) - self.enc_class_model = modelify ( FUNIT.ClassModelEncoderFlow(downs=class_downs, nf=class_nf, latent_dim=class_latent) ) ( Input(bgr_shape) ) - self.decoder = modelify ( FUNIT.DecoderFlow(ups=encoder_downs, n_res_blks=encoder_res_blk, mlp_blks=mlp_blks, subpixel_decoder=subpixel_decoder ) ) \ - ( [ Input(K.int_shape(self.enc_content.outputs[0])[1:], name="decoder_input_1"), - Input(K.int_shape(self.enc_class_model.outputs[0])[1:], name="decoder_input_2") - ] ) - - self.dis = modelify ( FUNIT.DiscriminatorFlow(nf=dis_nf, n_res_blks=dis_res_blks, num_classes=num_classes) ) (Input(bgr_shape)) - - self.G_opt = RMSprop(lr=0.0001, decay=0.0001, tf_cpu_mode=tf_cpu_mode) - self.D_opt = RMSprop(lr=0.0001, decay=0.0001, tf_cpu_mode=tf_cpu_mode) - - xa = Input(bgr_shape, name="xa") - la = Input(label_shape, dtype="int32", name="la") - - xb = Input(bgr_shape, name="xb") - lb = Input(label_shape, dtype="int32", name="lb") - - s_xa_one = Input( ( K.int_shape(self.enc_class_model.outputs[0])[-1],), name="s_xa_input") - - c_xa = self.enc_content(xa) - - s_xa = self.enc_class_model(xa) - s_xb = self.enc_class_model(xb) - - s_xa_mean = K.mean(s_xa, axis=0) - - xr = self.decoder ([c_xa,s_xa]) - xt = self.decoder ([c_xa,s_xb]) - xr_one = self.decoder ([c_xa,s_xa_one]) - - d_xr, d_xr_feat = self.dis(xr) - d_xt, d_xt_feat = self.dis(xt) - - d_xa, d_xa_feat = self.dis(xa) - d_xb, d_xb_feat = self.dis(xb) - - def dis_gather(x,l): - tensors = [] - for i in range(self.batch_size): - t = x[i:i+1,:,:, l[i,0]] - tensors += [t] - return tensors - - def dis_gather_batch_mean(x,l, func=None): - x_shape = K.shape(x) - b,h,w,c = x_shape[0],x_shape[1],x_shape[2],x_shape[3] - b,h,w,c = [ K.cast(x, K.floatx()) for x in [b,h,w,c] ] - - tensors = dis_gather(x,l) - if func is not None: - tensors = [func(t) for t in tensors] - - return K.sum(tensors, axis=[1,2,3]) / (h*w) - - def dis_gather_mean(x,l, func=None, acc_func=None): - x_shape = K.shape(x) - b,h,w,c = x_shape[0],x_shape[1],x_shape[2],x_shape[3] - b,h,w,c = [ K.cast(x, K.floatx()) for x in [b,h,w,c] ] - - tensors = dis_gather(x,l) - - if acc_func is not None: - acc = [] - for t in tensors: - acc += [ K.sum( K.cast( acc_func(t), K.floatx() )) ] - acc = K.cast( K.sum(acc), K.floatx() ) / (b*h*w) - else: - acc = None - - if func is not None: - tensors = [func(t) for t in tensors] - - return K.sum(tensors, axis=[1,2,3] ) / (h*w), acc - - d_xr_la, 
d_xr_la_acc = dis_gather_mean(d_xr, la, acc_func=lambda x: x >= 0) - d_xt_lb, d_xt_lb_acc = dis_gather_mean(d_xt, lb, acc_func=lambda x: x >= 0) - - d_xb_lb = dis_gather_batch_mean(d_xb, lb) - - d_xb_lb_real, d_xb_lb_real_acc = dis_gather_mean(d_xb, lb, lambda x: K.relu(1.0-x), acc_func=lambda x: x >= 0) - d_xt_lb_fake, d_xt_lb_fake_acc = dis_gather_mean(d_xt, lb, lambda x: K.relu(1.0+x), acc_func=lambda x: x < 0) - - - G_c_rec = K.mean(K.abs(K.mean(d_xr_feat, axis=[1,2]) - K.mean(d_xa_feat, axis=[1,2])), axis=1 ) #* 1.0 - G_m_rec = K.mean(K.abs(K.mean(d_xt_feat, axis=[1,2]) - K.mean(d_xb_feat, axis=[1,2])), axis=1 ) #* 1.0 - G_x_rec = 0.1 * K.mean(K.abs(xr-xa), axis=[1,2,3]) - - G_loss = (-d_xr_la-d_xt_lb)*0.5 + G_x_rec + G_c_rec + G_m_rec - - G_weights = self.enc_class_model.trainable_weights + self.enc_content.trainable_weights + self.decoder.trainable_weights - ###### - - D_real = d_xb_lb_real #1.0 * - D_fake = d_xt_lb_fake #1.0 * - - l_reg = 10 * K.sum( K.gradients( d_xb_lb, xb )[0] ** 2 , axis=[1,2,3] ) #/ self.batch_size ) - - D_loss = D_real + D_fake + l_reg - - D_weights = self.dis.trainable_weights - - self.G_train = K.function ([xa, la, xb, lb],[K.mean(G_loss)], self.G_opt.get_updates(G_loss, G_weights) ) - - self.D_train = K.function ([xa, la, xb, lb],[K.mean(D_loss)], self.D_opt.get_updates(D_loss, D_weights) ) - self.get_average_class_code = K.function ([xa],[s_xa_mean]) - - self.G_convert = K.function ([xa,s_xa_one],[xr_one]) - - if initialize_weights: - #gather weights from layers for initialization - weights_list = [] - for model, _ in self.get_model_filename_list(): - if type(model) == keras.models.Model: - for layer in model.layers: - if type(layer) == FUNITAdain: - weights_list += [ x for x in layer.weights if 'kernel' in x.name ] - elif type(layer) == keras.layers.Conv2D or type(layer) == keras.layers.Dense: - weights_list += [ layer.weights[0] ] - - initer = keras.initializers.he_normal() - for w in weights_list: - K.set_value( w, K.get_value(initer(K.int_shape(w))) ) - - - if load_weights_locally: - pass - #f weights_file_root is not None: - # weights_file_root = Path(weights_file_root) - #lse: - # weights_file_root = Path(__file__).parent - #elf.weights_path = weights_file_root / ('FUNIT_%s.h5' % (face_type_str) ) - #f load_weights: - # self.model.load_weights (str(self.weights_path)) - - - - def get_model_filename_list(self): - return [[self.enc_class_model, 'enc_class_model.h5'], - [self.enc_content, 'enc_content.h5'], - [self.decoder, 'decoder.h5'], - [self.dis, 'dis.h5'], - [self.G_opt, 'G_opt.h5'], - [self.D_opt, 'D_opt.h5'], - ] - - def train(self, xa,la,xb,lb): - D_loss, = self.D_train ([xa,la,xb,lb]) - G_loss, = self.G_train ([xa,la,xb,lb]) - return G_loss, D_loss - - def get_average_class_code(self, *args, **kwargs): - return self.get_average_class_code(*args, **kwargs) - - def convert(self, *args, **kwargs): - return self.G_convert(*args, **kwargs) - - @staticmethod - def ContentEncoderFlow(downs=2, nf=64, n_res_blks=2): - exec (nnlib.import_all(), locals(), globals()) - - def ResBlock(dim): - def func(input): - x = input - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = InstanceNormalization()(x) - x = ReLU()(x) - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = InstanceNormalization()(x) - - return Add()([x,input]) - return func - - def func(x): - x = Conv2D (nf, kernel_size=7, strides=1, padding='same')(x) - x = InstanceNormalization()(x) - x = ReLU()(x) - for i in range(downs): - x = Conv2D (nf * 2**(i+1), kernel_size=4, strides=2, 
padding='valid')(ZeroPadding2D(1)(x)) - x = InstanceNormalization()(x) - x = ReLU()(x) - for i in range(n_res_blks): - x = ResBlock( nf * 2**downs )(x) - return x - - return func - - @staticmethod - def ClassModelEncoderFlow(downs=4, nf=64, latent_dim=64): - exec (nnlib.import_all(), locals(), globals()) - - def func(x): - x = Conv2D (nf, kernel_size=7, strides=1, padding='same', activation='relu')(x) - for i in range(downs): - x = Conv2D (nf * min ( 4, 2**(i+1) ), kernel_size=4, strides=2, padding='valid', activation='relu')(ZeroPadding2D(1)(x)) - x = GlobalAveragePooling2D()(x) - x = Dense(latent_dim)(x) - return x - - return func - - @staticmethod - def DecoderFlow(ups, n_res_blks=2, mlp_blks=2, subpixel_decoder=False ): - exec (nnlib.import_all(), locals(), globals()) - - def ResBlock(dim): - def func(input): - inp, mlp = input - x = inp - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = FUNITAdain(kernel_initializer='he_normal')([x,mlp]) - x = ReLU()(x) - x = Conv2D(dim, 3, strides=1, padding='same')(x) - x = FUNITAdain(kernel_initializer='he_normal')([x,mlp]) - return Add()([x,inp]) - return func - - def func(inputs): - x , class_code = inputs - - nf = K.int_shape(x)[-1] - - ### MLP block inside decoder - mlp = class_code - for i in range(mlp_blks): - mlp = Dense(nf, activation='relu')(mlp) - - for i in range(n_res_blks): - x = ResBlock(nf)( [x,mlp] ) - - for i in range(ups): - - if subpixel_decoder: - x = Conv2D (4* (nf // 2**(i+1)), kernel_size=3, strides=1, padding='same')(x) - x = SubpixelUpscaler()(x) - else: - x = UpSampling2D()(x) - x = Conv2D (nf // 2**(i+1), kernel_size=5, strides=1, padding='same')(x) - - x = InstanceNormalization()(x) - x = ReLU()(x) - - rgb = Conv2D (3, kernel_size=7, strides=1, padding='same', activation='tanh')(x) - return rgb - - return func - - - - @staticmethod - def DiscriminatorFlow(nf, n_res_blks, num_classes ): - exec (nnlib.import_all(), locals(), globals()) - - n_layers = n_res_blks // 2 - - def ActFirstResBlock(fout): - def func(x): - fin = K.int_shape(x)[-1] - fhid = min(fin, fout) - - if fin != fout: - x_s = Conv2D (fout, kernel_size=1, strides=1, padding='valid', use_bias=False)(x) - else: - x_s = x - - x = LeakyReLU(0.2)(x) - x = Conv2D (fhid, kernel_size=3, strides=1, padding='valid')(ZeroPadding2D(1)(x)) - x = LeakyReLU(0.2)(x) - x = Conv2D (fout, kernel_size=3, strides=1, padding='valid')(ZeroPadding2D(1)(x)) - return Add()([x_s, x]) - - return func - - def func( x ): - l_nf = nf - x = Conv2D (l_nf, kernel_size=7, strides=1, padding='valid')(ZeroPadding2D(3)(x)) - for i in range(n_layers-1): - l_nf_out = min( l_nf*2, 1024 ) - x = ActFirstResBlock(l_nf)(x) - x = ActFirstResBlock(l_nf_out)(x) - x = AveragePooling2D( pool_size=3, strides=2, padding='valid' )(ZeroPadding2D(1)(x)) - l_nf = min( l_nf*2, 1024 ) - - l_nf_out = min( l_nf*2, 1024 ) - x = ActFirstResBlock(l_nf)(x) - feat = x = ActFirstResBlock(l_nf_out)(x) - - x = LeakyReLU(0.2)(x) - x = Conv2D (num_classes, kernel_size=1, strides=1, padding='valid')(x) - - return x, feat - - return func \ No newline at end of file diff --git a/nnlib/TernausNet.py b/nnlib/TernausNet.py deleted file mode 100644 index 9016ead..0000000 --- a/nnlib/TernausNet.py +++ /dev/null @@ -1,157 +0,0 @@ -import os -import pickle -from functools import partial -from pathlib import Path - -import cv2 -import numpy as np - -from interact import interact as io -from nnlib import nnlib - -""" -Dataset used to train located in official DFL mega.nz folder -https://mega.nz/#F!b9MzCK4B!zEAG9txu7uaRUjXz9PtBqg - 
-using https://github.com/ternaus/TernausNet -TernausNet: U-Net with VGG11 Encoder Pre-Trained on ImageNet for Image Segmentation -""" - -class TernausNet(object): - VERSION = 1 - def __init__ (self, name, resolution, face_type_str, load_weights=True, weights_file_root=None, training=False): - exec( nnlib.import_all(), locals(), globals() ) - - self.model = TernausNet.BuildModel(resolution, ngf=64) - - if weights_file_root is not None: - weights_file_root = Path(weights_file_root) - else: - weights_file_root = Path(__file__).parent - - self.weights_path = weights_file_root / ('%s_%d_%s.h5' % (name, resolution, face_type_str) ) - - if load_weights: - self.model.load_weights (str(self.weights_path)) - else: - if training: - try: - with open( Path(__file__).parent / 'vgg11_enc_weights.npy', 'rb' ) as f: - d = pickle.loads (f.read()) - - for i in [0,3,6,8,11,13,16,18]: - s = 'features.%d' % i - - self.model.get_layer (s).set_weights ( d[s] ) - except: - io.log_err("Unable to load VGG11 pretrained weights from vgg11_enc_weights.npy") - - conv_weights_list = [] - for layer in self.model.layers: - if 'CA.' in layer.name: - conv_weights_list += [layer.weights[0]] #Conv2D kernel_weights - CAInitializerMP ( conv_weights_list ) - - if training: - inp_t = Input ( (resolution, resolution, 3) ) - real_t = Input ( (resolution, resolution, 1) ) - out_t = self.model(inp_t) - - loss = K.mean(10*K.binary_crossentropy(real_t,out_t) ) - - out_t_diff1 = out_t[:, 1:, :, :] - out_t[:, :-1, :, :] - out_t_diff2 = out_t[:, :, 1:, :] - out_t[:, :, :-1, :] - - total_var_loss = K.mean( 0.1*K.abs(out_t_diff1), axis=[1, 2, 3] ) + K.mean( 0.1*K.abs(out_t_diff2), axis=[1, 2, 3] ) - - opt = Adam(lr=0.0001, beta_1=0.5, beta_2=0.999, tf_cpu_mode=2) - - self.train_func = K.function ( [inp_t, real_t], [K.mean(loss)], opt.get_updates( [loss], self.model.trainable_weights) ) - - - def __enter__(self): - return self - - def __exit__(self, exc_type=None, exc_value=None, traceback=None): - return False #pass exception between __enter__ and __exit__ to outter level - - def save_weights(self): - self.model.save_weights (str(self.weights_path)) - - def train(self, inp, real): - loss, = self.train_func ([inp, real]) - return loss - - def extract (self, input_image, is_input_tanh=False): - input_shape_len = len(input_image.shape) - if input_shape_len == 3: - input_image = input_image[np.newaxis,...] 
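For reference, the training branch above combines a weighted binary cross-entropy with a total-variation penalty on the predicted mask. A minimal NumPy sketch of that smoothness term (function and argument names here are illustrative, not from the deleted file):

```python
import numpy as np

def total_variation(mask, weight=0.1):
    # mask: (H, W, 1) float array of predicted probabilities
    dy = np.abs(mask[1:, :, :] - mask[:-1, :, :])   # vertical neighbour differences
    dx = np.abs(mask[:, 1:, :] - mask[:, :-1, :])   # horizontal neighbour differences
    # the deleted code averages each term and scales it by 0.1
    return weight * (dy.mean() + dx.mean())
```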
- - result = np.clip ( self.model.predict( [input_image] ), 0, 1.0 ) - result[result < 0.1] = 0 #get rid of noise - - if input_shape_len == 3: - result = result[0] - - return result - - @staticmethod - def BuildModel ( resolution, ngf=64): - exec( nnlib.import_all(), locals(), globals() ) - inp = Input ( (resolution,resolution,3) ) - x = inp - x = TernausNet.Flow(ngf=ngf)(x) - model = Model(inp,x) - return model - - @staticmethod - def Flow(ngf=64): - exec( nnlib.import_all(), locals(), globals() ) - - def func(input): - x = input - - x0 = x = Conv2D(ngf, kernel_size=3, strides=1, padding='same', activation='relu', name='features.0')(x) - x = BlurPool(filt_size=3)(x) - - x1 = x = Conv2D(ngf*2, kernel_size=3, strides=1, padding='same', activation='relu', name='features.3')(x) - x = BlurPool(filt_size=3)(x) - - x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.6')(x) - x2 = x = Conv2D(ngf*4, kernel_size=3, strides=1, padding='same', activation='relu', name='features.8')(x) - x = BlurPool(filt_size=3)(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.11')(x) - x3 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.13')(x) - x = BlurPool(filt_size=3)(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.16')(x) - x4 = x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', activation='relu', name='features.18')(x) - x = BlurPool(filt_size=3)(x) - - x = Conv2D(ngf*8, kernel_size=3, strides=1, padding='same', name='CA.1')(x) - - x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu', name='CA.2') (x) - x = Concatenate(axis=3)([ x, x4]) - x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu', name='CA.3') (x) - - x = Conv2DTranspose (ngf*4, 3, strides=2, padding='same', activation='relu', name='CA.4') (x) - x = Concatenate(axis=3)([ x, x3]) - x = Conv2D (ngf*8, 3, strides=1, padding='same', activation='relu', name='CA.5') (x) - - x = Conv2DTranspose (ngf*2, 3, strides=2, padding='same', activation='relu', name='CA.6') (x) - x = Concatenate(axis=3)([ x, x2]) - x = Conv2D (ngf*4, 3, strides=1, padding='same', activation='relu', name='CA.7') (x) - - x = Conv2DTranspose (ngf, 3, strides=2, padding='same', activation='relu', name='CA.8') (x) - x = Concatenate(axis=3)([ x, x1]) - x = Conv2D (ngf*2, 3, strides=1, padding='same', activation='relu', name='CA.9') (x) - - x = Conv2DTranspose (ngf // 2, 3, strides=2, padding='same', activation='relu', name='CA.10') (x) - x = Concatenate(axis=3)([ x, x0]) - x = Conv2D (ngf, 3, strides=1, padding='same', activation='relu', name='CA.11') (x) - - return Conv2D(1, 3, strides=1, padding='same', activation='sigmoid', name='CA.12')(x) - - - return func diff --git a/nnlib/VGGFace.py b/nnlib/VGGFace.py deleted file mode 100644 index 60a1de4..0000000 --- a/nnlib/VGGFace.py +++ /dev/null @@ -1,51 +0,0 @@ -from nnlib import nnlib - -def VGGFace(): - exec(nnlib.import_all(), locals(), globals()) - - img_input = Input(shape=(224,224,3) ) - - # Block 1 - x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_1')(img_input) - x = Conv2D(64, (3, 3), activation='relu', padding='same', name='conv1_2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool1')(x) - - # Block 2 - x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_1')(x) - x = Conv2D(128, (3, 3), activation='relu', padding='same', name='conv2_2')(x) - x = 
MaxPooling2D((2, 2), strides=(2, 2), name='pool2')(x) - - # Block 3 - x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_1')(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_2')(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', name='conv3_3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool3')(x) - - # Block 4 - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_1')(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_2')(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv4_3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool4')(x) - - # Block 5 - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_1')(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_2')(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', name='conv5_3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='pool5')(x) - - - # Classification block - x = Flatten(name='flatten')(x) - x = Dense(4096, name='fc6')(x) - x = Activation('relu', name='fc6/relu')(x) - x = Dense(4096, name='fc7')(x) - x = Activation('relu', name='fc7/relu')(x) - x = Dense(2622, name='fc8')(x) - x = Activation('softmax', name='fc8/softmax')(x) - - model = Model(img_input, x, name='vggface_vgg16') - weights_path = keras.utils.data_utils.get_file('rcmalli_vggface_tf_vgg16.h5', - 'https://github.com/rcmalli/keras-vggface/releases/download/v2.0/rcmalli_vggface_tf_vgg16.h5') - model.load_weights(weights_path, by_name=True) - - return model \ No newline at end of file diff --git a/nnlib/__init__.py b/nnlib/__init__.py deleted file mode 100644 index 6876185..0000000 --- a/nnlib/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .nnlib import nnlib -from .FUNIT import FUNIT -from .TernausNet import TernausNet -from .VGGFace import VGGFace -from .DeepPortraitRelighting import DeepPortraitRelighting \ No newline at end of file diff --git a/nnlib/device.py b/nnlib/device.py deleted file mode 100644 index 0264842..0000000 --- a/nnlib/device.py +++ /dev/null @@ -1,311 +0,0 @@ -import sys -import ctypes -import os -import json -import numpy as np - -#you can set DFL_TF_MIN_REQ_CAP manually for your build -#the reason why we cannot check tensorflow.version is it requires import tensorflow -tf_min_req_cap = int(os.environ.get("DFL_TF_MIN_REQ_CAP", 35)) - -class device: - backend = None - class Config(): - force_gpu_idx = -1 - multi_gpu = False - force_gpu_idxs = None - choose_worst_gpu = False - gpu_idxs = [] - gpu_names = [] - gpu_compute_caps = [] - gpu_vram_gb = [] - allow_growth = True - use_fp16 = False - cpu_only = False - backend = None - def __init__ (self, force_gpu_idx = -1, - multi_gpu = False, - force_gpu_idxs = None, - choose_worst_gpu = False, - allow_growth = True, - use_fp16 = False, - cpu_only = False, - **in_options): - - self.backend = device.backend - self.use_fp16 = use_fp16 - self.cpu_only = cpu_only - - if not self.cpu_only: - self.cpu_only = (self.backend == "tensorflow-cpu") - - if not self.cpu_only: - self.force_gpu_idx = force_gpu_idx - self.multi_gpu = multi_gpu - self.force_gpu_idxs = force_gpu_idxs - self.choose_worst_gpu = choose_worst_gpu - self.allow_growth = allow_growth - - self.gpu_idxs = [] - - if force_gpu_idxs is not None: - for idx in force_gpu_idxs.split(','): - idx = int(idx) - if device.isValidDeviceIdx(idx): - self.gpu_idxs.append(idx) - else: - gpu_idx = force_gpu_idx if (force_gpu_idx >= 0 and 
device.isValidDeviceIdx(force_gpu_idx)) else device.getBestValidDeviceIdx() if not choose_worst_gpu else device.getWorstValidDeviceIdx() - if gpu_idx != -1: - if self.multi_gpu: - self.gpu_idxs = device.getDeviceIdxsEqualModel( gpu_idx ) - if len(self.gpu_idxs) <= 1: - self.multi_gpu = False - else: - self.gpu_idxs = [gpu_idx] - - self.cpu_only = (len(self.gpu_idxs) == 0) - - - if not self.cpu_only: - self.gpu_names = [] - self.gpu_compute_caps = [] - self.gpu_vram_gb = [] - for gpu_idx in self.gpu_idxs: - self.gpu_names += [device.getDeviceName(gpu_idx)] - self.gpu_compute_caps += [ device.getDeviceComputeCapability(gpu_idx) ] - self.gpu_vram_gb += [ device.getDeviceVRAMTotalGb(gpu_idx) ] - self.cpu_only = (len(self.gpu_idxs) == 0) - else: - self.gpu_names = ['CPU'] - self.gpu_compute_caps = [99] - self.gpu_vram_gb = [0] - - if self.cpu_only: - self.backend = "tensorflow-cpu" - - @staticmethod - def getValidDeviceIdxsEnumerator(): - if device.backend == "plaidML": - for i in range(plaidML_devices_count): - yield i - elif device.backend == "tensorflow": - for dev in cuda_devices: - yield dev['index'] - - @staticmethod - def getValidDevicesWithAtLeastTotalMemoryGB(totalmemsize_gb): - result = [] - if device.backend == "plaidML": - for i in device.getValidDeviceIdxsEnumerator(): - if plaidML_devices[i]['globalMemSize'] >= totalmemsize_gb*1024*1024*1024: - result.append (i) - elif device.backend == "tensorflow": - for dev in cuda_devices: - if dev['total_mem'] >= totalmemsize_gb*1024*1024*1024: - result.append ( dev['index'] ) - - return result - - @staticmethod - def getValidDevicesIdxsWithNamesList(): - if device.backend == "plaidML": - return [ (i, plaidML_devices[i]['description'] ) for i in device.getValidDeviceIdxsEnumerator() ] - elif device.backend == "tensorflow": - return [ ( dev['index'], dev['name'] ) for dev in cuda_devices ] - elif device.backend == "tensorflow-cpu": - return [ (0, 'CPU') ] - - @staticmethod - def getDeviceVRAMTotalGb (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['globalMemSize'] / (1024*1024*1024) - elif device.backend == "tensorflow": - for dev in cuda_devices: - if idx == dev['index']: - return round ( dev['total_mem'] / (1024*1024*1024) ) - return 0 - - @staticmethod - def getBestValidDeviceIdx(): - if device.backend == "plaidML": - idx = -1 - idx_mem = 0 - for i in device.getValidDeviceIdxsEnumerator(): - total = plaidML_devices[i]['globalMemSize'] - if total > idx_mem: - idx = i - idx_mem = total - - return idx - elif device.backend == "tensorflow": - idx = -1 - idx_mem = 0 - for dev in cuda_devices: - if dev['total_mem'] > idx_mem: - idx = dev['index'] - idx_mem = dev['total_mem'] - - return idx - - @staticmethod - def getWorstValidDeviceIdx(): - if device.backend == "plaidML": - idx = -1 - idx_mem = sys.maxsize - for i in device.getValidDeviceIdxsEnumerator(): - total = plaidML_devices[i]['globalMemSize'] - if total < idx_mem: - idx = i - idx_mem = total - - return idx - elif device.backend == "tensorflow": - idx = -1 - idx_mem = sys.maxsize - for dev in cuda_devices: - if dev['total_mem'] < idx_mem: - idx = dev['index'] - idx_mem = dev['total_mem'] - - return idx - - @staticmethod - def isValidDeviceIdx(idx): - if device.backend == "plaidML": - return idx in [*device.getValidDeviceIdxsEnumerator()] - elif device.backend == "tensorflow": - for dev in cuda_devices: - if idx == dev['index']: - return True - return False - - @staticmethod - def getDeviceIdxsEqualModel(idx): - if device.backend == 
"plaidML": - result = [] - idx_name = plaidML_devices[idx]['description'] - for i in device.getValidDeviceIdxsEnumerator(): - if plaidML_devices[i]['description'] == idx_name: - result.append (i) - - return result - elif device.backend == "tensorflow": - result = [] - idx_name = device.getDeviceName(idx) - for dev in cuda_devices: - if dev['name'] == idx_name: - result.append ( dev['index'] ) - - - return result - - @staticmethod - def getDeviceName (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['description'] - elif device.backend == "tensorflow": - for dev in cuda_devices: - if dev['index'] == idx: - return dev['name'] - - return None - - @staticmethod - def getDeviceID (idx): - if device.backend == "plaidML": - if idx < plaidML_devices_count: - return plaidML_devices[idx]['id'].decode() - - return None - - @staticmethod - def getDeviceComputeCapability(idx): - if device.backend == "plaidML": - return 99 - elif device.backend == "tensorflow": - for dev in cuda_devices: - if dev['index'] == idx: - return dev['cc'] - return 0 - -plaidML_build = os.environ.get("DFL_PLAIDML_BUILD", "0") == "1" -plaidML_devices = None -plaidML_devices_count = 0 -cuda_devices = None - -if plaidML_build: - if plaidML_devices is None: - plaidML_devices = [] - # Using plaidML OpenCL backend to determine system devices - try: - os.environ['PLAIDML_EXPERIMENTAL'] = 'false' #this enables work plaidML without run 'plaidml-setup' - import plaidml - ctx = plaidml.Context() - for d in plaidml.devices(ctx, return_all=True)[0]: - details = json.loads(d.details) - if details['type'] == 'CPU': #skipping opencl-CPU - continue - plaidML_devices += [ {'id':d.id, - 'globalMemSize' : int(details['globalMemSize']), - 'description' : d.description.decode() - }] - ctx.shutdown() - except: - pass - plaidML_devices_count = len(plaidML_devices) - if plaidML_devices_count != 0: - device.backend = "plaidML" -else: - if cuda_devices is None: - cuda_devices = [] - libnames = ('libcuda.so', 'libcuda.dylib', 'nvcuda.dll') - cuda = None - for libname in libnames: - try: - cuda = ctypes.CDLL(libname) - except: - continue - else: - break - - if cuda is not None: - nGpus = ctypes.c_int() - name = b' ' * 200 - cc_major = ctypes.c_int() - cc_minor = ctypes.c_int() - freeMem = ctypes.c_size_t() - totalMem = ctypes.c_size_t() - - result = ctypes.c_int() - device_t = ctypes.c_int() - context = ctypes.c_void_p() - error_str = ctypes.c_char_p() - - if cuda.cuInit(0) == 0 and \ - cuda.cuDeviceGetCount(ctypes.byref(nGpus)) == 0: - for i in range(nGpus.value): - if cuda.cuDeviceGet(ctypes.byref(device_t), i) != 0 or \ - cuda.cuDeviceGetName(ctypes.c_char_p(name), len(name), device_t) != 0 or \ - cuda.cuDeviceComputeCapability(ctypes.byref(cc_major), ctypes.byref(cc_minor), device_t) != 0: - continue - - if cuda.cuCtxCreate_v2(ctypes.byref(context), 0, device_t) == 0: - if cuda.cuMemGetInfo_v2(ctypes.byref(freeMem), ctypes.byref(totalMem)) == 0: - cc = cc_major.value * 10 + cc_minor.value - if cc >= tf_min_req_cap: - cuda_devices.append ( {'index':i, - 'name':name.split(b'\0', 1)[0].decode(), - 'total_mem':totalMem.value, - 'free_mem':freeMem.value, - 'cc':cc - } - ) - cuda.cuCtxDetach(context) - - if len(cuda_devices) != 0: - device.backend = "tensorflow" - -if device.backend is None: - device.backend = "tensorflow-cpu" diff --git a/nnlib/nnlib.py b/nnlib/nnlib.py deleted file mode 100644 index 975cf94..0000000 --- a/nnlib/nnlib.py +++ /dev/null @@ -1,1904 +0,0 @@ -import contextlib -import 
multiprocessing -import os -import sys -from pathlib import Path - -import numpy as np - -from interact import interact as io -from joblib import Subprocessor -from utils import std_utils - -from .CAInitializer import CAGenerateWeights -from .device import device - - -class nnlib(object): - device = device #forwards nnlib.devicelib to device in order to use nnlib as standalone lib - DeviceConfig = device.Config - active_DeviceConfig = DeviceConfig() #default is one best GPU - - backend = "" - - dlib = None - - torch = None - torch_device = None - - keras = None - keras_contrib = None - - tf = None - tf_sess = None - tf_sess_config = None - - PML = None - PMLK = None - PMLTile= None - - code_import_keras = None - code_import_keras_contrib = None - code_import_all = None - - code_import_dlib = None - - - ResNet = None - UNet = None - UNetTemporalPredictor = None - NLayerDiscriminator = None - - code_import_keras_string = \ -""" -keras = nnlib.keras -K = keras.backend -KL = keras.layers - -Input = KL.Input - -Dense = KL.Dense -Conv2D = KL.Conv2D -WScaleConv2DLayer = nnlib.WScaleConv2DLayer -Conv2DTranspose = KL.Conv2DTranspose -EqualConv2D = nnlib.EqualConv2D -SeparableConv2D = KL.SeparableConv2D -DepthwiseConv2D = KL.DepthwiseConv2D -MaxPooling2D = KL.MaxPooling2D -AveragePooling2D = KL.AveragePooling2D -GlobalAveragePooling2D = KL.GlobalAveragePooling2D -UpSampling2D = KL.UpSampling2D -BatchNormalization = KL.BatchNormalization -PixelNormalization = nnlib.PixelNormalization - -Activation = KL.Activation -LeakyReLU = KL.LeakyReLU -ELU = KL.ELU -GeLU = nnlib.GeLU -ReLU = KL.ReLU -PReLU = KL.PReLU -tanh = KL.Activation('tanh') -sigmoid = KL.Activation('sigmoid') -Dropout = KL.Dropout -Softmax = KL.Softmax - -Lambda = KL.Lambda -Add = KL.Add -Multiply = KL.Multiply -Concatenate = KL.Concatenate - - -Flatten = KL.Flatten -Reshape = KL.Reshape - -ZeroPadding2D = KL.ZeroPadding2D - -RandomNormal = keras.initializers.RandomNormal -Model = keras.models.Model - -Adam = nnlib.Adam -RMSprop = nnlib.RMSprop -LookaheadOptimizer = nnlib.LookaheadOptimizer -SGD = nnlib.keras.optimizers.SGD - -modelify = nnlib.modelify -gaussian_blur = nnlib.gaussian_blur -style_loss = nnlib.style_loss -dssim = nnlib.dssim - -DenseMaxout = nnlib.DenseMaxout -PixelShuffler = nnlib.PixelShuffler -SubpixelUpscaler = nnlib.SubpixelUpscaler -SubpixelDownscaler = nnlib.SubpixelDownscaler -Scale = nnlib.Scale -BilinearInterpolation = nnlib.BilinearInterpolation -BlurPool = nnlib.BlurPool -FUNITAdain = nnlib.FUNITAdain -SelfAttention = nnlib.SelfAttention - -CAInitializerMP = nnlib.CAInitializerMP - -#ReflectionPadding2D = nnlib.ReflectionPadding2D -#AddUniformNoise = nnlib.AddUniformNoise -""" - code_import_keras_contrib_string = \ -""" -keras_contrib = nnlib.keras_contrib -GroupNormalization = keras_contrib.layers.GroupNormalization -InstanceNormalization = keras_contrib.layers.InstanceNormalization -""" - code_import_dlib_string = \ -""" -dlib = nnlib.dlib -""" - - code_import_all_string = \ -""" -DSSIMMSEMaskLoss = nnlib.DSSIMMSEMaskLoss -ResNet = nnlib.ResNet -UNet = nnlib.UNet -UNetTemporalPredictor = nnlib.UNetTemporalPredictor -NLayerDiscriminator = nnlib.NLayerDiscriminator -""" - @staticmethod - def import_torch(device_config=None): - if nnlib.torch is not None: - return - - if device_config is None: - device_config = nnlib.active_DeviceConfig - else: - nnlib.active_DeviceConfig = device_config - - if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): - os.environ.pop('CUDA_VISIBLE_DEVICES') - - io.log_info ("Using PyTorch 
backend.") - import torch - nnlib.torch = torch - - if device_config.cpu_only or device_config.backend == 'plaidML': - nnlib.torch_device = torch.device(type='cpu') - else: - nnlib.torch_device = torch.device(type='cuda', index=device_config.gpu_idxs[0] ) - torch.cuda.set_device(nnlib.torch_device) - - @staticmethod - def _import_tf(device_config): - if nnlib.tf is not None: - return nnlib.code_import_tf - - if 'TF_SUPPRESS_STD' in os.environ.keys() and os.environ['TF_SUPPRESS_STD'] == '1': - suppressor = std_utils.suppress_stdout_stderr().__enter__() - else: - suppressor = None - - if 'CUDA_VISIBLE_DEVICES' in os.environ.keys(): - os.environ.pop('CUDA_VISIBLE_DEVICES') - - os.environ['CUDA_​CACHE_​MAXSIZE'] = '536870912' #512Mb (32mb default) - - if sys.platform[0:3] == 'win': - if len(device_config.gpu_idxs) == 1: - os.environ['CUDA_CACHE_PATH'] = \ - str(Path(os.environ['APPDATA']) / 'NVIDIA' / ('ComputeCache_' + device_config.gpu_names[0].replace(' ','_'))) - - os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '2' - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #tf log errors only - - import warnings - warnings.simplefilter(action='ignore', category=FutureWarning) - - import tensorflow as tf - nnlib.tf = tf - - if device_config.cpu_only: - config = tf.ConfigProto(device_count={'GPU': 0}) - else: - config = tf.ConfigProto() - - visible_device_list = '' - for idx in device_config.gpu_idxs: - visible_device_list += str(idx) + ',' - config.gpu_options.visible_device_list=visible_device_list[:-1] - - config.gpu_options.force_gpu_compatible = True - config.gpu_options.allow_growth = device_config.allow_growth - nnlib.tf_sess_config = config - - nnlib.tf_sess = tf.Session(config=config) - - if suppressor is not None: - suppressor.__exit__() - - @staticmethod - def import_keras(device_config): - if nnlib.keras is not None: - return nnlib.code_import_keras - - nnlib.backend = device_config.backend - if "tensorflow" in nnlib.backend: - nnlib._import_tf(device_config) - elif nnlib.backend == "plaidML": - os.environ["KERAS_BACKEND"] = "plaidml.keras.backend" - os.environ["PLAIDML_DEVICE_IDS"] = ",".join ( [ nnlib.device.getDeviceID(idx) for idx in device_config.gpu_idxs] ) - - #if "tensorflow" in nnlib.backend: - # nnlib.keras = nnlib.tf.keras - #else: - import keras as keras_ - nnlib.keras = keras_ - - if 'KERAS_BACKEND' in os.environ: - os.environ.pop('KERAS_BACKEND') - - if nnlib.backend == "plaidML": - import plaidml - import plaidml.tile - nnlib.PML = plaidml - nnlib.PMLK = plaidml.keras.backend - nnlib.PMLTile = plaidml.tile - - if device_config.use_fp16: - nnlib.keras.backend.set_floatx('float16') - - if "tensorflow" in nnlib.backend: - nnlib.keras.backend.set_session(nnlib.tf_sess) - - nnlib.keras.backend.set_image_data_format('channels_last') - - nnlib.code_import_keras = compile (nnlib.code_import_keras_string,'','exec') - nnlib.__initialize_keras_functions() - - return nnlib.code_import_keras - - @staticmethod - def __initialize_keras_functions(): - keras = nnlib.keras - K = keras.backend - KL = keras.layers - backend = nnlib.backend - - def modelify(model_functor): - def func(tensor): - return keras.models.Model (tensor, model_functor(tensor)) - return func - - nnlib.modelify = modelify - - def gaussian_blur(radius=2.0): - def gaussian(x, mu, sigma): - return np.exp(-(float(x) - float(mu)) ** 2 / (2 * sigma ** 2)) - - def make_kernel(sigma): - kernel_size = max(3, int(2 * 2 * sigma + 1)) - mean = np.floor(0.5 * kernel_size) - kernel_1d = np.array([gaussian(x, mean, sigma) for x in 
range(kernel_size)]) - np_kernel = np.outer(kernel_1d, kernel_1d).astype(dtype=K.floatx()) - kernel = np_kernel / np.sum(np_kernel) - return kernel - - gauss_kernel = make_kernel(radius) - gauss_kernel = gauss_kernel[:, :,np.newaxis, np.newaxis] - - def func(input): - inputs = [ input[:,:,:,i:i+1] for i in range( K.int_shape( input )[-1] ) ] - - outputs = [] - for i in range(len(inputs)): - outputs += [ K.conv2d( inputs[i] , K.constant(gauss_kernel) , strides=(1,1), padding="same") ] - - return K.concatenate (outputs, axis=-1) - return func - nnlib.gaussian_blur = gaussian_blur - - def style_loss(gaussian_blur_radius=0.0, loss_weight=1.0, wnd_size=0, step_size=1): - if gaussian_blur_radius > 0.0: - gblur = gaussian_blur(gaussian_blur_radius) - - def sd(content, style, loss_weight): - content_nc = K.int_shape(content)[-1] - style_nc = K.int_shape(style)[-1] - if content_nc != style_nc: - raise Exception("style_loss() content_nc != style_nc") - - axes = [1,2] - c_mean, c_var = K.mean(content, axis=axes, keepdims=True), K.var(content, axis=axes, keepdims=True) - s_mean, s_var = K.mean(style, axis=axes, keepdims=True), K.var(style, axis=axes, keepdims=True) - c_std, s_std = K.sqrt(c_var + 1e-5), K.sqrt(s_var + 1e-5) - - mean_loss = K.sum(K.square(c_mean-s_mean)) - std_loss = K.sum(K.square(c_std-s_std)) - - return (mean_loss + std_loss) * ( loss_weight / float(content_nc) ) - - def func(target, style): - if wnd_size == 0: - if gaussian_blur_radius > 0.0: - return sd( gblur(target), gblur(style), loss_weight=loss_weight) - else: - return sd( target, style, loss_weight=loss_weight ) - else: - #currently unused - if nnlib.tf is not None: - sh = K.int_shape(target)[1] - k = (sh-wnd_size) // step_size + 1 - if gaussian_blur_radius > 0.0: - target, style = gblur(target), gblur(style) - target = nnlib.tf.image.extract_image_patches(target, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') - style = nnlib.tf.image.extract_image_patches(style, [1,k,k,1], [1,1,1,1], [1,step_size,step_size,1], 'VALID') - return sd( target, style, loss_weight ) - if nnlib.PML is not None: - print ("Sorry, plaidML backend does not support style_loss") - return 0 - return func - nnlib.style_loss = style_loss - - def dssim(kernel_size=11, k1=0.01, k2=0.03, max_value=1.0): - # port of tf.image.ssim to pure keras in order to work on plaidML backend. - - def func(y_true, y_pred): - ch = K.shape(y_pred)[-1] - - def _fspecial_gauss(size, sigma): - #Function to mimic the 'fspecial' gaussian MATLAB function. 
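The 'fspecial' comment above refers to MATLAB's Gaussian window; the deleted dssim() rebuilds it with K.softmax over summed squared coordinates. An equivalent standalone NumPy sketch (illustrative only):

```python
import numpy as np

def fspecial_gauss(size, sigma):
    coords = np.arange(size, dtype=np.float64) - (size - 1) / 2.0
    g = -0.5 * coords ** 2 / sigma ** 2
    g = g[:, None] + g[None, :]   # negative squared distance on a size x size grid
    g = np.exp(g)                 # exp then normalize == softmax over the flattened grid
    return g / g.sum()
```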
- coords = np.arange(0, size, dtype=K.floatx()) - coords -= (size - 1 ) / 2.0 - g = coords**2 - g *= ( -0.5 / (sigma**2) ) - g = np.reshape (g, (1,-1)) + np.reshape(g, (-1,1) ) - g = K.constant ( np.reshape (g, (1,-1)) ) - g = K.softmax(g) - g = K.reshape (g, (size, size, 1, 1)) - g = K.tile (g, (1,1,ch,1)) - return g - - kernel = _fspecial_gauss(kernel_size,1.5) - - def reducer(x): - return K.depthwise_conv2d(x, kernel, strides=(1, 1), padding='valid') - - c1 = (k1 * max_value) ** 2 - c2 = (k2 * max_value) ** 2 - - mean0 = reducer(y_true) - mean1 = reducer(y_pred) - num0 = mean0 * mean1 * 2.0 - den0 = K.square(mean0) + K.square(mean1) - luminance = (num0 + c1) / (den0 + c1) - - num1 = reducer(y_true * y_pred) * 2.0 - den1 = reducer(K.square(y_true) + K.square(y_pred)) - c2 *= 1.0 #compensation factor - cs = (num1 - num0 + c2) / (den1 - den0 + c2) - - ssim_val = K.mean(luminance * cs, axis=(-3, -2) ) - return(1.0 - ssim_val ) / 2.0 - - return func - - nnlib.dssim = dssim - - if 'tensorflow' in backend: - class PixelShuffler(keras.layers.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShuffler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - if self.data_format == 'channels_first': - return K.tf.depth_to_space(inputs, self.size[0], 'NCHW') - - elif self.data_format == 'channels_last': - return K.tf.depth_to_space(inputs, self.size[0], 'NHWC') - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - if self.data_format == 'channels_first': - height = input_shape[2] * self.size[0] if input_shape[2] is not None else None - width = input_shape[3] * self.size[1] if input_shape[3] is not None else None - channels = input_shape[1] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[1]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - channels, - height, - width) - - elif self.data_format == 'channels_last': - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShuffler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - else: - class PixelShuffler(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(PixelShuffler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - if self.data_format == 'channels_first': - batch_size, c, h, w = input_shape[0], K.int_shape(inputs)[1], input_shape[2], input_shape[3] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc 
= c // (rh * rw) - - out = K.reshape(inputs, (batch_size, rh, rw, oc, h, w)) - out = K.permute_dimensions(out, (0, 3, 4, 1, 5, 2)) - out = K.reshape(out, (batch_size, oc, oh, ow)) - return out - - elif self.data_format == 'channels_last': - batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] - rh, rw = self.size - oh, ow = h * rh, w * rw - oc = c // (rh * rw) - - out = K.reshape(inputs, (batch_size, h, w, rh, rw, oc)) - out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) - out = K.reshape(out, (batch_size, oh, ow, oc)) - return out - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - if self.data_format == 'channels_first': - height = input_shape[2] * self.size[0] if input_shape[2] is not None else None - width = input_shape[3] * self.size[1] if input_shape[3] is not None else None - channels = input_shape[1] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[1]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - channels, - height, - width) - - elif self.data_format == 'channels_last': - height = input_shape[1] * self.size[0] if input_shape[1] is not None else None - width = input_shape[2] * self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] // self.size[0] // self.size[1] - - if channels * self.size[0] * self.size[1] != input_shape[3]: - raise ValueError('channels of input and size are incompatible') - - return (input_shape[0], - height, - width, - channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(PixelShuffler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - - nnlib.PixelShuffler = PixelShuffler - nnlib.SubpixelUpscaler = PixelShuffler - - if 'tensorflow' in backend: - class SubpixelDownscaler(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(SubpixelDownscaler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs))) - - return K.tf.space_to_depth(inputs, self.size[0], 'NHWC') - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - height = input_shape[1] // self.size[0] if input_shape[1] is not None else None - width = input_shape[2] // self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] * self.size[0] * self.size[1] - - return (input_shape[0], height, width, channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(SubpixelDownscaler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - else: - class SubpixelDownscaler(KL.Layer): - def __init__(self, size=(2, 2), data_format='channels_last', **kwargs): - super(SubpixelDownscaler, self).__init__(**kwargs) - self.data_format = data_format - self.size = size - - def call(self, inputs): - - input_shape = K.shape(inputs) - if K.int_shape(input_shape)[0] != 4: - raise ValueError('Inputs should have rank 4; Received input shape:', 
str(K.int_shape(inputs))) - - batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1] - rh, rw = self.size - oh, ow = h // rh, w // rw - oc = c * (rh * rw) - - out = K.reshape(inputs, (batch_size, oh, rh, ow, rw, c)) - out = K.permute_dimensions(out, (0, 1, 3, 2, 4, 5)) - out = K.reshape(out, (batch_size, oh, ow, oc)) - return out - - def compute_output_shape(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - - height = input_shape[1] // self.size[0] if input_shape[1] is not None else None - width = input_shape[2] // self.size[1] if input_shape[2] is not None else None - channels = input_shape[3] * self.size[0] * self.size[1] - - return (input_shape[0], height, width, channels) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(SubpixelDownscaler, self).get_config() - - return dict(list(base_config.items()) + list(config.items())) - - nnlib.SubpixelDownscaler = SubpixelDownscaler - - class BlurPool(KL.Layer): - """ - https://arxiv.org/abs/1904.11486 https://github.com/adobe/antialiased-cnns - """ - def __init__(self, filt_size=3, stride=2, **kwargs): - self.strides = (stride,stride) - self.filt_size = filt_size - self.padding = ( (int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ), (int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)) ) ) - if(self.filt_size==1): - self.a = np.array([1.,]) - elif(self.filt_size==2): - self.a = np.array([1., 1.]) - elif(self.filt_size==3): - self.a = np.array([1., 2., 1.]) - elif(self.filt_size==4): - self.a = np.array([1., 3., 3., 1.]) - elif(self.filt_size==5): - self.a = np.array([1., 4., 6., 4., 1.]) - elif(self.filt_size==6): - self.a = np.array([1., 5., 10., 10., 5., 1.]) - elif(self.filt_size==7): - self.a = np.array([1., 6., 15., 20., 15., 6., 1.]) - - super(BlurPool, self).__init__(**kwargs) - - def compute_output_shape(self, input_shape): - height = input_shape[1] // self.strides[0] - width = input_shape[2] // self.strides[1] - channels = input_shape[3] - return (input_shape[0], height, width, channels) - - def call(self, x): - k = self.a - k = k[:,None]*k[None,:] - k = k / np.sum(k) - k = np.tile (k[:,:,None,None], (1,1,K.int_shape(x)[-1],1) ) - k = K.constant (k, dtype=K.floatx() ) - - x = K.spatial_2d_padding(x, padding=self.padding) - x = K.depthwise_conv2d(x, k, strides=self.strides, padding='valid') - return x - - nnlib.BlurPool = BlurPool - - class FUNITAdain(KL.Layer): - """ - differents from NVLabs/FUNIT: - I moved two dense blocks inside this layer, - so we don't need to slice outter MLP block and assign weights every call, just pass MLP inside. 
- also size of dense blocks is calculated automatically - """ - def __init__(self, axis=-1, epsilon=1e-5, momentum=0.99, kernel_initializer='glorot_uniform', **kwargs): - self.axis = axis - self.epsilon = epsilon - self.momentum = momentum - self.kernel_initializer = kernel_initializer - super(FUNITAdain, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = None - x, mlp = input_shape - units = x[self.axis] - - self.kernel1 = self.add_weight(shape=(units, units), initializer=self.kernel_initializer, name='kernel1') - self.bias1 = self.add_weight(shape=(units,), initializer='zeros', name='bias1') - self.kernel2 = self.add_weight(shape=(units, units), initializer=self.kernel_initializer, name='kernel2') - self.bias2 = self.add_weight(shape=(units,), initializer='zeros', name='bias2') - - self.built = True - - def call(self, inputs, training=None): - x, mlp = inputs - - gamma = K.dot(mlp, self.kernel1) - gamma = K.bias_add(gamma, self.bias1, data_format='channels_last') - - beta = K.dot(mlp, self.kernel2) - beta = K.bias_add(beta, self.bias2, data_format='channels_last') - - input_shape = K.int_shape(x) - - reduction_axes = list(range(len(input_shape))) - del reduction_axes[self.axis] - del reduction_axes[0] - - broadcast_shape = [1] * len(input_shape) - broadcast_shape[self.axis] = input_shape[self.axis] - mean = K.mean(x, reduction_axes, keepdims=True) - stddev = K.std(x, reduction_axes, keepdims=True) + self.epsilon - normed = (x - mean) / stddev - normed *= K.reshape(gamma,[-1]+broadcast_shape[1:] ) - normed += K.reshape(beta, [-1]+broadcast_shape[1:] ) - return normed - - def get_config(self): - config = {'axis': self.axis, 'epsilon': self.epsilon } - - base_config = super(FUNITAdain, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - nnlib.FUNITAdain = FUNITAdain - - class Scale(KL.Layer): - """ - GAN Custom Scal Layer - Code borrows from https://github.com/flyyufelix/cnn_finetune - """ - def __init__(self, weights=None, axis=-1, gamma_init='zero', **kwargs): - self.axis = axis - self.gamma_init = keras.initializers.get(gamma_init) - self.initial_weights = weights - super(Scale, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = [keras.engine.InputSpec(shape=input_shape)] - - # Compatibility with TensorFlow >= 1.0.0 - self.gamma = K.variable(self.gamma_init((1,)), name='{}_gamma'.format(self.name)) - self.trainable_weights = [self.gamma] - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - - def call(self, x, mask=None): - return self.gamma * x - - def get_config(self): - config = {"axis": self.axis} - base_config = super(Scale, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.Scale = Scale - - - """ - unable to work in plaidML, due to unimplemented ops - - class BilinearInterpolation(KL.Layer): - def __init__(self, size=(2,2), **kwargs): - self.size = size - super(BilinearInterpolation, self).__init__(**kwargs) - - def compute_output_shape(self, input_shape): - return (input_shape[0], input_shape[1]*self.size[1], input_shape[2]*self.size[0], input_shape[3]) - - - def call(self, X): - _,h,w,_ = K.int_shape(X) - - X = K.concatenate( [ X, X[:,:,-2:-1,:] ],axis=2 ) - X = K.concatenate( [ X, X[:,:,-2:-1,:] ],axis=2 ) - X = K.concatenate( [ X, X[:,-2:-1,:,:] ],axis=1 ) - X = K.concatenate( [ X, X[:,-2:-1,:,:] ],axis=1 ) - - X_sh = 
K.shape(X) - batch_size, height, width, num_channels = X_sh[0], X_sh[1], X_sh[2], X_sh[3] - - output_h, output_w = (h*self.size[1]+4, w*self.size[0]+4) - - x_linspace = np.linspace(-1. , 1. - 2/output_w, output_w)# - y_linspace = np.linspace(-1. , 1. - 2/output_h, output_h)# - - x_coordinates, y_coordinates = np.meshgrid(x_linspace, y_linspace) - x_coordinates = K.flatten(K.constant(x_coordinates, dtype=K.floatx() )) - y_coordinates = K.flatten(K.constant(y_coordinates, dtype=K.floatx() )) - - grid = K.concatenate([x_coordinates, y_coordinates, K.ones_like(x_coordinates)], 0) - grid = K.flatten(grid) - - - grids = K.tile(grid, ( batch_size, ) ) - grids = K.reshape(grids, (batch_size, 3, output_h * output_w )) - - - x = K.cast(K.flatten(grids[:, 0:1, :]), dtype='float32') - y = K.cast(K.flatten(grids[:, 1:2, :]), dtype='float32') - x = .5 * (x + 1.0) * K.cast(width, dtype='float32') - y = .5 * (y + 1.0) * K.cast(height, dtype='float32') - x0 = K.cast(x, 'int32') - x1 = x0 + 1 - y0 = K.cast(y, 'int32') - y1 = y0 + 1 - max_x = int(K.int_shape(X)[2] -1) - max_y = int(K.int_shape(X)[1] -1) - - x0 = K.clip(x0, 0, max_x) - x1 = K.clip(x1, 0, max_x) - y0 = K.clip(y0, 0, max_y) - y1 = K.clip(y1, 0, max_y) - - - pixels_batch = K.constant ( np.arange(0, batch_size) * (height * width), dtype=K.floatx() ) - - pixels_batch = K.expand_dims(pixels_batch, axis=-1) - - base = K.tile(pixels_batch, (1, output_h * output_w ) ) - base = K.flatten(base) - - base_y0 = base + y0 * width - - base_y1 = base + y1 * width - - indices_a = base_y0 + x0 - indices_b = base_y1 + x0 - indices_c = base_y0 + x1 - indices_d = base_y1 + x1 - - flat_image = K.reshape(X, (-1, num_channels) ) - flat_image = K.cast(flat_image, dtype='float32') - pixel_values_a = K.gather(flat_image, indices_a) - pixel_values_b = K.gather(flat_image, indices_b) - pixel_values_c = K.gather(flat_image, indices_c) - pixel_values_d = K.gather(flat_image, indices_d) - - x0 = K.cast(x0, 'float32') - x1 = K.cast(x1, 'float32') - y0 = K.cast(y0, 'float32') - y1 = K.cast(y1, 'float32') - - area_a = K.expand_dims(((x1 - x) * (y1 - y)), 1) - area_b = K.expand_dims(((x1 - x) * (y - y0)), 1) - area_c = K.expand_dims(((x - x0) * (y1 - y)), 1) - area_d = K.expand_dims(((x - x0) * (y - y0)), 1) - - values_a = area_a * pixel_values_a - values_b = area_b * pixel_values_b - values_c = area_c * pixel_values_c - values_d = area_d * pixel_values_d - interpolated_image = values_a + values_b + values_c + values_d - - new_shape = (batch_size, output_h, output_w, num_channels) - interpolated_image = K.reshape(interpolated_image, new_shape) - - interpolated_image = interpolated_image[:,:-4,:-4,:] - return interpolated_image - - def get_config(self): - config = {"size": self.size} - base_config = super(BilinearInterpolation, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - """ - class BilinearInterpolation(KL.Layer): - def __init__(self, size=(2,2), **kwargs): - self.size = size - super(BilinearInterpolation, self).__init__(**kwargs) - - def compute_output_shape(self, input_shape): - return (input_shape[0], input_shape[1]*self.size[1], input_shape[2]*self.size[0], input_shape[3]) - - def call(self, X): - _,h,w,_ = K.int_shape(X) - - return K.cast( K.tf.image.resize_images(X, (h*self.size[1],w*self.size[0]) ), K.floatx() ) - - def get_config(self): - config = {"size": self.size} - base_config = super(BilinearInterpolation, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - nnlib.BilinearInterpolation = 
BilinearInterpolation - - class WScaleConv2DLayer(KL.Conv2D): - def __init__(self, *args, gain=None, **kwargs): - kwargs['kernel_initializer'] = keras.initializers.random_normal() - - if gain is None: - gain = np.sqrt(2) - - self.gain = gain - - super(WScaleConv2DLayer,self).__init__(*args,**kwargs) - - def build(self, input_shape): - super().build(input_shape) - kernel_shape = K.int_shape(self.kernel) - std = np.sqrt(self.gain) / np.sqrt( np.prod(kernel_shape[:-1]) ) - self.wscale = K.constant(std, dtype=K.floatx() ) - - def call(self, input, **kwargs): - k = self.kernel - self.kernel = self.kernel*self.wscale - x = super().call(input,**kwargs) - self.kernel = k - return x - - def get_config(self): - config = {"gain": self.gain} - base_config = super(WScaleConv2DLayer, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - nnlib.WScaleConv2DLayer = WScaleConv2DLayer - - class SelfAttention(KL.Layer): - def __init__(self, nc, squeeze_factor=8, **kwargs): - assert nc//squeeze_factor > 0, f"Input channels must be >= {squeeze_factor}, recieved nc={nc}" - - self.nc = nc - self.squeeze_factor = squeeze_factor - super(SelfAttention, self).__init__(**kwargs) - - def compute_output_shape(self, input_shape): - return (input_shape[0], input_shape[1], input_shape[2], self.nc) - - def call(self, inp): - x = inp - shape_x = x.get_shape().as_list() - - f = Conv2D(self.nc//self.squeeze_factor, 1, kernel_regularizer=keras.regularizers.l2(1e-4))(x) - g = Conv2D(self.nc//self.squeeze_factor, 1, kernel_regularizer=keras.regularizers.l2(1e-4))(x) - h = Conv2D(self.nc, 1, kernel_regularizer=keras.regularizers.l2(1e-4))(x) - - shape_f = f.get_shape().as_list() - shape_g = g.get_shape().as_list() - shape_h = h.get_shape().as_list() - flat_f = Reshape( (-1, shape_f[-1]) )(f) - flat_g = Reshape( (-1, shape_g[-1]) )(g) - flat_h = Reshape( (-1, shape_h[-1]) )(h) - - s = Lambda(lambda x: K.batch_dot(x[0], keras.layers.Permute((2,1))(x[1]) ))([flat_g, flat_f]) - beta = keras.layers.Softmax(axis=-1)(s) - o = Lambda(lambda x: K.batch_dot(x[0], x[1]))([beta, flat_h]) - - o = Reshape(shape_x[1:])(o) - o = Scale()(o) - - out = Add()([o, inp]) - return out - nnlib.SelfAttention = SelfAttention - - class RMSprop(keras.optimizers.Optimizer): - """RMSProp optimizer. - It is recommended to leave the parameters of this optimizer - at their default values - (except the learning rate, which can be freely tuned). - # Arguments - learning_rate: float >= 0. Learning rate. - rho: float >= 0. - # References - - [rmsprop: Divide the gradient by a running average of its recent magnitude - ](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) - - tf_cpu_mode: only for tensorflow backend - 0 - default, no changes. - 1 - allows to train x2 bigger network on same VRAM consuming RAM - 2 - allows to train x3 bigger network on same VRAM consuming RAM*2 and CPU power. 
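The tf_cpu_mode option described in the docstring above trades GPU memory for host RAM by creating the optimizer's slot variables under a CPU device scope; the forward and backward passes still run on the GPU. A minimal sketch of that placement trick, assuming TF1-style graph mode (names are illustrative):

```python
import tensorflow as tf

def make_accumulators_on_cpu(params):
    # slot variables (e.g. the RMSprop accumulators) live in system RAM,
    # not VRAM, because they are created inside a /cpu:0 device scope
    with tf.device("/cpu:0"):
        return [tf.Variable(tf.zeros_like(p), trainable=False) for p in params]
```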
- """ - - def __init__(self, learning_rate=0.001, rho=0.9, lr_dropout=0, tf_cpu_mode=0, **kwargs): - self.initial_decay = kwargs.pop('decay', 0.0) - self.epsilon = kwargs.pop('epsilon', K.epsilon()) - self.lr_dropout = lr_dropout - self.tf_cpu_mode = tf_cpu_mode - - learning_rate = kwargs.pop('lr', learning_rate) - super(RMSprop, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.learning_rate = K.variable(learning_rate, name='learning_rate') - self.rho = K.variable(rho, name='rho') - self.decay = K.variable(self.initial_decay, name='decay') - self.iterations = K.variable(0, dtype='int64', name='iterations') - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None - if e: e.__enter__() - accumulators = [K.zeros(K.int_shape(p), - dtype=K.dtype(p), - name='accumulator_' + str(i)) - for (i, p) in enumerate(params)] - if self.lr_dropout != 0: - lr_rnds = [ K.random_binomial(K.int_shape(p), p=self.lr_dropout, dtype=K.dtype(p)) for p in params ] - if e: e.__exit__(None, None, None) - - self.weights = [self.iterations] + accumulators - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.learning_rate - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - for i, (p, g, a) in enumerate(zip(params, grads, accumulators)): - # update accumulator - e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None - if e: e.__enter__() - new_a = self.rho * a + (1. - self.rho) * K.square(g) - p_diff = - lr * g / (K.sqrt(new_a) + self.epsilon) - if self.lr_dropout != 0: - p_diff *= lr_rnds[i] - new_p = p + p_diff - if e: e.__exit__(None, None, None) - - self.updates.append(K.update(a, new_a)) - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def set_weights(self, weights): - params = self.weights - # Override set_weights for backward compatibility of Keras 2.2.4 optimizer - # since it does not include iteration at head of the weight list. Set - # iteration to 0. - if len(params) == len(weights) + 1: - weights = [np.array(0)] + weights - super(RMSprop, self).set_weights(weights) - - def get_config(self): - config = {'learning_rate': float(K.get_value(self.learning_rate)), - 'rho': float(K.get_value(self.rho)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon, - 'lr_dropout' : self.lr_dropout } - base_config = super(RMSprop, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.RMSprop = RMSprop - - class Adam(keras.optimizers.Optimizer): - """Adam optimizer. - - Default parameters follow those provided in the original paper. - - # Arguments - lr: float >= 0. Learning rate. - beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - amsgrad: boolean. Whether to apply the AMSGrad variant of this - algorithm from the paper "On the Convergence of Adam and - Beyond". - lr_dropout: float [0.0 .. 1.0] Learning rate dropout https://arxiv.org/pdf/1912.00144 - tf_cpu_mode: only for tensorflow backend - 0 - default, no changes. 
- 1 - allows to train x2 bigger network on same VRAM consuming RAM - 2 - allows to train x3 bigger network on same VRAM consuming RAM*2 and CPU power. - - # References - - [Adam - A Method for Stochastic Optimization] - (https://arxiv.org/abs/1412.6980v8) - - [On the Convergence of Adam and Beyond] - (https://openreview.net/forum?id=ryQu7f-RZ) - """ - - def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, - epsilon=None, decay=0., amsgrad=False, lr_dropout=0, tf_cpu_mode=0, **kwargs): - super(Adam, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') - self.decay = K.variable(decay, name='decay') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - self.amsgrad = amsgrad - self.lr_dropout = lr_dropout - self.tf_cpu_mode = tf_cpu_mode - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / - (1. - K.pow(self.beta_1, t))) - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None - if e: e.__enter__() - ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - if self.amsgrad: - vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - else: - vhats = [K.zeros(1) for _ in params] - - - if self.lr_dropout != 0: - lr_rnds = [ K.random_binomial(K.int_shape(p), p=self.lr_dropout, dtype=K.dtype(p)) for p in params ] - - if e: e.__exit__(None, None, None) - - self.weights = [self.iterations] + ms + vs + vhats - - for i, (p, g, m, v, vhat) in enumerate( zip(params, grads, ms, vs, vhats) ): - e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None - if e: e.__enter__() - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) - - if self.amsgrad: - vhat_t = K.maximum(vhat, v_t) - self.updates.append(K.update(vhat, vhat_t)) - if e: e.__exit__(None, None, None) - - if self.amsgrad: - p_diff = - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) - else: - p_diff = - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - - if self.lr_dropout != 0: - p_diff *= lr_rnds[i] - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) - new_p = p + p_diff - - # Apply constraints. 
- if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad, - 'lr_dropout' : self.lr_dropout} - base_config = super(Adam, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.Adam = Adam - - class LookaheadOptimizer(keras.optimizers.Optimizer): - def __init__(self, optimizer, sync_period=5, slow_step=0.5, tf_cpu_mode=0, **kwargs): - super(LookaheadOptimizer, self).__init__(**kwargs) - self.optimizer = optimizer - self.tf_cpu_mode = tf_cpu_mode - - with K.name_scope(self.__class__.__name__): - self.sync_period = K.variable(sync_period, dtype='int64', name='sync_period') - self.slow_step = K.variable(slow_step, name='slow_step') - - @property - def lr(self): - return self.optimizer.lr - - @lr.setter - def lr(self, lr): - self.optimizer.lr = lr - - @property - def learning_rate(self): - return self.optimizer.learning_rate - - @learning_rate.setter - def learning_rate(self, learning_rate): - self.optimizer.learning_rate = learning_rate - - @property - def iterations(self): - return self.optimizer.iterations - - def get_updates(self, loss, params): - sync_cond = K.equal((self.iterations + 1) // self.sync_period * self.sync_period, (self.iterations + 1)) - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode > 0 else None - if e: e.__enter__() - slow_params = [K.variable(K.get_value(p), name='sp_{}'.format(i)) for i, p in enumerate(params)] - if e: e.__exit__(None, None, None) - - - self.updates = self.optimizer.get_updates(loss, params) - slow_updates = [] - for p, sp in zip(params, slow_params): - - e = K.tf.device("/cpu:0") if self.tf_cpu_mode == 2 else None - if e: e.__enter__() - sp_t = sp + self.slow_step * (p - sp) - if e: e.__exit__(None, None, None) - - slow_updates.append(K.update(sp, K.switch( - sync_cond, - sp_t, - sp, - ))) - slow_updates.append(K.update_add(p, K.switch( - sync_cond, - sp_t - p, - K.zeros_like(p), - ))) - - self.updates += slow_updates - self.weights = self.optimizer.weights + slow_params - return self.updates - - def get_config(self): - config = { - 'optimizer': keras.optimizers.serialize(self.optimizer), - 'sync_period': int(K.get_value(self.sync_period)), - 'slow_step': float(K.get_value(self.slow_step)), - } - base_config = super(LookaheadOptimizer, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - optimizer = keras.optimizers.deserialize(config.pop('optimizer')) - return cls(optimizer, **config) - nnlib.LookaheadOptimizer = LookaheadOptimizer - - class DenseMaxout(keras.layers.Layer): - """A dense maxout layer. - A `MaxoutDense` layer takes the element-wise maximum of - `nb_feature` `Dense(input_dim, output_dim)` linear layers. - This allows the layer to learn a convex, - piecewise linear activation function over the inputs. - Note that this is a *linear* layer; - if you wish to apply activation function - (you shouldn't need to --they are universal function approximators), - an `Activation` layer must be added after. - # Arguments - output_dim: int > 0. - nb_feature: number of Dense layers to use internally. 
- init: name of initialization function for the weights of the layer - (see [initializations](../initializations.md)), - or alternatively, Theano function to use for weights - initialization. This parameter is only relevant - if you don't pass a `weights` argument. - weights: list of Numpy arrays to set as initial weights. - The list should have 2 elements, of shape `(input_dim, output_dim)` - and (output_dim,) for weights and biases respectively. - W_regularizer: instance of [WeightRegularizer](../regularizers.md) - (eg. L1 or L2 regularization), applied to the main weights matrix. - b_regularizer: instance of [WeightRegularizer](../regularizers.md), - applied to the bias. - activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), - applied to the network output. - W_constraint: instance of the [constraints](../constraints.md) module - (eg. maxnorm, nonneg), applied to the main weights matrix. - b_constraint: instance of the [constraints](../constraints.md) module, - applied to the bias. - bias: whether to include a bias - (i.e. make the layer affine rather than linear). - input_dim: dimensionality of the input (integer). This argument - (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - # Input shape - 2D tensor with shape: `(nb_samples, input_dim)`. - # Output shape - 2D tensor with shape: `(nb_samples, output_dim)`. - # References - - [Maxout Networks](http://arxiv.org/abs/1302.4389) - """ - - def __init__(self, output_dim, - nb_feature=4, - kernel_initializer='glorot_uniform', - weights=None, - W_regularizer=None, - b_regularizer=None, - activity_regularizer=None, - W_constraint=None, - b_constraint=None, - bias=True, - input_dim=None, - **kwargs): - self.output_dim = output_dim - self.nb_feature = nb_feature - self.kernel_initializer = keras.initializers.get(kernel_initializer) - - self.W_regularizer = keras.regularizers.get(W_regularizer) - self.b_regularizer = keras.regularizers.get(b_regularizer) - self.activity_regularizer = keras.regularizers.get(activity_regularizer) - - self.W_constraint = keras.constraints.get(W_constraint) - self.b_constraint = keras.constraints.get(b_constraint) - - self.bias = bias - self.initial_weights = weights - self.input_spec = keras.layers.InputSpec(ndim=2) - - self.input_dim = input_dim - if self.input_dim: - kwargs['input_shape'] = (self.input_dim,) - super(DenseMaxout, self).__init__(**kwargs) - - def build(self, input_shape): - input_dim = input_shape[1] - self.input_spec = keras.layers.InputSpec(dtype=K.floatx(), - shape=(None, input_dim)) - - self.W = self.add_weight(shape=(self.nb_feature, input_dim, self.output_dim), - initializer=self.kernel_initializer, - name='W', - regularizer=self.W_regularizer, - constraint=self.W_constraint) - if self.bias: - self.b = self.add_weight(shape=(self.nb_feature, self.output_dim,), - initializer='zero', - name='b', - regularizer=self.b_regularizer, - constraint=self.b_constraint) - else: - self.b = None - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - self.built = True - - def compute_output_shape(self, input_shape): - assert input_shape and len(input_shape) == 2 - return (input_shape[0], self.output_dim) - - def call(self, x): - # no activation, this layer is only linear. 
- output = K.dot(x, self.W) - if self.bias: - output += self.b - output = K.max(output, axis=1) - return output - - def get_config(self): - config = {'output_dim': self.output_dim, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'nb_feature': self.nb_feature, - 'W_regularizer': regularizers.serialize(self.W_regularizer), - 'b_regularizer': regularizers.serialize(self.b_regularizer), - 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'W_constraint': constraints.serialize(self.W_constraint), - 'b_constraint': constraints.serialize(self.b_constraint), - 'bias': self.bias, - 'input_dim': self.input_dim} - base_config = super(DenseMaxout, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - nnlib.DenseMaxout = DenseMaxout - - class GeLU(KL.Layer): - """Gaussian Error Linear Unit. - A smoother version of ReLU generally used - in the BERT or BERT architecture based models. - Original paper: https://arxiv.org/abs/1606.08415 - Input shape: - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - Output shape: - Same shape as the input. - """ - - def __init__(self, approximate=True, **kwargs): - super(GeLU, self).__init__(**kwargs) - self.approximate = approximate - self.supports_masking = True - - def call(self, inputs): - cdf = 0.5 * (1.0 + K.tanh((np.sqrt(2 / np.pi) * (inputs + 0.044715 * K.pow(inputs, 3))))) - return inputs * cdf - - def get_config(self): - config = {'approximate': self.approximate} - base_config = super(GeLU, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - nnlib.GeLU = GeLU - - def CAInitializerMP( conv_weights_list ): - #Convolution Aware Initialization https://arxiv.org/abs/1702.06295 - data = [ (i, K.int_shape(conv_weights)) for i, conv_weights in enumerate(conv_weights_list) ] - data = sorted(data, key=lambda data: np.prod(data[1]) ) - result = CAInitializerMPSubprocessor (data, K.floatx(), K.image_data_format() ).run() - for idx, weights in result: - K.set_value ( conv_weights_list[idx], weights ) - nnlib.CAInitializerMP = CAInitializerMP - - - if backend == "plaidML": - class TileOP_ReflectionPadding2D(nnlib.PMLTile.Operation): - def __init__(self, input, w_pad, h_pad): - if K.image_data_format() == 'channels_last': - if input.shape.ndims == 4: - H, W = input.shape.dims[1:3] - if (type(H) == int and h_pad >= H) or \ - (type(W) == int and w_pad >= W): - raise ValueError("Paddings must be less than dimensions.") - - c = """ function (I[B, H, W, C] ) -> (O) {{ - WE = W + {w_pad}*2; - HE = H + {h_pad}*2; - """.format(h_pad=h_pad, w_pad=w_pad) - if w_pad > 0: - c += """ - LEFT_PAD [b, h, w , c : B, H, WE, C ] = =(I[b, h, {w_pad}-w, c]), w < {w_pad} ; - HCENTER [b, h, w , c : B, H, WE, C ] = =(I[b, h, w-{w_pad}, c]), w < W+{w_pad}-1 ; - RIGHT_PAD[b, h, w , c : B, H, WE, C ] = =(I[b, h, 2*W - (w-{w_pad}) -2, c]); - LCR = LEFT_PAD+HCENTER+RIGHT_PAD; - """.format(h_pad=h_pad, w_pad=w_pad) - else: - c += "LCR = I;" - - if h_pad > 0: - c += """ - TOP_PAD [b, h, w , c : B, HE, WE, C ] = =(LCR[b, {h_pad}-h, w, c]), h < {h_pad}; - VCENTER [b, h, w , c : B, HE, WE, C ] = =(LCR[b, h-{h_pad}, w, c]), h < H+{h_pad}-1 ; - BOTTOM_PAD[b, h, w , c : B, HE, WE, C ] = =(LCR[b, 2*H - (h-{h_pad}) -2, w, c]); - TVB = TOP_PAD+VCENTER+BOTTOM_PAD; - """.format(h_pad=h_pad, w_pad=w_pad) - else: - c += 
"TVB = LCR;" - - c += "O = TVB; }" - - inp_dims = input.shape.dims - out_dims = (inp_dims[0], inp_dims[1]+h_pad*2, inp_dims[2]+w_pad*2, inp_dims[3]) - else: - raise NotImplemented - else: - raise NotImplemented - - super(TileOP_ReflectionPadding2D, self).__init__(c, [('I', input) ], - [('O', nnlib.PMLTile.Shape(input.shape.dtype, out_dims ) )]) - - class ReflectionPadding2D(keras.layers.Layer): - def __init__(self, padding=(1, 1), **kwargs): - self.padding = tuple(padding) - self.input_spec = [keras.layers.InputSpec(ndim=4)] - super(ReflectionPadding2D, self).__init__(**kwargs) - - def compute_output_shape(self, s): - """ If you are using "channels_last" configuration""" - return (s[0], s[1] + 2 * self.padding[0], s[2] + 2 * self.padding[1], s[3]) - - def call(self, x, mask=None): - w_pad,h_pad = self.padding - if "tensorflow" in backend: - return K.tf.pad(x, [[0,0], [h_pad,h_pad], [w_pad,w_pad], [0,0] ], 'REFLECT') - elif backend == "plaidML": - return TileOP_ReflectionPadding2D.function(x, self.padding[0], self.padding[1]) - else: - if K.image_data_format() == 'channels_last': - if x.shape.ndims == 4: - w = K.concatenate ([ x[:,:,w_pad:0:-1,:], - x, - x[:,:,-2:-w_pad-2:-1,:] ], axis=2 ) - h = K.concatenate ([ w[:,h_pad:0:-1,:,:], - w, - w[:,-2:-h_pad-2:-1,:,:] ], axis=1 ) - return h - else: - raise NotImplemented - else: - raise NotImplemented - - nnlib.ReflectionPadding2D = ReflectionPadding2D - - class Conv2D(): - def __init__ (self, *args, **kwargs): - self.reflect_pad = False - padding = kwargs.get('padding','') - if padding == 'zero': - kwargs['padding'] = 'same' - if padding == 'reflect': - kernel_size = kwargs['kernel_size'] - if (kernel_size % 2) == 1: - self.pad = (kernel_size // 2,)*2 - kwargs['padding'] = 'valid' - self.reflect_pad = True - self.func = keras.layers.Conv2D (*args, **kwargs) - - def __call__(self,x): - if self.reflect_pad: - x = ReflectionPadding2D( self.pad ) (x) - return self.func(x) - nnlib.Conv2D = Conv2D - - class Conv2DTranspose(): - def __init__ (self, *args, **kwargs): - self.reflect_pad = False - padding = kwargs.get('padding','') - if padding == 'zero': - kwargs['padding'] = 'same' - if padding == 'reflect': - kernel_size = kwargs['kernel_size'] - if (kernel_size % 2) == 1: - self.pad = (kernel_size // 2,)*2 - kwargs['padding'] = 'valid' - self.reflect_pad = True - self.func = keras.layers.Conv2DTranspose (*args, **kwargs) - - def __call__(self,x): - if self.reflect_pad: - x = ReflectionPadding2D( self.pad ) (x) - return self.func(x) - nnlib.Conv2DTranspose = Conv2DTranspose - - class EqualConv2D(KL.Conv2D): - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - activation=None, - use_bias=True, - gain=np.sqrt(2), - **kwargs): - super().__init__( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=keras.initializers.RandomNormal(mean=0.0, stddev=1.0), - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs) - self.gain = gain - - def build(self, input_shape): - super().build(input_shape) - - self.wscale = self.gain / np.sqrt( np.prod( K.int_shape(self.kernel)[:-1]) ) - self.wscale_t = K.constant (self.wscale, dtype=K.floatx() ) - - def call(self, inputs): - k = self.kernel * self.wscale_t - - outputs = K.conv2d( - 
inputs, - k, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) - - if self.use_bias: - outputs = K.bias_add( - outputs, - self.bias, - data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - return outputs - nnlib.EqualConv2D = EqualConv2D - - class PixelNormalization(KL.Layer): - # initialize the layer - def __init__(self, **kwargs): - super(PixelNormalization, self).__init__(**kwargs) - - # perform the operation - def call(self, inputs): - # calculate square pixel values - values = inputs**2.0 - # calculate the mean pixel values - mean_values = K.mean(values, axis=-1, keepdims=True) - # ensure the mean is not zero - mean_values += 1.0e-8 - # calculate the sqrt of the mean squared value (L2 norm) - l2 = K.sqrt(mean_values) - # normalize values by the l2 norm - normalized = inputs / l2 - return normalized - - # define the output shape of the layer - def compute_output_shape(self, input_shape): - return input_shape - nnlib.PixelNormalization = PixelNormalization - - @staticmethod - def import_keras_contrib(device_config): - if nnlib.keras_contrib is not None: - return nnlib.code_import_keras_contrib - - import keras_contrib as keras_contrib_ - nnlib.keras_contrib = keras_contrib_ - nnlib.__initialize_keras_contrib_functions() - nnlib.code_import_keras_contrib = compile (nnlib.code_import_keras_contrib_string,'','exec') - - @staticmethod - def __initialize_keras_contrib_functions(): - pass - - @staticmethod - def import_dlib( device_config = None): - if nnlib.dlib is not None: - return nnlib.code_import_dlib - - import dlib as dlib_ - nnlib.dlib = dlib_ - if not device_config.cpu_only and "tensorflow" in device_config.backend and len(device_config.gpu_idxs) > 0: - nnlib.dlib.cuda.set_device(device_config.gpu_idxs[0]) - - nnlib.code_import_dlib = compile (nnlib.code_import_dlib_string,'','exec') - - @staticmethod - def import_all(device_config = None): - if nnlib.code_import_all is None: - if device_config is None: - device_config = nnlib.active_DeviceConfig - else: - nnlib.active_DeviceConfig = device_config - - nnlib.import_keras(device_config) - nnlib.import_keras_contrib(device_config) - nnlib.code_import_all = compile (nnlib.code_import_keras_string + '\n' - + nnlib.code_import_keras_contrib_string - + nnlib.code_import_all_string,'','exec') - nnlib.__initialize_all_functions() - - return nnlib.code_import_all - - @staticmethod - def __initialize_all_functions(): - exec (nnlib.import_keras(nnlib.active_DeviceConfig), locals(), globals()) - exec (nnlib.import_keras_contrib(nnlib.active_DeviceConfig), locals(), globals()) - - class DSSIMMSEMaskLoss(object): - def __init__(self, mask, is_mse=False): - self.mask = mask - self.is_mse = is_mse - def __call__(self,y_true, y_pred): - total_loss = None - mask = self.mask - if self.is_mse: - blur_mask = gaussian_blur(max(1, K.int_shape(mask)[1] // 64))(mask) - return K.mean ( 50*K.square( y_true*blur_mask - y_pred*blur_mask ) ) - else: - return 10*dssim() (y_true*mask, y_pred*mask) - nnlib.DSSIMMSEMaskLoss = DSSIMMSEMaskLoss - - - ''' - def ResNet(output_nc, use_batch_norm, ngf=64, n_blocks=6, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = 
False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) - - def func(input): - - - def ResnetBlock(dim): - def func(input): - x = input - - x = ReflectionPadding2D((1,1))(x) - x = Conv2D(dim, 3, 1, padding='valid')(x) - x = XNormalization(x) - x = ReLU()(x) - - if use_dropout: - x = Dropout(0.5)(x) - - x = ReflectionPadding2D((1,1))(x) - x = Conv2D(dim, 3, 1, padding='valid')(x) - x = XNormalization(x) - x = ReLU()(x) - return Add()([x,input]) - return func - - x = input - - x = ReflectionPadding2D((3,3))(x) - x = Conv2D(ngf, 7, 1, 'valid')(x) - - x = ReLU()(XNormalization(Conv2D(ngf*2, 4, 2, 'same')(x))) - x = ReLU()(XNormalization(Conv2D(ngf*4, 4, 2, 'same')(x))) - - for i in range(n_blocks): - x = ResnetBlock(ngf*4)(x) - - x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf*2 *4, 3, 1, 'same')(x)))) - x = ReLU()(XNormalization(PixelShuffler()(Conv2D(ngf *4, 3, 1, 'same')(x)))) - - x = ReflectionPadding2D((3,3))(x) - x = Conv2D(output_nc, 7, 1, 'valid')(x) - x = tanh(x) - - return x - - return func - - nnlib.ResNet = ResNet - - # Defines the Unet generator. - # |num_downs|: number of downsamplings in UNet. 
For example, - # if |num_downs| == 7, image of size 128x128 will become of size 1x1 - # at the bottleneck - def UNet(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def Conv2DTranspose(filters, kernel_size, strides=(1, 1), padding='valid', output_padding=None, data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer='glorot_uniform', bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2DTranspose(filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, output_padding=output_padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint) - - def UNetSkipConnection(outer_nc, inner_nc, sub_model=None, outermost=False, innermost=False, use_dropout=False): - def func(inp): - x = inp - - x = Conv2D(inner_nc, 4, 2, 'valid')(ReflectionPadding2D( (1,1) )(x)) - x = XNormalization(x) - x = ReLU()(x) - - if not innermost: - x = sub_model(x) - - if not outermost: - x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) - x = XNormalization(x) - x = ReLU()(x) - - if not innermost: - if use_dropout: - x = Dropout(0.5)(x) - - x = Concatenate(axis=3)([inp, x]) - else: - x = Conv2DTranspose(outer_nc, 3, 2, 'same')(x) - x = tanh(x) - - - return x - - return func - - def func(input): - - unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=None, innermost=True) - - for i in range(num_downs - 5): - unet_block = UNetSkipConnection(ngf * 8, ngf * 8, sub_model=unet_block, use_dropout=use_dropout) - - unet_block = UNetSkipConnection(ngf * 4 , ngf * 8, sub_model=unet_block) - unet_block = UNetSkipConnection(ngf * 2 , ngf * 4, sub_model=unet_block) - unet_block = UNetSkipConnection(ngf , ngf * 2, sub_model=unet_block) - unet_block = UNetSkipConnection(output_nc, ngf , sub_model=unet_block, outermost=True) - - return unet_block(input) - return func - nnlib.UNet = UNet - - #predicts based on two 
past_image_tensors - def UNetTemporalPredictor(output_nc, use_batch_norm, num_downs, ngf=64, use_dropout=False): - exec (nnlib.import_all(), locals(), globals()) - def func(inputs): - past_2_image_tensor, past_1_image_tensor = inputs - - x = Concatenate(axis=3)([ past_2_image_tensor, past_1_image_tensor ]) - x = UNet(3, use_batch_norm, num_downs=num_downs, ngf=ngf, use_dropout=use_dropout) (x) - - return x - - return func - nnlib.UNetTemporalPredictor = UNetTemporalPredictor - - def NLayerDiscriminator(use_batch_norm, ndf=64, n_layers=3): - exec (nnlib.import_all(), locals(), globals()) - - if not use_batch_norm: - use_bias = True - def XNormalization(x): - return InstanceNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x)#GroupNormalization (axis=3, groups=K.int_shape (x)[3] // 4, gamma_initializer=RandomNormal(1., 0.02))(x) - else: - use_bias = False - def XNormalization(x): - return BatchNormalization (axis=3, gamma_initializer=RandomNormal(1., 0.02))(x) - - def Conv2D (filters, kernel_size, strides=(1, 1), padding='valid', data_format=None, dilation_rate=(1, 1), activation=None, use_bias=use_bias, kernel_initializer=RandomNormal(0, 0.02), bias_initializer='zeros', kernel_regularizer=None, bias_regularizer=None, activity_regularizer=None, kernel_constraint=None, bias_constraint=None): - return keras.layers.Conv2D( filters=filters, kernel_size=kernel_size, strides=strides, padding=padding, data_format=data_format, dilation_rate=dilation_rate, activation=activation, use_bias=use_bias, kernel_initializer=kernel_initializer, bias_initializer=bias_initializer, kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer, kernel_constraint=kernel_constraint, bias_constraint=bias_constraint ) - - def func(input): - x = input - - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf, 4, 2, 'valid')(x) - x = LeakyReLU(0.2)(x) - - for i in range(1, n_layers): - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf * min(2 ** i, 8), 4, 2, 'valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - x = Conv2D( ndf * min(2 ** n_layers, 8), 4, 1, 'valid')(x) - x = XNormalization(x) - x = LeakyReLU(0.2)(x) - - x = ZeroPadding2D((1,1))(x) - return Conv2D( 1, 4, 1, 'valid')(x) - return func - nnlib.NLayerDiscriminator = NLayerDiscriminator - ''' - @staticmethod - def finalize_all(): - if nnlib.keras_contrib is not None: - nnlib.keras_contrib = None - - if nnlib.keras is not None: - nnlib.keras.backend.clear_session() - nnlib.keras = None - - if nnlib.tf is not None: - nnlib.tf_sess = None - nnlib.tf = None - - -class CAInitializerMPSubprocessor(Subprocessor): - class Cli(Subprocessor.Cli): - - #override - def on_initialize(self, client_dict): - self.floatx = client_dict['floatx'] - self.data_format = client_dict['data_format'] - - #override - def process_data(self, data): - idx, shape = data - weights = CAGenerateWeights (shape, self.floatx, self.data_format) - return idx, weights - - #override - def get_data_name (self, data): - #return string identificator of your data - return "undefined" - - #override - def __init__(self, idx_shapes_list, floatx, data_format ): - self.idx_shapes_list = idx_shapes_list - self.floatx = floatx - self.data_format = data_format - - self.result = [] - super().__init__('CAInitializerMP', CAInitializerMPSubprocessor.Cli) - - #override - def on_clients_initialized(self): - io.progress_bar ("Initializing CA weights", len (self.idx_shapes_list)) - - #override - def 
on_clients_finalized(self): - io.progress_bar_close() - - #override - def process_info_generator(self): - for i in range(multiprocessing.cpu_count()): - yield 'CPU%d' % (i), {}, {'device_idx': i, - 'device_name': 'CPU%d' % (i), - 'floatx' : self.floatx, - 'data_format' : self.data_format - } - - #override - def get_data(self, host_dict): - if len (self.idx_shapes_list) > 0: - return self.idx_shapes_list.pop(0) - - return None - - #override - def on_data_return (self, host_dict, data): - self.idx_shapes_list.insert(0, data) - - #override - def on_result (self, host_dict, data, result): - self.result.append ( result ) - io.progress_bar_inc(1) - - #override - def get_result(self): - return self.result diff --git a/project.code-workspace b/project.code-workspace new file mode 100644 index 0000000..07fae2f --- /dev/null +++ b/project.code-workspace @@ -0,0 +1,50 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "workbench.colorTheme": "Visual Studio Light", + "diffEditor.ignoreTrimWhitespace": true, + "workbench.sideBar.location": "right", + "breadcrumbs.enabled": false, + "editor.renderWhitespace": "none", + "editor.minimap.enabled": false, + "workbench.activityBar.visible": true, + "window.menuBarVisibility": "default", + "editor.fastScrollSensitivity": 10, + "editor.mouseWheelScrollSensitivity": 2, + "window.zoomLevel": 0, + "extensions.ignoreRecommendations": true, + + "python.linting.pylintEnabled": false, + "python.linting.enabled": false, + "python.linting.pylamaEnabled": false, + "python.linting.pydocstyleEnabled": false, + "python.pythonPath": "${env:PYTHON_EXECUTABLE}", + "workbench.editor.tabCloseButton": "off", + "workbench.editor.tabSizing": "shrink", + "workbench.editor.highlightModifiedTabs": true, + "editor.mouseWheelScrollSensitivity": 3, + "editor.folding": false, + "editor.glyphMargin": false, + "files.exclude": { + "**/__pycache__": true, + "**/.github": true, + "**/.vscode": true, + "**/*.dat": true, + "**/*.h5": true, + "**/*.npy": true + }, + "editor.quickSuggestions": { + "other": false, + "comments": false, + "strings": false + }, + "editor.trimAutoWhitespace": false, + "python.linting.pylintArgs": [ + "--disable=import-error" + ] + } +} \ No newline at end of file diff --git a/requirements-colab.txt b/requirements-colab.txt index ccdf38b..496aaf0 100644 --- a/requirements-colab.txt +++ b/requirements-colab.txt @@ -1,16 +1,9 @@ +tqdm numpy==1.17.0 h5py==2.9.0 -Keras==2.2.4 opencv-python==4.1.0.25 -tensorflow-gpu==1.13.1 -plaidml-keras==0.5.0 -scikit-image -tqdm ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git - -# -# install following packages directly via pip! -# -# pip install torch===1.3.1 -f https://download.pytorch.org/whl/torch_stable.html -# pip install torchvision===0.4.0 -f https://download.pytorch.org/whl/torch_stable.html \ No newline at end of file +scikit-image==0.14.2 +scipy==1.4.1 +colorama +tensorflow-gpu==1.13.1 \ No newline at end of file diff --git a/requirements-cpu.txt b/requirements-cpu.txt deleted file mode 100644 index d44148c..0000000 --- a/requirements-cpu.txt +++ /dev/null @@ -1,15 +0,0 @@ -numpy==1.17.0 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.1.0.25 -tensorflow==1.12.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git - -# -# install following packages directly via pip! 
-# -# pip install torch===1.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html -# pip install torchvision===0.4.0+cpu -f https://download.pytorch.org/whl/torch_stable.html diff --git a/requirements-cuda.txt b/requirements-cuda.txt index edfa576..b1d5f55 100644 --- a/requirements-cuda.txt +++ b/requirements-cuda.txt @@ -1,17 +1,9 @@ +tqdm numpy==1.17.0 h5py==2.9.0 -Keras==2.2.4 opencv-python==4.1.0.25 -tensorflow-gpu==1.12.0 -plaidml==0.6.0 -plaidml-keras==0.5.0 -scikit-image -tqdm ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git - -# -# install following packages directly via pip! -# -# pip install torch===1.3.1 -f https://download.pytorch.org/whl/torch_stable.html -# pip install torchvision===0.4.0 -f https://download.pytorch.org/whl/torch_stable.html \ No newline at end of file +scikit-image==0.14.2 +scipy==1.4.1 +colorama +tensorflow-gpu==1.12.0 \ No newline at end of file diff --git a/requirements-opencl.txt b/requirements-opencl.txt deleted file mode 100644 index 44b0b00..0000000 --- a/requirements-opencl.txt +++ /dev/null @@ -1,17 +0,0 @@ -numpy==1.17.0 -h5py==2.9.0 -Keras==2.2.4 -opencv-python==4.1.0.25 -tensorflow==1.12.0 -plaidml==0.6.0 -plaidml-keras==0.5.0 -scikit-image -tqdm -ffmpeg-python==0.1.17 -git+https://www.github.com/keras-team/keras-contrib.git - -# -# install following packages directly via pip! -# -# pip install torch===1.3.1+cpu -f https://download.pytorch.org/whl/torch_stable.html -# pip install torchvision===0.4.0+cpu -f https://download.pytorch.org/whl/torch_stable.html \ No newline at end of file diff --git a/samplelib/PackedFaceset.py b/samplelib/PackedFaceset.py index 57895c3..c194776 100644 --- a/samplelib/PackedFaceset.py +++ b/samplelib/PackedFaceset.py @@ -4,9 +4,9 @@ import struct from pathlib import Path import samplelib.SampleHost -from interact import interact as io +from core.interact import interact as io from samplelib import Sample -from utils import Path_utils +from core import pathex packed_faceset_filename = 'faceset.pak' @@ -19,20 +19,20 @@ class PackedFaceset(): if samples_dat_path.exists(): io.log_info(f"{samples_dat_path} : file already exists !") - io.input_bool("Press enter to continue and overwrite.", False) + io.input("Press enter to continue and overwrite.") as_person_faceset = False - dir_names = Path_utils.get_all_dir_names(samples_path) + dir_names = pathex.get_all_dir_names(samples_path) if len(dir_names) != 0: - as_person_faceset = io.input_bool(f"{len(dir_names)} subdirectories found, process as person faceset? 
(y/n) skip:y : ", True) + as_person_faceset = io.input_bool(f"{len(dir_names)} subdirectories found, process as person faceset?", True) if as_person_faceset: image_paths = [] for dir_name in dir_names: - image_paths += Path_utils.get_image_paths(samples_path / dir_name) + image_paths += pathex.get_image_paths(samples_path / dir_name) else: - image_paths = Path_utils.get_image_paths(samples_path) + image_paths = pathex.get_image_paths(samples_path) samples = samplelib.SampleHost.load_face_samples(image_paths) samples_len = len(samples) diff --git a/samplelib/Sample.py b/samplelib/Sample.py index 8012a64..3430315 100644 --- a/samplelib/Sample.py +++ b/samplelib/Sample.py @@ -4,10 +4,10 @@ from pathlib import Path import cv2 import numpy as np -from utils.cv2_utils import * +from core.cv2ex import * from DFLIMG import * from facelib import LandmarksProcessor -from imagelib import IEPolys +from core.imagelib import IEPolys class SampleType(IntEnum): IMAGE = 0 #raw image diff --git a/samplelib/SampleGeneratorBase.py b/samplelib/SampleGeneratorBase.py index cf98d8d..ef98974 100644 --- a/samplelib/SampleGeneratorBase.py +++ b/samplelib/SampleGeneratorBase.py @@ -15,20 +15,16 @@ class SampleGeneratorBase(object): self.batch_size = 1 if self.debug else batch_size self.last_generation = None self.active = True - + def set_active(self, is_active): self.active = is_active - + def generate_next(self): if not self.active and self.last_generation is not None: return self.last_generation self.last_generation = next(self) return self.last_generation - - #overridable - def get_total_sample_count(self): - return 0 - + #overridable def __iter__(self): #implement your own iterator diff --git a/samplelib/SampleGeneratorFace.py b/samplelib/SampleGeneratorFace.py index a76e006..195e45a 100644 --- a/samplelib/SampleGeneratorFace.py +++ b/samplelib/SampleGeneratorFace.py @@ -1,13 +1,16 @@ import multiprocessing -import traceback import pickle +import time +import traceback + import cv2 import numpy as np -import time + +from core import mplib +from core.joblib import SubprocessGenerator, ThisThreadGenerator from facelib import LandmarksProcessor from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor, SampleType) -from utils import iter_utils, mp_utils ''' @@ -34,37 +37,33 @@ class SampleGeneratorFace(SampleGeneratorBase): if self.debug: self.generators_count = 1 else: - self.generators_count = np.clip(multiprocessing.cpu_count(), 2, generators_count) - + self.generators_count = max(1, generators_count) + samples = SampleHost.load (SampleType.FACE, self.samples_path) self.samples_len = len(samples) if self.samples_len == 0: raise ValueError('No training data provided.') - index_host = mp_utils.IndexHost(self.samples_len) + index_host = mplib.IndexHost(self.samples_len) if random_ct_samples_path is not None: ct_samples = SampleHost.load (SampleType.FACE, random_ct_samples_path) - ct_index_host = mp_utils.IndexHost( len(ct_samples) ) + ct_index_host = mplib.IndexHost( len(ct_samples) ) else: ct_samples = None ct_index_host = None pickled_samples = pickle.dumps(samples, 4) ct_pickled_samples = pickle.dumps(ct_samples, 4) if ct_samples is not None else None - + if self.debug: - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, ct_index_host.create_cli() if ct_index_host is not None else None) )] + self.generators = [ThisThreadGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, 
ct_index_host.create_cli() if ct_index_host is not None else None) )] else: - self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, ct_index_host.create_cli() if ct_index_host is not None else None), start_now=True ) for i in range(self.generators_count) ] + self.generators = [SubprocessGenerator ( self.batch_func, (pickled_samples, index_host.create_cli(), ct_pickled_samples, ct_index_host.create_cli() if ct_index_host is not None else None), start_now=True ) for i in range(self.generators_count) ] self.generator_counter = -1 - #overridable - def get_total_sample_count(self): - return self.samples_len - def __iter__(self): return self @@ -75,8 +74,8 @@ class SampleGeneratorFace(SampleGeneratorBase): def batch_func(self, param ): pickled_samples, index_host, ct_pickled_samples, ct_index_host = param - - samples = pickle.loads(pickled_samples) + + samples = pickle.loads(pickled_samples) ct_samples = pickle.loads(ct_pickled_samples) if ct_pickled_samples is not None else None bs = self.batch_size @@ -89,9 +88,9 @@ class SampleGeneratorFace(SampleGeneratorBase): t = time.time() for n_batch in range(bs): sample_idx = indexes[n_batch] - sample = samples[sample_idx] - - ct_sample = None + sample = samples[sample_idx] + + ct_sample = None if ct_samples is not None: ct_sample = ct_samples[ct_indexes[n_batch]] diff --git a/samplelib/SampleGeneratorFacePerson.py b/samplelib/SampleGeneratorFacePerson.py index d254063..d691341 100644 --- a/samplelib/SampleGeneratorFacePerson.py +++ b/samplelib/SampleGeneratorFacePerson.py @@ -5,10 +5,11 @@ import traceback import cv2 import numpy as np +from core import mplib +from core.joblib import SubprocessGenerator, ThisThreadGenerator from facelib import LandmarksProcessor from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor, SampleType) -from utils import iter_utils, mp_utils ''' @@ -19,12 +20,12 @@ output_sample_types = [ ] ''' class SampleGeneratorFacePerson(SampleGeneratorBase): - def __init__ (self, samples_path, debug=False, batch_size=1, - sample_process_options=SampleProcessor.Options(), - output_sample_types=[], + def __init__ (self, samples_path, debug=False, batch_size=1, + sample_process_options=SampleProcessor.Options(), + output_sample_types=[], person_id_mode=1, **kwargs): - + super().__init__(samples_path, debug, batch_size) self.sample_process_options = sample_process_options self.output_sample_types = output_sample_types @@ -39,13 +40,13 @@ class SampleGeneratorFacePerson(SampleGeneratorBase): if self.samples_len == 0: raise ValueError('No training data provided.') - unique_person_names = { sample.person_name for sample in samples } - persons_name_idxs = { person_name : [] for person_name in unique_person_names } - for i,sample in enumerate(samples): - persons_name_idxs[sample.person_name].append (i) + unique_person_names = { sample.person_name for sample in samples } + persons_name_idxs = { person_name : [] for person_name in unique_person_names } + for i,sample in enumerate(samples): + persons_name_idxs[sample.person_name].append (i) indexes2D = [ persons_name_idxs[person_name] for person_name in unique_person_names ] - index2d_host = mp_utils.Index2DHost(indexes2D) - + index2d_host = mplib.Index2DHost(indexes2D) + if self.debug: self.generators_count = 1 self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (samples_host.create_cli(), index2d_host.create_cli(),) )] @@ -54,11 +55,7 @@ class 
SampleGeneratorFacePerson(SampleGeneratorBase): self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (samples_host.create_cli(), index2d_host.create_cli(),), start_now=True ) for i in range(self.generators_count) ] self.generator_counter = -1 - - #overridable - def get_total_sample_count(self): - return self.samples_len - + def __iter__(self): return self @@ -67,14 +64,14 @@ class SampleGeneratorFacePerson(SampleGeneratorBase): generator = self.generators[self.generator_counter % len(self.generators) ] return next(generator) - def batch_func(self, param ): + def batch_func(self, param ): samples, index2d_host, = param bs = self.batch_size while True: - person_idxs = index2d_host.get_1D(bs) + person_idxs = index2d_host.get_1D(bs) samples_idxs = index2d_host.get_2D(person_idxs, 1) - + batches = None for n_batch in range(bs): person_id = person_idxs[n_batch] @@ -85,10 +82,10 @@ class SampleGeneratorFacePerson(SampleGeneratorBase): x, = SampleProcessor.process ([sample], self.sample_process_options, self.output_sample_types, self.debug) except: raise Exception ("Exception occured in sample %s. Error: %s" % (sample.filename, traceback.format_exc() ) ) - + if batches is None: batches = [ [] for _ in range(len(x)) ] - + batches += [ [] ] i_person_id = len(batches)-1 @@ -96,9 +93,9 @@ class SampleGeneratorFacePerson(SampleGeneratorBase): batches[i].append ( x[i] ) batches[i_person_id].append ( np.array([person_id]) ) - + yield [ np.array(batch) for batch in batches] - + @staticmethod def get_person_id_max_count(samples_path): return SampleHost.get_person_id_max_count(samples_path) @@ -110,43 +107,43 @@ if self.person_id_mode==1: shuffle_idxs = [] elif self.person_id_mode==2: persons_count = len(samples) - + person_idxs = [] for j in range(persons_count): for i in range(j+1,persons_count): person_idxs += [ [i,j] ] shuffle_person_idxs = [] - + samples_idxs = [None]*persons_count shuffle_idxs = [None]*persons_count - + for i in range(persons_count): samples_idxs[i] = [*range(len(samples[i]))] shuffle_idxs[i] = [] elif self.person_id_mode==3: persons_count = len(samples) - + person_idxs = [ *range(persons_count) ] shuffle_person_idxs = [] - + samples_idxs = [None]*persons_count shuffle_idxs = [None]*persons_count - + for i in range(persons_count): samples_idxs[i] = [*range(len(samples[i]))] shuffle_idxs[i] = [] - -if self.person_id_mode==2: + +if self.person_id_mode==2: if len(shuffle_person_idxs) == 0: shuffle_person_idxs = person_idxs.copy() np.random.shuffle(shuffle_person_idxs) person_ids = shuffle_person_idxs.pop() - - + + batches = None for n_batch in range(self.batch_size): - + if self.person_id_mode==1: if len(shuffle_idxs) == 0: shuffle_idxs = samples_idxs.copy() @@ -154,7 +151,7 @@ if self.person_id_mode==2: idx = shuffle_idxs.pop() sample = samples[ idx ] - + try: x, = SampleProcessor.process ([sample], self.sample_process_options, self.output_sample_types, self.debug) except: @@ -165,7 +162,7 @@ if self.person_id_mode==2: if batches is None: batches = [ [] for _ in range(len(x)) ] - + batches += [ [] ] i_person_id = len(batches)-1 @@ -174,30 +171,30 @@ if self.person_id_mode==2: batches[i_person_id].append ( np.array([sample.person_id]) ) - + elif self.person_id_mode==2: person_id1, person_id2 = person_ids - + if len(shuffle_idxs[person_id1]) == 0: shuffle_idxs[person_id1] = samples_idxs[person_id1].copy() np.random.shuffle(shuffle_idxs[person_id1]) idx = shuffle_idxs[person_id1].pop() sample1 = samples[person_id1][idx] - + if len(shuffle_idxs[person_id2]) == 0: 
shuffle_idxs[person_id2] = samples_idxs[person_id2].copy() np.random.shuffle(shuffle_idxs[person_id2]) idx = shuffle_idxs[person_id2].pop() sample2 = samples[person_id2][idx] - + if sample1 is not None and sample2 is not None: try: x1, = SampleProcessor.process ([sample1], self.sample_process_options, self.output_sample_types, self.debug) except: raise Exception ("Exception occured in sample %s. Error: %s" % (sample1.filename, traceback.format_exc() ) ) - + try: x2, = SampleProcessor.process ([sample2], self.sample_process_options, self.output_sample_types, self.debug) except: @@ -205,50 +202,50 @@ if self.person_id_mode==2: x1_len = len(x1) if batches is None: - batches = [ [] for _ in range(x1_len) ] + batches = [ [] for _ in range(x1_len) ] batches += [ [] ] i_person_id1 = len(batches)-1 - - batches += [ [] for _ in range(len(x2)) ] + + batches += [ [] for _ in range(len(x2)) ] batches += [ [] ] i_person_id2 = len(batches)-1 for i in range(x1_len): batches[i].append ( x1[i] ) - + for i in range(len(x2)): batches[x1_len+1+i].append ( x2[i] ) batches[i_person_id1].append ( np.array([sample1.person_id]) ) batches[i_person_id2].append ( np.array([sample2.person_id]) ) - - elif self.person_id_mode==3: + + elif self.person_id_mode==3: if len(shuffle_person_idxs) == 0: shuffle_person_idxs = person_idxs.copy() np.random.shuffle(shuffle_person_idxs) person_id = shuffle_person_idxs.pop() - + if len(shuffle_idxs[person_id]) == 0: shuffle_idxs[person_id] = samples_idxs[person_id].copy() np.random.shuffle(shuffle_idxs[person_id]) idx = shuffle_idxs[person_id].pop() sample1 = samples[person_id][idx] - + if len(shuffle_idxs[person_id]) == 0: shuffle_idxs[person_id] = samples_idxs[person_id].copy() np.random.shuffle(shuffle_idxs[person_id]) idx = shuffle_idxs[person_id].pop() sample2 = samples[person_id][idx] - + if sample1 is not None and sample2 is not None: try: x1, = SampleProcessor.process ([sample1], self.sample_process_options, self.output_sample_types, self.debug) except: raise Exception ("Exception occured in sample %s. Error: %s" % (sample1.filename, traceback.format_exc() ) ) - + try: x2, = SampleProcessor.process ([sample2], self.sample_process_options, self.output_sample_types, self.debug) except: @@ -256,21 +253,21 @@ if self.person_id_mode==2: x1_len = len(x1) if batches is None: - batches = [ [] for _ in range(x1_len) ] + batches = [ [] for _ in range(x1_len) ] batches += [ [] ] i_person_id1 = len(batches)-1 - - batches += [ [] for _ in range(len(x2)) ] + + batches += [ [] for _ in range(len(x2)) ] batches += [ [] ] i_person_id2 = len(batches)-1 for i in range(x1_len): batches[i].append ( x1[i] ) - + for i in range(len(x2)): batches[x1_len+1+i].append ( x2[i] ) batches[i_person_id1].append ( np.array([sample1.person_id]) ) - batches[i_person_id2].append ( np.array([sample2.person_id]) ) -""" \ No newline at end of file + batches[i_person_id2].append ( np.array([sample2.person_id]) ) +""" diff --git a/samplelib/SampleGeneratorFaceTemporal.py b/samplelib/SampleGeneratorFaceTemporal.py deleted file mode 100644 index d1a6500..0000000 --- a/samplelib/SampleGeneratorFaceTemporal.py +++ /dev/null @@ -1,91 +0,0 @@ -import pickle -import traceback - -import cv2 -import numpy as np - -from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor, - SampleType) -from utils import iter_utils - - -''' -output_sample_types = [ - [SampleProcessor.TypeFlags, size, (optional) {} opts ] , - ... 
- ] -''' -class SampleGeneratorFaceTemporal(SampleGeneratorBase): - def __init__ (self, samples_path, debug, batch_size, temporal_image_count, sample_process_options=SampleProcessor.Options(), output_sample_types=[], generators_count=2, **kwargs): - super().__init__(samples_path, debug, batch_size) - - self.temporal_image_count = temporal_image_count - self.sample_process_options = sample_process_options - self.output_sample_types = output_sample_types - - if self.debug: - self.generators_count = 1 - else: - self.generators_count = generators_count - - samples = SampleHost.load (SampleType.FACE_TEMPORAL_SORTED, self.samples_path) - samples_len = len(samples) - if samples_len == 0: - raise ValueError('No training data provided.') - - pickled_samples = pickle.dumps(samples, 4) - if self.debug: - self.generators = [iter_utils.ThisThreadGenerator ( self.batch_func, (0, pickled_samples) )] - else: - self.generators = [iter_utils.SubprocessGenerator ( self.batch_func, (i, pickled_samples) ) for i in range(self.generators_count) ] - - self.generator_counter = -1 - - def __iter__(self): - return self - - def __next__(self): - self.generator_counter += 1 - generator = self.generators[self.generator_counter % len(self.generators) ] - return next(generator) - - def batch_func(self, param): - generator_id, pickled_samples = param - samples = pickle.loads(pickled_samples) - samples_len = len(samples) - - mult_max = 1 - l = samples_len - ( (self.temporal_image_count)*mult_max - (mult_max-1) ) - - samples_idxs = [ *range(l+1) ] - - if len(samples_idxs) - self.temporal_image_count < 0: - raise ValueError('Not enough samples to fit temporal line.') - - shuffle_idxs = [] - - while True: - batches = None - for n_batch in range(self.batch_size): - if len(shuffle_idxs) == 0: - shuffle_idxs = samples_idxs.copy() - np.random.shuffle (shuffle_idxs) - - idx = shuffle_idxs.pop() - - temporal_samples = [] - mult = np.random.randint(mult_max)+1 - for i in range( self.temporal_image_count ): - sample = samples[ idx+i*mult ] - try: - temporal_samples += SampleProcessor.process ([sample], self.sample_process_options, self.output_sample_types, self.debug)[0] - except: - raise Exception ("Exception occured in sample %s. 
Error: %s" % (sample.filename, traceback.format_exc() ) ) - - if batches is None: - batches = [ [] for _ in range(len(temporal_samples)) ] - - for i in range(len(temporal_samples)): - batches[i].append ( temporal_samples[i] ) - - yield [ np.array(batch) for batch in batches] diff --git a/samplelib/SampleGeneratorImageTemporal.py b/samplelib/SampleGeneratorImageTemporal.py index 57b91b1..69b0440 100644 --- a/samplelib/SampleGeneratorImageTemporal.py +++ b/samplelib/SampleGeneratorImageTemporal.py @@ -1,10 +1,12 @@ import traceback -import numpy as np + import cv2 +import numpy as np -from utils import iter_utils +from core.joblib import SubprocessGenerator, ThisThreadGenerator +from samplelib import (SampleGeneratorBase, SampleHost, SampleProcessor, + SampleType) -from samplelib import SampleType, SampleProcessor, SampleHost, SampleGeneratorBase ''' output_sample_types = [ diff --git a/samplelib/SampleHost.py b/samplelib/SampleHost.py index 0a53b71..8429915 100644 --- a/samplelib/SampleHost.py +++ b/samplelib/SampleHost.py @@ -1,14 +1,15 @@ import multiprocessing import operator +import pickle import traceback from pathlib import Path -import pickle + import samplelib.PackedFaceset +from core import pathex +from core.interact import interact as io +from core.joblib import Subprocessor from DFLIMG import * from facelib import FaceType, LandmarksProcessor -from interact import interact as io -from joblib import Subprocessor -from utils import Path_utils, mp_utils from .Sample import Sample, SampleType @@ -45,7 +46,7 @@ class SampleHost: if sample_type == SampleType.IMAGE: if samples[sample_type] is None: - samples[sample_type] = [ Sample(filename=filename) for filename in io.progress_bar_generator( Path_utils.get_image_paths(samples_path), "Loading") ] + samples[sample_type] = [ Sample(filename=filename) for filename in io.progress_bar_generator( pathex.get_image_paths(samples_path), "Loading") ] elif sample_type == SampleType.FACE: if samples[sample_type] is None: @@ -58,7 +59,7 @@ class SampleHost: io.log_info (f"Loaded {len(result)} packed faces from {samples_path}") if result is None: - result = SampleHost.load_face_samples( Path_utils.get_image_paths(samples_path) ) + result = SampleHost.load_face_samples( pathex.get_image_paths(samples_path) ) samples[sample_type] = result elif sample_type == SampleType.FACE_TEMPORAL_SORTED: @@ -68,6 +69,31 @@ class SampleHost: return samples[sample_type] + @staticmethod + def load_face_samples ( image_paths): + result = FaceSamplesLoaderSubprocessor(image_paths).run() + sample_list = [] + + for filename, \ + ( face_type, + shape, + landmarks, + ie_polys, + eyebrows_expand_mod, + source_filename, + ) in result: + sample_list.append( Sample(filename=filename, + sample_type=SampleType.FACE, + face_type=FaceType.fromString (face_type), + shape=shape, + landmarks=landmarks, + ie_polys=ie_polys, + eyebrows_expand_mod=eyebrows_expand_mod, + source_filename=source_filename, + )) + return sample_list + + """ @staticmethod def load_face_samples ( image_paths): sample_list = [] @@ -87,10 +113,80 @@ class SampleHost: source_filename=dflimg.get_source_filename(), )) return sample_list - + """ + @staticmethod def upgradeToFaceTemporalSortedSamples( samples ): new_s = [ (s, s.source_filename) for s in samples] new_s = sorted(new_s, key=operator.itemgetter(1)) return [ s[0] for s in new_s] + + +class FaceSamplesLoaderSubprocessor(Subprocessor): + #override + def __init__(self, image_paths ): + self.image_paths = image_paths + self.image_paths_len = len(image_paths) + 
self.idxs = [*range(self.image_paths_len)] + self.result = [None]*self.image_paths_len + super().__init__('FaceSamplesLoader', FaceSamplesLoaderSubprocessor.Cli, 60) + + #override + def on_clients_initialized(self): + io.progress_bar ("Loading samples", len (self.image_paths)) + + #override + def on_clients_finalized(self): + io.progress_bar_close() + + #override + def process_info_generator(self): + for i in range(min(multiprocessing.cpu_count(), 8) ): + yield 'CPU%d' % (i), {}, {} + + #override + def get_data(self, host_dict): + if len (self.idxs) > 0: + idx = self.idxs.pop(0) + return idx, self.image_paths[idx] + + return None + + #override + def on_data_return (self, host_dict, data): + self.idxs.insert(0, data[0]) + + #override + def on_result (self, host_dict, data, result): + idx, dflimg = result + self.result[idx] = (self.image_paths[idx], dflimg) + io.progress_bar_inc(1) + + #override + def get_result(self): + return self.result + + class Cli(Subprocessor.Cli): + #override + def process_data(self, data): + idx, filename = data + dflimg = DFLIMG.load (Path(filename)) + + if dflimg is None: + self.log_err (f"FaceSamplesLoader: {filename} is not a dfl image file.") + data = None + else: + data = (dflimg.get_face_type(), + dflimg.get_shape(), + dflimg.get_landmarks(), + dflimg.get_ie_polys(), + dflimg.get_eyebrows_expand_mod(), + dflimg.get_source_filename() ) + + return idx, data + + #override + def get_data_name (self, data): + #return string identificator of your data + return data[1] diff --git a/samplelib/SampleProcessor.py b/samplelib/SampleProcessor.py index 726920f..e5f67ee 100644 --- a/samplelib/SampleProcessor.py +++ b/samplelib/SampleProcessor.py @@ -4,7 +4,7 @@ from enum import IntEnum import cv2 import numpy as np -import imagelib +from core import imagelib from facelib import FaceType, LandmarksProcessor @@ -154,9 +154,9 @@ class SampleProcessor(object): yaw = -yaw if img_type == SPTF.IMG_PITCH_YAW_ROLL_SIGMOID: - pitch = (pitch+1.0) / 2.0 - yaw = (yaw+1.0) / 2.0 - roll = (roll+1.0) / 2.0 + pitch = np.clip( (pitch / math.pi) / 2.0 + 1.0, 0, 1) + yaw = np.clip( (yaw / math.pi) / 2.0 + 1.0, 0, 1) + roll = np.clip( (roll / math.pi) / 2.0 + 1.0, 0, 1) img = (pitch, yaw, roll) else: diff --git a/samplelib/__init__.py b/samplelib/__init__.py index 67630c5..ecfbfec 100644 --- a/samplelib/__init__.py +++ b/samplelib/__init__.py @@ -5,6 +5,5 @@ from .SampleProcessor import SampleProcessor from .SampleGeneratorBase import SampleGeneratorBase from .SampleGeneratorFace import SampleGeneratorFace from .SampleGeneratorFacePerson import SampleGeneratorFacePerson -from .SampleGeneratorFaceTemporal import SampleGeneratorFaceTemporal from .SampleGeneratorImageTemporal import SampleGeneratorImageTemporal from .PackedFaceset import PackedFaceset \ No newline at end of file diff --git a/utils/pickle_utils.py b/utils/pickle_utils.py deleted file mode 100644 index 37c4c72..0000000 --- a/utils/pickle_utils.py +++ /dev/null @@ -1,9 +0,0 @@ -class AntiPickler(): - def __init__(self, obj): - self.obj = obj - - def __getstate__(self): - return dict() - - def __setstate__(self, d): - self.__dict__.update(d) \ No newline at end of file
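
Aside on the removed utils/pickle_utils.py above: AntiPickler wraps an arbitrary object so that pickling silently drops the wrapped payload, which is handy when an object graph holds handles (locks, file objects, sessions) that cannot cross process boundaries. A minimal usage sketch, not part of the patch and independent of the DeepFaceLab codebase, using only the class body deleted above:

    import pickle
    import threading

    class AntiPickler():
        def __init__(self, obj):
            self.obj = obj              # payload that must never be serialized

        def __getstate__(self):
            return dict()               # pickle stores an empty state instead of self.obj

        def __setstate__(self, d):
            self.__dict__.update(d)     # restored instance simply has no 'obj' attribute

    wrapped = AntiPickler(threading.Lock())   # a lock is normally unpicklable
    data = pickle.dumps(wrapped)              # succeeds: the payload is dropped, not serialized
    restored = pickle.loads(data)
    print(hasattr(restored, "obj"))           # False - the payload does not survive the round trip

The round trip succeeds because __getstate__ hides the unpicklable member; the cost is that the attribute is gone after unpickling, so callers must treat the restored wrapper as empty.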