diff --git a/converters/ConvertAvatar.py b/converters/ConvertAvatar.py
index 2dcfeaa..28e5c52 100644
--- a/converters/ConvertAvatar.py
+++ b/converters/ConvertAvatar.py
@@ -26,6 +26,9 @@ def ConvertFaceAvatar (cfg, prev_temporal_frame_infos, frame_info, next_temporal
 
     prd_f = cfg.predictor_func ( prev_imgs, img, next_imgs )
 
+    if cfg.super_resolution_mode != 0:
+        prd_f = cfg.superres_func(cfg.super_resolution_mode, prd_f)
+
     out_img = np.clip(prd_f, 0.0, 1.0)
 
     if cfg.add_source_image:
diff --git a/converters/ConvertMasked.py b/converters/ConvertMasked.py
index 6c33328..3130842 100644
--- a/converters/ConvertMasked.py
+++ b/converters/ConvertMasked.py
@@ -27,7 +27,7 @@ def ConvertMaskedFace (cfg, frame_info, img_bgr_uint8, img_bgr, img_face_landmar
     out_merging_mask = None
 
     output_size = cfg.predictor_input_shape[0]
-    if cfg.super_resolution:
+    if cfg.super_resolution_mode != 0:
         output_size *= 2
 
     face_mat = LandmarksProcessor.get_transform_mat (img_face_landmarks, output_size, face_type=cfg.face_type)
@@ -48,12 +48,12 @@ def ConvertMaskedFace (cfg, frame_info, img_bgr_uint8, img_bgr, img_face_landmar
     prd_face_bgr = np.clip (predicted, 0, 1.0 )
     prd_face_mask_a_0 = cv2.resize (dst_face_mask_a_0, cfg.predictor_input_shape[0:2] )
 
-    if cfg.super_resolution:
+    if cfg.super_resolution_mode:
         #if debug:
         #    tmp = cv2.resize (prd_face_bgr, (output_size,output_size), cv2.INTER_CUBIC)
         #    debugs += [ np.clip( cv2.warpAffine( tmp, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC, cv2.BORDER_TRANSPARENT ), 0, 1.0) ]
 
-        prd_face_bgr = cfg.dcscn_upscale_func(prd_face_bgr)
+        prd_face_bgr = cfg.superres_func(cfg.super_resolution_mode, prd_face_bgr)
 
         #if debug:
         #    debugs += [ np.clip( cv2.warpAffine( prd_face_bgr, face_output_mat, img_size, img_bgr.copy(), cv2.WARP_INVERSE_MAP | cv2.INTER_CUBIC, cv2.BORDER_TRANSPARENT ), 0, 1.0) ]
@@ -335,7 +335,7 @@ def ConvertMaskedFace (cfg, frame_info, img_bgr_uint8, img_bgr, img_face_landmar
                 k_size = int(frame_info.motion_power*cfg_mp)
                 if k_size >= 1:
                     k_size = np.clip (k_size+1, 2, 50)
-                    if cfg.super_resolution:
+                    if cfg.super_resolution_mode:
                         k_size *= 2
                     out_face_bgr = cv2.warpAffine( out_img, face_mat, (output_size, output_size) )
                     new_out_face_bgr = imagelib.LinearMotionBlur (out_face_bgr, k_size , frame_info.motion_deg)
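Note on the two converter files above: the boolean `cfg.super_resolution` becomes the integer `cfg.super_resolution_mode`, and the DCSCN-specific `cfg.dcscn_upscale_func` becomes the mode-aware `cfg.superres_func`. A minimal sketch of the resulting call contract (the helper function here is hypothetical; the attribute names are from this patch):

    import numpy as np

    def apply_superres(cfg, prd_face_bgr):
        # Mode 0 means "off"; any other key of cfg.super_res_dict selects a
        # backend inside cfg.superres_func, which the subprocess injects.
        if cfg.super_resolution_mode != 0:
            prd_face_bgr = cfg.superres_func(cfg.super_resolution_mode, prd_face_bgr)
        return np.clip(prd_face_bgr, 0.0, 1.0)

ConvertMasked.py additionally doubles `output_size` (and the motion-blur kernel size) whenever a mode is active, because the mode-1 backend returns a face at twice the predictor resolution.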
diff --git a/converters/ConverterConfig.py b/converters/ConverterConfig.py
index 819bf0c..dc43774 100644
--- a/converters/ConverterConfig.py
+++ b/converters/ConverterConfig.py
@@ -92,7 +92,7 @@ class ConverterConfigMasked(ConverterConfig):
         self.motion_blur_power = 0
         self.output_face_scale = 0
         self.color_transfer_mode = 0
-        self.super_resolution = False
+        self.super_resolution_mode = 0
         self.color_degrade_power = 0
         self.export_mask_alpha = False
 
@@ -118,9 +118,11 @@ class ConverterConfigMasked(ConverterConfig):
                            2:'dst',
                            4:'FAN-dst',
                            7:'learned*FAN-dst'}
-
+
         self.ctm_dict = { 0: "None", 1:"rct", 2:"lct" }
         self.ctm_str_dict = {None:0, "rct":1, "lct": 2 }
+
+        self.super_res_dict = {0:"None", 1:'RankSRGAN'}
 
     def copy(self):
         return copy.copy(self)
@@ -158,8 +160,9 @@ class ConverterConfigMasked(ConverterConfig):
     def toggle_color_transfer_mode(self):
         self.color_transfer_mode = (self.color_transfer_mode+1) % 3
 
-    def toggle_super_resolution(self):
-        self.super_resolution = not self.super_resolution
+    def toggle_super_resolution_mode(self):
+        a = list( self.super_res_dict.keys() )
+        self.super_resolution_mode = a[ (a.index(self.super_resolution_mode)+1) % len(a) ]
 
     def add_color_degrade_power(self, diff):
         self.color_degrade_power = np.clip ( self.color_degrade_power+diff , 0, 100)
@@ -210,8 +213,13 @@ class ConverterConfigMasked(ConverterConfig):
             self.color_transfer_mode = io.input_str ("Apply color transfer to predicted face? Choose mode ( rct/lct skip:None ) : ", None, ['rct','lct'])
             self.color_transfer_mode = self.ctm_str_dict[self.color_transfer_mode]
 
-        self.super_resolution = io.input_bool("Apply super resolution? (y/n ?:help skip:n) : ", False, help_message="Enhance details by applying DCSCN network.")
+        s = """Choose super resolution mode: \n"""
+        for key in self.super_res_dict.keys():
+            s += f"""({key}) {self.super_res_dict[key]}\n"""
+        s += f"""?:help Default: {list(self.super_res_dict.keys())[0]} : """
+        self.super_resolution_mode = io.input_int (s, 0, valid_list=self.super_res_dict.keys(), help_message="Enhance details by applying a super-resolution network.")
+
         if 'raw' not in self.mode:
             self.color_degrade_power = np.clip ( io.input_int ("Degrade color power of final image [0..100] (skip:0) : ", 0), 0, 100)
             self.export_mask_alpha = io.input_bool("Export png with alpha channel of the mask? (y/n skip:n) : ", False)
@@ -231,7 +239,7 @@ class ConverterConfigMasked(ConverterConfig):
                self.motion_blur_power == other.motion_blur_power and \
                self.output_face_scale == other.output_face_scale and \
                self.color_transfer_mode == other.color_transfer_mode and \
-               self.super_resolution == other.super_resolution and \
+               self.super_resolution_mode == other.super_resolution_mode and \
               self.color_degrade_power == other.color_degrade_power and \
                self.export_mask_alpha == other.export_mask_alpha
 
@@ -264,7 +272,7 @@ class ConverterConfigMasked(ConverterConfig):
         if 'raw' not in self.mode:
             r += f"""color_transfer_mode: { self.ctm_dict[self.color_transfer_mode]}\n"""
 
-        r += f"""super_resolution: {self.super_resolution}\n"""
+        r += f"""super_resolution_mode: {self.super_res_dict[self.super_resolution_mode]}\n"""
 
         if 'raw' not in self.mode:
             r += (f"""color_degrade_power: {self.color_degrade_power}\n"""
@@ -289,6 +297,8 @@ class ConverterConfigFaceAvatar(ConverterConfig):
 
         #changeable params
         self.add_source_image = False
+        self.super_resolution_mode = 0
+        self.super_res_dict = {0:"None", 1:'RankSRGAN'}
 
     def copy(self):
         return copy.copy(self)
@@ -296,22 +306,33 @@ class ConverterConfigFaceAvatar(ConverterConfig):
     #override
     def ask_settings(self):
         self.add_source_image = io.input_bool("Add source image? (y/n ?:help skip:n) : ", False, help_message="Add source image for comparison.")
+
+        s = """Choose super resolution mode: \n"""
+        for key in self.super_res_dict.keys():
+            s += f"""({key}) {self.super_res_dict[key]}\n"""
+        s += f"""?:help Default: {list(self.super_res_dict.keys())[0]} : """
+        self.super_resolution_mode = io.input_int (s, 0, valid_list=self.super_res_dict.keys(), help_message="Enhance details by applying a super-resolution network.")
 
     def toggle_add_source_image(self):
         self.add_source_image = not self.add_source_image
+
+    def toggle_super_resolution_mode(self):
+        a = list( self.super_res_dict.keys() )
+        self.super_resolution_mode = a[ (a.index(self.super_resolution_mode)+1) % len(a) ]
 
     #override
     def __eq__(self, other):
         #check equality of changeable params
 
         if isinstance(other, ConverterConfigFaceAvatar):
-            return self.add_source_image == other.add_source_image
-
+            return self.add_source_image == other.add_source_image and \
+                   self.super_resolution_mode == other.super_resolution_mode
         return False
 
     #override
     def __str__(self):
         return ("ConverterConfig: \n"
                 f"add_source_image : {self.add_source_image}\n"
+                f"super_resolution_mode : {self.super_res_dict[self.super_resolution_mode]}\n"
                 "================"
                 )
\ No newline at end of file
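The new toggle cycles through the keys of `super_res_dict` instead of flipping a bool, so a third backend would only need one extra dict entry. A self-contained check of the cycling logic as written in `toggle_super_resolution_mode` above:

    super_res_dict = {0: "None", 1: 'RankSRGAN'}

    def toggle(mode):
        a = list(super_res_dict.keys())
        return a[(a.index(mode) + 1) % len(a)]

    assert toggle(0) == 1  # "None" -> "RankSRGAN"
    assert toggle(1) == 0  # wraps back to "None"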
diff --git a/doc/manual_en_google_translated.docx b/doc/manual_en_google_translated.docx
index d575ec6..4c53cd7 100644
Binary files a/doc/manual_en_google_translated.docx and b/doc/manual_en_google_translated.docx differ
diff --git a/doc/manual_en_google_translated.pdf b/doc/manual_en_google_translated.pdf
index e878346..16beeea 100644
Binary files a/doc/manual_en_google_translated.pdf and b/doc/manual_en_google_translated.pdf differ
diff --git a/doc/manual_ru.pdf b/doc/manual_ru.pdf
index 9567d7e..7a485d4 100644
Binary files a/doc/manual_ru.pdf and b/doc/manual_ru.pdf differ
diff --git a/doc/manual_ru_source.docx b/doc/manual_ru_source.docx
index 1686ec6..14e7646 100644
Binary files a/doc/manual_ru_source.docx and b/doc/manual_ru_source.docx differ
diff --git a/imagelib/DCSCN.h5 b/imagelib/DCSCN.h5
deleted file mode 100644
index d9c8ac7..0000000
Binary files a/imagelib/DCSCN.h5 and /dev/null differ
diff --git a/imagelib/DCSCN.py b/imagelib/DCSCN.py
deleted file mode 100644
index 332e70a..0000000
--- a/imagelib/DCSCN.py
+++ /dev/null
@@ -1,164 +0,0 @@
-import numpy as np
-import cv2
-from pathlib import Path
-from nnlib import nnlib
-from interact import interact as io
-
-class DCSCN():
-    def __init__(self):
-        exec( nnlib.import_all(), locals(), globals() )
-
-        inp_x = KL.Input([None, None, 1])
-        inp_x2 = KL.Input([None, None, 1])
-
-        x = inp_x
-        layers_count = 12
-        layers = []
-        for i in range(1,layers_count+1):
-            if i == 1:
-                output_feature_num = 196
-            else:
-                x1 = (i-1) / float(layers_count - 1)
-                y1 = x1 ** (1.0 / 1.5)
-                output_feature_num = int((196 - 48) * (1 - y1) + 48)
-            x = Conv2D(output_feature_num, kernel_size=3, strides=1, padding='same', name='CNN%d' % (i) ) (x)
-            x = PReLU(shared_axes=[1,2], name='CNN%d_prelu' % (i) ) (x)
-            layers.append(x)
-
-        x_concat = KL.Concatenate()(layers)
-
-        A1 = Conv2D(64, kernel_size=1, strides=1, padding='same', name='A1' ) (x_concat)
-        A1 = PReLU(shared_axes=[1,2], name='A1_prelu') (A1)
-
-        B1 = Conv2D(32, kernel_size=1, strides=1, padding='same', name='B1' ) (x_concat)
-        B1 = PReLU(shared_axes=[1,2], name='B1_prelu') (B1)
-
-        B2 = Conv2D(32, kernel_size=3, strides=1, padding='same', name='B2' ) (B1)
-        B2 = PReLU(shared_axes=[1,2], name='B2_prelu') (B2)
-
-        x = KL.Concatenate()([B2,A1])
-        x = Conv2D(96*4, kernel_size=3, strides=1, padding='same', name='Up_PS' )(x)
-        x = PixelShuffler()(x)
-        x = Conv2D(1, kernel_size=3, strides=1, padding='same', name='R_CNN1', use_bias=False )(x)
-        x = KL.Add()([x, inp_x2])
-        self.model = keras.models.Model ([inp_x, inp_x2], [x])
-        self.model.load_weights ( Path(__file__).parent / 'DCSCN.h5' )
-
-    def upscale(self, img, is_bgr=True, is_float=True):
-        if is_bgr:
-            img = img[...,::-1]
-
-        if is_float:
-            img = np.clip (img*255, 0, 255)
-
-        img_shape_len = len(img.shape)
-        h, w = img.shape[:2]
-        ch = img.shape[2] if len(img.shape) >= 3 else 1
-
-        nh, nw = h*2, w*2
-
-        img_x = self.convert_rgb_to_y(img)
-
-        img_bx = cv2.resize(img_x, (nh, nw), cv2.INTER_CUBIC)
-
-        ensemble = 8
-
-        output = np.zeros([nh,nw,1], dtype=np.float32)
-
-        for i in range(ensemble):
-            x = np.reshape( self.flip(img_x, i), (1,h,w,1) )
-            bx = np.reshape( self.flip(img_bx, i), (1,nh,nw,1) )
-            y = self.model.predict([x,bx])[0]
-            y = self.flip(y, i, invert=True)
-            output += y
-
-        output /= ensemble
-
-        bimg = cv2.resize(img, (nh, nw), cv2.INTER_CUBIC)
-        bimg_ycbcr = self.convert_rgb_to_ycbcr(bimg)
-
-        if ch > 1:
-            output = self.convert_y_and_cbcr_to_rgb(output, bimg_ycbcr[:, :, 1:3])
-
-        if is_float:
-            output = np.clip (output/255.0, 0, 1.0)
-
-        if is_bgr:
-            output = output[...,::-1]
-
-        return output
-
-    def convert_rgb_to_y(self, image):
-        if len(image.shape) <= 2 or image.shape[2] == 1:
-            return image
-
-        xform = np.array([[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0]], dtype=np.float32)
-        y_image = image.dot(xform.T) + 16.0
-
-        return y_image
-
-
-    def convert_rgb_to_ycbcr(self, image):
-        if len(image.shape) <= 2 or image.shape[2] == 1:
-            return image
-
-        xform = np.array(
-            [[65.738 / 256.0, 129.057 / 256.0, 25.064 / 256.0],
-             [- 37.945 / 256.0, - 74.494 / 256.0, 112.439 / 256.0],
-             [112.439 / 256.0, - 94.154 / 256.0, - 18.285 / 256.0]], dtype=np.float32)
-
-        ycbcr_image = image.dot(xform.T)
-        ycbcr_image[:, :, 0] += 16.0
-        ycbcr_image[:, :, [1, 2]] += 128.0
-
-        return ycbcr_image
-
-    def convert_ycbcr_to_rgb(self,ycbcr_image):
-        rgb_image = np.zeros([ycbcr_image.shape[0], ycbcr_image.shape[1], 3], dtype=np.float32)
-
-        rgb_image[:, :, 0] = ycbcr_image[:, :, 0] - 16.0
-        rgb_image[:, :, [1, 2]] = ycbcr_image[:, :, [1, 2]] - 128.0
-        xform = np.array(
-            [[298.082 / 256.0, 0, 408.583 / 256.0],
-             [298.082 / 256.0, -100.291 / 256.0, -208.120 / 256.0],
-             [298.082 / 256.0, 516.412 / 256.0, 0]], dtype=np.float32)
-        rgb_image = rgb_image.dot(xform.T)
-
-        return rgb_image
-
-    def convert_y_and_cbcr_to_rgb(self,y_image, cbcr_image):
-        if len(y_image.shape) <= 2:
-            y_image = y_image.reshape[y_image.shape[0], y_image.shape[1], 1]
-
-        if len(y_image.shape) == 3 and y_image.shape[2] == 3:
-            y_image = y_image[:, :, 0:1]
-
-        ycbcr_image = np.zeros([y_image.shape[0], y_image.shape[1], 3], dtype=np.float32)
-        ycbcr_image[:, :, 0] = y_image[:, :, 0]
-        ycbcr_image[:, :, 1:3] = cbcr_image[:, :, 0:2]
-
-        return self.convert_ycbcr_to_rgb(ycbcr_image)
-
-    def flip(self, image, flip_type, invert=False):
-        if flip_type == 0:
-            return image
-        elif flip_type == 1:
-            return np.flipud(image)
-        elif flip_type == 2:
-            return np.fliplr(image)
-        elif flip_type == 3:
-            return np.flipud(np.fliplr(image))
-        elif flip_type == 4:
-            return np.rot90(image, 1 if invert is False else -1)
-        elif flip_type == 5:
-            return np.rot90(image, -1 if invert is False else 1)
-        elif flip_type == 6:
-            if invert is False:
-                return np.flipud(np.rot90(image))
-            else:
-                return np.rot90(np.flipud(image), -1)
-        elif flip_type == 7:
-            if invert is False:
-                return np.flipud(np.rot90(image, -1))
-            else:
-                return np.rot90(np.flipud(image), 1)
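For reference, the deleted DCSCN path upscaled only the luma (Y) channel, took the chroma from a bicubic upscale, and averaged the prediction over the eight dihedral flips, so `flip(..., invert=True)` had to undo each transform exactly. A quick NumPy round-trip check of that property, restating the deleted `flip` compactly (sketch only):

    import numpy as np

    def flip(img, t, invert=False):
        if t < 4:  # identity, up-down, left-right, both: all self-inverse
            return [img, np.flipud(img), np.fliplr(img), np.flipud(np.fliplr(img))][t]
        if t == 4: return np.rot90(img, 1 if not invert else -1)
        if t == 5: return np.rot90(img, -1 if not invert else 1)
        if t == 6: return np.flipud(np.rot90(img)) if not invert else np.rot90(np.flipud(img), -1)
        return np.flipud(np.rot90(img, -1)) if not invert else np.rot90(np.flipud(img), 1)

    x = np.random.rand(5, 7)
    assert all(np.allclose(flip(flip(x, t), t, invert=True), x) for t in range(8))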
diff --git a/imagelib/RankSRGAN.h5 b/imagelib/RankSRGAN.h5
new file mode 100644
index 0000000..765ae2e
Binary files /dev/null and b/imagelib/RankSRGAN.h5 differ
diff --git a/imagelib/RankSRGAN.py b/imagelib/RankSRGAN.py
new file mode 100644
index 0000000..529dea8
--- /dev/null
+++ b/imagelib/RankSRGAN.py
@@ -0,0 +1,109 @@
+import numpy as np
+import cv2
+from pathlib import Path
+from nnlib import nnlib
+from interact import interact as io
+
+class RankSRGAN():
+    def __init__(self):
+        exec( nnlib.import_all(), locals(), globals() )
+
+        class PixelShufflerTorch(KL.Layer):
+            def __init__(self, size=(2, 2), data_format='channels_last', **kwargs):
+                super(PixelShufflerTorch, self).__init__(**kwargs)
+                self.data_format = data_format
+                self.size = size
+
+            def call(self, inputs):
+                input_shape = K.shape(inputs)
+                if K.int_shape(input_shape)[0] != 4:
+                    raise ValueError('Inputs should have rank 4; Received input shape:', str(K.int_shape(inputs)))
+
+                batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], K.int_shape(inputs)[-1]
+                rh, rw = self.size
+                oh, ow = h * rh, w * rw
+                oc = c // (rh * rw)
+
+                out = inputs
+                out = K.permute_dimensions(out, (0, 3, 1, 2))  #NCHW
+
+                out = K.reshape(out, (batch_size, oc, rh, rw, h, w))
+                out = K.permute_dimensions(out, (0, 1, 4, 2, 5, 3))
+                out = K.reshape(out, (batch_size, oc, oh, ow))
+
+                out = K.permute_dimensions(out, (0, 2, 3, 1))
+                return out
+
+            def compute_output_shape(self, input_shape):
+                if len(input_shape) != 4:
+                    raise ValueError('Inputs should have rank ' + str(4) + '; Received input shape:', str(input_shape))
+
+                height = input_shape[1] * self.size[0] if input_shape[1] is not None else None
+                width = input_shape[2] * self.size[1] if input_shape[2] is not None else None
+                channels = input_shape[3] // self.size[0] // self.size[1]
+
+                if channels * self.size[0] * self.size[1] != input_shape[3]:
+                    raise ValueError('channels of input and size are incompatible')
+
+                return (input_shape[0],
+                        height,
+                        width,
+                        channels)
+
+            def get_config(self):
+                config = {'size': self.size,
+                          'data_format': self.data_format}
+                base_config = super(PixelShufflerTorch, self).get_config()
+
+                return dict(list(base_config.items()) + list(config.items()))
+
+        def res_block(inp, name_prefix):
+            x = inp
+            x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', activation="relu", name=name_prefix+"0")(x)
+            x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', name=name_prefix+"2")(x)
+            return Add()([inp,x])
+
+        ndf = 64
+        nb = 16
+        inp = Input ( (None, None,3) )
+        x = inp
+
+        x = x0 = Conv2D (ndf, kernel_size=3, strides=1, padding='same', name="model0")(x)
+        for i in range(nb):
+            x = res_block(x, "model1%.2d" %i )
+        x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', name="model1160")(x)
+        x = Add()([x0,x])
+
+        x = ReLU() ( PixelShufflerTorch() ( Conv2D (ndf*4, kernel_size=3, strides=1, padding='same', name="model2")(x) ) )
+        x = ReLU() ( PixelShufflerTorch() ( Conv2D (ndf*4, kernel_size=3, strides=1, padding='same', name="model5")(x) ) )
+
+        x = Conv2D (ndf, kernel_size=3, strides=1, padding='same', activation="relu", name="model8")(x)
+        x = Conv2D (3, kernel_size=3, strides=1, padding='same', name="model10")(x)
+        self.model = Model(inp, x )
+        self.model.load_weights ( Path(__file__).parent / 'RankSRGAN.h5')
+
+    def upscale(self, img, scale=2, is_bgr=True, is_float=True):
+        if scale not in [2,4]:
+            raise ValueError ("RankSRGAN: supported scales are 2 or 4.")
+
+        if not is_bgr:
+            img = img[...,::-1]
+
+        if not is_float:
+            img = img / 255.0
+
+        h, w = img.shape[:2]
+        ch = img.shape[2] if len(img.shape) >= 3 else 1
+
+        output = self.model.predict([img[None,...]])[0]
+
+        if scale == 2:
+            output = cv2.resize (output, (w*scale, h*scale), interpolation=cv2.INTER_CUBIC)
+
+        if not is_float:
+            output = np.clip (output * 255.0, 0, 255.0)
+
+        if not is_bgr:
+            output = output[...,::-1]
+
+        return output
\ No newline at end of file
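Why RankSRGAN.py ships its own shuffler: the weights were converted from a PyTorch checkpoint, and PyTorch's pixel_shuffle decomposes the channel axis as (C, r, r), whereas TensorFlow's depth_to_space (and the stock PixelShuffler the deleted DCSCN used) decomposes it as (r, r, C). A NumPy restatement of the mapping `PixelShufflerTorch.call` implements, under that assumption:

    import numpy as np

    def pixel_shuffle_torch(x, r=2):
        # x: (H, W, C) with C = oc * r * r and channel index oc*r*r + i*r + j
        h, w, c = x.shape
        oc = c // (r * r)
        x = x.transpose(2, 0, 1).reshape(oc, r, r, h, w)  # split channel -> (oc, i, j)
        x = x.transpose(0, 3, 1, 4, 2).reshape(oc, h * r, w * r)
        return x.transpose(1, 2, 0)                       # back to (H*r, W*r, oc)

    x = np.arange(8, dtype=np.float32).reshape(1, 1, 8)   # one pixel, 8 channels, r=2 -> oc=2
    print(pixel_shuffle_torch(x)[..., 0])                 # [[0. 1.] [2. 3.]]: channels 0..3 tile the block

Feeding a PyTorch-ordered tensor through a TF-ordered shuffler would scramble the sub-pixel grid, which is why reusing the existing layer was not an option here.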
diff --git a/imagelib/__init__.py b/imagelib/__init__.py
index 1d8ce5e..4d6b2a7 100644
--- a/imagelib/__init__.py
+++ b/imagelib/__init__.py
@@ -13,7 +13,7 @@ from .reduce_colors import reduce_colors
 
 from .color_transfer import color_hist_match, reinhard_color_transfer, linear_color_transfer
 
-from .DCSCN import DCSCN
+from .RankSRGAN import RankSRGAN
 
 from .common import normalize_channels, overlay_alpha_image
 
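With the export swapped in imagelib/__init__.py, standalone use of the new class looks like this (a sketch; the image path is hypothetical, and the float-BGR convention follows `upscale`'s defaults):

    import cv2
    import numpy as np
    from imagelib import RankSRGAN

    sr = RankSRGAN()                                   # loads imagelib/RankSRGAN.h5 on construction
    face = cv2.imread('face.png').astype(np.float32) / 255.0  # HWC, BGR, float in [0,1]
    face_x2 = sr.upscale(face, scale=2)                # net runs at 4x; result is resized down to 2x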
4.") + + if not is_bgr: + img = img[...,::-1] + + if not is_float: + img /= 255.0 + + h, w = img.shape[:2] + ch = img.shape[2] if len(img.shape) >= 3 else 1 + + output = self.model.predict([img[None,...]])[0] + + if scale == 2: + output = cv2.resize (output, (w*scale, h*scale), cv2.INTER_CUBIC) + + if not is_float: + output = np.clip (output * 255.0, 0, 255.0) + + if not is_bgr: + output = output[...,::-1] + + return output \ No newline at end of file diff --git a/imagelib/__init__.py b/imagelib/__init__.py index 1d8ce5e..4d6b2a7 100644 --- a/imagelib/__init__.py +++ b/imagelib/__init__.py @@ -13,7 +13,7 @@ from .reduce_colors import reduce_colors from .color_transfer import color_hist_match, reinhard_color_transfer, linear_color_transfer -from .DCSCN import DCSCN +from .RankSRGAN import RankSRGAN from .common import normalize_channels, overlay_alpha_image diff --git a/mainscripts/Converter.py b/mainscripts/Converter.py index 5811371..a1cf6f2 100644 --- a/mainscripts/Converter.py +++ b/mainscripts/Converter.py @@ -71,7 +71,7 @@ class ConvertSubprocessor(Subprocessor): self.device_idx = client_dict['device_idx'] self.device_name = client_dict['device_name'] self.predictor_func = client_dict['predictor_func'] - self.dcscn_upscale_func = client_dict['dcscn_upscale_func'] + self.superres_func = client_dict['superres_func'] #transfer and set stdin in order to work code.interact in debug subprocess stdin_fd = client_dict['stdin_fd'] @@ -103,7 +103,8 @@ class ConvertSubprocessor(Subprocessor): def process_data(self, pf): #pf=ProcessingFrame cfg = pf.cfg.copy() cfg.predictor_func = self.predictor_func - + cfg.superres_func = self.superres_func + frame_info = pf.frame_info filename = frame_info.filename @@ -127,7 +128,6 @@ class ConvertSubprocessor(Subprocessor): pf.image = img_bgr else: if cfg.type == ConverterConfig.TYPE_MASKED: - cfg.dcscn_upscale_func = self.dcscn_upscale_func cfg.fanseg_input_size = self.fanseg_input_size cfg.fanseg_extract_func = self.fanseg_extract_func @@ -176,12 +176,14 @@ class ConvertSubprocessor(Subprocessor): self.converter_config.predictor_func = None self.dcscn = None - def DCSCN_upscale(*args, **kwargs): - if self.dcscn is None: - self.dcscn = imagelib.DCSCN() - return self.dcscn.upscale(*args, **kwargs) + self.ranksrgan = None + def superres_func(mode, *args, **kwargs): + if mode == 1: + if self.ranksrgan is None: + self.ranksrgan = imagelib.RankSRGAN() + return self.ranksrgan.upscale(*args, **kwargs) - self.dcscn_host, self.dcscn_upscale_func = SubprocessFunctionCaller.make_pair(DCSCN_upscale) + self.dcscn_host, self.superres_func = SubprocessFunctionCaller.make_pair(superres_func) self.frames = frames self.output_path = output_path @@ -205,7 +207,7 @@ class ConvertSubprocessor(Subprocessor): yield 'CPU%d' % (i), {}, {'device_idx': i, 'device_name': 'CPU%d' % (i), 'predictor_func': self.predictor_func, - 'dcscn_upscale_func': self.dcscn_upscale_func, + 'superres_func': self.superres_func, 'stdin_fd': sys.stdin.fileno() if CONVERTER_DEBUG else None } @@ -333,12 +335,14 @@ class ConvertSubprocessor(Subprocessor): elif chr_key == 'c': cfg.toggle_color_transfer_mode() elif chr_key == 'v': - cfg.toggle_super_resolution() + cfg.toggle_super_resolution_mode() elif chr_key == 'b': cfg.toggle_export_mask_alpha() else: if chr_key == 's': cfg.toggle_add_source_image() + elif chr_key == 'v': + cfg.toggle_super_resolution_mode() if prev_cfg != cfg: io.log_info (cfg) diff --git a/mainscripts/gfx/help_converter_face_avatar.jpg b/mainscripts/gfx/help_converter_face_avatar.jpg 
diff --git a/mainscripts/gfx/help_converter_face_avatar.jpg b/mainscripts/gfx/help_converter_face_avatar.jpg
index 67f8885..c9fff23 100644
Binary files a/mainscripts/gfx/help_converter_face_avatar.jpg and b/mainscripts/gfx/help_converter_face_avatar.jpg differ
diff --git a/mainscripts/gfx/help_converter_face_avatar_source.psd b/mainscripts/gfx/help_converter_face_avatar_source.psd
index 69b46c4..98ec516 100644
Binary files a/mainscripts/gfx/help_converter_face_avatar_source.psd and b/mainscripts/gfx/help_converter_face_avatar_source.psd differ