Removing CuPy. FaceMerger now works with any OpenCL 1.2-compatible GPU.

iperov 2021-09-30 18:31:11 +04:00
parent c2ba2bab9d
commit 4fe2da23c0
6 changed files with 134 additions and 292 deletions

View file

@@ -3,7 +3,7 @@ from enum import IntEnum
 import numexpr as ne
 import numpy as np
-from xlib import cupy as lib_cp
+from xlib import avecl as lib_cl
 from xlib import os as lib_os
 from xlib.image import ImageProcessor
 from xlib.mp import csw as lib_csw
@@ -57,12 +57,13 @@ class FaceMergerWorker(BackendWorker):
         cs.face_opacity.call_on_number(self.on_cs_face_opacity)
         cs.device.enable()
-        cs.device.set_choices( ['CPU'] + lib_cp.get_available_devices(), none_choice_name='@misc.menu_select')
+        cs.device.set_choices( ['CPU'] + lib_cl.get_available_devices_info(), none_choice_name='@misc.menu_select')
         cs.device.select(state.device if state.device is not None else 'CPU')
-    def on_cs_device(self, idxs, device : lib_cp.CuPyDeviceInfo):
+    def on_cs_device(self, idxs, device : lib_cl.DeviceInfo):
         state, cs = self.get_state(), self.get_control_sheet()
         if device is not None and state.device == device:
             cs.face_x_offset.enable()
@@ -94,17 +95,6 @@ class FaceMergerWorker(BackendWorker):
             cs.face_opacity.set_config(lib_csw.Number.Config(min=0.0, max=1.0, step=0.01, decimals=2, allow_instant_update=True))
             cs.face_opacity.set_number(state.face_opacity if state.face_opacity is not None else 1.0)
-            if device != 'CPU':
-                self.is_gpu = True
-                global cp
-                import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand
-                cp.cuda.Device( device.get_index() ).use()
-                self.cp_mask_clip_kernel = cp.ElementwiseKernel('T x', 'T z', 'z = x < 0.004 ? 0 : x > 1.0 ? 1.0 : x', 'mask_clip_kernel')
-                self.cp_merge_kernel = cp.ElementwiseKernel('T bg, T face, T mask', 'T z', 'z = bg*(1.0-mask) + face*mask', 'merge_kernel')
-                self.cp_merge_kernel_opacity = cp.ElementwiseKernel('T bg, T face, T mask, T opacity', 'T z', 'z = bg*(1.0-mask) + bg*mask*(1.0-opacity) + face*mask*opacity', 'merge_kernel_opacity')
         else:
             state.device = device
             self.save_state()
@@ -164,7 +154,75 @@ class FaceMergerWorker(BackendWorker):
         cs.face_opacity.set_number(face_opacity)
         self.save_state()
         self.reemit_frame_signal.send()
+    def _merge_on_cpu(self, frame_image, face_align_mask_img, face_swap_img, face_swap_mask_img, aligned_to_source_uni_mat, frame_width, frame_height ):
+        state = self.get_state()
+        frame_image = ImageProcessor(frame_image).to_ufloat32().get_image('HWC')
+        face_align_mask_img = ImageProcessor(face_align_mask_img).to_ufloat32().get_image('HW')
+        face_swap_mask_img = ImageProcessor(face_swap_mask_img).to_ufloat32().get_image('HW')
+        if state.face_mask_type == FaceMaskType.SRC:
+            face_mask = face_align_mask_img
+        elif state.face_mask_type == FaceMaskType.CELEB:
+            face_mask = face_swap_mask_img
+        elif state.face_mask_type == FaceMaskType.SRC_M_CELEB:
+            face_mask = face_align_mask_img*face_swap_mask_img
+        # Combine face mask
+        face_mask_ip = ImageProcessor(face_mask).erode_blur(state.face_mask_erode, state.face_mask_blur, fade_to_border=True) \
+                                                .warpAffine(aligned_to_source_uni_mat, frame_width, frame_height)
+        face_mask_ip.clip2( (1.0/255.0), 0.0, 1.0, 1.0)
+        frame_face_mask = face_mask_ip.get_image('HWC')
+        frame_face_swap_img = ImageProcessor(face_swap_img) \
+                              .to_ufloat32().warpAffine(aligned_to_source_uni_mat, frame_width, frame_height).get_image('HWC')
+        # Combine final frame
+        opacity = state.face_opacity
+        if opacity == 1.0:
+            frame_final = ne.evaluate('frame_image*(1.0-frame_face_mask) + frame_face_swap_img*frame_face_mask')
+        else:
+            frame_final = ne.evaluate('frame_image*(1.0-frame_face_mask) + frame_image*frame_face_mask*(1.0-opacity) + frame_face_swap_img*frame_face_mask*opacity')
+        return frame_final
+    def _merge_on_gpu(self, frame_image, face_align_mask_img, face_swap_img, face_swap_mask_img, aligned_to_source_uni_mat, frame_width, frame_height ):
+        state = self.get_state()
+        if state.face_mask_type == FaceMaskType.SRC:
+            face_mask_t = lib_cl.Tensor.from_value(face_align_mask_img, device=state.device)
+            face_mask_t = face_mask_t.transpose( (2,0,1), op_text='O = (I <= 128 ? 0 : 1);', dtype=np.uint8)
+        elif state.face_mask_type == FaceMaskType.CELEB:
+            face_mask_t = lib_cl.Tensor.from_value(face_swap_mask_img, device=state.device)
+            face_mask_t = face_mask_t.transpose( (2,0,1), op_text='O = (I <= 128 ? 0 : 1);', dtype=np.uint8)
+        elif state.face_mask_type == FaceMaskType.SRC_M_CELEB:
+            face_mask_t = lib_cl.any_wise('float X = (((float)I0) / 255.0) * (((float)I1) / 255.0); O = (X <= 0.5 ? 0 : 1);',
+                                          lib_cl.Tensor.from_value(face_align_mask_img, device=state.device),
+                                          lib_cl.Tensor.from_value(face_swap_mask_img, device=state.device),
+                                          dtype=np.uint8).transpose( (2,0,1) )
+        face_mask_t = lib_cl.binary_morph(face_mask_t, state.face_mask_erode, state.face_mask_blur, fade_to_border=True, dtype=np.float32)
+        face_swap_img_t = lib_cl.Tensor.from_value(face_swap_img, device=state.device)
+        face_swap_img_t = face_swap_img_t.transpose( (2,0,1), op_text='O = ((O_TYPE)I) / 255.0', dtype=np.float32)
+        frame_face_mask_t     = lib_cl.remap_np_affine(face_mask_t,     aligned_to_source_uni_mat, output_size=(frame_height, frame_width) )
+        frame_face_swap_img_t = lib_cl.remap_np_affine(face_swap_img_t, aligned_to_source_uni_mat, output_size=(frame_height, frame_width) )
+        frame_image_t = lib_cl.Tensor.from_value(frame_image, device=state.device).transpose( (2,0,1) )
+        opacity = state.face_opacity
+        if opacity == 1.0:
+            frame_final_t = lib_cl.any_wise('float I0f = (((float)I0) / 255.0); I1 = (I1 <= (1.0/255.0) ? 0.0 : I1 > 1.0 ? 1.0 : I1); O = I0f*(1.0-I1) + I2*I1', frame_image_t, frame_face_mask_t, frame_face_swap_img_t, dtype=np.float32)
+        else:
+            frame_final_t = lib_cl.any_wise('float I0f = (((float)I0) / 255.0); I1 = (I1 <= (1.0/255.0) ? 0.0 : I1 > 1.0 ? 1.0 : I1); O = I0f*(1.0-I1) + I0f*I1*(1.0-I3) + I2*I1*I3', frame_image_t, frame_face_mask_t, frame_face_swap_img_t, opacity, dtype=np.float32)
+        return frame_final_t.transpose( (1,2,0) ).np()
     def on_tick(self):
         state, cs = self.get_state(), self.get_control_sheet()
@@ -189,72 +247,29 @@ class FaceMergerWorker(BackendWorker):
             face_swap_mask = face_swap.get_face_mask()
             if face_swap_mask is not None:
-                face_align_img = bcd.get_image(face_align.get_image_name())
-                face_swap_img = bcd.get_image(face_swap.get_image_name())
+                face_align_img_shape, _ = bcd.get_image_shape_dtype(face_align.get_image_name())
                 face_align_mask_img = bcd.get_image(face_align_mask.get_image_name())
+                face_swap_img = bcd.get_image(face_swap.get_image_name())
                 face_swap_mask_img = bcd.get_image(face_swap_mask.get_image_name())
                 source_to_aligned_uni_mat = face_align.get_source_to_aligned_uni_mat()
-                face_mask_type = state.face_mask_type
-                if all_is_not_None(face_align_img, face_align_mask_img, face_swap_img, face_swap_mask_img, face_mask_type):
-                    face_height, face_width = face_align_img.shape[:2]
-                    if self.is_gpu:
-                        frame_image = cp.asarray(frame_image)
-                        face_align_mask_img = cp.asarray(face_align_mask_img)
-                        face_swap_mask_img = cp.asarray(face_swap_mask_img)
-                        face_swap_img = cp.asarray(face_swap_img)
-                    frame_image_ip = ImageProcessor(frame_image).to_ufloat32()
-                    frame_image, (_, frame_height, frame_width, _) = frame_image_ip.get_image('HWC'), frame_image_ip.get_dims()
-                    face_align_mask_img = ImageProcessor(face_align_mask_img).to_ufloat32().get_image('HW')
-                    face_swap_mask_img = ImageProcessor(face_swap_mask_img).to_ufloat32().get_image('HW')
+                if all_is_not_None(face_align_img_shape, face_align_mask_img, face_swap_img, face_swap_mask_img):
+                    face_height, face_width = face_align_img_shape[:2]
+                    frame_height, frame_width = frame_image.shape[:2]
                     aligned_to_source_uni_mat = source_to_aligned_uni_mat.invert()
                     aligned_to_source_uni_mat = aligned_to_source_uni_mat.source_translated(-state.face_x_offset, -state.face_y_offset)
                     aligned_to_source_uni_mat = aligned_to_source_uni_mat.source_scaled_around_center(state.face_scale,state.face_scale)
                     aligned_to_source_uni_mat = aligned_to_source_uni_mat.to_exact_mat (face_width, face_height, frame_width, frame_height)
-                    if face_mask_type == FaceMaskType.SRC:
-                        face_mask = face_align_mask_img
-                    elif face_mask_type == FaceMaskType.CELEB:
-                        face_mask = face_swap_mask_img
-                    elif face_mask_type == FaceMaskType.SRC_M_CELEB:
-                        face_mask = face_align_mask_img*face_swap_mask_img
-                    # Combine face mask
-                    face_mask_ip = ImageProcessor(face_mask).erode_blur(state.face_mask_erode, state.face_mask_blur, fade_to_border=True) \
-                                                            .warpAffine(aligned_to_source_uni_mat, frame_width, frame_height)
-                    if self.is_gpu:
-                        face_mask_ip.apply( lambda img: self.cp_mask_clip_kernel(img) )
-                    else:
-                        face_mask_ip.clip2( (1.0/255.0), 0.0, 1.0, 1.0)
-                    frame_face_mask = face_mask_ip.get_image('HWC')
-                    frame_face_swap_img = ImageProcessor(face_swap_img) \
-                                          .to_ufloat32().warpAffine(aligned_to_source_uni_mat, frame_width, frame_height).get_image('HWC')
-                    # Combine final frame
-                    opacity = state.face_opacity
-                    if self.is_gpu:
-                        if opacity == 1.0:
-                            frame_final = self.cp_merge_kernel(frame_image, frame_face_swap_img, frame_face_mask)
-                        else:
-                            frame_final = self.cp_merge_kernel_opacity(frame_image, frame_face_swap_img, frame_face_mask, opacity)
-                        frame_final = cp.asnumpy(frame_final)
-                    else:
-                        if opacity == 1.0:
-                            frame_final = ne.evaluate('frame_image*(1.0-frame_face_mask) + frame_face_swap_img*frame_face_mask')
-                        else:
-                            frame_final = ne.evaluate('frame_image*(1.0-frame_face_mask) + frame_image*frame_face_mask*(1.0-opacity) + frame_face_swap_img*frame_face_mask*opacity')
+                    if state.device == 'CPU':
+                        merged_frame = self._merge_on_cpu(frame_image, face_align_mask_img, face_swap_img, face_swap_mask_img, aligned_to_source_uni_mat, frame_width, frame_height )
+                    else:
+                        merged_frame = self._merge_on_gpu(frame_image, face_align_mask_img, face_swap_img, face_swap_mask_img, aligned_to_source_uni_mat, frame_width, frame_height )
                     # keep image in float32 in order not to extra load FaceMerger
                     merged_frame_name = f'{frame_name}_merged'
                     bcd.set_merged_frame_name(merged_frame_name)
-                    bcd.set_image(merged_frame_name, frame_final)
+                    bcd.set_image(merged_frame_name, merged_frame)
                 break
         self.stop_profile_timing()
@@ -297,7 +312,7 @@ class Sheet:
             self.face_opacity = lib_csw.Number.Host()
 class WorkerState(BackendWorkerState):
-    device : lib_cp.CuPyDeviceInfo = None
+    device : lib_cl.DeviceInfo = None
     face_x_offset : float = None
     face_y_offset : float = None
     face_scale : float = None
@@ -305,3 +320,4 @@ class WorkerState(BackendWorkerState):
    face_mask_erode : int = None
    face_mask_blur : int = None
    face_opacity : float = None
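For orientation, this is roughly how the new avecl path above is exercised end to end. It is a minimal sketch assembled only from the calls visible in this diff; the input shapes, the parameter values and picking the first reported device are assumptions for illustration, not part of the commit.

    import numpy as np
    from xlib import avecl as lib_cl

    # first OpenCL device reported by avecl (DeviceInfo), as the 'device' control would select
    device = lib_cl.get_available_devices_info()[0]

    # hypothetical HWC uint8 inputs standing in for the frame, the swapped face and its mask
    frame = np.zeros( (720,1280,3), np.uint8 )
    face  = np.zeros( (720,1280,3), np.uint8 )
    mask  = np.zeros( (720,1280,1), np.uint8 )

    # upload and reorder to CHW; face and mask are rescaled to float32 0..1 as _merge_on_gpu does
    frame_t = lib_cl.Tensor.from_value(frame, device=device).transpose( (2,0,1) )
    face_t  = lib_cl.Tensor.from_value(face,  device=device).transpose( (2,0,1), op_text='O = ((O_TYPE)I) / 255.0', dtype=np.float32)
    mask_t  = lib_cl.Tensor.from_value(mask,  device=device).transpose( (2,0,1), op_text='O = ((O_TYPE)I) / 255.0', dtype=np.float32)

    # same element-wise blend as the opacity == 1.0 branch of _merge_on_gpu
    merged_t = lib_cl.any_wise('float I0f = (((float)I0) / 255.0); I1 = (I1 <= (1.0/255.0) ? 0.0 : I1 > 1.0 ? 1.0 : I1); O = I0f*(1.0-I1) + I2*I1',
                               frame_t, mask_t, face_t, dtype=np.float32)
    merged = merged_t.transpose( (1,2,0) ).np()   # back to an HWC float32 numpy array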

View file

@@ -9,7 +9,7 @@ RUN ln -s /usr/bin/python3 /usr/bin/python
 RUN git clone https://github.com/iperov/DeepFaceLive.git
 RUN python -m pip install --upgrade pip
-RUN python -m pip install onnxruntime-gpu==1.8.1 cupy-cuda113 numpy==1.21.2 scipy==1.5.4 numexpr opencv-python==4.5.3.56 opencv-contrib-python==4.5.3.56 pyqt6==6.1.1 onnx==1.10.1 torch==1.8.1 torchvision==0.9.1
+RUN python -m pip install onnxruntime-gpu==1.8.1 numpy==1.21.2 numexpr opencv-python==4.5.3.56 opencv-contrib-python==4.5.3.56 pyqt6==6.1.1 onnx==1.10.1 torch==1.8.1 torchvision==0.9.1
 WORKDIR /app/DeepFaceLive
 COPY example.sh example.sh

View file

@@ -472,7 +472,6 @@ def build_deepfacelive_windows(release_dir, cache_dir, python_ver='3.7.9', backe
     # PIP INSTALLATIONS
     builder.install_pip_package('numpy==1.21.2')
-    builder.install_pip_package('scipy==1.5.4')
     builder.install_pip_package('numexpr')
     builder.install_pip_package('opencv-python==4.5.3.56')
     builder.install_pip_package('opencv-contrib-python==4.5.3.56')
@@ -482,7 +481,6 @@ def build_deepfacelive_windows(release_dir, cache_dir, python_ver='3.7.9', backe
     if backend == 'cuda':
         builder.install_pip_package('torch==1.8.1+cu111 torchvision==0.9.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html')
         builder.install_pip_package('onnxruntime-gpu==1.9.0')
-        builder.install_pip_package('cupy-cuda111===9.0.0')
     elif backend == 'directml':
         if python_ver[:3] == '3.7':
             builder.install_pip_package('https://github.com/iperov/DeepFaceLive/releases/download/ort-dml/onnxruntime_directml-1.8.2-cp37-cp37m-win_amd64.whl')

View file

@@ -1 +0,0 @@
-from .device import get_available_devices, CuPyDeviceInfo

View file

@@ -1,71 +0,0 @@
-from typing import List
-from .. import appargs as lib_appargs
-class CuPyDeviceInfo:
-    """
-    Represents picklable CuPy device info
-    """
-    def __init__(self, index=None, name=None, total_memory=None):
-        self._index : int = index
-        self._name : str = name
-        self._total_memory : int = total_memory
-    def __getstate__(self):
-        return self.__dict__.copy()
-    def __setstate__(self, d):
-        self.__init__()
-        self.__dict__.update(d)
-    def is_cpu(self) -> bool: return self._index == -1
-    def get_index(self) -> int:
-        return self._index
-    def get_name(self) -> str:
-        return self._name
-    def get_total_memory(self) -> int:
-        return self._total_memory
-    def __eq__(self, other):
-        if self is not None and other is not None and isinstance(self, CuPyDeviceInfo) and isinstance(other, CuPyDeviceInfo):
-            return self._index == other._index
-        return False
-    def __hash__(self):
-        return self._index
-    def __str__(self):
-        if self.is_cpu():
-            return "CPU"
-        else:
-            return f"[{self._index}] {self._name} [{(self._total_memory / 1024**3) :.3}Gb]"
-    def __repr__(self):
-        return f'{self.__class__.__name__} object: ' + self.__str__()
-_cupy_devices = None
-def get_available_devices() -> List[CuPyDeviceInfo]:
-    """
-    returns a list of available CuPyDeviceInfo
-    """
-    if lib_appargs.get_arg_bool('NO_CUDA'):
-        return []
-    global _cupy_devices
-    if _cupy_devices is None:
-        import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand
-        devices = []
-        for i in range (cp.cuda.runtime.getDeviceCount()):
-            device_props = cp.cuda.runtime.getDeviceProperties(i)
-            devices.append ( CuPyDeviceInfo(index=i, name=device_props['name'].decode('utf-8'), total_memory=device_props['totalGlobalMem']))
-        _cupy_devices = devices
-    return _cupy_devices

View file

@@ -7,33 +7,16 @@ import numpy as np
 class ImageProcessor:
     """
-    Generic image processor for numpy or cupy images
+    Generic image processor for numpy images
    arguments
-     img    np.ndarray|
-            cp.ndarray
-            HW (2 ndim)
+     img    np.ndarray      HW (2 ndim)
             HWC (3 ndim)
             NHWC (4 ndim)
-    for cupy you should set device before using ImageProcessor
     """
-    def __init__(self, img : Union[np.ndarray,'cp.ndarray'], copy=False):
-        if img.__class__ == np.ndarray:
-            self._xp = np
-            import scipy
-            import scipy.ndimage
-            self._sp = scipy
+    def __init__(self, img : np.ndarray, copy=False):
         if copy:
             img = img.copy()
-        else:
-            import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand
-            import cupyx.scipy.ndimage
-            self._xp = cp
-            self._sp = cupyx.scipy
         ndim = img.ndim
         if ndim not in [2,3,4]:
             raise ValueError(f'img.ndim must be 2,3,4, not {ndim}.')
@@ -55,8 +38,6 @@ class ImageProcessor:
         """
         ip = ImageProcessor.__new__(ImageProcessor)
         ip._img = self._img
-        ip._xp = self._xp
-        ip._sp = self._sp
         return ip
     def get_dims(self) -> Tuple[int,int,int,int]:
@@ -73,16 +54,11 @@ class ImageProcessor:
     def adjust_gamma(self, red : float, green : float, blue : float) -> 'ImageProcessor':
         dtype = self.get_dtype()
         self.to_ufloat32()
-        xp, img = self._xp, self._img
-        xp.power(img, xp.array([1.0 / blue, 1.0 / green, 1.0 / red], xp.float32), out=img)
-        xp.clip(img, 0, 1.0, out=img)
+        img = self._img
+        np.power(img, np.array([1.0 / blue, 1.0 / green, 1.0 / red], np.float32), out=img)
+        np.clip(img, 0, 1.0, out=img)
         self._img = img
         self.to_dtype(dtype)
         return self
@@ -124,7 +100,6 @@ class ImageProcessor:
         """
         #if interpolation is None:
         #    interpolation = ImageProcessor.Interpolation.LINEAR
-        xp, sp = self._xp, self._sp
         img = self._img
         N,H,W,C = img.shape
@@ -146,12 +121,7 @@ class ImageProcessor:
         if scale != 1.0:
             img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
-            if self._xp == np:
-                img = cv2.resize (img, ( int(W*scale), int(H*scale) ), interpolation=ImageProcessor.Interpolation.LINEAR)
-            else:
-                img = sp.ndimage.zoom(img, (scale, scale, 1.0), order=1)
+            img = cv2.resize (img, ( int(W*scale), int(H*scale) ), interpolation=ImageProcessor.Interpolation.LINEAR)
             H,W = img.shape[0:2]
             img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) )
@@ -159,14 +129,13 @@ class ImageProcessor:
         w_pad = (TW-W) if TW is not None else 0
         h_pad = (TH-H) if TH is not None else 0
         if w_pad != 0 or h_pad != 0:
-            img = xp.pad(img, ( (0,0), (0,h_pad), (0,w_pad), (0,0) ))
+            img = np.pad(img, ( (0,0), (0,h_pad), (0,w_pad), (0,0) ))
         self._img = img
         return scale
     def clip(self, min, max) -> 'ImageProcessor':
-        xp = self._xp
-        xp.clip(self._img, min, max, out=self._img)
+        np.clip(self._img, min, max, out=self._img)
         return self
     def clip2(self, low_check, low_val, high_check, high_val) -> 'ImageProcessor':
@@ -188,22 +157,14 @@ class ImageProcessor:
         if interpolation is None:
             interpolation = ImageProcessor.Interpolation.LINEAR
-        xp, sp, img = self._xp, self._sp, self._img
+        img = self._img
         N,H,W,C = img.shape
+        W_lr = max(4, int(W*(1.0-power)))
+        H_lr = max(4, int(H*(1.0-power)))
         img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
+        img = cv2.resize (img, (W_lr,H_lr), interpolation=_cv_inter[interpolation])
+        img = cv2.resize (img, (W,H)     , interpolation=_cv_inter[interpolation])
-        if xp == np:
-            W_lr = max(4, int(W*(1.0-power)))
-            H_lr = max(4, int(H*(1.0-power)))
-            img = cv2.resize (img, (W_lr,H_lr), interpolation=_cv_inter[interpolation])
-            img = cv2.resize (img, (W,H)     , interpolation=_cv_inter[interpolation])
-        else:
-            W_lr = max(4, round(W*(1.0-power)))
-            H_lr = max(4, round(H*(1.0-power)))
-            img = sp.ndimage.zoom(img, (H_lr/H, W_lr/W, 1), order=_scipy_order[interpolation])
-            img = sp.ndimage.zoom(img, (H/img.shape[0], W/img.shape[1], 1), order=_scipy_order[interpolation])
         img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) )
         self._img = img
@@ -223,18 +184,14 @@ class ImageProcessor:
         dtype = self.get_dtype()
         self.to_ufloat32()
-        xp, sp, img = self._xp, self._sp, self._img
+        img = self._img
         N,H,W,C = img.shape
         img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
-        if xp == np:
-            img_blur = cv2.medianBlur(img, size)
-            img = ne.evaluate('img*(1.0-power) + img_blur*power')
-        else:
-            img_blur = sp.ndimage.median_filter(img, size=(size,size,1) )
-            img = img*(1.0-power) + img_blur*power
+        img_blur = cv2.medianBlur(img, size)
+        img = ne.evaluate('img*(1.0-power) + img_blur*power')
         img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) )
         self._img = img
@@ -250,32 +207,23 @@ class ImageProcessor:
         fade_to_border(False)   clip the image in order
                                 to fade smoothly to the border with specified blur amount
         """
-        xp, sp = self._xp, self._sp
         erode, blur = int(erode), int(blur)
         img = self._img
-        dtype = img.dtype
         N,H,W,C = img.shape
         img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
-        img = xp.pad (img, ( (H,H), (W,W), (0,0) ) )
+        img = np.pad (img, ( (H,H), (W,W), (0,0) ) )
         if erode > 0:
-            el = xp.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)))
+            el = np.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)))
             iterations = max(1,erode//2)
-            if self._xp == np:
-                img = cv2.erode(img, el, iterations = iterations )
-            else:
-                img = sp.ndimage.binary_erosion(img, el[...,None], iterations = iterations, brute_force=True ).astype(dtype)
+            img = cv2.erode(img, el, iterations = iterations )
         elif erode < 0:
-            el = xp.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)))
+            el = np.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)))
             iterations = max(1,-erode//2)
-            if self._xp == np:
-                img = cv2.dilate(img, el, iterations = iterations )
-            else:
-                img = sp.ndimage.binary_dilation(img, el[...,None], iterations = iterations, brute_force=True).astype(dtype)
+            img = cv2.dilate(img, el, iterations = iterations )
         if fade_to_border:
             h_clip_size = H + blur // 2
@@ -287,13 +235,8 @@ class ImageProcessor:
         if blur > 0:
             sigma = blur * 0.125 * 2
-            if self._xp == np:
-                img = cv2.GaussianBlur(img, (0, 0), sigma)
-            else:
-                img = sp.ndimage.gaussian_filter(img, (sigma, sigma,0), mode='constant')
-        #if img.ndim == 2:
-        #    img = img[...,None]
+            img = cv2.GaussianBlur(img, (0, 0), sigma)
         img = img[H:-H,W:-W]
         img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) )
@@ -301,15 +244,15 @@ class ImageProcessor:
         return self
     def rotate90(self) -> 'ImageProcessor':
-        self._img = self._xp.rot90(self._img, k=1, axes=(1,2) )
+        self._img = np.rot90(self._img, k=1, axes=(1,2) )
         return self
     def rotate180(self) -> 'ImageProcessor':
-        self._img = self._xp.rot90(self._img, k=2, axes=(1,2) )
+        self._img = np.rot90(self._img, k=2, axes=(1,2) )
         return self
     def rotate270(self) -> 'ImageProcessor':
-        self._img = self._xp.rot90(self._img, k=3, axes=(1,2) )
+        self._img = np.rot90(self._img, k=3, axes=(1,2) )
         return self
     def flip_horizontal(self) -> 'ImageProcessor':
@@ -330,11 +273,7 @@ class ImageProcessor:
         """
         """
-        xp = self._xp
-        img = self._img
-        img = xp.pad(img, ( (0,0), (t_h,b_h), (l_w,r_w), (0,0) ))
-        self._img = img
+        self._img = np.pad(self._img, ( (0,0), (t_h,b_h), (l_w,r_w), (0,0) ))
         return self
     def pad_to_next_divisor(self, dw=None, dh=None) -> 'ImageProcessor':
@@ -343,7 +282,6 @@ class ImageProcessor:
         dw,dh   int
         """
-        xp = self._xp
         img = self._img
         _,H,W,_ = img.shape
@@ -360,24 +298,18 @@ class ImageProcessor:
             h_pad = dh - h_pad
         if w_pad != 0 or h_pad != 0:
-            img = xp.pad(img, ( (0,0), (0,h_pad), (0,w_pad), (0,0) ))
+            img = np.pad(img, ( (0,0), (0,h_pad), (0,w_pad), (0,0) ))
         self._img = img
         return self
     def sharpen(self, factor : float, kernel_size=3) -> 'ImageProcessor':
-        xp = self._xp
         img = self._img
         N,H,W,C = img.shape
         img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
-        if xp == np:
-            blur = cv2.GaussianBlur(img, (kernel_size, kernel_size) , 0)
-            img = cv2.addWeighted(img, 1.0 + (0.5 * factor), blur, -(0.5 * factor), 0)
-        else:
-            raise
+        blur = cv2.GaussianBlur(img, (kernel_size, kernel_size) , 0)
+        img = cv2.addWeighted(img, 1.0 + (0.5 * factor), blur, -(0.5 * factor), 0)
         img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) )
         self._img = img
@@ -394,8 +326,6 @@ class ImageProcessor:
         zero dim will be set to 1
         """
-        xp = self._xp
         format = format.upper()
         img = self._img
@@ -418,7 +348,7 @@ class ImageProcessor:
         transpose_order = [ d[s] for s in format ]
         img = img.transpose(transpose_order)
-        return xp.ascontiguousarray(img)
+        return np.ascontiguousarray(img)
     def ch(self, TC : int) -> 'ImageProcessor':
         """
@@ -426,7 +356,6 @@ class ImageProcessor:
         TC      int     >= 1
         """
-        xp = self._xp
         img = self._img
         N,H,W,C = img.shape
@@ -436,7 +365,7 @@ class ImageProcessor:
         if TC > C:
             # Ch expand
             img = img[...,0:1]  # Clip to single ch first.
-            img = xp.repeat (img, TC, -1)  # Expand by repeat
+            img = np.repeat (img, TC, -1)  # Expand by repeat
         elif TC < C:
             # Ch reduction clip
             img = img[...,:TC]
@@ -448,7 +377,7 @@ class ImageProcessor:
         """
         Converts 3 ch bgr to grayscale.
         """
-        img, xp = self._img, self._xp
+        img = self._img
         _,_,_,C = img.shape
         if C != 1:
             dtype = self.get_dtype()
@@ -458,7 +387,7 @@ class ImageProcessor:
             elif C >= 3:
                 img = img[...,:3]
-            img = xp.dot(img, xp.array([0.1140, 0.5870, 0.2989], xp.float32)) [...,None]
+            img = np.dot(img, np.array([0.1140, 0.5870, 0.2989], np.float32)) [...,None]
             img = img.astype(dtype)
             self._img = img
@@ -468,8 +397,6 @@ class ImageProcessor:
         """
         resize to (W,H)
         """
-        xp, sp = self._xp, self._sp
         img = self._img
         N,H,W,C = img.shape
@@ -479,12 +406,7 @@ class ImageProcessor:
             interpolation = ImageProcessor.Interpolation.LINEAR
         img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
-        if self._xp == np:
-            img = cv2.resize (img, (TW, TH), interpolation=_cv_inter[interpolation])
-        else:
-            img = sp.ndimage.zoom(img, (TW/W, TH/H, 1), order=_scipy_order[interpolation])
+        img = cv2.resize (img, (TW, TH), interpolation=_cv_inter[interpolation])
         img = img.reshape( (TH,TW,N,C) ).transpose( (2,0,1,3) )
         if new_ip:
@@ -498,26 +420,15 @@ class ImageProcessor:
         """
         img    HWC
         """
-        xp, sp, img = self._xp, self._sp, self._img
+        img = self._img
         N,H,W,C = img.shape
         img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
         if interpolation is None:
             interpolation = ImageProcessor.Interpolation.LINEAR
-        if xp == np:
-            img = cv2.warpAffine(img, mat, (out_width, out_height), flags=_cv_inter[interpolation] )
-        else:
-            # AffineMat inverse
-            xp_mat = xp.get_array_module(mat)
-            mat = xp_mat.linalg.inv(xp_mat.concatenate( ( mat, xp_mat.array([[0,0,1]], xp_mat.float32)), 0) )[0:2,:]
-            mx, my = xp.meshgrid( xp.arange(0, out_width, dtype=xp.float32), xp.arange(0, out_height, dtype=xp.float32) )
-            coords = xp.concatenate( (mx[None,...], my[None,...], xp.ones( (1, out_height,out_width), dtype=xp.float32)), 0 )
-            mat_coords = xp.matmul (xp.asarray(mat), coords.reshape( (3,-1) ) ).reshape( (2,out_height,out_width))
-            img = xp.concatenate([sp.ndimage.map_coordinates( img[...,c], mat_coords[::-1,...], order=_scipy_order[interpolation], mode='opencv' )[...,None] for c in range(N*C) ], -1)
+        img = cv2.warpAffine(img, mat, (out_width, out_height), flags=_cv_inter[interpolation] )
         img = img.reshape( (out_height,out_width,N,C) ).transpose( (2,0,1,3) )
         self._img = img
         return self
@@ -531,23 +442,20 @@ class ImageProcessor:
         """
         change image format to float32
         """
-        xp = self._xp
-        self._img = self._img.astype(xp.float32)
+        self._img = self._img.astype(np.float32)
         return self
     def as_uint8(self) -> 'ImageProcessor':
         """
         change image format to uint8
         """
-        xp = self._xp
-        self._img = self._img.astype(xp.uint8)
+        self._img = self._img.astype(np.uint8)
         return self
     def to_dtype(self, dtype) -> 'ImageProcessor':
-        xp = self._xp
-        if dtype == xp.float32:
+        if dtype == np.float32:
             return self.to_ufloat32()
-        elif dtype == xp.uint8:
+        elif dtype == np.uint8:
             return self.to_uint8()
         else:
             raise ValueError('unsupported dtype')
@@ -558,9 +466,8 @@ class ImageProcessor:
         if current image dtype uint8, then image will be divided by / 255.0
         otherwise no operation
         """
-        xp = self._xp
-        if self._img.dtype == xp.uint8:
-            self._img = self._img.astype(xp.float32)
+        if self._img.dtype == np.uint8:
+            self._img = self._img.astype(np.float32)
             self._img /= 255.0
         return self
@@ -571,17 +478,13 @@ class ImageProcessor:
         if current image dtype is float32/64, then image will be multiplied by *255
         """
-        xp = self._xp
         img = self._img
-        if img.dtype in [xp.float32, xp.float64]:
+        if img.dtype in [np.float32, np.float64]:
             img *= 255.0
-            img[img < 0] = 0
-            img[img > 255] = 255
-            img = img.astype(xp.uint8, copy=False)
-        self._img = img
+            np.clip(img, 0, 255, out=img)
+        self._img = img.astype(np.uint8, copy=False)
         return self
     class Interpolation(IntEnum):
@@ -589,7 +492,4 @@ class ImageProcessor:
         CUBIC = 1
 _cv_inter = { ImageProcessor.Interpolation.LINEAR : cv2.INTER_LINEAR,
               ImageProcessor.Interpolation.CUBIC : cv2.INTER_CUBIC }
-_scipy_order = { ImageProcessor.Interpolation.LINEAR : 1,
-                 ImageProcessor.Interpolation.CUBIC : 3 }
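With the cupy/scipy branches removed, ImageProcessor is a plain numpy/OpenCV pipeline. A typical mask chain, sketched from the calls _merge_on_cpu makes above (the input shape and the erode/blur values here are made up for illustration):

    import numpy as np
    from xlib.image import ImageProcessor

    mask = np.zeros( (256,256,1), np.uint8 )   # hypothetical HWC uint8 mask

    # uint8 -> float32 0..1, erode + blur with fade to border, threshold near-zero values
    ip = ImageProcessor(mask).to_ufloat32().erode_blur(5, 25, fade_to_border=True)
    ip.clip2( (1.0/255.0), 0.0, 1.0, 1.0)
    out = ip.get_image('HWC')   # contiguous float32 numpy array, no GPU arrays involved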