diff --git a/apps/DeepFaceLive/backend/FaceMerger.py b/apps/DeepFaceLive/backend/FaceMerger.py index 56baaa2..5457a55 100644 --- a/apps/DeepFaceLive/backend/FaceMerger.py +++ b/apps/DeepFaceLive/backend/FaceMerger.py @@ -1,7 +1,6 @@ import time from enum import IntEnum -import cupy as cp import numexpr as ne import numpy as np from xlib import cupy as lib_cp @@ -38,6 +37,8 @@ class FaceMergerWorker(BackendWorker): def get_control_sheet(self) -> 'Sheet.Worker': return super().get_control_sheet() def on_start(self, weak_heap : BackendWeakHeap, reemit_frame_signal : BackendSignal, bc_in : BackendConnection, bc_out : BackendConnection): + + self.weak_heap = weak_heap self.reemit_frame_signal = reemit_frame_signal self.bc_in = bc_in @@ -95,6 +96,9 @@ class FaceMergerWorker(BackendWorker): if device != 'CPU': self.is_gpu = True + + global cp + import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand cp.cuda.Device( device.get_index() ).use() self.cp_mask_clip_kernel = cp.ElementwiseKernel('T x', 'T z', 'z = x < 0.004 ? 0 : x > 1.0 ? 1.0 : x', 'mask_clip_kernel') diff --git a/xlib/cupy/device.py b/xlib/cupy/device.py index 8f7a5bd..dccc9ca 100644 --- a/xlib/cupy/device.py +++ b/xlib/cupy/device.py @@ -1,6 +1,6 @@ from typing import List -import cupy as cp + class CuPyDeviceInfo: @@ -55,6 +55,7 @@ def get_available_devices() -> List[CuPyDeviceInfo]: """ global _cupy_devices if _cupy_devices is None: + import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand devices = [] for i in range (cp.cuda.runtime.getDeviceCount()): diff --git a/xlib/image/ImageProcessor.py b/xlib/image/ImageProcessor.py index 69430b1..5997794 100644 --- a/xlib/image/ImageProcessor.py +++ b/xlib/image/ImageProcessor.py @@ -1,14 +1,9 @@ from enum import IntEnum from typing import Tuple, Union -import cupy as cp -import cupyx.scipy.ndimage import cv2 import numexpr as ne import numpy as np -import scipy -import scipy.ndimage - class ImageProcessor: """ @@ -24,13 +19,20 @@ class ImageProcessor: for cupy you should set device before using ImageProcessor """ - def __init__(self, img : Union[np.ndarray,cp.ndarray], copy=False): - self._xp = xp = cp.get_array_module(img) - - if copy and xp == np: - img = img.copy() - - self._sp = cupyx.scipy if xp == cp else scipy + def __init__(self, img : Union[np.ndarray,'cp.ndarray'], copy=False): + + if img.__class__ == np.ndarray: + self._xp = np + import scipy + import scipy.ndimage + self._sp = scipy + if copy: + img = img.copy() + else: + import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand + import cupyx.scipy.ndimage + self._xp = cp + self._sp = cupyx.scipy ndim = img.ndim if ndim not in [2,3,4]: @@ -145,10 +147,10 @@ class ImageProcessor: if scale != 1.0: img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) - if self._xp == cp: - img = sp.ndimage.zoom(img, (scale, scale, 1.0), order=1) - else: + if self._xp == np: img = cv2.resize (img, ( int(W*scale), int(H*scale) ), interpolation=ImageProcessor.Interpolation.LINEAR) + else: + img = sp.ndimage.zoom(img, (scale, scale, 1.0), order=1) H,W,_ = img.shape img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) ) @@ -191,17 +193,17 @@ class ImageProcessor: N,H,W,C = img.shape img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) - if xp == cp: - W_lr = max(4, round(W*(1.0-power))) - H_lr = max(4, round(H*(1.0-power))) - img = sp.ndimage.zoom(img, (H_lr/H, W_lr/W, 1), order=_scipy_order[interpolation]) - img = sp.ndimage.zoom(img, (H/img.shape[0], W/img.shape[1], 1), order=_scipy_order[interpolation]) - else: + if xp == np: W_lr = max(4, int(W*(1.0-power))) H_lr = max(4, int(H*(1.0-power))) img = cv2.resize (img, (W_lr,H_lr), interpolation=_cv_inter[interpolation]) img = cv2.resize (img, (W,H) , interpolation=_cv_inter[interpolation]) - + else: + W_lr = max(4, round(W*(1.0-power))) + H_lr = max(4, round(H*(1.0-power))) + img = sp.ndimage.zoom(img, (H_lr/H, W_lr/W, 1), order=_scipy_order[interpolation]) + img = sp.ndimage.zoom(img, (H/img.shape[0], W/img.shape[1], 1), order=_scipy_order[interpolation]) + img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) ) self._img = img @@ -226,13 +228,13 @@ class ImageProcessor: img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) - if xp == cp: - img_blur = sp.ndimage.median_filter(img, size=(size,size,1) ) - img = img*(1.0-power) + img_blur*power - else: + if xp == np: img_blur = cv2.medianBlur(img, size) img = ne.evaluate('img*(1.0-power) + img_blur*power') - + else: + img_blur = sp.ndimage.median_filter(img, size=(size,size,1) ) + img = img*(1.0-power) + img_blur*power + img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) ) self._img = img @@ -262,18 +264,18 @@ class ImageProcessor: if erode > 0: el = xp.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))) iterations = max(1,erode//2) - if self._xp == cp: - img = sp.ndimage.binary_erosion(img, el[...,None], iterations = iterations, brute_force=True ).astype(dtype) - else: + if self._xp == np: img = cv2.erode(img, el, iterations = iterations ) + else: + img = sp.ndimage.binary_erosion(img, el[...,None], iterations = iterations, brute_force=True ).astype(dtype) elif erode < 0: el = xp.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))) iterations = max(1,-erode//2) - if self._xp == cp: - img = sp.ndimage.binary_dilation(img, el[...,None], iterations = iterations, brute_force=True).astype(dtype) - else: + if self._xp == np: img = cv2.dilate(img, el, iterations = iterations ) + else: + img = sp.ndimage.binary_dilation(img, el[...,None], iterations = iterations, brute_force=True).astype(dtype) if fade_to_border: h_clip_size = H + blur // 2 @@ -285,10 +287,10 @@ class ImageProcessor: if blur > 0: sigma = blur * 0.125 * 2 - if self._xp == cp: - img = sp.ndimage.gaussian_filter(img, (sigma, sigma,0), mode='constant') - else: + if self._xp == np: img = cv2.GaussianBlur(img, (0, 0), sigma) + else: + img = sp.ndimage.gaussian_filter(img, (sigma, sigma,0), mode='constant') #if img.ndim == 2: # img = img[...,None] @@ -370,11 +372,11 @@ class ImageProcessor: N,H,W,C = img.shape img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) - if xp == cp: - raise - else: + if xp == np: blur = cv2.GaussianBlur(img, (kernel_size, kernel_size) , 0) img = cv2.addWeighted(img, 1.0 + (0.5 * factor), blur, -(0.5 * factor), 0) + else: + raise img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) ) @@ -478,10 +480,10 @@ class ImageProcessor: img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) - if self._xp == cp: - img = sp.ndimage.zoom(img, (TW/W, TH/H, 1), order=_scipy_order[interpolation]) - else: + if self._xp == np: img = cv2.resize (img, (TW, TH), interpolation=_cv_inter[interpolation]) + else: + img = sp.ndimage.zoom(img, (TW/W, TH/H, 1), order=_scipy_order[interpolation]) img = img.reshape( (TH,TW,N,C) ).transpose( (2,0,1,3) ) @@ -503,9 +505,11 @@ class ImageProcessor: if interpolation is None: interpolation = ImageProcessor.Interpolation.LINEAR - if xp == cp: + if xp == np: + img = cv2.warpAffine(img, mat, (out_width, out_height), flags=_cv_inter[interpolation] ) + else: # AffineMat inverse - xp_mat = cp.get_array_module(mat) + xp_mat = xp.get_array_module(mat) mat = xp_mat.linalg.inv(xp_mat.concatenate( ( mat, xp_mat.array([[0,0,1]], xp_mat.float32)), 0) )[0:2,:] mx, my = xp.meshgrid( xp.arange(0, out_width, dtype=xp.float32), xp.arange(0, out_height, dtype=xp.float32) ) @@ -513,10 +517,7 @@ class ImageProcessor: mat_coords = xp.matmul (xp.asarray(mat), coords.reshape( (3,-1) ) ).reshape( (2,out_height,out_width)) img = xp.concatenate([sp.ndimage.map_coordinates( img[...,c], mat_coords[::-1,...], order=_scipy_order[interpolation], mode='opencv' )[...,None] for c in range(N*C) ], -1) - - else: - img = cv2.warpAffine(img, mat, (out_width, out_height), flags=_cv_inter[interpolation] ) - + img = img.reshape( (out_height,out_width,N,C) ).transpose( (2,0,1,3) ) self._img = img return self