Import cupy on demand, because it eats almost 2 GB of paging file in every subprocess

This commit is contained in:
iperov 2021-07-24 10:48:42 +04:00
parent 1a47d99278
commit b546c735a1
3 changed files with 56 additions and 50 deletions

View file

@ -1,7 +1,6 @@
import time import time
from enum import IntEnum from enum import IntEnum
import cupy as cp
import numexpr as ne import numexpr as ne
import numpy as np import numpy as np
from xlib import cupy as lib_cp from xlib import cupy as lib_cp
@ -38,6 +37,8 @@ class FaceMergerWorker(BackendWorker):
def get_control_sheet(self) -> 'Sheet.Worker': return super().get_control_sheet() def get_control_sheet(self) -> 'Sheet.Worker': return super().get_control_sheet()
def on_start(self, weak_heap : BackendWeakHeap, reemit_frame_signal : BackendSignal, bc_in : BackendConnection, bc_out : BackendConnection): def on_start(self, weak_heap : BackendWeakHeap, reemit_frame_signal : BackendSignal, bc_in : BackendConnection, bc_out : BackendConnection):
self.weak_heap = weak_heap self.weak_heap = weak_heap
self.reemit_frame_signal = reemit_frame_signal self.reemit_frame_signal = reemit_frame_signal
self.bc_in = bc_in self.bc_in = bc_in
@ -95,6 +96,9 @@ class FaceMergerWorker(BackendWorker):
if device != 'CPU': if device != 'CPU':
self.is_gpu = True self.is_gpu = True
global cp
import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand
cp.cuda.Device( device.get_index() ).use() cp.cuda.Device( device.get_index() ).use()
self.cp_mask_clip_kernel = cp.ElementwiseKernel('T x', 'T z', 'z = x < 0.004 ? 0 : x > 1.0 ? 1.0 : x', 'mask_clip_kernel') self.cp_mask_clip_kernel = cp.ElementwiseKernel('T x', 'T z', 'z = x < 0.004 ? 0 : x > 1.0 ? 1.0 : x', 'mask_clip_kernel')

View file

@ -1,6 +1,6 @@
from typing import List from typing import List
import cupy as cp
class CuPyDeviceInfo: class CuPyDeviceInfo:
@ -55,6 +55,7 @@ def get_available_devices() -> List[CuPyDeviceInfo]:
""" """
global _cupy_devices global _cupy_devices
if _cupy_devices is None: if _cupy_devices is None:
import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand
devices = [] devices = []
for i in range (cp.cuda.runtime.getDeviceCount()): for i in range (cp.cuda.runtime.getDeviceCount()):

View file

@ -1,14 +1,9 @@
from enum import IntEnum from enum import IntEnum
from typing import Tuple, Union from typing import Tuple, Union
import cupy as cp
import cupyx.scipy.ndimage
import cv2 import cv2
import numexpr as ne import numexpr as ne
import numpy as np import numpy as np
import scipy
import scipy.ndimage
class ImageProcessor: class ImageProcessor:
""" """
@ -24,13 +19,20 @@ class ImageProcessor:
for cupy you should set device before using ImageProcessor for cupy you should set device before using ImageProcessor
""" """
def __init__(self, img : Union[np.ndarray,cp.ndarray], copy=False): def __init__(self, img : Union[np.ndarray,'cp.ndarray'], copy=False):
self._xp = xp = cp.get_array_module(img)
if copy and xp == np: if img.__class__ == np.ndarray:
self._xp = np
import scipy
import scipy.ndimage
self._sp = scipy
if copy:
img = img.copy() img = img.copy()
else:
self._sp = cupyx.scipy if xp == cp else scipy import cupy as cp # BUG eats 1.8Gb paging file per process, so import on demand
import cupyx.scipy.ndimage
self._xp = cp
self._sp = cupyx.scipy
ndim = img.ndim ndim = img.ndim
if ndim not in [2,3,4]: if ndim not in [2,3,4]:
@ -145,10 +147,10 @@ class ImageProcessor:
if scale != 1.0: if scale != 1.0:
img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
if self._xp == cp: if self._xp == np:
img = sp.ndimage.zoom(img, (scale, scale, 1.0), order=1)
else:
img = cv2.resize (img, ( int(W*scale), int(H*scale) ), interpolation=ImageProcessor.Interpolation.LINEAR) img = cv2.resize (img, ( int(W*scale), int(H*scale) ), interpolation=ImageProcessor.Interpolation.LINEAR)
else:
img = sp.ndimage.zoom(img, (scale, scale, 1.0), order=1)
H,W,_ = img.shape H,W,_ = img.shape
img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) ) img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) )
@ -191,16 +193,16 @@ class ImageProcessor:
N,H,W,C = img.shape N,H,W,C = img.shape
img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
if xp == cp: if xp == np:
W_lr = max(4, round(W*(1.0-power)))
H_lr = max(4, round(H*(1.0-power)))
img = sp.ndimage.zoom(img, (H_lr/H, W_lr/W, 1), order=_scipy_order[interpolation])
img = sp.ndimage.zoom(img, (H/img.shape[0], W/img.shape[1], 1), order=_scipy_order[interpolation])
else:
W_lr = max(4, int(W*(1.0-power))) W_lr = max(4, int(W*(1.0-power)))
H_lr = max(4, int(H*(1.0-power))) H_lr = max(4, int(H*(1.0-power)))
img = cv2.resize (img, (W_lr,H_lr), interpolation=_cv_inter[interpolation]) img = cv2.resize (img, (W_lr,H_lr), interpolation=_cv_inter[interpolation])
img = cv2.resize (img, (W,H) , interpolation=_cv_inter[interpolation]) img = cv2.resize (img, (W,H) , interpolation=_cv_inter[interpolation])
else:
W_lr = max(4, round(W*(1.0-power)))
H_lr = max(4, round(H*(1.0-power)))
img = sp.ndimage.zoom(img, (H_lr/H, W_lr/W, 1), order=_scipy_order[interpolation])
img = sp.ndimage.zoom(img, (H/img.shape[0], W/img.shape[1], 1), order=_scipy_order[interpolation])
img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) ) img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) )
@ -226,12 +228,12 @@ class ImageProcessor:
img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
if xp == cp: if xp == np:
img_blur = sp.ndimage.median_filter(img, size=(size,size,1) )
img = img*(1.0-power) + img_blur*power
else:
img_blur = cv2.medianBlur(img, size) img_blur = cv2.medianBlur(img, size)
img = ne.evaluate('img*(1.0-power) + img_blur*power') img = ne.evaluate('img*(1.0-power) + img_blur*power')
else:
img_blur = sp.ndimage.median_filter(img, size=(size,size,1) )
img = img*(1.0-power) + img_blur*power
img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) ) img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) )
self._img = img self._img = img
@ -262,18 +264,18 @@ class ImageProcessor:
if erode > 0: if erode > 0:
el = xp.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))) el = xp.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)))
iterations = max(1,erode//2) iterations = max(1,erode//2)
if self._xp == cp: if self._xp == np:
img = sp.ndimage.binary_erosion(img, el[...,None], iterations = iterations, brute_force=True ).astype(dtype)
else:
img = cv2.erode(img, el, iterations = iterations ) img = cv2.erode(img, el, iterations = iterations )
else:
img = sp.ndimage.binary_erosion(img, el[...,None], iterations = iterations, brute_force=True ).astype(dtype)
elif erode < 0: elif erode < 0:
el = xp.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3))) el = xp.asarray(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(3,3)))
iterations = max(1,-erode//2) iterations = max(1,-erode//2)
if self._xp == cp: if self._xp == np:
img = sp.ndimage.binary_dilation(img, el[...,None], iterations = iterations, brute_force=True).astype(dtype)
else:
img = cv2.dilate(img, el, iterations = iterations ) img = cv2.dilate(img, el, iterations = iterations )
else:
img = sp.ndimage.binary_dilation(img, el[...,None], iterations = iterations, brute_force=True).astype(dtype)
if fade_to_border: if fade_to_border:
h_clip_size = H + blur // 2 h_clip_size = H + blur // 2
@ -285,10 +287,10 @@ class ImageProcessor:
if blur > 0: if blur > 0:
sigma = blur * 0.125 * 2 sigma = blur * 0.125 * 2
if self._xp == cp: if self._xp == np:
img = sp.ndimage.gaussian_filter(img, (sigma, sigma,0), mode='constant')
else:
img = cv2.GaussianBlur(img, (0, 0), sigma) img = cv2.GaussianBlur(img, (0, 0), sigma)
else:
img = sp.ndimage.gaussian_filter(img, (sigma, sigma,0), mode='constant')
#if img.ndim == 2: #if img.ndim == 2:
# img = img[...,None] # img = img[...,None]
@ -370,11 +372,11 @@ class ImageProcessor:
N,H,W,C = img.shape N,H,W,C = img.shape
img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
if xp == cp: if xp == np:
raise
else:
blur = cv2.GaussianBlur(img, (kernel_size, kernel_size) , 0) blur = cv2.GaussianBlur(img, (kernel_size, kernel_size) , 0)
img = cv2.addWeighted(img, 1.0 + (0.5 * factor), blur, -(0.5 * factor), 0) img = cv2.addWeighted(img, 1.0 + (0.5 * factor), blur, -(0.5 * factor), 0)
else:
raise
img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) ) img = img.reshape( (H,W,N,C) ).transpose( (2,0,1,3) )
@ -478,10 +480,10 @@ class ImageProcessor:
img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) ) img = img.transpose( (1,2,0,3) ).reshape( (H,W,N*C) )
if self._xp == cp: if self._xp == np:
img = sp.ndimage.zoom(img, (TW/W, TH/H, 1), order=_scipy_order[interpolation])
else:
img = cv2.resize (img, (TW, TH), interpolation=_cv_inter[interpolation]) img = cv2.resize (img, (TW, TH), interpolation=_cv_inter[interpolation])
else:
img = sp.ndimage.zoom(img, (TW/W, TH/H, 1), order=_scipy_order[interpolation])
img = img.reshape( (TH,TW,N,C) ).transpose( (2,0,1,3) ) img = img.reshape( (TH,TW,N,C) ).transpose( (2,0,1,3) )
@ -503,9 +505,11 @@ class ImageProcessor:
if interpolation is None: if interpolation is None:
interpolation = ImageProcessor.Interpolation.LINEAR interpolation = ImageProcessor.Interpolation.LINEAR
if xp == cp: if xp == np:
img = cv2.warpAffine(img, mat, (out_width, out_height), flags=_cv_inter[interpolation] )
else:
# AffineMat inverse # AffineMat inverse
xp_mat = cp.get_array_module(mat) xp_mat = xp.get_array_module(mat)
mat = xp_mat.linalg.inv(xp_mat.concatenate( ( mat, xp_mat.array([[0,0,1]], xp_mat.float32)), 0) )[0:2,:] mat = xp_mat.linalg.inv(xp_mat.concatenate( ( mat, xp_mat.array([[0,0,1]], xp_mat.float32)), 0) )[0:2,:]
mx, my = xp.meshgrid( xp.arange(0, out_width, dtype=xp.float32), xp.arange(0, out_height, dtype=xp.float32) ) mx, my = xp.meshgrid( xp.arange(0, out_width, dtype=xp.float32), xp.arange(0, out_height, dtype=xp.float32) )
@ -514,9 +518,6 @@ class ImageProcessor:
mat_coords = xp.matmul (xp.asarray(mat), coords.reshape( (3,-1) ) ).reshape( (2,out_height,out_width)) mat_coords = xp.matmul (xp.asarray(mat), coords.reshape( (3,-1) ) ).reshape( (2,out_height,out_width))
img = xp.concatenate([sp.ndimage.map_coordinates( img[...,c], mat_coords[::-1,...], order=_scipy_order[interpolation], mode='opencv' )[...,None] for c in range(N*C) ], -1) img = xp.concatenate([sp.ndimage.map_coordinates( img[...,c], mat_coords[::-1,...], order=_scipy_order[interpolation], mode='opencv' )[...,None] for c in range(N*C) ], -1)
else:
img = cv2.warpAffine(img, mat, (out_width, out_height), flags=_cv_inter[interpolation] )
img = img.reshape( (out_height,out_width,N,C) ).transpose( (2,0,1,3) ) img = img.reshape( (out_height,out_width,N,C) ).transpose( (2,0,1,3) )
self._img = img self._img = img
return self return self