mirror of
https://github.com/iperov/DeepFaceLive
synced 2025-08-19 21:13:21 -07:00
update xlib.avecl
This commit is contained in:
parent
bd48b455cb
commit
65fb01ff39
6 changed files with 143 additions and 7 deletions
|
@ -18,6 +18,7 @@ from ._internal.backend import (Device, DeviceInfo, Kernel,
|
||||||
get_available_devices_info, get_best_device,
|
get_available_devices_info, get_best_device,
|
||||||
get_default_device, get_device,
|
get_default_device, get_device,
|
||||||
set_default_device)
|
set_default_device)
|
||||||
|
from ._internal.EInterpolation import EInterpolation
|
||||||
from ._internal.HArgs import HArgs
|
from ._internal.HArgs import HArgs
|
||||||
from ._internal.HKernel import HKernel
|
from ._internal.HKernel import HKernel
|
||||||
from ._internal.HTensor import HTensor
|
from ._internal.HTensor import HTensor
|
||||||
|
|
8
xlib/avecl/_internal/EInterpolation.py
Normal file
8
xlib/avecl/_internal/EInterpolation.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from enum import IntEnum


class EInterpolation(IntEnum):
    """Identifiers for the resampling filters an operator may be asked to use.

    Members are an ``IntEnum`` with explicitly assigned values, so they
    compare equal to the plain integers listed below.

    NOTE(review): the kernel selection visible for ``remap_np_affine`` has
    branches for LINEAR, CUBIC, LANCZOS3 and LANCZOS4 only; NEAREST appears
    to fall through to its ``ValueError`` -- confirm before passing it there.
    """

    NEAREST = 0
    LINEAR = 1
    CUBIC = 2
    LANCZOS3 = 3
    LANCZOS4 = 4
|
|
@ -102,6 +102,7 @@ class Tensor:
|
||||||
def __setitem__(self, slices, value): ...
|
def __setitem__(self, slices, value): ...
|
||||||
|
|
||||||
def as_shape(self, shape) -> 'Tensor': ...
|
def as_shape(self, shape) -> 'Tensor': ...
|
||||||
|
def cast(self, dtype) -> 'Tensor': ...
|
||||||
def copy(self) -> 'Tensor': ...
|
def copy(self) -> 'Tensor': ...
|
||||||
def max(self, axes=None, keepdims=False) -> 'Tensor': ...
|
def max(self, axes=None, keepdims=False) -> 'Tensor': ...
|
||||||
def mean(self, axes=None, keepdims=False) -> 'Tensor': ...
|
def mean(self, axes=None, keepdims=False) -> 'Tensor': ...
|
||||||
|
|
|
@ -60,6 +60,7 @@ def Tensor_as_shape(self : Tensor, shape) -> Tensor:
|
||||||
return TensorRef(self, shape)
|
return TensorRef(self, shape)
|
||||||
Tensor.as_shape = Tensor_as_shape
|
Tensor.as_shape = Tensor_as_shape
|
||||||
|
|
||||||
|
Tensor.cast = cast
|
||||||
def Tensor_copy(self : Tensor) -> Tensor:
|
def Tensor_copy(self : Tensor) -> Tensor:
|
||||||
return Tensor.from_value(self)
|
return Tensor.from_value(self)
|
||||||
Tensor.copy = Tensor_copy
|
Tensor.copy = Tensor_copy
|
||||||
|
|
|
@ -41,5 +41,5 @@ def binary_morph(input_t : Tensor, erode_dilate : int, blur : float, fade_to_bor
|
||||||
x = gaussian_blur(x, blur * 0.250, dtype=dtype)
|
x = gaussian_blur(x, blur * 0.250, dtype=dtype)
|
||||||
else:
|
else:
|
||||||
x = cast(x, dtype=dtype)
|
x = cast(x, dtype=dtype)
|
||||||
|
|
||||||
return x[...,H:-H,W:-W]
|
return x[...,H:-H,W:-W]
|
||||||
|
|
|
@ -2,11 +2,13 @@ import numpy as np
|
||||||
|
|
||||||
from ..AShape import AShape
|
from ..AShape import AShape
|
||||||
from ..backend import Kernel
|
from ..backend import Kernel
|
||||||
|
from ..EInterpolation import EInterpolation
|
||||||
from ..HKernel import HKernel
|
from ..HKernel import HKernel
|
||||||
from ..SCacheton import SCacheton
|
from ..SCacheton import SCacheton
|
||||||
from ..Tensor import Tensor
|
from ..Tensor import Tensor
|
||||||
|
|
||||||
def remap_np_affine (input_t : Tensor, affine_n : np.array, inverse=False, output_size=None, dtype=None) -> Tensor:
|
|
||||||
|
def remap_np_affine (input_t : Tensor, affine_n : np.ndarray, interpolation : EInterpolation = None, inverse=False, output_size=None, dtype=None) -> Tensor:
|
||||||
"""
|
"""
|
||||||
remap affine operator for all channels using single numpy affine mat
|
remap affine operator for all channels using single numpy affine mat
|
||||||
|
|
||||||
|
@ -16,12 +18,14 @@ def remap_np_affine (input_t : Tensor, affine_n : np.array, inverse=False, outpu
|
||||||
|
|
||||||
affine_n np.array (2,3)
|
affine_n np.array (2,3)
|
||||||
|
|
||||||
|
interpolation EInterpolation
|
||||||
|
|
||||||
dtype
|
dtype
|
||||||
"""
|
"""
|
||||||
if affine_n.shape != (2,3):
|
if affine_n.shape != (2,3):
|
||||||
raise ValueError('affine_n.shape must be (2,3)')
|
raise ValueError('affine_n.shape must be (2,3)')
|
||||||
|
|
||||||
op = SCacheton.get(_RemapAffineOp, input_t.shape, input_t.dtype, output_size, dtype)
|
op = SCacheton.get(_RemapAffineOp, input_t.shape, input_t.dtype, interpolation, output_size, dtype)
|
||||||
|
|
||||||
output_t = Tensor( op.o_shape, op.o_dtype, device=input_t.get_device() )
|
output_t = Tensor( op.o_shape, op.o_dtype, device=input_t.get_device() )
|
||||||
|
|
||||||
|
@ -33,7 +37,7 @@ def remap_np_affine (input_t : Tensor, affine_n : np.array, inverse=False, outpu
|
||||||
D = 1.0 / D if D != 0.0 else 0.0
|
D = 1.0 / D if D != 0.0 else 0.0
|
||||||
a, b, c, d, e, f = ( e*D, -b*D, (b*f-e*c)*D ,
|
a, b, c, d, e, f = ( e*D, -b*D, (b*f-e*c)*D ,
|
||||||
-d*D, a*D, (d*c-a*f)*D )
|
-d*D, a*D, (d*c-a*f)*D )
|
||||||
|
|
||||||
input_t.get_device().run_kernel(op.forward_krn, output_t.get_buffer(), input_t.get_buffer(),
|
input_t.get_device().run_kernel(op.forward_krn, output_t.get_buffer(), input_t.get_buffer(),
|
||||||
np.float32(a), np.float32(b), np.float32(c), np.float32(d), np.float32(e), np.float32(f) )
|
np.float32(a), np.float32(b), np.float32(c), np.float32(d), np.float32(e), np.float32(f) )
|
||||||
|
|
||||||
|
@ -41,11 +45,13 @@ def remap_np_affine (input_t : Tensor, affine_n : np.array, inverse=False, outpu
|
||||||
|
|
||||||
|
|
||||||
class _RemapAffineOp():
|
class _RemapAffineOp():
|
||||||
def __init__(self, i_shape : AShape, i_dtype, o_size, o_dtype):
|
def __init__(self, i_shape : AShape, i_dtype, interpolation, o_size, o_dtype):
|
||||||
if np.dtype(i_dtype).type == np.bool_:
|
if np.dtype(i_dtype).type == np.bool_:
|
||||||
raise ValueError('np.bool_ dtype of i_dtype is not supported.')
|
raise ValueError('np.bool_ dtype of i_dtype is not supported.')
|
||||||
if i_shape.ndim < 2:
|
if i_shape.ndim < 2:
|
||||||
raise ValueError('i_shape.ndim must be >= 2 (...,H,W)')
|
raise ValueError('i_shape.ndim must be >= 2 (...,H,W)')
|
||||||
|
if interpolation is None:
|
||||||
|
interpolation = EInterpolation.LINEAR
|
||||||
|
|
||||||
IH,IW = i_shape[-2:]
|
IH,IW = i_shape[-2:]
|
||||||
if o_size is not None:
|
if o_size is not None:
|
||||||
|
@ -60,7 +66,8 @@ class _RemapAffineOp():
|
||||||
self.o_shape = o_shape
|
self.o_shape = o_shape
|
||||||
self.o_dtype = o_dtype = o_dtype if o_dtype is not None else i_dtype
|
self.o_dtype = o_dtype = o_dtype if o_dtype is not None else i_dtype
|
||||||
|
|
||||||
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
|
if interpolation == EInterpolation.LINEAR:
|
||||||
|
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
|
||||||
|
|
||||||
{HKernel.define_tensor('O', o_shape, o_dtype)}
|
{HKernel.define_tensor('O', o_shape, o_dtype)}
|
||||||
{HKernel.define_tensor('I', i_shape, i_dtype)}
|
{HKernel.define_tensor('I', i_shape, i_dtype)}
|
||||||
|
@ -93,4 +100,122 @@ __kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const I_PTR_TYPE* I
|
||||||
|
|
||||||
O_GLOBAL_STORE(gid, p00 + p01 + p10 + p11);
|
O_GLOBAL_STORE(gid, p00 + p01 + p10 + p11);
|
||||||
}}
|
}}
|
||||||
""")
|
""")
|
||||||
|
elif interpolation == EInterpolation.CUBIC:
|
||||||
|
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
|
||||||
|
|
||||||
|
{HKernel.define_tensor('O', o_shape, o_dtype)}
|
||||||
|
{HKernel.define_tensor('I', i_shape, i_dtype)}
|
||||||
|
|
||||||
|
float cubic(float p0, float p1, float p2, float p3, float x)
|
||||||
|
{{
|
||||||
|
float a0 = p1;
|
||||||
|
float a1 = p2 - p0;
|
||||||
|
float a2 = 2 * p0 - 5 * p1 + 4 * p2 - p3;
|
||||||
|
float a3 = 3 * (p1 - p2) + p3 - p0;
|
||||||
|
return a0 + 0.5 * x * (a1 + x * (a2 + x * a3));
|
||||||
|
}}
|
||||||
|
|
||||||
|
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const I_PTR_TYPE* I_PTR_NAME,
|
||||||
|
float a, float b, float c,
|
||||||
|
float d, float e, float f)
|
||||||
|
{{
|
||||||
|
size_t gid = get_global_id(0);
|
||||||
|
|
||||||
|
{HKernel.decompose_idx_to_axes_idxs('gid', 'O', o_shape.ndim)}
|
||||||
|
|
||||||
|
float cx01f = om1*a + om2*b + c;
|
||||||
|
float cy01f = om1*d + om2*e + f;
|
||||||
|
|
||||||
|
float cxf = floor(cx01f); int cx = (int)cxf;
|
||||||
|
float cyf = floor(cy01f); int cy = (int)cyf;
|
||||||
|
|
||||||
|
float dx = cx01f-cxf;
|
||||||
|
float dy = cy01f-cyf;
|
||||||
|
|
||||||
|
float row[4];
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int y=cy-1, j=0; y<=cy+2; y++, j++)
|
||||||
|
{{
|
||||||
|
float col[4];
|
||||||
|
#pragma unroll
|
||||||
|
for (int x=cx-1, i=0; x<=cx+2; x++, i++)
|
||||||
|
{{
|
||||||
|
float sxy = I_GLOBAL_LOAD(I_IDX_MOD({HKernel.axes_seq_enum('O', o_shape.ndim-2, suffix='y,x')}));
|
||||||
|
|
||||||
|
col[i] = sxy*(y >= 0 & y < Im2 & x >= 0 & x < Im1);
|
||||||
|
}}
|
||||||
|
row[j] = cubic(col[0], col[1], col[2], col[3], dx);
|
||||||
|
}}
|
||||||
|
|
||||||
|
float O = cubic(row[0], row[1], row[2], row[3], dy);
|
||||||
|
|
||||||
|
O_GLOBAL_STORE(gid, O);
|
||||||
|
}}
|
||||||
|
""")
|
||||||
|
elif interpolation in [EInterpolation.LANCZOS3, EInterpolation.LANCZOS4]:
|
||||||
|
RAD = 3 if interpolation == EInterpolation.LANCZOS3 else 4
|
||||||
|
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
|
||||||
|
|
||||||
|
{HKernel.define_tensor('O', o_shape, o_dtype)}
|
||||||
|
{HKernel.define_tensor('I', i_shape, i_dtype)}
|
||||||
|
|
||||||
|
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const I_PTR_TYPE* I_PTR_NAME,
|
||||||
|
float a, float b, float c,
|
||||||
|
float d, float e, float f)
|
||||||
|
{{
|
||||||
|
size_t gid = get_global_id(0);
|
||||||
|
|
||||||
|
{HKernel.decompose_idx_to_axes_idxs('gid', 'O', o_shape.ndim)}
|
||||||
|
|
||||||
|
float cx01f = om1*a + om2*b + c;
|
||||||
|
float cy01f = om1*d + om2*e + f;
|
||||||
|
|
||||||
|
float cxf = floor(cx01f); int cx = (int)cxf;
|
||||||
|
float cyf = floor(cy01f); int cy = (int)cyf;
|
||||||
|
|
||||||
|
#define RAD {RAD}
|
||||||
|
float Fy[2 * RAD];
|
||||||
|
float Fx[2 * RAD];
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int y=cy-RAD+1, j=0; y<=cy+RAD; y++, j++)
|
||||||
|
{{
|
||||||
|
float dy = fabs(cy01f - y);
|
||||||
|
if (dy < 1e-4) Fy[j] = 1;
|
||||||
|
else if (dy > RAD) Fy[j] = 0;
|
||||||
|
else Fy[j] = ( RAD * sin(M_PI * dy) * sin(M_PI * dy / RAD) ) / ( (M_PI*M_PI)*dy*dy );
|
||||||
|
}}
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int x=cx-RAD+1, i=0; x<=cx+RAD; x++, i++)
|
||||||
|
{{
|
||||||
|
float dx = fabs(cx01f - x);
|
||||||
|
if (dx < 1e-4) Fx[i] = 1;
|
||||||
|
else if (dx > RAD) Fx[i] = 0;
|
||||||
|
else Fx[i] = ( RAD * sin(M_PI * dx) * sin(M_PI * dx / RAD) ) / ( (M_PI*M_PI)*dx*dx );
|
||||||
|
}}
|
||||||
|
|
||||||
|
float FxFysum = 0;
|
||||||
|
float O = 0;
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int y=cy-RAD+1, j=0; y<=cy+RAD; y++, j++)
|
||||||
|
#pragma unroll
|
||||||
|
for (int x=cx-RAD+1, i=0; x<=cx+RAD; x++, i++)
|
||||||
|
{{
|
||||||
|
float sxy = I_GLOBAL_LOAD(I_IDX_MOD({HKernel.axes_seq_enum('O', o_shape.ndim-2, suffix='y,x')}));
|
||||||
|
|
||||||
|
float Fxyv = Fx[i]*Fy[j];
|
||||||
|
FxFysum += Fxyv;
|
||||||
|
|
||||||
|
O += sxy*Fxyv*(y >= 0 & y < Im2 & x >= 0 & x < Im1);
|
||||||
|
}}
|
||||||
|
O = O / FxFysum;
|
||||||
|
|
||||||
|
O_GLOBAL_STORE(gid, O);
|
||||||
|
}}
|
||||||
|
""")
|
||||||
|
else:
|
||||||
|
raise ValueError(f'Unsupported interpolation type {interpolation}')
|
Loading…
Add table
Add a link
Reference in a new issue