update xlib.avecl

This commit is contained in:
iperov 2021-10-06 14:50:53 +04:00
commit 65fb01ff39
6 changed files with 143 additions and 7 deletions

View file

@ -18,6 +18,7 @@ from ._internal.backend import (Device, DeviceInfo, Kernel,
get_available_devices_info, get_best_device, get_available_devices_info, get_best_device,
get_default_device, get_device, get_default_device, get_device,
set_default_device) set_default_device)
from ._internal.EInterpolation import EInterpolation
from ._internal.HArgs import HArgs from ._internal.HArgs import HArgs
from ._internal.HKernel import HKernel from ._internal.HKernel import HKernel
from ._internal.HTensor import HTensor from ._internal.HTensor import HTensor

View file

@ -0,0 +1,8 @@
from enum import IntEnum, IntEnum
class EInterpolation(IntEnum):
NEAREST = 0
LINEAR = 1
CUBIC = 2
LANCZOS3 = 3
LANCZOS4 = 4

View file

@ -102,6 +102,7 @@ class Tensor:
def __setitem__(self, slices, value): ... def __setitem__(self, slices, value): ...
def as_shape(self, shape) -> 'Tensor': ... def as_shape(self, shape) -> 'Tensor': ...
def cast(self, dtype) -> 'Tensor': ...
def copy(self) -> 'Tensor': ... def copy(self) -> 'Tensor': ...
def max(self, axes=None, keepdims=False) -> 'Tensor': ... def max(self, axes=None, keepdims=False) -> 'Tensor': ...
def mean(self, axes=None, keepdims=False) -> 'Tensor': ... def mean(self, axes=None, keepdims=False) -> 'Tensor': ...

View file

@ -60,6 +60,7 @@ def Tensor_as_shape(self : Tensor, shape) -> Tensor:
return TensorRef(self, shape) return TensorRef(self, shape)
Tensor.as_shape = Tensor_as_shape Tensor.as_shape = Tensor_as_shape
Tensor.cast = cast
def Tensor_copy(self : Tensor) -> Tensor: def Tensor_copy(self : Tensor) -> Tensor:
return Tensor.from_value(self) return Tensor.from_value(self)
Tensor.copy = Tensor_copy Tensor.copy = Tensor_copy

View file

@ -41,5 +41,5 @@ def binary_morph(input_t : Tensor, erode_dilate : int, blur : float, fade_to_bor
x = gaussian_blur(x, blur * 0.250, dtype=dtype) x = gaussian_blur(x, blur * 0.250, dtype=dtype)
else: else:
x = cast(x, dtype=dtype) x = cast(x, dtype=dtype)
return x[...,H:-H,W:-W] return x[...,H:-H,W:-W]

View file

@ -2,11 +2,13 @@ import numpy as np
from ..AShape import AShape from ..AShape import AShape
from ..backend import Kernel from ..backend import Kernel
from ..EInterpolation import EInterpolation
from ..HKernel import HKernel from ..HKernel import HKernel
from ..SCacheton import SCacheton from ..SCacheton import SCacheton
from ..Tensor import Tensor from ..Tensor import Tensor
def remap_np_affine (input_t : Tensor, affine_n : np.array, inverse=False, output_size=None, dtype=None) -> Tensor:
def remap_np_affine (input_t : Tensor, affine_n : np.ndarray, interpolation : EInterpolation = None, inverse=False, output_size=None, dtype=None) -> Tensor:
""" """
remap affine operator for all channels using single numpy affine mat remap affine operator for all channels using single numpy affine mat
@ -16,12 +18,14 @@ def remap_np_affine (input_t : Tensor, affine_n : np.array, inverse=False, outpu
affine_n np.array (2,3) affine_n np.array (2,3)
interpolation EInterpolation
dtype dtype
""" """
if affine_n.shape != (2,3): if affine_n.shape != (2,3):
raise ValueError('affine_n.shape must be (2,3)') raise ValueError('affine_n.shape must be (2,3)')
op = SCacheton.get(_RemapAffineOp, input_t.shape, input_t.dtype, output_size, dtype) op = SCacheton.get(_RemapAffineOp, input_t.shape, input_t.dtype, interpolation, output_size, dtype)
output_t = Tensor( op.o_shape, op.o_dtype, device=input_t.get_device() ) output_t = Tensor( op.o_shape, op.o_dtype, device=input_t.get_device() )
@ -33,7 +37,7 @@ def remap_np_affine (input_t : Tensor, affine_n : np.array, inverse=False, outpu
D = 1.0 / D if D != 0.0 else 0.0 D = 1.0 / D if D != 0.0 else 0.0
a, b, c, d, e, f = ( e*D, -b*D, (b*f-e*c)*D , a, b, c, d, e, f = ( e*D, -b*D, (b*f-e*c)*D ,
-d*D, a*D, (d*c-a*f)*D ) -d*D, a*D, (d*c-a*f)*D )
input_t.get_device().run_kernel(op.forward_krn, output_t.get_buffer(), input_t.get_buffer(), input_t.get_device().run_kernel(op.forward_krn, output_t.get_buffer(), input_t.get_buffer(),
np.float32(a), np.float32(b), np.float32(c), np.float32(d), np.float32(e), np.float32(f) ) np.float32(a), np.float32(b), np.float32(c), np.float32(d), np.float32(e), np.float32(f) )
@ -41,11 +45,13 @@ def remap_np_affine (input_t : Tensor, affine_n : np.array, inverse=False, outpu
class _RemapAffineOp(): class _RemapAffineOp():
def __init__(self, i_shape : AShape, i_dtype, o_size, o_dtype): def __init__(self, i_shape : AShape, i_dtype, interpolation, o_size, o_dtype):
if np.dtype(i_dtype).type == np.bool_: if np.dtype(i_dtype).type == np.bool_:
raise ValueError('np.bool_ dtype of i_dtype is not supported.') raise ValueError('np.bool_ dtype of i_dtype is not supported.')
if i_shape.ndim < 2: if i_shape.ndim < 2:
raise ValueError('i_shape.ndim must be >= 2 (...,H,W)') raise ValueError('i_shape.ndim must be >= 2 (...,H,W)')
if interpolation is None:
interpolation = EInterpolation.LINEAR
IH,IW = i_shape[-2:] IH,IW = i_shape[-2:]
if o_size is not None: if o_size is not None:
@ -60,7 +66,8 @@ class _RemapAffineOp():
self.o_shape = o_shape self.o_shape = o_shape
self.o_dtype = o_dtype = o_dtype if o_dtype is not None else i_dtype self.o_dtype = o_dtype = o_dtype if o_dtype is not None else i_dtype
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f""" if interpolation == EInterpolation.LINEAR:
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
{HKernel.define_tensor('O', o_shape, o_dtype)} {HKernel.define_tensor('O', o_shape, o_dtype)}
{HKernel.define_tensor('I', i_shape, i_dtype)} {HKernel.define_tensor('I', i_shape, i_dtype)}
@ -93,4 +100,122 @@ __kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const I_PTR_TYPE* I
O_GLOBAL_STORE(gid, p00 + p01 + p10 + p11); O_GLOBAL_STORE(gid, p00 + p01 + p10 + p11);
}} }}
""") """)
elif interpolation == EInterpolation.CUBIC:
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
{HKernel.define_tensor('O', o_shape, o_dtype)}
{HKernel.define_tensor('I', i_shape, i_dtype)}
float cubic(float p0, float p1, float p2, float p3, float x)
{{
float a0 = p1;
float a1 = p2 - p0;
float a2 = 2 * p0 - 5 * p1 + 4 * p2 - p3;
float a3 = 3 * (p1 - p2) + p3 - p0;
return a0 + 0.5 * x * (a1 + x * (a2 + x * a3));
}}
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const I_PTR_TYPE* I_PTR_NAME,
float a, float b, float c,
float d, float e, float f)
{{
size_t gid = get_global_id(0);
{HKernel.decompose_idx_to_axes_idxs('gid', 'O', o_shape.ndim)}
float cx01f = om1*a + om2*b + c;
float cy01f = om1*d + om2*e + f;
float cxf = floor(cx01f); int cx = (int)cxf;
float cyf = floor(cy01f); int cy = (int)cyf;
float dx = cx01f-cxf;
float dy = cy01f-cyf;
float row[4];
#pragma unroll
for (int y=cy-1, j=0; y<=cy+2; y++, j++)
{{
float col[4];
#pragma unroll
for (int x=cx-1, i=0; x<=cx+2; x++, i++)
{{
float sxy = I_GLOBAL_LOAD(I_IDX_MOD({HKernel.axes_seq_enum('O', o_shape.ndim-2, suffix='y,x')}));
col[i] = sxy*(y >= 0 & y < Im2 & x >= 0 & x < Im1);
}}
row[j] = cubic(col[0], col[1], col[2], col[3], dx);
}}
float O = cubic(row[0], row[1], row[2], row[3], dy);
O_GLOBAL_STORE(gid, O);
}}
""")
elif interpolation in [EInterpolation.LANCZOS3, EInterpolation.LANCZOS4]:
RAD = 3 if interpolation == EInterpolation.LANCZOS3 else 4
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
{HKernel.define_tensor('O', o_shape, o_dtype)}
{HKernel.define_tensor('I', i_shape, i_dtype)}
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const I_PTR_TYPE* I_PTR_NAME,
float a, float b, float c,
float d, float e, float f)
{{
size_t gid = get_global_id(0);
{HKernel.decompose_idx_to_axes_idxs('gid', 'O', o_shape.ndim)}
float cx01f = om1*a + om2*b + c;
float cy01f = om1*d + om2*e + f;
float cxf = floor(cx01f); int cx = (int)cxf;
float cyf = floor(cy01f); int cy = (int)cyf;
#define RAD {RAD}
float Fy[2 * RAD];
float Fx[2 * RAD];
#pragma unroll
for (int y=cy-RAD+1, j=0; y<=cy+RAD; y++, j++)
{{
float dy = fabs(cy01f - y);
if (dy < 1e-4) Fy[j] = 1;
else if (dy > RAD) Fy[j] = 0;
else Fy[j] = ( RAD * sin(M_PI * dy) * sin(M_PI * dy / RAD) ) / ( (M_PI*M_PI)*dy*dy );
}}
#pragma unroll
for (int x=cx-RAD+1, i=0; x<=cx+RAD; x++, i++)
{{
float dx = fabs(cx01f - x);
if (dx < 1e-4) Fx[i] = 1;
else if (dx > RAD) Fx[i] = 0;
else Fx[i] = ( RAD * sin(M_PI * dx) * sin(M_PI * dx / RAD) ) / ( (M_PI*M_PI)*dx*dx );
}}
float FxFysum = 0;
float O = 0;
#pragma unroll
for (int y=cy-RAD+1, j=0; y<=cy+RAD; y++, j++)
#pragma unroll
for (int x=cx-RAD+1, i=0; x<=cx+RAD; x++, i++)
{{
float sxy = I_GLOBAL_LOAD(I_IDX_MOD({HKernel.axes_seq_enum('O', o_shape.ndim-2, suffix='y,x')}));
float Fxyv = Fx[i]*Fy[j];
FxFysum += Fxyv;
O += sxy*Fxyv*(y >= 0 & y < Im2 & x >= 0 & x < Im1);
}}
O = O / FxFysum;
O_GLOBAL_STORE(gid, O);
}}
""")
else:
raise ValueError(f'Unsupported interpolation type {interpolation}')