update xlib.avecl

This commit is contained in:
iperov 2021-10-20 18:02:50 +04:00
commit 6da916cc66
14 changed files with 246 additions and 184 deletions

View file

@ -9,12 +9,13 @@ from .depthwise_conv2D import depthwise_conv2D
from .gaussian_blur import gaussian_blur
from .matmul import matmul, matmulc
from .pad import pad
from .rct import rct
from .reduce import (moments, reduce_max, reduce_mean, reduce_min, reduce_std,
reduce_sum, reduce_variance)
from .remap import remap
from .remap_np_affine import remap_np_affine
from .reshape import reshape
from .slice_ import slice_
from .slice_ import slice_, split
from .slice_set import slice_set
from .stack import stack
from .tile import tile

View file

@ -1,27 +1,31 @@
import numpy as np
from ..AAxes import AAxes
from ..AShape import AShape
from ..backend import Kernel
from ..HArgs import HArgs
from ..HKernel import HKernel
from ..HType import HType
from ..info import BroadcastInfo
from ..info import BroadcastInfo, ReductionInfo
from ..SCacheton import SCacheton
from ..Tensor import Tensor
def any_wise(op_text : str,
*args,
dim_wise_axis : int = None,
dtype : np.dtype = None,
output_t:Tensor=None) -> Tensor:
"""
operator for N-wise ops with N inputs
elements-wise operator with N inputs
arguments
op_text example: O=(2*I0*I1)+I2
*args List[ Tensor | number ]
dim_wise_axis(None)
dtype
output_t compute result to this Tensor.
@ -33,7 +37,7 @@ def any_wise(op_text : str,
shape_list, dtype_list, krn_args = HArgs.decompose(args)
op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dtype, op_text)
op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dim_wise_axis, dtype, op_text)
if output_t is None:
output_t = Tensor ( op.o_shape, op.o_dtype, device=device )
@ -45,59 +49,60 @@ def any_wise(op_text : str,
return output_t
class _AnyWiseOp:
def __init__(self, shape_list, dtype_list, o_dtype, op_text : str):
def __init__(self, shape_list, dtype_list, dim_wise_axis, o_dtype, op_text : str):
if len(shape_list) != len(dtype_list):
raise ValueError('len(shape_list) != len(dtype_list)')
self.o_dtype = o_dtype = o_dtype if o_dtype is not None else HType.get_most_weighted_dtype (dtype_list)
self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
self.o_shape = o_shape = info.o_shape
if len(shape_list) == 1:
# element-wise.
i_shape, i_dtype = shape_list[0], dtype_list[0]
self.o_shape = o_shape = i_shape
g_shape = o_shape
if dim_wise_axis is not None:
dim_wise_axis = o_shape.check_axis(dim_wise_axis)
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
{HKernel.define_tensor('O', o_shape, o_dtype)}
{HKernel.define_tensor('IN', i_shape, i_dtype)}
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const IN_PTR_TYPE* IN_PTR_NAME)
{{
size_t gid = get_global_id(0);
dim_wise_axis_size = o_shape[dim_wise_axis]
if dim_wise_axis_size > 16:
raise ValueError(f'dim_wise_axis size > 16: {dim_wise_axis_size}')
O_TYPE O = O_GLOBAL_LOAD(gid);
IN_TYPE I0 = IN_GLOBAL_LOAD(gid);
{op_text};
O_GLOBAL_STORE(gid, O);
}}
""")
else:
# Multi arg.
self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
g_shape = ReductionInfo( o_shape, AAxes(dim_wise_axis), False ).o_shape
self.o_shape = o_shape = info.o_shape
defs, arg_defs, impls = [], [], []
for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
t_name = f'I{i}'
if t_shape is not None:
defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
defs, arg_defs, impls = [], [], []
for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
t_name = f'I{i}'
if t_shape is not None:
defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('O', info.o_shape.ndim)}));")
if dim_wise_axis is not None:
for i_elem in range(dim_wise_axis_size):
impls.append( f"{t_name}_TYPE {t_name}_{i_elem} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}));")
else:
arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim)}));")
else:
arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
if dim_wise_axis is not None:
o_def = '\n'.join( f"O_TYPE O_{i_elem};" for i_elem in range(dim_wise_axis_size) )
o_store = '\n'.join( f"O_GLOBAL_STORE(O_IDX({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}), O_{i_elem});" for i_elem in range(dim_wise_axis_size) )
else:
o_def = 'O_TYPE O;'
o_store = 'O_GLOBAL_STORE(gid, O);'
self.forward_krn = Kernel(global_shape=(g_shape.size,), kernel_text=f"""
{defs}
{HKernel.define_tensor('O', o_shape, o_dtype)}
{HKernel.define_tensor_shape('G', g_shape)}
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME{arg_defs})
{{
size_t gid = get_global_id(0);
{HKernel.decompose_idx_to_axes_idxs('gid', 'o', o_shape.ndim)}
{HKernel.decompose_idx_to_axes_idxs('gid', 'G', g_shape.ndim)}
{impls}
O_TYPE O;
{o_def}
{op_text};
O_GLOBAL_STORE(gid, O);
{o_store}
}}
""")

View file

@ -39,7 +39,7 @@ def cvt_color (input_t : Tensor, in_mode : str, out_mode : str, ch_axis=1, dtype
return output_t
_allowed_modes = ['RGB', 'BGR', 'XYZ', 'LAB']
_allowed_dtypes = [np.float16, np.float32, np.float64]
_allowed_dtypes = [np.float16, np.float32]
class _CvtColor32Op():
def __init__(self, i_shape : AShape, i_dtype, in_mode, o_dtype, out_mode, ch_axis):
@ -100,54 +100,74 @@ class _CvtColor32Op():
self.forward_krn = krn
@staticmethod
def get_RGB_to_LAB_body(R,G,B,L,a,b,lab_type='') -> str:
def get_RGB_to_LAB_body(R,G,B,L,a,b, declare_out_type=False) -> str:
return f"""
{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,'X','Y','Z', xyz_type='float')}
{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, lab_type=lab_type)}
{_CvtColor32Op.get_sRGB_to_XYZ_body(R,G,B,'X','Y','Z', declare_out_type=True)}
{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, declare_out_type=declare_out_type)}
"""
@staticmethod
def get_LAB_to_RGB_body(L,a,b,R,G,B,rgb_type='') -> str:
def get_LAB_to_RGB_body(L,a,b,R,G,B, declare_out_type=False) -> str:
return f"""
{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', xyz_type='float')}
{_CvtColor32Op.get_XYZ_to_RGB_body('X','Y','Z',R,G,B,rgb_type=rgb_type)}
{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', declare_out_type=True)}
{_CvtColor32Op.get_XYZ_to_sRGB_body('X','Y','Z',R,G,B, declare_out_type=declare_out_type)}
"""
@staticmethod
def get_RGB_to_XYZ_body(R,G,B,X,Y,Z,xyz_type='') -> str:
def get_sRGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
return f"""
{xyz_type} {X} = fma(0.4124564, {R}, fma(0.3575761, {G}, 0.1804375*{B}));
{xyz_type} {Y} = fma(0.2126729, {R}, fma(0.7151522, {G}, 0.0721750*{B}));
{xyz_type} {Z} = fma(0.0193339, {R}, fma(0.1191920, {G}, 0.9503041*{B}));
"""
@staticmethod
def get_XYZ_to_RGB_body(X,Y,Z,R,G,B,rgb_type='') -> str:
return f"""
{rgb_type} {R} = fma( 3.2404542, {X}, fma(-1.5371385, {Y}, -0.4985314*{Z}));
{rgb_type} {G} = fma(-0.9692660, {X}, fma( 1.8760108, {Y}, 0.0415560*{Z}));
{rgb_type} {B} = fma( 0.0556434, {X}, fma(-0.2040259, {Y}, 1.0572252*{Z}));
{R} = ({R} > 0.04045)*( pow( ({R}+0.055)/1.055, 2.4) ) + ({R} <= 0.04045)*({R} / 12.92);
{G} = ({G} > 0.04045)*( pow( ({G}+0.055)/1.055, 2.4) ) + ({G} <= 0.04045)*({G} / 12.92);
{B} = ({B} > 0.04045)*( pow( ({B}+0.055)/1.055, 2.4) ) + ({B} <= 0.04045)*({B} / 12.92);
{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,X,Y,Z,declare_out_type=declare_out_type) }
"""
@staticmethod
def get_RGB_to_BGR_body(R,G,B,b,g,r,bgr_type='') -> str:
def get_RGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
return f"""
{bgr_type} {b} = {R};
{bgr_type} {g} = {G};
{bgr_type} {r} = {B};
{'float' if declare_out_type else ''} {X} = {R}*0.412453 + {G}*0.357580 + {B}*0.180423;
{'float' if declare_out_type else ''} {Y} = {R}*0.212671 + {G}*0.715160 + {B}*0.072169;
{'float' if declare_out_type else ''} {Z} = {R}*0.019334 + {G}*0.119193 + {B}*0.950227;
"""
@staticmethod
def get_BGR_to_RGB_body(B,G,R,r,g,b,rgb_type='') -> str:
def get_XYZ_to_sRGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
return f"""
{rgb_type} {r} = {B};
{rgb_type} {g} = {G};
{rgb_type} {b} = {R};
{_CvtColor32Op.get_XYZ_to_RGB_body(X,Y,Z,R,G,B,declare_out_type=declare_out_type) }
{R} = ({R} > 0.0031308)*( 1.055*pow({R},1.0/2.4)-0.055 ) + ({R} <= 0.0031308)*({R} * 12.92);
{G} = ({G} > 0.0031308)*( 1.055*pow({G},1.0/2.4)-0.055 ) + ({G} <= 0.0031308)*({G} * 12.92);
{B} = ({B} > 0.0031308)*( 1.055*pow({B},1.0/2.4)-0.055 ) + ({B} <= 0.0031308)*({B} * 12.92);
"""
@staticmethod
def get_XYZ_to_LAB_body(X,Y,Z,L,A,B,lab_type='') -> str:
def get_XYZ_to_RGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
    """
    Build the source text of three statements that compute R,G,B from X,Y,Z.

    Each output is a fixed linear combination of the three inputs, clamped to
    [0.0, 1.0]. No transfer-curve step is applied in this snippet.

    arguments

        X,Y,Z   str     names of the input variables inserted into the text

        R,G,B   str     names of the output variables inserted into the text

        declare_out_type(False)     when True every statement is prefixed
                                    with 'float', i.e. the outputs are declared
                                    by the snippet instead of only assigned.
    """
    # Evaluated once instead of three times inside the template.
    out_decl = 'float' if declare_out_type else ''
    return f"""
{out_decl} {R} = clamp( {X}* 3.240479 + {Y}*-1.53715 + {Z}*-0.498535, 0.0, 1.0 );
{out_decl} {G} = clamp( {X}*-0.969256 + {Y}* 1.875991 + {Z}* 0.041556, 0.0, 1.0 );
{out_decl} {B} = clamp( {X}* 0.055648 + {Y}*-0.204043 + {Z}* 1.057311, 0.0, 1.0 );
"""
@staticmethod
def get_RGB_to_BGR_body(R,G,B,b,g,r, declare_out_type=False) -> str:
    """
    Build the source text of three assignments copying R,G,B into b,g,r.

    arguments

        R,G,B   str     names of the source variables

        b,g,r   str     names of the destination variables;
                        b receives R, g receives G, r receives B

        declare_out_type(False)     when True every assignment is prefixed
                                    with 'float' so it also declares the
                                    destination variable.
    """
    out_decl = 'float' if declare_out_type else ''
    # (destination, source) pairs in the order the statements are emitted.
    pairs = ( (b, R), (g, G), (r, B) )
    statements = [ f"{out_decl} {dst} = {src};" for dst, src in pairs ]
    return '\n' + '\n'.join(statements) + '\n'
@staticmethod
def get_BGR_to_RGB_body(B,G,R,r,g,b, declare_out_type=False) -> str:
    """
    Build the source text of three assignments copying B,G,R into r,g,b.

    arguments

        B,G,R   str     names of the source variables

        r,g,b   str     names of the destination variables;
                        r receives B, g receives G, b receives R

        declare_out_type(False)     when True every assignment is prefixed
                                    with 'float' so it also declares the
                                    destination variable.
    """
    out_decl = 'float' if declare_out_type else ''
    # (destination, source) pairs in the order the statements are emitted.
    pairs = ( (r, B), (g, G), (b, R) )
    statements = [ f"{out_decl} {dst} = {src};" for dst, src in pairs ]
    return '\n' + '\n'.join(statements) + '\n'
@staticmethod
def get_XYZ_to_LAB_body(X,Y,Z,L,A,B, declare_out_type=False) -> str:
beta3 = '((6.0/29.0)*(6.0/29.0)*(6.0/29.0))'
xyz_xn = '(0.9556)'
xyz_xn = '(0.950456)'
xyz_zn = '(1.088754)'
return f"""
{X} /= {xyz_xn};
@ -157,20 +177,20 @@ class _CvtColor32Op():
{Y} = ({Y} > {beta3})*rootn({Y}, 3) + ({Y} <= {beta3})*(7.787*{Y}+4.0/29.0);
{Z} = ({Z} > {beta3})*rootn({Z}, 3) + ({Z} <= {beta3})*(7.787*{Z}+4.0/29.0);
{lab_type} {L} = 116.0*{Y}-16.0;
{lab_type} {A} = 500.0*({X}-{Y});
{lab_type} {B} = 200.0*({Y}-{Z});
{'float' if declare_out_type else ''} {L} = 116.0*{Y}-16.0;
{'float' if declare_out_type else ''} {A} = 500.0*({X}-{Y});
{'float' if declare_out_type else ''} {B} = 200.0*({Y}-{Z});
"""
@staticmethod
def get_LAB_to_XYZ_body(L,A,B,X,Y,Z,xyz_type='') -> str:
def get_LAB_to_XYZ_body(L,A,B,X,Y,Z, declare_out_type=False) -> str:
beta = '(6.0/29.0)'
beta2 = '((6.0/29.0)*(6.0/29.0))'
xyz_xn = '(0.9556)'
xyz_xn = '(0.950456)'
xyz_zn = '(1.088754)'
return f"""
{xyz_type} {Y} = ({L} + 16.0) / 116.0;
{xyz_type} {X} = {Y} + {A} / 500.0;
{xyz_type} {Z} = {Y} - {B} / 200.0;
{'float' if declare_out_type else ''} {Y} = ({L} + 16.0) / 116.0;
{'float' if declare_out_type else ''} {X} = {Y} + {A} / 500.0;
{'float' if declare_out_type else ''} {Z} = {Y} - {B} / 200.0;
{Y} = ({Y} > {beta})*({Y}*{Y}*{Y}) + ({Y} <= {beta})*({Y}-16.0/116.0)*3*{beta2};
{X} = ({X} > {beta})*({X}*{X}*{X}*{xyz_xn}) + ({X} <= {beta})*({X}-16.0/116.0)*3*{beta2}*{xyz_xn};

View file

@ -58,7 +58,7 @@ def reduce_variance(input_t, axes=None, keepdims=False):
mean = reduce_mean(input_t, axes, keepdims=True)
return reduce_mean(square(input_t - mean), axes, keepdims)
def moments(input_t, axes=None, keepdims=False):
def moments(input_t, axes=None):
"""
Returns (mean, variance) of input_t
@ -68,11 +68,9 @@ def moments(input_t, axes=None, keepdims=False):
Iterable of ints.
None - all axes
keepdims(False) keep reduced axes
"""
mean = reduce_mean(input_t, axes, keepdims)
mean_shape_keepdims = mean._op.info.o_shape_kd
var = reduce_mean(square(input_t - mean.reshape(mean_shape_keepdims) ), axes, keepdims)
mean = reduce_mean(input_t, axes, True)
var = reduce_mean(square(input_t - mean), axes, True)
return mean, var
def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:

View file

@ -1,6 +1,9 @@
from typing import List
import numpy as np
from ..AShape import AShape
from ..AAxes import AAxes
from ..backend import Kernel
from ..HKernel import HKernel
from ..HType import HType
@ -9,6 +12,29 @@ from ..SCacheton import SCacheton
from ..Tensor import Tensor
def split(input_t : Tensor, axis, keepdims=False) -> List[Tensor]:
    """
    Take input_t apart along one axis.

    Calls slice_ once for every index of `axis` and returns the results
    in index order.

    arguments

        input_t     Tensor

        axis        int     axis to iterate over

        keepdims(False)     False: the axis is indexed with the plain integer i.
                            True:  the axis is indexed with slice(i, i+1, 1).
                            (presumably this keeps the axis with size 1 in
                             every piece - depends on slice_, TODO confirm)
    """
    in_shape = input_t.shape

    def _take(idx):
        # Full-range slice on every axis, then narrow only the split axis.
        selector = [slice(None, None, None)] * in_shape.ndim
        selector[axis] = slice(idx, idx+1, 1) if keepdims else idx
        return slice_(input_t, selector)

    return [ _take(idx) for idx in range(in_shape[axis]) ]
def slice_(input_t : Tensor, slices, dtype : np.dtype = None, output_t=None, is_add_to_output=False) -> Tensor:
"""
arguments: