update xlib.avecl

2025-08-14 18:57:24 -07:00 · 2021-10-20 18:02:50 +04:00 · 2021-10-20 18:02:50 +04:00 · 6da916cc66
commit 6da916cc66
parent 2d401f47f8
14 changed files with 246 additions and 184 deletions
--- a/xlib/avecl/_internal/op/__init__.py
+++ b/xlib/avecl/_internal/op/__init__.py
@@ -9,12 +9,13 @@ from .depthwise_conv2D import depthwise_conv2D
 from .gaussian_blur import gaussian_blur
 from .matmul import matmul, matmulc
 from .pad import pad
+from .rct import rct
 from .reduce import (moments, reduce_max, reduce_mean, reduce_min, reduce_std,
                     reduce_sum, reduce_variance)
 from .remap import remap
 from .remap_np_affine import remap_np_affine
 from .reshape import reshape
-from .slice_ import slice_
+from .slice_ import slice_, split
 from .slice_set import slice_set
 from .stack import stack
 from .tile import tile
--- a/xlib/avecl/_internal/op/any_wise.py
+++ b/xlib/avecl/_internal/op/any_wise.py
@@ -1,27 +1,31 @@
 import numpy as np

+from ..AAxes import AAxes
 from ..AShape import AShape
 from ..backend import Kernel
 from ..HArgs import HArgs
 from ..HKernel import HKernel
 from ..HType import HType
-from ..info import BroadcastInfo
+from ..info import BroadcastInfo, ReductionInfo
 from ..SCacheton import SCacheton
 from ..Tensor import Tensor


 def any_wise(op_text : str,
             *args,
+             dim_wise_axis : int = None,
             dtype : np.dtype = None,
             output_t:Tensor=None) -> Tensor:
    """
-    operator for N-wise ops with N inputs
+    elements-wise operator with N inputs

    arguments
        op_text     example: O=(2*I0*I1)+I2

        *args       List[ Tensor | number ]

+        dim_wise_axis(None)
+
        dtype

        output_t            compute result to this Tensor.
@@ -33,7 +37,7 @@ def any_wise(op_text : str,

    shape_list, dtype_list, krn_args = HArgs.decompose(args)

-    op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dtype, op_text)
+    op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dim_wise_axis, dtype, op_text)

    if output_t is None:
        output_t = Tensor ( op.o_shape, op.o_dtype, device=device )
@@ -45,59 +49,60 @@ def any_wise(op_text : str,
    return output_t

 class _AnyWiseOp:
-    def __init__(self, shape_list, dtype_list, o_dtype, op_text : str):
+    def __init__(self, shape_list, dtype_list, dim_wise_axis, o_dtype, op_text : str):
        if len(shape_list) != len(dtype_list):
            raise ValueError('len(shape_list) != len(dtype_list)')

        self.o_dtype = o_dtype = o_dtype if o_dtype is not None else HType.get_most_weighted_dtype (dtype_list)
+        self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
+        self.o_shape = o_shape = info.o_shape

-        if len(shape_list) == 1:
-            # element-wise.
-            i_shape, i_dtype = shape_list[0], dtype_list[0]
-            self.o_shape = o_shape = i_shape
+        g_shape = o_shape
+        if dim_wise_axis is not None:
+            dim_wise_axis = o_shape.check_axis(dim_wise_axis)

-            self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
-{HKernel.define_tensor('O', o_shape, o_dtype)}
-{HKernel.define_tensor('IN', i_shape, i_dtype)}
-__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const IN_PTR_TYPE* IN_PTR_NAME)
-{{
-size_t gid = get_global_id(0);
+            dim_wise_axis_size = o_shape[dim_wise_axis]
+            if dim_wise_axis_size > 16:
+                raise ValueError(f'dim_wise_axis size > 16: {dim_wise_axis_size}')

-O_TYPE O = O_GLOBAL_LOAD(gid);
-IN_TYPE I0 = IN_GLOBAL_LOAD(gid);
-{op_text};
-O_GLOBAL_STORE(gid, O);
-}}
-""")
-        else:
-            # Multi arg.
-            self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
+            g_shape = ReductionInfo( o_shape, AAxes(dim_wise_axis), False ).o_shape

-            self.o_shape = o_shape = info.o_shape
+        defs, arg_defs, impls = [], [], []
+        for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
+            t_name = f'I{i}'
+            if t_shape is not None:
+                defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
+                arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )

-            defs, arg_defs, impls = [], [], []
-            for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
-                t_name = f'I{i}'
-                if t_shape is not None:
-                    defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
-                    arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
-                    impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('O', info.o_shape.ndim)}));")
+                if dim_wise_axis is not None:
+                    for i_elem in range(dim_wise_axis_size):
+                        impls.append( f"{t_name}_TYPE {t_name}_{i_elem} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}));")
                else:
-                    arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
+                        impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim)}));")
+            else:
+                arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )

-            defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
+        defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)

-            self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
+        if dim_wise_axis is not None:
+            o_def = '\n'.join( f"O_TYPE O_{i_elem};" for i_elem in range(dim_wise_axis_size) )
+            o_store = '\n'.join( f"O_GLOBAL_STORE(O_IDX({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}), O_{i_elem});" for i_elem in range(dim_wise_axis_size) )
+        else:
+            o_def   = 'O_TYPE O;'
+            o_store = 'O_GLOBAL_STORE(gid, O);'
+
+        self.forward_krn = Kernel(global_shape=(g_shape.size,), kernel_text=f"""
 {defs}
 {HKernel.define_tensor('O', o_shape, o_dtype)}
+{HKernel.define_tensor_shape('G', g_shape)}
 __kernel void impl(__global O_PTR_TYPE* O_PTR_NAME{arg_defs})
 {{
 size_t gid = get_global_id(0);
-{HKernel.decompose_idx_to_axes_idxs('gid', 'o', o_shape.ndim)}
+{HKernel.decompose_idx_to_axes_idxs('gid', 'G', g_shape.ndim)}
 {impls}
-O_TYPE O;
+{o_def}
 {op_text};
-O_GLOBAL_STORE(gid, O);
+{o_store}
 }}
 """)

--- a/xlib/avecl/_internal/op/cvt_color.py
+++ b/xlib/avecl/_internal/op/cvt_color.py
@@ -39,7 +39,7 @@ def cvt_color (input_t : Tensor, in_mode : str, out_mode : str, ch_axis=1, dtype
    return output_t

 _allowed_modes = ['RGB', 'BGR', 'XYZ', 'LAB']
-_allowed_dtypes = [np.float16, np.float32, np.float64]
+_allowed_dtypes = [np.float16, np.float32]

 class _CvtColor32Op():
    def __init__(self, i_shape : AShape, i_dtype, in_mode, o_dtype, out_mode, ch_axis):
@@ -100,54 +100,74 @@ class _CvtColor32Op():
            self.forward_krn = krn

    @staticmethod
-    def get_RGB_to_LAB_body(R,G,B,L,a,b,lab_type='') -> str:
+    def get_RGB_to_LAB_body(R,G,B,L,a,b, declare_out_type=False) -> str:
        return f"""
-{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,'X','Y','Z', xyz_type='float')}
-{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, lab_type=lab_type)}
+{_CvtColor32Op.get_sRGB_to_XYZ_body(R,G,B,'X','Y','Z', declare_out_type=True)}
+{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, declare_out_type=declare_out_type)}
 """

    @staticmethod
-    def get_LAB_to_RGB_body(L,a,b,R,G,B,rgb_type='') -> str:
+    def get_LAB_to_RGB_body(L,a,b,R,G,B, declare_out_type=False) -> str:
        return f"""
-{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', xyz_type='float')}
-{_CvtColor32Op.get_XYZ_to_RGB_body('X','Y','Z',R,G,B,rgb_type=rgb_type)}
+{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', declare_out_type=True)}
+{_CvtColor32Op.get_XYZ_to_sRGB_body('X','Y','Z',R,G,B, declare_out_type=declare_out_type)}
 """

    @staticmethod
-    def get_RGB_to_XYZ_body(R,G,B,X,Y,Z,xyz_type='') -> str:
+    def get_sRGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
        return f"""
-{xyz_type} {X} = fma(0.4124564, {R}, fma(0.3575761, {G}, 0.1804375*{B}));
-{xyz_type} {Y} = fma(0.2126729, {R}, fma(0.7151522, {G}, 0.0721750*{B}));
-{xyz_type} {Z} = fma(0.0193339, {R}, fma(0.1191920, {G}, 0.9503041*{B}));
-"""
-    @staticmethod
-    def get_XYZ_to_RGB_body(X,Y,Z,R,G,B,rgb_type='') -> str:
-        return f"""
-{rgb_type} {R} = fma( 3.2404542, {X}, fma(-1.5371385, {Y}, -0.4985314*{Z}));
-{rgb_type} {G} = fma(-0.9692660, {X}, fma( 1.8760108, {Y},  0.0415560*{Z}));
-{rgb_type} {B} = fma( 0.0556434, {X}, fma(-0.2040259, {Y},  1.0572252*{Z}));
+{R} = ({R} > 0.04045)*( pow( ({R}+0.055)/1.055, 2.4) ) + ({R} <= 0.04045)*({R} / 12.92);
+{G} = ({G} > 0.04045)*( pow( ({G}+0.055)/1.055, 2.4) ) + ({G} <= 0.04045)*({G} / 12.92);
+{B} = ({B} > 0.04045)*( pow( ({B}+0.055)/1.055, 2.4) ) + ({B} <= 0.04045)*({B} / 12.92);
+
+{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,X,Y,Z,declare_out_type=declare_out_type) }
 """

    @staticmethod
-    def get_RGB_to_BGR_body(R,G,B,b,g,r,bgr_type='') -> str:
+    def get_RGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
        return f"""
-{bgr_type} {b} = {R};
-{bgr_type} {g} = {G};
-{bgr_type} {r} = {B};
+{'float' if declare_out_type else ''} {X} = {R}*0.412453 + {G}*0.357580 + {B}*0.180423;
+{'float' if declare_out_type else ''} {Y} = {R}*0.212671 + {G}*0.715160 + {B}*0.072169;
+{'float' if declare_out_type else ''} {Z} = {R}*0.019334 + {G}*0.119193 + {B}*0.950227;
 """

    @staticmethod
-    def get_BGR_to_RGB_body(B,G,R,r,g,b,rgb_type='') -> str:
+    def get_XYZ_to_sRGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
        return f"""
-{rgb_type} {r} = {B};
-{rgb_type} {g} = {G};
-{rgb_type} {b} = {R};
+{_CvtColor32Op.get_XYZ_to_RGB_body(X,Y,Z,R,G,B,declare_out_type=declare_out_type) }
+{R} = ({R} > 0.0031308)*( 1.055*pow({R},1.0/2.4)-0.055 ) + ({R} <= 0.0031308)*({R} * 12.92);
+{G} = ({G} > 0.0031308)*( 1.055*pow({G},1.0/2.4)-0.055 ) + ({G} <= 0.0031308)*({G} * 12.92);
+{B} = ({B} > 0.0031308)*( 1.055*pow({B},1.0/2.4)-0.055 ) + ({B} <= 0.0031308)*({B} * 12.92);
 """

    @staticmethod
-    def get_XYZ_to_LAB_body(X,Y,Z,L,A,B,lab_type='') -> str:
+    def get_XYZ_to_RGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {R} = clamp( {X}* 3.240479 + {Y}*-1.53715  + {Z}*-0.498535, 0.0, 1.0 );
+{'float' if declare_out_type else ''} {G} = clamp( {X}*-0.969256 + {Y}* 1.875991 + {Z}* 0.041556, 0.0, 1.0 );
+{'float' if declare_out_type else ''} {B} = clamp( {X}* 0.055648 + {Y}*-0.204043 + {Z}* 1.057311, 0.0, 1.0 );
+"""
+
+    @staticmethod
+    def get_RGB_to_BGR_body(R,G,B,b,g,r, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {b} = {R};
+{'float' if declare_out_type else ''} {g} = {G};
+{'float' if declare_out_type else ''} {r} = {B};
+"""
+
+    @staticmethod
+    def get_BGR_to_RGB_body(B,G,R,r,g,b, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {r} = {B};
+{'float' if declare_out_type else ''} {g} = {G};
+{'float' if declare_out_type else ''} {b} = {R};
+"""
+
+    @staticmethod
+    def get_XYZ_to_LAB_body(X,Y,Z,L,A,B, declare_out_type=False) -> str:
        beta3 = '((6.0/29.0)*(6.0/29.0)*(6.0/29.0))'
-        xyz_xn = '(0.9556)'
+        xyz_xn = '(0.950456)'
        xyz_zn = '(1.088754)'
        return f"""
 {X} /= {xyz_xn};
@@ -157,20 +177,20 @@ class _CvtColor32Op():
 {Y} = ({Y} > {beta3})*rootn({Y}, 3) + ({Y} <= {beta3})*(7.787*{Y}+4.0/29.0);
 {Z} = ({Z} > {beta3})*rootn({Z}, 3) + ({Z} <= {beta3})*(7.787*{Z}+4.0/29.0);

-{lab_type} {L} = 116.0*{Y}-16.0;
-{lab_type} {A} = 500.0*({X}-{Y});
-{lab_type} {B} = 200.0*({Y}-{Z});
+{'float' if declare_out_type else ''} {L} = 116.0*{Y}-16.0;
+{'float' if declare_out_type else ''} {A} = 500.0*({X}-{Y});
+{'float' if declare_out_type else ''} {B} = 200.0*({Y}-{Z});
 """
    @staticmethod
-    def get_LAB_to_XYZ_body(L,A,B,X,Y,Z,xyz_type='') -> str:
+    def get_LAB_to_XYZ_body(L,A,B,X,Y,Z, declare_out_type=False) -> str:
        beta = '(6.0/29.0)'
        beta2 = '((6.0/29.0)*(6.0/29.0))'
-        xyz_xn = '(0.9556)'
+        xyz_xn = '(0.950456)'
        xyz_zn = '(1.088754)'
        return f"""
-{xyz_type} {Y} = ({L} + 16.0) / 116.0;
-{xyz_type} {X} = {Y} + {A} / 500.0;
-{xyz_type} {Z} = {Y} - {B} / 200.0;
+{'float' if declare_out_type else ''} {Y} = ({L} + 16.0) / 116.0;
+{'float' if declare_out_type else ''} {X} = {Y} + {A} / 500.0;
+{'float' if declare_out_type else ''} {Z} = {Y} - {B} / 200.0;

 {Y} = ({Y} > {beta})*({Y}*{Y}*{Y})          + ({Y} <= {beta})*({Y}-16.0/116.0)*3*{beta2};
 {X} = ({X} > {beta})*({X}*{X}*{X}*{xyz_xn}) + ({X} <= {beta})*({X}-16.0/116.0)*3*{beta2}*{xyz_xn};
--- a/xlib/avecl/_internal/op/reduce.py
+++ b/xlib/avecl/_internal/op/reduce.py
@@ -58,7 +58,7 @@ def reduce_variance(input_t, axes=None, keepdims=False):
    mean = reduce_mean(input_t, axes, keepdims=True)
    return reduce_mean(square(input_t - mean), axes, keepdims)

-def moments(input_t, axes=None, keepdims=False):
+def moments(input_t, axes=None):
    """
    Returns (mean, variance) of input_t

@@ -68,11 +68,9 @@ def moments(input_t, axes=None, keepdims=False):
                    Iterable of ints.
                    None - all axes

-        keepdims(False)     keep reduced axes
    """
-    mean = reduce_mean(input_t, axes, keepdims)
-    mean_shape_keepdims = mean._op.info.o_shape_kd
-    var = reduce_mean(square(input_t - mean.reshape(mean_shape_keepdims) ), axes, keepdims)
+    mean = reduce_mean(input_t, axes, True)
+    var = reduce_mean(square(input_t - mean), axes, True)
    return mean, var

 def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
--- a/xlib/avecl/_internal/op/slice_.py
+++ b/xlib/avecl/_internal/op/slice_.py
@@ -1,6 +1,9 @@
+from typing import List
+
 import numpy as np

 from ..AShape import AShape
+from ..AAxes import AAxes
 from ..backend import Kernel
 from ..HKernel import HKernel
 from ..HType import HType
@@ -9,6 +12,29 @@ from ..SCacheton import SCacheton
 from ..Tensor import Tensor


+def split(input_t : Tensor, axis, keepdims=False) -> List[Tensor]:
+    """
+
+    arguments
+
+     input_t    Tensor
+
+     axis
+
+    """
+    shape = input_t.shape
+
+    result = []
+    for i in range(shape[axis]):
+        slices = [slice(None, None, None)]*shape.ndim
+
+        slices[axis] = i if not keepdims else slice(i,i+1,1)
+
+        result.append( slice_(input_t, slices) )
+
+    return result
+
+
 def slice_(input_t : Tensor, slices, dtype : np.dtype = None, output_t=None, is_add_to_output=False) -> Tensor:
    """
    arguments: