From 6da916cc66a4186fcb02b06fc19727fcf54ae8da Mon Sep 17 00:00:00 2001
From: iperov
Date: Wed, 20 Oct 2021 18:02:50 +0400
Subject: [PATCH] update xlib.avecl

---
 xlib/avecl/__init__.py                 |   1 +
 xlib/avecl/_internal/AShape.py         |  33 ++++--
 xlib/avecl/_internal/HKernel.py        |  62 ++++------
 xlib/avecl/_internal/HType.py          |   8 +-
 xlib/avecl/_internal/NTest.py          | 111 +++++++++++-------
 xlib/avecl/_internal/Tensor.py         |   1 +
 xlib/avecl/_internal/TensorImpl.py     |   1 +
 xlib/avecl/_internal/backend/Device.py |   3 +-
 .../initializer/InitRandomUniform.py   |   2 -
 xlib/avecl/_internal/op/__init__.py    |   3 +-
 xlib/avecl/_internal/op/any_wise.py    |  79 +++++++------
 xlib/avecl/_internal/op/cvt_color.py   |  92 +++++++++------
 xlib/avecl/_internal/op/reduce.py      |   8 +-
 xlib/avecl/_internal/op/slice_.py      |  26 ++++
 14 files changed, 246 insertions(+), 184 deletions(-)

diff --git a/xlib/avecl/__init__.py b/xlib/avecl/__init__.py
index 6e2d849..be2dc5d 100644
--- a/xlib/avecl/__init__.py
+++ b/xlib/avecl/__init__.py
@@ -3,6 +3,7 @@ AveCL ! Make OpenCL great again.
 
 Lightweight ndarray library using OpenCL 1.2 written in pure python.
 Applicable for high-performance general purpose n-dim array computations for every device that supports OpenCL 1.2.
+Supports any dtype except float64.
 Works in python 3.5+. Dependencies: numpy.
 
diff --git a/xlib/avecl/_internal/AShape.py b/xlib/avecl/_internal/AShape.py
index eb27fdf..ba6bea1 100644
--- a/xlib/avecl/_internal/AShape.py
+++ b/xlib/avecl/_internal/AShape.py
@@ -15,7 +15,7 @@ class AShape(Iterable):
             shape       AShape
                         Iterable
-
+
     AShape cannot be a scalar shape; the minimal AShape is (1,)
 
     can raise ValueError during construction
@@ -50,13 +50,26 @@ class AShape(Iterable):
             self.size = size
         else:
             raise ValueError('Invalid type to create AShape')
-
+
     def copy(self) -> 'AShape':
         return AShape(self)
-
+
    def as_list(self) -> List[int]:
         return list(self.shape)
-
+
+    def check_axis(self, axis : int) -> int:
+        """
+        Check the axis and return the normalized axis value
+
+        can raise ValueError
+        """
+        if axis < 0:
+            axis += self.ndim
+
+        if axis < 0 or axis >= self.ndim:
+            raise ValueError(f'axis {axis} out of bounds for ndim {self.ndim}')
+        return axis
+
     def axes_arange(self) -> AAxes:
         """
         Returns tuple of axes arange.
@@ -64,7 +77,7 @@ class AShape(Iterable):
         Example (0,1,2) for ndim 3
         """
         return AAxes(range(self.ndim))
-
+
     def replaced_axes(self, axes, dims) -> 'AShape':
         """
         returns a new AShape where axes are replaced with new dims
@@ -76,22 +89,22 @@ class AShape(Iterable):
                 axis = ndim + axis
             if axis < 0 or axis >= ndim:
                 raise ValueError(f'invalid axis value {axis}')
-
+
             new_shape[axis] = dim
         return AShape(new_shape)
-
+
     def split(self, axis) -> Tuple['AShape', 'AShape']:
         """
         split AShape at the specified axis
-
-        returns two AShape before+exclusive and inclusive+after
+
+        returns two AShapes: before the axis (exclusive) and from the axis on (inclusive)
         """
         if axis < 0:
             axis = self.ndim + axis
 
         if axis < 0 or axis >= self.ndim:
             raise ValueError(f'invalid axis value {axis}')
-
+
         return self[:axis], self[axis:]
 
     def transpose_by_axes(self, axes) -> 'AShape':
diff --git a/xlib/avecl/_internal/HKernel.py b/xlib/avecl/_internal/HKernel.py
index 8512926..71211f5 100644
--- a/xlib/avecl/_internal/HKernel.py
+++ b/xlib/avecl/_internal/HKernel.py
@@ -15,12 +15,9 @@ class HKernel:
                 np.int64   : 'long',
                 np.uint64  : 'ulong',
                 np.float16 : 'half',
-                np.float32 : 'float',
-                np.float64 : 'double'
+                np.float32 : 'float'
                 }
-
-
     @staticmethod
     def np_dtype_to_cl(dtype : np.dtype):
         """
@@ -134,30 +131,33 @@ class HKernel:
         out += [f'#define {name_upper}_GLOBAL_STORE8(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
         out += [f'#define {name_upper}_GLOBAL_STORE16(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
 
-        if dtype in [np.float32, np.float64]:
+        if dtype in [np.float32]:
             out += [f'#define {name_upper}_TO_FLOATX(x) x']
         elif dtype in [np.bool_, np.int8, np.uint8, np.int16, np.uint16, np.int32,np.uint32, np.float16]:
             out += [f'#define {name_upper}_TO_FLOATX(x) ((float)x)']
         elif dtype in [np.int64,np.uint64]:
             out += [f'#define {name_upper}_TO_FLOATX(x) ((double)x)']
         return '\n'.join(out)
-
+
     @staticmethod
     def define_ndim_idx(ndim):
         """
+        define macros to calculate the index for an n-dim shape
+
+        example for ndim=3
+
         #define NDIM3_IDX(t0,t1,t2,T0,T1,T2) (((size_t)(t0))*T1*T2+((size_t)(t1))*T2+((size_t)(t2)))
         #define NDIM3_IDX_MOD(t0,t1,t2,T0,T1,T2) (((size_t)(t0) % T0)*T1*T2+((size_t)(t1) % T1)*T2+((size_t)(t2) % T2))
         """
-
+
         out = [f'#define NDIM{ndim}_IDX(' + \
                ','.join([f't{i}' for i in range(ndim)] + [f'T{i}' for i in range(ndim)]) + \
                ') (' + '+'.join([f'((size_t)(t{i}))' + ''.join(f'*T{j}' for j in range(i+1,ndim)) for i in range(ndim) ]) + ')']
-
+
         out +=[f'#define NDIM{ndim}_IDX_MOD(' + \
                ','.join([f't{i}' for i in range(ndim)] + [f'T{i}' for i in range(ndim)]) + \
                ') (' + '+'.join([f'((size_t)(t{i}) % T{i})' + ''.join(f'*T{j}' for j in range(i+1,ndim)) for i in range(ndim) ]) + ')']
-
+
         return '\n'.join(out)
 
     @staticmethod
@@ -165,14 +165,14 @@ class HKernel:
         """
         Returns definitions for operations with a tensor shape
 
-        example for 'O', (7,3),
+        example for 'O', (2,3),
 
-        #define O0 7
+        #define O0 2
         #define O1 3
         #define Om1 3
-        #define Om2 7
-        #define O_IDX(o0,o1) ( (size_t)(o0) )*3 +( o1 )
-        #define O_IDX_MOD(o0,o1) ( (size_t)(o0) % 7 )*3 +( (o1) % 3 )
+        #define Om2 2
+        #define O_IDX(o0,o1) (((size_t)(o0))*3+((size_t)(o1)))
+        #define O_IDX_MOD(o0,o1) (((size_t)(o0) % 2)*3+((size_t)(o1) % 3))
         """
         shape = tuple(shape)
         ndim = len(shape)
@@ -183,36 +183,14 @@ class HKernel:
         axes_symbols = "".join([str(i) for i in range(ndim)])
         axes_symbols = axes_symbols.upper()
 
-        out = []
-        for i in range(ndim):
-            out += [f'#define {name_upper}{axes_symbols[i]} {shape[i]}']
+        out = [f'#define {name_upper}{axes_symbols[i]} {shape[i]}' for i in range(ndim)]
+        out += [f'#define {name_upper}m{i} {shape[-i]}' for i in range(1,ndim+1)]
 
-        for i in range(1,ndim+1):
-            out += [f'#define {name_upper}m{i} {shape[-i]}']
+        out += [f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) (' + \
+               '+'.join([f'((size_t)({name_lower}{i}))' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']
 
-        line = f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) '
-
-        for i in range(ndim):
-            line += f'( (size_t)({name_lower}{i}) )'
-
-            for j in range(i+1,ndim):
-                line += f'*{shape[j]} '
-            if i != ndim-1:
-                line += '+'
-
-        out += [line]
-
-        line = f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) '
-
-        for i in range(ndim):
-            line += f'( (size_t)({name_lower}{i}) % {shape[i]} )'
-
-            for j in range(i+1,ndim):
-                line += f'*{shape[j]} '
-            if i != ndim-1:
-                line += '+'
-
-        out += [line,'']
+        out += [f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) (' + \
+               '+'.join([f'((size_t)({name_lower}{i}) % {shape[i]})' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']
 
         return '\n'.join(out)
 
diff --git a/xlib/avecl/_internal/HType.py b/xlib/avecl/_internal/HType.py
index 355a706..cc60a8e 100644
--- a/xlib/avecl/_internal/HType.py
+++ b/xlib/avecl/_internal/HType.py
@@ -3,10 +3,10 @@ from typing import Iterable, List
 import numpy as np
 
 scalar_types = [int, float, np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
-                np.float16, np.float32, np.float64, np.bool_]
+                np.float16, np.float32, np.bool_]
 
 np_scalar_types = [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
-                   np.float16, np.float32, np.float64, np.bool_]
+                   np.float16, np.float32, np.bool_]
 
 _np_dtype_to_cl = {
     np.bool_ : 'bool',
@@ -20,7 +20,6 @@ _np_dtype_to_cl = {
     np.int64 : 'long',
     np.float16 : 'half',
     np.float32 : 'float',
-    np.float64 : 'double',
 }
 
 _np_dtype_weight = {
@@ -34,8 +33,7 @@ _np_dtype_weight = {
     np.uint64 : 8,
     np.int64 : 9,
     np.float16 : 10,
-    np.float32 : 11,
-    np.float64 : 12,
+    np.float32 : 11
 }
 
 class HType:
diff --git a/xlib/avecl/_internal/NTest.py b/xlib/avecl/_internal/NTest.py
index 8fa9236..63ca398 100644
--- a/xlib/avecl/_internal/NTest.py
+++ b/xlib/avecl/_internal/NTest.py
@@ -1,14 +1,13 @@
-import traceback
-
 import numpy as np
 
-from .HType import HType
-from .NCore import NCore
-from .backend import get_device, get_default_device, set_default_device
-from .Tensor import Tensor
 from . import op
-from .initializer import InitRandomUniform, InitCoords2DArange
+from .backend import get_default_device, get_device, set_default_device
+from .HType import HType
 from .info import Conv2DInfo
+from .initializer import InitCoords2DArange, InitRandomUniform
+from .NCore import NCore
+from .Tensor import Tensor
+
 
 class NTest():
@@ -45,6 +44,7 @@ class NTest():
             binary_dilate_circle_test,
             binary_morph_test,
             cvt_color_test,
+            rct_test,
             ]
 
         for test_func in test_funcs:
@@ -62,18 +62,39 @@ def _all_close(x,y, atol=1, btol=1):
     return np.allclose( np.ndarray.flatten(x[None,...]), np.ndarray.flatten(y[None,...]), atol, btol )
 
+def rct_test():
+    for _ in range(10):
+        for dtype in [np.float16, np.float32]:
+            base_shape = list(np.random.randint(1, 8, size=4) )
+            shape = base_shape.copy()
+            shape[1] = 3
+
+            mask_shape = base_shape.copy()
+            mask_shape[1] = 3
+
+            print(f'rct {shape} {str(np.dtype(dtype).name)} ... ', end='', flush=True)
+
+            source_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
+            target_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
+            mask_t = Tensor(shape=mask_shape, dtype=dtype, initializer=InitRandomUniform())
+
+            result_t = op.rct(target_t, source_t, target_mask_t=mask_t, source_mask_t=mask_t )
+
+            print('pass')
+
+
 def cvt_color_test():
     for _ in range(10):
         for shape_len in range(2,6):
             for in_mode in ['RGB','BGR','XYZ','LAB']:
                 for out_mode in ['RGB','BGR','XYZ','LAB']:
-                    for dtype in [np.float16, np.float32, np.float64]:
+                    for dtype in [np.float16, np.float32]:
                         shape = list(np.random.randint(1, 8, size=shape_len) )
 
                         ch_axis = np.random.randint(len(shape))
                         shape[ch_axis] = 3
 
-                        print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='')
+                        print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='', flush=True)
 
                         inp_n = np.random.uniform(size=shape ).astype(dtype)
                         inp_t = Tensor.from_value(inp_n)
@@ -81,7 +102,9 @@ def cvt_color_test():
                         out_t = op.cvt_color(inp_t, in_mode=in_mode, out_mode=out_mode, ch_axis=ch_axis)
                         inp_t2 = op.cvt_color(out_t, in_mode=out_mode, out_mode=in_mode, ch_axis=ch_axis)
 
-                        if not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
+                        is_check = in_mode in ['RGB','BGR','XYZ'] and out_mode in ['XYZ','LAB']
+
+                        if is_check and not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
                             raise Exception(f'data is not equal')
                         print('pass')
 
@@ -91,7 +114,7 @@ def cast_test():
         for out_dtype in HType.get_np_scalar_types():
             shape = tuple(np.random.randint(1, 8, size=( np.random.randint(1,5))) )
 
-            print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='')
+            print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='', flush=True)
 
             val_n = np.random.uniform( -64, 64, size=shape ).astype(in_dtype)
             cast_n = val_n.astype(out_dtype)
@@ -113,7 +136,7 @@ def binary_morph_test():
             input_n = np.random.randint( 2, size=shape ).astype(dtype)
             input_t = Tensor.from_value(input_n)
 
-            print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='')
+            print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             op.binary_morph(input_t, erode_dilate=erode_dilate, blur=blur, fade_to_border=True)
@@ -130,7 +153,7 @@ def binary_erode_circle_test():
             input_n = np.random.randint( 2, size=shape ).astype(dtype)
             input_t = Tensor.from_value(input_n)
 
-            print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
+            print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             op.binary_erode_circle(input_t, radius=radius, iterations=iterations)
@@ -147,7 +170,7 @@ def binary_dilate_circle_test():
             input_n = np.random.randint( 2, size=shape ).astype(dtype)
             input_t = Tensor.from_value(input_n)
 
-            print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
+            print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             op.binary_dilate_circle(input_t, radius=radius, iterations=iterations)
 
@@ -156,11 +179,11 @@
 
 def gaussian_blur_test():
     for shape_len in range(2,5):
-        for dtype in [np.float16, np.float32, np.float64]:
+        for dtype in [np.float16, np.float32]:
             shape = np.random.randint( 1, 64, size=(shape_len,) )
 
             sigma = np.random.rand() * 10
 
-            print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='')
+            print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             val_t = Tensor.from_value(val_n)
@@ -179,7 +202,7 @@ def pad_test():
             paddings = tuple( (np.random.randint(8), np.random.randint(8)) for i in range(len(shape)) )
 
-            print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='')
+            print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             pad_n = np.pad(val_n, paddings, mode=mode)
@@ -187,7 +210,7 @@ def pad_test():
             val_t = Tensor.from_value(val_n)
             pad_t = op.pad(val_t, paddings, mode=mode)
 
-            print(f'{pad_n.shape} == {pad_t.shape} ... ', end='')
+            print(f'{pad_n.shape} == {pad_t.shape} ... ', end='', flush=True)
 
             if pad_n.shape != pad_t.shape:
                 raise Exception(f'shape is not equal')
@@ -241,7 +264,7 @@ def slice_set_test():
             shape = tuple(shape)
             slices = tuple(slices)
 
-            print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
+            print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             val_t = Tensor.from_value(val_n)
@@ -330,7 +353,7 @@ def depthwise_conv2d_test():
                         input_shape  = (n, ic, ih, iw)
                         kernel_shape = (ic, ks, ks)
 
-                        print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='')
+                        print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='', flush=True)
 
                         input_n  = np.random.randint( 64, size=input_shape ).astype(dtype)
                         kernel_n = np.ones(shape=kernel_shape ).astype(dtype)
@@ -358,7 +381,7 @@ def warp_affine_test():
             H = np.random.randint(8, 64)
             W = np.random.randint(8, 64)
 
-            print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
+            print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
 
             input_t  = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@@ -380,7 +403,7 @@ def remap_np_affine_test():
             H = np.random.randint(8, 64)
             W = np.random.randint(8, 64)
 
-            print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
+            print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
 
             input_t  = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@@ -402,7 +425,7 @@ def remap_test():
             H = np.random.randint(8, 64)
             W = np.random.randint(8, 64)
 
-            print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
+            print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
 
             input_t  = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@@ -422,7 +445,7 @@ def tile_test():
             shape = tuple(np.random.randint( 8, size=(shape_len,) )+1)
             tiles = tuple(np.random.randint( 4, size=(shape_len,) )+1)
 
-            print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='')
+            print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             tiled_n = np.tile(val_n, tiles)
 
@@ -430,7 +453,7 @@ def tile_test():
             val_t = Tensor.from_value(val_n)
             tiled_t = op.tile(val_t, tiles)
 
-            print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='')
+            print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='', flush=True)
 
             if tiled_n.shape != tiled_t.shape:
                 raise Exception(f'shape is not equal')
@@ -448,7 +471,7 @@ def stack_test():
             axis = np.random.randint(shape_len+1)
             stack_count = np.random.randint(4)+1
 
-            print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='')
+            print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             vals_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for i in range(stack_count) ]
             stack_n = np.stack(vals_n, axis)
@@ -456,7 +479,7 @@ def stack_test():
             vals_t = [ Tensor.from_value(vals_n[i]) for i in range(stack_count) ]
             stack_t = op.stack(vals_t, axis)
 
-            print(f'{stack_n.shape} == {stack_t.shape} ... ', end='')
+            print(f'{stack_n.shape} == {stack_t.shape} ... ', end='', flush=True)
 
             if stack_n.shape != stack_t.shape:
                 raise Exception('shape is not equal')
@@ -483,9 +506,9 @@ def reduce_test():
 
                 keepdims = np.random.randint(2) == 0
 
-                print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='')
+                print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='', flush=True)
 
-                if dtype in [np.float16, np.float32, np.float64]:
+                if dtype in [np.float16, np.float32]:
                     value_n = np.random.uniform(size=shape).astype(dtype)
                 else:
                     value_n = np.random.randint( max(1, int(np.iinfo(dtype).max / np.prod(shape)) ), size=shape, dtype=dtype )
@@ -518,7 +541,7 @@ def InitRandomUniform_test():
         for shape_len in range(1, 5):
             shape = np.random.randint( 8, size=(shape_len,) )+1
 
-            print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='')
+            print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             Tensor(shape, dtype, initializer=InitRandomUniform()).np()
@@ -534,7 +557,7 @@ def InitCoords2DArange_test():
             w_start = np.random.randint(80)
             w_stop = w_start + np.random.randint(80)
 
-            print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='')
+            print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             Tensor(shape, dtype, initializer=InitCoords2DArange(h_start,h_stop,w_start,w_stop )).np()
@@ -551,17 +574,17 @@ def concat_test():
                               for i,dim in enumerate(shape) )
                        for shape in ([shape] * count) )
 
-        print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='')
+        print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='', flush=True)
 
         V_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for shape in shapes ]
         O_n = np.concatenate(V_n, axis)
 
-        print(f'{O_n.shape} == ', end='')
+        print(f'{O_n.shape} == ', end='', flush=True)
 
         V_t = [ Tensor.from_value(V_n[i]) for i in range(count) ]
         O_t = op.concat(V_t, axis)
 
-        print(f'{O_t.shape} ... ', end='')
+        print(f'{O_t.shape} ... ', end='', flush=True)
 
         if O_n.shape != O_t.shape:
             raise Exception('shape is not equal')
@@ -596,19 +619,19 @@ def matmul_test():
 
             A_shape = (BATCH, M, K)
             B_shape = (BATCH, K, N)
 
-            print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='')
+            print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             A_n = np.random.randint( 2**4, size=A_shape ).astype(dtype)
             B_n = np.random.randint( 2**4, size=B_shape ).astype(dtype)
 
             O_n = np.matmul(A_n, B_n)
 
-            print(f'{O_n.shape} == ', end='')
+            print(f'{O_n.shape} == ', end='', flush=True)
 
             A_t = Tensor.from_value(A_n)
             B_t = Tensor.from_value(B_n)
             O_t = op.matmul(A_t, B_t)
 
-            print(f'{O_t.shape} ... ', end='')
+            print(f'{O_t.shape} ... ', end='', flush=True)
 
             if O_n.shape != O_t.shape:
                 raise Exception('shape is not equal')
@@ -659,17 +682,17 @@ def slice_test():
             shape = tuple(shape)
             slices = tuple(slices)
 
-            print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
+            print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             sliced_n = val_n[slices]
 
-            print(f'{sliced_n.shape} ... ', end='')
+            print(f'{sliced_n.shape} ... ', end='', flush=True)
 
             sliced_t = Tensor.from_value(val_n)[slices]
 
-            print(f'{sliced_t.shape} ... ', end='')
+            print(f'{sliced_t.shape} ... ', end='', flush=True)
 
             if 0 in sliced_n.shape:
                 # some cases like 0:1:-1 will produce zero shape and invalid array on numpy
@@ -694,17 +717,17 @@ def transpose_test():
             axes_order = np.array([*range(shape_len)])
             np.random.shuffle(axes_order)
 
-            print(f'transpose: {shape} {axes_order} ... ', end='')
+            print(f'transpose: {shape} {axes_order} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             transposed_n = np.transpose(val_n, axes_order)
 
-            print(f'{transposed_n.shape} ... ', end='')
+            print(f'{transposed_n.shape} ... ', end='', flush=True)
 
             val_t = Tensor.from_value(val_n)
             transposed_t = op.transpose (val_t, axes_order )
 
-            print(f'{transposed_t.shape} ... ', end='')
+            print(f'{transposed_t.shape} ... ', end='', flush=True)
 
             if transposed_n.shape != transposed_t.shape:
                 raise Exception('shape is not equal')
@@ -736,7 +759,7 @@ def any_wise_op_test():
                     shapes = shapes[::-1]
                 a_shape, b_shape = shapes
 
-                print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='')
+                print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='', flush=True)
 
                 a_n = np.random.randint( 1, 2**8, size=a_shape ).astype(dtype)
                 b_n = np.random.randint( 1, 2**8, size=b_shape ).astype(dtype)
diff --git a/xlib/avecl/_internal/Tensor.py b/xlib/avecl/_internal/Tensor.py
index 100225c..2f2f741 100644
--- a/xlib/avecl/_internal/Tensor.py
+++ b/xlib/avecl/_internal/Tensor.py
@@ -109,6 +109,7 @@ class Tensor:
     def min(self, axes=None, keepdims=False) -> 'Tensor': ...
     def reshape(self, new_shape) -> 'Tensor': ...
     def sum(self, axes=None, keepdims=False) -> 'Tensor': ...
+    def std(self, axes=None, keepdims=False) -> 'Tensor': ...
     def transpose(self, axes_order, op_text=None, dtype=None) -> 'Tensor': ...
 
     @property
diff --git a/xlib/avecl/_internal/TensorImpl.py b/xlib/avecl/_internal/TensorImpl.py
index 1491582..d252879 100644
--- a/xlib/avecl/_internal/TensorImpl.py
+++ b/xlib/avecl/_internal/TensorImpl.py
@@ -70,6 +70,7 @@ Tensor.mean = reduce_mean
 Tensor.min = reduce_min
 Tensor.reshape = reshape
 Tensor.sum = reduce_sum
+Tensor.std = reduce_std
 Tensor.transpose = transpose
 
 class TensorRef(Tensor):
diff --git a/xlib/avecl/_internal/backend/Device.py b/xlib/avecl/_internal/backend/Device.py
index 9dd40fc..f8d63b3 100644
--- a/xlib/avecl/_internal/backend/Device.py
+++ b/xlib/avecl/_internal/backend/Device.py
@@ -18,8 +18,7 @@ _np_dtype_to_cl = {
     np.uint8: CL.cl_uchar,  np.uint64: CL.cl_ulong,
     np.int64: CL.cl_long,   np.float16: CL.cl_half,
-    np.float32: CL.cl_float,
-    np.float64: CL.cl_double }
+    np.float32: CL.cl_float}
 
 _opencl_device_ids = None
 _default_device = None
diff --git a/xlib/avecl/_internal/initializer/InitRandomUniform.py b/xlib/avecl/_internal/initializer/InitRandomUniform.py
index a657de0..73e0d60 100644
--- a/xlib/avecl/_internal/initializer/InitRandomUniform.py
+++ b/xlib/avecl/_internal/initializer/InitRandomUniform.py
@@ -38,8 +38,6 @@ class InitRandomUniform(Initializer):
             gen_expression = f'hash_ulong_from_ulong(gid+seed64) % {int(hl)} + {int(l)}'
         elif tensor.dtype in [np.float16, np.float32]:
             gen_expression = f'hash_float_from_uint(gid+seed32)*{hl} + {l}'
-        elif tensor.dtype in [np.float64]:
-            gen_expression = f'hash_double_from_ulong(gid+seed64)*{hl} + {l}'
 
         kernel = Kernel(kernel_text=f"""
 {HKernel.include_hash()}
diff --git a/xlib/avecl/_internal/op/__init__.py b/xlib/avecl/_internal/op/__init__.py
index 6aa933f..3ef73f1 100644
--- a/xlib/avecl/_internal/op/__init__.py
+++ b/xlib/avecl/_internal/op/__init__.py
@@ -9,12 +9,13 @@ from .depthwise_conv2D import depthwise_conv2D
 from .gaussian_blur import gaussian_blur
 from .matmul import matmul, matmulc
 from .pad import pad
+from .rct import rct
 from .reduce import (moments, reduce_max, reduce_mean, reduce_min, reduce_std,
                      reduce_sum, reduce_variance)
 from .remap import remap
 from .remap_np_affine import remap_np_affine
 from .reshape import reshape
-from .slice_ import slice_
+from .slice_ import slice_, split
 from .slice_set import slice_set
 from .stack import stack
 from .tile import tile
diff --git a/xlib/avecl/_internal/op/any_wise.py b/xlib/avecl/_internal/op/any_wise.py
index 71f5fe1..ac03b3d 100644
--- a/xlib/avecl/_internal/op/any_wise.py
+++ b/xlib/avecl/_internal/op/any_wise.py
@@ -1,27 +1,31 @@
 import numpy as np
 
+from ..AAxes import AAxes
 from ..AShape import AShape
 from ..backend import Kernel
 from ..HArgs import HArgs
 from ..HKernel import HKernel
 from ..HType import HType
-from ..info import BroadcastInfo
+from ..info import BroadcastInfo, ReductionInfo
 from ..SCacheton import SCacheton
 from ..Tensor import Tensor
 
 
 def any_wise(op_text : str,
              *args,
+             dim_wise_axis : int = None,
             dtype : np.dtype = None,
             output_t:Tensor=None) -> Tensor:
    """
    element-wise operator with N inputs
 
    arguments
 
        op_text     example: O=(2*I0*I1)+I2
 
        *args       List[ Tensor | number ]
 
+        dim_wise_axis(None)     int : axis along which inputs are exposed per element as I{n}_{i} and outputs as O_{i}
+
        dtype
 
        output_t    compute result to this Tensor.
@@ -33,7 +37,7 @@ def any_wise(op_text : str,
 
     shape_list, dtype_list, krn_args = HArgs.decompose(args)
 
-    op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dtype, op_text)
+    op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dim_wise_axis, dtype, op_text)
 
     if output_t is None:
         output_t = Tensor ( op.o_shape, op.o_dtype, device=device )
@@ -45,59 +49,60 @@ def any_wise(op_text : str,
     return output_t
 
 class _AnyWiseOp:
-    def __init__(self, shape_list, dtype_list, o_dtype, op_text : str):
+    def __init__(self, shape_list, dtype_list, dim_wise_axis, o_dtype, op_text : str):
         if len(shape_list) != len(dtype_list):
             raise ValueError('len(shape_list) != len(dtype_list)')
 
         self.o_dtype = o_dtype = o_dtype if o_dtype is not None else HType.get_most_weighted_dtype (dtype_list)
+        self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
+        self.o_shape = o_shape = info.o_shape
 
-        if len(shape_list) == 1:
-            # element-wise.
-            i_shape, i_dtype = shape_list[0], dtype_list[0]
-            self.o_shape = o_shape = i_shape
+        g_shape = o_shape
+        if dim_wise_axis is not None:
+            dim_wise_axis = o_shape.check_axis(dim_wise_axis)
 
-            self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
-{HKernel.define_tensor('O', o_shape, o_dtype)}
-{HKernel.define_tensor('IN', i_shape, i_dtype)}
-__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const IN_PTR_TYPE* IN_PTR_NAME)
-{{
-size_t gid = get_global_id(0);
+            dim_wise_axis_size = o_shape[dim_wise_axis]
+            if dim_wise_axis_size > 16:
+                raise ValueError(f'dim_wise_axis size > 16: {dim_wise_axis_size}')
 
-O_TYPE O = O_GLOBAL_LOAD(gid);
-IN_TYPE I0 = IN_GLOBAL_LOAD(gid);
-{op_text};
-O_GLOBAL_STORE(gid, O);
-}}
-""")
-        else:
-            # Multi arg.
-            self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
+            g_shape = ReductionInfo( o_shape, AAxes(dim_wise_axis), False ).o_shape
 
-            self.o_shape = o_shape = info.o_shape
+        defs, arg_defs, impls = [], [], []
+        for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
+            t_name = f'I{i}'
+            if t_shape is not None:
+                defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
+                arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
 
-            defs, arg_defs, impls = [], [], []
-            for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
-                t_name = f'I{i}'
-                if t_shape is not None:
-                    defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
-                    arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
-                    impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('O', info.o_shape.ndim)}));")
+                if dim_wise_axis is not None:
+                    for i_elem in range(dim_wise_axis_size):
+                        impls.append( f"{t_name}_TYPE {t_name}_{i_elem} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}));")
                 else:
-                    arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
+                    impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim)}));")
+            else:
+                arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
 
-            defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
+        defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
 
-            self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
+        if dim_wise_axis is not None:
+            o_def = '\n'.join( f"O_TYPE O_{i_elem};" for i_elem in range(dim_wise_axis_size) )
'\n'.join( f"O_TYPE O_{i_elem};" for i_elem in range(dim_wise_axis_size) ) + o_store = '\n'.join( f"O_GLOBAL_STORE(O_IDX({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}), O_{i_elem});" for i_elem in range(dim_wise_axis_size) ) + else: + o_def = 'O_TYPE O;' + o_store = 'O_GLOBAL_STORE(gid, O);' + + self.forward_krn = Kernel(global_shape=(g_shape.size,), kernel_text=f""" {defs} {HKernel.define_tensor('O', o_shape, o_dtype)} +{HKernel.define_tensor_shape('G', g_shape)} __kernel void impl(__global O_PTR_TYPE* O_PTR_NAME{arg_defs}) {{ size_t gid = get_global_id(0); -{HKernel.decompose_idx_to_axes_idxs('gid', 'o', o_shape.ndim)} +{HKernel.decompose_idx_to_axes_idxs('gid', 'G', g_shape.ndim)} {impls} -O_TYPE O; +{o_def} {op_text}; -O_GLOBAL_STORE(gid, O); +{o_store} }} """) diff --git a/xlib/avecl/_internal/op/cvt_color.py b/xlib/avecl/_internal/op/cvt_color.py index 6a7d822..35add96 100644 --- a/xlib/avecl/_internal/op/cvt_color.py +++ b/xlib/avecl/_internal/op/cvt_color.py @@ -39,7 +39,7 @@ def cvt_color (input_t : Tensor, in_mode : str, out_mode : str, ch_axis=1, dtype return output_t _allowed_modes = ['RGB', 'BGR', 'XYZ', 'LAB'] -_allowed_dtypes = [np.float16, np.float32, np.float64] +_allowed_dtypes = [np.float16, np.float32] class _CvtColor32Op(): def __init__(self, i_shape : AShape, i_dtype, in_mode, o_dtype, out_mode, ch_axis): @@ -100,54 +100,74 @@ class _CvtColor32Op(): self.forward_krn = krn @staticmethod - def get_RGB_to_LAB_body(R,G,B,L,a,b,lab_type='') -> str: + def get_RGB_to_LAB_body(R,G,B,L,a,b, declare_out_type=False) -> str: return f""" -{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,'X','Y','Z', xyz_type='float')} -{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, lab_type=lab_type)} +{_CvtColor32Op.get_sRGB_to_XYZ_body(R,G,B,'X','Y','Z', declare_out_type=True)} +{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, declare_out_type=declare_out_type)} """ @staticmethod - def get_LAB_to_RGB_body(L,a,b,R,G,B,rgb_type='') -> str: + def get_LAB_to_RGB_body(L,a,b,R,G,B, declare_out_type=False) -> str: return f""" -{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', xyz_type='float')} -{_CvtColor32Op.get_XYZ_to_RGB_body('X','Y','Z',R,G,B,rgb_type=rgb_type)} +{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', declare_out_type=True)} +{_CvtColor32Op.get_XYZ_to_sRGB_body('X','Y','Z',R,G,B, declare_out_type=declare_out_type)} """ @staticmethod - def get_RGB_to_XYZ_body(R,G,B,X,Y,Z,xyz_type='') -> str: + def get_sRGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str: return f""" -{xyz_type} {X} = fma(0.4124564, {R}, fma(0.3575761, {G}, 0.1804375*{B})); -{xyz_type} {Y} = fma(0.2126729, {R}, fma(0.7151522, {G}, 0.0721750*{B})); -{xyz_type} {Z} = fma(0.0193339, {R}, fma(0.1191920, {G}, 0.9503041*{B})); -""" - @staticmethod - def get_XYZ_to_RGB_body(X,Y,Z,R,G,B,rgb_type='') -> str: - return f""" -{rgb_type} {R} = fma( 3.2404542, {X}, fma(-1.5371385, {Y}, -0.4985314*{Z})); -{rgb_type} {G} = fma(-0.9692660, {X}, fma( 1.8760108, {Y}, 0.0415560*{Z})); -{rgb_type} {B} = fma( 0.0556434, {X}, fma(-0.2040259, {Y}, 1.0572252*{Z})); +{R} = ({R} > 0.04045)*( pow( ({R}+0.055)/1.055, 2.4) ) + ({R} <= 0.04045)*({R} / 12.92); +{G} = ({G} > 0.04045)*( pow( ({G}+0.055)/1.055, 2.4) ) + ({G} <= 0.04045)*({G} / 12.92); +{B} = ({B} > 0.04045)*( pow( ({B}+0.055)/1.055, 2.4) ) + ({B} <= 0.04045)*({B} / 12.92); + +{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,X,Y,Z,declare_out_type=declare_out_type) } """ @staticmethod - def get_RGB_to_BGR_body(R,G,B,b,g,r,bgr_type='') -> str: 
+    def get_RGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
         return f"""
-{bgr_type} {b} = {R};
-{bgr_type} {g} = {G};
-{bgr_type} {r} = {B};
+{'float' if declare_out_type else ''} {X} = {R}*0.412453 + {G}*0.357580 + {B}*0.180423;
+{'float' if declare_out_type else ''} {Y} = {R}*0.212671 + {G}*0.715160 + {B}*0.072169;
+{'float' if declare_out_type else ''} {Z} = {R}*0.019334 + {G}*0.119193 + {B}*0.950227;
 """
 
     @staticmethod
-    def get_BGR_to_RGB_body(B,G,R,r,g,b,rgb_type='') -> str:
+    def get_XYZ_to_sRGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
         return f"""
-{rgb_type} {r} = {B};
-{rgb_type} {g} = {G};
-{rgb_type} {b} = {R};
+{_CvtColor32Op.get_XYZ_to_RGB_body(X,Y,Z,R,G,B,declare_out_type=declare_out_type) }
+{R} = ({R} > 0.0031308)*( 1.055*pow({R},1.0/2.4)-0.055 ) + ({R} <= 0.0031308)*({R} * 12.92);
+{G} = ({G} > 0.0031308)*( 1.055*pow({G},1.0/2.4)-0.055 ) + ({G} <= 0.0031308)*({G} * 12.92);
+{B} = ({B} > 0.0031308)*( 1.055*pow({B},1.0/2.4)-0.055 ) + ({B} <= 0.0031308)*({B} * 12.92);
+"""
+
+    @staticmethod
+    def get_XYZ_to_RGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {R} = clamp( {X}* 3.240479 + {Y}*-1.53715 + {Z}*-0.498535, 0.0, 1.0 );
+{'float' if declare_out_type else ''} {G} = clamp( {X}*-0.969256 + {Y}* 1.875991 + {Z}* 0.041556, 0.0, 1.0 );
+{'float' if declare_out_type else ''} {B} = clamp( {X}* 0.055648 + {Y}*-0.204043 + {Z}* 1.057311, 0.0, 1.0 );
+"""
+
+    @staticmethod
+    def get_RGB_to_BGR_body(R,G,B,b,g,r, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {b} = {R};
+{'float' if declare_out_type else ''} {g} = {G};
+{'float' if declare_out_type else ''} {r} = {B};
+"""
+
+    @staticmethod
+    def get_BGR_to_RGB_body(B,G,R,r,g,b, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {r} = {B};
+{'float' if declare_out_type else ''} {g} = {G};
+{'float' if declare_out_type else ''} {b} = {R};
+"""
 
     @staticmethod
-    def get_XYZ_to_LAB_body(X,Y,Z,L,A,B,lab_type='') -> str:
+    def get_XYZ_to_LAB_body(X,Y,Z,L,A,B, declare_out_type=False) -> str:
         beta3 = '((6.0/29.0)*(6.0/29.0)*(6.0/29.0))'
-        xyz_xn = '(0.9556)'
+        xyz_xn = '(0.950456)'
         xyz_zn = '(1.088754)'
         return f"""
 {X} /= {xyz_xn};
@@ -157,20 +177,20 @@ class _CvtColor32Op():
 {Z} /= {xyz_zn};
 
 {X} = ({X} > {beta3})*rootn({X}, 3) + ({X} <= {beta3})*(7.787*{X}+4.0/29.0);
 {Y} = ({Y} > {beta3})*rootn({Y}, 3) + ({Y} <= {beta3})*(7.787*{Y}+4.0/29.0);
 {Z} = ({Z} > {beta3})*rootn({Z}, 3) + ({Z} <= {beta3})*(7.787*{Z}+4.0/29.0);
 
-{lab_type} {L} = 116.0*{Y}-16.0;
-{lab_type} {A} = 500.0*({X}-{Y});
-{lab_type} {B} = 200.0*({Y}-{Z});
+{'float' if declare_out_type else ''} {L} = 116.0*{Y}-16.0;
+{'float' if declare_out_type else ''} {A} = 500.0*({X}-{Y});
+{'float' if declare_out_type else ''} {B} = 200.0*({Y}-{Z});
 """
 
     @staticmethod
-    def get_LAB_to_XYZ_body(L,A,B,X,Y,Z,xyz_type='') -> str:
+    def get_LAB_to_XYZ_body(L,A,B,X,Y,Z, declare_out_type=False) -> str:
         beta = '(6.0/29.0)'
         beta2 = '((6.0/29.0)*(6.0/29.0))'
-        xyz_xn = '(0.9556)'
+        xyz_xn = '(0.950456)'
         xyz_zn = '(1.088754)'
         return f"""
-{xyz_type} {Y} = ({L} + 16.0) / 116.0;
-{xyz_type} {X} = {Y} + {A} / 500.0;
-{xyz_type} {Z} = {Y} - {B} / 200.0;
+{'float' if declare_out_type else ''} {Y} = ({L} + 16.0) / 116.0;
+{'float' if declare_out_type else ''} {X} = {Y} + {A} / 500.0;
+{'float' if declare_out_type else ''} {Z} = {Y} - {B} / 200.0;
 
 {Y} = ({Y} > {beta})*({Y}*{Y}*{Y}) + ({Y} <= {beta})*({Y}-16.0/116.0)*3*{beta2};
 {X} = ({X} > {beta})*({X}*{X}*{X}*{xyz_xn}) + ({X} <= {beta})*({X}-16.0/116.0)*3*{beta2}*{xyz_xn};
diff --git a/xlib/avecl/_internal/op/reduce.py b/xlib/avecl/_internal/op/reduce.py
index 6356eee..823f0c6 100644
--- a/xlib/avecl/_internal/op/reduce.py
+++ b/xlib/avecl/_internal/op/reduce.py
@@ -58,7 +58,7 @@ def reduce_variance(input_t, axes=None, keepdims=False):
     mean = reduce_mean(input_t, axes, keepdims=True)
     return reduce_mean(square(input_t - mean), axes, keepdims)
 
-def moments(input_t, axes=None, keepdims=False):
+def moments(input_t, axes=None):
     """
     Returns (mean, variance) of input_t
 
@@ -68,11 +68,9 @@
         Iterable of ints.
         None - all axes
 
-        keepdims(False)     keep reduced axes
     """
-    mean = reduce_mean(input_t, axes, keepdims)
-    mean_shape_keepdims = mean._op.info.o_shape_kd
-    var = reduce_mean(square(input_t - mean.reshape(mean_shape_keepdims) ), axes, keepdims)
+    mean = reduce_mean(input_t, axes, True)
+    var = reduce_mean(square(input_t - mean), axes, True)
 
     return mean, var
 
 def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
diff --git a/xlib/avecl/_internal/op/slice_.py b/xlib/avecl/_internal/op/slice_.py
index 2503d42..d692470 100644
--- a/xlib/avecl/_internal/op/slice_.py
+++ b/xlib/avecl/_internal/op/slice_.py
@@ -1,6 +1,9 @@
+from typing import List
+
 import numpy as np
 
 from ..AShape import AShape
+from ..AAxes import AAxes
 from ..backend import Kernel
 from ..HKernel import HKernel
 from ..HType import HType
@@ -9,6 +12,29 @@ from ..SCacheton import SCacheton
 from ..Tensor import Tensor
 
 
+def split(input_t : Tensor, axis, keepdims=False) -> List[Tensor]:
+    """
+    Split input_t along the axis into a list of tensors, one per index.
+
+    arguments
+
+        input_t     Tensor
+
+        axis        int     axis to split along
+
+        keepdims(False)     keep the split axis with size 1 in every output
+    """
+    shape = input_t.shape
+
+    result = []
+    for i in range(shape[axis]):
+        slices = [slice(None, None, None)]*shape.ndim
+
+        slices[axis] = i if not keepdims else slice(i,i+1,1)
+
+        result.append( slice_(input_t, slices) )
+
+    return result
+
+
 def slice_(input_t : Tensor, slices, dtype : np.dtype = None, output_t=None, is_add_to_output=False) -> Tensor:
     """
     arguments:
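
Usage sketch for the APIs this patch touches (not part of the patch itself). It assumes a working OpenCL 1.2 device and imports via the internal module paths used by NTest.py; the NCHW shapes and the `bgr_t`/`lab_t` names are illustrative only, and the dim_wise_axis naming convention is inferred from the generated kernel in any_wise.py.

import numpy as np
from xlib.avecl._internal.Tensor import Tensor
from xlib.avecl._internal import op
from xlib.avecl._internal.op.any_wise import any_wise

src_t = Tensor.from_value(np.random.uniform(size=(1,3,8,8)).astype(np.float32))
dst_t = Tensor.from_value(np.random.uniform(size=(1,3,8,8)).astype(np.float32))

# Tensor.std, newly registered in TensorImpl.py (wraps reduce_std)
std_t = src_t.std(axes=(2,3), keepdims=True)

# op.split, newly exported from op/__init__.py: one tensor per index along the
# axis; keepdims=True keeps the split axis with size 1 in every output
r_t, g_t, b_t = op.split(src_t, axis=1, keepdims=True)

# cvt_color now accepts float16/float32 only (float64 support was removed)
lab_t = op.cvt_color(src_t, in_mode='RGB', out_mode='LAB', ch_axis=1)

# op.rct, called with the same signature NTest.rct_test uses
mask_t = Tensor.from_value(np.ones((1,3,8,8), np.float32))
out_t = op.rct(dst_t, src_t, target_mask_t=mask_t, source_mask_t=mask_t)

# dim_wise_axis: judging from the generated kernel, each input is exposed per
# element along the axis as I{n}_0..I{n}_{size-1} and outputs are assigned to
# O_0..O_{size-1} (axis size must be <= 16), e.g. an RGB->BGR channel swap:
bgr_t = any_wise('O_0 = I0_2; O_1 = I0_1; O_2 = I0_0', src_t, dim_wise_axis=1)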