update xlib.avecl

iperov 2021-10-20 18:02:50 +04:00
commit 6da916cc66
14 changed files with 246 additions and 184 deletions

View file

@@ -3,6 +3,7 @@ AveCL ! Make OpenCL great again.
 Lightweight ndarray library using OpenCL 1.2 written in pure python.
 Applicable for high-performance general purpose n-dim array computations for every device that supports OpenCL 1.2.
+Supports any dtype except float64.
 Works in python 3.5+. Dependencies: numpy.
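
A minimal usage sketch based on the tests in this commit (the xlib.avecl import path is assumed from the commit title, not shown in the diff):

    import numpy as np
    from xlib.avecl import Tensor, op

    a = Tensor.from_value(np.random.randint(16, size=(4, 8, 8)).astype(np.float32))
    b = Tensor.from_value(np.random.randint(16, size=(4, 8, 8)).astype(np.float32))
    o = op.matmul(a, b)     # batched matmul on the default OpenCL device
    print(o.np().shape)     # back to numpy: (4, 8, 8)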

View file

@@ -57,6 +57,19 @@ class AShape(Iterable):
     def as_list(self) -> List[int]:
         return list(self.shape)
 
+    def check_axis(self, axis : int) -> int:
+        """
+        Check axis and returns normalized axis value
+
+        can raise ValueError
+        """
+        if axis < 0:
+            axis += self.ndim
+        if axis < 0 or axis >= self.ndim:
+            raise ValueError(f'axis {axis} out of bound of ndim {self.ndim}')
+        return axis
+
     def axes_arange(self) -> AAxes:
         """
         Returns tuple of axes arange.
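
check_axis is the usual negative-axis normalization onto [0, ndim); a plain-Python sketch of the same logic, outside the class:

    def check_axis(axis, ndim=4):
        if axis < 0:
            axis += ndim
        if axis < 0 or axis >= ndim:
            raise ValueError(f'axis {axis} out of bound of ndim {ndim}')
        return axis

    assert check_axis(-1) == 3   # last axis of a 4-dim shape
    assert check_axis(2) == 2    # already normalized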

View file

@@ -15,12 +15,9 @@ class HKernel:
     np.int64   : 'long',
     np.uint64  : 'ulong',
     np.float16 : 'half',
-    np.float32 : 'float',
-    np.float64 : 'double'
+    np.float32 : 'float'
     }
 
     @staticmethod
     def np_dtype_to_cl(dtype : np.dtype):
         """
@@ -134,7 +131,7 @@ class HKernel:
         out += [f'#define {name_upper}_GLOBAL_STORE8(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
         out += [f'#define {name_upper}_GLOBAL_STORE16(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
 
-        if dtype in [np.float32, np.float64]:
+        if dtype in [np.float32]:
             out += [f'#define {name_upper}_TO_FLOATX(x) x']
         elif dtype in [np.bool_, np.int8, np.uint8, np.int16, np.uint16, np.int32,np.uint32, np.float16]:
             out += [f'#define {name_upper}_TO_FLOATX(x) ((float)x)']
@@ -145,7 +142,10 @@ class HKernel:
     @staticmethod
     def define_ndim_idx(ndim):
         """
+        define macro to calculate index for n-dim shape
+
         example for ndim=3
 
         #define NDIM3_IDX(t0,t1,t2,T0,T1,T2) (((size_t)(t0))*T1*T2+((size_t)(t1))*T2+((size_t)(t2)))
         #define NDIM3_IDX_MOD(t0,t1,t2,T0,T1,T2) (((size_t)(t0) % T0)*T1*T2+((size_t)(t1) % T1)*T2+((size_t)(t2) % T2))
         """
@@ -165,14 +165,14 @@ class HKernel:
         """
         Returns a definitions for operations with tensor shape
 
-        example for 'O', (7,3),
+        example for 'O', (2,3),
 
-        #define O0 7
+        #define O0 2
         #define O1 3
         #define Om1 3
-        #define Om2 7
+        #define Om2 2
 
-        #define O_IDX(o0,o1) ( (size_t)(o0) )*3 +( o1 )
+        #define O_IDX(o0,o1) (((size_t)(o0))*3+((size_t)(o1)))
 
-        #define O_IDX_MOD(o0,o1) ( (size_t)(o0) % 7 )*3 +( (o1) % 3 )
+        #define O_IDX_MOD(o0,o1) (((size_t)(o0) % 2)*3+((size_t)(o1) % 3))
         """
         shape = tuple(shape)
         ndim = len(shape)
@@ -183,36 +183,14 @@ class HKernel:
         axes_symbols = "".join([str(i) for i in range(ndim)])
         axes_symbols = axes_symbols.upper()
 
-        out = []
-        for i in range(ndim):
-            out += [f'#define {name_upper}{axes_symbols[i]} {shape[i]}']
-
-        for i in range(1,ndim+1):
-            out += [f'#define {name_upper}m{i} {shape[-i]}']
-
-        line = f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) '
-        for i in range(ndim):
-            line += f'( (size_t)({name_lower}{i}) )'
-            for j in range(i+1,ndim):
-                line += f'*{shape[j]} '
-            if i != ndim-1:
-                line += '+'
-        out += [line]
-
-        line = f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) '
-        for i in range(ndim):
-            line += f'( (size_t)({name_lower}{i}) % {shape[i]} )'
-            for j in range(i+1,ndim):
-                line += f'*{shape[j]} '
-            if i != ndim-1:
-                line += '+'
-        out += [line,'']
+        out  = [f'#define {name_upper}{axes_symbols[i]} {shape[i]}' for i in range(ndim)]
+        out += [f'#define {name_upper}m{i} {shape[-i]}' for i in range(1,ndim+1)]
+        out += [f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) (' + \
+                '+'.join([f'((size_t)({name_lower}{i}))' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']
+        out += [f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) (' + \
+                '+'.join([f'((size_t)({name_lower}{i}) % {shape[i]})' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']
 
         return '\n'.join(out)
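
The emitted *_IDX macros are plain row-major linearization with the trailing strides baked in as constants; a Python sanity check against the (2,3) example in the docstring:

    import numpy as np
    a = np.arange(6).reshape(2, 3)
    def O_IDX(o0, o1):
        return o0 * 3 + o1   # matches '#define O_IDX(o0,o1) (((size_t)(o0))*3+((size_t)(o1)))'
    assert a[1, 2] == a.flat[O_IDX(1, 2)]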

View file

@@ -3,10 +3,10 @@ from typing import Iterable, List
 import numpy as np
 
 scalar_types = [int, float, np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
-                np.float16, np.float32, np.float64, np.bool_]
+                np.float16, np.float32, np.bool_]
 
 np_scalar_types = [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
-                   np.float16, np.float32, np.float64, np.bool_]
+                   np.float16, np.float32, np.bool_]
 
 _np_dtype_to_cl = {
     np.bool_   : 'bool',
@@ -20,7 +20,6 @@ _np_dtype_to_cl = {
     np.int64   : 'long',
     np.float16 : 'half',
     np.float32 : 'float',
-    np.float64 : 'double',
     }
 
 _np_dtype_weight = {
@@ -34,8 +33,7 @@ _np_dtype_weight = {
     np.uint64  : 8,
     np.int64   : 9,
     np.float16 : 10,
-    np.float32 : 11,
-    np.float64 : 12,
+    np.float32 : 11
     }
 
 class HType:
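
_np_dtype_weight drives type promotion: when an op mixes input dtypes, the result dtype is the input with the highest weight, so float32 is now the top of the ladder. A sketch of the idea behind HType.get_most_weighted_dtype (table excerpted, names illustrative):

    import numpy as np
    weights = {np.int64: 9, np.float16: 10, np.float32: 11}
    def most_weighted(dtypes):
        return max(dtypes, key=lambda dt: weights[dt])
    assert most_weighted([np.int64, np.float16]) == np.float16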

View file

@@ -1,14 +1,13 @@
-import traceback
 import numpy as np
 
-from .HType import HType
-from .NCore import NCore
-from .backend import get_device, get_default_device, set_default_device
-from .Tensor import Tensor
 from . import op
-from .initializer import InitRandomUniform, InitCoords2DArange
+from .backend import get_default_device, get_device, set_default_device
+from .HType import HType
 from .info import Conv2DInfo
+from .initializer import InitCoords2DArange, InitRandomUniform
+from .NCore import NCore
+from .Tensor import Tensor
 
 class NTest():
@@ -45,6 +44,7 @@ class NTest():
                       binary_dilate_circle_test,
                       binary_morph_test,
                       cvt_color_test,
+                      rct_test,
                      ]
 
         for test_func in test_funcs:
@@ -62,18 +62,39 @@ class NTest():
 def _all_close(x,y, atol=1, btol=1):
     return np.allclose( np.ndarray.flatten(x[None,...]), np.ndarray.flatten(y[None,...]), atol, btol )
 
+def rct_test():
+    for _ in range(10):
+        for dtype in [np.float16, np.float32]:
+            base_shape = list(np.random.randint(1, 8, size=4) )
+
+            shape = base_shape.copy()
+            shape[1] = 3
+
+            mask_shape = base_shape.copy()
+            mask_shape[1] = 3
+
+            print(f'rct {shape} {str(np.dtype(dtype).name)} ... ', end='', flush=True)
+
+            source_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
+            target_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
+            mask_t   = Tensor(shape=mask_shape, dtype=dtype, initializer=InitRandomUniform())
+
+            result_t = op.rct(target_t, source_t, target_mask_t=mask_t, source_mask_t=mask_t )
+
+            print('pass')
+
 def cvt_color_test():
     for _ in range(10):
         for shape_len in range(2,6):
             for in_mode in ['RGB','BGR','XYZ','LAB']:
                 for out_mode in ['RGB','BGR','XYZ','LAB']:
-                    for dtype in [np.float16, np.float32, np.float64]:
+                    for dtype in [np.float16, np.float32]:
                         shape = list(np.random.randint(1, 8, size=shape_len) )
 
                         ch_axis = np.random.randint(len(shape))
                         shape[ch_axis] = 3
 
-                        print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='')
+                        print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='', flush=True)
 
                         inp_n = np.random.uniform(size=shape ).astype(dtype)
                         inp_t = Tensor.from_value(inp_n)
@@ -81,7 +102,9 @@ def cvt_color_test():
                         out_t = op.cvt_color(inp_t, in_mode=in_mode, out_mode=out_mode, ch_axis=ch_axis)
                         inp_t2 = op.cvt_color(out_t, in_mode=out_mode, out_mode=in_mode, ch_axis=ch_axis)
 
-                        if not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
+                        is_check = in_mode in ['RGB','BGR','XYZ'] and out_mode in ['XYZ','LAB']
+
+                        if is_check and not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
                             raise Exception(f'data is not equal')
 
                         print('pass')
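
The is_check guard narrows the round-trip assertion to conversions that are invertible on random data: the reworked XYZ_to_RGB body in the cvt_color changes further down clamps out-of-gamut values into [0,1], so random tensors decoded to RGB/BGR lose information. A numpy sketch of the lossy case (matrix taken from the new kernel body):

    import numpy as np
    M = np.array([[ 3.240479, -1.53715 , -0.498535],
                  [-0.969256,  1.875991,  0.041556],
                  [ 0.055648, -0.204043,  1.057311]], dtype=np.float32)
    xyz = np.array([0.2, 0.9, 0.1], dtype=np.float32)   # XYZ whose RGB is out of gamut
    rgb = np.clip(M @ xyz, 0.0, 1.0)                    # clamp, as the kernel does
    xyz_back = np.linalg.inv(M) @ rgb
    assert not np.allclose(xyz, xyz_back, atol=0.1)     # round trip is lossy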
@@ -91,7 +114,7 @@ def cast_test():
         for out_dtype in HType.get_np_scalar_types():
             shape = tuple(np.random.randint(1, 8, size=( np.random.randint(1,5))) )
 
-            print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='')
+            print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='', flush=True)
 
             val_n = np.random.uniform( -64, 64, size=shape ).astype(in_dtype)
             cast_n = val_n.astype(out_dtype)
@@ -113,7 +136,7 @@ def binary_morph_test():
                 input_n = np.random.randint( 2, size=shape ).astype(dtype)
                 input_t = Tensor.from_value(input_n)
 
-                print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='')
+                print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='', flush=True)
 
                 op.binary_morph(input_t, erode_dilate=erode_dilate, blur=blur, fade_to_border=True)
@@ -130,7 +153,7 @@ def binary_erode_circle_test():
                 input_n = np.random.randint( 2, size=shape ).astype(dtype)
                 input_t = Tensor.from_value(input_n)
 
-                print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
+                print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)
 
                 op.binary_erode_circle(input_t, radius=radius, iterations=iterations)
@@ -147,7 +170,7 @@ def binary_dilate_circle_test():
                 input_n = np.random.randint( 2, size=shape ).astype(dtype)
                 input_t = Tensor.from_value(input_n)
 
-                print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
+                print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)
 
                 op.binary_dilate_circle(input_t, radius=radius, iterations=iterations)
@@ -156,11 +179,11 @@ def binary_dilate_circle_test():
 
 def gaussian_blur_test():
     for shape_len in range(2,5):
-        for dtype in [np.float16, np.float32, np.float64]:
+        for dtype in [np.float16, np.float32]:
             shape = np.random.randint( 1, 64, size=(shape_len,) )
             sigma = np.random.rand() * 10
 
-            print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='')
+            print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             val_t = Tensor.from_value(val_n)
@@ -179,7 +202,7 @@ def pad_test():
                 paddings = tuple( (np.random.randint(8), np.random.randint(8)) for i in range(len(shape)) )
 
-                print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='')
+                print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='', flush=True)
 
                 val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
                 pad_n = np.pad(val_n, paddings, mode=mode)
@@ -187,7 +210,7 @@ def pad_test():
                 val_t = Tensor.from_value(val_n)
                 pad_t = op.pad(val_t, paddings, mode=mode)
 
-                print(f'{pad_n.shape} == {pad_t.shape} ... ', end='')
+                print(f'{pad_n.shape} == {pad_t.shape} ... ', end='', flush=True)
 
                 if pad_n.shape != pad_t.shape:
                     raise Exception(f'shape is not equal')
@@ -241,7 +264,7 @@ def slice_set_test():
             shape = tuple(shape)
             slices = tuple(slices)
 
-            print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
+            print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             val_t = Tensor.from_value(val_n)
@@ -330,7 +353,7 @@ def depthwise_conv2d_test():
                             input_shape  = (n, ic, ih, iw)
                             kernel_shape = (ic, ks, ks)
 
-                            print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='')
+                            print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='', flush=True)
 
                             input_n  = np.random.randint( 64, size=input_shape ).astype(dtype)
                             kernel_n = np.ones(shape=kernel_shape ).astype(dtype)
@@ -358,7 +381,7 @@ def warp_affine_test():
             H = np.random.randint(8, 64)
             W = np.random.randint(8, 64)
 
-            print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
+            print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
 
             input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@@ -380,7 +403,7 @@ def remap_np_affine_test():
             H = np.random.randint(8, 64)
             W = np.random.randint(8, 64)
 
-            print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
+            print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
 
             input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@@ -402,7 +425,7 @@ def remap_test():
             H = np.random.randint(8, 64)
             W = np.random.randint(8, 64)
 
-            print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
+            print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
 
             input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@@ -422,7 +445,7 @@ def tile_test():
             shape = tuple(np.random.randint( 8, size=(shape_len,) )+1)
             tiles = tuple(np.random.randint( 4, size=(shape_len,) )+1)
 
-            print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='')
+            print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             tiled_n = np.tile(val_n, tiles)
@@ -430,7 +453,7 @@ def tile_test():
             val_t = Tensor.from_value(val_n)
             tiled_t = op.tile(val_t, tiles)
 
-            print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='')
+            print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='', flush=True)
 
             if tiled_n.shape != tiled_t.shape:
                 raise Exception(f'shape is not equal')
@@ -448,7 +471,7 @@ def stack_test():
             axis = np.random.randint(shape_len+1)
             stack_count = np.random.randint(4)+1
 
-            print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='')
+            print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             vals_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for i in range(stack_count) ]
             stack_n = np.stack(vals_n, axis)
@@ -456,7 +479,7 @@ def stack_test():
             vals_t = [ Tensor.from_value(vals_n[i]) for i in range(stack_count) ]
             stack_t = op.stack(vals_t, axis)
 
-            print(f'{stack_n.shape} == {stack_t.shape} ... ', end='')
+            print(f'{stack_n.shape} == {stack_t.shape} ... ', end='', flush=True)
 
             if stack_n.shape != stack_t.shape:
                 raise Exception('shape is not equal')
@@ -483,9 +506,9 @@ def reduce_test():
                     keepdims = np.random.randint(2) == 0
 
-                    print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='')
+                    print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='', flush=True)
 
-                    if dtype in [np.float16, np.float32, np.float64]:
+                    if dtype in [np.float16, np.float32]:
                         value_n = np.random.uniform(size=shape).astype(dtype)
                     else:
                         value_n = np.random.randint( max(1, int(np.iinfo(dtype).max / np.prod(shape)) ), size=shape, dtype=dtype )
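
The integer branch caps the random range at iinfo(dtype).max / prod(shape) so that even a full sum reduction cannot overflow the output dtype; a sketch of the bound:

    import numpy as np
    shape, dtype = (4, 8, 8), np.int32
    hi = max(1, int(np.iinfo(dtype).max / np.prod(shape)))
    value_n = np.random.randint(hi, size=shape, dtype=dtype)
    assert value_n.sum(dtype=np.int64) <= np.iinfo(dtype).max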
@@ -518,7 +541,7 @@ def InitRandomUniform_test():
         for shape_len in range(1, 5):
             shape = np.random.randint( 8, size=(shape_len,) )+1
 
-            print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='')
+            print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             Tensor(shape, dtype, initializer=InitRandomUniform()).np()
@@ -534,7 +557,7 @@ def InitCoords2DArange_test():
             w_start = np.random.randint(80)
             w_stop = w_start + np.random.randint(80)
 
-            print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='')
+            print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             Tensor(shape, dtype, initializer=InitCoords2DArange(h_start,h_stop,w_start,w_stop )).np()
@@ -551,17 +574,17 @@ def concat_test():
                                   for i,dim in enumerate(shape) )
                             for shape in ([shape] * count) )
 
-            print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='')
+            print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             V_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for shape in shapes ]
             O_n = np.concatenate(V_n, axis)
 
-            print(f'{O_n.shape} == ', end='')
+            print(f'{O_n.shape} == ', end='', flush=True)
 
             V_t = [ Tensor.from_value(V_n[i]) for i in range(count) ]
             O_t = op.concat(V_t, axis)
 
-            print(f'{O_t.shape} ... ', end='')
+            print(f'{O_t.shape} ... ', end='', flush=True)
 
             if O_n.shape != O_t.shape:
                 raise Exception('shape is not equal')
@@ -596,19 +619,19 @@ def matmul_test():
             A_shape = (BATCH, M, K)
             B_shape = (BATCH, K, N)
 
-            print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='')
+            print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='', flush=True)
 
             A_n = np.random.randint( 2**4, size=A_shape ).astype(dtype)
             B_n = np.random.randint( 2**4, size=B_shape ).astype(dtype)
             O_n = np.matmul(A_n, B_n)
 
-            print(f'{O_n.shape} == ', end='')
+            print(f'{O_n.shape} == ', end='', flush=True)
 
             A_t = Tensor.from_value(A_n)
             B_t = Tensor.from_value(B_n)
             O_t = op.matmul(A_t, B_t)
 
-            print(f'{O_t.shape} ... ', end='')
+            print(f'{O_t.shape} ... ', end='', flush=True)
 
             if O_n.shape != O_t.shape:
                 raise Exception('shape is not equal')
@@ -659,17 +682,17 @@ def slice_test():
             shape = tuple(shape)
             slices = tuple(slices)
 
-            print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
+            print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             sliced_n = val_n[slices]
 
-            print(f'{sliced_n.shape} ... ', end='')
+            print(f'{sliced_n.shape} ... ', end='', flush=True)
 
             sliced_t = Tensor.from_value(val_n)[slices]
 
-            print(f'{sliced_t.shape} ... ', end='')
+            print(f'{sliced_t.shape} ... ', end='', flush=True)
 
             if 0 in sliced_n.shape:
                 # some cases like 0:1:-1 will produce zero shape and invalid array on numpy
@@ -694,17 +717,17 @@ def transpose_test():
             axes_order = np.array([*range(shape_len)])
             np.random.shuffle(axes_order)
 
-            print(f'transpose: {shape} {axes_order} ... ', end='')
+            print(f'transpose: {shape} {axes_order} ... ', end='', flush=True)
 
             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             transposed_n = np.transpose(val_n, axes_order)
 
-            print(f'{transposed_n.shape} ... ', end='')
+            print(f'{transposed_n.shape} ... ', end='', flush=True)
 
             val_t = Tensor.from_value(val_n)
             transposed_t = op.transpose (val_t, axes_order )
 
-            print(f'{transposed_t.shape} ... ', end='')
+            print(f'{transposed_t.shape} ... ', end='', flush=True)
 
             if transposed_n.shape != transposed_t.shape:
                 raise Exception('shape is not equal')
@@ -736,7 +759,7 @@ def any_wise_op_test():
                     shapes = shapes[::-1]
                 a_shape, b_shape = shapes
 
-                print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='')
+                print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='', flush=True)
 
                 a_n = np.random.randint( 1, 2**8, size=a_shape ).astype(dtype)
                 b_n = np.random.randint( 1, 2**8, size=b_shape ).astype(dtype)

View file

@@ -109,6 +109,7 @@ class Tensor:
     def min(self, axes=None, keepdims=False) -> 'Tensor': ...
     def reshape(self, new_shape) -> 'Tensor': ...
     def sum(self, axes=None, keepdims=False) -> 'Tensor': ...
+    def std(self, axes=None, keepdims=False) -> 'Tensor': ...
     def transpose(self, axes_order, op_text=None, dtype=None) -> 'Tensor': ...
 
     @property

View file

@@ -70,6 +70,7 @@ Tensor.mean = reduce_mean
 Tensor.min = reduce_min
 Tensor.reshape = reshape
 Tensor.sum = reduce_sum
+Tensor.std = reduce_std
 Tensor.transpose = transpose
 
 class TensorRef(Tensor):
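
These assignments are how the op module's free functions become Tensor methods, so the new reduce_std is reachable both ways; a sketch (assuming reduce_std's signature matches its reduce_* siblings):

    t = Tensor.from_value(np.random.rand(4, 8).astype(np.float32))
    a = op.reduce_std(t, axes=(-1,), keepdims=True)   # free function
    b = t.std(axes=(-1,), keepdims=True)              # bound method, same kernel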

View file

@@ -18,8 +18,7 @@ _np_dtype_to_cl = { np.uint8: CL.cl_uchar,
                     np.uint64: CL.cl_ulong,
                     np.int64: CL.cl_long,
                     np.float16: CL.cl_half,
-                    np.float32: CL.cl_float,
-                    np.float64: CL.cl_double }
+                    np.float32: CL.cl_float}
 
 _opencl_device_ids = None
 _default_device = None

View file

@@ -38,8 +38,6 @@ class InitRandomUniform(Initializer):
             gen_expression = f'hash_ulong_from_ulong(gid+seed64) % {int(hl)} + {int(l)}'
         elif tensor.dtype in [np.float16, np.float32]:
             gen_expression = f'hash_float_from_uint(gid+seed32)*{hl} + {l}'
-        elif tensor.dtype in [np.float64]:
-            gen_expression = f'hash_double_from_ulong(gid+seed64)*{hl} + {l}'
 
         kernel = Kernel(kernel_text=f"""
 {HKernel.include_hash()}

View file

@@ -9,12 +9,13 @@ from .depthwise_conv2D import depthwise_conv2D
 from .gaussian_blur import gaussian_blur
 from .matmul import matmul, matmulc
 from .pad import pad
+from .rct import rct
 from .reduce import (moments, reduce_max, reduce_mean, reduce_min, reduce_std,
                      reduce_sum, reduce_variance)
 from .remap import remap
 from .remap_np_affine import remap_np_affine
 from .reshape import reshape
-from .slice_ import slice_
+from .slice_ import slice_, split
 from .slice_set import slice_set
 from .stack import stack
 from .tile import tile
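
Judging by the test added in NTest, the new rct op is a masked color transfer over NCHW tensors (rct presumably stands for Reinhard color transfer); a usage sketch taken from that test, with illustrative shapes:

    source_t = Tensor(shape=(1,3,64,64), dtype=np.float32, initializer=InitRandomUniform())
    target_t = Tensor(shape=(1,3,64,64), dtype=np.float32, initializer=InitRandomUniform())
    mask_t   = Tensor(shape=(1,3,64,64), dtype=np.float32, initializer=InitRandomUniform())
    result_t = op.rct(target_t, source_t, target_mask_t=mask_t, source_mask_t=mask_t)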

View file

@@ -1,27 +1,31 @@
 import numpy as np
 
+from ..AAxes import AAxes
 from ..AShape import AShape
 from ..backend import Kernel
 from ..HArgs import HArgs
 from ..HKernel import HKernel
 from ..HType import HType
-from ..info import BroadcastInfo
+from ..info import BroadcastInfo, ReductionInfo
 from ..SCacheton import SCacheton
 from ..Tensor import Tensor
 
 def any_wise(op_text : str,
              *args,
+             dim_wise_axis : int = None,
              dtype : np.dtype = None,
             output_t:Tensor=None) -> Tensor:
     """
-    operator for N-wise ops with N inputs
+    elements-wise operator with N inputs
 
    arguments
 
        op_text     example: O=(2*I0*I1)+I2
 
        *args       List[ Tensor | number ]
 
+       dim_wise_axis(None)
+
        dtype
 
        output_t    compute result to this Tensor.
@@ -33,7 +37,7 @@ def any_wise(op_text : str,
     shape_list, dtype_list, krn_args = HArgs.decompose(args)
 
-    op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dtype, op_text)
+    op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dim_wise_axis, dtype, op_text)
 
     if output_t is None:
         output_t = Tensor ( op.o_shape, op.o_dtype, device=device )
@@ -45,59 +49,60 @@ def any_wise(op_text : str,
     return output_t
 
 class _AnyWiseOp:
-    def __init__(self, shape_list, dtype_list, o_dtype, op_text : str):
+    def __init__(self, shape_list, dtype_list, dim_wise_axis, o_dtype, op_text : str):
         if len(shape_list) != len(dtype_list):
             raise ValueError('len(shape_list) != len(dtype_list)')
 
         self.o_dtype = o_dtype = o_dtype if o_dtype is not None else HType.get_most_weighted_dtype (dtype_list)
 
-        if len(shape_list) == 1:
-            # element-wise.
-            i_shape, i_dtype = shape_list[0], dtype_list[0]
-            self.o_shape = o_shape = i_shape
-
-            self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
-{HKernel.define_tensor('O', o_shape, o_dtype)}
-{HKernel.define_tensor('IN', i_shape, i_dtype)}
-__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const IN_PTR_TYPE* IN_PTR_NAME)
-{{
-    size_t gid = get_global_id(0);
-    O_TYPE O = O_GLOBAL_LOAD(gid);
-    IN_TYPE I0 = IN_GLOBAL_LOAD(gid);
-    {op_text};
-    O_GLOBAL_STORE(gid, O);
-}}
-""")
-        else:
-            # Multi arg.
         self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
         self.o_shape = o_shape = info.o_shape
 
+        g_shape = o_shape
+        if dim_wise_axis is not None:
+            dim_wise_axis = o_shape.check_axis(dim_wise_axis)
+            dim_wise_axis_size = o_shape[dim_wise_axis]
+            if dim_wise_axis_size > 16:
+                raise ValueError(f'dim_wise_axis size > 16: {dim_wise_axis_size}')
+            g_shape = ReductionInfo( o_shape, AAxes(dim_wise_axis), False ).o_shape
+
         defs, arg_defs, impls = [], [], []
         for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
             t_name = f'I{i}'
             if t_shape is not None:
                 defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
                 arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
-                impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('O', info.o_shape.ndim)}));")
+                if dim_wise_axis is not None:
+                    for i_elem in range(dim_wise_axis_size):
+                        impls.append( f"{t_name}_TYPE {t_name}_{i_elem} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}));")
+                else:
+                    impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim)}));")
             else:
                 arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
         defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
 
-        self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
+        if dim_wise_axis is not None:
+            o_def   = '\n'.join( f"O_TYPE O_{i_elem};" for i_elem in range(dim_wise_axis_size) )
+            o_store = '\n'.join( f"O_GLOBAL_STORE(O_IDX({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}), O_{i_elem});" for i_elem in range(dim_wise_axis_size) )
+        else:
+            o_def   = 'O_TYPE O;'
+            o_store = 'O_GLOBAL_STORE(gid, O);'
+
+        self.forward_krn = Kernel(global_shape=(g_shape.size,), kernel_text=f"""
 {defs}
 {HKernel.define_tensor('O', o_shape, o_dtype)}
+{HKernel.define_tensor_shape('G', g_shape)}
 __kernel void impl(__global O_PTR_TYPE* O_PTR_NAME{arg_defs})
 {{
     size_t gid = get_global_id(0);
-    {HKernel.decompose_idx_to_axes_idxs('gid', 'o', o_shape.ndim)}
+    {HKernel.decompose_idx_to_axes_idxs('gid', 'G', g_shape.ndim)}
     {impls}
-    O_TYPE O;
+    {o_def}
     {op_text};
-    O_GLOBAL_STORE(gid, O);
+    {o_store}
 }}
 """)

View file

@@ -39,7 +39,7 @@ def cvt_color (input_t : Tensor, in_mode : str, out_mode : str, ch_axis=1, dtype
     return output_t
 
 _allowed_modes = ['RGB', 'BGR', 'XYZ', 'LAB']
-_allowed_dtypes = [np.float16, np.float32, np.float64]
+_allowed_dtypes = [np.float16, np.float32]
 
 class _CvtColor32Op():
     def __init__(self, i_shape : AShape, i_dtype, in_mode, o_dtype, out_mode, ch_axis):
@@ -100,54 +100,74 @@ class _CvtColor32Op():
         self.forward_krn = krn
 
     @staticmethod
-    def get_RGB_to_LAB_body(R,G,B,L,a,b,lab_type='') -> str:
+    def get_RGB_to_LAB_body(R,G,B,L,a,b, declare_out_type=False) -> str:
         return f"""
-{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,'X','Y','Z', xyz_type='float')}
-{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, lab_type=lab_type)}
+{_CvtColor32Op.get_sRGB_to_XYZ_body(R,G,B,'X','Y','Z', declare_out_type=True)}
+{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, declare_out_type=declare_out_type)}
 """
     @staticmethod
-    def get_LAB_to_RGB_body(L,a,b,R,G,B,rgb_type='') -> str:
+    def get_LAB_to_RGB_body(L,a,b,R,G,B, declare_out_type=False) -> str:
         return f"""
-{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', xyz_type='float')}
-{_CvtColor32Op.get_XYZ_to_RGB_body('X','Y','Z',R,G,B,rgb_type=rgb_type)}
+{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', declare_out_type=True)}
+{_CvtColor32Op.get_XYZ_to_sRGB_body('X','Y','Z',R,G,B, declare_out_type=declare_out_type)}
 """
     @staticmethod
-    def get_RGB_to_XYZ_body(R,G,B,X,Y,Z,xyz_type='') -> str:
+    def get_sRGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
         return f"""
-{xyz_type} {X} = fma(0.4124564, {R}, fma(0.3575761, {G}, 0.1804375*{B}));
-{xyz_type} {Y} = fma(0.2126729, {R}, fma(0.7151522, {G}, 0.0721750*{B}));
-{xyz_type} {Z} = fma(0.0193339, {R}, fma(0.1191920, {G}, 0.9503041*{B}));
+{R} = ({R} > 0.04045)*( pow( ({R}+0.055)/1.055, 2.4) ) + ({R} <= 0.04045)*({R} / 12.92);
+{G} = ({G} > 0.04045)*( pow( ({G}+0.055)/1.055, 2.4) ) + ({G} <= 0.04045)*({G} / 12.92);
+{B} = ({B} > 0.04045)*( pow( ({B}+0.055)/1.055, 2.4) ) + ({B} <= 0.04045)*({B} / 12.92);
+
+{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,X,Y,Z,declare_out_type=declare_out_type) }
+"""
+    @staticmethod
+    def get_RGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {X} = {R}*0.412453 + {G}*0.357580 + {B}*0.180423;
+{'float' if declare_out_type else ''} {Y} = {R}*0.212671 + {G}*0.715160 + {B}*0.072169;
+{'float' if declare_out_type else ''} {Z} = {R}*0.019334 + {G}*0.119193 + {B}*0.950227;
+"""
+    @staticmethod
+    def get_XYZ_to_sRGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
+        return f"""
+{_CvtColor32Op.get_XYZ_to_RGB_body(X,Y,Z,R,G,B,declare_out_type=declare_out_type) }
+{R} = ({R} > 0.0031308)*( 1.055*pow({R},1.0/2.4)-0.055 ) + ({R} <= 0.0031308)*({R} * 12.92);
+{G} = ({G} > 0.0031308)*( 1.055*pow({G},1.0/2.4)-0.055 ) + ({G} <= 0.0031308)*({G} * 12.92);
+{B} = ({B} > 0.0031308)*( 1.055*pow({B},1.0/2.4)-0.055 ) + ({B} <= 0.0031308)*({B} * 12.92);
 """
     @staticmethod
-    def get_XYZ_to_RGB_body(X,Y,Z,R,G,B,rgb_type='') -> str:
+    def get_XYZ_to_RGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
         return f"""
-{rgb_type} {R} = fma( 3.2404542, {X}, fma(-1.5371385, {Y}, -0.4985314*{Z}));
-{rgb_type} {G} = fma(-0.9692660, {X}, fma( 1.8760108, {Y},  0.0415560*{Z}));
-{rgb_type} {B} = fma( 0.0556434, {X}, fma(-0.2040259, {Y},  1.0572252*{Z}));
+{'float' if declare_out_type else ''} {R} = clamp( {X}* 3.240479 + {Y}*-1.53715  + {Z}*-0.498535, 0.0, 1.0 );
+{'float' if declare_out_type else ''} {G} = clamp( {X}*-0.969256 + {Y}* 1.875991 + {Z}* 0.041556, 0.0, 1.0 );
+{'float' if declare_out_type else ''} {B} = clamp( {X}* 0.055648 + {Y}*-0.204043 + {Z}* 1.057311, 0.0, 1.0 );
 """
     @staticmethod
-    def get_RGB_to_BGR_body(R,G,B,b,g,r,bgr_type='') -> str:
+    def get_RGB_to_BGR_body(R,G,B,b,g,r, declare_out_type=False) -> str:
         return f"""
-{bgr_type} {b} = {R};
-{bgr_type} {g} = {G};
-{bgr_type} {r} = {B};
+{'float' if declare_out_type else ''} {b} = {R};
+{'float' if declare_out_type else ''} {g} = {G};
+{'float' if declare_out_type else ''} {r} = {B};
 """
     @staticmethod
-    def get_BGR_to_RGB_body(B,G,R,r,g,b,rgb_type='') -> str:
+    def get_BGR_to_RGB_body(B,G,R,r,g,b, declare_out_type=False) -> str:
         return f"""
-{rgb_type} {r} = {B};
-{rgb_type} {g} = {G};
-{rgb_type} {b} = {R};
+{'float' if declare_out_type else ''} {r} = {B};
+{'float' if declare_out_type else ''} {g} = {G};
+{'float' if declare_out_type else ''} {b} = {R};
 """
     @staticmethod
-    def get_XYZ_to_LAB_body(X,Y,Z,L,A,B,lab_type='') -> str:
+    def get_XYZ_to_LAB_body(X,Y,Z,L,A,B, declare_out_type=False) -> str:
         beta3 = '((6.0/29.0)*(6.0/29.0)*(6.0/29.0))'
-        xyz_xn = '(0.9556)'
+        xyz_xn = '(0.950456)'
         xyz_zn = '(1.088754)'
 
         return f"""
 {X} /= {xyz_xn};
@@ -157,20 +177,20 @@ class _CvtColor32Op():
 {Y} = ({Y} > {beta3})*rootn({Y}, 3) + ({Y} <= {beta3})*(7.787*{Y}+4.0/29.0);
 {Z} = ({Z} > {beta3})*rootn({Z}, 3) + ({Z} <= {beta3})*(7.787*{Z}+4.0/29.0);
 
-{lab_type} {L} = 116.0*{Y}-16.0;
-{lab_type} {A} = 500.0*({X}-{Y});
-{lab_type} {B} = 200.0*({Y}-{Z});
+{'float' if declare_out_type else ''} {L} = 116.0*{Y}-16.0;
+{'float' if declare_out_type else ''} {A} = 500.0*({X}-{Y});
+{'float' if declare_out_type else ''} {B} = 200.0*({Y}-{Z});
 """
     @staticmethod
-    def get_LAB_to_XYZ_body(L,A,B,X,Y,Z,xyz_type='') -> str:
+    def get_LAB_to_XYZ_body(L,A,B,X,Y,Z, declare_out_type=False) -> str:
         beta  = '(6.0/29.0)'
         beta2 = '((6.0/29.0)*(6.0/29.0))'
-        xyz_xn = '(0.9556)'
+        xyz_xn = '(0.950456)'
         xyz_zn = '(1.088754)'
 
         return f"""
-{xyz_type} {Y} = ({L} + 16.0) / 116.0;
-{xyz_type} {X} = {Y} + {A} / 500.0;
-{xyz_type} {Z} = {Y} - {B} / 200.0;
+{'float' if declare_out_type else ''} {Y} = ({L} + 16.0) / 116.0;
+{'float' if declare_out_type else ''} {X} = {Y} + {A} / 500.0;
+{'float' if declare_out_type else ''} {Z} = {Y} - {B} / 200.0;
 {Y} = ({Y} > {beta})*({Y}*{Y}*{Y}) + ({Y} <= {beta})*({Y}-16.0/116.0)*3*{beta2};
 {X} = ({X} > {beta})*({X}*{X}*{X}*{xyz_xn}) + ({X} <= {beta})*({X}-16.0/116.0)*3*{beta2}*{xyz_xn};
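
The switch from a bare linear-RGB matrix to get_sRGB_to_XYZ_body adds the standard sRGB gamma expansion before the matrix (and the inverse compression after it), with OpenCV-style matrix constants; a numpy sketch of just the transfer function:

    import numpy as np

    def srgb_to_linear(c):
        # piecewise gamma expansion, mirroring the new kernel body
        return np.where(c > 0.04045, ((c + 0.055) / 1.055) ** 2.4, c / 12.92)

    def linear_to_srgb(c):
        return np.where(c > 0.0031308, 1.055 * c ** (1.0 / 2.4) - 0.055, c * 12.92)

    c = np.linspace(0, 1, 5, dtype=np.float32)
    assert np.allclose(linear_to_srgb(srgb_to_linear(c)), c, atol=1e-5)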

View file

@@ -58,7 +58,7 @@ def reduce_variance(input_t, axes=None, keepdims=False):
     mean = reduce_mean(input_t, axes, keepdims=True)
     return reduce_mean(square(input_t - mean), axes, keepdims)
 
-def moments(input_t, axes=None, keepdims=False):
+def moments(input_t, axes=None):
     """
     Returns (mean, variance) of input_t
@@ -68,11 +68,9 @@ def moments(input_t, axes=None, keepdims=False):
         Iterable of ints.
         None - all axes
 
-        keepdims(False)     keep reduced axes
     """
-    mean = reduce_mean(input_t, axes, keepdims)
-    mean_shape_keepdims = mean._op.info.o_shape_kd
-    var = reduce_mean(square(input_t - mean.reshape(mean_shape_keepdims) ), axes, keepdims)
+    mean = reduce_mean(input_t, axes, True)
+    var  = reduce_mean(square(input_t - mean), axes, True)
 
     return mean, var
 
 def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
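
The simplified moments body leans on keepdims=True: the mean keeps its reduced axes, so the subtraction broadcasts directly and the old reshape dance is unnecessary, at the cost of always returning keepdims-shaped results. The numpy equivalent:

    import numpy as np
    x = np.random.rand(4, 3).astype(np.float32)
    mean = x.mean(axis=1, keepdims=True)                  # (4, 1), broadcasts against x
    var  = ((x - mean) ** 2).mean(axis=1, keepdims=True)  # (4, 1)
    assert np.allclose(var, x.var(axis=1, keepdims=True), atol=1e-6)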

View file

@@ -1,6 +1,9 @@
+from typing import List
+
 import numpy as np
 
 from ..AShape import AShape
+from ..AAxes import AAxes
 from ..backend import Kernel
 from ..HKernel import HKernel
 from ..HType import HType
@@ -9,6 +12,29 @@ from ..SCacheton import SCacheton
 from ..Tensor import Tensor
 
+def split(input_t : Tensor, axis, keepdims=False) -> List[Tensor]:
+    """
+    arguments
+
+        input_t     Tensor
+
+        axis
+    """
+    shape = input_t.shape
+
+    result = []
+    for i in range(shape[axis]):
+        slices = [slice(None, None, None)]*shape.ndim
+        slices[axis] = i if not keepdims else slice(i,i+1,1)
+        result.append( slice_(input_t, slices) )
+
+    return result
+
 def slice_(input_t : Tensor, slices, dtype : np.dtype = None, output_t=None, is_add_to_output=False) -> Tensor:
     """
     arguments:
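
A quick sketch of the new split semantics (tensor t and its shape are hypothetical): split slices the given axis once per element, dropping it unless keepdims is set.

    # t has shape (1, 3, 8, 8); splitting on the channel axis
    r, g, b = split(t, axis=1)                  # each of shape (1, 8, 8)
    r, g, b = split(t, axis=1, keepdims=True)   # each of shape (1, 1, 8, 8)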