update xlib.avecl

iperov 2021-10-20 18:02:50 +04:00
commit 6da916cc66
14 changed files with 246 additions and 184 deletions

View file

@ -3,6 +3,7 @@ AveCL ! Make OpenCL great again.
Lightweight ndarray library written in pure Python, using OpenCL 1.2.
Suitable for high-performance, general-purpose n-dim array computation on any device that supports OpenCL 1.2.
Supports any dtype except float64.
Works with Python 3.5+. The only dependency is numpy.
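
A minimal usage sketch (hypothetical, inferred from the test code later in this commit; the exact import path and device setup may differ):

import numpy as np
from xlib.avecl import Tensor, op   # assumed package root, as used by NTest below

# upload two integer-valued float32 arrays and multiply them on the device
a_n = np.random.randint(2**4, size=(2, 4, 3)).astype(np.float32)
b_n = np.random.randint(2**4, size=(2, 3, 5)).astype(np.float32)
a_t = Tensor.from_value(a_n)
b_t = Tensor.from_value(b_n)
o_t = op.matmul(a_t, b_t)           # batched matmul, runs as an OpenCL kernel

assert np.allclose(o_t.np(), np.matmul(a_n, b_n))   # .np() reads the result back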

View file

@ -15,7 +15,7 @@ class AShape(Iterable):
shape AShape
Iterable
AShape cannot be a scalar shape, thus the minimal AShape is (1,)
may raise ValueError during construction
@ -50,13 +50,26 @@ class AShape(Iterable):
self.size = size
else:
raise ValueError('Invalid type to create AShape')
def copy(self) -> 'AShape':
return AShape(self)
def as_list(self) -> List[int]:
return list(self.shape)
def check_axis(self, axis : int) -> int:
"""
Checks the axis and returns the normalized axis value.
May raise ValueError.
"""
if axis < 0:
axis += self.ndim
if axis < 0 or axis >= self.ndim:
raise ValueError(f'axis {axis} out of bound of ndim {self.ndim}')
return axis
def axes_arange(self) -> AAxes:
"""
Returns a tuple of axes as an arange.
@ -64,7 +77,7 @@ class AShape(Iterable):
Example (0,1,2) for ndim 3
"""
return AAxes(range(self.ndim))
def replaced_axes(self, axes, dims) -> 'AShape':
"""
returns a new AShape where the given axes are replaced with new dims
@ -76,22 +89,22 @@ class AShape(Iterable):
axis = ndim + axis
if axis < 0 or axis >= ndim:
raise ValueError(f'invalid axis value {axis}')
new_shape[axis] = dim
return AShape(new_shape)
def split(self, axis) -> Tuple['AShape', 'AShape']:
"""
splits the AShape at the specified axis
returns two AShapes: before the axis (exclusive) and from the axis onward (inclusive)
"""
if axis < 0:
axis = self.ndim + axis
if axis < 0 or axis >= self.ndim:
raise ValueError(f'invalid axis value {axis}')
return self[:axis], self[axis:]
def transpose_by_axes(self, axes) -> 'AShape':
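
A short sketch of the AShape helpers shown above (a hypothetical illustration; behavior traced from the code in this hunk):

s = AShape((2, 3, 4))

s.check_axis(-1)        # -> 2, negative axes are normalized
# s.check_axis(3)       # -> ValueError: axis 3 out of bound of ndim 3

before, after = s.split(1)
# tuple(before) == (2,) ; tuple(after) == (3, 4)

tuple(s.axes_arange())  # -> (0, 1, 2), i.e. AAxes(range(ndim))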

View file

@ -15,12 +15,9 @@ class HKernel:
np.int64 : 'long',
np.uint64 : 'ulong',
np.float16 : 'half',
np.float32 : 'float',
np.float64 : 'double'
np.float32 : 'float'
}
@staticmethod
def np_dtype_to_cl(dtype : np.dtype):
"""
@ -134,30 +131,33 @@ class HKernel:
out += [f'#define {name_upper}_GLOBAL_STORE8(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
out += [f'#define {name_upper}_GLOBAL_STORE16(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
if dtype in [np.float32, np.float64]:
if dtype in [np.float32]:
out += [f'#define {name_upper}_TO_FLOATX(x) x']
elif dtype in [np.bool_, np.int8, np.uint8, np.int16, np.uint16, np.int32,np.uint32, np.float16]:
out += [f'#define {name_upper}_TO_FLOATX(x) ((float)x)']
elif dtype in [np.int64,np.uint64]:
out += [f'#define {name_upper}_TO_FLOATX(x) ((double)x)']
return '\n'.join(out)
@staticmethod
def define_ndim_idx(ndim):
"""
defines macros that calculate a flat index for an n-dim shape
example for ndim=3
#define NDIM3_IDX(t0,t1,t2,T0,T1,T2) (((size_t)(t0))*T1*T2+((size_t)(t1))*T2+((size_t)(t2)))
#define NDIM3_IDX_MOD(t0,t1,t2,T0,T1,T2) (((size_t)(t0) % T0)*T1*T2+((size_t)(t1) % T1)*T2+((size_t)(t2) % T2))
"""
out = [f'#define NDIM{ndim}_IDX(' + \
','.join([f't{i}' for i in range(ndim)] + [f'T{i}' for i in range(ndim)]) + \
') (' + '+'.join([f'((size_t)(t{i}))' + ''.join(f'*T{j}' for j in range(i+1,ndim)) for i in range(ndim) ]) + ')']
out +=[f'#define NDIM{ndim}_IDX_MOD(' + \
','.join([f't{i}' for i in range(ndim)] + [f'T{i}' for i in range(ndim)]) + \
') (' + '+'.join([f'((size_t)(t{i}) % T{i})' + ''.join(f'*T{j}' for j in range(i+1,ndim)) for i in range(ndim) ]) + ')']
return '\n'.join(out)
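
For reference, tracing HKernel.define_ndim_idx(2) through the generator above yields (a worked example, not part of the diff):

print(HKernel.define_ndim_idx(2))
# #define NDIM2_IDX(t0,t1,T0,T1) (((size_t)(t0))*T1+((size_t)(t1)))
# #define NDIM2_IDX_MOD(t0,t1,T0,T1) (((size_t)(t0) % T0)*T1+((size_t)(t1) % T1))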
@staticmethod
@ -165,14 +165,14 @@ class HKernel:
"""
Returns definitions for operations with a tensor shape
example for 'O', (7,3),
example for 'O', (2,3),
#define O0 7
#define O0 2
#define O1 3
#define Om1 3
#define Om2 7
#define O_IDX(o0,o1) ( (size_t)(o0) )*3 +( o1 )
#define O_IDX_MOD(o0,o1) ( (size_t)(o0) % 7 )*3 +( (o1) % 3 )
#define Om2 2
#define O_IDX(o0,o1) (((size_t)(o0))*3+((size_t)(o1)))
#define O_IDX_MOD(o0,o1) (((size_t)(o0) % 2)*3+((size_t)(o1) % 3))
"""
shape = tuple(shape)
ndim = len(shape)
@ -183,36 +183,14 @@ class HKernel:
axes_symbols = "".join([str(i) for i in range(ndim)])
axes_symbols = axes_symbols.upper()
out = []
for i in range(ndim):
out += [f'#define {name_upper}{axes_symbols[i]} {shape[i]}']
out = [f'#define {name_upper}{axes_symbols[i]} {shape[i]}' for i in range(ndim)]
out += [f'#define {name_upper}m{i} {shape[-i]}' for i in range(1,ndim+1)]
for i in range(1,ndim+1):
out += [f'#define {name_upper}m{i} {shape[-i]}']
out += [f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) (' + \
'+'.join([f'((size_t)({name_lower}{i}))' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']
line = f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) '
for i in range(ndim):
line += f'( (size_t)({name_lower}{i}) )'
for j in range(i+1,ndim):
line += f'*{shape[j]} '
if i != ndim-1:
line += '+'
out += [line]
line = f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) '
for i in range(ndim):
line += f'( (size_t)({name_lower}{i}) % {shape[i]} )'
for j in range(i+1,ndim):
line += f'*{shape[j]} '
if i != ndim-1:
line += '+'
out += [line,'']
out += [f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) (' + \
'+'.join([f'((size_t)({name_lower}{i}) % {shape[i]})' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']
return '\n'.join(out)
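
The refactor above replaces the hand-rolled string loops with comprehensions; for the docstring example ('O', (2,3)) the emitted text should be unchanged. A quick sanity check (assuming the method shown is HKernel.define_tensor_shape, the name used elsewhere in this commit):

text = HKernel.define_tensor_shape('O', (2, 3))
assert '#define O_IDX(o0,o1) (((size_t)(o0))*3+((size_t)(o1)))' in text
assert '#define O_IDX_MOD(o0,o1) (((size_t)(o0) % 2)*3+((size_t)(o1) % 3))' in text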

View file

@ -3,10 +3,10 @@ from typing import Iterable, List
import numpy as np
scalar_types = [int, float, np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
np.float16, np.float32, np.float64, np.bool_]
np.float16, np.float32, np.bool_]
np_scalar_types = [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
np.float16, np.float32, np.float64, np.bool_]
np.float16, np.float32, np.bool_]
_np_dtype_to_cl = {
np.bool_ : 'bool',
@ -20,7 +20,6 @@ _np_dtype_to_cl = {
np.int64 : 'long',
np.float16 : 'half',
np.float32 : 'float',
np.float64 : 'double',
}
_np_dtype_weight = {
@ -34,8 +33,7 @@ _np_dtype_weight = {
np.uint64 : 8,
np.int64 : 9,
np.float16 : 10,
np.float32 : 11,
np.float64 : 12,
np.float32 : 11
}
class HType:

View file

@ -1,14 +1,13 @@
import traceback
import numpy as np
from .HType import HType
from .NCore import NCore
from .backend import get_device, get_default_device, set_default_device
from .Tensor import Tensor
from . import op
from .initializer import InitRandomUniform, InitCoords2DArange
from .backend import get_default_device, get_device, set_default_device
from .HType import HType
from .info import Conv2DInfo
from .initializer import InitCoords2DArange, InitRandomUniform
from .NCore import NCore
from .Tensor import Tensor
class NTest():
@ -45,6 +44,7 @@ class NTest():
binary_dilate_circle_test,
binary_morph_test,
cvt_color_test,
rct_test,
]
for test_func in test_funcs:
@ -62,18 +62,39 @@ class NTest():
def _all_close(x,y, atol=1, btol=1):
return np.allclose( np.ndarray.flatten(x[None,...]), np.ndarray.flatten(y[None,...]), atol, btol )
def rct_test():
for _ in range(10):
for dtype in [np.float16, np.float32]:
base_shape = list(np.random.randint(1, 8, size=4) )
shape = base_shape.copy()
shape[1] = 3
mask_shape = base_shape.copy()
mask_shape[1] = 3
print(f'rct {shape} {str(np.dtype(dtype).name)} ... ', end='', flush=True)
source_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
target_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
mask_t = Tensor(shape=mask_shape, dtype=dtype, initializer=InitRandomUniform())
result_t = op.rct(target_t, source_t, target_mask_t=mask_t, source_mask_t=mask_t )
print('pass')
def cvt_color_test():
for _ in range(10):
for shape_len in range(2,6):
for in_mode in ['RGB','BGR','XYZ','LAB']:
for out_mode in ['RGB','BGR','XYZ','LAB']:
for dtype in [np.float16, np.float32, np.float64]:
for dtype in [np.float16, np.float32]:
shape = list(np.random.randint(1, 8, size=shape_len) )
ch_axis = np.random.randint(len(shape))
shape[ch_axis] = 3
print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='')
print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='', flush=True)
inp_n = np.random.uniform(size=shape ).astype(dtype)
inp_t = Tensor.from_value(inp_n)
@ -81,7 +102,9 @@ def cvt_color_test():
out_t = op.cvt_color(inp_t, in_mode=in_mode, out_mode=out_mode, ch_axis=ch_axis)
inp_t2 = op.cvt_color(out_t, in_mode=out_mode, out_mode=in_mode, ch_axis=ch_axis)
if not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
is_check = in_mode in ['RGB','BGR','XYZ'] and out_mode in ['XYZ','LAB']
if is_check and not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
raise Exception(f'data is not equal')
print('pass')
@ -91,7 +114,7 @@ def cast_test():
for out_dtype in HType.get_np_scalar_types():
shape = tuple(np.random.randint(1, 8, size=( np.random.randint(1,5))) )
print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='')
print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='', flush=True)
val_n = np.random.uniform( -64, 64, size=shape ).astype(in_dtype)
cast_n = val_n.astype(out_dtype)
@ -113,7 +136,7 @@ def binary_morph_test():
input_n = np.random.randint( 2, size=shape ).astype(dtype)
input_t = Tensor.from_value(input_n)
print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='')
print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='', flush=True)
op.binary_morph(input_t, erode_dilate=erode_dilate, blur=blur, fade_to_border=True)
@ -130,7 +153,7 @@ def binary_erode_circle_test():
input_n = np.random.randint( 2, size=shape ).astype(dtype)
input_t = Tensor.from_value(input_n)
print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)
op.binary_erode_circle(input_t, radius=radius, iterations=iterations)
@ -147,7 +170,7 @@ def binary_dilate_circle_test():
input_n = np.random.randint( 2, size=shape ).astype(dtype)
input_t = Tensor.from_value(input_n)
print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)
op.binary_dilate_circle(input_t, radius=radius, iterations=iterations)
@ -156,11 +179,11 @@ def binary_dilate_circle_test():
def gaussian_blur_test():
for shape_len in range(2,5):
for dtype in [np.float16, np.float32, np.float64]:
for dtype in [np.float16, np.float32]:
shape = np.random.randint( 1, 64, size=(shape_len,) )
sigma = np.random.rand() * 10
print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='')
print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='', flush=True)
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
val_t = Tensor.from_value(val_n)
@ -179,7 +202,7 @@ def pad_test():
paddings = tuple( (np.random.randint(8), np.random.randint(8)) for i in range(len(shape)) )
print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='')
print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='', flush=True)
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
pad_n = np.pad(val_n, paddings, mode=mode)
@ -187,7 +210,7 @@ def pad_test():
val_t = Tensor.from_value(val_n)
pad_t = op.pad(val_t, paddings, mode=mode)
print(f'{pad_n.shape} == {pad_t.shape} ... ', end='')
print(f'{pad_n.shape} == {pad_t.shape} ... ', end='', flush=True)
if pad_n.shape != pad_t.shape:
raise Exception(f'shape is not equal')
@ -241,7 +264,7 @@ def slice_set_test():
shape = tuple(shape)
slices = tuple(slices)
print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
val_t = Tensor.from_value(val_n)
@ -330,7 +353,7 @@ def depthwise_conv2d_test():
input_shape = (n, ic, ih, iw)
kernel_shape = (ic, ks, ks)
print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='')
print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='', flush=True)
input_n = np.random.randint( 64, size=input_shape ).astype(dtype)
kernel_n = np.ones(shape=kernel_shape ).astype(dtype)
@ -358,7 +381,7 @@ def warp_affine_test():
H = np.random.randint(8, 64)
W = np.random.randint(8, 64)
print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@ -380,7 +403,7 @@ def remap_np_affine_test():
H = np.random.randint(8, 64)
W = np.random.randint(8, 64)
print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@ -402,7 +425,7 @@ def remap_test():
H = np.random.randint(8, 64)
W = np.random.randint(8, 64)
print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@ -422,7 +445,7 @@ def tile_test():
shape = tuple(np.random.randint( 8, size=(shape_len,) )+1)
tiles = tuple(np.random.randint( 4, size=(shape_len,) )+1)
print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='')
print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='', flush=True)
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
tiled_n = np.tile(val_n, tiles)
@ -430,7 +453,7 @@ def tile_test():
val_t = Tensor.from_value(val_n)
tiled_t = op.tile(val_t, tiles)
print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='')
print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='', flush=True)
if tiled_n.shape != tiled_t.shape:
raise Exception(f'shape is not equal')
@ -448,7 +471,7 @@ def stack_test():
axis = np.random.randint(shape_len+1)
stack_count = np.random.randint(4)+1
print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='')
print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='', flush=True)
vals_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for i in range(stack_count) ]
stack_n = np.stack(vals_n, axis)
@ -456,7 +479,7 @@ def stack_test():
vals_t = [ Tensor.from_value(vals_n[i]) for i in range(stack_count) ]
stack_t = op.stack(vals_t, axis)
print(f'{stack_n.shape} == {stack_t.shape} ... ', end='')
print(f'{stack_n.shape} == {stack_t.shape} ... ', end='', flush=True)
if stack_n.shape != stack_t.shape:
raise Exception('shape is not equal')
@ -483,9 +506,9 @@ def reduce_test():
keepdims = np.random.randint(2) == 0
print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='')
print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='', flush=True)
if dtype in [np.float16, np.float32, np.float64]:
if dtype in [np.float16, np.float32]:
value_n = np.random.uniform(size=shape).astype(dtype)
else:
value_n = np.random.randint( max(1, int(np.iinfo(dtype).max / np.prod(shape)) ), size=shape, dtype=dtype )
@ -518,7 +541,7 @@ def InitRandomUniform_test():
for shape_len in range(1, 5):
shape = np.random.randint( 8, size=(shape_len,) )+1
print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='')
print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)
Tensor(shape, dtype, initializer=InitRandomUniform()).np()
@ -534,7 +557,7 @@ def InitCoords2DArange_test():
w_start = np.random.randint(80)
w_stop = w_start + np.random.randint(80)
print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='')
print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)
Tensor(shape, dtype, initializer=InitCoords2DArange(h_start,h_stop,w_start,w_stop )).np()
@ -551,17 +574,17 @@ def concat_test():
for i,dim in enumerate(shape) )
for shape in ([shape] * count) )
print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='')
print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='', flush=True)
V_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for shape in shapes ]
O_n = np.concatenate(V_n, axis)
print(f'{O_n.shape} == ', end='')
print(f'{O_n.shape} == ', end='', flush=True)
V_t = [ Tensor.from_value(V_n[i]) for i in range(count) ]
O_t = op.concat(V_t, axis)
print(f'{O_t.shape} ... ', end='')
print(f'{O_t.shape} ... ', end='', flush=True)
if O_n.shape != O_t.shape:
raise Exception('shape is not equal')
@ -596,19 +619,19 @@ def matmul_test():
A_shape = (BATCH, M, K)
B_shape = (BATCH, K, N)
print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='')
print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='', flush=True)
A_n = np.random.randint( 2**4, size=A_shape ).astype(dtype)
B_n = np.random.randint( 2**4, size=B_shape ).astype(dtype)
O_n = np.matmul(A_n, B_n)
print(f'{O_n.shape} == ', end='')
print(f'{O_n.shape} == ', end='', flush=True)
A_t = Tensor.from_value(A_n)
B_t = Tensor.from_value(B_n)
O_t = op.matmul(A_t, B_t)
print(f'{O_t.shape} ... ', end='')
print(f'{O_t.shape} ... ', end='', flush=True)
if O_n.shape != O_t.shape:
raise Exception('shape is not equal')
@ -659,17 +682,17 @@ def slice_test():
shape = tuple(shape)
slices = tuple(slices)
print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
sliced_n = val_n[slices]
print(f'{sliced_n.shape} ... ', end='')
print(f'{sliced_n.shape} ... ', end='', flush=True)
sliced_t = Tensor.from_value(val_n)[slices]
print(f'{sliced_t.shape} ... ', end='')
print(f'{sliced_t.shape} ... ', end='', flush=True)
if 0 in sliced_n.shape:
# some cases like 0:1:-1 produce a zero shape and an invalid array in numpy
@ -694,17 +717,17 @@ def transpose_test():
axes_order = np.array([*range(shape_len)])
np.random.shuffle(axes_order)
print(f'transpose: {shape} {axes_order} ... ', end='')
print(f'transpose: {shape} {axes_order} ... ', end='', flush=True)
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
transposed_n = np.transpose(val_n, axes_order)
print(f'{transposed_n.shape} ... ', end='')
print(f'{transposed_n.shape} ... ', end='', flush=True)
val_t = Tensor.from_value(val_n)
transposed_t = op.transpose (val_t, axes_order )
print(f'{transposed_t.shape} ... ', end='')
print(f'{transposed_t.shape} ... ', end='', flush=True)
if transposed_n.shape != transposed_t.shape:
raise Exception('shape is not equal')
@ -736,7 +759,7 @@ def any_wise_op_test():
shapes = shapes[::-1]
a_shape, b_shape = shapes
print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='')
print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='', flush=True)
a_n = np.random.randint( 1, 2**8, size=a_shape ).astype(dtype)
b_n = np.random.randint( 1, 2**8, size=b_shape ).astype(dtype)

View file

@ -109,6 +109,7 @@ class Tensor:
def min(self, axes=None, keepdims=False) -> 'Tensor': ...
def reshape(self, new_shape) -> 'Tensor': ...
def sum(self, axes=None, keepdims=False) -> 'Tensor': ...
def std(self, axes=None, keepdims=False) -> 'Tensor': ...
def transpose(self, axes_order, op_text=None, dtype=None) -> 'Tensor': ...
@property

View file

@ -70,6 +70,7 @@ Tensor.mean = reduce_mean
Tensor.min = reduce_min
Tensor.reshape = reshape
Tensor.sum = reduce_sum
Tensor.std = reduce_std
Tensor.transpose = transpose
class TensorRef(Tensor):

View file

@ -18,8 +18,7 @@ _np_dtype_to_cl = { np.uint8: CL.cl_uchar,
np.uint64: CL.cl_ulong,
np.int64: CL.cl_long,
np.float16: CL.cl_half,
np.float32: CL.cl_float,
np.float64: CL.cl_double }
np.float32: CL.cl_float}
_opencl_device_ids = None
_default_device = None

View file

@ -38,8 +38,6 @@ class InitRandomUniform(Initializer):
gen_expression = f'hash_ulong_from_ulong(gid+seed64) % {int(hl)} + {int(l)}'
elif tensor.dtype in [np.float16, np.float32]:
gen_expression = f'hash_float_from_uint(gid+seed32)*{hl} + {l}'
elif tensor.dtype in [np.float64]:
gen_expression = f'hash_double_from_ulong(gid+seed64)*{hl} + {l}'
kernel = Kernel(kernel_text=f"""
{HKernel.include_hash()}

View file

@ -9,12 +9,13 @@ from .depthwise_conv2D import depthwise_conv2D
from .gaussian_blur import gaussian_blur
from .matmul import matmul, matmulc
from .pad import pad
from .rct import rct
from .reduce import (moments, reduce_max, reduce_mean, reduce_min, reduce_std,
reduce_sum, reduce_variance)
from .remap import remap
from .remap_np_affine import remap_np_affine
from .reshape import reshape
from .slice_ import slice_
from .slice_ import slice_, split
from .slice_set import slice_set
from .stack import stack
from .tile import tile

View file

@ -1,27 +1,31 @@
import numpy as np
from ..AAxes import AAxes
from ..AShape import AShape
from ..backend import Kernel
from ..HArgs import HArgs
from ..HKernel import HKernel
from ..HType import HType
from ..info import BroadcastInfo
from ..info import BroadcastInfo, ReductionInfo
from ..SCacheton import SCacheton
from ..Tensor import Tensor
def any_wise(op_text : str,
*args,
dim_wise_axis : int = None,
dtype : np.dtype = None,
output_t:Tensor=None) -> Tensor:
"""
operator for N-wise ops with N inputs
element-wise operator with N inputs
arguments
op_text              example: O=(2*I0*I1)+I2
*args                List[ Tensor | number ]
dim_wise_axis(None)  axis whose elements are exposed as separate variables in op_text (axis size must be <= 16)
dtype
output_t             write the result to this Tensor.
@ -33,7 +37,7 @@ def any_wise(op_text : str,
shape_list, dtype_list, krn_args = HArgs.decompose(args)
op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dtype, op_text)
op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dim_wise_axis, dtype, op_text)
if output_t is None:
output_t = Tensor ( op.o_shape, op.o_dtype, device=device )
@ -45,59 +49,60 @@ def any_wise(op_text : str,
return output_t
class _AnyWiseOp:
def __init__(self, shape_list, dtype_list, o_dtype, op_text : str):
def __init__(self, shape_list, dtype_list, dim_wise_axis, o_dtype, op_text : str):
if len(shape_list) != len(dtype_list):
raise ValueError('len(shape_list) != len(dtype_list)')
self.o_dtype = o_dtype = o_dtype if o_dtype is not None else HType.get_most_weighted_dtype (dtype_list)
self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
self.o_shape = o_shape = info.o_shape
if len(shape_list) == 1:
# element-wise.
i_shape, i_dtype = shape_list[0], dtype_list[0]
self.o_shape = o_shape = i_shape
g_shape = o_shape
if dim_wise_axis is not None:
dim_wise_axis = o_shape.check_axis(dim_wise_axis)
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
{HKernel.define_tensor('O', o_shape, o_dtype)}
{HKernel.define_tensor('IN', i_shape, i_dtype)}
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const IN_PTR_TYPE* IN_PTR_NAME)
{{
size_t gid = get_global_id(0);
dim_wise_axis_size = o_shape[dim_wise_axis]
if dim_wise_axis_size > 16:
raise ValueError(f'dim_wise_axis size > 16: {dim_wise_axis_size}')
O_TYPE O = O_GLOBAL_LOAD(gid);
IN_TYPE I0 = IN_GLOBAL_LOAD(gid);
{op_text};
O_GLOBAL_STORE(gid, O);
}}
""")
else:
# Multi arg.
self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
g_shape = ReductionInfo( o_shape, AAxes(dim_wise_axis), False ).o_shape
self.o_shape = o_shape = info.o_shape
defs, arg_defs, impls = [], [], []
for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
t_name = f'I{i}'
if t_shape is not None:
defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
defs, arg_defs, impls = [], [], []
for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
t_name = f'I{i}'
if t_shape is not None:
defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('O', info.o_shape.ndim)}));")
if dim_wise_axis is not None:
for i_elem in range(dim_wise_axis_size):
impls.append( f"{t_name}_TYPE {t_name}_{i_elem} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}));")
else:
arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim)}));")
else:
arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
if dim_wise_axis is not None:
o_def = '\n'.join( f"O_TYPE O_{i_elem};" for i_elem in range(dim_wise_axis_size) )
o_store = '\n'.join( f"O_GLOBAL_STORE(O_IDX({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}), O_{i_elem});" for i_elem in range(dim_wise_axis_size) )
else:
o_def = 'O_TYPE O;'
o_store = 'O_GLOBAL_STORE(gid, O);'
self.forward_krn = Kernel(global_shape=(g_shape.size,), kernel_text=f"""
{defs}
{HKernel.define_tensor('O', o_shape, o_dtype)}
{HKernel.define_tensor_shape('G', g_shape)}
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME{arg_defs})
{{
size_t gid = get_global_id(0);
{HKernel.decompose_idx_to_axes_idxs('gid', 'o', o_shape.ndim)}
{HKernel.decompose_idx_to_axes_idxs('gid', 'G', g_shape.ndim)}
{impls}
O_TYPE O;
{o_def}
{op_text};
O_GLOBAL_STORE(gid, O);
{o_store}
}}
""")

View file

@ -39,7 +39,7 @@ def cvt_color (input_t : Tensor, in_mode : str, out_mode : str, ch_axis=1, dtype
return output_t
_allowed_modes = ['RGB', 'BGR', 'XYZ', 'LAB']
_allowed_dtypes = [np.float16, np.float32, np.float64]
_allowed_dtypes = [np.float16, np.float32]
class _CvtColor32Op():
def __init__(self, i_shape : AShape, i_dtype, in_mode, o_dtype, out_mode, ch_axis):
@ -100,54 +100,74 @@ class _CvtColor32Op():
self.forward_krn = krn
@staticmethod
def get_RGB_to_LAB_body(R,G,B,L,a,b,lab_type='') -> str:
def get_RGB_to_LAB_body(R,G,B,L,a,b, declare_out_type=False) -> str:
return f"""
{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,'X','Y','Z', xyz_type='float')}
{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, lab_type=lab_type)}
{_CvtColor32Op.get_sRGB_to_XYZ_body(R,G,B,'X','Y','Z', declare_out_type=True)}
{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, declare_out_type=declare_out_type)}
"""
@staticmethod
def get_LAB_to_RGB_body(L,a,b,R,G,B,rgb_type='') -> str:
def get_LAB_to_RGB_body(L,a,b,R,G,B, declare_out_type=False) -> str:
return f"""
{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', xyz_type='float')}
{_CvtColor32Op.get_XYZ_to_RGB_body('X','Y','Z',R,G,B,rgb_type=rgb_type)}
{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', declare_out_type=True)}
{_CvtColor32Op.get_XYZ_to_sRGB_body('X','Y','Z',R,G,B, declare_out_type=declare_out_type)}
"""
@staticmethod
def get_RGB_to_XYZ_body(R,G,B,X,Y,Z,xyz_type='') -> str:
def get_sRGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
return f"""
{xyz_type} {X} = fma(0.4124564, {R}, fma(0.3575761, {G}, 0.1804375*{B}));
{xyz_type} {Y} = fma(0.2126729, {R}, fma(0.7151522, {G}, 0.0721750*{B}));
{xyz_type} {Z} = fma(0.0193339, {R}, fma(0.1191920, {G}, 0.9503041*{B}));
"""
@staticmethod
def get_XYZ_to_RGB_body(X,Y,Z,R,G,B,rgb_type='') -> str:
return f"""
{rgb_type} {R} = fma( 3.2404542, {X}, fma(-1.5371385, {Y}, -0.4985314*{Z}));
{rgb_type} {G} = fma(-0.9692660, {X}, fma( 1.8760108, {Y}, 0.0415560*{Z}));
{rgb_type} {B} = fma( 0.0556434, {X}, fma(-0.2040259, {Y}, 1.0572252*{Z}));
{R} = ({R} > 0.04045)*( pow( ({R}+0.055)/1.055, 2.4) ) + ({R} <= 0.04045)*({R} / 12.92);
{G} = ({G} > 0.04045)*( pow( ({G}+0.055)/1.055, 2.4) ) + ({G} <= 0.04045)*({G} / 12.92);
{B} = ({B} > 0.04045)*( pow( ({B}+0.055)/1.055, 2.4) ) + ({B} <= 0.04045)*({B} / 12.92);
{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,X,Y,Z,declare_out_type=declare_out_type) }
"""
@staticmethod
def get_RGB_to_BGR_body(R,G,B,b,g,r,bgr_type='') -> str:
def get_RGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
return f"""
{bgr_type} {b} = {R};
{bgr_type} {g} = {G};
{bgr_type} {r} = {B};
{'float' if declare_out_type else ''} {X} = {R}*0.412453 + {G}*0.357580 + {B}*0.180423;
{'float' if declare_out_type else ''} {Y} = {R}*0.212671 + {G}*0.715160 + {B}*0.072169;
{'float' if declare_out_type else ''} {Z} = {R}*0.019334 + {G}*0.119193 + {B}*0.950227;
"""
@staticmethod
def get_BGR_to_RGB_body(B,G,R,r,g,b,rgb_type='') -> str:
def get_XYZ_to_sRGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
return f"""
{rgb_type} {r} = {B};
{rgb_type} {g} = {G};
{rgb_type} {b} = {R};
{_CvtColor32Op.get_XYZ_to_RGB_body(X,Y,Z,R,G,B,declare_out_type=declare_out_type) }
{R} = ({R} > 0.0031308)*( 1.055*pow({R},1.0/2.4)-0.055 ) + ({R} <= 0.0031308)*({R} * 12.92);
{G} = ({G} > 0.0031308)*( 1.055*pow({G},1.0/2.4)-0.055 ) + ({G} <= 0.0031308)*({G} * 12.92);
{B} = ({B} > 0.0031308)*( 1.055*pow({B},1.0/2.4)-0.055 ) + ({B} <= 0.0031308)*({B} * 12.92);
"""
@staticmethod
def get_XYZ_to_LAB_body(X,Y,Z,L,A,B,lab_type='') -> str:
def get_XYZ_to_RGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
return f"""
{'float' if declare_out_type else ''} {R} = clamp( {X}* 3.240479 + {Y}*-1.53715 + {Z}*-0.498535, 0.0, 1.0 );
{'float' if declare_out_type else ''} {G} = clamp( {X}*-0.969256 + {Y}* 1.875991 + {Z}* 0.041556, 0.0, 1.0 );
{'float' if declare_out_type else ''} {B} = clamp( {X}* 0.055648 + {Y}*-0.204043 + {Z}* 1.057311, 0.0, 1.0 );
"""
@staticmethod
def get_RGB_to_BGR_body(R,G,B,b,g,r, declare_out_type=False) -> str:
return f"""
{'float' if declare_out_type else ''} {b} = {R};
{'float' if declare_out_type else ''} {g} = {G};
{'float' if declare_out_type else ''} {r} = {B};
"""
@staticmethod
def get_BGR_to_RGB_body(B,G,R,r,g,b, declare_out_type=False) -> str:
return f"""
{'float' if declare_out_type else ''} {r} = {B};
{'float' if declare_out_type else ''} {g} = {G};
{'float' if declare_out_type else ''} {b} = {R};
"""
@staticmethod
def get_XYZ_to_LAB_body(X,Y,Z,L,A,B, declare_out_type=False) -> str:
beta3 = '((6.0/29.0)*(6.0/29.0)*(6.0/29.0))'
xyz_xn = '(0.9556)'
xyz_xn = '(0.950456)'
xyz_zn = '(1.088754)'
return f"""
{X} /= {xyz_xn};
@ -157,20 +177,20 @@ class _CvtColor32Op():
{Y} = ({Y} > {beta3})*rootn({Y}, 3) + ({Y} <= {beta3})*(7.787*{Y}+4.0/29.0);
{Z} = ({Z} > {beta3})*rootn({Z}, 3) + ({Z} <= {beta3})*(7.787*{Z}+4.0/29.0);
{lab_type} {L} = 116.0*{Y}-16.0;
{lab_type} {A} = 500.0*({X}-{Y});
{lab_type} {B} = 200.0*({Y}-{Z});
{'float' if declare_out_type else ''} {L} = 116.0*{Y}-16.0;
{'float' if declare_out_type else ''} {A} = 500.0*({X}-{Y});
{'float' if declare_out_type else ''} {B} = 200.0*({Y}-{Z});
"""
@staticmethod
def get_LAB_to_XYZ_body(L,A,B,X,Y,Z,xyz_type='') -> str:
def get_LAB_to_XYZ_body(L,A,B,X,Y,Z, declare_out_type=False) -> str:
beta = '(6.0/29.0)'
beta2 = '((6.0/29.0)*(6.0/29.0))'
xyz_xn = '(0.9556)'
xyz_xn = '(0.950456)'
xyz_zn = '(1.088754)'
return f"""
{xyz_type} {Y} = ({L} + 16.0) / 116.0;
{xyz_type} {X} = {Y} + {A} / 500.0;
{xyz_type} {Z} = {Y} - {B} / 200.0;
{'float' if declare_out_type else ''} {Y} = ({L} + 16.0) / 116.0;
{'float' if declare_out_type else ''} {X} = {Y} + {A} / 500.0;
{'float' if declare_out_type else ''} {Z} = {Y} - {B} / 200.0;
{Y} = ({Y} > {beta})*({Y}*{Y}*{Y}) + ({Y} <= {beta})*({Y}-16.0/116.0)*3*{beta2};
{X} = ({X} > {beta})*({X}*{X}*{X}*{xyz_xn}) + ({X} <= {beta})*({X}-16.0/116.0)*3*{beta2}*{xyz_xn};
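
As a cross-check of the constants above, a NumPy reference for the same sRGB -> XYZ -> LAB pipeline (a sketch assuming a D65 white point with Yn = 1, matching xyz_xn/xyz_zn):

import numpy as np

def srgb_to_lab(rgb):  # rgb: (..., 3) floats in [0, 1]
    # linearize sRGB, same branch as get_sRGB_to_XYZ_body
    rgb = np.where(rgb > 0.04045, ((rgb + 0.055) / 1.055) ** 2.4, rgb / 12.92)
    M = np.array([[0.412453, 0.357580, 0.180423],
                  [0.212671, 0.715160, 0.072169],
                  [0.019334, 0.119193, 0.950227]])
    xyz = rgb @ M.T / np.array([0.950456, 1.0, 1.088754])  # white-point normalize
    beta3 = (6.0 / 29.0) ** 3
    f = np.where(xyz > beta3, np.cbrt(xyz), 7.787 * xyz + 4.0 / 29.0)
    L = 116.0 * f[..., 1] - 16.0
    a = 500.0 * (f[..., 0] - f[..., 1])
    b = 200.0 * (f[..., 1] - f[..., 2])
    return np.stack([L, a, b], axis=-1)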

View file

@ -58,7 +58,7 @@ def reduce_variance(input_t, axes=None, keepdims=False):
mean = reduce_mean(input_t, axes, keepdims=True)
return reduce_mean(square(input_t - mean), axes, keepdims)
def moments(input_t, axes=None, keepdims=False):
def moments(input_t, axes=None):
"""
Returns (mean, variance) of input_t
@ -68,11 +68,9 @@ def moments(input_t, axes=None, keepdims=False):
Iterable of ints.
None - all axes
keepdims(False) keep reduced axes
"""
mean = reduce_mean(input_t, axes, keepdims)
mean_shape_keepdims = mean._op.info.o_shape_kd
var = reduce_mean(square(input_t - mean.reshape(mean_shape_keepdims) ), axes, keepdims)
mean = reduce_mean(input_t, axes, True)
var = reduce_mean(square(input_t - mean), axes, True)
return mean, var
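
Since moments() now reduces with keepdims=True internally, the (input_t - mean) broadcast is valid without the old reshape through o_shape_kd, and both outputs keep the reduced axes as size 1. Usage sketch (hypothetical shapes):

t = Tensor.from_value(np.random.uniform(size=(1, 3, 8, 8)).astype(np.float32))
mean, var = op.moments(t, axes=(2, 3))   # both have shape (1, 3, 1, 1)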
def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:

View file

@ -1,6 +1,9 @@
from typing import List
import numpy as np
from ..AShape import AShape
from ..AAxes import AAxes
from ..backend import Kernel
from ..HKernel import HKernel
from ..HType import HType
@ -9,6 +12,29 @@ from ..SCacheton import SCacheton
from ..Tensor import Tensor
def split(input_t : Tensor, axis, keepdims=False) -> List[Tensor]:
"""
arguments
input_t          Tensor
axis             axis along which to split; one output Tensor per element
keepdims(False)  keep the split axis as size 1 in the outputs
"""
shape = input_t.shape
result = []
for i in range(shape[axis]):
slices = [slice(None, None, None)]*shape.ndim
slices[axis] = i if not keepdims else slice(i,i+1,1)
result.append( slice_(input_t, slices) )
return result
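
Usage sketch for the new split helper (hypothetical shapes; one output per element along the axis):

t = Tensor.from_value(np.random.uniform(size=(1, 3, 4, 4)).astype(np.float32))
r, g, b = split(t, axis=1)                 # each has shape (1, 4, 4)
r, g, b = split(t, axis=1, keepdims=True)  # each has shape (1, 1, 4, 4)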
def slice_(input_t : Tensor, slices, dtype : np.dtype = None, output_t=None, is_add_to_output=False) -> Tensor:
"""
arguments: