mirror of
https://github.com/iperov/DeepFaceLive
synced 2025-08-19 13:09:58 -07:00
update xlib.avecl
This commit is contained in:
parent
2d401f47f8
commit
6da916cc66
14 changed files with 246 additions and 184 deletions
|
@ -3,6 +3,7 @@ AveCL ! Make OpenCL great again.
|
||||||
|
|
||||||
Lightweight ndarray library using OpenCL 1.2 written in pure python.
|
Lightweight ndarray library using OpenCL 1.2 written in pure python.
|
||||||
Applicable for high-performance general purpose n-dim array computations for every device that supports OpenCL 1.2.
|
Applicable for high-performance general purpose n-dim array computations for every device that supports OpenCL 1.2.
|
||||||
|
Supports any dtype except float64.
|
||||||
|
|
||||||
Works in python 3.5+. Dependencies: numpy.
|
Works in python 3.5+. Dependencies: numpy.
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,19 @@ class AShape(Iterable):
|
||||||
def as_list(self) -> List[int]:
|
def as_list(self) -> List[int]:
|
||||||
return list(self.shape)
|
return list(self.shape)
|
||||||
|
|
||||||
|
def check_axis(self, axis : int) -> int:
|
||||||
|
"""
|
||||||
|
Check axis and returns normalized axis value
|
||||||
|
|
||||||
|
can raise ValueError
|
||||||
|
"""
|
||||||
|
if axis < 0:
|
||||||
|
axis += self.ndim
|
||||||
|
|
||||||
|
if axis < 0 or axis >= self.ndim:
|
||||||
|
raise ValueError(f'axis {axis} out of bound of ndim {self.ndim}')
|
||||||
|
return axis
|
||||||
|
|
||||||
def axes_arange(self) -> AAxes:
|
def axes_arange(self) -> AAxes:
|
||||||
"""
|
"""
|
||||||
Returns tuple of axes arange.
|
Returns tuple of axes arange.
|
||||||
|
|
|
@ -15,12 +15,9 @@ class HKernel:
|
||||||
np.int64 : 'long',
|
np.int64 : 'long',
|
||||||
np.uint64 : 'ulong',
|
np.uint64 : 'ulong',
|
||||||
np.float16 : 'half',
|
np.float16 : 'half',
|
||||||
np.float32 : 'float',
|
np.float32 : 'float'
|
||||||
np.float64 : 'double'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def np_dtype_to_cl(dtype : np.dtype):
|
def np_dtype_to_cl(dtype : np.dtype):
|
||||||
"""
|
"""
|
||||||
|
@ -134,7 +131,7 @@ class HKernel:
|
||||||
out += [f'#define {name_upper}_GLOBAL_STORE8(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
|
out += [f'#define {name_upper}_GLOBAL_STORE8(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
|
||||||
out += [f'#define {name_upper}_GLOBAL_STORE16(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
|
out += [f'#define {name_upper}_GLOBAL_STORE16(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
|
||||||
|
|
||||||
if dtype in [np.float32, np.float64]:
|
if dtype in [np.float32]:
|
||||||
out += [f'#define {name_upper}_TO_FLOATX(x) x']
|
out += [f'#define {name_upper}_TO_FLOATX(x) x']
|
||||||
elif dtype in [np.bool_, np.int8, np.uint8, np.int16, np.uint16, np.int32,np.uint32, np.float16]:
|
elif dtype in [np.bool_, np.int8, np.uint8, np.int16, np.uint16, np.int32,np.uint32, np.float16]:
|
||||||
out += [f'#define {name_upper}_TO_FLOATX(x) ((float)x)']
|
out += [f'#define {name_upper}_TO_FLOATX(x) ((float)x)']
|
||||||
|
@ -145,7 +142,10 @@ class HKernel:
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def define_ndim_idx(ndim):
|
def define_ndim_idx(ndim):
|
||||||
"""
|
"""
|
||||||
|
define macro to calculate index for n-dim shape
|
||||||
|
|
||||||
example for ndim=3
|
example for ndim=3
|
||||||
|
|
||||||
#define NDIM3_IDX(t0,t1,t2,T0,T1,T2) (((size_t)(t0))*T1*T2+((size_t)(t1))*T2+((size_t)(t2)))
|
#define NDIM3_IDX(t0,t1,t2,T0,T1,T2) (((size_t)(t0))*T1*T2+((size_t)(t1))*T2+((size_t)(t2)))
|
||||||
#define NDIM3_IDX_MOD(t0,t1,t2,T0,T1,T2) (((size_t)(t0) % T0)*T1*T2+((size_t)(t1) % T1)*T2+((size_t)(t2) % T2))
|
#define NDIM3_IDX_MOD(t0,t1,t2,T0,T1,T2) (((size_t)(t0) % T0)*T1*T2+((size_t)(t1) % T1)*T2+((size_t)(t2) % T2))
|
||||||
"""
|
"""
|
||||||
|
@ -165,14 +165,14 @@ class HKernel:
|
||||||
"""
|
"""
|
||||||
Returns a definitions for operations with tensor shape
|
Returns a definitions for operations with tensor shape
|
||||||
|
|
||||||
example for 'O', (7,3),
|
example for 'O', (2,3),
|
||||||
|
|
||||||
#define O0 7
|
#define O0 2
|
||||||
#define O1 3
|
#define O1 3
|
||||||
#define Om1 3
|
#define Om1 3
|
||||||
#define Om2 7
|
#define Om2 2
|
||||||
#define O_IDX(o0,o1) ( (size_t)(o0) )*3 +( o1 )
|
#define O_IDX(o0,o1) (((size_t)(o0))*3+((size_t)(o1)))
|
||||||
#define O_IDX_MOD(o0,o1) ( (size_t)(o0) % 7 )*3 +( (o1) % 3 )
|
#define O_IDX_MOD(o0,o1) (((size_t)(o0) % 2)*3+((size_t)(o1) % 3))
|
||||||
"""
|
"""
|
||||||
shape = tuple(shape)
|
shape = tuple(shape)
|
||||||
ndim = len(shape)
|
ndim = len(shape)
|
||||||
|
@ -183,36 +183,14 @@ class HKernel:
|
||||||
axes_symbols = "".join([str(i) for i in range(ndim)])
|
axes_symbols = "".join([str(i) for i in range(ndim)])
|
||||||
axes_symbols = axes_symbols.upper()
|
axes_symbols = axes_symbols.upper()
|
||||||
|
|
||||||
out = []
|
out = [f'#define {name_upper}{axes_symbols[i]} {shape[i]}' for i in range(ndim)]
|
||||||
for i in range(ndim):
|
out += [f'#define {name_upper}m{i} {shape[-i]}' for i in range(1,ndim+1)]
|
||||||
out += [f'#define {name_upper}{axes_symbols[i]} {shape[i]}']
|
|
||||||
|
|
||||||
for i in range(1,ndim+1):
|
out += [f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) (' + \
|
||||||
out += [f'#define {name_upper}m{i} {shape[-i]}']
|
'+'.join([f'((size_t)({name_lower}{i}))' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']
|
||||||
|
|
||||||
line = f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) '
|
out += [f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) (' + \
|
||||||
|
'+'.join([f'((size_t)({name_lower}{i}) % {shape[i]})' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']
|
||||||
for i in range(ndim):
|
|
||||||
line += f'( (size_t)({name_lower}{i}) )'
|
|
||||||
|
|
||||||
for j in range(i+1,ndim):
|
|
||||||
line += f'*{shape[j]} '
|
|
||||||
if i != ndim-1:
|
|
||||||
line += '+'
|
|
||||||
|
|
||||||
out += [line]
|
|
||||||
|
|
||||||
line = f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) '
|
|
||||||
|
|
||||||
for i in range(ndim):
|
|
||||||
line += f'( (size_t)({name_lower}{i}) % {shape[i]} )'
|
|
||||||
|
|
||||||
for j in range(i+1,ndim):
|
|
||||||
line += f'*{shape[j]} '
|
|
||||||
if i != ndim-1:
|
|
||||||
line += '+'
|
|
||||||
|
|
||||||
out += [line,'']
|
|
||||||
|
|
||||||
return '\n'.join(out)
|
return '\n'.join(out)
|
||||||
|
|
||||||
|
|
|
@ -3,10 +3,10 @@ from typing import Iterable, List
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
scalar_types = [int, float, np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
|
scalar_types = [int, float, np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
|
||||||
np.float16, np.float32, np.float64, np.bool_]
|
np.float16, np.float32, np.bool_]
|
||||||
|
|
||||||
np_scalar_types = [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
|
np_scalar_types = [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
|
||||||
np.float16, np.float32, np.float64, np.bool_]
|
np.float16, np.float32, np.bool_]
|
||||||
|
|
||||||
_np_dtype_to_cl = {
|
_np_dtype_to_cl = {
|
||||||
np.bool_ : 'bool',
|
np.bool_ : 'bool',
|
||||||
|
@ -20,7 +20,6 @@ _np_dtype_to_cl = {
|
||||||
np.int64 : 'long',
|
np.int64 : 'long',
|
||||||
np.float16 : 'half',
|
np.float16 : 'half',
|
||||||
np.float32 : 'float',
|
np.float32 : 'float',
|
||||||
np.float64 : 'double',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
_np_dtype_weight = {
|
_np_dtype_weight = {
|
||||||
|
@ -34,8 +33,7 @@ _np_dtype_weight = {
|
||||||
np.uint64 : 8,
|
np.uint64 : 8,
|
||||||
np.int64 : 9,
|
np.int64 : 9,
|
||||||
np.float16 : 10,
|
np.float16 : 10,
|
||||||
np.float32 : 11,
|
np.float32 : 11
|
||||||
np.float64 : 12,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
class HType:
|
class HType:
|
||||||
|
|
|
@ -1,14 +1,13 @@
|
||||||
import traceback
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from .HType import HType
|
|
||||||
from .NCore import NCore
|
|
||||||
from .backend import get_device, get_default_device, set_default_device
|
|
||||||
from .Tensor import Tensor
|
|
||||||
from . import op
|
from . import op
|
||||||
from .initializer import InitRandomUniform, InitCoords2DArange
|
from .backend import get_default_device, get_device, set_default_device
|
||||||
|
from .HType import HType
|
||||||
from .info import Conv2DInfo
|
from .info import Conv2DInfo
|
||||||
|
from .initializer import InitCoords2DArange, InitRandomUniform
|
||||||
|
from .NCore import NCore
|
||||||
|
from .Tensor import Tensor
|
||||||
|
|
||||||
|
|
||||||
class NTest():
|
class NTest():
|
||||||
|
|
||||||
|
@ -45,6 +44,7 @@ class NTest():
|
||||||
binary_dilate_circle_test,
|
binary_dilate_circle_test,
|
||||||
binary_morph_test,
|
binary_morph_test,
|
||||||
cvt_color_test,
|
cvt_color_test,
|
||||||
|
rct_test,
|
||||||
]
|
]
|
||||||
|
|
||||||
for test_func in test_funcs:
|
for test_func in test_funcs:
|
||||||
|
@ -62,18 +62,39 @@ class NTest():
|
||||||
def _all_close(x,y, atol=1, btol=1):
|
def _all_close(x,y, atol=1, btol=1):
|
||||||
return np.allclose( np.ndarray.flatten(x[None,...]), np.ndarray.flatten(y[None,...]), atol, btol )
|
return np.allclose( np.ndarray.flatten(x[None,...]), np.ndarray.flatten(y[None,...]), atol, btol )
|
||||||
|
|
||||||
|
def rct_test():
|
||||||
|
for _ in range(10):
|
||||||
|
for dtype in [np.float16, np.float32]:
|
||||||
|
base_shape = list(np.random.randint(1, 8, size=4) )
|
||||||
|
shape = base_shape.copy()
|
||||||
|
shape[1] = 3
|
||||||
|
|
||||||
|
mask_shape = base_shape.copy()
|
||||||
|
mask_shape[1] = 3
|
||||||
|
|
||||||
|
print(f'rct {shape} {str(np.dtype(dtype).name)} ... ', end='', flush=True)
|
||||||
|
|
||||||
|
source_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
|
||||||
|
target_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
|
||||||
|
mask_t = Tensor(shape=mask_shape, dtype=dtype, initializer=InitRandomUniform())
|
||||||
|
|
||||||
|
result_t = op.rct(target_t, source_t, target_mask_t=mask_t, source_mask_t=mask_t )
|
||||||
|
|
||||||
|
print('pass')
|
||||||
|
|
||||||
|
|
||||||
def cvt_color_test():
|
def cvt_color_test():
|
||||||
for _ in range(10):
|
for _ in range(10):
|
||||||
for shape_len in range(2,6):
|
for shape_len in range(2,6):
|
||||||
for in_mode in ['RGB','BGR','XYZ','LAB']:
|
for in_mode in ['RGB','BGR','XYZ','LAB']:
|
||||||
for out_mode in ['RGB','BGR','XYZ','LAB']:
|
for out_mode in ['RGB','BGR','XYZ','LAB']:
|
||||||
for dtype in [np.float16, np.float32, np.float64]:
|
for dtype in [np.float16, np.float32]:
|
||||||
shape = list(np.random.randint(1, 8, size=shape_len) )
|
shape = list(np.random.randint(1, 8, size=shape_len) )
|
||||||
|
|
||||||
ch_axis = np.random.randint(len(shape))
|
ch_axis = np.random.randint(len(shape))
|
||||||
shape[ch_axis] = 3
|
shape[ch_axis] = 3
|
||||||
|
|
||||||
print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='')
|
print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='', flush=True)
|
||||||
|
|
||||||
inp_n = np.random.uniform(size=shape ).astype(dtype)
|
inp_n = np.random.uniform(size=shape ).astype(dtype)
|
||||||
inp_t = Tensor.from_value(inp_n)
|
inp_t = Tensor.from_value(inp_n)
|
||||||
|
@ -81,7 +102,9 @@ def cvt_color_test():
|
||||||
out_t = op.cvt_color(inp_t, in_mode=in_mode, out_mode=out_mode, ch_axis=ch_axis)
|
out_t = op.cvt_color(inp_t, in_mode=in_mode, out_mode=out_mode, ch_axis=ch_axis)
|
||||||
inp_t2 = op.cvt_color(out_t, in_mode=out_mode, out_mode=in_mode, ch_axis=ch_axis)
|
inp_t2 = op.cvt_color(out_t, in_mode=out_mode, out_mode=in_mode, ch_axis=ch_axis)
|
||||||
|
|
||||||
if not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
|
is_check = in_mode in ['RGB','BGR','XYZ'] and out_mode in ['XYZ','LAB']
|
||||||
|
|
||||||
|
if is_check and not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
|
||||||
raise Exception(f'data is not equal')
|
raise Exception(f'data is not equal')
|
||||||
|
|
||||||
print('pass')
|
print('pass')
|
||||||
|
@ -91,7 +114,7 @@ def cast_test():
|
||||||
for out_dtype in HType.get_np_scalar_types():
|
for out_dtype in HType.get_np_scalar_types():
|
||||||
shape = tuple(np.random.randint(1, 8, size=( np.random.randint(1,5))) )
|
shape = tuple(np.random.randint(1, 8, size=( np.random.randint(1,5))) )
|
||||||
|
|
||||||
print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='')
|
print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='', flush=True)
|
||||||
|
|
||||||
val_n = np.random.uniform( -64, 64, size=shape ).astype(in_dtype)
|
val_n = np.random.uniform( -64, 64, size=shape ).astype(in_dtype)
|
||||||
cast_n = val_n.astype(out_dtype)
|
cast_n = val_n.astype(out_dtype)
|
||||||
|
@ -113,7 +136,7 @@ def binary_morph_test():
|
||||||
input_n = np.random.randint( 2, size=shape ).astype(dtype)
|
input_n = np.random.randint( 2, size=shape ).astype(dtype)
|
||||||
input_t = Tensor.from_value(input_n)
|
input_t = Tensor.from_value(input_n)
|
||||||
|
|
||||||
print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='')
|
print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
op.binary_morph(input_t, erode_dilate=erode_dilate, blur=blur, fade_to_border=True)
|
op.binary_morph(input_t, erode_dilate=erode_dilate, blur=blur, fade_to_border=True)
|
||||||
|
|
||||||
|
@ -130,7 +153,7 @@ def binary_erode_circle_test():
|
||||||
input_n = np.random.randint( 2, size=shape ).astype(dtype)
|
input_n = np.random.randint( 2, size=shape ).astype(dtype)
|
||||||
input_t = Tensor.from_value(input_n)
|
input_t = Tensor.from_value(input_n)
|
||||||
|
|
||||||
print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
|
print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
op.binary_erode_circle(input_t, radius=radius, iterations=iterations)
|
op.binary_erode_circle(input_t, radius=radius, iterations=iterations)
|
||||||
|
|
||||||
|
@ -147,7 +170,7 @@ def binary_dilate_circle_test():
|
||||||
input_n = np.random.randint( 2, size=shape ).astype(dtype)
|
input_n = np.random.randint( 2, size=shape ).astype(dtype)
|
||||||
input_t = Tensor.from_value(input_n)
|
input_t = Tensor.from_value(input_n)
|
||||||
|
|
||||||
print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
|
print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
op.binary_dilate_circle(input_t, radius=radius, iterations=iterations)
|
op.binary_dilate_circle(input_t, radius=radius, iterations=iterations)
|
||||||
|
|
||||||
|
@ -156,11 +179,11 @@ def binary_dilate_circle_test():
|
||||||
|
|
||||||
def gaussian_blur_test():
|
def gaussian_blur_test():
|
||||||
for shape_len in range(2,5):
|
for shape_len in range(2,5):
|
||||||
for dtype in [np.float16, np.float32, np.float64]:
|
for dtype in [np.float16, np.float32]:
|
||||||
|
|
||||||
shape = np.random.randint( 1, 64, size=(shape_len,) )
|
shape = np.random.randint( 1, 64, size=(shape_len,) )
|
||||||
sigma = np.random.rand() * 10
|
sigma = np.random.rand() * 10
|
||||||
print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='')
|
print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
||||||
val_t = Tensor.from_value(val_n)
|
val_t = Tensor.from_value(val_n)
|
||||||
|
@ -179,7 +202,7 @@ def pad_test():
|
||||||
|
|
||||||
paddings = tuple( (np.random.randint(8), np.random.randint(8)) for i in range(len(shape)) )
|
paddings = tuple( (np.random.randint(8), np.random.randint(8)) for i in range(len(shape)) )
|
||||||
|
|
||||||
print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='')
|
print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
||||||
pad_n = np.pad(val_n, paddings, mode=mode)
|
pad_n = np.pad(val_n, paddings, mode=mode)
|
||||||
|
@ -187,7 +210,7 @@ def pad_test():
|
||||||
val_t = Tensor.from_value(val_n)
|
val_t = Tensor.from_value(val_n)
|
||||||
pad_t = op.pad(val_t, paddings, mode=mode)
|
pad_t = op.pad(val_t, paddings, mode=mode)
|
||||||
|
|
||||||
print(f'{pad_n.shape} == {pad_t.shape} ... ', end='')
|
print(f'{pad_n.shape} == {pad_t.shape} ... ', end='', flush=True)
|
||||||
|
|
||||||
if pad_n.shape != pad_t.shape:
|
if pad_n.shape != pad_t.shape:
|
||||||
raise Exception(f'shape is not equal')
|
raise Exception(f'shape is not equal')
|
||||||
|
@ -241,7 +264,7 @@ def slice_set_test():
|
||||||
shape = tuple(shape)
|
shape = tuple(shape)
|
||||||
slices = tuple(slices)
|
slices = tuple(slices)
|
||||||
|
|
||||||
print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
|
print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)
|
||||||
|
|
||||||
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
||||||
val_t = Tensor.from_value(val_n)
|
val_t = Tensor.from_value(val_n)
|
||||||
|
@ -330,7 +353,7 @@ def depthwise_conv2d_test():
|
||||||
input_shape = (n, ic, ih, iw)
|
input_shape = (n, ic, ih, iw)
|
||||||
kernel_shape = (ic, ks, ks)
|
kernel_shape = (ic, ks, ks)
|
||||||
|
|
||||||
print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='')
|
print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
input_n = np.random.randint( 64, size=input_shape ).astype(dtype)
|
input_n = np.random.randint( 64, size=input_shape ).astype(dtype)
|
||||||
kernel_n = np.ones(shape=kernel_shape ).astype(dtype)
|
kernel_n = np.ones(shape=kernel_shape ).astype(dtype)
|
||||||
|
@ -358,7 +381,7 @@ def warp_affine_test():
|
||||||
H = np.random.randint(8, 64)
|
H = np.random.randint(8, 64)
|
||||||
W = np.random.randint(8, 64)
|
W = np.random.randint(8, 64)
|
||||||
|
|
||||||
print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
|
print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
|
input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
|
||||||
|
|
||||||
|
@ -380,7 +403,7 @@ def remap_np_affine_test():
|
||||||
H = np.random.randint(8, 64)
|
H = np.random.randint(8, 64)
|
||||||
W = np.random.randint(8, 64)
|
W = np.random.randint(8, 64)
|
||||||
|
|
||||||
print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
|
print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
|
input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
|
||||||
|
|
||||||
|
@ -402,7 +425,7 @@ def remap_test():
|
||||||
H = np.random.randint(8, 64)
|
H = np.random.randint(8, 64)
|
||||||
W = np.random.randint(8, 64)
|
W = np.random.randint(8, 64)
|
||||||
|
|
||||||
print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
|
print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
|
input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
|
||||||
|
|
||||||
|
@ -422,7 +445,7 @@ def tile_test():
|
||||||
shape = tuple(np.random.randint( 8, size=(shape_len,) )+1)
|
shape = tuple(np.random.randint( 8, size=(shape_len,) )+1)
|
||||||
tiles = tuple(np.random.randint( 4, size=(shape_len,) )+1)
|
tiles = tuple(np.random.randint( 4, size=(shape_len,) )+1)
|
||||||
|
|
||||||
print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='')
|
print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
||||||
tiled_n = np.tile(val_n, tiles)
|
tiled_n = np.tile(val_n, tiles)
|
||||||
|
@ -430,7 +453,7 @@ def tile_test():
|
||||||
val_t = Tensor.from_value(val_n)
|
val_t = Tensor.from_value(val_n)
|
||||||
tiled_t = op.tile(val_t, tiles)
|
tiled_t = op.tile(val_t, tiles)
|
||||||
|
|
||||||
print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='')
|
print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='', flush=True)
|
||||||
|
|
||||||
if tiled_n.shape != tiled_t.shape:
|
if tiled_n.shape != tiled_t.shape:
|
||||||
raise Exception(f'shape is not equal')
|
raise Exception(f'shape is not equal')
|
||||||
|
@ -448,7 +471,7 @@ def stack_test():
|
||||||
axis = np.random.randint(shape_len+1)
|
axis = np.random.randint(shape_len+1)
|
||||||
stack_count = np.random.randint(4)+1
|
stack_count = np.random.randint(4)+1
|
||||||
|
|
||||||
print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='')
|
print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
vals_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for i in range(stack_count) ]
|
vals_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for i in range(stack_count) ]
|
||||||
stack_n = np.stack(vals_n, axis)
|
stack_n = np.stack(vals_n, axis)
|
||||||
|
@ -456,7 +479,7 @@ def stack_test():
|
||||||
vals_t = [ Tensor.from_value(vals_n[i]) for i in range(stack_count) ]
|
vals_t = [ Tensor.from_value(vals_n[i]) for i in range(stack_count) ]
|
||||||
stack_t = op.stack(vals_t, axis)
|
stack_t = op.stack(vals_t, axis)
|
||||||
|
|
||||||
print(f'{stack_n.shape} == {stack_t.shape} ... ', end='')
|
print(f'{stack_n.shape} == {stack_t.shape} ... ', end='', flush=True)
|
||||||
|
|
||||||
if stack_n.shape != stack_t.shape:
|
if stack_n.shape != stack_t.shape:
|
||||||
raise Exception('shape is not equal')
|
raise Exception('shape is not equal')
|
||||||
|
@ -483,9 +506,9 @@ def reduce_test():
|
||||||
|
|
||||||
keepdims = np.random.randint(2) == 0
|
keepdims = np.random.randint(2) == 0
|
||||||
|
|
||||||
print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='')
|
print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='', flush=True)
|
||||||
|
|
||||||
if dtype in [np.float16, np.float32, np.float64]:
|
if dtype in [np.float16, np.float32]:
|
||||||
value_n = np.random.uniform(size=shape).astype(dtype)
|
value_n = np.random.uniform(size=shape).astype(dtype)
|
||||||
else:
|
else:
|
||||||
value_n = np.random.randint( max(1, int(np.iinfo(dtype).max / np.prod(shape)) ), size=shape, dtype=dtype )
|
value_n = np.random.randint( max(1, int(np.iinfo(dtype).max / np.prod(shape)) ), size=shape, dtype=dtype )
|
||||||
|
@ -518,7 +541,7 @@ def InitRandomUniform_test():
|
||||||
for shape_len in range(1, 5):
|
for shape_len in range(1, 5):
|
||||||
shape = np.random.randint( 8, size=(shape_len,) )+1
|
shape = np.random.randint( 8, size=(shape_len,) )+1
|
||||||
|
|
||||||
print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='')
|
print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
Tensor(shape, dtype, initializer=InitRandomUniform()).np()
|
Tensor(shape, dtype, initializer=InitRandomUniform()).np()
|
||||||
|
|
||||||
|
@ -534,7 +557,7 @@ def InitCoords2DArange_test():
|
||||||
w_start = np.random.randint(80)
|
w_start = np.random.randint(80)
|
||||||
w_stop = w_start + np.random.randint(80)
|
w_stop = w_start + np.random.randint(80)
|
||||||
|
|
||||||
print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='')
|
print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
Tensor(shape, dtype, initializer=InitCoords2DArange(h_start,h_stop,w_start,w_stop )).np()
|
Tensor(shape, dtype, initializer=InitCoords2DArange(h_start,h_stop,w_start,w_stop )).np()
|
||||||
|
|
||||||
|
@ -551,17 +574,17 @@ def concat_test():
|
||||||
for i,dim in enumerate(shape) )
|
for i,dim in enumerate(shape) )
|
||||||
for shape in ([shape] * count) )
|
for shape in ([shape] * count) )
|
||||||
|
|
||||||
print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='')
|
print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
V_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for shape in shapes ]
|
V_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for shape in shapes ]
|
||||||
O_n = np.concatenate(V_n, axis)
|
O_n = np.concatenate(V_n, axis)
|
||||||
|
|
||||||
print(f'{O_n.shape} == ', end='')
|
print(f'{O_n.shape} == ', end='', flush=True)
|
||||||
|
|
||||||
V_t = [ Tensor.from_value(V_n[i]) for i in range(count) ]
|
V_t = [ Tensor.from_value(V_n[i]) for i in range(count) ]
|
||||||
O_t = op.concat(V_t, axis)
|
O_t = op.concat(V_t, axis)
|
||||||
|
|
||||||
print(f'{O_t.shape} ... ', end='')
|
print(f'{O_t.shape} ... ', end='', flush=True)
|
||||||
|
|
||||||
if O_n.shape != O_t.shape:
|
if O_n.shape != O_t.shape:
|
||||||
raise Exception('shape is not equal')
|
raise Exception('shape is not equal')
|
||||||
|
@ -596,19 +619,19 @@ def matmul_test():
|
||||||
A_shape = (BATCH, M, K)
|
A_shape = (BATCH, M, K)
|
||||||
B_shape = (BATCH, K, N)
|
B_shape = (BATCH, K, N)
|
||||||
|
|
||||||
print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='')
|
print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='', flush=True)
|
||||||
|
|
||||||
A_n = np.random.randint( 2**4, size=A_shape ).astype(dtype)
|
A_n = np.random.randint( 2**4, size=A_shape ).astype(dtype)
|
||||||
B_n = np.random.randint( 2**4, size=B_shape ).astype(dtype)
|
B_n = np.random.randint( 2**4, size=B_shape ).astype(dtype)
|
||||||
|
|
||||||
O_n = np.matmul(A_n, B_n)
|
O_n = np.matmul(A_n, B_n)
|
||||||
|
|
||||||
print(f'{O_n.shape} == ', end='')
|
print(f'{O_n.shape} == ', end='', flush=True)
|
||||||
|
|
||||||
A_t = Tensor.from_value(A_n)
|
A_t = Tensor.from_value(A_n)
|
||||||
B_t = Tensor.from_value(B_n)
|
B_t = Tensor.from_value(B_n)
|
||||||
O_t = op.matmul(A_t, B_t)
|
O_t = op.matmul(A_t, B_t)
|
||||||
print(f'{O_t.shape} ... ', end='')
|
print(f'{O_t.shape} ... ', end='', flush=True)
|
||||||
|
|
||||||
if O_n.shape != O_t.shape:
|
if O_n.shape != O_t.shape:
|
||||||
raise Exception('shape is not equal')
|
raise Exception('shape is not equal')
|
||||||
|
@ -659,17 +682,17 @@ def slice_test():
|
||||||
shape = tuple(shape)
|
shape = tuple(shape)
|
||||||
slices = tuple(slices)
|
slices = tuple(slices)
|
||||||
|
|
||||||
print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
|
print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)
|
||||||
|
|
||||||
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
||||||
|
|
||||||
sliced_n = val_n[slices]
|
sliced_n = val_n[slices]
|
||||||
|
|
||||||
print(f'{sliced_n.shape} ... ', end='')
|
print(f'{sliced_n.shape} ... ', end='', flush=True)
|
||||||
|
|
||||||
sliced_t = Tensor.from_value(val_n)[slices]
|
sliced_t = Tensor.from_value(val_n)[slices]
|
||||||
|
|
||||||
print(f'{sliced_t.shape} ... ', end='')
|
print(f'{sliced_t.shape} ... ', end='', flush=True)
|
||||||
|
|
||||||
if 0 in sliced_n.shape:
|
if 0 in sliced_n.shape:
|
||||||
# some cases like 0:1:-1 will produce zero shape and invalid array on numpy
|
# some cases like 0:1:-1 will produce zero shape and invalid array on numpy
|
||||||
|
@ -694,17 +717,17 @@ def transpose_test():
|
||||||
axes_order = np.array([*range(shape_len)])
|
axes_order = np.array([*range(shape_len)])
|
||||||
np.random.shuffle(axes_order)
|
np.random.shuffle(axes_order)
|
||||||
|
|
||||||
print(f'transpose: {shape} {axes_order} ... ', end='')
|
print(f'transpose: {shape} {axes_order} ... ', end='', flush=True)
|
||||||
|
|
||||||
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
|
||||||
transposed_n = np.transpose(val_n, axes_order)
|
transposed_n = np.transpose(val_n, axes_order)
|
||||||
|
|
||||||
print(f'{transposed_n.shape} ... ', end='')
|
print(f'{transposed_n.shape} ... ', end='', flush=True)
|
||||||
|
|
||||||
val_t = Tensor.from_value(val_n)
|
val_t = Tensor.from_value(val_n)
|
||||||
transposed_t = op.transpose (val_t, axes_order )
|
transposed_t = op.transpose (val_t, axes_order )
|
||||||
|
|
||||||
print(f'{transposed_t.shape} ... ', end='')
|
print(f'{transposed_t.shape} ... ', end='', flush=True)
|
||||||
|
|
||||||
if transposed_n.shape != transposed_t.shape:
|
if transposed_n.shape != transposed_t.shape:
|
||||||
raise Exception('shape is not equal')
|
raise Exception('shape is not equal')
|
||||||
|
@ -736,7 +759,7 @@ def any_wise_op_test():
|
||||||
shapes = shapes[::-1]
|
shapes = shapes[::-1]
|
||||||
a_shape, b_shape = shapes
|
a_shape, b_shape = shapes
|
||||||
|
|
||||||
print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='')
|
print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='', flush=True)
|
||||||
|
|
||||||
a_n = np.random.randint( 1, 2**8, size=a_shape ).astype(dtype)
|
a_n = np.random.randint( 1, 2**8, size=a_shape ).astype(dtype)
|
||||||
b_n = np.random.randint( 1, 2**8, size=b_shape ).astype(dtype)
|
b_n = np.random.randint( 1, 2**8, size=b_shape ).astype(dtype)
|
||||||
|
|
|
@ -109,6 +109,7 @@ class Tensor:
|
||||||
def min(self, axes=None, keepdims=False) -> 'Tensor': ...
|
def min(self, axes=None, keepdims=False) -> 'Tensor': ...
|
||||||
def reshape(self, new_shape) -> 'Tensor': ...
|
def reshape(self, new_shape) -> 'Tensor': ...
|
||||||
def sum(self, axes=None, keepdims=False) -> 'Tensor': ...
|
def sum(self, axes=None, keepdims=False) -> 'Tensor': ...
|
||||||
|
def std(self, axes=None, keepdims=False) -> 'Tensor': ...
|
||||||
def transpose(self, axes_order, op_text=None, dtype=None) -> 'Tensor': ...
|
def transpose(self, axes_order, op_text=None, dtype=None) -> 'Tensor': ...
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -70,6 +70,7 @@ Tensor.mean = reduce_mean
|
||||||
Tensor.min = reduce_min
|
Tensor.min = reduce_min
|
||||||
Tensor.reshape = reshape
|
Tensor.reshape = reshape
|
||||||
Tensor.sum = reduce_sum
|
Tensor.sum = reduce_sum
|
||||||
|
Tensor.std = reduce_std
|
||||||
Tensor.transpose = transpose
|
Tensor.transpose = transpose
|
||||||
|
|
||||||
class TensorRef(Tensor):
|
class TensorRef(Tensor):
|
||||||
|
|
|
@ -18,8 +18,7 @@ _np_dtype_to_cl = { np.uint8: CL.cl_uchar,
|
||||||
np.uint64: CL.cl_ulong,
|
np.uint64: CL.cl_ulong,
|
||||||
np.int64: CL.cl_long,
|
np.int64: CL.cl_long,
|
||||||
np.float16: CL.cl_half,
|
np.float16: CL.cl_half,
|
||||||
np.float32: CL.cl_float,
|
np.float32: CL.cl_float}
|
||||||
np.float64: CL.cl_double }
|
|
||||||
|
|
||||||
_opencl_device_ids = None
|
_opencl_device_ids = None
|
||||||
_default_device = None
|
_default_device = None
|
||||||
|
|
|
@ -38,8 +38,6 @@ class InitRandomUniform(Initializer):
|
||||||
gen_expression = f'hash_ulong_from_ulong(gid+seed64) % {int(hl)} + {int(l)}'
|
gen_expression = f'hash_ulong_from_ulong(gid+seed64) % {int(hl)} + {int(l)}'
|
||||||
elif tensor.dtype in [np.float16, np.float32]:
|
elif tensor.dtype in [np.float16, np.float32]:
|
||||||
gen_expression = f'hash_float_from_uint(gid+seed32)*{hl} + {l}'
|
gen_expression = f'hash_float_from_uint(gid+seed32)*{hl} + {l}'
|
||||||
elif tensor.dtype in [np.float64]:
|
|
||||||
gen_expression = f'hash_double_from_ulong(gid+seed64)*{hl} + {l}'
|
|
||||||
|
|
||||||
kernel = Kernel(kernel_text=f"""
|
kernel = Kernel(kernel_text=f"""
|
||||||
{HKernel.include_hash()}
|
{HKernel.include_hash()}
|
||||||
|
|
|
@ -9,12 +9,13 @@ from .depthwise_conv2D import depthwise_conv2D
|
||||||
from .gaussian_blur import gaussian_blur
|
from .gaussian_blur import gaussian_blur
|
||||||
from .matmul import matmul, matmulc
|
from .matmul import matmul, matmulc
|
||||||
from .pad import pad
|
from .pad import pad
|
||||||
|
from .rct import rct
|
||||||
from .reduce import (moments, reduce_max, reduce_mean, reduce_min, reduce_std,
|
from .reduce import (moments, reduce_max, reduce_mean, reduce_min, reduce_std,
|
||||||
reduce_sum, reduce_variance)
|
reduce_sum, reduce_variance)
|
||||||
from .remap import remap
|
from .remap import remap
|
||||||
from .remap_np_affine import remap_np_affine
|
from .remap_np_affine import remap_np_affine
|
||||||
from .reshape import reshape
|
from .reshape import reshape
|
||||||
from .slice_ import slice_
|
from .slice_ import slice_, split
|
||||||
from .slice_set import slice_set
|
from .slice_set import slice_set
|
||||||
from .stack import stack
|
from .stack import stack
|
||||||
from .tile import tile
|
from .tile import tile
|
||||||
|
|
|
@ -1,27 +1,31 @@
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from ..AAxes import AAxes
|
||||||
from ..AShape import AShape
|
from ..AShape import AShape
|
||||||
from ..backend import Kernel
|
from ..backend import Kernel
|
||||||
from ..HArgs import HArgs
|
from ..HArgs import HArgs
|
||||||
from ..HKernel import HKernel
|
from ..HKernel import HKernel
|
||||||
from ..HType import HType
|
from ..HType import HType
|
||||||
from ..info import BroadcastInfo
|
from ..info import BroadcastInfo, ReductionInfo
|
||||||
from ..SCacheton import SCacheton
|
from ..SCacheton import SCacheton
|
||||||
from ..Tensor import Tensor
|
from ..Tensor import Tensor
|
||||||
|
|
||||||
|
|
||||||
def any_wise(op_text : str,
|
def any_wise(op_text : str,
|
||||||
*args,
|
*args,
|
||||||
|
dim_wise_axis : int = None,
|
||||||
dtype : np.dtype = None,
|
dtype : np.dtype = None,
|
||||||
output_t:Tensor=None) -> Tensor:
|
output_t:Tensor=None) -> Tensor:
|
||||||
"""
|
"""
|
||||||
operator for N-wise ops with N inputs
|
elements-wise operator with N inputs
|
||||||
|
|
||||||
arguments
|
arguments
|
||||||
op_text example: O=(2*I0*I1)+I2
|
op_text example: O=(2*I0*I1)+I2
|
||||||
|
|
||||||
*args List[ Tensor | number ]
|
*args List[ Tensor | number ]
|
||||||
|
|
||||||
|
dim_wise_axis(None)
|
||||||
|
|
||||||
dtype
|
dtype
|
||||||
|
|
||||||
output_t compute result to this Tensor.
|
output_t compute result to this Tensor.
|
||||||
|
@ -33,7 +37,7 @@ def any_wise(op_text : str,
|
||||||
|
|
||||||
shape_list, dtype_list, krn_args = HArgs.decompose(args)
|
shape_list, dtype_list, krn_args = HArgs.decompose(args)
|
||||||
|
|
||||||
op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dtype, op_text)
|
op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dim_wise_axis, dtype, op_text)
|
||||||
|
|
||||||
if output_t is None:
|
if output_t is None:
|
||||||
output_t = Tensor ( op.o_shape, op.o_dtype, device=device )
|
output_t = Tensor ( op.o_shape, op.o_dtype, device=device )
|
||||||
|
@ -45,59 +49,60 @@ def any_wise(op_text : str,
|
||||||
return output_t
|
return output_t
|
||||||
|
|
||||||
class _AnyWiseOp:
|
class _AnyWiseOp:
|
||||||
def __init__(self, shape_list, dtype_list, o_dtype, op_text : str):
|
def __init__(self, shape_list, dtype_list, dim_wise_axis, o_dtype, op_text : str):
|
||||||
if len(shape_list) != len(dtype_list):
|
if len(shape_list) != len(dtype_list):
|
||||||
raise ValueError('len(shape_list) != len(dtype_list)')
|
raise ValueError('len(shape_list) != len(dtype_list)')
|
||||||
|
|
||||||
self.o_dtype = o_dtype = o_dtype if o_dtype is not None else HType.get_most_weighted_dtype (dtype_list)
|
self.o_dtype = o_dtype = o_dtype if o_dtype is not None else HType.get_most_weighted_dtype (dtype_list)
|
||||||
|
|
||||||
if len(shape_list) == 1:
|
|
||||||
# element-wise.
|
|
||||||
i_shape, i_dtype = shape_list[0], dtype_list[0]
|
|
||||||
self.o_shape = o_shape = i_shape
|
|
||||||
|
|
||||||
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
|
|
||||||
{HKernel.define_tensor('O', o_shape, o_dtype)}
|
|
||||||
{HKernel.define_tensor('IN', i_shape, i_dtype)}
|
|
||||||
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const IN_PTR_TYPE* IN_PTR_NAME)
|
|
||||||
{{
|
|
||||||
size_t gid = get_global_id(0);
|
|
||||||
|
|
||||||
O_TYPE O = O_GLOBAL_LOAD(gid);
|
|
||||||
IN_TYPE I0 = IN_GLOBAL_LOAD(gid);
|
|
||||||
{op_text};
|
|
||||||
O_GLOBAL_STORE(gid, O);
|
|
||||||
}}
|
|
||||||
""")
|
|
||||||
else:
|
|
||||||
# Multi arg.
|
|
||||||
self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
|
self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
|
||||||
|
|
||||||
self.o_shape = o_shape = info.o_shape
|
self.o_shape = o_shape = info.o_shape
|
||||||
|
|
||||||
|
g_shape = o_shape
|
||||||
|
if dim_wise_axis is not None:
|
||||||
|
dim_wise_axis = o_shape.check_axis(dim_wise_axis)
|
||||||
|
|
||||||
|
dim_wise_axis_size = o_shape[dim_wise_axis]
|
||||||
|
if dim_wise_axis_size > 16:
|
||||||
|
raise ValueError(f'dim_wise_axis size > 16: {dim_wise_axis_size}')
|
||||||
|
|
||||||
|
g_shape = ReductionInfo( o_shape, AAxes(dim_wise_axis), False ).o_shape
|
||||||
|
|
||||||
defs, arg_defs, impls = [], [], []
|
defs, arg_defs, impls = [], [], []
|
||||||
for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
|
for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
|
||||||
t_name = f'I{i}'
|
t_name = f'I{i}'
|
||||||
if t_shape is not None:
|
if t_shape is not None:
|
||||||
defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
|
defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
|
||||||
arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
|
arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
|
||||||
impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('O', info.o_shape.ndim)}));")
|
|
||||||
|
if dim_wise_axis is not None:
|
||||||
|
for i_elem in range(dim_wise_axis_size):
|
||||||
|
impls.append( f"{t_name}_TYPE {t_name}_{i_elem} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}));")
|
||||||
|
else:
|
||||||
|
impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim)}));")
|
||||||
else:
|
else:
|
||||||
arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
|
arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
|
||||||
|
|
||||||
defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
|
defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
|
||||||
|
|
||||||
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
|
if dim_wise_axis is not None:
|
||||||
|
o_def = '\n'.join( f"O_TYPE O_{i_elem};" for i_elem in range(dim_wise_axis_size) )
|
||||||
|
o_store = '\n'.join( f"O_GLOBAL_STORE(O_IDX({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}), O_{i_elem});" for i_elem in range(dim_wise_axis_size) )
|
||||||
|
else:
|
||||||
|
o_def = 'O_TYPE O;'
|
||||||
|
o_store = 'O_GLOBAL_STORE(gid, O);'
|
||||||
|
|
||||||
|
self.forward_krn = Kernel(global_shape=(g_shape.size,), kernel_text=f"""
|
||||||
{defs}
|
{defs}
|
||||||
{HKernel.define_tensor('O', o_shape, o_dtype)}
|
{HKernel.define_tensor('O', o_shape, o_dtype)}
|
||||||
|
{HKernel.define_tensor_shape('G', g_shape)}
|
||||||
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME{arg_defs})
|
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME{arg_defs})
|
||||||
{{
|
{{
|
||||||
size_t gid = get_global_id(0);
|
size_t gid = get_global_id(0);
|
||||||
{HKernel.decompose_idx_to_axes_idxs('gid', 'o', o_shape.ndim)}
|
{HKernel.decompose_idx_to_axes_idxs('gid', 'G', g_shape.ndim)}
|
||||||
{impls}
|
{impls}
|
||||||
O_TYPE O;
|
{o_def}
|
||||||
{op_text};
|
{op_text};
|
||||||
O_GLOBAL_STORE(gid, O);
|
{o_store}
|
||||||
}}
|
}}
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ def cvt_color (input_t : Tensor, in_mode : str, out_mode : str, ch_axis=1, dtype
|
||||||
return output_t
|
return output_t
|
||||||
|
|
||||||
_allowed_modes = ['RGB', 'BGR', 'XYZ', 'LAB']
|
_allowed_modes = ['RGB', 'BGR', 'XYZ', 'LAB']
|
||||||
_allowed_dtypes = [np.float16, np.float32, np.float64]
|
_allowed_dtypes = [np.float16, np.float32]
|
||||||
|
|
||||||
class _CvtColor32Op():
|
class _CvtColor32Op():
|
||||||
def __init__(self, i_shape : AShape, i_dtype, in_mode, o_dtype, out_mode, ch_axis):
|
def __init__(self, i_shape : AShape, i_dtype, in_mode, o_dtype, out_mode, ch_axis):
|
||||||
|
@ -100,54 +100,74 @@ class _CvtColor32Op():
|
||||||
self.forward_krn = krn
|
self.forward_krn = krn
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_RGB_to_LAB_body(R,G,B,L,a,b,lab_type='') -> str:
|
def get_RGB_to_LAB_body(R,G,B,L,a,b, declare_out_type=False) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,'X','Y','Z', xyz_type='float')}
|
{_CvtColor32Op.get_sRGB_to_XYZ_body(R,G,B,'X','Y','Z', declare_out_type=True)}
|
||||||
{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, lab_type=lab_type)}
|
{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, declare_out_type=declare_out_type)}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_LAB_to_RGB_body(L,a,b,R,G,B,rgb_type='') -> str:
|
def get_LAB_to_RGB_body(L,a,b,R,G,B, declare_out_type=False) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', xyz_type='float')}
|
{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', declare_out_type=True)}
|
||||||
{_CvtColor32Op.get_XYZ_to_RGB_body('X','Y','Z',R,G,B,rgb_type=rgb_type)}
|
{_CvtColor32Op.get_XYZ_to_sRGB_body('X','Y','Z',R,G,B, declare_out_type=declare_out_type)}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_RGB_to_XYZ_body(R,G,B,X,Y,Z,xyz_type='') -> str:
|
def get_sRGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{xyz_type} {X} = fma(0.4124564, {R}, fma(0.3575761, {G}, 0.1804375*{B}));
|
{R} = ({R} > 0.04045)*( pow( ({R}+0.055)/1.055, 2.4) ) + ({R} <= 0.04045)*({R} / 12.92);
|
||||||
{xyz_type} {Y} = fma(0.2126729, {R}, fma(0.7151522, {G}, 0.0721750*{B}));
|
{G} = ({G} > 0.04045)*( pow( ({G}+0.055)/1.055, 2.4) ) + ({G} <= 0.04045)*({G} / 12.92);
|
||||||
{xyz_type} {Z} = fma(0.0193339, {R}, fma(0.1191920, {G}, 0.9503041*{B}));
|
{B} = ({B} > 0.04045)*( pow( ({B}+0.055)/1.055, 2.4) ) + ({B} <= 0.04045)*({B} / 12.92);
|
||||||
"""
|
|
||||||
@staticmethod
|
{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,X,Y,Z,declare_out_type=declare_out_type) }
|
||||||
def get_XYZ_to_RGB_body(X,Y,Z,R,G,B,rgb_type='') -> str:
|
|
||||||
return f"""
|
|
||||||
{rgb_type} {R} = fma( 3.2404542, {X}, fma(-1.5371385, {Y}, -0.4985314*{Z}));
|
|
||||||
{rgb_type} {G} = fma(-0.9692660, {X}, fma( 1.8760108, {Y}, 0.0415560*{Z}));
|
|
||||||
{rgb_type} {B} = fma( 0.0556434, {X}, fma(-0.2040259, {Y}, 1.0572252*{Z}));
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_RGB_to_BGR_body(R,G,B,b,g,r,bgr_type='') -> str:
|
def get_RGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{bgr_type} {b} = {R};
|
{'float' if declare_out_type else ''} {X} = {R}*0.412453 + {G}*0.357580 + {B}*0.180423;
|
||||||
{bgr_type} {g} = {G};
|
{'float' if declare_out_type else ''} {Y} = {R}*0.212671 + {G}*0.715160 + {B}*0.072169;
|
||||||
{bgr_type} {r} = {B};
|
{'float' if declare_out_type else ''} {Z} = {R}*0.019334 + {G}*0.119193 + {B}*0.950227;
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_BGR_to_RGB_body(B,G,R,r,g,b,rgb_type='') -> str:
|
def get_XYZ_to_sRGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
|
||||||
return f"""
|
return f"""
|
||||||
{rgb_type} {r} = {B};
|
{_CvtColor32Op.get_XYZ_to_RGB_body(X,Y,Z,R,G,B,declare_out_type=declare_out_type) }
|
||||||
{rgb_type} {g} = {G};
|
{R} = ({R} > 0.0031308)*( 1.055*pow({R},1.0/2.4)-0.055 ) + ({R} <= 0.0031308)*({R} * 12.92);
|
||||||
{rgb_type} {b} = {R};
|
{G} = ({G} > 0.0031308)*( 1.055*pow({G},1.0/2.4)-0.055 ) + ({G} <= 0.0031308)*({G} * 12.92);
|
||||||
|
{B} = ({B} > 0.0031308)*( 1.055*pow({B},1.0/2.4)-0.055 ) + ({B} <= 0.0031308)*({B} * 12.92);
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_XYZ_to_LAB_body(X,Y,Z,L,A,B,lab_type='') -> str:
|
def get_XYZ_to_RGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
|
||||||
|
return f"""
|
||||||
|
{'float' if declare_out_type else ''} {R} = clamp( {X}* 3.240479 + {Y}*-1.53715 + {Z}*-0.498535, 0.0, 1.0 );
|
||||||
|
{'float' if declare_out_type else ''} {G} = clamp( {X}*-0.969256 + {Y}* 1.875991 + {Z}* 0.041556, 0.0, 1.0 );
|
||||||
|
{'float' if declare_out_type else ''} {B} = clamp( {X}* 0.055648 + {Y}*-0.204043 + {Z}* 1.057311, 0.0, 1.0 );
|
||||||
|
"""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_RGB_to_BGR_body(R,G,B,b,g,r, declare_out_type=False) -> str:
|
||||||
|
return f"""
|
||||||
|
{'float' if declare_out_type else ''} {b} = {R};
|
||||||
|
{'float' if declare_out_type else ''} {g} = {G};
|
||||||
|
{'float' if declare_out_type else ''} {r} = {B};
|
||||||
|
"""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_BGR_to_RGB_body(B,G,R,r,g,b, declare_out_type=False) -> str:
|
||||||
|
return f"""
|
||||||
|
{'float' if declare_out_type else ''} {r} = {B};
|
||||||
|
{'float' if declare_out_type else ''} {g} = {G};
|
||||||
|
{'float' if declare_out_type else ''} {b} = {R};
|
||||||
|
"""
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_XYZ_to_LAB_body(X,Y,Z,L,A,B, declare_out_type=False) -> str:
|
||||||
beta3 = '((6.0/29.0)*(6.0/29.0)*(6.0/29.0))'
|
beta3 = '((6.0/29.0)*(6.0/29.0)*(6.0/29.0))'
|
||||||
xyz_xn = '(0.9556)'
|
xyz_xn = '(0.950456)'
|
||||||
xyz_zn = '(1.088754)'
|
xyz_zn = '(1.088754)'
|
||||||
return f"""
|
return f"""
|
||||||
{X} /= {xyz_xn};
|
{X} /= {xyz_xn};
|
||||||
|
@ -157,20 +177,20 @@ class _CvtColor32Op():
|
||||||
{Y} = ({Y} > {beta3})*rootn({Y}, 3) + ({Y} <= {beta3})*(7.787*{Y}+4.0/29.0);
|
{Y} = ({Y} > {beta3})*rootn({Y}, 3) + ({Y} <= {beta3})*(7.787*{Y}+4.0/29.0);
|
||||||
{Z} = ({Z} > {beta3})*rootn({Z}, 3) + ({Z} <= {beta3})*(7.787*{Z}+4.0/29.0);
|
{Z} = ({Z} > {beta3})*rootn({Z}, 3) + ({Z} <= {beta3})*(7.787*{Z}+4.0/29.0);
|
||||||
|
|
||||||
{lab_type} {L} = 116.0*{Y}-16.0;
|
{'float' if declare_out_type else ''} {L} = 116.0*{Y}-16.0;
|
||||||
{lab_type} {A} = 500.0*({X}-{Y});
|
{'float' if declare_out_type else ''} {A} = 500.0*({X}-{Y});
|
||||||
{lab_type} {B} = 200.0*({Y}-{Z});
|
{'float' if declare_out_type else ''} {B} = 200.0*({Y}-{Z});
|
||||||
"""
|
"""
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_LAB_to_XYZ_body(L,A,B,X,Y,Z,xyz_type='') -> str:
|
def get_LAB_to_XYZ_body(L,A,B,X,Y,Z, declare_out_type=False) -> str:
|
||||||
beta = '(6.0/29.0)'
|
beta = '(6.0/29.0)'
|
||||||
beta2 = '((6.0/29.0)*(6.0/29.0))'
|
beta2 = '((6.0/29.0)*(6.0/29.0))'
|
||||||
xyz_xn = '(0.9556)'
|
xyz_xn = '(0.950456)'
|
||||||
xyz_zn = '(1.088754)'
|
xyz_zn = '(1.088754)'
|
||||||
return f"""
|
return f"""
|
||||||
{xyz_type} {Y} = ({L} + 16.0) / 116.0;
|
{'float' if declare_out_type else ''} {Y} = ({L} + 16.0) / 116.0;
|
||||||
{xyz_type} {X} = {Y} + {A} / 500.0;
|
{'float' if declare_out_type else ''} {X} = {Y} + {A} / 500.0;
|
||||||
{xyz_type} {Z} = {Y} - {B} / 200.0;
|
{'float' if declare_out_type else ''} {Z} = {Y} - {B} / 200.0;
|
||||||
|
|
||||||
{Y} = ({Y} > {beta})*({Y}*{Y}*{Y}) + ({Y} <= {beta})*({Y}-16.0/116.0)*3*{beta2};
|
{Y} = ({Y} > {beta})*({Y}*{Y}*{Y}) + ({Y} <= {beta})*({Y}-16.0/116.0)*3*{beta2};
|
||||||
{X} = ({X} > {beta})*({X}*{X}*{X}*{xyz_xn}) + ({X} <= {beta})*({X}-16.0/116.0)*3*{beta2}*{xyz_xn};
|
{X} = ({X} > {beta})*({X}*{X}*{X}*{xyz_xn}) + ({X} <= {beta})*({X}-16.0/116.0)*3*{beta2}*{xyz_xn};
|
||||||
|
|
|
@ -58,7 +58,7 @@ def reduce_variance(input_t, axes=None, keepdims=False):
|
||||||
mean = reduce_mean(input_t, axes, keepdims=True)
|
mean = reduce_mean(input_t, axes, keepdims=True)
|
||||||
return reduce_mean(square(input_t - mean), axes, keepdims)
|
return reduce_mean(square(input_t - mean), axes, keepdims)
|
||||||
|
|
||||||
def moments(input_t, axes=None, keepdims=False):
|
def moments(input_t, axes=None):
|
||||||
"""
|
"""
|
||||||
Returns (mean, variance) of input_t
|
Returns (mean, variance) of input_t
|
||||||
|
|
||||||
|
@ -68,11 +68,9 @@ def moments(input_t, axes=None, keepdims=False):
|
||||||
Iterable of ints.
|
Iterable of ints.
|
||||||
None - all axes
|
None - all axes
|
||||||
|
|
||||||
keepdims(False) keep reduced axes
|
|
||||||
"""
|
"""
|
||||||
mean = reduce_mean(input_t, axes, keepdims)
|
mean = reduce_mean(input_t, axes, True)
|
||||||
mean_shape_keepdims = mean._op.info.o_shape_kd
|
var = reduce_mean(square(input_t - mean), axes, True)
|
||||||
var = reduce_mean(square(input_t - mean.reshape(mean_shape_keepdims) ), axes, keepdims)
|
|
||||||
return mean, var
|
return mean, var
|
||||||
|
|
||||||
def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
|
def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
|
||||||
|
|
|
@ -1,6 +1,9 @@
|
||||||
|
from typing import List
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
from ..AShape import AShape
|
from ..AShape import AShape
|
||||||
|
from ..AAxes import AAxes
|
||||||
from ..backend import Kernel
|
from ..backend import Kernel
|
||||||
from ..HKernel import HKernel
|
from ..HKernel import HKernel
|
||||||
from ..HType import HType
|
from ..HType import HType
|
||||||
|
@ -9,6 +12,29 @@ from ..SCacheton import SCacheton
|
||||||
from ..Tensor import Tensor
|
from ..Tensor import Tensor
|
||||||
|
|
||||||
|
|
||||||
|
def split(input_t : Tensor, axis, keepdims=False) -> List[Tensor]:
|
||||||
|
"""
|
||||||
|
|
||||||
|
arguments
|
||||||
|
|
||||||
|
input_t Tensor
|
||||||
|
|
||||||
|
axis
|
||||||
|
|
||||||
|
"""
|
||||||
|
shape = input_t.shape
|
||||||
|
|
||||||
|
result = []
|
||||||
|
for i in range(shape[axis]):
|
||||||
|
slices = [slice(None, None, None)]*shape.ndim
|
||||||
|
|
||||||
|
slices[axis] = i if not keepdims else slice(i,i+1,1)
|
||||||
|
|
||||||
|
result.append( slice_(input_t, slices) )
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def slice_(input_t : Tensor, slices, dtype : np.dtype = None, output_t=None, is_add_to_output=False) -> Tensor:
|
def slice_(input_t : Tensor, slices, dtype : np.dtype = None, output_t=None, is_add_to_output=False) -> Tensor:
|
||||||
"""
|
"""
|
||||||
arguments:
|
arguments:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue