mirror of https://github.com/iperov/DeepFaceLive
synced 2025-08-14 02:37:01 -07:00

commit 6da916cc66 (parent 2d401f47f8)

update xlib.avecl

14 changed files with 246 additions and 184 deletions
@@ -3,6 +3,7 @@ AveCL ! Make OpenCL great again.

 Lightweight ndarray library using OpenCL 1.2 written in pure python.

 Applicable for high-performance general purpose n-dim array computations for every device that supports OpenCL 1.2.

+Supports any dtype except float64.

 Works in python 3.5+. Dependencies: numpy.
@@ -15,7 +15,7 @@ class AShape(Iterable):

             shape       AShape
                         Iterable

         AShape cannot be scalar shape, thus minimal AShape is (1,)

         can raise ValueError during the construction
@@ -50,13 +50,26 @@ class AShape(Iterable):

             self.size = size
         else:
             raise ValueError('Invalid type to create AShape')

     def copy(self) -> 'AShape':
         return AShape(self)

     def as_list(self) -> List[int]:
         return list(self.shape)

+    def check_axis(self, axis : int) -> int:
+        """
+        Check axis and returns normalized axis value
+
+        can raise ValueError
+        """
+        if axis < 0:
+            axis += self.ndim
+
+        if axis < 0 or axis >= self.ndim:
+            raise ValueError(f'axis {axis} out of bound of ndim {self.ndim}')
+        return axis

     def axes_arange(self) -> AAxes:
         """
         Returns tuple of axes arange.
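The new check_axis normalizes numpy-style negative axes before bounds-checking. A standalone Python sketch of the same logic (hypothetical free function, not part of the diff):

    def check_axis(ndim: int, axis: int) -> int:
        # Negative axes count from the end, numpy-style.
        if axis < 0:
            axis += ndim
        if axis < 0 or axis >= ndim:
            raise ValueError(f'axis {axis} out of bound of ndim {ndim}')
        return axis

    assert check_axis(4, -1) == 3   # last axis normalizes to 3
    assert check_axis(4, 2) == 2    # in-range axis is returned unchanged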
@@ -64,7 +77,7 @@ class AShape(Iterable):

         Example (0,1,2) for ndim 3
         """
         return AAxes(range(self.ndim))

     def replaced_axes(self, axes, dims) -> 'AShape':
         """
         returns new AShape where axes replaced with new dims
@@ -76,22 +89,22 @@ class AShape(Iterable):

                 axis = ndim + axis
             if axis < 0 or axis >= ndim:
                 raise ValueError(f'invalid axis value {axis}')

             new_shape[axis] = dim
         return AShape(new_shape)

     def split(self, axis) -> Tuple['AShape', 'AShape']:
         """
         split AShape at specified axis

         returns two AShape before+exclusive and inclusive+after
         """
         if axis < 0:
             axis = self.ndim + axis
         if axis < 0 or axis >= self.ndim:
             raise ValueError(f'invalid axis value {axis}')

         return self[:axis], self[axis:]

     def transpose_by_axes(self, axes) -> 'AShape':
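split's contract ("before+exclusive and inclusive+after") means the axis itself lands in the second part. A tuple-based sketch of the same slicing:

    def split_shape(shape, axis):
        # before+exclusive, inclusive+after: shape[axis] goes to the second part
        return shape[:axis], shape[axis:]

    assert split_shape((2, 3, 4), 1) == ((2,), (3, 4))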
@@ -15,12 +15,9 @@ class HKernel:

         np.int64 : 'long',
         np.uint64 : 'ulong',
         np.float16 : 'half',
-        np.float32 : 'float',
-        np.float64 : 'double'
+        np.float32 : 'float'
         }

     @staticmethod
     def np_dtype_to_cl(dtype : np.dtype):
         """
@@ -134,30 +131,33 @@ class HKernel:

         out += [f'#define {name_upper}_GLOBAL_STORE8(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']
         out += [f'#define {name_upper}_GLOBAL_STORE16(offset,value) {name_upper}_PTR_NAME[(offset)] = (value)']

-        if dtype in [np.float32, np.float64]:
+        if dtype in [np.float32]:
             out += [f'#define {name_upper}_TO_FLOATX(x) x']
         elif dtype in [np.bool_, np.int8, np.uint8, np.int16, np.uint16, np.int32,np.uint32, np.float16]:
             out += [f'#define {name_upper}_TO_FLOATX(x) ((float)x)']
         elif dtype in [np.int64,np.uint64]:
             out += [f'#define {name_upper}_TO_FLOATX(x) ((double)x)']
         return '\n'.join(out)

     @staticmethod
     def define_ndim_idx(ndim):
         """
         define macro to calculate index for n-dim shape

         example for ndim=3

         #define NDIM3_IDX(t0,t1,t2,T0,T1,T2) (((size_t)(t0))*T1*T2+((size_t)(t1))*T2+((size_t)(t2)))
         #define NDIM3_IDX_MOD(t0,t1,t2,T0,T1,T2) (((size_t)(t0) % T0)*T1*T2+((size_t)(t1) % T1)*T2+((size_t)(t2) % T2))
         """
         out = [f'#define NDIM{ndim}_IDX(' + \
                ','.join([f't{i}' for i in range(ndim)] + [f'T{i}' for i in range(ndim)]) + \
                ') (' + '+'.join([f'((size_t)(t{i}))' + ''.join(f'*T{j}' for j in range(i+1,ndim)) for i in range(ndim) ]) + ')']

         out += [f'#define NDIM{ndim}_IDX_MOD(' + \
                 ','.join([f't{i}' for i in range(ndim)] + [f'T{i}' for i in range(ndim)]) + \
                 ') (' + '+'.join([f'((size_t)(t{i}) % T{i})' + ''.join(f'*T{j}' for j in range(i+1,ndim)) for i in range(ndim) ]) + ')']

         return '\n'.join(out)

     @staticmethod
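The generated NDIM*_IDX macro is a row-major flat index. A quick numpy cross-check of the same arithmetic for ndim=3 (sketch, not library code):

    import numpy as np

    def ndim3_idx(t0, t1, t2, T0, T1, T2):
        # Same arithmetic the NDIM3_IDX macro expands to.
        return t0 * T1 * T2 + t1 * T2 + t2

    shape = (4, 5, 6)
    assert ndim3_idx(1, 2, 3, *shape) == np.ravel_multi_index((1, 2, 3), shape)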
@@ -165,14 +165,14 @@ class HKernel:

         """
         Returns a definitions for operations with tensor shape

-        example for 'O', (7,3),
+        example for 'O', (2,3),

-        #define O0 7
+        #define O0 2
         #define O1 3
         #define Om1 3
-        #define Om2 7
-        #define O_IDX(o0,o1) ( (size_t)(o0) )*3 +( o1 )
-        #define O_IDX_MOD(o0,o1) ( (size_t)(o0) % 7 )*3 +( (o1) % 3 )
+        #define Om2 2
+        #define O_IDX(o0,o1) (((size_t)(o0))*3+((size_t)(o1)))
+        #define O_IDX_MOD(o0,o1) (((size_t)(o0) % 2)*3+((size_t)(o1) % 3))
         """
         shape = tuple(shape)
         ndim = len(shape)
@@ -183,36 +183,14 @@ class HKernel:

         axes_symbols = "".join([str(i) for i in range(ndim)])
         axes_symbols = axes_symbols.upper()

-        out = []
-        for i in range(ndim):
-            out += [f'#define {name_upper}{axes_symbols[i]} {shape[i]}']
-
-        for i in range(1,ndim+1):
-            out += [f'#define {name_upper}m{i} {shape[-i]}']
-
-        line = f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) '
-        for i in range(ndim):
-            line += f'( (size_t)({name_lower}{i}) )'
-            for j in range(i+1,ndim):
-                line += f'*{shape[j]} '
-            if i != ndim-1:
-                line += '+'
-        out += [line]
-
-        line = f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) '
-        for i in range(ndim):
-            line += f'( (size_t)({name_lower}{i}) % {shape[i]} )'
-            for j in range(i+1,ndim):
-                line += f'*{shape[j]} '
-            if i != ndim-1:
-                line += '+'
-        out += [line,'']
+        out = [f'#define {name_upper}{axes_symbols[i]} {shape[i]}' for i in range(ndim)]
+        out += [f'#define {name_upper}m{i} {shape[-i]}' for i in range(1,ndim+1)]
+
+        out += [f'#define {name_upper}_IDX({HKernel.axes_seq_enum(name, ndim)}) (' + \
+                '+'.join([f'((size_t)({name_lower}{i}))' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']
+
+        out += [f'#define {name_upper}_IDX_MOD({HKernel.axes_seq_enum(name, ndim)}) (' + \
+                '+'.join([f'((size_t)({name_lower}{i}) % {shape[i]})' + ''.join(f'*{shape[j]}' for j in range(i+1,ndim)) for i in range(ndim)]) + ')']

         return '\n'.join(out)
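To make the emitted text concrete, here is a trimmed re-run of the new comprehension-based generator for name 'O' and shape (2,3) (standalone sketch; the IDX macros are omitted):

    shape = (2, 3)
    ndim = len(shape)
    out = [f'#define O{i} {shape[i]}' for i in range(ndim)]
    out += [f'#define Om{i} {shape[-i]}' for i in range(1, ndim + 1)]
    print('\n'.join(out))
    # #define O0 2
    # #define O1 3
    # #define Om1 3
    # #define Om2 2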
@@ -3,10 +3,10 @@ from typing import Iterable, List

 import numpy as np

 scalar_types = [int, float, np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
-                np.float16, np.float32, np.float64, np.bool_]
+                np.float16, np.float32, np.bool_]

 np_scalar_types = [np.uint8, np.int8, np.uint16, np.int16, np.uint32, np.int32, np.uint64, np.int64,
-                   np.float16, np.float32, np.float64, np.bool_]
+                   np.float16, np.float32, np.bool_]

 _np_dtype_to_cl = {
     np.bool_ : 'bool',

@@ -20,7 +20,6 @@ _np_dtype_to_cl = {

     np.int64 : 'long',
     np.float16 : 'half',
     np.float32 : 'float',
-    np.float64 : 'double',
     }

 _np_dtype_weight = {

@@ -34,8 +33,7 @@ _np_dtype_weight = {

     np.uint64 : 8,
     np.int64 : 9,
     np.float16 : 10,
-    np.float32 : 11,
-    np.float64 : 12,
+    np.float32 : 11
     }

 class HType:
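_np_dtype_weight drives result-type promotion: the most-weighted dtype among the inputs wins, and float64 (formerly weight 12) is no longer a candidate. A sketch of that selection rule, using only the weights visible in the hunk above:

    import numpy as np

    # Subset of _np_dtype_weight from the diff; float64 removed.
    _np_dtype_weight = {np.uint64: 8, np.int64: 9, np.float16: 10, np.float32: 11}

    def most_weighted_dtype(dtypes):
        return max(dtypes, key=lambda d: _np_dtype_weight[d])

    assert most_weighted_dtype([np.int64, np.float16]) is np.float16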
@@ -1,14 +1,13 @@

 import traceback

 import numpy as np

-from .HType import HType
-from .NCore import NCore
-from .backend import get_device, get_default_device, set_default_device
-from .Tensor import Tensor
 from . import op
-from .initializer import InitRandomUniform, InitCoords2DArange
+from .backend import get_default_device, get_device, set_default_device
+from .HType import HType
 from .info import Conv2DInfo
+from .initializer import InitCoords2DArange, InitRandomUniform
+from .NCore import NCore
+from .Tensor import Tensor


 class NTest():
@@ -45,6 +44,7 @@ class NTest():

             binary_dilate_circle_test,
             binary_morph_test,
             cvt_color_test,
+            rct_test,
             ]

         for test_func in test_funcs:
@@ -62,18 +62,39 @@ class NTest():

 def _all_close(x,y, atol=1, btol=1):
     return np.allclose( np.ndarray.flatten(x[None,...]), np.ndarray.flatten(y[None,...]), atol, btol )

+def rct_test():
+    for _ in range(10):
+        for dtype in [np.float16, np.float32]:
+            base_shape = list(np.random.randint(1, 8, size=4) )
+            shape = base_shape.copy()
+            shape[1] = 3
+
+            mask_shape = base_shape.copy()
+            mask_shape[1] = 3
+
+            print(f'rct {shape} {str(np.dtype(dtype).name)} ... ', end='', flush=True)
+
+            source_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
+            target_t = Tensor(shape=shape, dtype=dtype, initializer=InitRandomUniform())
+            mask_t = Tensor(shape=mask_shape, dtype=dtype, initializer=InitRandomUniform())
+
+            result_t = op.rct(target_t, source_t, target_mask_t=mask_t, source_mask_t=mask_t )
+
+            print('pass')

 def cvt_color_test():
     for _ in range(10):
         for shape_len in range(2,6):
             for in_mode in ['RGB','BGR','XYZ','LAB']:
                 for out_mode in ['RGB','BGR','XYZ','LAB']:
-                    for dtype in [np.float16, np.float32, np.float64]:
+                    for dtype in [np.float16, np.float32]:
                         shape = list(np.random.randint(1, 8, size=shape_len) )

                         ch_axis = np.random.randint(len(shape))
                         shape[ch_axis] = 3

-                        print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='')
+                        print(f'cvt_color {shape} {str(np.dtype(dtype).name)} {in_mode}->{out_mode} ... ', end='', flush=True)

                         inp_n = np.random.uniform(size=shape ).astype(dtype)
                         inp_t = Tensor.from_value(inp_n)
@@ -81,7 +102,9 @@ def cvt_color_test():

                         out_t = op.cvt_color(inp_t, in_mode=in_mode, out_mode=out_mode, ch_axis=ch_axis)
                         inp_t2 = op.cvt_color(out_t, in_mode=out_mode, out_mode=in_mode, ch_axis=ch_axis)

-                        if not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
+                        is_check = in_mode in ['RGB','BGR','XYZ'] and out_mode in ['XYZ','LAB']
+
+                        if is_check and not _all_close(inp_t.np(), inp_t2.np(), atol=0.1, btol=0.1):
                             raise Exception(f'data is not equal')

                         print('pass')
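For background on what the new rct_test exercises: op.rct in this codebase is a Reinhard-style color transfer. A minimal numpy sketch of the unmasked core, assuming NCHW layout (the test forces shape[1] = 3); the names here are illustrative, not the library's API, and the mask weighting is omitted:

    import numpy as np

    def rct_np(target, source, eps=1e-5):
        # Match per-channel mean/std of target to source over N, H, W.
        axes = (0, 2, 3)
        t_mean, t_std = target.mean(axes, keepdims=True), target.std(axes, keepdims=True)
        s_mean, s_std = source.mean(axes, keepdims=True), source.std(axes, keepdims=True)
        return (target - t_mean) / (t_std + eps) * s_std + s_mean

    tgt = np.random.rand(1, 3, 8, 8).astype(np.float32)
    src = np.random.rand(1, 3, 8, 8).astype(np.float32)
    out = rct_np(tgt, src)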
@@ -91,7 +114,7 @@ def cast_test():

     for out_dtype in HType.get_np_scalar_types():
         shape = tuple(np.random.randint(1, 8, size=( np.random.randint(1,5))) )

-        print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='')
+        print(f'cast: {shape} in_dtype:{str(np.dtype(in_dtype).name)} out_dtype:{str(np.dtype(out_dtype).name)} ... ', end='', flush=True)

         val_n = np.random.uniform( -64, 64, size=shape ).astype(in_dtype)
         cast_n = val_n.astype(out_dtype)

@@ -113,7 +136,7 @@ def binary_morph_test():

     input_n = np.random.randint( 2, size=shape ).astype(dtype)
     input_t = Tensor.from_value(input_n)

-    print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='')
+    print(f'binary_morph: {shape} erode_dilate:{erode_dilate} blur:{blur} {np.dtype(dtype).name} ... ', end='', flush=True)

     op.binary_morph(input_t, erode_dilate=erode_dilate, blur=blur, fade_to_border=True)

@@ -130,7 +153,7 @@ def binary_erode_circle_test():

     input_n = np.random.randint( 2, size=shape ).astype(dtype)
     input_t = Tensor.from_value(input_n)

-    print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
+    print(f'binary_erode_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)

     op.binary_erode_circle(input_t, radius=radius, iterations=iterations)

@@ -147,7 +170,7 @@ def binary_dilate_circle_test():

     input_n = np.random.randint( 2, size=shape ).astype(dtype)
     input_t = Tensor.from_value(input_n)

-    print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='')
+    print(f'binary_dilate_circle: {shape} radius:{radius} iters:{iterations} {np.dtype(dtype).name} ... ', end='', flush=True)

     op.binary_dilate_circle(input_t, radius=radius, iterations=iterations)
@@ -156,11 +179,11 @@ def binary_dilate_circle_test():

 def gaussian_blur_test():
     for shape_len in range(2,5):
-        for dtype in [np.float16, np.float32, np.float64]:
+        for dtype in [np.float16, np.float32]:

             shape = np.random.randint( 1, 64, size=(shape_len,) )
             sigma = np.random.rand() * 10
-            print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='')
+            print(f'gaussian_blur: {shape} sigma:{sigma} {np.dtype(dtype).name} ... ', end='', flush=True)

             val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
             val_t = Tensor.from_value(val_n)
@@ -179,7 +202,7 @@ def pad_test():

     paddings = tuple( (np.random.randint(8), np.random.randint(8)) for i in range(len(shape)) )

-    print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='')
+    print(f'pad: {shape} {paddings} {mode} {np.dtype(dtype).name} ... ', end='', flush=True)

     val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
     pad_n = np.pad(val_n, paddings, mode=mode)

@@ -187,7 +210,7 @@ def pad_test():

     val_t = Tensor.from_value(val_n)
     pad_t = op.pad(val_t, paddings, mode=mode)

-    print(f'{pad_n.shape} == {pad_t.shape} ... ', end='')
+    print(f'{pad_n.shape} == {pad_t.shape} ... ', end='', flush=True)

     if pad_n.shape != pad_t.shape:
         raise Exception(f'shape is not equal')

@@ -241,7 +264,7 @@ def slice_set_test():

     shape = tuple(shape)
     slices = tuple(slices)

-    print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
+    print(f'slice_set: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)

     val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
     val_t = Tensor.from_value(val_n)
@@ -330,7 +353,7 @@ def depthwise_conv2d_test():

     input_shape  = (n, ic, ih, iw)
     kernel_shape = (ic, ks, ks)

-    print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='')
+    print(f'depthwise_conv2d: {input_shape},{kernel_shape},{padding},{stride},{dilation},{np.dtype(dtype).name} ... ', end='', flush=True)

     input_n  = np.random.randint( 64, size=input_shape ).astype(dtype)
     kernel_n = np.ones(shape=kernel_shape ).astype(dtype)

@@ -358,7 +381,7 @@ def warp_affine_test():

     H = np.random.randint(8, 64)
     W = np.random.randint(8, 64)

-    print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
+    print(f'warp_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)

     input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )

@@ -380,7 +403,7 @@ def remap_np_affine_test():

     H = np.random.randint(8, 64)
     W = np.random.randint(8, 64)

-    print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
+    print(f'remap_np_affine: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)

     input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )

@@ -402,7 +425,7 @@ def remap_test():

     H = np.random.randint(8, 64)
     W = np.random.randint(8, 64)

-    print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='')
+    print(f'remap: [{H},{W}] {np.dtype(dtype).name} ... ', end='', flush=True)

     input_t = Tensor ( [H,W,2], dtype, initializer=InitCoords2DArange(0, H-1, 0, W-1) ).sum( (-1,) )
@@ -422,7 +445,7 @@ def tile_test():

     shape = tuple(np.random.randint( 8, size=(shape_len,) )+1)
     tiles = tuple(np.random.randint( 4, size=(shape_len,) )+1)

-    print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='')
+    print(f'tile: {shape} {tiles} {np.dtype(dtype).name} ... ', end='', flush=True)

     val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
     tiled_n = np.tile(val_n, tiles)

@@ -430,7 +453,7 @@ def tile_test():

     val_t = Tensor.from_value(val_n)
     tiled_t = op.tile(val_t, tiles)

-    print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='')
+    print(f'{tiled_n.shape} == {tiled_t.shape} ... ', end='', flush=True)

     if tiled_n.shape != tiled_t.shape:
         raise Exception(f'shape is not equal')

@@ -448,7 +471,7 @@ def stack_test():

     axis = np.random.randint(shape_len+1)
     stack_count = np.random.randint(4)+1

-    print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='')
+    print(f'stack: {shape}*{stack_count} axis:{axis} {np.dtype(dtype).name} ... ', end='', flush=True)

     vals_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for i in range(stack_count) ]
     stack_n = np.stack(vals_n, axis)

@@ -456,7 +479,7 @@ def stack_test():

     vals_t = [ Tensor.from_value(vals_n[i]) for i in range(stack_count) ]
     stack_t = op.stack(vals_t, axis)

-    print(f'{stack_n.shape} == {stack_t.shape} ... ', end='')
+    print(f'{stack_n.shape} == {stack_t.shape} ... ', end='', flush=True)

     if stack_n.shape != stack_t.shape:
         raise Exception('shape is not equal')
@@ -483,9 +506,9 @@ def reduce_test():

     keepdims = np.random.randint(2) == 0

-    print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='')
+    print(f'reduce {op_type}: {shape} {np.dtype(dtype).name} axes={reduction_axes} keepdims={keepdims} ... ', end='', flush=True)

-    if dtype in [np.float16, np.float32, np.float64]:
+    if dtype in [np.float16, np.float32]:
         value_n = np.random.uniform(size=shape).astype(dtype)
     else:
         value_n = np.random.randint( max(1, int(np.iinfo(dtype).max / np.prod(shape)) ), size=shape, dtype=dtype )
@@ -518,7 +541,7 @@ def InitRandomUniform_test():

     for shape_len in range(1, 5):
         shape = np.random.randint( 8, size=(shape_len,) )+1

-        print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='')
+        print(f'InitRandomUniform: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)

         Tensor(shape, dtype, initializer=InitRandomUniform()).np()

@@ -534,7 +557,7 @@ def InitCoords2DArange_test():

     w_start = np.random.randint(80)
     w_stop = w_start + np.random.randint(80)

-    print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='')
+    print(f'InitCoords2DArange: {shape} {np.dtype(dtype).name} ... ', end='', flush=True)

     Tensor(shape, dtype, initializer=InitCoords2DArange(h_start,h_stop,w_start,w_stop )).np()
@@ -551,17 +574,17 @@ def concat_test():

                      for i,dim in enumerate(shape) )
                      for shape in ([shape] * count) )

-    print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='')
+    print(f'concat: {shapes} axis={axis} {np.dtype(dtype).name} ... ', end='', flush=True)

     V_n = [ np.random.randint( 2**8, size=shape ).astype(dtype) for shape in shapes ]
     O_n = np.concatenate(V_n, axis)

-    print(f'{O_n.shape} == ', end='')
+    print(f'{O_n.shape} == ', end='', flush=True)

     V_t = [ Tensor.from_value(V_n[i]) for i in range(count) ]
     O_t = op.concat(V_t, axis)

-    print(f'{O_t.shape} ... ', end='')
+    print(f'{O_t.shape} ... ', end='', flush=True)

     if O_n.shape != O_t.shape:
         raise Exception('shape is not equal')
@@ -596,19 +619,19 @@ def matmul_test():

     A_shape = (BATCH, M, K)
     B_shape = (BATCH, K, N)

-    print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='')
+    print(f'matmul: {A_shape} {B_shape} {np.dtype(dtype).name} ... ', end='', flush=True)

     A_n = np.random.randint( 2**4, size=A_shape ).astype(dtype)
     B_n = np.random.randint( 2**4, size=B_shape ).astype(dtype)

     O_n = np.matmul(A_n, B_n)

-    print(f'{O_n.shape} == ', end='')
+    print(f'{O_n.shape} == ', end='', flush=True)

     A_t = Tensor.from_value(A_n)
     B_t = Tensor.from_value(B_n)
     O_t = op.matmul(A_t, B_t)
-    print(f'{O_t.shape} ... ', end='')
+    print(f'{O_t.shape} ... ', end='', flush=True)

     if O_n.shape != O_t.shape:
         raise Exception('shape is not equal')
@@ -659,17 +682,17 @@ def slice_test():

     shape = tuple(shape)
     slices = tuple(slices)

-    print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='')
+    print(f'slice: {shape} {np.dtype(dtype).name} {slices} ... ', end='', flush=True)

     val_n = np.random.randint( 2**8, size=shape ).astype(dtype)

     sliced_n = val_n[slices]

-    print(f'{sliced_n.shape} ... ', end='')
+    print(f'{sliced_n.shape} ... ', end='', flush=True)

     sliced_t = Tensor.from_value(val_n)[slices]

-    print(f'{sliced_t.shape} ... ', end='')
+    print(f'{sliced_t.shape} ... ', end='', flush=True)

     if 0 in sliced_n.shape:
         # some cases like 0:1:-1 will produce zero shape and invalid array on numpy

@@ -694,17 +717,17 @@ def transpose_test():

     axes_order = np.array([*range(shape_len)])
     np.random.shuffle(axes_order)

-    print(f'transpose: {shape} {axes_order} ... ', end='')
+    print(f'transpose: {shape} {axes_order} ... ', end='', flush=True)

     val_n = np.random.randint( 2**8, size=shape ).astype(dtype)
     transposed_n = np.transpose(val_n, axes_order)

-    print(f'{transposed_n.shape} ... ', end='')
+    print(f'{transposed_n.shape} ... ', end='', flush=True)

     val_t = Tensor.from_value(val_n)
     transposed_t = op.transpose (val_t, axes_order )

-    print(f'{transposed_t.shape} ... ', end='')
+    print(f'{transposed_t.shape} ... ', end='', flush=True)

     if transposed_n.shape != transposed_t.shape:
         raise Exception('shape is not equal')
@@ -736,7 +759,7 @@ def any_wise_op_test():

     shapes = shapes[::-1]
     a_shape, b_shape = shapes

-    print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='')
+    print(f'any_wise: {a_shape} {str(op_type)} {b_shape}:{str(np.dtype(dtype).name)} ...', end='', flush=True)

     a_n = np.random.randint( 1, 2**8, size=a_shape ).astype(dtype)
     b_n = np.random.randint( 1, 2**8, size=b_shape ).astype(dtype)
@@ -109,6 +109,7 @@ class Tensor:

     def min(self, axes=None, keepdims=False) -> 'Tensor': ...
     def reshape(self, new_shape) -> 'Tensor': ...
     def sum(self, axes=None, keepdims=False) -> 'Tensor': ...
+    def std(self, axes=None, keepdims=False) -> 'Tensor': ...
     def transpose(self, axes_order, op_text=None, dtype=None) -> 'Tensor': ...

     @property

@@ -70,6 +70,7 @@ Tensor.mean = reduce_mean

 Tensor.min = reduce_min
 Tensor.reshape = reshape
 Tensor.sum = reduce_sum
+Tensor.std = reduce_std
 Tensor.transpose = transpose

 class TensorRef(Tensor):
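With reduce_std bound as a method, standard deviation reads like the other reductions. A hedged usage sketch (the import path is assumed from the commit message, xlib.avecl):

    import numpy as np
    from xlib.avecl import Tensor, InitRandomUniform  # assumed import path

    t = Tensor((4, 8), np.float32, initializer=InitRandomUniform())
    s = t.std(axes=(-1,), keepdims=True)   # newly bound reduce_std; shape (4, 1)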
@@ -18,8 +18,7 @@ _np_dtype_to_cl = { np.uint8: CL.cl_uchar,

     np.uint64: CL.cl_ulong,
     np.int64: CL.cl_long,
     np.float16: CL.cl_half,
-    np.float32: CL.cl_float,
-    np.float64: CL.cl_double }
+    np.float32: CL.cl_float}

 _opencl_device_ids = None
 _default_device = None
@@ -38,8 +38,6 @@ class InitRandomUniform(Initializer):

             gen_expression = f'hash_ulong_from_ulong(gid+seed64) % {int(hl)} + {int(l)}'
         elif tensor.dtype in [np.float16, np.float32]:
             gen_expression = f'hash_float_from_uint(gid+seed32)*{hl} + {l}'
-        elif tensor.dtype in [np.float64]:
-            gen_expression = f'hash_double_from_ulong(gid+seed64)*{hl} + {l}'

         kernel = Kernel(kernel_text=f"""
         {HKernel.include_hash()}
@@ -9,12 +9,13 @@ from .depthwise_conv2D import depthwise_conv2D

 from .gaussian_blur import gaussian_blur
 from .matmul import matmul, matmulc
 from .pad import pad
+from .rct import rct
 from .reduce import (moments, reduce_max, reduce_mean, reduce_min, reduce_std,
                      reduce_sum, reduce_variance)
 from .remap import remap
 from .remap_np_affine import remap_np_affine
 from .reshape import reshape
-from .slice_ import slice_
+from .slice_ import slice_, split
 from .slice_set import slice_set
 from .stack import stack
 from .tile import tile
@@ -1,27 +1,31 @@

 import numpy as np

+from ..AAxes import AAxes
 from ..AShape import AShape
 from ..backend import Kernel
 from ..HArgs import HArgs
 from ..HKernel import HKernel
 from ..HType import HType
-from ..info import BroadcastInfo
+from ..info import BroadcastInfo, ReductionInfo
 from ..SCacheton import SCacheton
 from ..Tensor import Tensor


 def any_wise(op_text : str,
              *args,
+             dim_wise_axis : int = None,
              dtype : np.dtype = None,
              output_t:Tensor=None) -> Tensor:
     """
-    operator for N-wise ops with N inputs
+    elements-wise operator with N inputs

     arguments

         op_text     example: O=(2*I0*I1)+I2

         *args       List[ Tensor | number ]

+        dim_wise_axis(None)

         dtype

         output_t    compute result to this Tensor.
@@ -33,7 +37,7 @@ def any_wise(op_text : str,

     shape_list, dtype_list, krn_args = HArgs.decompose(args)

-    op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dtype, op_text)
+    op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dim_wise_axis, dtype, op_text)

     if output_t is None:
         output_t = Tensor ( op.o_shape, op.o_dtype, device=device )
@@ -45,59 +49,60 @@ def any_wise(op_text : str,

     return output_t

 class _AnyWiseOp:
-    def __init__(self, shape_list, dtype_list, o_dtype, op_text : str):
+    def __init__(self, shape_list, dtype_list, dim_wise_axis, o_dtype, op_text : str):
         if len(shape_list) != len(dtype_list):
             raise ValueError('len(shape_list) != len(dtype_list)')

         self.o_dtype = o_dtype = o_dtype if o_dtype is not None else HType.get_most_weighted_dtype (dtype_list)
+        self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
+        self.o_shape = o_shape = info.o_shape

-        if len(shape_list) == 1:
-            # element-wise.
-            i_shape, i_dtype = shape_list[0], dtype_list[0]
-            self.o_shape = o_shape = i_shape
-
-            self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
-{HKernel.define_tensor('O', o_shape, o_dtype)}
-{HKernel.define_tensor('IN', i_shape, i_dtype)}
-__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const IN_PTR_TYPE* IN_PTR_NAME)
-{{
-    size_t gid = get_global_id(0);
-    O_TYPE O = O_GLOBAL_LOAD(gid);
-    IN_TYPE I0 = IN_GLOBAL_LOAD(gid);
-    {op_text};
-    O_GLOBAL_STORE(gid, O);
-}}
-""")
-        else:
-            # Multi arg.
-            self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
-            self.o_shape = o_shape = info.o_shape
-
-            defs, arg_defs, impls = [], [], []
-            for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
-                t_name = f'I{i}'
-                if t_shape is not None:
-                    defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
-                    arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
-                    impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('O', info.o_shape.ndim)}));")
-                else:
-                    arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
-
-            defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
-
-            self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
+        g_shape = o_shape
+        if dim_wise_axis is not None:
+            dim_wise_axis = o_shape.check_axis(dim_wise_axis)
+
+            dim_wise_axis_size = o_shape[dim_wise_axis]
+            if dim_wise_axis_size > 16:
+                raise ValueError(f'dim_wise_axis size > 16: {dim_wise_axis_size}')
+
+            g_shape = ReductionInfo( o_shape, AAxes(dim_wise_axis), False ).o_shape
+
+        defs, arg_defs, impls = [], [], []
+        for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
+            t_name = f'I{i}'
+            if t_shape is not None:
+                defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
+                arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
+                if dim_wise_axis is not None:
+                    for i_elem in range(dim_wise_axis_size):
+                        impls.append( f"{t_name}_TYPE {t_name}_{i_elem} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}));")
+                else:
+                    impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim)}));")
+            else:
+                arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
+
+        defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
+
+        if dim_wise_axis is not None:
+            o_def = '\n'.join( f"O_TYPE O_{i_elem};" for i_elem in range(dim_wise_axis_size) )
+            o_store = '\n'.join( f"O_GLOBAL_STORE(O_IDX({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}), O_{i_elem});" for i_elem in range(dim_wise_axis_size) )
+        else:
+            o_def = 'O_TYPE O;'
+            o_store = 'O_GLOBAL_STORE(gid, O);'
+
+        self.forward_krn = Kernel(global_shape=(g_shape.size,), kernel_text=f"""
 {defs}
 {HKernel.define_tensor('O', o_shape, o_dtype)}
+{HKernel.define_tensor_shape('G', g_shape)}
 __kernel void impl(__global O_PTR_TYPE* O_PTR_NAME{arg_defs})
 {{
     size_t gid = get_global_id(0);
-    {HKernel.decompose_idx_to_axes_idxs('gid', 'o', o_shape.ndim)}
+    {HKernel.decompose_idx_to_axes_idxs('gid', 'G', g_shape.ndim)}
     {impls}
-    O_TYPE O;
+    {o_def}
     {op_text};
-    O_GLOBAL_STORE(gid, O);
+    {o_store}
 }}
 """)
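A hedged reading of the new dim_wise_axis mode, based on the generated names above: each element along that axis gets its own input variable (I0_0, I0_1, ...) and output variable (O_0, O_1, ...), the global work size drops that axis, and the axis size is capped at 16. A usage sketch (the op_text is illustrative, not from the diff):

    # Per-channel swizzle of an NCHW tensor in a single any_wise call;
    # with dim_wise_axis=1 the kernel body sees I0_0..I0_2 and writes O_0..O_2.
    # out_t = op.any_wise('O_0 = I0_2; O_1 = I0_1; O_2 = I0_0;', img_t, dim_wise_axis=1)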
@@ -39,7 +39,7 @@ def cvt_color (input_t : Tensor, in_mode : str, out_mode : str, ch_axis=1, dtype

     return output_t

 _allowed_modes = ['RGB', 'BGR', 'XYZ', 'LAB']
-_allowed_dtypes = [np.float16, np.float32, np.float64]
+_allowed_dtypes = [np.float16, np.float32]

 class _CvtColor32Op():
     def __init__(self, i_shape : AShape, i_dtype, in_mode, o_dtype, out_mode, ch_axis):
@@ -100,54 +100,74 @@ class _CvtColor32Op():

         self.forward_krn = krn

     @staticmethod
-    def get_RGB_to_LAB_body(R,G,B,L,a,b,lab_type='') -> str:
+    def get_RGB_to_LAB_body(R,G,B,L,a,b, declare_out_type=False) -> str:
         return f"""
-{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,'X','Y','Z', xyz_type='float')}
-{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, lab_type=lab_type)}
+{_CvtColor32Op.get_sRGB_to_XYZ_body(R,G,B,'X','Y','Z', declare_out_type=True)}
+{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, declare_out_type=declare_out_type)}
 """

     @staticmethod
-    def get_LAB_to_RGB_body(L,a,b,R,G,B,rgb_type='') -> str:
+    def get_LAB_to_RGB_body(L,a,b,R,G,B, declare_out_type=False) -> str:
         return f"""
-{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', xyz_type='float')}
-{_CvtColor32Op.get_XYZ_to_RGB_body('X','Y','Z',R,G,B,rgb_type=rgb_type)}
+{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', declare_out_type=True)}
+{_CvtColor32Op.get_XYZ_to_sRGB_body('X','Y','Z',R,G,B, declare_out_type=declare_out_type)}
 """

     @staticmethod
-    def get_RGB_to_XYZ_body(R,G,B,X,Y,Z,xyz_type='') -> str:
-        return f"""
-{xyz_type} {X} = fma(0.4124564, {R}, fma(0.3575761, {G}, 0.1804375*{B}));
-{xyz_type} {Y} = fma(0.2126729, {R}, fma(0.7151522, {G}, 0.0721750*{B}));
-{xyz_type} {Z} = fma(0.0193339, {R}, fma(0.1191920, {G}, 0.9503041*{B}));
-"""
-    @staticmethod
-    def get_XYZ_to_RGB_body(X,Y,Z,R,G,B,rgb_type='') -> str:
-        return f"""
-{rgb_type} {R} = fma( 3.2404542, {X}, fma(-1.5371385, {Y}, -0.4985314*{Z}));
-{rgb_type} {G} = fma(-0.9692660, {X}, fma( 1.8760108, {Y}, 0.0415560*{Z}));
-{rgb_type} {B} = fma( 0.0556434, {X}, fma(-0.2040259, {Y}, 1.0572252*{Z}));
-"""
+    def get_sRGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
+        return f"""
+{R} = ({R} > 0.04045)*( pow( ({R}+0.055)/1.055, 2.4) ) + ({R} <= 0.04045)*({R} / 12.92);
+{G} = ({G} > 0.04045)*( pow( ({G}+0.055)/1.055, 2.4) ) + ({G} <= 0.04045)*({G} / 12.92);
+{B} = ({B} > 0.04045)*( pow( ({B}+0.055)/1.055, 2.4) ) + ({B} <= 0.04045)*({B} / 12.92);
+{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,X,Y,Z,declare_out_type=declare_out_type) }
+"""

     @staticmethod
-    def get_RGB_to_BGR_body(R,G,B,b,g,r,bgr_type='') -> str:
+    def get_RGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
         return f"""
-{bgr_type} {b} = {R};
-{bgr_type} {g} = {G};
-{bgr_type} {r} = {B};
+{'float' if declare_out_type else ''} {X} = {R}*0.412453 + {G}*0.357580 + {B}*0.180423;
+{'float' if declare_out_type else ''} {Y} = {R}*0.212671 + {G}*0.715160 + {B}*0.072169;
+{'float' if declare_out_type else ''} {Z} = {R}*0.019334 + {G}*0.119193 + {B}*0.950227;
 """

     @staticmethod
-    def get_BGR_to_RGB_body(B,G,R,r,g,b,rgb_type='') -> str:
+    def get_XYZ_to_sRGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
         return f"""
-{rgb_type} {r} = {B};
-{rgb_type} {g} = {G};
-{rgb_type} {b} = {R};
+{_CvtColor32Op.get_XYZ_to_RGB_body(X,Y,Z,R,G,B,declare_out_type=declare_out_type) }
+{R} = ({R} > 0.0031308)*( 1.055*pow({R},1.0/2.4)-0.055 ) + ({R} <= 0.0031308)*({R} * 12.92);
+{G} = ({G} > 0.0031308)*( 1.055*pow({G},1.0/2.4)-0.055 ) + ({G} <= 0.0031308)*({G} * 12.92);
+{B} = ({B} > 0.0031308)*( 1.055*pow({B},1.0/2.4)-0.055 ) + ({B} <= 0.0031308)*({B} * 12.92);
 """

     @staticmethod
-    def get_XYZ_to_LAB_body(X,Y,Z,L,A,B,lab_type='') -> str:
+    def get_XYZ_to_RGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {R} = clamp( {X}* 3.240479 + {Y}*-1.53715 + {Z}*-0.498535, 0.0, 1.0 );
+{'float' if declare_out_type else ''} {G} = clamp( {X}*-0.969256 + {Y}* 1.875991 + {Z}* 0.041556, 0.0, 1.0 );
+{'float' if declare_out_type else ''} {B} = clamp( {X}* 0.055648 + {Y}*-0.204043 + {Z}* 1.057311, 0.0, 1.0 );
+"""
+
+    @staticmethod
+    def get_RGB_to_BGR_body(R,G,B,b,g,r, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {b} = {R};
+{'float' if declare_out_type else ''} {g} = {G};
+{'float' if declare_out_type else ''} {r} = {B};
+"""
+
+    @staticmethod
+    def get_BGR_to_RGB_body(B,G,R,r,g,b, declare_out_type=False) -> str:
+        return f"""
+{'float' if declare_out_type else ''} {r} = {B};
+{'float' if declare_out_type else ''} {g} = {G};
+{'float' if declare_out_type else ''} {b} = {R};
+"""
+
+    @staticmethod
+    def get_XYZ_to_LAB_body(X,Y,Z,L,A,B, declare_out_type=False) -> str:
         beta3 = '((6.0/29.0)*(6.0/29.0)*(6.0/29.0))'
-        xyz_xn = '(0.9556)'
+        xyz_xn = '(0.950456)'
         xyz_zn = '(1.088754)'
         return f"""
 {X} /= {xyz_xn};
@@ -157,20 +177,20 @@ class _CvtColor32Op():

 {Y} = ({Y} > {beta3})*rootn({Y}, 3) + ({Y} <= {beta3})*(7.787*{Y}+4.0/29.0);
 {Z} = ({Z} > {beta3})*rootn({Z}, 3) + ({Z} <= {beta3})*(7.787*{Z}+4.0/29.0);

-{lab_type} {L} = 116.0*{Y}-16.0;
-{lab_type} {A} = 500.0*({X}-{Y});
-{lab_type} {B} = 200.0*({Y}-{Z});
+{'float' if declare_out_type else ''} {L} = 116.0*{Y}-16.0;
+{'float' if declare_out_type else ''} {A} = 500.0*({X}-{Y});
+{'float' if declare_out_type else ''} {B} = 200.0*({Y}-{Z});
 """
     @staticmethod
-    def get_LAB_to_XYZ_body(L,A,B,X,Y,Z,xyz_type='') -> str:
+    def get_LAB_to_XYZ_body(L,A,B,X,Y,Z, declare_out_type=False) -> str:
         beta = '(6.0/29.0)'
         beta2 = '((6.0/29.0)*(6.0/29.0))'
-        xyz_xn = '(0.9556)'
+        xyz_xn = '(0.950456)'
         xyz_zn = '(1.088754)'
         return f"""
-{xyz_type} {Y} = ({L} + 16.0) / 116.0;
-{xyz_type} {X} = {Y} + {A} / 500.0;
-{xyz_type} {Z} = {Y} - {B} / 200.0;
+{'float' if declare_out_type else ''} {Y} = ({L} + 16.0) / 116.0;
+{'float' if declare_out_type else ''} {X} = {Y} + {A} / 500.0;
+{'float' if declare_out_type else ''} {Z} = {Y} - {B} / 200.0;

 {Y} = ({Y} > {beta})*({Y}*{Y}*{Y}) + ({Y} <= {beta})*({Y}-16.0/116.0)*3*{beta2};
 {X} = ({X} > {beta})*({X}*{X}*{X}*{xyz_xn}) + ({X} <= {beta})*({X}-16.0/116.0)*3*{beta2}*{xyz_xn};
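The new sRGB bodies add the standard piecewise gamma around the linear RGB/XYZ matrices. A numpy rendering of exactly those formulas (sketch, mirroring the OpenCL text above):

    import numpy as np

    def srgb_to_linear(c):
        # Same thresholds/exponents as get_sRGB_to_XYZ_body emits.
        return np.where(c > 0.04045, ((c + 0.055) / 1.055) ** 2.4, c / 12.92)

    def linear_to_srgb(c):
        # Inverse transform, as in get_XYZ_to_sRGB_body.
        return np.where(c > 0.0031308, 1.055 * c ** (1.0 / 2.4) - 0.055, c * 12.92)

    c = np.linspace(0, 1, 5)
    assert np.allclose(linear_to_srgb(srgb_to_linear(c)), c, atol=1e-6)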
@@ -58,7 +58,7 @@ def reduce_variance(input_t, axes=None, keepdims=False):

     mean = reduce_mean(input_t, axes, keepdims=True)
     return reduce_mean(square(input_t - mean), axes, keepdims)

-def moments(input_t, axes=None, keepdims=False):
+def moments(input_t, axes=None):
     """
     Returns (mean, variance) of input_t

@@ -68,11 +68,9 @@ def moments(input_t, axes=None):

         Iterable of ints.
         None - all axes

-        keepdims(False)     keep reduced axes
     """
-    mean = reduce_mean(input_t, axes, keepdims)
-    mean_shape_keepdims = mean._op.info.o_shape_kd
-    var = reduce_mean(square(input_t - mean.reshape(mean_shape_keepdims) ), axes, keepdims)
+    mean = reduce_mean(input_t, axes, True)
+    var = reduce_mean(square(input_t - mean), axes, True)
     return mean, var

 def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
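The reworked moments always reduces with keepdims internally, so the centered term broadcasts directly and the old reshape through mean._op.info.o_shape_kd goes away. A numpy equivalent of the new behavior (sketch):

    import numpy as np

    def moments_np(x, axes=None):
        # keepdims=True lets (x - mean) broadcast without an explicit reshape.
        mean = x.mean(axes, keepdims=True)
        var = ((x - mean) ** 2).mean(axes, keepdims=True)
        return mean, var

    m, v = moments_np(np.random.rand(2, 3, 4), axes=(1, 2))
    assert m.shape == v.shape == (2, 1, 1)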
@@ -1,6 +1,9 @@

+from typing import List
+
 import numpy as np

 from ..AShape import AShape
+from ..AAxes import AAxes
 from ..backend import Kernel
 from ..HKernel import HKernel
 from ..HType import HType
@@ -9,6 +12,29 @@ from ..SCacheton import SCacheton

 from ..Tensor import Tensor


+def split(input_t : Tensor, axis, keepdims=False) -> List[Tensor]:
+    """
+    arguments
+
+        input_t     Tensor
+
+        axis
+    """
+    shape = input_t.shape
+
+    result = []
+    for i in range(shape[axis]):
+        slices = [slice(None, None, None)]*shape.ndim
+
+        slices[axis] = i if not keepdims else slice(i,i+1,1)
+
+        result.append( slice_(input_t, slices) )
+
+    return result
+
+
 def slice_(input_t : Tensor, slices, dtype : np.dtype = None, output_t=None, is_add_to_output=False) -> Tensor:
     """
     arguments:
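The new split() yields one slice per index along the axis; with keepdims=True each slice keeps a size-1 axis (the slice(i, i+1, 1) branch above). A pure-numpy analog of the same contract (sketch):

    import numpy as np

    def split_np(a, axis, keepdims=False):
        out = []
        for i in range(a.shape[axis]):
            sl = [slice(None)] * a.ndim
            sl[axis] = slice(i, i + 1) if keepdims else i
            out.append(a[tuple(sl)])
        return out

    parts = split_np(np.zeros((2, 3, 4)), axis=1, keepdims=True)
    assert len(parts) == 3 and parts[0].shape == (2, 1, 4)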