mirror of
https://github.com/iperov/DeepFaceLive
synced 2025-08-14 18:57:24 -07:00
update xlib.avecl
This commit is contained in:
parent
2d401f47f8
commit
6da916cc66
14 changed files with 246 additions and 184 deletions
|
@ -9,12 +9,13 @@ from .depthwise_conv2D import depthwise_conv2D
|
|||
from .gaussian_blur import gaussian_blur
|
||||
from .matmul import matmul, matmulc
|
||||
from .pad import pad
|
||||
from .rct import rct
|
||||
from .reduce import (moments, reduce_max, reduce_mean, reduce_min, reduce_std,
|
||||
reduce_sum, reduce_variance)
|
||||
from .remap import remap
|
||||
from .remap_np_affine import remap_np_affine
|
||||
from .reshape import reshape
|
||||
from .slice_ import slice_
|
||||
from .slice_ import slice_, split
|
||||
from .slice_set import slice_set
|
||||
from .stack import stack
|
||||
from .tile import tile
|
||||
|
|
|
@ -1,27 +1,31 @@
|
|||
import numpy as np
|
||||
|
||||
from ..AAxes import AAxes
|
||||
from ..AShape import AShape
|
||||
from ..backend import Kernel
|
||||
from ..HArgs import HArgs
|
||||
from ..HKernel import HKernel
|
||||
from ..HType import HType
|
||||
from ..info import BroadcastInfo
|
||||
from ..info import BroadcastInfo, ReductionInfo
|
||||
from ..SCacheton import SCacheton
|
||||
from ..Tensor import Tensor
|
||||
|
||||
|
||||
def any_wise(op_text : str,
|
||||
*args,
|
||||
dim_wise_axis : int = None,
|
||||
dtype : np.dtype = None,
|
||||
output_t:Tensor=None) -> Tensor:
|
||||
"""
|
||||
operator for N-wise ops with N inputs
|
||||
elements-wise operator with N inputs
|
||||
|
||||
arguments
|
||||
op_text example: O=(2*I0*I1)+I2
|
||||
|
||||
*args List[ Tensor | number ]
|
||||
|
||||
dim_wise_axis(None)
|
||||
|
||||
dtype
|
||||
|
||||
output_t compute result to this Tensor.
|
||||
|
@ -33,7 +37,7 @@ def any_wise(op_text : str,
|
|||
|
||||
shape_list, dtype_list, krn_args = HArgs.decompose(args)
|
||||
|
||||
op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dtype, op_text)
|
||||
op = SCacheton.get(_AnyWiseOp, shape_list, dtype_list, dim_wise_axis, dtype, op_text)
|
||||
|
||||
if output_t is None:
|
||||
output_t = Tensor ( op.o_shape, op.o_dtype, device=device )
|
||||
|
@ -45,59 +49,60 @@ def any_wise(op_text : str,
|
|||
return output_t
|
||||
|
||||
class _AnyWiseOp:
|
||||
def __init__(self, shape_list, dtype_list, o_dtype, op_text : str):
|
||||
def __init__(self, shape_list, dtype_list, dim_wise_axis, o_dtype, op_text : str):
|
||||
if len(shape_list) != len(dtype_list):
|
||||
raise ValueError('len(shape_list) != len(dtype_list)')
|
||||
|
||||
self.o_dtype = o_dtype = o_dtype if o_dtype is not None else HType.get_most_weighted_dtype (dtype_list)
|
||||
self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
|
||||
self.o_shape = o_shape = info.o_shape
|
||||
|
||||
if len(shape_list) == 1:
|
||||
# element-wise.
|
||||
i_shape, i_dtype = shape_list[0], dtype_list[0]
|
||||
self.o_shape = o_shape = i_shape
|
||||
g_shape = o_shape
|
||||
if dim_wise_axis is not None:
|
||||
dim_wise_axis = o_shape.check_axis(dim_wise_axis)
|
||||
|
||||
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
|
||||
{HKernel.define_tensor('O', o_shape, o_dtype)}
|
||||
{HKernel.define_tensor('IN', i_shape, i_dtype)}
|
||||
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME, __global const IN_PTR_TYPE* IN_PTR_NAME)
|
||||
{{
|
||||
size_t gid = get_global_id(0);
|
||||
dim_wise_axis_size = o_shape[dim_wise_axis]
|
||||
if dim_wise_axis_size > 16:
|
||||
raise ValueError(f'dim_wise_axis size > 16: {dim_wise_axis_size}')
|
||||
|
||||
O_TYPE O = O_GLOBAL_LOAD(gid);
|
||||
IN_TYPE I0 = IN_GLOBAL_LOAD(gid);
|
||||
{op_text};
|
||||
O_GLOBAL_STORE(gid, O);
|
||||
}}
|
||||
""")
|
||||
else:
|
||||
# Multi arg.
|
||||
self.info = info = BroadcastInfo( [ shape if shape is not None else AShape((1,)) for shape in shape_list ])
|
||||
g_shape = ReductionInfo( o_shape, AAxes(dim_wise_axis), False ).o_shape
|
||||
|
||||
self.o_shape = o_shape = info.o_shape
|
||||
defs, arg_defs, impls = [], [], []
|
||||
for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
|
||||
t_name = f'I{i}'
|
||||
if t_shape is not None:
|
||||
defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
|
||||
arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
|
||||
|
||||
defs, arg_defs, impls = [], [], []
|
||||
for i, (t_shape, t_dtype) in enumerate(zip(shape_list, dtype_list)):
|
||||
t_name = f'I{i}'
|
||||
if t_shape is not None:
|
||||
defs.append( HKernel.define_tensor(t_name, info.br_shapes[i], t_dtype) )
|
||||
arg_defs.append( f", __global const {t_name}_PTR_TYPE* {t_name}_PTR_NAME" )
|
||||
impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('O', info.o_shape.ndim)}));")
|
||||
if dim_wise_axis is not None:
|
||||
for i_elem in range(dim_wise_axis_size):
|
||||
impls.append( f"{t_name}_TYPE {t_name}_{i_elem} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}));")
|
||||
else:
|
||||
arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
|
||||
impls.append( f"{t_name}_TYPE {t_name} = {t_name}_GLOBAL_LOAD({t_name}_IDX_MOD({HKernel.axes_seq_enum('G', g_shape.ndim)}));")
|
||||
else:
|
||||
arg_defs.append( f", {HKernel.define_scalar_func_arg(t_name, t_dtype)}" )
|
||||
|
||||
defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
|
||||
defs, arg_defs, impls = '\n'.join(defs), '\n'.join(arg_defs), '\n'.join(impls)
|
||||
|
||||
self.forward_krn = Kernel(global_shape=(o_shape.size,), kernel_text=f"""
|
||||
if dim_wise_axis is not None:
|
||||
o_def = '\n'.join( f"O_TYPE O_{i_elem};" for i_elem in range(dim_wise_axis_size) )
|
||||
o_store = '\n'.join( f"O_GLOBAL_STORE(O_IDX({HKernel.axes_seq_enum('G', g_shape.ndim, new_axis=(f'{i_elem}', dim_wise_axis) )}), O_{i_elem});" for i_elem in range(dim_wise_axis_size) )
|
||||
else:
|
||||
o_def = 'O_TYPE O;'
|
||||
o_store = 'O_GLOBAL_STORE(gid, O);'
|
||||
|
||||
self.forward_krn = Kernel(global_shape=(g_shape.size,), kernel_text=f"""
|
||||
{defs}
|
||||
{HKernel.define_tensor('O', o_shape, o_dtype)}
|
||||
{HKernel.define_tensor_shape('G', g_shape)}
|
||||
__kernel void impl(__global O_PTR_TYPE* O_PTR_NAME{arg_defs})
|
||||
{{
|
||||
size_t gid = get_global_id(0);
|
||||
{HKernel.decompose_idx_to_axes_idxs('gid', 'o', o_shape.ndim)}
|
||||
{HKernel.decompose_idx_to_axes_idxs('gid', 'G', g_shape.ndim)}
|
||||
{impls}
|
||||
O_TYPE O;
|
||||
{o_def}
|
||||
{op_text};
|
||||
O_GLOBAL_STORE(gid, O);
|
||||
{o_store}
|
||||
}}
|
||||
""")
|
||||
|
||||
|
|
|
@ -39,7 +39,7 @@ def cvt_color (input_t : Tensor, in_mode : str, out_mode : str, ch_axis=1, dtype
|
|||
return output_t
|
||||
|
||||
_allowed_modes = ['RGB', 'BGR', 'XYZ', 'LAB']
|
||||
_allowed_dtypes = [np.float16, np.float32, np.float64]
|
||||
_allowed_dtypes = [np.float16, np.float32]
|
||||
|
||||
class _CvtColor32Op():
|
||||
def __init__(self, i_shape : AShape, i_dtype, in_mode, o_dtype, out_mode, ch_axis):
|
||||
|
@ -100,54 +100,74 @@ class _CvtColor32Op():
|
|||
self.forward_krn = krn
|
||||
|
||||
@staticmethod
|
||||
def get_RGB_to_LAB_body(R,G,B,L,a,b,lab_type='') -> str:
|
||||
def get_RGB_to_LAB_body(R,G,B,L,a,b, declare_out_type=False) -> str:
|
||||
return f"""
|
||||
{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,'X','Y','Z', xyz_type='float')}
|
||||
{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, lab_type=lab_type)}
|
||||
{_CvtColor32Op.get_sRGB_to_XYZ_body(R,G,B,'X','Y','Z', declare_out_type=True)}
|
||||
{_CvtColor32Op.get_XYZ_to_LAB_body('X','Y','Z',L,a,b, declare_out_type=declare_out_type)}
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def get_LAB_to_RGB_body(L,a,b,R,G,B,rgb_type='') -> str:
|
||||
def get_LAB_to_RGB_body(L,a,b,R,G,B, declare_out_type=False) -> str:
|
||||
return f"""
|
||||
{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', xyz_type='float')}
|
||||
{_CvtColor32Op.get_XYZ_to_RGB_body('X','Y','Z',R,G,B,rgb_type=rgb_type)}
|
||||
{_CvtColor32Op.get_LAB_to_XYZ_body(L,a,b,'X','Y','Z', declare_out_type=True)}
|
||||
{_CvtColor32Op.get_XYZ_to_sRGB_body('X','Y','Z',R,G,B, declare_out_type=declare_out_type)}
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def get_RGB_to_XYZ_body(R,G,B,X,Y,Z,xyz_type='') -> str:
|
||||
def get_sRGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
|
||||
return f"""
|
||||
{xyz_type} {X} = fma(0.4124564, {R}, fma(0.3575761, {G}, 0.1804375*{B}));
|
||||
{xyz_type} {Y} = fma(0.2126729, {R}, fma(0.7151522, {G}, 0.0721750*{B}));
|
||||
{xyz_type} {Z} = fma(0.0193339, {R}, fma(0.1191920, {G}, 0.9503041*{B}));
|
||||
"""
|
||||
@staticmethod
|
||||
def get_XYZ_to_RGB_body(X,Y,Z,R,G,B,rgb_type='') -> str:
|
||||
return f"""
|
||||
{rgb_type} {R} = fma( 3.2404542, {X}, fma(-1.5371385, {Y}, -0.4985314*{Z}));
|
||||
{rgb_type} {G} = fma(-0.9692660, {X}, fma( 1.8760108, {Y}, 0.0415560*{Z}));
|
||||
{rgb_type} {B} = fma( 0.0556434, {X}, fma(-0.2040259, {Y}, 1.0572252*{Z}));
|
||||
{R} = ({R} > 0.04045)*( pow( ({R}+0.055)/1.055, 2.4) ) + ({R} <= 0.04045)*({R} / 12.92);
|
||||
{G} = ({G} > 0.04045)*( pow( ({G}+0.055)/1.055, 2.4) ) + ({G} <= 0.04045)*({G} / 12.92);
|
||||
{B} = ({B} > 0.04045)*( pow( ({B}+0.055)/1.055, 2.4) ) + ({B} <= 0.04045)*({B} / 12.92);
|
||||
|
||||
{_CvtColor32Op.get_RGB_to_XYZ_body(R,G,B,X,Y,Z,declare_out_type=declare_out_type) }
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def get_RGB_to_BGR_body(R,G,B,b,g,r,bgr_type='') -> str:
|
||||
def get_RGB_to_XYZ_body(R,G,B,X,Y,Z, declare_out_type=False) -> str:
|
||||
return f"""
|
||||
{bgr_type} {b} = {R};
|
||||
{bgr_type} {g} = {G};
|
||||
{bgr_type} {r} = {B};
|
||||
{'float' if declare_out_type else ''} {X} = {R}*0.412453 + {G}*0.357580 + {B}*0.180423;
|
||||
{'float' if declare_out_type else ''} {Y} = {R}*0.212671 + {G}*0.715160 + {B}*0.072169;
|
||||
{'float' if declare_out_type else ''} {Z} = {R}*0.019334 + {G}*0.119193 + {B}*0.950227;
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def get_BGR_to_RGB_body(B,G,R,r,g,b,rgb_type='') -> str:
|
||||
def get_XYZ_to_sRGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
|
||||
return f"""
|
||||
{rgb_type} {r} = {B};
|
||||
{rgb_type} {g} = {G};
|
||||
{rgb_type} {b} = {R};
|
||||
{_CvtColor32Op.get_XYZ_to_RGB_body(X,Y,Z,R,G,B,declare_out_type=declare_out_type) }
|
||||
{R} = ({R} > 0.0031308)*( 1.055*pow({R},1.0/2.4)-0.055 ) + ({R} <= 0.0031308)*({R} * 12.92);
|
||||
{G} = ({G} > 0.0031308)*( 1.055*pow({G},1.0/2.4)-0.055 ) + ({G} <= 0.0031308)*({G} * 12.92);
|
||||
{B} = ({B} > 0.0031308)*( 1.055*pow({B},1.0/2.4)-0.055 ) + ({B} <= 0.0031308)*({B} * 12.92);
|
||||
"""
|
||||
|
||||
@staticmethod
|
||||
def get_XYZ_to_LAB_body(X,Y,Z,L,A,B,lab_type='') -> str:
|
||||
def get_XYZ_to_RGB_body(X,Y,Z,R,G,B, declare_out_type=False) -> str:
    """
    Build the kernel text that computes R,G,B from X,Y,Z.

    arguments

        X,Y,Z       str     names of the input variables in the kernel text

        R,G,B       str     names of the output variables in the kernel text

        declare_out_type(False)     if True, every output assignment is
                                    prefixed with 'float', i.e. the outputs
                                    are declared in place.

    Each output is a weighted sum of X,Y,Z wrapped in clamp(..., 0.0, 1.0).
    No transfer curve is applied here.
    """
    # Same prefix for all three assignments: a declaration or nothing.
    decl = 'float' if declare_out_type else ''
    return f"""
{decl} {R} = clamp( {X}* 3.240479 + {Y}*-1.53715 + {Z}*-0.498535, 0.0, 1.0 );
{decl} {G} = clamp( {X}*-0.969256 + {Y}* 1.875991 + {Z}* 0.041556, 0.0, 1.0 );
{decl} {B} = clamp( {X}* 0.055648 + {Y}*-0.204043 + {Z}* 1.057311, 0.0, 1.0 );
"""
|
||||
|
||||
@staticmethod
|
||||
def get_RGB_to_BGR_body(R,G,B,b,g,r, declare_out_type=False) -> str:
    """
    Build the kernel text that copies three input variables to three outputs.

    arguments

        R,G,B       str     names of the input variables in the kernel text

        b,g,r       str     names of the output variables in the kernel text

        declare_out_type(False)     if True, every output assignment is
                                    prefixed with 'float'.

    The emitted assignments are positional: b = R, g = G, r = B.
    NOTE(review): the channel swap presumably comes from the order in which
    the caller passes the names -- confirm against the call site.
    """
    # Same prefix for all three assignments: a declaration or nothing.
    decl = 'float' if declare_out_type else ''
    return f"""
{decl} {b} = {R};
{decl} {g} = {G};
{decl} {r} = {B};
"""
|
||||
|
||||
@staticmethod
|
||||
def get_BGR_to_RGB_body(B,G,R,r,g,b, declare_out_type=False) -> str:
    """
    Build the kernel text that copies three input variables to three outputs.

    arguments

        B,G,R       str     names of the input variables in the kernel text

        r,g,b       str     names of the output variables in the kernel text

        declare_out_type(False)     if True, every output assignment is
                                    prefixed with 'float'.

    The emitted assignments are positional: r = B, g = G, b = R.
    NOTE(review): the channel swap presumably comes from the order in which
    the caller passes the names -- confirm against the call site.
    """
    # Same prefix for all three assignments: a declaration or nothing.
    decl = 'float' if declare_out_type else ''
    return f"""
{decl} {r} = {B};
{decl} {g} = {G};
{decl} {b} = {R};
"""
|
||||
|
||||
@staticmethod
|
||||
def get_XYZ_to_LAB_body(X,Y,Z,L,A,B, declare_out_type=False) -> str:
|
||||
beta3 = '((6.0/29.0)*(6.0/29.0)*(6.0/29.0))'
|
||||
xyz_xn = '(0.9556)'
|
||||
xyz_xn = '(0.950456)'
|
||||
xyz_zn = '(1.088754)'
|
||||
return f"""
|
||||
{X} /= {xyz_xn};
|
||||
|
@ -157,20 +177,20 @@ class _CvtColor32Op():
|
|||
{Y} = ({Y} > {beta3})*rootn({Y}, 3) + ({Y} <= {beta3})*(7.787*{Y}+4.0/29.0);
|
||||
{Z} = ({Z} > {beta3})*rootn({Z}, 3) + ({Z} <= {beta3})*(7.787*{Z}+4.0/29.0);
|
||||
|
||||
{lab_type} {L} = 116.0*{Y}-16.0;
|
||||
{lab_type} {A} = 500.0*({X}-{Y});
|
||||
{lab_type} {B} = 200.0*({Y}-{Z});
|
||||
{'float' if declare_out_type else ''} {L} = 116.0*{Y}-16.0;
|
||||
{'float' if declare_out_type else ''} {A} = 500.0*({X}-{Y});
|
||||
{'float' if declare_out_type else ''} {B} = 200.0*({Y}-{Z});
|
||||
"""
|
||||
@staticmethod
|
||||
def get_LAB_to_XYZ_body(L,A,B,X,Y,Z,xyz_type='') -> str:
|
||||
def get_LAB_to_XYZ_body(L,A,B,X,Y,Z, declare_out_type=False) -> str:
|
||||
beta = '(6.0/29.0)'
|
||||
beta2 = '((6.0/29.0)*(6.0/29.0))'
|
||||
xyz_xn = '(0.9556)'
|
||||
xyz_xn = '(0.950456)'
|
||||
xyz_zn = '(1.088754)'
|
||||
return f"""
|
||||
{xyz_type} {Y} = ({L} + 16.0) / 116.0;
|
||||
{xyz_type} {X} = {Y} + {A} / 500.0;
|
||||
{xyz_type} {Z} = {Y} - {B} / 200.0;
|
||||
{'float' if declare_out_type else ''} {Y} = ({L} + 16.0) / 116.0;
|
||||
{'float' if declare_out_type else ''} {X} = {Y} + {A} / 500.0;
|
||||
{'float' if declare_out_type else ''} {Z} = {Y} - {B} / 200.0;
|
||||
|
||||
{Y} = ({Y} > {beta})*({Y}*{Y}*{Y}) + ({Y} <= {beta})*({Y}-16.0/116.0)*3*{beta2};
|
||||
{X} = ({X} > {beta})*({X}*{X}*{X}*{xyz_xn}) + ({X} <= {beta})*({X}-16.0/116.0)*3*{beta2}*{xyz_xn};
|
||||
|
|
|
@ -58,7 +58,7 @@ def reduce_variance(input_t, axes=None, keepdims=False):
|
|||
mean = reduce_mean(input_t, axes, keepdims=True)
|
||||
return reduce_mean(square(input_t - mean), axes, keepdims)
|
||||
|
||||
def moments(input_t, axes=None, keepdims=False):
|
||||
def moments(input_t, axes=None):
|
||||
"""
|
||||
Returns (mean, variance) of input_t
|
||||
|
||||
|
@ -68,11 +68,9 @@ def moments(input_t, axes=None, keepdims=False):
|
|||
Iterable of ints.
|
||||
None - all axes
|
||||
|
||||
keepdims(False) keep reduced axes
|
||||
"""
|
||||
mean = reduce_mean(input_t, axes, keepdims)
|
||||
mean_shape_keepdims = mean._op.info.o_shape_kd
|
||||
var = reduce_mean(square(input_t - mean.reshape(mean_shape_keepdims) ), axes, keepdims)
|
||||
mean = reduce_mean(input_t, axes, True)
|
||||
var = reduce_mean(square(input_t - mean), axes, True)
|
||||
return mean, var
|
||||
|
||||
def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ..AShape import AShape
|
||||
from ..AAxes import AAxes
|
||||
from ..backend import Kernel
|
||||
from ..HKernel import HKernel
|
||||
from ..HType import HType
|
||||
|
@ -9,6 +12,29 @@ from ..SCacheton import SCacheton
|
|||
from ..Tensor import Tensor
|
||||
|
||||
|
||||
def split(input_t : Tensor, axis, keepdims=False) -> List[Tensor]:
    """
    Cut input_t into one Tensor per index along the given axis.

    arguments

        input_t     Tensor

        axis        axis of input_t to split along
                    (used to index input_t.shape)

        keepdims(False)     if True, piece i is taken with slice(i,i+1,1)
                            on the split axis instead of the integer index i

    returns a list of input_t.shape[axis] Tensors, each produced by slice_()
    """
    in_shape = input_t.shape
    take_all = slice(None, None, None)

    def _selector(pos):
        # Full range on every axis, a single position on the split axis.
        sel = [take_all] * in_shape.ndim
        sel[axis] = slice(pos, pos+1, 1) if keepdims else pos
        return sel

    return [ slice_(input_t, _selector(pos)) for pos in range(in_shape[axis]) ]
|
||||
|
||||
|
||||
def slice_(input_t : Tensor, slices, dtype : np.dtype = None, output_t=None, is_add_to_output=False) -> Tensor:
|
||||
"""
|
||||
arguments:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue