mirror of https://github.com/iperov/DeepFaceLive
synced 2025-08-14 10:47:00 -07:00
add xlib.avecl
This commit is contained in:
parent 932edfe875
commit 0058474da7
56 changed files with 5569 additions and 0 deletions
214
xlib/avecl/_internal/op/reduce.py
Normal file
@@ -0,0 +1,214 @@
import math

import numpy as np

from ..AAxes import AAxes
from ..AShape import AShape
from ..backend import Kernel
from ..HKernel import HKernel
from ..info import ReductionInfo, TransposeInfo
from ..SCacheton import SCacheton
from ..Tensor import Tensor
from .slice_ import slice_
from .transpose import transpose
from .any_wise import square, sqrt

def reduce_mean (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
    """
    Reduce mean operator.

        input_t         Tensor

        axes(None)      int
                        Iterable of ints.
                        None - all axes

        keepdims(False) keep reduced axes
    """
    return reduce_op ('mean', input_t, axes=axes, keepdims=keepdims, output_t=output_t, is_add_to_output=is_add_to_output)

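# Axes semantics for the reduce_* wrappers in this module (illustrative shapes, not taken from the
# code above): for an input of shape (2, 3, 4), reduce_mean(t, axes=(1, 2)) yields shape (2,), while
# keepdims=True keeps the reduced axes as size-1 dims and yields (2, 1, 1), matching
# np.mean(x, axis=(1, 2), keepdims=True).
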
def reduce_std(input_t, axes=None, keepdims=False):
    """
    Reduce std operator.

        input_t         Tensor

        axes(None)      int
                        Iterable of ints.
                        None - all axes

        keepdims(False) keep reduced axes
    """
    return sqrt(reduce_variance(input_t, axes, keepdims))

def reduce_variance(input_t, axes=None, keepdims=False):
    """
    Reduce variance operator.

        input_t         Tensor

        axes(None)      int
                        Iterable of ints.
                        None - all axes

        keepdims(False) keep reduced axes
    """
    mean = reduce_mean(input_t, axes, keepdims=True)
    return reduce_mean(square(input_t - mean), axes, keepdims)

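# Worked numbers for reduce_variance / reduce_std above (illustrative): for the values [1, 2, 3, 4]
# reduced over their single axis, mean = 2.5, variance = mean([2.25, 0.25, 0.25, 2.25]) = 1.25 and
# std = sqrt(1.25) ≈ 1.118.
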
def moments(input_t, axes=None, keepdims=False):
    """
    Returns (mean, variance) of input_t

        input_t         Tensor

        axes(None)      int
                        Iterable of ints.
                        None - all axes

        keepdims(False) keep reduced axes
    """
    mean = reduce_mean(input_t, axes, keepdims)
    mean_shape_keepdims = mean._op.info.o_shape_kd
    var = reduce_mean(square(input_t - mean.reshape(mean_shape_keepdims) ), axes, keepdims)
    return mean, var

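# Illustrative use of moments above (assumes a float32 tensor t of shape (8, 3, 32, 32)):
# moments(t, axes=(0, 2, 3)) returns the per-channel mean and variance, each of shape (3,); with
# keepdims=True both come back as (1, 3, 1, 1), the usual layout for normalization.
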
def reduce_min (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
    """
    Reduce min operator.

        input_t         Tensor

        axes(None)      int
                        Iterable of ints.
                        None - all axes

        keepdims(False) keep reduced axes
    """
    return reduce_op ('min', input_t, axes=axes, keepdims=keepdims, output_t=output_t, is_add_to_output=is_add_to_output)

def reduce_max (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
    """
    Reduce max operator.

        input_t         Tensor

        axes(None)      int
                        Iterable of ints.
                        None - all axes

        keepdims(False) keep reduced axes
    """
    return reduce_op ('max', input_t, axes=axes, keepdims=keepdims, output_t=output_t, is_add_to_output=is_add_to_output)

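# Illustrative use (assumes t is a (4, 16) float32 Tensor): reduce_min(t) reduces over all axes
# (axes=None), while reduce_max(t, axes=(0,), keepdims=True) gives the per-column maxima with
# shape (1, 16).
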
def reduce_sum (input_t : Tensor, axes=None, keepdims=False, output_t=None, is_add_to_output=False) -> Tensor:
    """
    Reduce sum operator.

        input_t         Tensor

        axes(None)      int
                        Iterable of ints.
                        None - all axes

        keepdims(False) keep reduced axes
    """
    return reduce_op ('sum', input_t, axes=axes, keepdims=keepdims, output_t=output_t, is_add_to_output=is_add_to_output)

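# Accumulation sketch (illustrative; assumes t0 and t1 are (4, 16) Tensors of the same dtype on one
# device, and that Tensor accepts a plain tuple as its shape argument):
#   acc = Tensor( (4,), t0.dtype, device=t0.get_device() )
#   reduce_sum(t0, axes=(1,), output_t=acc)                        # acc  = per-row sum of t0
#   reduce_sum(t1, axes=(1,), output_t=acc, is_add_to_output=True) # acc += per-row sum of t1
# output_t / is_add_to_output are documented in reduce_op below.
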
def reduce_op (op_type : str, input_t, axes=None, keepdims=False, output_t=None, is_add_to_output=False):
    """
    arguments

        op_type             'sum' 'mean' 'min' 'max'

        output_t            compute the result into this Tensor.
                            The Tensor may have a different shape,
                            but its total size must match.
                            gradfn will not be set.

        is_add_to_output    add the result to output_t if output_t is set.
    """

    op = SCacheton.get(_ReduceOp, op_type, input_t.shape, input_t.dtype, AAxes(axes, input_t.shape.ndim), keepdims)

    if output_t is None:
        output_t = Tensor ( op.info.o_shape, input_t.dtype, device=input_t.get_device() )
    elif output_t.shape.size != op.info.o_shape.size:
        raise ValueError(f'output_t must have size {op.info.o_shape.size}')

    # Make an intermediate tensor with the reduction axes transposed to the end
    input_t_inter = transpose(input_t, op.intermediate_transpose_axes)

    # Perform the multistage in-place reduction on the intermediate tensor
    for stage, (shape, STAGE_COLS, STAGE_VALID_COLS) in enumerate(zip(op.forward_krn_shapes, op.forward_krn_stage_cols, op.forward_krn_stage_valid_cols)):
        input_t_inter.get_device().run_kernel(op.forward_krn, input_t_inter.get_buffer(), np.int64(op.COLS), np.int64(STAGE_COLS), np.int64(STAGE_VALID_COLS),
                                              global_shape=shape)

    if op_type == 'mean':
        # divide each row's accumulated value by the number of COLS
        input_t_inter.get_device().run_kernel(op.mean_div_forward_krn, input_t_inter.get_buffer(), np.int64(op.COLS), global_shape=(op.ROWS,) )

    # Fetch the final tensor from the zero indexes using the precomputed slices
    slice_(input_t_inter, op.inter_slices, output_t=output_t, is_add_to_output=is_add_to_output)

    return output_t

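# Worked trace (illustrative): reducing a (2, 3, 4) tensor over axes=(1, 2) with 'sum' keeps axis 0 in
# front (transpose order (0, 1, 2)), so ROWS=2 and COLS=12; the stage loop above folds each row's 12
# columns pairwise until column 0 holds that row's total, and slice_ gathers the two totals into the
# (2,)-shaped output.
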
class _ReduceOp:
    def __init__(self, op_type, i_shape : AShape, i_dtype : np.dtype, axes : AAxes, keepdims=False):
        self.op_type = op_type
        self.info = info = ReductionInfo(i_shape, axes, keepdims)

        # Determine the transpose order for the intermediate tensor, where the reduction axes come last
        self.intermediate_transpose_axes = info.o_axes + info.reduction_axes
        self.intermediate_shape = TransposeInfo(i_shape, self.intermediate_transpose_axes).o_shape

        # slices argument to fetch the processed tensor from the zero indexes
        self.inter_slices = ( slice(None,None,None), ) * info.o_axes.ndim + (0,) * info.reduction_axes.ndim

        # COLS are the reduction axes, ROWS are the remaining axes
        rows_ndim = info.o_axes.ndim
        self.ROWS = ROWS = self.intermediate_shape[:rows_ndim].size
        self.COLS = COLS = self.intermediate_shape[rows_ndim:].size

        # Number of pairwise stages needed to reduce COLS
        n_stages = (COLS-1).bit_length()
        self.forward_krn_shapes           = [ (ROWS * math.ceil(COLS / (2**(stage+1)) ),) for stage in range(n_stages) ]
        self.forward_krn_stage_cols       = [ math.ceil(COLS / (2**(stage+1)) ) for stage in range(n_stages) ]
        self.forward_krn_stage_valid_cols = [ math.ceil(COLS / (2** stage    ) ) for stage in range(n_stages) ]

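        # Stage sizing example (illustrative): with COLS=5, n_stages is 3 and the lists above become
        # forward_krn_shapes [(ROWS*3,), (ROWS*2,), (ROWS*1,)], stage_cols [3, 2, 1] and
        # stage_valid_cols [5, 3, 2]; each stage folds the upper half of the still-valid columns onto
        # the lower half.
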
        self.forward_krn = Kernel(f"""
{HKernel.define_tensor('I', (1,), i_dtype)}

__kernel void impl(__global I_PTR_TYPE* I_PTR_NAME, long COLS, long STAGE_COLS, long STAGE_VALID_COLS)
{{
    size_t gid = get_global_id(0);

    size_t col = gid % STAGE_COLS;
    size_t row = gid / STAGE_COLS;
    size_t i_idx = row*COLS + col;

    size_t other_col = col + STAGE_COLS;
    if (other_col < STAGE_VALID_COLS)
    {{
        I_TYPE val_a = I_GLOBAL_LOAD(i_idx);
        I_TYPE val_b = I_GLOBAL_LOAD(row*COLS + other_col);

        {'I_TYPE val_x = val_a + val_b;' if op_type in ['sum','mean'] else
         'I_TYPE val_x = fmin( I_TO_FLOATX(val_a), I_TO_FLOATX(val_b) );' if op_type == 'min' else
         'I_TYPE val_x = fmax( I_TO_FLOATX(val_a), I_TO_FLOATX(val_b) );' if op_type == 'max' else ''
        }
        I_GLOBAL_STORE(i_idx, val_x);
    }}
}}
""")
        self.mean_div_forward_krn = Kernel(f"""
{HKernel.define_tensor('I', (1,), i_dtype)}

__kernel void impl(__global I_PTR_TYPE* I_PTR_NAME, long COLS)
{{
    size_t row = get_global_id(0);
    I_GLOBAL_STORE(row*COLS, I_GLOBAL_LOAD(row*COLS) / COLS );
}}
""")