mirror of
https://github.com/iperov/DeepFaceLive
synced 2025-08-19 04:59:28 -07:00
update xlib.avecl
This commit is contained in:
parent
7aef4d2b1e
commit
acbe4957e6
3 changed files with 16 additions and 14 deletions
|
@ -82,9 +82,9 @@ class Tensor:
|
|||
self.get_buffer().set(value)
|
||||
return self
|
||||
|
||||
def np(self):
|
||||
def np(self, out=None):
|
||||
"""Returns numpy value of a Tensor"""
|
||||
return self.get_buffer().np(self.shape, self.dtype)
|
||||
return self.get_buffer().np(self.shape, self.dtype, out=out)
|
||||
|
||||
|
||||
### OPERATORS
|
||||
|
|
|
@ -85,20 +85,21 @@ class Buffer:
|
|||
CL.clWaitForEvents(1, ( CL.cl_event * 1 )(ev) )
|
||||
CL.clReleaseEvent(ev)
|
||||
|
||||
def np(self, shape : Iterable, dtype : np.dtype):
|
||||
def np(self, shape : Iterable, dtype : np.dtype, out=None):
|
||||
"""
|
||||
Returns data of buffer as np.ndarray with specified shape and dtype
|
||||
"""
|
||||
out_np_value = np.empty (shape, dtype)
|
||||
if out is None:
|
||||
out = np.empty (shape, dtype)
|
||||
|
||||
if out_np_value.nbytes != self._size:
|
||||
if out.nbytes != self._size:
|
||||
raise ValueError(f'Unable to represent Buffer with size {self._size} as shape {shape} with dtype {dtype}')
|
||||
|
||||
clr = CL.clEnqueueReadBuffer(self._device._get_ctx_q(), self.get_cl_mem(), True, 0, self._size, out_np_value.ctypes.data, 0, None, None)
|
||||
clr = CL.clEnqueueReadBuffer(self._device._get_ctx_q(), self.get_cl_mem(), True, 0, self._size, out.ctypes.data, 0, None, None)
|
||||
if clr != CL.CLERROR.SUCCESS:
|
||||
raise Exception(f'clEnqueueReadBuffer error: {clr}')
|
||||
|
||||
return out_np_value
|
||||
return out
|
||||
|
||||
def __str__(self):
|
||||
return f'Buffer [{self._size} bytes][{f"{self._cl_mem.value}" if self._cl_mem is not None else "unallocated"}] on {str(self._device)}'
|
||||
|
|
|
@ -86,7 +86,6 @@ class Device:
|
|||
"""
|
||||
|
||||
compiled_krn, prog = self._cached_kernels.get(key, (None, None) )
|
||||
|
||||
if compiled_krn is None:
|
||||
clr = CL.CLRESULT()
|
||||
prog = CL.clCreateProgramWithSource(self._get_ctx(), 1, CL.c_char_p(kernel_text.encode()), None, clr )
|
||||
|
@ -343,8 +342,6 @@ N of cacheddata: {len(self._cached_data)}
|
|||
|
||||
wait(False) wait execution to complete
|
||||
"""
|
||||
ckernel = self._compile_kernel(kernel, kernel.get_kernel_text())
|
||||
|
||||
if global_shape is None:
|
||||
global_shape = kernel.get_global_shape()
|
||||
if global_shape is None:
|
||||
|
@ -376,8 +373,8 @@ N of cacheddata: {len(self._cached_data)}
|
|||
for i,v in enumerate(local_shape):
|
||||
global_shape_offsets_ar[i] = v
|
||||
|
||||
krn_args = []
|
||||
for i, arg in enumerate(args):
|
||||
|
||||
if isinstance(arg, Buffer):
|
||||
arg = arg.get_cl_mem()
|
||||
else:
|
||||
|
@ -385,7 +382,11 @@ N of cacheddata: {len(self._cached_data)}
|
|||
if cl_type is None:
|
||||
raise ValueError(f'Cannot convert type {arg.__class__} to OpenCL type.')
|
||||
arg = cl_type(arg)
|
||||
krn_args.append(arg)
|
||||
|
||||
ckernel = self._compile_kernel(kernel, kernel.get_kernel_text())
|
||||
|
||||
for i, arg in enumerate(krn_args):
|
||||
clr = CL.clSetKernelArg(ckernel, i, CL.sizeof(arg), CL.byref(arg))
|
||||
if clr != CL.CLERROR.SUCCESS:
|
||||
raise Exception(f'clSetKernelArg error: {clr}')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue