update xlib.avecl

This commit is contained in:
iperov 2021-10-22 19:31:35 +04:00
commit acbe4957e6
3 changed files with 16 additions and 14 deletions

View file

@ -82,9 +82,9 @@ class Tensor:
self.get_buffer().set(value) self.get_buffer().set(value)
return self return self
def np(self): def np(self, out=None):
"""Returns numpy value of a Tensor""" """Returns numpy value of a Tensor"""
return self.get_buffer().np(self.shape, self.dtype) return self.get_buffer().np(self.shape, self.dtype, out=out)
### OPERATORS ### OPERATORS

View file

@ -85,20 +85,21 @@ class Buffer:
CL.clWaitForEvents(1, ( CL.cl_event * 1 )(ev) ) CL.clWaitForEvents(1, ( CL.cl_event * 1 )(ev) )
CL.clReleaseEvent(ev) CL.clReleaseEvent(ev)
def np(self, shape : Iterable, dtype : np.dtype): def np(self, shape : Iterable, dtype : np.dtype, out=None):
""" """
Returns data of buffer as np.ndarray with specified shape and dtype Returns data of buffer as np.ndarray with specified shape and dtype
""" """
out_np_value = np.empty (shape, dtype) if out is None:
out = np.empty (shape, dtype)
if out_np_value.nbytes != self._size: if out.nbytes != self._size:
raise ValueError(f'Unable to represent Buffer with size {self._size} as shape {shape} with dtype {dtype}') raise ValueError(f'Unable to represent Buffer with size {self._size} as shape {shape} with dtype {dtype}')
clr = CL.clEnqueueReadBuffer(self._device._get_ctx_q(), self.get_cl_mem(), True, 0, self._size, out_np_value.ctypes.data, 0, None, None) clr = CL.clEnqueueReadBuffer(self._device._get_ctx_q(), self.get_cl_mem(), True, 0, self._size, out.ctypes.data, 0, None, None)
if clr != CL.CLERROR.SUCCESS: if clr != CL.CLERROR.SUCCESS:
raise Exception(f'clEnqueueReadBuffer error: {clr}') raise Exception(f'clEnqueueReadBuffer error: {clr}')
return out_np_value return out
def __str__(self): def __str__(self):
return f'Buffer [{self._size} bytes][{f"{self._cl_mem.value}" if self._cl_mem is not None else "unallocated"}] on {str(self._device)}' return f'Buffer [{self._size} bytes][{f"{self._cl_mem.value}" if self._cl_mem is not None else "unallocated"}] on {str(self._device)}'

View file

@ -86,7 +86,6 @@ class Device:
""" """
compiled_krn, prog = self._cached_kernels.get(key, (None, None) ) compiled_krn, prog = self._cached_kernels.get(key, (None, None) )
if compiled_krn is None: if compiled_krn is None:
clr = CL.CLRESULT() clr = CL.CLRESULT()
prog = CL.clCreateProgramWithSource(self._get_ctx(), 1, CL.c_char_p(kernel_text.encode()), None, clr ) prog = CL.clCreateProgramWithSource(self._get_ctx(), 1, CL.c_char_p(kernel_text.encode()), None, clr )
@ -209,7 +208,7 @@ class Device:
""" """
pool = self._pooled_buffers pool = self._pooled_buffers
mems = [ (k,x) for k in pool.keys() for x in pool[k] ] mems = [ (k,x) for k in pool.keys() for x in pool[k] ]
if len(mems) != 0: if len(mems) != 0:
mems = random.sample(mems, max(1,int(len(mems)*0.25)) ) mems = random.sample(mems, max(1,int(len(mems)*0.25)) )
for k, mem in mems: for k, mem in mems:
@ -241,7 +240,7 @@ class Device:
self._pooled_buffers = {} self._pooled_buffers = {}
self._total_memory_pooled = 0 self._total_memory_pooled = 0
self._total_buffers_pooled = 0 self._total_buffers_pooled = 0
def cleanup_cached_kernels(self): def cleanup_cached_kernels(self):
for kernel, prog in self._cached_kernels.values(): for kernel, prog in self._cached_kernels.values():
clr = CL.clReleaseKernel(kernel) clr = CL.clReleaseKernel(kernel)
@ -252,7 +251,7 @@ class Device:
if clr != CL.CLERROR.SUCCESS: if clr != CL.CLERROR.SUCCESS:
raise Exception(f'clReleaseProgram error: {clr}') raise Exception(f'clReleaseProgram error: {clr}')
self._cached_kernels = {} self._cached_kernels = {}
def cleanup(self): def cleanup(self):
""" """
Frees all resources from this Device. Frees all resources from this Device.
@ -343,8 +342,6 @@ N of cacheddata: {len(self._cached_data)}
wait(False) wait execution to complete wait(False) wait execution to complete
""" """
ckernel = self._compile_kernel(kernel, kernel.get_kernel_text())
if global_shape is None: if global_shape is None:
global_shape = kernel.get_global_shape() global_shape = kernel.get_global_shape()
if global_shape is None: if global_shape is None:
@ -376,8 +373,8 @@ N of cacheddata: {len(self._cached_data)}
for i,v in enumerate(local_shape): for i,v in enumerate(local_shape):
global_shape_offsets_ar[i] = v global_shape_offsets_ar[i] = v
krn_args = []
for i, arg in enumerate(args): for i, arg in enumerate(args):
if isinstance(arg, Buffer): if isinstance(arg, Buffer):
arg = arg.get_cl_mem() arg = arg.get_cl_mem()
else: else:
@ -385,7 +382,11 @@ N of cacheddata: {len(self._cached_data)}
if cl_type is None: if cl_type is None:
raise ValueError(f'Cannot convert type {arg.__class__} to OpenCL type.') raise ValueError(f'Cannot convert type {arg.__class__} to OpenCL type.')
arg = cl_type(arg) arg = cl_type(arg)
krn_args.append(arg)
ckernel = self._compile_kernel(kernel, kernel.get_kernel_text())
for i, arg in enumerate(krn_args):
clr = CL.clSetKernelArg(ckernel, i, CL.sizeof(arg), CL.byref(arg)) clr = CL.clSetKernelArg(ckernel, i, CL.sizeof(arg), CL.byref(arg))
if clr != CL.CLERROR.SUCCESS: if clr != CL.CLERROR.SUCCESS:
raise Exception(f'clSetKernelArg error: {clr}') raise Exception(f'clSetKernelArg error: {clr}')