mirror of
https://github.com/iperov/DeepFaceLive
synced 2025-08-19 04:59:28 -07:00
update xlib.avecl
This commit is contained in:
parent
7aef4d2b1e
commit
acbe4957e6
3 changed files with 16 additions and 14 deletions
|
@ -82,9 +82,9 @@ class Tensor:
|
||||||
self.get_buffer().set(value)
|
self.get_buffer().set(value)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def np(self):
|
def np(self, out=None):
|
||||||
"""Returns numpy value of a Tensor"""
|
"""Returns numpy value of a Tensor"""
|
||||||
return self.get_buffer().np(self.shape, self.dtype)
|
return self.get_buffer().np(self.shape, self.dtype, out=out)
|
||||||
|
|
||||||
|
|
||||||
### OPERATORS
|
### OPERATORS
|
||||||
|
|
|
@ -85,20 +85,21 @@ class Buffer:
|
||||||
CL.clWaitForEvents(1, ( CL.cl_event * 1 )(ev) )
|
CL.clWaitForEvents(1, ( CL.cl_event * 1 )(ev) )
|
||||||
CL.clReleaseEvent(ev)
|
CL.clReleaseEvent(ev)
|
||||||
|
|
||||||
def np(self, shape : Iterable, dtype : np.dtype):
|
def np(self, shape : Iterable, dtype : np.dtype, out=None):
|
||||||
"""
|
"""
|
||||||
Returns data of buffer as np.ndarray with specified shape and dtype
|
Returns data of buffer as np.ndarray with specified shape and dtype
|
||||||
"""
|
"""
|
||||||
out_np_value = np.empty (shape, dtype)
|
if out is None:
|
||||||
|
out = np.empty (shape, dtype)
|
||||||
|
|
||||||
if out_np_value.nbytes != self._size:
|
if out.nbytes != self._size:
|
||||||
raise ValueError(f'Unable to represent Buffer with size {self._size} as shape {shape} with dtype {dtype}')
|
raise ValueError(f'Unable to represent Buffer with size {self._size} as shape {shape} with dtype {dtype}')
|
||||||
|
|
||||||
clr = CL.clEnqueueReadBuffer(self._device._get_ctx_q(), self.get_cl_mem(), True, 0, self._size, out_np_value.ctypes.data, 0, None, None)
|
clr = CL.clEnqueueReadBuffer(self._device._get_ctx_q(), self.get_cl_mem(), True, 0, self._size, out.ctypes.data, 0, None, None)
|
||||||
if clr != CL.CLERROR.SUCCESS:
|
if clr != CL.CLERROR.SUCCESS:
|
||||||
raise Exception(f'clEnqueueReadBuffer error: {clr}')
|
raise Exception(f'clEnqueueReadBuffer error: {clr}')
|
||||||
|
|
||||||
return out_np_value
|
return out
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return f'Buffer [{self._size} bytes][{f"{self._cl_mem.value}" if self._cl_mem is not None else "unallocated"}] on {str(self._device)}'
|
return f'Buffer [{self._size} bytes][{f"{self._cl_mem.value}" if self._cl_mem is not None else "unallocated"}] on {str(self._device)}'
|
||||||
|
|
|
@ -86,7 +86,6 @@ class Device:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
compiled_krn, prog = self._cached_kernels.get(key, (None, None) )
|
compiled_krn, prog = self._cached_kernels.get(key, (None, None) )
|
||||||
|
|
||||||
if compiled_krn is None:
|
if compiled_krn is None:
|
||||||
clr = CL.CLRESULT()
|
clr = CL.CLRESULT()
|
||||||
prog = CL.clCreateProgramWithSource(self._get_ctx(), 1, CL.c_char_p(kernel_text.encode()), None, clr )
|
prog = CL.clCreateProgramWithSource(self._get_ctx(), 1, CL.c_char_p(kernel_text.encode()), None, clr )
|
||||||
|
@ -209,7 +208,7 @@ class Device:
|
||||||
"""
|
"""
|
||||||
pool = self._pooled_buffers
|
pool = self._pooled_buffers
|
||||||
mems = [ (k,x) for k in pool.keys() for x in pool[k] ]
|
mems = [ (k,x) for k in pool.keys() for x in pool[k] ]
|
||||||
|
|
||||||
if len(mems) != 0:
|
if len(mems) != 0:
|
||||||
mems = random.sample(mems, max(1,int(len(mems)*0.25)) )
|
mems = random.sample(mems, max(1,int(len(mems)*0.25)) )
|
||||||
for k, mem in mems:
|
for k, mem in mems:
|
||||||
|
@ -241,7 +240,7 @@ class Device:
|
||||||
self._pooled_buffers = {}
|
self._pooled_buffers = {}
|
||||||
self._total_memory_pooled = 0
|
self._total_memory_pooled = 0
|
||||||
self._total_buffers_pooled = 0
|
self._total_buffers_pooled = 0
|
||||||
|
|
||||||
def cleanup_cached_kernels(self):
|
def cleanup_cached_kernels(self):
|
||||||
for kernel, prog in self._cached_kernels.values():
|
for kernel, prog in self._cached_kernels.values():
|
||||||
clr = CL.clReleaseKernel(kernel)
|
clr = CL.clReleaseKernel(kernel)
|
||||||
|
@ -252,7 +251,7 @@ class Device:
|
||||||
if clr != CL.CLERROR.SUCCESS:
|
if clr != CL.CLERROR.SUCCESS:
|
||||||
raise Exception(f'clReleaseProgram error: {clr}')
|
raise Exception(f'clReleaseProgram error: {clr}')
|
||||||
self._cached_kernels = {}
|
self._cached_kernels = {}
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
"""
|
"""
|
||||||
Frees all resources from this Device.
|
Frees all resources from this Device.
|
||||||
|
@ -343,8 +342,6 @@ N of cacheddata: {len(self._cached_data)}
|
||||||
|
|
||||||
wait(False) wait execution to complete
|
wait(False) wait execution to complete
|
||||||
"""
|
"""
|
||||||
ckernel = self._compile_kernel(kernel, kernel.get_kernel_text())
|
|
||||||
|
|
||||||
if global_shape is None:
|
if global_shape is None:
|
||||||
global_shape = kernel.get_global_shape()
|
global_shape = kernel.get_global_shape()
|
||||||
if global_shape is None:
|
if global_shape is None:
|
||||||
|
@ -376,8 +373,8 @@ N of cacheddata: {len(self._cached_data)}
|
||||||
for i,v in enumerate(local_shape):
|
for i,v in enumerate(local_shape):
|
||||||
global_shape_offsets_ar[i] = v
|
global_shape_offsets_ar[i] = v
|
||||||
|
|
||||||
|
krn_args = []
|
||||||
for i, arg in enumerate(args):
|
for i, arg in enumerate(args):
|
||||||
|
|
||||||
if isinstance(arg, Buffer):
|
if isinstance(arg, Buffer):
|
||||||
arg = arg.get_cl_mem()
|
arg = arg.get_cl_mem()
|
||||||
else:
|
else:
|
||||||
|
@ -385,7 +382,11 @@ N of cacheddata: {len(self._cached_data)}
|
||||||
if cl_type is None:
|
if cl_type is None:
|
||||||
raise ValueError(f'Cannot convert type {arg.__class__} to OpenCL type.')
|
raise ValueError(f'Cannot convert type {arg.__class__} to OpenCL type.')
|
||||||
arg = cl_type(arg)
|
arg = cl_type(arg)
|
||||||
|
krn_args.append(arg)
|
||||||
|
|
||||||
|
ckernel = self._compile_kernel(kernel, kernel.get_kernel_text())
|
||||||
|
|
||||||
|
for i, arg in enumerate(krn_args):
|
||||||
clr = CL.clSetKernelArg(ckernel, i, CL.sizeof(arg), CL.byref(arg))
|
clr = CL.clSetKernelArg(ckernel, i, CL.sizeof(arg), CL.byref(arg))
|
||||||
if clr != CL.CLERROR.SUCCESS:
|
if clr != CL.CLERROR.SUCCESS:
|
||||||
raise Exception(f'clSetKernelArg error: {clr}')
|
raise Exception(f'clSetKernelArg error: {clr}')
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue