mirror of
https://github.com/iperov/DeepFaceLive
synced 2025-08-21 14:03:20 -07:00
add xlib.avecl
This commit is contained in:
parent
932edfe875
commit
0058474da7
56 changed files with 5569 additions and 0 deletions
109
xlib/avecl/_internal/backend/Buffer.py
Normal file
109
xlib/avecl/_internal/backend/Buffer.py
Normal file
|
@ -0,0 +1,109 @@
|
|||
from typing import Iterable, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from . import OpenCL as CL
|
||||
|
||||
class Buffer:
    """
    Represents physical buffer associated with physical device.

    The OpenCL memory object is not created by the constructor. It is requested
    from the device memory pool on the first get_cl_mem() call and handed back
    to that pool by free_cl_mem(), which is also called from __del__.
    """
    __slots__ = ['_device','_cl_mem','_size','_on_initialize']

    # Number of Buffer objects currently alive
    # (incremented in __init__, decremented in __del__).
    _object_count = 0

    def __init__(self, device : 'Device', size : int, on_initialize = None):
        """
        arguments

            device              Device

            size                int     size of the buffer in bytes

            on_initialize(None)         callable without arguments, invoked by
                                        get_cl_mem() every time a cl_mem has just
                                        been obtained for this Buffer
        """
        Buffer._object_count += 1
        self._device = device
        self._size = size
        self._cl_mem = None
        self._on_initialize = on_initialize

    def __del__(self):
        #print('Buffer.__del__')
        Buffer._object_count -= 1
        self.free_cl_mem()

    def get_device(self) -> 'Device': return self._device
    def get_size(self) -> int: return self._size

    def has_cl_mem(self) -> bool: return self._cl_mem is not None

    def get_cl_mem(self) -> CL.cl_mem:
        """
        Returns the cl_mem of this Buffer, obtaining it from the device pool on demand.
        """
        if self._cl_mem is None:
            self._cl_mem = self._device._cl_mem_pool_alloc(self._size)
            if self._on_initialize is not None:
                self._on_initialize()

        return self._cl_mem

    def free_cl_mem(self):
        """
        Puts the cl_mem (if any) back to the device pool. The Buffer stays usable:
        next get_cl_mem() obtains a cl_mem again.
        """
        if self._cl_mem is not None:
            self._device._cl_mem_pool_free(self._cl_mem)
            self._cl_mem = None

    def set(self, value : Union['Buffer', np.ndarray]):
        """
        Parameters

            value   Buffer      copy data from other Buffer.

                    np.ndarray  copies values from ndarray
                                to Buffer's memory

        Raises

            Exception            sizes of Buffers differ, or an OpenCL call failed
            NotImplementedError  the other Buffer belongs to a different device
            ValueError           value is not Buffer/np.ndarray, has a different
                                 byte size, or cannot be made contiguous
        """
        if isinstance(value, Buffer):
            if self != value:
                if self._size != value._size:
                    raise Exception(f'Unable to copy from Buffer with {value._size} size to buffer with {self._size} size.')

                if self._device == value._device:
                    clr = CL.clEnqueueCopyBuffer(self._device._get_ctx_q(), value.get_cl_mem(), self.get_cl_mem(), 0, 0, self._size, 0, None, None)
                    # A failed enqueue must not pass silently, otherwise this Buffer keeps stale data.
                    if clr != CL.CLERROR.SUCCESS:
                        raise Exception(f'clEnqueueCopyBuffer error: {clr}')
                else:
                    # Transfer between devices will cause low performance
                    raise NotImplementedError()
        else:
            if not isinstance(value, np.ndarray):
                raise ValueError (f'Invalid type {value.__class__}. Must be np.ndarray.')

            if value.nbytes != self._size:
                raise ValueError(f'Value size {value.nbytes} does not match Buffer size {self._size}.')

            if not value.flags.contiguous:
                # reshape of a non-contiguous array produces a contiguous copy
                value = value.reshape(-1)
                if not value.flags.contiguous:
                    raise ValueError ("Unable to write from non-contiguous np array.")

            ev = CL.cl_event()

            # Non-blocking write: the event is awaited below, so `value` stays alive until the copy is done.
            clr = CL.clEnqueueWriteBuffer(self._device._get_ctx_q(), self.get_cl_mem(), False, 0, value.nbytes, value.ctypes.data_as(CL.c_void_p), 0, None, ev)
            if clr != CL.CLERROR.SUCCESS:
                raise Exception(f'clEnqueueWriteBuffer error: {clr}')

            CL.clWaitForEvents(1, ( CL.cl_event * 1 )(ev) )
            CL.clReleaseEvent(ev)

    def np(self, shape : Iterable, dtype : np.dtype):
        """
        Returns data of buffer as np.ndarray with specified shape and dtype

        Raises ValueError if shape*dtype does not occupy exactly the Buffer size.
        """
        out_np_value = np.empty (shape, dtype)

        if out_np_value.nbytes != self._size:
            raise ValueError(f'Unable to represent Buffer with size {self._size} as shape {shape} with dtype {dtype}')

        # Blocking read (third argument True)
        clr = CL.clEnqueueReadBuffer(self._device._get_ctx_q(), self.get_cl_mem(), True, 0, self._size, out_np_value.ctypes.data, 0, None, None)
        if clr != CL.CLERROR.SUCCESS:
            raise Exception(f'clEnqueueReadBuffer error: {clr}')

        return out_np_value

    def __str__(self):
        return f'Buffer [{self._size} bytes][{f"{self._cl_mem.value}" if self._cl_mem is not None else "unallocated"}] on {str(self._device)}'

    def __repr__(self):
        return self.__str__()
|
522
xlib/avecl/_internal/backend/Device.py
Normal file
522
xlib/avecl/_internal/backend/Device.py
Normal file
|
@ -0,0 +1,522 @@
|
|||
from typing import List, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from . import OpenCL as CL
|
||||
from .Buffer import Buffer
|
||||
from .DeviceInfo import DeviceInfo
|
||||
from .Kernel import Kernel
|
||||
|
||||
# Maps the class of a numpy scalar to the ctypes OpenCL type
# used to pass that scalar as a kernel argument.
_np_dtype_to_cl = {
    # integer types
    np.uint8   : CL.cl_uchar,
    np.int8    : CL.cl_char,
    np.uint16  : CL.cl_ushort,
    np.int16   : CL.cl_short,
    np.uint32  : CL.cl_uint,
    np.int32   : CL.cl_int,
    np.uint64  : CL.cl_ulong,
    np.int64   : CL.cl_long,
    # floating point types
    np.float16 : CL.cl_half,
    np.float32 : CL.cl_float,
    np.float64 : CL.cl_double,
}

# Filled on first use by _get_opencl_device_ids()
_opencl_device_ids = None

# Filled by get_default_device() / set_default_device()
_default_device = None

# Device objects created by get_device(), keyed by DeviceInfo
_devices = {}
|
||||
|
||||
class Device:
    """
    Represents physical TensorCL device

    Owns the OpenCL context and command queue (both created on demand),
    the compiled kernels, a pool of released cl_mem objects that are reused
    by size, and arbitrary per-device cached data.

    Do not construct directly, use get_device().
    """

    def __init__(self, device_info : DeviceInfo, **kwargs):
        if kwargs.get('_check', None) is None:
            raise Exception('You should not to create Device from constructor. Use get_device()')

        self._cached_data = {}        # cached data (per device) by key
        self._pooled_buffers = {}     # Pool of cached device buffers: { size : [cl_mem, ...] }
        self._compiled_kernels = {}   # compiled kernels by key: { key : (cl_kernel, cl_program) }
        self._ctx_q = None            # CL command queue
        self._ctx = None              # CL context

        self._total_memory_allocated = 0
        self._total_buffers_allocated = 0
        self._total_memory_pooled = 0
        self._total_buffers_pooled = 0

        self._device_info = device_info
        self._device_id = _get_opencl_device_ids()[device_info.get_index()]

    def __del__(self):
        self.cleanup()

    def __eq__(self, other):
        # Devices are equal when they refer to the same OpenCL device id
        if self is not None and other is not None and isinstance(self, Device) and isinstance(other, Device):
            return self._device_id.value == other._device_id.value
        return False

    def __hash__(self):
        return self._device_id.value

    def _get_ctx(self) -> CL.cl_context:
        # Create OpenCL context on demand
        if self._ctx is None:
            clr = CL.CLRESULT()
            ctx = CL.clCreateContext( None, 1, (CL.cl_device_id * 1)( self._device_id ), None, None, clr)
            if clr != CL.CLERROR.SUCCESS:
                raise Exception('Unable to create OpenCL context.')
            self._ctx = ctx
        return self._ctx

    def _get_ctx_q(self) -> CL.cl_command_queue:
        # Create CommandQueue on demand
        if self._ctx_q is None:
            clr = CL.CLRESULT()
            ctx_q = CL.clCreateCommandQueue(self._get_ctx(), self._device_id, CL.cl_command_queue_properties(0), clr)
            if clr != CL.CLERROR.SUCCESS:
                raise Exception('Unable to create OpenCL CommandQueue.')
            self._ctx_q = ctx_q
        return self._ctx_q

    def get_description(self) -> str:
        return f"{self._device_info.get_name()} [{(self._device_info.get_total_memory() / 1024**3) :.3}Gb]"

    def __str__(self):
        return self.get_description()

    def __repr__(self):
        return f'{self.__class__.__name__} object: ' + self.__str__()

    def set_cached_data(self, key, value):
        """

        All cached data will be freed with cleanup()
        """
        self._cached_data[key] = value

    def get_cached_data(self, key):
        """
        Returns data stored by set_cached_data(), or None if the key is absent.
        """
        return self._cached_data.get(key, None)

    def get_total_allocated_memory(self):
        return self._total_memory_allocated

    def get_max_malloc_size(self) -> int:
        """
        Returns CL_DEVICE_MAX_MEM_ALLOC_SIZE of the device.
        """
        size = CL.cl_ulong()
        clr = CL.clGetDeviceInfo(self._device_id, CL.CL_DEVICE_MAX_MEM_ALLOC_SIZE, CL.sizeof(size), CL.byref(size), None)
        if clr != CL.CLERROR.SUCCESS:
            raise Exception(f'clGetDeviceInfo error: {clr}')
        return size.value

    def _compile_kernel(self, key, kernel_text) -> CL.cl_kernel:
        """
        compile or get cached kernel

        Raises Exception on any OpenCL failure. If the build fails,
        the message contains the build log.
        """

        compiled_krn, prog = self._compiled_kernels.get(key, (None, None) )

        if compiled_krn is None:
            clr = CL.CLRESULT()
            prog = CL.clCreateProgramWithSource(self._get_ctx(), 1, CL.c_char_p(kernel_text.encode()), None, clr )
            if clr != CL.CLERROR.SUCCESS:
                raise Exception(f'clCreateProgramWithSource error {clr}, with kernel_text:\n{kernel_text}')

            clr = CL.clBuildProgram(prog, 1, (CL.cl_device_id*1)(self._device_id), CL.c_char_p('-cl-std=CL1.2 -cl-single-precision-constant'.encode()), None, None )
            if clr != CL.CLERROR.SUCCESS:
                # First query only the size of the build log, then fetch the log itself
                build_log_size = CL.c_size_t()
                clr = CL.clGetProgramBuildInfo(prog, self._device_id, CL.CL_PROGRAM_BUILD_LOG, 0, None, CL.byref(build_log_size) )
                if clr != CL.CLERROR.SUCCESS:
                    raise Exception(f'clGetProgramBuildInfo,error: {clr}')

                build_log = CL.create_string_buffer(build_log_size.value)
                clr = CL.clGetProgramBuildInfo(prog, self._device_id, CL.CL_PROGRAM_BUILD_LOG, build_log_size.value, build_log, None )
                if clr != CL.CLERROR.SUCCESS:
                    raise Exception(f'clGetProgramBuildInfo error: {clr}')

                build_log = str(build_log.value, 'utf-8')
                raise Exception(f'clBuildProgram error:\n\n{build_log}')

            # First call only counts the kernels of the program
            num_kernels = CL.cl_uint()
            clr = CL.clCreateKernelsInProgram(prog, 0, None, CL.byref(num_kernels))
            if clr != CL.CLERROR.SUCCESS:
                raise Exception(f'clCreateKernelsInProgram error: {clr}')

            if num_kernels.value != 1:
                raise Exception(f'Kernel must contain only one __kernel:\n\n{kernel_text}')

            kernels = (CL.cl_kernel * num_kernels.value)()
            clr = CL.clCreateKernelsInProgram(prog, num_kernels.value, kernels, None)
            if clr != CL.CLERROR.SUCCESS:
                raise Exception(f'clCreateKernelsInProgram error: {clr}')

            compiled_krn = kernels[0]
            self._compiled_kernels[key] = (compiled_krn, prog)

        return compiled_krn

    def _cl_mem_alloc(self, size) -> CL.cl_mem:
        """
        Allocates new cl_mem of given size. Returns None if the allocation failed.
        """
        clr = CL.CLRESULT()
        mem = CL.clCreateBuffer(self._get_ctx(), CL.CL_MEM_READ_WRITE, size, None, clr)
        if clr != CL.CLERROR.SUCCESS:
            return None

        # Fill one byte to check memory availability
        ev = CL.cl_event()
        clr = CL.clEnqueueFillBuffer (self._get_ctx_q(), mem, (CL.c_char * 1)(), 1, 0, 1, 0, None, ev )
        if clr != CL.CLERROR.SUCCESS:
            # The memory object was created but is not usable:
            # release it, otherwise it is leaked on every failed attempt.
            CL.clReleaseMemObject(mem)
            return None

        CL.clReleaseEvent(ev)
        self._total_memory_allocated += size
        self._total_buffers_allocated += 1
        return mem

    def _cl_mem_free(self, mem : CL.cl_mem):
        """
        Releases cl_mem and updates allocation counters.
        """
        size = CL.c_size_t()
        clr = CL.clGetMemObjectInfo(mem, CL.CL_MEM_SIZE, CL.sizeof(size), CL.byref(size), None )
        if clr != CL.CLERROR.SUCCESS:
            raise Exception(f'clGetMemObjectInfo error: {clr}')
        size = size.value
        self._total_memory_allocated -= size
        self._total_buffers_allocated -= 1
        clr = CL.clReleaseMemObject(mem)
        if clr != CL.CLERROR.SUCCESS:
            raise Exception(f'clReleaseMemObject error: {clr}')

    def _cl_mem_pool_alloc(self, size):
        """
        allocate or get cl_mem from pool

        If a new allocation fails, pooled buffers are released one by one
        (largest first) and the allocation is retried. Raises Exception
        when nothing is left to release.
        """
        pool = self._pooled_buffers

        # First try to get pooled buffer
        ar = pool.get(size, None)
        if ar is not None and len(ar) != 0:
            mem = ar.pop(-1)
            self._total_memory_pooled -= size
            self._total_buffers_pooled -= 1
        else:
            # No pooled buffer, try to allocate new
            while True:
                mem = self._cl_mem_alloc(size)
                if mem is None:
                    # MemoryError. Finding largest pooled buffer to release
                    buf_to_release = None
                    for size_key in sorted(list(pool.keys()), reverse=True):
                        ar = pool[size_key]
                        if len(ar) != 0:
                            buf_to_release = ar.pop(-1)
                            # The buffer leaves the pool: keep pooled counters in sync
                            self._total_memory_pooled -= size_key
                            self._total_buffers_pooled -= 1
                            break

                    if buf_to_release is not None:
                        # Release pooled buffer and try to allocate again
                        self._cl_mem_free(buf_to_release)
                        continue

                    raise Exception(f'Unable to allocate {size // 1024**2}Mb on {str(self)}')

                break

        return mem

    def _cl_mem_pool_free(self, mem : CL.cl_mem):
        """
        Put cl_mem to pool for reuse in future.
        """
        size = CL.c_size_t()
        clr = CL.clGetMemObjectInfo(mem, CL.CL_MEM_SIZE, CL.sizeof(size), CL.byref(size), None )
        if clr != CL.CLERROR.SUCCESS:
            raise Exception(f'clGetMemObjectInfo error: {clr}')
        size = size.value

        pool = self._pooled_buffers
        ar = pool.get(size, None)
        if ar is None:
            ar = pool[size] = []
        ar.append(mem)

        self._total_memory_pooled += size
        self._total_buffers_pooled += 1

    def print_stat(self):
        s = f'''
Total memory allocated: {self._total_memory_allocated}
Total buffers allocated: {self._total_buffers_allocated}
Total memory pooled: {self._total_memory_pooled}
Total buffers pooled: {self._total_buffers_pooled}
N of compiled kernels: {len(self._compiled_kernels)}
N of cacheddata: {len(self._cached_data)}
'''
        print(s)

    def run_kernel(self, kernel : Kernel, *args, global_shape=None, local_shape=None, global_shape_offsets=None, wait=False):
        """
        Run kernel on Device

        Arguments

            *args           arguments will be passed to OpenCL kernel
                            allowed types:

                            Buffer
                            np single value

            global_shape(None)  tuple of ints, up to 3 dims
                                amount of parallel kernel executions.
                                in OpenCL kernel,
                                id can be obtained via get_global_id(dim)

            local_shape(None)   tuple of ints, up to 3 dims
                                specifies local groups of every dim of global_shape.
                                in OpenCL kernel,
                                id can be obtained via get_local_id(dim)

            global_shape_offsets(None)  tuple of ints
                                        offsets for global_shape

            wait(False)     wait execution to complete

        Raises

            ValueError      global_shape is not defined, lengths of shapes do not match,
                            or an argument type cannot be converted to OpenCL type
            Exception       an OpenCL call failed
        """
        ckernel = self._compile_kernel(kernel, kernel.get_kernel_text())

        if global_shape is None:
            global_shape = kernel.get_global_shape()
        if global_shape is None:
            raise ValueError('global_shape must be defined.')

        work_dim = len(global_shape)
        global_shape_ar = (CL.c_size_t*work_dim)()
        for i,v in enumerate(global_shape):
            global_shape_ar[i] = v

        local_shape_ar = None
        if local_shape is None:
            local_shape = kernel.get_local_shape()
        if local_shape is not None:
            if len(local_shape) != work_dim:
                raise ValueError('len of local_shape must match global_shape')

            local_shape_ar = (CL.c_size_t*work_dim)()
            for i,v in enumerate(local_shape):
                local_shape_ar[i] = v


        global_shape_offsets_ar = None
        if global_shape_offsets is not None:
            if len(global_shape_offsets) != work_dim:
                raise ValueError('len of global_shape_offsets must match global_shape')

            global_shape_offsets_ar = (CL.c_size_t*work_dim)()
            # Offsets come from global_shape_offsets itself, not from local_shape
            for i,v in enumerate(global_shape_offsets):
                global_shape_offsets_ar[i] = v

        for i, arg in enumerate(args):

            if isinstance(arg, Buffer):
                arg = arg.get_cl_mem()
            else:
                cl_type = _np_dtype_to_cl.get(arg.__class__, None)
                if cl_type is None:
                    raise ValueError(f'Cannot convert type {arg.__class__} to OpenCL type.')
                arg = cl_type(arg)

            clr = CL.clSetKernelArg(ckernel, i, CL.sizeof(arg), CL.byref(arg))
            if clr != CL.CLERROR.SUCCESS:
                raise Exception(f'clSetKernelArg error: {clr}')

        # The event is requested only if the caller wants to wait for completion
        ev = CL.cl_event() if wait else None

        clr = CL.clEnqueueNDRangeKernel(self._get_ctx_q(), ckernel, work_dim, global_shape_offsets_ar, global_shape_ar, local_shape_ar, 0, None, ev)
        if clr != CL.CLERROR.SUCCESS:
            raise Exception(f'clEnqueueNDRangeKernel error: {clr}')

        if wait:
            CL.clWaitForEvents(1, (CL.cl_event*1)(ev) )
            CL.clReleaseEvent(ev)

    def wait(self):
        """
        Wait to finish all queued operations on this Device
        """
        clr = CL.clFinish(self._get_ctx_q())
        if clr != CL.CLERROR.SUCCESS:
            raise Exception(f'clFinish error: {clr}')

    def cleanup(self):
        """
        Frees all resources from this Device.

        Raises Exception if some Buffers still hold memory of this Device,
        or if an OpenCL release call failed.
        """
        self._cached_data = {}

        pool = self._pooled_buffers
        for size_key in pool.keys():
            for mem in pool[size_key]:
                self._cl_mem_free(mem)
        self._pooled_buffers = {}
        self._total_memory_pooled = 0
        self._total_buffers_pooled = 0

        if self._total_memory_allocated != 0:
            raise Exception('Unable to cleanup CLDevice, while not all Buffers are deallocated.')

        for kernel, prog in self._compiled_kernels.values():
            clr = CL.clReleaseKernel(kernel)
            if clr != CL.CLERROR.SUCCESS:
                raise Exception(f'clReleaseKernel error: {clr}')

            clr = CL.clReleaseProgram(prog)
            if clr != CL.CLERROR.SUCCESS:
                raise Exception(f'clReleaseProgram error: {clr}')
        self._compiled_kernels = {}

        if self._ctx_q is not None:
            clr = CL.clReleaseCommandQueue(self._ctx_q)
            if clr != CL.CLERROR.SUCCESS:
                raise Exception(f'clReleaseCommandQueue error: {clr}')
            self._ctx_q = None

        if self._ctx is not None:
            clr = CL.clReleaseContext(self._ctx)
            if clr != CL.CLERROR.SUCCESS:
                raise Exception(f'clReleaseContext error: {clr}')
            self._ctx = None
|
||||
|
||||
def _get_opencl_device_ids() -> List[CL.cl_device_id]:
    """
    Returns the list of usable OpenCL device ids.

    The platforms are enumerated only on the first call, the result is kept
    in the module-level _opencl_device_ids. Only CPU / ACCELERATOR / GPU devices
    reporting OpenCL version 1.2 or higher are included.
    """
    global _opencl_device_ids

    if _opencl_device_ids is not None:
        return _opencl_device_ids

    # Same list object is published globally and filled below
    _opencl_device_ids = found_ids = []
    wanted_types = CL.CL_DEVICE_TYPE_CPU | CL.CL_DEVICE_TYPE_ACCELERATOR | CL.CL_DEVICE_TYPE_GPU

    platform_count = CL.cl_uint()
    if CL.clGetPlatformIDs(0, None, platform_count) != CL.CLERROR.SUCCESS or \
       platform_count.value == 0:
        return found_ids

    platform_ids = (CL.cl_platform_id * platform_count.value) ()
    if CL.clGetPlatformIDs(platform_count.value, platform_ids, None) != CL.CLERROR.SUCCESS:
        return found_ids

    for platform_idx in range(platform_count.value):
        platform_id = platform_ids[platform_idx]

        device_count = CL.cl_uint(0)
        if CL.clGetDeviceIDs(platform_id, wanted_types, 0, None, device_count) != CL.CLERROR.SUCCESS or \
           device_count.value == 0:
            continue

        platform_devices = (CL.cl_device_id * device_count.value)()
        if CL.clGetDeviceIDs(platform_id, wanted_types, device_count.value, platform_devices, None) != CL.CLERROR.SUCCESS:
            continue

        for device_idx in range(device_count.value):
            candidate = platform_devices[device_idx]
            if candidate is None:
                continue

            # Check OpenCL version.
            version_len = CL.c_size_t()
            if not (CL.clGetDeviceInfo(candidate, CL.CL_DEVICE_VERSION, 0, None, version_len) == CL.CLERROR.SUCCESS):
                continue

            version_buf = CL.create_string_buffer(version_len.value)
            if not (CL.clGetDeviceInfo(candidate, CL.CL_DEVICE_VERSION, version_len.value, version_buf, None) == CL.CLERROR.SUCCESS):
                continue

            # The second space-separated token is "<major>.<minor>"
            version_text = str(version_buf.value, 'ascii')
            major, minor = version_text.split(' ')[1].split('.')
            if int(major)*10+int(minor) >= 12:
                found_ids.append(candidate)

    return found_ids
|
||||
|
||||
def get_available_devices_info() -> List[DeviceInfo]:
    """
    returns a list of available picklable DeviceInfo's

    Index of every DeviceInfo is the position of the device
    in the list returned by _get_opencl_device_ids().
    """
    result = []

    for idx, cl_device in enumerate(_get_opencl_device_ids()):
        # Name, falls back to 'undefined' when any of the queries fails
        name = 'undefined'
        name_len = CL.c_size_t()
        if CL.clGetDeviceInfo(cl_device, CL.CL_DEVICE_NAME, 0, None, name_len) == CL.CLERROR.SUCCESS:
            name_buf = CL.create_string_buffer(name_len.value)
            if CL.clGetDeviceInfo(cl_device, CL.CL_DEVICE_NAME, name_len.value, name_buf, None) == CL.CLERROR.SUCCESS:
                name = str(name_buf.value, 'ascii')

        # Total memory, falls back to 0 when the query fails
        total_memory = 0
        mem_size = CL.cl_ulong()
        if CL.clGetDeviceInfo(cl_device, CL.CL_DEVICE_GLOBAL_MEM_SIZE, CL.sizeof(mem_size), CL.byref(mem_size), None) == CL.CLERROR.SUCCESS:
            total_memory = mem_size.value

        # Results of the next two queries are not checked
        vendor = CL.cl_uint()
        CL.clGetDeviceInfo(cl_device, CL.CL_DEVICE_VENDOR_ID, CL.sizeof(vendor), CL.byref(vendor), None)

        compute_units = CL.cl_uint()
        CL.clGetDeviceInfo(cl_device, CL.CL_DEVICE_MAX_COMPUTE_UNITS, CL.sizeof(compute_units), CL.byref(compute_units), None)

        # Performance level is the number of compute units, lowered for Intel devices
        level = compute_units.value
        if vendor.value == 0x8086: # Intel device
            level -= 1000

        result.append( DeviceInfo(index=idx, name=name, total_memory=total_memory, performance_level=level ) )

    return result
|
||||
|
||||
def get_default_device() -> Union[Device, None]:
    """
    Returns the default device.

    If it was not set yet, get_device(0) is stored as default first.
    """
    global _default_device

    if _default_device is not None:
        return _default_device

    _default_device = get_device(0)
    return _default_device
|
||||
|
||||
def set_default_device(device : Device):
    """
    Sets the device returned by get_default_device().

    Raises ValueError if device is not an instance of Device.
    """
    global _default_device

    if isinstance(device, Device):
        _default_device = device
    else:
        raise ValueError('device must be an instance of Device')
|
||||
|
||||
def get_device(arg : Union[None, int, Device, DeviceInfo]) -> Union[Device, None]:
    """
    get physical TensorCL device.

        arg     None        - get best device
                int         - by index
                DeviceInfo  - by device info
                Device      - returns the same

    Returns None if the index is not less than the number of available devices.
    Raises ValueError for any other type of arg.
    """
    global _devices

    if arg is None:
        return get_best_device()

    if isinstance(arg, Device):
        return arg

    if isinstance(arg, int):
        available = get_available_devices_info()
        if not (arg < len(available)):
            return None
        arg = available[arg]
    elif not isinstance(arg, DeviceInfo):
        raise ValueError(f'Unknown type of arg {arg.__class__}')

    # One Device object per DeviceInfo
    existing = _devices.get(arg, None)
    if existing is not None:
        return existing

    created = Device(arg, _check=1)
    _devices[arg] = created
    return created
|
||||
|
||||
def get_best_device() -> Union[Device, None]:
    """
    returns best device from available.

    The best one is the first device with the highest performance level.
    Returns None if there is no suitable device.
    """
    best_info = None
    best_level = -999999

    for candidate in get_available_devices_info():
        candidate_level = candidate.get_performance_level()
        if best_level < candidate_level:
            best_info, best_level = candidate, candidate_level

    if best_info is None:
        return None
    return get_device(best_info)
|
||||
|
||||
def cleanup_devices():
    """
    Calls cleanup() of every Device created by get_device() and forgets them.
    """
    global _devices

    # Snapshot of the values, then release one by one
    created_devices = list(_devices.values())
    for created in created_devices:
        created.cleanup()

    _devices = {}
|
44
xlib/avecl/_internal/backend/DeviceInfo.py
Normal file
44
xlib/avecl/_internal/backend/DeviceInfo.py
Normal file
|
@ -0,0 +1,44 @@
|
|||
class DeviceInfo:
    """
    Represents picklable OpenCL device info

    Two DeviceInfo are equal (and hash equally) when their indexes are equal,
    other fields are not compared.

    arguments

        index(None)             int     index of the device

        name(None)              str     name of the device

        total_memory(None)      int     total memory of the device in bytes

        performance_level(None) int     higher value means faster device
    """

    def __init__(self, index : int = None, name : str = None, total_memory : int = None, performance_level : int = None):
        self._index = index
        self._name = name
        self._total_memory = total_memory
        self._performance_level = performance_level

    def __getstate__(self):
        return self.__dict__.copy()

    def __setstate__(self, d):
        # Start from defaults, so a state lacking some field still produces a complete object
        self.__init__()
        self.__dict__.update(d)

    def get_index(self) -> int:
        return self._index

    def get_name(self) -> str:
        return self._name

    def get_total_memory(self) -> int:
        return self._total_memory

    def get_performance_level(self) -> int:
        return self._performance_level

    def __eq__(self, other):
        if isinstance(other, DeviceInfo):
            return self._index == other._index
        return False

    def __hash__(self):
        # hash() instead of the raw index: __hash__ must return an int,
        # while the index of a default-constructed DeviceInfo is None.
        return hash(self._index)

    def __str__(self):
        if self._total_memory is None:
            # total memory is optional, None cannot be divided
            memory = 'unknown'
        else:
            memory = f'{(self._total_memory / 1024**3) :.3}Gb'
        return f"[{self._index}] {self._name} [{memory}]"

    def __repr__(self):
        return f'{self.__class__.__name__} object: ' + self.__str__()
|
||||
|
26
xlib/avecl/_internal/backend/Kernel.py
Normal file
26
xlib/avecl/_internal/backend/Kernel.py
Normal file
|
@ -0,0 +1,26 @@
|
|||
class Kernel:
    """
    TensorCL kernel.

    Only keeps the kernel text and default shapes. No resources are allocated,
    so an instance can be used as static variable within class.

    arguments

        kernel_text         OpenCL text of kernel. Must contain only one __kernel

        global_shape(None)  default global_shape for .run()

        local_shape(None)   default local_shape for .run()
    """
    def __init__(self, kernel_text, global_shape=None, local_shape=None):
        self._kernel_text, self._global_shape, self._local_shape = kernel_text, global_shape, local_shape

    def get_kernel_text(self) -> str:
        return self._kernel_text

    def get_global_shape(self):
        return self._global_shape

    def get_local_shape(self):
        return self._local_shape

    def __str__(self):
        text = self._kernel_text
        return f'Kernel: \n{text}'

    def __repr__(self):
        return self.__str__()
|
||||
|
278
xlib/avecl/_internal/backend/OpenCL/OpenCL.py
Normal file
278
xlib/avecl/_internal/backend/OpenCL/OpenCL.py
Normal file
|
@ -0,0 +1,278 @@
|
|||
"""
|
||||
Minimal OpenCL 1.2 low level ctypes API.
|
||||
"""
|
||||
import ctypes
|
||||
from ctypes import POINTER, create_string_buffer, sizeof, c_char_p, c_char, c_size_t, c_void_p, byref
|
||||
from ctypes.util import find_library
|
||||
from enum import IntEnum
|
||||
|
||||
# Loaded libraries, cached by the name passed to dll_import()
dlls_by_name = {}

def dll_import(dll_name):
    """
    Returns a decorator that binds a stub function to the function
    of the same name exported by the shared library `dll_name`.

    Annotations of the stub describe the C signature: annotations of the
    parameters become argtypes, annotation of the return value becomes restype.
    Therefore the stub must annotate every parameter and the return value.

    The library is loaded once and cached in dlls_by_name.

    Raises RuntimeError if the library cannot be found or loaded.
    """
    dll = dlls_by_name.get(dll_name, None)
    if dll is None:
        dll_path = find_library(dll_name)
        if dll_path is None:
            # Must not reach LoadLibrary(None): on POSIX it gives a handle to the
            # running process instead of failing, which hides the missing library.
            raise RuntimeError(f'Unable to load {dll_name} library.')

        try:
            dll = ctypes.cdll.LoadLibrary(dll_path)
        except OSError as e:
            raise RuntimeError(f'Unable to load {dll_name} library.') from e

        dlls_by_name[dll_name] = dll

    def decorator(func):
        dll_func = getattr(dll, func.__name__)
        # Annotations keep the declaration order, the last one is the return type
        anno = list(func.__annotations__.values())
        dll_func.argtypes = anno[:-1]
        dll_func.restype = anno[-1]
        def wrapper(*args):
            return dll_func(*args)
        return wrapper
    return decorator
|
||||
|
||||
# OpenCL scalar types as distinct ctypes classes.

# 8 bit integers
class cl_char(ctypes.c_int8):
    pass

class cl_uchar(ctypes.c_uint8):
    pass

# 16 bit integers
class cl_short(ctypes.c_int16):
    pass

class cl_ushort(ctypes.c_uint16):
    pass

# 32 bit integers
class cl_int(ctypes.c_int32):
    pass

class cl_uint(ctypes.c_uint32):
    pass

# 64 bit integers
class cl_long(ctypes.c_int64):
    pass

class cl_ulong(ctypes.c_uint64):
    pass

# floating point, cl_half is stored as 16 bit unsigned integer
class cl_half(ctypes.c_uint16):
    pass

class cl_float(ctypes.c_float):
    pass

class cl_double(ctypes.c_double):
    pass

# boolean is a cl_uint
class cl_bool(cl_uint):
    pass
|
||||
class cl_bitfield(cl_ulong):
    """
    cl_ulong used as a set of bit flags.

    Supports | & ^ ~ between objects of the same class, `flag in flags` test,
    equality and hashing by value. Result of an operation has the class of the
    left operand.
    """
    def __or__(self, other):
        assert isinstance(other, self.__class__)
        return self.__class__(self.value | other.value)
    def __and__(self, other):
        assert isinstance(other, self.__class__)
        return self.__class__(self.value & other.value)
    def __xor__(self, other):
        assert isinstance(other, self.__class__)
        return self.__class__(self.value ^ other.value)
    def __invert__(self):
        # Special method of the ~ operator.
        # The negative int is wrapped by ctypes to the unsigned value.
        return self.__class__(~self.value)
    def __not__(self):
        # Not a special method of Python, kept for callers invoking it by name.
        return self.__invert__()
    def __contains__(self, other):
        # True if every bit of `other` is set in self
        assert isinstance(other, self.__class__)
        return (self.value & other.value) == other.value
    def __hash__(self):
        return self.value.__hash__()
    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return False
        else:
            return self.value == other.value
    def __ne__(self, other):
        return not(self == other)
    def __repr__(self):
        return f'cl_bitfield: {self.value}'
|
||||
|
||||
class CLERROR(IntEnum):
    """
    OpenCL error codes.
    """
    SUCCESS                                  = 0
    DEVICE_NOT_FOUND                         = -1
    DEVICE_NOT_AVAILABLE                     = -2
    COMPILER_NOT_AVAILABLE                   = -3
    MEM_OBJECT_ALLOCATION_FAILURE            = -4
    OUT_OF_RESOURCES                         = -5
    OUT_OF_HOST_MEMORY                       = -6
    PROFILING_INFO_NOT_AVAILABLE             = -7
    MEM_COPY_OVERLAP                         = -8
    IMAGE_FORMAT_MISMATCH                    = -9
    IMAGE_FORMAT_NOT_SUPPORTED               = -10
    BUILD_PROGRAM_FAILURE                    = -11
    MAP_FAILURE                              = -12
    MISALIGNED_SUB_BUFFER_OFFSET             = -13
    EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST = -14
    INVALID_VALUE                            = -30
    INVALID_DEVICE_TYPE                      = -31
    INVALID_PLATFORM                         = -32
    INVALID_DEVICE                           = -33
    INVALID_CONTEXT                          = -34
    INVALID_QUEUE_PROPERTIES                 = -35
    INVALID_COMMAND_QUEUE                    = -36
    INVALID_HOST_PTR                         = -37
    INVALID_MEM_OBJECT                       = -38
    INVALID_IMAGE_FORMAT_DESCRIPTOR          = -39
    INVALID_IMAGE_SIZE                       = -40
    INVALID_SAMPLER                          = -41
    INVALID_BINARY                           = -42
    INVALID_BUILD_OPTIONS                    = -43
    INVALID_PROGRAM                          = -44
    INVALID_PROGRAM_EXECUTABLE               = -45
    INVALID_KERNEL_NAME                      = -46
    INVALID_KERNEL_DEFINITION                = -47
    INVALID_KERNEL                           = -48
    INVALID_ARG_INDEX                        = -49
    INVALID_ARG_VALUE                        = -50
    INVALID_ARG_SIZE                         = -51
    INVALID_KERNEL_ARGS                      = -52
    INVALID_WORK_DIMENSION                   = -53
    INVALID_WORK_GROUP_SIZE                  = -54
    INVALID_WORK_ITEM_SIZE                   = -55
    INVALID_GLOBAL_OFFSET                    = -56
    INVALID_EVENT_WAIT_LIST                  = -57
    INVALID_EVENT                            = -58
    INVALID_OPERATION                        = -59
    INVALID_GL_OBJECT                        = -60
    INVALID_BUFFER_SIZE                      = -61
    INVALID_MIP_LEVEL                        = -62
    INVALID_GLOBAL_WORK_SIZE                 = -63
    INVALID_PROPERTY                         = -64
    INVALID_GL_SHAREGROUP_REFERENCE_KHR      = -1000
    PLATFORM_NOT_FOUND_KHR                   = -1001

class CLRESULT(cl_int):
    """
    Result code of an OpenCL call.

    Compares by value with int (therefore with CLERROR members) and with
    other CLRESULT. Comparison with any other type is False.
    """
    def __eq__(self, other):
        if isinstance(other, int):
            return self.value == other
        elif isinstance(other, self.__class__):
            return self.value == other.value
        else:
            return False
    def __ne__(self, other):
        return not(self == other)
    def __hash__(self):
        return self.value.__hash__()
    def __str__(self):
        try:
            error = CLERROR(self.value)
        except ValueError:
            # code is not listed in CLERROR
            return f'CLRESULT ({self.value})'
        # The name is formatted explicitly: str() of an IntEnum member is
        # 'CLERROR.NAME' before Python 3.11, but only the number since 3.11.
        return f'CLRESULT (CLERROR.{error.name})'
    def __repr__(self):
        return self.__str__()
|
||||
|
||||
# Distinct ctypes classes for OpenCL handles, info selectors and flags.

# platform
class cl_platform_id(c_void_p):
    pass

class cl_platform_info(cl_uint):
    pass

# device
class cl_device_id(c_void_p):
    pass

class cl_device_type(cl_bitfield):
    pass

class cl_device_info(cl_uint):
    pass

# context
class cl_context(c_void_p):
    pass

class cl_context_properties(c_void_p):
    pass

# command queue and events
class cl_command_queue(c_void_p):
    pass

class cl_command_queue_properties(cl_bitfield):
    pass

class cl_event(c_void_p):
    pass

# memory objects
class cl_mem(c_void_p):
    pass

class cl_mem_info(cl_uint):
    pass

class cl_mem_flags(cl_bitfield):
    pass

# programs and kernels
class cl_program(c_void_p):
    pass

class cl_program_build_info(cl_uint):
    pass

class cl_kernel(c_void_p):
    pass
|
||||
|
||||
# cl_platform_info selectors; numeric codes taken from
# https://github.com/KhronosGroup/OpenCL-Headers/blob/master/CL/cl.h
CL_PLATFORM_PROFILE = cl_platform_info(0x0900)
CL_PLATFORM_VERSION = cl_platform_info(0x0901)
CL_PLATFORM_NAME = cl_platform_info(0x0902)
CL_PLATFORM_VENDOR = cl_platform_info(0x0903)
CL_PLATFORM_EXTENSIONS = cl_platform_info(0x0904)
|
||||
|
||||
# cl_device_type bit flags (one bit per device class, plus the ALL mask).
CL_DEVICE_TYPE_DEFAULT = cl_device_type(1 << 0)
CL_DEVICE_TYPE_CPU = cl_device_type(1 << 1)
CL_DEVICE_TYPE_GPU = cl_device_type(1 << 2)
CL_DEVICE_TYPE_ACCELERATOR = cl_device_type(1 << 3)
CL_DEVICE_TYPE_ALL = cl_device_type(0xFFFFFFFF)
|
||||
|
||||
# cl_device_info selectors, listed in ascending order of their numeric code.
CL_DEVICE_TYPE = cl_device_info(0x1000)
CL_DEVICE_VENDOR_ID = cl_device_info(0x1001)
CL_DEVICE_MAX_COMPUTE_UNITS = cl_device_info(0x1002)
CL_DEVICE_MAX_WORK_GROUP_SIZE = cl_device_info(0x1004)
CL_DEVICE_MAX_MEM_ALLOC_SIZE = cl_device_info(0x1010)
CL_DEVICE_GLOBAL_MEM_SIZE = cl_device_info(0x101F)
CL_DEVICE_NAME = cl_device_info(0x102B)
CL_DRIVER_VERSION = cl_device_info(0x102D)
CL_DEVICE_VERSION = cl_device_info(0x102F)
CL_DEVICE_EXTENSIONS = cl_device_info(0x1030)
|
||||
|
||||
# cl_mem_flags: single-bit flags, bits 0 through 5.
CL_MEM_READ_WRITE = cl_mem_flags(1 << 0)
CL_MEM_WRITE_ONLY = cl_mem_flags(1 << 1)
CL_MEM_READ_ONLY = cl_mem_flags(1 << 2)
CL_MEM_USE_HOST_PTR = cl_mem_flags(1 << 3)
CL_MEM_ALLOC_HOST_PTR = cl_mem_flags(1 << 4)
CL_MEM_COPY_HOST_PTR = cl_mem_flags(1 << 5)
|
||||
|
||||
# cl_mem_info selector (the only one declared in this module).
CL_MEM_SIZE = cl_mem_info(0x1102)
|
||||
|
||||
# cl_program_build_info selectors.
CL_PROGRAM_BUILD_STATUS = cl_program_build_info(0x1181)
CL_PROGRAM_BUILD_OPTIONS = cl_program_build_info(0x1182)
CL_PROGRAM_BUILD_LOG = cl_program_build_info(0x1183)
|
||||
|
||||
|
||||
# OpenCL clGetPlatformIDs prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clGetPlatformIDs(
    num_entries: cl_uint,
    platforms: POINTER(cl_platform_id),
    num_platforms: POINTER(cl_uint),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clGetPlatformInfo prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clGetPlatformInfo(
    platform: cl_platform_id,
    param_name: cl_platform_info,
    param_value_size: c_size_t,
    param_value: c_void_p,
    param_value_size_ret: POINTER(c_size_t),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clGetDeviceIDs prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clGetDeviceIDs(
    platform: cl_platform_id,
    device_type: cl_device_type,
    num_entries: cl_uint,
    devices: POINTER(cl_device_id),
    num_devices: POINTER(cl_uint),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clGetDeviceInfo prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clGetDeviceInfo(
    device: cl_device_id,
    param_name: cl_device_info,
    param_value_size: c_size_t,
    param_value: c_void_p,
    param_value_size_ret: POINTER(c_size_t),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clCreateContext prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import). The status code is reported through
# errcode_ret; the annotated return type is the context handle.
@dll_import('OpenCL')
def clCreateContext(
    properties: cl_context_properties,
    num_devices: cl_uint,
    devices: POINTER(cl_device_id),
    pfn_notify: c_void_p,
    user_data: c_void_p,
    errcode_ret: POINTER(CLRESULT),
) -> cl_context:
    ...
|
||||
|
||||
# OpenCL clReleaseContext prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clReleaseContext(
    context: cl_context,
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clCreateCommandQueue prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import). The status code is reported through
# errcode_ret; the annotated return type is the queue handle.
@dll_import('OpenCL')
def clCreateCommandQueue(
    context: cl_context,
    device: cl_device_id,
    properties: cl_command_queue_properties,
    errcode_ret: POINTER(CLRESULT),
) -> cl_command_queue:
    ...
|
||||
|
||||
# OpenCL clReleaseCommandQueue prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clReleaseCommandQueue(
    command_queue: cl_command_queue,
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clFinish prototype (stub body; presumably bound to the 'OpenCL'
# library by dll_import).
@dll_import('OpenCL')
def clFinish(
    command_queue: cl_command_queue,
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clWaitForEvents prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clWaitForEvents(
    num_events: cl_uint,
    event_list: POINTER(cl_event),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clReleaseEvent prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clReleaseEvent(
    event: cl_event,
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clCreateBuffer prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import). The status code is reported through
# errcode_ret; the annotated return type is the memory object handle.
@dll_import('OpenCL')
def clCreateBuffer(
    context: cl_context,
    flags: cl_mem_flags,
    size: c_size_t,
    host_ptr: c_void_p,
    errcode_ret: POINTER(CLRESULT),
) -> cl_mem:
    ...
|
||||
|
||||
# OpenCL clGetMemObjectInfo prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clGetMemObjectInfo(
    memobj: cl_mem,
    param_name: cl_mem_info,
    param_value_size: c_size_t,
    param_value: c_void_p,
    param_value_size_ret: POINTER(c_size_t),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clReleaseMemObject prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clReleaseMemObject(
    memobj: cl_mem,
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clEnqueueReadBuffer prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clEnqueueReadBuffer(
    command_queue: cl_command_queue,
    buffer: cl_mem,
    blocking_read: cl_bool,
    offset: c_size_t,
    cb: c_size_t,
    ptr: c_void_p,
    num_events_in_wait_list: cl_uint,
    event_wait_list: POINTER(cl_event),
    event: POINTER(cl_event),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clEnqueueWriteBuffer prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clEnqueueWriteBuffer(
    command_queue: cl_command_queue,
    buffer: cl_mem,
    blocking_write: cl_bool,
    offset: c_size_t,
    size: c_size_t,
    ptr: c_void_p,
    num_events_in_wait_list: cl_uint,
    event_wait_list: POINTER(cl_event),
    event: POINTER(cl_event),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clEnqueueCopyBuffer prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
#
# `event` is an output parameter: the C signature takes `cl_event *event`,
# so it is declared as POINTER(cl_event) like in the other clEnqueue*
# prototypes of this module, not as a cl_event passed by value. Passing None
# still yields a NULL pointer.
@dll_import('OpenCL')
def clEnqueueCopyBuffer(
    command_queue: cl_command_queue,
    src_buffer: cl_mem,
    dst_buffer: cl_mem,
    src_offset: c_size_t,
    dst_offset: c_size_t,
    cb: c_size_t,
    num_events_in_wait_list: cl_uint,
    event_wait_list: POINTER(cl_event),
    event: POINTER(cl_event),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clEnqueueFillBuffer prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clEnqueueFillBuffer(
    command_queue: cl_command_queue,
    buffer: cl_mem,
    pattern: c_void_p,
    pattern_size: c_size_t,
    offset: c_size_t,
    size: c_size_t,
    num_events_in_wait_list: cl_uint,
    event_wait_list: POINTER(cl_event),
    event: POINTER(cl_event),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clCreateProgramWithSource prototype (stub body; presumably bound to
# the 'OpenCL' library by dll_import). The status code is reported through
# errcode_ret; the annotated return type is the program handle.
@dll_import('OpenCL')
def clCreateProgramWithSource(
    context: cl_context,
    count: cl_uint,
    strings: POINTER(c_char_p),
    lengths: POINTER(c_size_t),
    errcode_ret: POINTER(CLRESULT),
) -> cl_program:
    ...
|
||||
|
||||
# OpenCL clReleaseProgram prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clReleaseProgram(
    program: cl_program,
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clBuildProgram prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clBuildProgram(
    program: cl_program,
    num_devices: cl_uint,
    device_list: POINTER(cl_device_id),
    options: c_char_p,
    pfn_notify: c_void_p,
    user_data: c_void_p,
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clGetProgramBuildInfo prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clGetProgramBuildInfo(
    program: cl_program,
    device: cl_device_id,
    param_name: cl_program_build_info,
    param_value_size: c_size_t,
    param_value: c_void_p,
    param_value_size_ret: POINTER(c_size_t),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clCreateKernelsInProgram prototype (stub body; presumably bound to
# the 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clCreateKernelsInProgram(
    program: cl_program,
    num_kernels: cl_uint,
    kernels: POINTER(cl_kernel),
    num_kernels_ret: POINTER(cl_uint),
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clReleaseKernel prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
# NOTE(review): the parameter is named `program` although its type is
# cl_kernel; the name is kept as-is so the signature stays unchanged.
@dll_import('OpenCL')
def clReleaseKernel(
    program: cl_kernel,
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clSetKernelArg prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clSetKernelArg(
    kernel: cl_kernel,
    arg_index: cl_uint,
    arg_size: c_size_t,
    arg_value: c_void_p,
) -> CLRESULT:
    ...
|
||||
|
||||
# OpenCL clEnqueueNDRangeKernel prototype (stub body; presumably bound to the
# 'OpenCL' library by dll_import).
@dll_import('OpenCL')
def clEnqueueNDRangeKernel(
    command_queue: cl_command_queue,
    kernel: cl_kernel,
    work_dim: cl_uint,
    global_work_offset: POINTER(c_size_t),
    global_work_size: POINTER(c_size_t),
    local_work_size: POINTER(c_size_t),
    num_events_in_wait_list: cl_uint,
    event_wait_list: POINTER(cl_event),
    event: POINTER(cl_event),
) -> CLRESULT:
    ...
|
37
xlib/avecl/_internal/backend/OpenCL/__init__.py
Normal file
37
xlib/avecl/_internal/backend/OpenCL/__init__.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
"""
|
||||
Minimal OpenCL 1.2 low level ctypes API.
|
||||
"""
|
||||
from .OpenCL import (CL_DEVICE_EXTENSIONS, CL_DEVICE_GLOBAL_MEM_SIZE,
|
||||
CL_DEVICE_MAX_COMPUTE_UNITS, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
|
||||
CL_DEVICE_MAX_WORK_GROUP_SIZE, CL_DEVICE_NAME,
|
||||
CL_DEVICE_TYPE, CL_DEVICE_TYPE_ACCELERATOR,
|
||||
CL_DEVICE_TYPE_ALL, CL_DEVICE_TYPE_CPU,
|
||||
CL_DEVICE_TYPE_DEFAULT, CL_DEVICE_TYPE_GPU,
|
||||
CL_DEVICE_VENDOR_ID, CL_DEVICE_VERSION, CL_DRIVER_VERSION,
|
||||
CL_MEM_ALLOC_HOST_PTR, CL_MEM_COPY_HOST_PTR,
|
||||
CL_MEM_READ_ONLY, CL_MEM_READ_WRITE, CL_MEM_SIZE,
|
||||
CL_MEM_USE_HOST_PTR, CL_MEM_WRITE_ONLY,
|
||||
CL_PLATFORM_EXTENSIONS, CL_PLATFORM_NAME,
|
||||
CL_PLATFORM_PROFILE, CL_PLATFORM_VENDOR,
|
||||
CL_PLATFORM_VERSION, CL_PROGRAM_BUILD_LOG,
|
||||
CL_PROGRAM_BUILD_OPTIONS, CL_PROGRAM_BUILD_STATUS,
|
||||
CLERROR, CLRESULT, byref, c_char, c_char_p, c_size_t,
|
||||
c_void_p, cl_bitfield, cl_bool, cl_char, cl_command_queue,
|
||||
cl_command_queue_properties, cl_context,
|
||||
cl_context_properties, cl_device_id, cl_device_info,
|
||||
cl_device_type, cl_double, cl_event, cl_float, cl_half,
|
||||
cl_int, cl_kernel, cl_long, cl_mem, cl_mem_info,
|
||||
cl_platform_id, cl_platform_info, cl_program,
|
||||
cl_program_build_info, cl_short, cl_uchar, cl_uint,
|
||||
cl_ulong, cl_ushort, clBuildProgram, clCreateBuffer,
|
||||
clCreateCommandQueue, clCreateContext,
|
||||
clCreateKernelsInProgram, clCreateProgramWithSource,
|
||||
clEnqueueCopyBuffer, clEnqueueFillBuffer,
|
||||
clEnqueueNDRangeKernel, clEnqueueReadBuffer,
|
||||
clEnqueueWriteBuffer, clFinish, clGetDeviceIDs,
|
||||
clGetDeviceInfo, clGetMemObjectInfo, clGetPlatformIDs,
|
||||
clGetPlatformInfo, clGetProgramBuildInfo,
|
||||
clReleaseCommandQueue, clReleaseContext, clReleaseEvent,
|
||||
clReleaseKernel, clReleaseMemObject, clReleaseProgram,
|
||||
clSetKernelArg, clWaitForEvents, create_string_buffer,
|
||||
ctypes, sizeof)
|
6
xlib/avecl/_internal/backend/__init__.py
Normal file
6
xlib/avecl/_internal/backend/__init__.py
Normal file
|
@ -0,0 +1,6 @@
|
|||
from .Buffer import Buffer
|
||||
from .Device import (Device, cleanup_devices, get_available_devices_info,
|
||||
get_best_device, get_default_device, get_device,
|
||||
set_default_device)
|
||||
from .DeviceInfo import DeviceInfo
|
||||
from .Kernel import Kernel
|
Loading…
Add table
Add a link
Reference in a new issue