# Source code for nabu.processing.roll_opencl
#
# WIP !
#
import numpy as np
from ..opencl.utils import __has_pyopencl__
from ..utils import get_opencl_srcfile
if __has_pyopencl__:
import pyopencl as cl
from ..opencl.processing import OpenCLProcessing
from ..opencl.kernel import OpenCLKernel
from pyopencl.tools import dtype_to_ctype as cl_dtype_to_ctype
[docs]
class OpenCLRoll:
    """
    Callable wrapper around the OpenCL kernels found in "roll.cl".

    An instance compiles the "roll_forward_x" kernel (and, for a negative
    direction, the "revert_array_x" kernel) once, then launches them on the
    2D array passed to ``__call__``. The array is handed to the kernels as-is
    and returned, so any modification is done in-place by the kernels.

    NOTE(review): what the kernels actually compute is defined in "roll.cl",
    which is not visible from this module; the names suggest a roll along the
    horizontal axis, optionally wrapped between two array reversals.
    """

    def __init__(self, dtype, direction=1, offset=None, **processing_kwargs):
        """
        Compile the kernels and allocate the local-memory argument.

        Parameters
        ----------
        dtype:
            Anything accepted by ``numpy.dtype``. The corresponding C type name
            is given to the kernels through the ``DTYPE`` compilation macro.
        direction: int, optional
            When negative, the "revert_array_x" kernel is also compiled, and it
            is launched before and after the roll kernel. Default is 1.
        offset: int, optional
            Value forwarded to the kernels as their "offset" argument.
            None (or any falsy value) is replaced by 0.
        processing_kwargs:
            Only the "queue" entry is read; it is forwarded to OpenCLProcessing.
        """
        self.processing = OpenCLProcessing(queue=processing_kwargs.get("queue", None))
        self.dtype = np.dtype(dtype)
        self.offset = offset or 0

        # The same options list is shared by every kernel compiled below
        ctype_name = cl_dtype_to_ctype(self.dtype)
        build_options = [f"-DDTYPE={ctype_name}"]

        def compile_kernel(kernel_name):
            return OpenCLKernel(
                kernel_name,
                None,
                queue=self.processing.queue,
                filename=get_opencl_srcfile("roll.cl"),
                options=build_options,
            )

        self.roll_kernel = compile_kernel("roll_forward_x")
        # Local memory argument sized for a single item of the working dtype
        self.shmem = cl.LocalMemory(self.dtype.itemsize)
        self.direction = direction
        if self.direction < 0:
            self.revert_kernel = compile_kernel("revert_array_x")

    def __call__(self, arr):
        """
        Launch the kernel(s) on a 2D array and return that same array.

        Parameters
        ----------
        arr:
            Object with a two-element ``shape`` (rows, columns), passed as the
            first kernel argument.
            NOTE(review): presumably a pyopencl array whose dtype matches the
            one given at construction - confirm against callers.
        """
        n_rows, n_cols = arr.shape
        shift = self.offset
        backward = self.direction < 0

        # Launch one big horizontal workgroup
        device = self.processing.queue.device
        group_width = min((n_cols - shift) // 2, device.max_work_group_size)

        # Scalar arguments shared by both kernels: (nx, ny, offset)
        scalar_args = (np.int32(n_cols), np.int32(n_rows), np.int32(shift))

        if backward:
            revert_launch = {"local_size": None, "global_size": [n_cols - shift, n_rows]}
            self.revert_kernel(arr, *scalar_args, **revert_launch)

        self.roll_kernel(
            arr,
            *scalar_args,
            self.shmem,
            local_size=(group_width, 1, 1),
            global_size=[group_width, n_rows],
        )

        if backward:
            self.revert_kernel(arr, *scalar_args, **revert_launch)
        return arr