Source code for nabu.opencl.padding

import numpy as np
from ..utils import get_opencl_srcfile, updiv, check_supported
from .kernel import OpenCLKernel
from .processing import OpenCLProcessing
from .memcpy import OpenCLMemcpy2D
import pyopencl.array as parray
from ..misc.padding_base import PaddingBase


class OpenCLPadding(PaddingBase):
    """
    A class for performing 2D padding on an OpenCL device (through pyopencl).

    Parameters
    ----------
    shape:
        Shape of the images to pad. Forwarded to ``PaddingBase``.
    pad_width:
        Padding widths. Forwarded to ``PaddingBase``; it is later unpacked as
        ``(pad_y, pad_x)``, where index 0 of each entry is used as the
        offset of the image inside the padded array.
    mode: str, optional
        Padding mode, forwarded to ``PaddingBase``. ``"constant"`` uses a
        dedicated device-to-device copy; every other mode goes through the
        "coordinate_transform" OpenCL kernel.
    opencl_options: dict, optional
        Keyword arguments passed to ``OpenCLProcessing``. ``None`` (default)
        is treated as an empty dictionary.
    **kwargs:
        Extra keyword arguments forwarded to ``PaddingBase``.
    """

    # TODO docstring from base class
    def __init__(self, shape, pad_width, mode="constant", opencl_options=None, **kwargs):
        super().__init__(shape, pad_width, mode=mode, **kwargs)
        self.opencl_processing = OpenCLProcessing(**(opencl_options or {}))
        self.queue = self.opencl_processing.queue
        self._init_opencl_coordinate_transform()

    def _init_opencl_coordinate_transform(self):
        """
        Create the device-side resources needed by the selected padding mode.

        In "constant" mode, the host array ``self.padded_array_constant`` is
        uploaded to the device and a 2D memcpy helper is created; no kernel
        is built. In any other mode, the "coordinate_transform" kernel is
        built from "padding.cl" and the coordinate arrays
        ``self.coords_rows`` / ``self.coords_cols`` are uploaded.
        """
        if self.mode == "constant":
            self.d_padded_array_constant = parray.to_device(self.queue, self.padded_array_constant)
            self.memcpy2D = OpenCLMemcpy2D(ctx=self.opencl_processing.ctx, queue=self.queue)
            return
        self._coords_transform_kernel = OpenCLKernel(
            "coordinate_transform",
            self.opencl_processing.ctx,
            filename=get_opencl_srcfile("padding.cl"),
        )
        # Global work size is the padded shape reversed
        # (presumably (width, height) order - confirm against OpenCLKernel).
        self._coords_transform_global_size = self.padded_shape[::-1]
        self.d_coords_rows = parray.to_device(self.queue, self.coords_rows)
        self.d_coords_cols = parray.to_device(self.queue, self.coords_cols)

    def _pad_constant(self, image, output):
        """
        Pad ``image`` in "constant" mode and write the result into ``output``.

        The image is first copied into the device array
        ``self.d_padded_array_constant`` at offset ``(pad_x[0], pad_y[0])``,
        then that whole array is copied into ``output``, which is returned.
        """
        pad_y, pad_x = self.pad_width
        # the following line is not implemented in pyopencl
        # self.d_padded_array_constant[pad_y[0] : pad_y[0] + self.shape[0], pad_x[0] : pad_x[0] + self.shape[1]] = image[:]
        # cl.enqueue_copy is too cumbersome to use for Buffer <-> Buffer.
        # Use a dedicated kernel instead.
        # This is not optimal (two copies) - TODO write a constant padding kernel
        self.memcpy2D(self.d_padded_array_constant, image, image.shape[::-1], dst_offset_xy=(pad_x[0], pad_y[0]))
        output[:] = self.d_padded_array_constant[:]
        return output

    def pad(self, image, output=None):
        """
        Pad an array.

        Parameters
        ----------
        image: pyopencl.array.Array
            Image to pad
        output: pyopencl.array.Array, optional
            Output image. If provided, must be in the expected shape
            (``self.padded_shape``). If not provided, it is obtained from
            ``self.opencl_processing.allocate_array("d_output", self.padded_shape)``.

        Returns
        -------
        output:
            The padded image (the ``output`` argument when it was provided).
        """
        if output is None:
            output = self.opencl_processing.allocate_array("d_output", self.padded_shape)
        if self.mode == "constant":
            return self._pad_constant(image, output)
        self._coords_transform_kernel(
            self.queue,
            image,
            output,
            self.d_coords_cols,
            self.d_coords_rows,
            np.int32(self.shape[1]),
            np.int32(self.padded_shape[1]),
            np.int32(self.padded_shape[0]),
            global_size=self._coords_transform_global_size,
        )
        return output