#!/usr/bin/env python
#
# Project: Sift implementation in Python + OpenCL
# https://github.com/silx-kit/silx
#
# Copyright (C) 2013-2023 European Synchrotron Radiation Facility, Grenoble, France
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
"""
This module provides a class for CBF byte offset compression/decompression.
"""
__authors__ = ["Jérôme Kieffer"]
__contact__ = "jerome.kieffer@esrf.eu"
__license__ = "MIT"
__copyright__ = "European Synchrotron Radiation Facility, Grenoble, France"
__date__ = "11/10/2018"
__status__ = "production"
import functools
import os
import numpy
from ..common import ocl, pyopencl
from ..processing import BufferDescription, EventDescription, OpenclProcessing
import logging
logger = logging.getLogger(__name__)
if pyopencl:
import pyopencl.version
if pyopencl.version.VERSION < (2016, 0):
from pyopencl.scan import GenericScanKernel, GenericDebugScanKernel
else:
from pyopencl.algorithm import GenericScanKernel
from pyopencl.scan import GenericDebugScanKernel
else:
logger.warning("No PyOpenCL, no byte-offset, please see fabio")
class ByteOffset(OpenclProcessing):
    """Perform the byte offset compression/decompression on the GPU

    See :class:`OpenclProcessing` for optional arguments description.

    :param int raw_size:
        Size of the raw stream for decompression.
        It can be (slightly) larger than the array.
    :param int dec_size:
        Size of the decompression output array
        (mandatory for decompression)
    """

    def __init__(
        self,
        raw_size=None,
        dec_size=None,
        ctx=None,
        devicetype="all",
        platformid=None,
        deviceid=None,
        block_size=None,
        profile=False,
    ):
        OpenclProcessing.__init__(
            self,
            ctx=ctx,
            devicetype=devicetype,
            platformid=platformid,
            deviceid=deviceid,
            block_size=block_size,
            profile=profile,
        )
        if self.block_size is None:
            self.block_size = self.device.max_work_group_size
        wg = self.block_size

        # "counter" is shared between both directions: it holds the number of
        # exceptions during decompression and the compressed size during
        # compression (see encode()).
        buffers = [BufferDescription("counter", 1, numpy.int32, None)]

        if raw_size is None:
            # Buffers will be (re-)allocated on demand in decode()
            self.raw_size = -1
            self.padded_raw_size = -1
        else:
            self.raw_size = int(raw_size)
            # Round up to the next multiple of the work-group size
            # (assumes wg is a power of two — TODO confirm for exotic devices)
            self.padded_raw_size = int((self.raw_size + wg - 1) & ~(wg - 1))
            buffers += [
                BufferDescription("raw", self.padded_raw_size, numpy.int8, None),
                BufferDescription("mask", self.padded_raw_size, numpy.int32, None),
                BufferDescription("values", self.padded_raw_size, numpy.int32, None),
                BufferDescription(
                    "exceptions", self.padded_raw_size, numpy.int32, None
                ),
            ]

        if dec_size is None:
            # Decompression output buffers not allocated: decode() will assert
            self.dec_size = None
        else:
            self.dec_size = numpy.int32(dec_size)
            buffers += [
                BufferDescription("data_float", self.dec_size, numpy.float32, None),
                BufferDescription("data_int", self.dec_size, numpy.int32, None),
            ]

        self.allocate_buffers(buffers, use_array=True)
        self.compile_kernels([os.path.join("codec", "byte_offset")])
        # The scan kernels are generated at runtime by pyopencl, so they are
        # attached to the kernel container after the OpenCL source compilation.
        # Plain attribute assignment invokes the exact same __setattr__ as the
        # former explicit dunder call, and is the idiomatic form (ruff B010).
        self.kernels.scan = self._init_double_scan()
        self.kernels.compression_scan = self._init_compression_scan()
def _init_double_scan(self):
"""generates a double scan on indexes and values in one operation"""
arguments = "__global int *value", "__global int *index"
int2 = pyopencl.tools.get_or_register_dtype("int2")
input_expr = "index[i]>0 ? (int2)(0, 0) : (int2)(value[i], 1)"
scan_expr = "a+b"
neutral = "(int2)(0,0)"
output_statement = "value[i] = item.s0; index[i+1] = item.s1;"
if self.block_size > 256:
knl = GenericScanKernel(
self.ctx,
dtype=int2,
arguments=arguments,
input_expr=input_expr,
scan_expr=scan_expr,
neutral=neutral,
output_statement=output_statement,
)
else: # MacOS on CPU
knl = GenericDebugScanKernel(
self.ctx,
dtype=int2,
arguments=arguments,
input_expr=input_expr,
scan_expr=scan_expr,
neutral=neutral,
output_statement=output_statement,
)
return knl
    def decode(self, raw, as_float=False, out=None):
        """This function actually performs the decompression by calling the kernels

        :param numpy.ndarray raw: The compressed data as a 1D numpy array of char.
        :param bool as_float: True to decompress as float32,
                              False (default) to decompress as int32
        :param pyopencl.array out: pyopencl array in which to place the result.
        :return: The decompressed image as an pyopencl array.
        :rtype: pyopencl.array
        """
        assert (
            self.dec_size is not None
        ), "dec_size is a mandatory ByteOffset init argument for decompression"
        # (name, event) pairs collected for optional profiling
        events = []
        with self.sem:
            len_raw = numpy.int32(len(raw))
            if len_raw > self.padded_raw_size:
                # Input stream larger than current device buffers: grow them
                wg = self.block_size
                self.raw_size = int(len(raw))
                # Round up to a multiple of the work-group size
                self.padded_raw_size = (self.raw_size + wg - 1) & ~(wg - 1)
                logger.info("increase raw buffer size to %s", self.padded_raw_size)
                buffers = {
                    "raw": pyopencl.array.empty(
                        self.queue, self.padded_raw_size, dtype=numpy.int8
                    ),
                    "mask": pyopencl.array.empty(
                        self.queue, self.padded_raw_size, dtype=numpy.int32
                    ),
                    "exceptions": pyopencl.array.empty(
                        self.queue, self.padded_raw_size, dtype=numpy.int32
                    ),
                    "values": pyopencl.array.empty(
                        self.queue, self.padded_raw_size, dtype=numpy.int32
                    ),
                }
                self.cl_mem.update(buffers)
            else:
                wg = self.block_size

            # Upload the compressed stream to the device (non-blocking)
            evt = pyopencl.enqueue_copy(
                self.queue, self.cl_mem["raw"].data, raw, is_blocking=False
            )
            events.append(EventDescription("copy raw H -> D", evt))
            # Zero the mask over the whole padded buffer
            evt = self.kernels.fill_int_mem(
                self.queue,
                (self.padded_raw_size,),
                (wg,),
                self.cl_mem["mask"].data,
                numpy.int32(self.padded_raw_size),
                numpy.int32(0),
                numpy.int32(0),
            )
            events.append(EventDescription("memset mask", evt))
            # Zero the single-element exception counter
            evt = self.kernels.fill_int_mem(
                self.queue,
                (1,),
                (1,),
                self.cl_mem["counter"].data,
                numpy.int32(1),
                numpy.int32(0),
                numpy.int32(0),
            )
            events.append(EventDescription("memset counter", evt))
            # Flag the positions of multi-byte ("exception") encodings and
            # decode single-byte values in one pass
            evt = self.kernels.mark_exceptions(
                self.queue,
                (self.padded_raw_size,),
                (wg,),
                self.cl_mem["raw"].data,
                len_raw,
                numpy.int32(self.raw_size),
                self.cl_mem["mask"].data,
                self.cl_mem["values"].data,
                self.cl_mem["counter"].data,
                self.cl_mem["exceptions"].data,
            )
            events.append(EventDescription("mark exceptions", evt))
            # Read back the exception count; evt.wait() below makes it safe
            nb_exceptions = numpy.empty(1, dtype=numpy.int32)
            evt = pyopencl.enqueue_copy(
                self.queue,
                nb_exceptions,
                self.cl_mem["counter"].data,
                is_blocking=False,
            )
            events.append(EventDescription("copy counter D -> H", evt))
            evt.wait()
            nbexc = int(nb_exceptions[0])
            if nbexc == 0:
                logger.info("nbexc %i", nbexc)
            else:
                # Decode the multi-byte values, one work-item per exception
                evt = self.kernels.treat_exceptions(
                    self.queue,
                    (nbexc,),
                    (1,),
                    self.cl_mem["raw"].data,
                    len_raw,
                    self.cl_mem["mask"].data,
                    self.cl_mem["exceptions"].data,
                    self.cl_mem["values"].data,
                )
                events.append(EventDescription("treat_exceptions", evt))
            # self.cl_mem["copy_values"] = self.cl_mem["values"].copy()
            # self.cl_mem["copy_mask"] = self.cl_mem["mask"].copy()
            # Double scan: cumulated sum of deltas (pixel values) and of
            # output indexes in a single operation (see _init_double_scan)
            evt = self.kernels.scan(
                self.cl_mem["values"],
                self.cl_mem["mask"],
                queue=self.queue,
                size=int(len_raw),
                wait_for=(evt,),
            )
            events.append(EventDescription("double scan", evt))
            # evt.wait()
            # Select the destination buffer and the matching copy kernel
            if out is not None:
                if out.dtype == numpy.float32:
                    copy_results = self.kernels.copy_result_float
                else:
                    copy_results = self.kernels.copy_result_int
            else:
                if as_float:
                    out = self.cl_mem["data_float"]
                    copy_results = self.kernels.copy_result_float
                else:
                    out = self.cl_mem["data_int"]
                    copy_results = self.kernels.copy_result_int
            # Scatter the scanned values to their final positions in `out`
            evt = copy_results(
                self.queue,
                (self.padded_raw_size,),
                (wg,),
                self.cl_mem["values"].data,
                self.cl_mem["mask"].data,
                len_raw,
                self.dec_size,
                out.data,
            )
            events.append(EventDescription("copy_results", evt))
            # evt.wait()
        if self.profile:
            self.events += events
        return out

    # Calling the instance performs a decompression
    __call__ = decode
def _init_compression_scan(self):
"""Initialize CBF compression scan kernels"""
preamble = """
int compressed_size(int diff) {
int abs_diff = abs(diff);
if (abs_diff < 128) {
return 1;
}
else if (abs_diff < 32768) {
return 3;
}
else {
return 7;
}
}
void write(const int index,
const int diff,
global char *output) {
int abs_diff = abs(diff);
if (abs_diff < 128) {
output[index] = (char) diff;
}
else if (abs_diff < 32768) {
output[index] = -128;
output[index + 1] = (char) (diff >> 0);
output[index + 2] = (char) (diff >> 8);
}
else {
output[index] = -128;
output[index + 1] = 0;
output[index + 2] = -128;
output[index + 3] = (char) (diff >> 0);
output[index + 4] = (char) (diff >> 8);
output[index + 5] = (char) (diff >> 16);
output[index + 6] = (char) (diff >> 24);
}
}
"""
arguments = (
"__global const int *data, __global char *compressed, __global int *size"
)
input_expr = "compressed_size((i == 0) ? data[0] : (data[i] - data[i - 1]))"
scan_expr = "a+b"
neutral = "0"
output_statement = """
if (prev_item == 0) { // 1st thread store compressed data size
size[0] = last_item;
}
write(prev_item, (i == 0) ? data[0] : (data[i] - data[i - 1]), compressed);
"""
if self.block_size >= 64:
knl = GenericScanKernel(
self.ctx,
dtype=numpy.int32,
preamble=preamble,
arguments=arguments,
input_expr=input_expr,
scan_expr=scan_expr,
neutral=neutral,
output_statement=output_statement,
)
else: # MacOS on CPU
knl = GenericDebugScanKernel(
self.ctx,
dtype=numpy.int32,
preamble=preamble,
arguments=arguments,
input_expr=input_expr,
scan_expr=scan_expr,
neutral=neutral,
output_statement=output_statement,
)
return knl
    def encode(self, data, out=None):
        """Compress data to CBF.

        :param data: The data to compress as a numpy array
            (or a pyopencl Array) of int32.
        :type data: Union[numpy.ndarray, pyopencl.array.Array]
        :param pyopencl.array out:
            pyopencl array of int8 in which to store the result.
            The array should be large enough to store the compressed data.
        :return: The compressed data as a pyopencl array.
            If out is provided, this array shares the backing buffer,
            but has the exact size of the compressed data and the queue
            of the ByteOffset instance.
        :rtype: pyopencl.array
        :raises ValueError: if out array is not large enough
        """
        # (name, event) pairs collected for optional profiling
        events = []
        with self.sem:
            if isinstance(data, pyopencl.array.Array):
                d_data = data  # Uses provided array
            else:  # Copy data to device
                data = numpy.ascontiguousarray(data, dtype=numpy.int32).ravel()
                # Make sure data array exists and is large enough
                if (
                    "data_input" not in self.cl_mem
                    or self.cl_mem["data_input"].size < data.size
                ):
                    logger.info("increase data input buffer size to %s", data.size)
                    self.cl_mem.update(
                        {
                            "data_input": pyopencl.array.empty(
                                self.queue, data.size, dtype=numpy.int32
                            )
                        }
                    )
                d_data = self.cl_mem["data_input"]
                evt = pyopencl.enqueue_copy(
                    self.queue, d_data.data, data, is_blocking=False
                )
                events.append(EventDescription("copy data H -> D", evt))
            # Make sure compressed array exists and is large enough
            # Worst case in CBF byte-offset: 7 bytes per 32-bit delta
            compressed_size = d_data.size * 7
            if (
                "compressed" not in self.cl_mem
                or self.cl_mem["compressed"].size < compressed_size
            ):
                logger.info("increase compressed buffer size to %s", compressed_size)
                self.cl_mem.update(
                    {
                        "compressed": pyopencl.array.empty(
                            self.queue, compressed_size, dtype=numpy.int8
                        )
                    }
                )
            d_compressed = self.cl_mem["compressed"]
            d_size = self.cl_mem["counter"]  # Shared with decompression
            # Single scan pass: computes offsets and writes the stream,
            # storing the total compressed size in d_size
            evt = self.kernels.compression_scan(d_data, d_compressed, d_size)
            events.append(EventDescription("compression scan", evt))
            # Blocking read of the compressed size
            byte_count = int(d_size.get()[0])

            if out is None:
                # Create out array from a sub-region of the compressed buffer
                out = pyopencl.array.Array(
                    self.queue,
                    shape=(byte_count,),
                    dtype=numpy.int8,
                    allocator=functools.partial(
                        d_compressed.base_data.get_sub_region, d_compressed.offset
                    ),
                )
            elif out.size < byte_count:
                raise ValueError(
                    "Provided output buffer is not large enough: "
                    "requires %d bytes, got %d" % (byte_count, out.size)
                )
            else:  # out.size >= byte_count
                # Create an array with a sub-region of out and this class queue
                out = pyopencl.array.Array(
                    self.queue,
                    shape=(byte_count,),
                    dtype=numpy.int8,
                    allocator=functools.partial(
                        out.base_data.get_sub_region, out.offset
                    ),
                )
                # Copy the compressed stream into the user-provided buffer
                evt = pyopencl.enqueue_copy(
                    self.queue, out.data, d_compressed.data, byte_count=byte_count
                )
                events.append(EventDescription("copy D -> D: internal -> out", evt))
        if self.profile:
            self.events += events
        return out
def encode_to_bytes(self, data):
"""Compresses data to CBF and returns compressed data as bytes.
Usage:
Provided an image (`image`) stored as a numpy array of int32,
first, create a byte offset compression/decompression object:
>>> from silx.opencl.codec.byte_offset import ByteOffset
>>> byte_offset_codec = ByteOffset()
Then, compress an image into bytes:
>>> compressed = byte_offset_codec.encode_to_bytes(image)
:param data: The data to compress as a numpy array
(or a pyopencl Array) of int32.
:type data: Union[numpy.ndarray, pyopencl.array.Array]
:return: The compressed data as bytes.
:rtype: bytes
"""
compressed_array = self.encode(data)
return compressed_array.get().tobytes()