# /*##########################################################################
#
# Copyright (c) 2016-2021 European Synchrotron Radiation Facility
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# ###########################################################################*/
"""Functions and classes for array-like objects, implementing common numpy
array features for datasets or nested sequences, while trying to avoid copying
data.
Classes:
- :class:`DatasetView`: Similar to a numpy view, to access
a h5py dataset as if it was transposed, without casting it into a
numpy array (this lets h5py handle reading the data from the
file into memory, as needed).
- :class:`ListOfImages`: Similar to a numpy view, to access
a list of 2D numpy arrays as if it was a 3D array (possibly transposed),
without casting it into a numpy array.
Functions:
- :func:`is_array`
- :func:`is_list_of_arrays`
- :func:`is_nested_sequence`
- :func:`get_shape`
- :func:`get_dtype`
- :func:`get_concatenated_dtype`
"""
import numpy
import numbers
__authors__ = ["P. Knobel"]
__license__ = "MIT"
__date__ = "26/04/2017"
[docs]
def is_array(obj):
"""Return True if object implements necessary attributes to be
considered similar to a numpy array.
Attributes needed are "shape", "dtype", "__getitem__"
and "__array__".
:param obj: Array-like object (numpy array, h5py dataset...)
:return: boolean
"""
# add more required attribute if necessary
for attr in ("shape", "dtype", "__array__", "__getitem__"):
if not hasattr(obj, attr):
return False
return True
[docs]
def is_list_of_arrays(obj):
"""Return True if object is a sequence of numpy arrays,
e.g. a list of images as 2D arrays.
:param obj: list of arrays
:return: boolean"""
# object must not be a numpy array
if is_array(obj):
return False
# object must have a __len__ method
if not hasattr(obj, "__len__"):
return False
# all elements in sequence must be arrays
for arr in obj:
if not is_array(arr):
return False
return True
[docs]
def is_nested_sequence(obj):
"""Return True if object is a nested sequence.
A simple 1D sequence is considered to be a nested sequence.
Numpy arrays and h5py datasets are not considered to be nested sequences.
To test if an object is a nested sequence in a more general sense,
including arrays and datasets, use::
is_nested_sequence(obj) or is_array(obj)
:param obj: nested sequence (numpy array, h5py dataset...)
:return: boolean"""
# object must not be a numpy array
if is_array(obj):
return False
if not hasattr(obj, "__len__"):
return False
# obj must not be a list of (lists of) numpy arrays
subsequence = obj
while hasattr(subsequence, "__len__"):
if is_array(subsequence):
return False
# strings cause infinite loops
if isinstance(subsequence, (str, bytes)):
return True
subsequence = subsequence[0]
# object has __len__ and is not an array
return True
[docs]
def get_shape(array_like):
"""Return shape of an array like object.
In case the object is a nested sequence but not an array or dataset
(list of lists, tuples...), the size of each dimension is assumed to be
uniform, and is deduced from the length of the first sequence.
:param array_like: Array like object: numpy array, hdf5 dataset,
multi-dimensional sequence
:return: Shape of array, as a tuple of integers
"""
if hasattr(array_like, "shape"):
return array_like.shape
shape = []
subsequence = array_like
while hasattr(subsequence, "__len__"):
shape.append(len(subsequence))
# strings cause infinite loops
if isinstance(subsequence, (str, bytes)):
break
subsequence = subsequence[0]
return tuple(shape)
[docs]
def get_dtype(array_like):
"""Return dtype of an array like object.
In the case of a nested sequence, the type of the first value
is inspected.
:param array_like: Array like object: numpy array, hdf5 dataset,
multi-dimensional nested sequence
:return: numpy dtype of object
"""
if hasattr(array_like, "dtype"):
return array_like.dtype
subsequence = array_like
while hasattr(subsequence, "__len__"):
# strings cause infinite loops
if isinstance(subsequence, (str, bytes)):
break
subsequence = subsequence[0]
return numpy.dtype(type(subsequence))
[docs]
def get_concatenated_dtype(arrays):
"""Return dtype of array resulting of concatenation
of a list of arrays (without actually concatenating
them).
:param arrays: list of numpy arrays
:return: resulting dtype after concatenating arrays
"""
dtypes = {a.dtype for a in arrays}
dummy = []
for dt in dtypes:
dummy.append(numpy.zeros((1, 1), dtype=dt))
return numpy.array(dummy).dtype
[docs]
class ListOfImages(object):
"""This class provides a way to access values and slices in a stack of
images stored as a list of 2D numpy arrays, without creating a 3D numpy
array first.
A transposition can be specified, as a 3-tuple of dimensions in the wanted
order. For example, to transpose from ``xyz`` ``(0, 1, 2)`` into ``yzx``,
the transposition tuple is ``(1, 2, 0)``
All the 2D arrays in the list must have the same shape.
The global dtype of the stack of images is the one that would be obtained
by casting the list of 2D arrays into a 3D numpy array.
:param images: list of 2D numpy arrays, or :class:`ListOfImages` object
:param transposition: Tuple of dimension numbers in the wanted order
"""
def __init__(self, images, transposition=None):
""" """
super(ListOfImages, self).__init__()
# if images is a ListOfImages instance, get the underlying data
# as a list of 2D arrays
if isinstance(images, ListOfImages):
images = images.images
# test stack of images is as expected
assert is_list_of_arrays(images), "Image stack must be a list of arrays"
image0_shape = images[0].shape
for image in images:
assert image.ndim == 2, "Images must be 2D numpy arrays"
assert image.shape == image0_shape, "All images must have the same shape"
self.images = images
"""List of images"""
self.shape = (len(images),) + image0_shape
"""Tuple of array dimensions"""
self.dtype = get_concatenated_dtype(images)
"""Data-type of the global array"""
self.ndim = 3
"""Number of array dimensions"""
self.size = len(images) * image0_shape[0] * image0_shape[1]
"""Number of elements in the array."""
self.transposition = list(range(self.ndim))
"""List of dimension indices, in an order depending on the
specified transposition. By default this is simply
[0, ..., self.ndim], but it can be changed by specifying a different
``transposition`` parameter at initialization.
Use :meth:`transpose`, to create a new :class:`ListOfImages`
with a different :attr:`transposition`.
"""
if transposition is not None:
assert len(transposition) == self.ndim
assert set(transposition) == set(
list(range(self.ndim))
), "Transposition must be a sequence containing all dimensions"
self.transposition = transposition
self.__sort_shape()
def __sort_shape(self):
"""Sort shape in the order defined in :attr:`transposition`"""
new_shape = tuple(self.shape[dim] for dim in self.transposition)
self.shape = new_shape
def __sort_indices(self, indices):
"""Return array indices sorted in the order needed
to access data in the original non-transposed images.
:param indices: Tuple of ndim indices, in the order needed
to access the transposed view
:return: Sorted tuple of indices, to access original data
"""
assert len(indices) == self.ndim
sorted_indices = tuple(
idx for (_, idx) in sorted(zip(self.transposition, indices))
)
return sorted_indices
def __array__(self, dtype=None):
"""Cast the images into a numpy array, and return it.
If a transposition has been done on this images, return
a transposed view of a numpy array."""
return numpy.transpose(
numpy.array(self.images, dtype=dtype), self.transposition
)
def __len__(self):
return self.shape[0]
[docs]
def transpose(self, transposition=None):
"""Return a re-ordered (dimensions permutated)
:class:`ListOfImages`.
The returned object refers to
the same images but with a different :attr:`transposition`.
:param List[int] transposition: List/tuple of dimension numbers in the
wanted order.
If ``None`` (default), reverse the dimensions.
:return: new :class:`ListOfImages` object
"""
# by default, reverse the dimensions
if transposition is None:
transposition = list(reversed(self.transposition))
# If this ListOfImages is already transposed, sort new transposition
# relative to old transposition
elif list(self.transposition) != list(range(self.ndim)):
transposition = [self.transposition[i] for i in transposition]
return ListOfImages(self.images, transposition)
@property
def T(self):
"""
Same as self.transpose()
:return: DatasetView with dimensions reversed."""
return self.transpose()
def __getitem__(self, item):
"""Handle a subset of numpy indexing with regards to the dimension
order as specified in :attr:`transposition`
Following features are **not supported**:
- fancy indexing using numpy arrays
- using ellipsis objects
:param item: Index
:return: value or slice as a numpy array
"""
# 1-D slicing -> n-D slicing (n=1)
if not hasattr(item, "__len__"):
# first dimension index is given
item = [item]
# following dimensions are indexed with : (all elements)
item += [slice(None) for _i in range(self.ndim - 1)]
# n-dimensional slicing
if len(item) != self.ndim:
raise IndexError(
"N-dim slicing requires a tuple of N indices/slices. "
+ "Needed dimensions: %d" % self.ndim
)
# get list of indices sorted in the original images order
sorted_indices = self.__sort_indices(item)
list_idx, array_idx = sorted_indices[0], sorted_indices[1:]
images_selection = self.images[list_idx]
# now we must transpose the output data
output_dimensions = []
frozen_dimensions = []
for i, idx in enumerate(item):
# slices and sequences
if not isinstance(idx, numbers.Integral):
output_dimensions.append(self.transposition[i])
# regular integer index
else:
# whenever a dimension is fixed (indexed by an integer)
# the number of output dimension is reduced
frozen_dimensions.append(self.transposition[i])
# decrement output dimensions that are above frozen dimensions
for frozen_dim in reversed(sorted(frozen_dimensions)):
for i, out_dim in enumerate(output_dimensions):
if out_dim > frozen_dim:
output_dimensions[i] -= 1
assert (len(output_dimensions) + len(frozen_dimensions)) == self.ndim
assert set(output_dimensions) == set(range(len(output_dimensions)))
# single list elements selected
if isinstance(images_selection, numpy.ndarray):
return numpy.transpose(images_selection[array_idx], axes=output_dimensions)
# muliple list elements selected
else:
# apply selection first
output_stack = []
for img in images_selection:
output_stack.append(img[array_idx])
# then cast into a numpy array, and transpose
return numpy.transpose(numpy.array(output_stack), axes=output_dimensions)
[docs]
def min(self):
"""
:return: Global minimum value
"""
min_value = self.images[0].min()
if len(self.images) > 1:
for img in self.images[1:]:
min_value = min(min_value, img.min())
return min_value
[docs]
def max(self):
"""
:return: Global maximum value
"""
max_value = self.images[0].max()
if len(self.images) > 1:
for img in self.images[1:]:
max_value = max(max_value, img.max())
return max_value
[docs]
class DatasetView(object):
"""This class provides a way to transpose a dataset without
casting it into a numpy array. This way, the dataset in a file need not
necessarily be integrally read into memory to view it in a different
transposition.
.. note::
The performances depend a lot on the way the dataset was written
to file. Depending on the chunking strategy, reading a complete 2D slice
in an unfavorable direction may still require the entire dataset to
be read from disk.
:param dataset: h5py dataset
:param transposition: List of dimensions sorted in the order of
transposition (relative to the original h5py dataset)
"""
def __init__(self, dataset, transposition=None):
""" """
super(DatasetView, self).__init__()
self.dataset = dataset
"""original dataset"""
self.shape = dataset.shape
"""Tuple of array dimensions"""
self.dtype = dataset.dtype
"""Data-type of the array’s element"""
self.ndim = len(dataset.shape)
"""Number of array dimensions"""
size = 0
if self.ndim:
size = 1
for dimsize in self.shape:
size *= dimsize
self.size = size
"""Number of elements in the array."""
self.transposition = list(range(self.ndim))
"""List of dimension indices, in an order depending on the
specified transposition. By default this is simply
[0, ..., self.ndim], but it can be changed by specifying a different
`transposition` parameter at initialization.
Use :meth:`transpose`, to create a new :class:`DatasetView`
with a different :attr:`transposition`.
"""
if transposition is not None:
assert len(transposition) == self.ndim
assert set(transposition) == set(
list(range(self.ndim))
), "Transposition must be a list containing all dimensions"
self.transposition = transposition
self.__sort_shape()
def __sort_shape(self):
"""Sort shape in the order defined in :attr:`transposition`"""
new_shape = tuple(self.shape[dim] for dim in self.transposition)
self.shape = new_shape
def __sort_indices(self, indices):
"""Return array indices sorted in the order needed
to access data in the original non-transposed dataset.
:param indices: Tuple of ndim indices, in the order needed
to access the view
:return: Sorted tuple of indices, to access original data
"""
assert len(indices) == self.ndim
sorted_indices = tuple(
idx for (_, idx) in sorted(zip(self.transposition, indices))
)
return sorted_indices
def __getitem__(self, item):
"""Handle fancy indexing with regards to the dimension order as
specified in :attr:`transposition`
The supported fancy-indexing syntax is explained at
http://docs.h5py.org/en/latest/high/dataset.html#fancy-indexing.
Additional restrictions exist if the data has been transposed:
- numpy boolean array indexing is not supported
- ellipsis objects are not supported
:param item: Index, possibly fancy index (must be supported by h5py)
:return: Sliced numpy array or numpy scalar
"""
# no transposition, let the original dataset handle indexing
if self.transposition == list(range(self.ndim)):
return self.dataset[item]
# 1-D slicing: create a list of indices to switch to n-D slicing
if not hasattr(item, "__len__"):
# first dimension index (list index) is given
item = [item]
# following dimensions are indexed with slices representing all elements
item += [slice(None) for _i in range(self.ndim - 1)]
# n-dimensional slicing
if len(item) != self.ndim:
raise IndexError(
"N-dim slicing requires a tuple of N indices/slices. "
+ "Needed dimensions: %d" % self.ndim
)
# get list of indices sorted in the original dataset order
sorted_indices = self.__sort_indices(item)
output_data_not_transposed = self.dataset[sorted_indices]
# now we must transpose the output data
output_dimensions = []
frozen_dimensions = []
for i, idx in enumerate(item):
# slices and sequences
if not isinstance(idx, int):
output_dimensions.append(self.transposition[i])
# regular integer index
else:
# whenever a dimension is fixed (indexed by an integer)
# the number of output dimension is reduced
frozen_dimensions.append(self.transposition[i])
# decrement output dimensions that are above frozen dimensions
for frozen_dim in reversed(sorted(frozen_dimensions)):
for i, out_dim in enumerate(output_dimensions):
if out_dim > frozen_dim:
output_dimensions[i] -= 1
assert (len(output_dimensions) + len(frozen_dimensions)) == self.ndim
assert set(output_dimensions) == set(range(len(output_dimensions)))
return numpy.transpose(output_data_not_transposed, axes=output_dimensions)
def __array__(self, dtype=None):
"""Cast the dataset into a numpy array, and return it.
If a transposition has been done on this dataset, return
a transposed view of a numpy array."""
return numpy.transpose(
numpy.array(self.dataset, dtype=dtype), self.transposition
)
def __len__(self):
return self.shape[0]
[docs]
def transpose(self, transposition=None):
"""Return a re-ordered (dimensions permutated)
:class:`DatasetView`.
The returned object refers to
the same dataset but with a different :attr:`transposition`.
:param List[int] transposition: List of dimension numbers in the wanted order.
If ``None`` (default), reverse the dimensions.
:return: Transposed DatasetView
"""
# by default, reverse the dimensions
if transposition is None:
transposition = list(reversed(self.transposition))
# If this DatasetView is already transposed, sort new transposition
# relative to old transposition
elif list(self.transposition) != list(range(self.ndim)):
transposition = [self.transposition[i] for i in transposition]
return DatasetView(self.dataset, transposition)
@property
def T(self):
"""
Same as self.transpose()
:return: DatasetView with dimensions reversed."""
return self.transpose()