Source code for nabu.reconstruction.hbp

import math
import numpy as np

from ..utils import get_cuda_srcfile
from ..cuda.processing import __has_pycuda__

if __has_pycuda__:
    from ..cuda.kernel import CudaKernel
    from .sinogram_cuda import CudaSinoMult

from .fbp import CudaBackprojector


try:
    import pycuda.driver as cuda
    from pycuda import gpuarray as garray

    __have_hbp__ = True
except:
    __have_hbp__ = False


[docs] def buildConebeamGeometry( anglesRad, rotAxisProjectionFromLeftPixelUnits, sourceSampleDistanceVoxelUnits, opticalAxisFromLeftPixelUnits=None ): """Generate fanbeam/conebeam projection matrices (as required by the backprojector) based on geometry parameters""" if opticalAxisFromLeftPixelUnits is None: if hasattr(rotAxisProjectionFromLeftPixelUnits, "__iter__"): opticalAxisFromLeftPixelUnits = rotAxisProjectionFromLeftPixelUnits[0] else: opticalAxisFromLeftPixelUnits = rotAxisProjectionFromLeftPixelUnits t = opticalAxisFromLeftPixelUnits d = sourceSampleDistanceVoxelUnits if hasattr(rotAxisProjectionFromLeftPixelUnits, "__iter__"): P_list = [ np.array([[0, -t / d, 1, a], [1, 0, 0, 0], [0, -1 / d, 0, 1]], dtype=np.float64) # pylint: disable=E1130 for a in rotAxisProjectionFromLeftPixelUnits ] else: a = rotAxisProjectionFromLeftPixelUnits P_list = [ np.array([[0, -t / d, 1, a], [1, 0, 0, 0], [0, -1 / d, 0, 1]], dtype=np.float64) # pylint: disable=E1130 ] * len(anglesRad) R = lambda w: np.array( [[1, 0, 0, 0], [0, np.cos(w), np.sin(w), 0], [0, -np.sin(w), np.cos(w), 0], [0, 0, 0, 1]], dtype=np.float64 ) return np.array([P @ R(-w) for P, w in zip(P_list, anglesRad)])
[docs] class HierarchicalBackprojector(CudaBackprojector): kernel_filename = "hierarchical_backproj.cu" def _init_geometry(self, sino_shape, slice_shape, angles, rot_center, halftomo, slice_roi): super()._init_geometry(sino_shape, slice_shape, angles, rot_center, halftomo, slice_roi) # pylint: disable=E1130 # -angles because different convention for the rotation direction self.angles = -self.angles # to do the reconstruction in reduction_steps steps self.reduction_steps = self.extra_options.get("hbp_reduction_steps", 2) reduction_factor = int(math.ceil((sino_shape[-2]) ** (1 / self.reduction_steps))) # TODO customize axis_source_meters = 1.0e9 voxel_size_microns = 1.0 # axis_cor = self.extra_options.get("axis_correction", None) if axis_cor is None: axis_cor = 0 bpgeometry = buildConebeamGeometry( self.angles, self.rot_center + axis_cor, 1.0e6 * axis_source_meters / voxel_size_microns ) self.setup_hbp(bpgeometry, reductionFactor=reduction_factor, legs=self.extra_options.get("hbp_legs", 4))
[docs] def setup_hbp( self, bpgeometry, reductionFactor=20, grid_wh_factors=(1, 1), fac=1, legs=4, ): # This implementation seems not to use textures self._use_textures = False # for the non texture implementation, this big number will discard texture limitations large_factor_for_non_texture_memory_access = 2**10 # TODO: read limits from device info. self.GPU_MAX_GRIDSIZE = 2**15 * large_factor_for_non_texture_memory_access self.GPU_MAX_GRIDS = 2**11 * large_factor_for_non_texture_memory_access if self.sino_shape[0] != len(bpgeometry): raise ValueError("self.sino_shape[0] != len(bpgeometry)") if self.sino_shape[0] != len(self.angles): raise ValueError("self.sino_shape[0] != len(self.angles)") if self.sino_shape[1] > self.GPU_MAX_GRIDSIZE: raise ValueError(f"self.sino_shape[1] > {self.GPU_MAX_GRIDSIZE} not supported by GPU") if self.sino_shape[0] > self.GPU_MAX_GRIDSIZE: raise ValueError(f"self.sino_shape[0] > {self.GPU_MAX_GRIDSIZE} currently not supported") self.reductionFactor = reductionFactor self.legs = legs self.bpsetupsH = bpgeometry.astype(np.float32) # self.bpsetupsD = cuda.mem_alloc(self.bpsetupsH.nbytes) # cuda.memcpy_htod(self.bpsetupsD, self.bpsetupsH) self.bpsetupsD = self._processing.to_device("bpsetupsD", self.bpsetupsH) # if allocate_cuda_sinogram: # self.sinogramD = cuda.mem_alloc(self.sino_shape[0] * self.sino_shape[1] * self.float_size) # else: # self.sinogramD = None self.sinogramD = None self.whf = grid_wh_factors if self.sino_shape[1] * 2 * self.whf[0] * fac > self.GPU_MAX_GRIDSIZE: print(f"WARNING: gridsampling limited to {self.GPU_MAX_GRIDSIZE}") self.whf[0] = self.GPU_MAX_GRIDSIZE / (self.sino_shape[1] * 2 * fac) ############################################### ########## create intermediate grids ########## ############################################### self.reductionFactors = [] self.grids = [] # shapes self.gridTransforms = [] # grid-to-world self.gridInvTransforms = [] # world-to-grid self.gridTransformsH = [] # host buffer self.gridTransformsD = [] # device buffer ### first level grid: will receive backprojections # #################################################### N = self.slice_shape[1] * fac angularRange = abs(self.angles.ptp()) / self.sino_shape[0] * reductionFactor ngrids = int(math.ceil(self.sino_shape[0] / reductionFactor)) grid_width = int( np.rint(2 * N * self.whf[0]) ) # double sampling to account/compensate for diamond shaped grid of ray-intersections grid_height = int( math.ceil(angularRange * N * self.whf[1]) ) # small-angle approximation, generates as much "lines" as needed to account for all intersection levels m = (len(self.angles) // reductionFactor) * reductionFactor # TODO: improve angle calculation for more general cases tmpangles = np.angle( np.average(np.exp(1.0j * self.angles[:m].reshape(m // reductionFactor, reductionFactor)), axis=1) ) tmpangles = np.concatenate((tmpangles, (np.angle(np.average(np.exp(1.0j * self.angles[m:]))),)))[:ngrids] gridAinvT = self._getAinvT(N, grid_height, grid_width) setupRs = self._getRotationMatrices(tmpangles) pad = int(math.ceil(ngrids / legs) * legs - ngrids) # add nan-padding for inline-signaling of unused grids self.gridTransforms += [ np.array( [(R @ gridAinvT) for R in setupRs] + [np.ones((3, 3), np.float32) * math.nan] * pad, dtype=np.float32 ) ] self.gridInvTransforms += [np.array([np.linalg.inv(t) for t in self.gridTransforms[-1]], dtype=np.float32)] self.grids += [(grid_height, grid_width, int(math.ceil(ngrids / legs)))] self.reductionFactors += [reductionFactor] ### intermediate level grids: accumulation grids ### #################################################### # Actual iteration count typically within 1-5. Cf. break condition for i in range(100): # for a reasonable (with regard to memory requirement) grid-aspect ratio in the intermediate levels, # the covered angular range per grid should not exceed 28.6°, i.e., # fewer than 7 (6.3) or 13 (12.6) grids for a 180° / 360° scan is not reasonable if int(math.ceil(ngrids / reductionFactor)) < 20: break angularRange *= reductionFactor ngrids = int(math.ceil(ngrids / reductionFactor)) grid_height = int( math.ceil(angularRange * N * self.whf[1]) ) # implicit small angle approximation, whose validity is # asserted by the preceding "break" gridAinvT = self._getAinvT(N, grid_height, grid_width) prevAngles = tmpangles m = (len(prevAngles) // reductionFactor) * reductionFactor # TODO: improve angle calculation for more general cases tmpangles = np.angle( np.average(np.exp(1.0j * prevAngles[:m].reshape(m // reductionFactor, reductionFactor)), axis=1) ) tmpangles = np.concatenate((tmpangles, (np.angle(np.average(np.exp(1.0j * prevAngles[m:]))),)))[:ngrids] setupRsRed = self._getRotationMatrices(tmpangles) pad = int(math.ceil(ngrids / legs) * legs - ngrids) self.gridTransforms += [ np.array( [(R @ gridAinvT) for R in setupRsRed] + [np.ones((3, 3), np.float32) * math.nan] * pad, dtype=np.float32, ) ] self.gridInvTransforms += [np.array([np.linalg.inv(t) for t in self.gridTransforms[-1]], dtype=np.float32)] self.grids += [(grid_height, grid_width, int(math.ceil(ngrids / legs)))] self.reductionFactors += [reductionFactor] ##### final accumulation grid ################# ############################################### reductionFactor = ngrids ngrids = 1 grid_size = self.slice_shape[1] grid_width = grid_size grid_height = grid_size # gridAinvT = self._getAinvT(N, grid_height, grid_width) gridAinvT = self._getAinvT(N, grid_height, grid_width, 1 / fac) self.gridTransforms += [ np.array([gridAinvT] * legs, dtype=np.float32) ] # inflate transform list for convenience in reconstruction loop self.gridInvTransforms += [np.array([np.linalg.inv(t) for t in self.gridTransforms[-1]], dtype=np.float32)] self.grids += [(grid_height, grid_width, ngrids)] self.reductionFactors += [reductionFactor] #### accumulation grids ##### self.gridTransformsD = [] self.gridInvTransformsD = [] self.gridsD = [] max_grid_size = get_max_grid_size(self.grids) for i in range(len(self.grids)): gridTransformH = np.array(self.gridTransforms[i][:, :2, :3], dtype=np.float32, order="C").copy() gridInvTransformH = np.array(self.gridInvTransforms[i][:, :2, :3], dtype=np.float32, order="C").copy() self.gridTransformsD.append(self._processing.to_device("gridTransformsD%d " % i, gridTransformH.ravel())) self.gridInvTransformsD.append( self._processing.to_device("gridInvTransformsD%d" % i, gridInvTransformH.ravel()) ) if legs == 1 or i + 1 != (len(self.grids)): if i < 2: self.gridsD.append(self._processing.allocate_array("gridsD%d" % i, max_grid_size)) else: self.gridsD.append(self.gridsD[i % 2]) else: self.gridsD.append(self._processing.allocate_array("gridsD%d" % i, get_max_grid_size(self.grids[-1:]))) self.imageBufferShape = (grid_size, grid_size) self.imageBufferD = self._processing.allocate_array( "imageBufferD", self.imageBufferShape[0] * self.imageBufferShape[1] ) self.imageBufferH = np.zeros(self.imageBufferShape, dtype=np.float32)
def _getAinvT(self, finalGridWidthAndHeight, currentGridHeight, currentGridWidth, scale=1): N = finalGridWidthAndHeight grid_height = currentGridHeight grid_width = currentGridWidth # shifts a texture coordinate from corner origin to center origin T = np.array(((1, 0, -0.5 * (grid_height - 1)), (0, 1, -0.5 * (grid_width - 1)), (0, 0, 1)), dtype=np.float32) # scales texture coordinates (of subsampled grid) into the unit/cooridnate system of a fully sampled grid Ainv = np.array( (((N - 1) / (grid_height - 1) * scale, 0, 0), (0, (N - 1) / (grid_width - 1) * scale, 0), (0, 0, 1)), dtype=np.float32, ) return Ainv @ T def _getRotationMatrices(self, angles): return [ np.array(((np.cos(a), np.sin(a), 0), (-np.sin(a), np.cos(a), 0), (0, 0, 1)), dtype=np.float32) for a in angles ] def _compile_kernels(self): # pylint: disable=E0606 self.backprojector = CudaKernel( "backprojector", filename=get_cuda_srcfile(self.kernel_filename), signature="PPiiiiPiifPi", ) self.aggregator = CudaKernel( "aggregator", filename=get_cuda_srcfile(self.kernel_filename), signature="iPPiiiiPiiiP" ) self.clip_outer_circle_kernel = CudaKernel( "clip_outer_circle", filename=get_cuda_srcfile(self.kernel_filename), signature="Pii" ) # Duplicate of fbp.py ... if self.halftomo and self.rot_center < self.dwidth: self.sino_mult = CudaSinoMult(self.sino_shape, self.rot_center, ctx=self._processing.ctx) # def _set_sino(self, sino, do_checks=True): if do_checks and not (sino.flags.c_contiguous): raise ValueError("Expected C-Contiguous array") else: self._d_sino = self._processing.allocate_array("_d_sino", self.sino_shape) if id(self._d_sino) == id(sino): return self._d_sino[:] = sino[:]
[docs] def backproj(self, sino, output=None, do_checks=True, reference=False): if self.halftomo and self.rot_center < self.dwidth: self.sino_mult.prepare_sino(sino) self._set_sino(sino) lws = (64, 4, 4) if reference: gws = getGridSize(self.grids[-1], lws) (grid_height, grid_width, ngrids) = self.grids[-1] self.backprojector( self.bpsetupsD, self.gridTransformsD[-1].gpudata, np.int32(self.sino_shape[0]), np.int32(grid_width), np.int32(grid_height), np.int32(ngrids), self.gridsD[-1], np.int32(self.sino_shape[1]), np.int32(self.sino_shape[0]), np.float32(self._backproj_scale_factor), self._d_sino, np.int32(0), # offset block=lws, grid=gws, ) else: for leg in list(range(0, self.legs)): gridOffset = leg * self.grids[0][2] projOffset = gridOffset * self.reductionFactors[0] gws = getGridSize(self.grids[0], lws) (grid_height, grid_width, ngrids) = self.grids[0] self.backprojector( self.bpsetupsD, self.gridTransformsD[0][6 * gridOffset :], np.int32(self.reductionFactors[0]), np.int32(grid_width), np.int32(grid_height), np.int32(ngrids), self.gridsD[0], np.int32(self.sino_shape[1]), np.int32(self.sino_shape[0]), np.float32(self._backproj_scale_factor), self._d_sino, np.int32(projOffset), block=lws, grid=gws, ) for i in range(1, len(self.grids)): if self.grids[i][2] >= 8: lws = (16, 16, 4) else: lws = (32, 32, 1) gws = getGridSize(self.grids[i], lws) (new_grid_height, new_grid_width, new_ngrids) = self.grids[i] (prev_grid_height, prev_grid_width, prev_ngrids) = self.grids[i - 1] gridOffset = leg * self.grids[i][2] prevGridOffset = leg * self.grids[i - 1][2] self.aggregator( np.int32((i + 1 == len(self.grids)) and (leg > 0)), self.gridTransformsD[i][6 * gridOffset :], self.gridInvTransformsD[i - 1][6 * prevGridOffset :], np.int32(self.reductionFactors[i]), np.int32(new_grid_width), np.int32(new_grid_height), np.int32(new_ngrids), self.gridsD[i], np.int32(prev_grid_width), np.int32(prev_grid_height), np.int32(prev_ngrids), self.gridsD[i - 1], block=lws, grid=gws, ) if self.extra_options.get("clip_outer_circle", False): lws = (16, 16, 1) ny, nx = self.slice_shape gws = getGridSize((nx, ny, 1), lws) self.clip_outer_circle_kernel(self.gridsD[-1], np.int32(ny), np.int32(nx), block=lws, grid=gws) # FIXME pycuda fails to do a discontiguous memcpy for more than 2^31 bytes if self.gridsD[-1].nbytes > 2**31: r1d = self.gridsD[-1].get() r2d = np.ascontiguousarray(r1d.reshape(self.slice_shape)) if output is not None: output[:] = r2d[:] return output else: return r2d # -------- else: return self.gridsD[-1].reshape(self.slice_shape).get(ary=output)
[docs] def get_max_grid_size(grids): size_max = 0 for dims in grids: size = 1 for d in dims: size = size * d if size > size_max: size_max = size return size_max
[docs] def getGridSize(minimum, local): m, l = np.array(minimum), np.array(local) new = (m // l) * l new[new < m] += l[new < m] return tuple(map(int, new // l))