Weighted vs Unweighted average for azimuthal integration#

The aim of this tutorial is to investigate the ability to perform unweighted averages during azimuthal integration, and to validate the resulting intensities and propagated uncertainties.
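As a schematic reminder of what is being compared (a sketch that ignores pixel splitting and masking, not pyFAI's exact implementation): for the pixels $i$ falling into a given radial bin, with signal $S_i$ and normalization $\omega_i$ (solid angle, flat-field, polarization, ...), the weighted estimator is a ratio of sums whereas the unweighted one is a mean of ratios:

$$I_{weighted} = \frac{\sum_i S_i}{\sum_i \omega_i} \qquad\qquad I_{unweighted} = \frac{1}{n}\sum_i \frac{S_i}{\omega_i}$$

The question addressed below is whether both estimators, together with their propagated uncertainties, are statistically consistent.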

[1]:
%matplotlib inline
# use `widget` instead of `inline` for a better user experience; `inline` allows plots to be stored in the notebook.
import time
import numpy
from matplotlib.pyplot import subplots
from scipy.stats import chi2 as chi2_dist
import fabio
import pyFAI
from pyFAI.gui import jupyter
from pyFAI.test.utilstest import UtilsTest
from pyFAI.utils.mathutil import rwp
t0 = time.perf_counter()
[2]:
img = fabio.open(UtilsTest.getimage("Pilatus1M.edf")).data
ai = pyFAI.load(UtilsTest.getimage("Pilatus1M.poni"))
print(ai)
jupyter.display(img)
Detector Pilatus 1M      PixelSize= 172µm, 172µm         BottomRight (3)
Wavelength= 1.000000e-10 m
SampleDetDist= 1.583231e+00 m   PONI= 3.341702e-02, 4.122778e-02 m      rot1=0.006487  rot2=0.007558  rot3=0.000000 rad
DirectBeamDist= 1583.310 mm     Center: x=179.981, y=263.859 pix        Tilt= 0.571° tiltPlanRotation= 130.640° 𝛌= 1.000Å
[2]:
<Axes: >
../../../_images/usage_tutorial_Variance_Unweighted_2_2.png
[3]:
method = pyFAI.method_registry.IntegrationMethod.parse(("no", "csr", "cython"), dim=1)
weighted = method
unweighted = method.unweighted
[4]:
# Note: the two IntegrationMethod objects compare equal; only their `weighted_average` flag differs
weighted.weighted_average, unweighted.weighted_average, weighted == unweighted
[4]:
(True, False, True)
[5]:
res_w = ai.integrate1d(img, 1000, method=weighted, error_model="poisson", polarization_factor=0.99)
res_u = ai.integrate1d(img, 1000, method=unweighted, error_model="poisson", polarization_factor=0.99)

rwp(res_u, res_w)
[5]:
np.float64(0.0026844928696383593)
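Rwp is the weighted-profile agreement factor borrowed from Rietveld refinement. As a rough reminder (the exact convention used by pyFAI.utils.mathutil.rwp may differ), a common definition for two curves $I$ and $J$ sharing the same bins, with Poisson-like weights $w_k \propto 1/I_k$, is:

$$R_{wp} = \sqrt{\frac{\sum_k w_k (I_k - J_k)^2}{\sum_k w_k I_k^2}}$$

The very small value obtained here indicates that the two curves are almost identical.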
[6]:
ax = jupyter.plot1d(res_w, label="weighted")
ax = jupyter.plot1d(res_u, label="unweighted", ax=ax)
ax.plot(res_w.radial, abs(res_w.intensity-res_u.intensity), label="delta")
ax.legend()
ax.set_yscale("log")
../../../_images/usage_tutorial_Variance_Unweighted_6_0.png

About statistics#

We now work on a synthetic dataset of 1000 frames in a WAXS-like configuration.

[7]:

ai_init = {"dist": 0.1, "poni1": 0.1, "poni2": 0.1,
           "rot1": 0.0, "rot2": 0.0, "rot3": 0.0,
           "detector": "Pilatus1M", "wavelength": 1e-10}
ai = pyFAI.load(ai_init)
unit = pyFAI.units.to_unit("q_A^-1")
detector = ai.detector
npt = 1000
nimg = 1000
wl = 1e-10
I0 = 1e4
polarization = 0.99
kwargs = {"npt": npt,
          "polarization_factor": polarization,
          "safe": False,
          "error_model": pyFAI.containers.ErrorModel.POISSON}
print(ai)
Detector Pilatus 1M      PixelSize= 172µm, 172µm         BottomRight (3)
Wavelength= 1.000000e-10 m
SampleDetDist= 1.000000e-01 m   PONI= 1.000000e-01, 1.000000e-01 m      rot1=0.000000  rot2=0.000000  rot3=0.000000 rad
DirectBeamDist= 100.000 mm      Center: x=581.395, y=581.395 pix        Tilt= 0.000° tiltPlanRotation= 0.000° 𝛌= 1.000Å
[8]:
# Synthetic flat-field (values between 0.5 and 1.5); the "SAXS-like" Lorentzian intensity curve is built in the next cell
flat = numpy.random.random(detector.shape) + 0.5
[9]:
qmax = ai.integrate1d(flat, method=method, **kwargs).radial.max()
q = numpy.linspace(0, ai.array_from_unit(unit="q_A^-1").max(), npt)
I = I0/(1+q**2)
jupyter.plot1d((q,I))
pass
../../../_images/usage_tutorial_Variance_Unweighted_10_0.png
[10]:
# Reconstruction of the scattering image:

img_theo = ai.calcfrom1d(q, I, dim1_unit=unit,
                         correctSolidAngle=True,
                         flat=flat,
                         polarization_factor=polarization,
                         mask=ai.detector.mask)
kwargs["flat"] = flat

jupyter.display(img_theo, label="Scattering image")
pass
../../../_images/usage_tutorial_Variance_Unweighted_11_0.png
[11]:
res_theo = ai.integrate1d(img_theo, method=method, **kwargs)
jupyter.plot1d(res_theo)
[11]:
<Axes: title={'center': '1D integration'}, xlabel='Scattering vector $q$ ($nm^{-1}$)', ylabel='Intensity'>
../../../_images/usage_tutorial_Variance_Unweighted_12_1.png
[12]:
%%time
if "dataset" not in dir():
    dataset = numpy.random.poisson(img_theo, (nimg,) + img_theo.shape)
# else avoid wasting time
print(dataset.nbytes/(1<<20), "MBytes", dataset.shape)
7806.266784667969 MBytes (1000, 1043, 981)
CPU times: user 36.8 s, sys: 888 ms, total: 37.7 s
Wall time: 37.7 s
[13]:
res_weighted = ai.integrate1d(dataset[0], method=weighted, **kwargs)
res_unweighted = ai.integrate1d(dataset[0], method=unweighted, **kwargs)

ax = jupyter.plot1d(res_theo, label="theo")
ax = jupyter.plot1d(res_weighted, label="weighted", ax=ax)
ax = jupyter.plot1d(res_unweighted, label="unweighted", ax=ax)
../../../_images/usage_tutorial_Variance_Unweighted_14_0.png
[14]:
fig, ax = subplots(2, figsize=(12, 4))
ax[0].plot(res_theo.radial, res_theo.intensity, label="theo")
ax[1].plot(res_theo.radial, res_theo.sigma, label="theo")
ax[0].plot(res_weighted.radial, res_weighted.intensity, "3", label="weighted")
ax[1].plot(res_weighted.radial, res_weighted.sigma, "3",label="weighted")
ax[0].plot(res_unweighted.radial, res_unweighted.intensity, "4", label="unweighted")
ax[1].plot(res_unweighted.radial, res_unweighted.sigma, "4", label="unweighted")
ax[0].set_ylabel("Intensities")
ax[1].set_ylabel("Uncertainties")
_ = ax[0].legend()
../../../_images/usage_tutorial_Variance_Unweighted_15_0.png
[15]:
print("Number of paires of images: ", nimg*(nimg-1)//2)
Number of paires of images:  499500
[16]:
def chi2_curves(res1, res2):
    """Calculate the reduced chi2 between a pair of integrated curves"""
    I = res1.intensity
    J = res2.intensity
    npt = len(I)
    assert len(J) == npt
    sigma_I = res1.sigma
    sigma_J = res2.sigma
    return ((I - J)**2 / (sigma_I**2 + sigma_J**2)).sum() / (npt - 1)
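In equation form, the function above computes the reduced statistic

$$\chi^2 = \frac{1}{N_{pt}-1}\sum_{k=1}^{N_{pt}} \frac{(I_k - J_k)^2}{\sigma_{I,k}^2 + \sigma_{J,k}^2}$$

for two curves sharing the same $N_{pt}$ radial bins. If the two intensities differ only by noise and the uncertainties are propagated correctly, this value is expected to be close to 1 and, over many pairs of frames, to follow a (rescaled) $\chi^2$ distribution. This is what the next cells check.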
[17]:
def plot_distribution(ai, kwargs, nbins=100, method=method, ax=None):
    """Integrate every frame of the dataset, compute the chi2 for all pairs of curves and histogram it."""
    ai.reset()
    results = []
    c2 = []
    integrate = ai.integrate1d
    for i in range(nimg):
        data = dataset[i, :, :]
        r = integrate(data, method=method, **kwargs)
        results.append(r)
        for j in results[:i]:
            c2.append(chi2_curves(r, j))
    c2 = numpy.array(c2)
    if ax is None:
        fig, ax = subplots()
    h,b,_ = ax.hist(c2, nbins, label="Measured histogram")
    y_sim = chi2_dist.pdf(b*(npt-1), npt)
    y_sim *= h.sum()/y_sim.sum()
    ax.plot(b, y_sim, label=r"$\chi^{2}$ distribution")
    ax.set_title(f"Integrated curves with {integrate.__name__}")
    ax.set_xlabel(r"$\chi^{2}$ values (histogrammed)")
    ax.set_ylabel("Number of occurrences")
    ax.legend()
    return ax
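As a quick, self-contained sanity check of the metric (a hypothetical snippet, not one of the tutorial's original cells): two independent Poisson realizations of the same curve, with sigma taken as the square root of the counts, should give a reduced χ² close to 1.

from collections import namedtuple
# Minimal stand-in for an integration result: only .intensity and .sigma are needed
Curve = namedtuple("Curve", "intensity sigma")
truth = 1e4 / (1 + numpy.linspace(0, 3, npt)**2)    # noise-free Lorentzian curve
a = numpy.random.poisson(truth).astype(float)       # first noisy realization
b = numpy.random.poisson(truth).astype(float)       # second noisy realization
print(chi2_curves(Curve(a, numpy.sqrt(a)), Curve(b, numpy.sqrt(b))))  # expected to be close to 1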
[18]:
fig,ax = subplots(1,2, figsize=(12,4))
a=plot_distribution(ai, kwargs, method=unweighted, ax=ax[0])
a.set_title("Unweighted")
a=plot_distribution(ai, kwargs, method=weighted, ax=ax[1])
a.set_title("Weighted")
[18]:
Text(0.5, 1.0, 'Weighted')
../../../_images/usage_tutorial_Variance_Unweighted_19_1.png
[19]:
print(rwp(ai.integrate1d(dataset[100], method=weighted, **kwargs),
          ai.integrate1d(dataset[100], method=unweighted, **kwargs)))
0.044544383800129525
[20]:
print(f"Total run-time: {time.perf_counter()-t0:.3f}s")
Total run-time: 86.408s

Conclusion#

The two algorithms provide similar, but not strictly identical, results. The difference is well above numerical noise, since the Rwp between the two curves reaches a few percent. Their speed is equivalent as well. Although the results differ, from a statistical point of view it is difficult to distinguish one from the other.

To me (J. Kieffer, author of pyFAI), the question of the best algorithm remains open.
