Automatic documentation generated from docstrings

This page is auto-generated from the docstrings of functions, methods, classes, and modules. Each lowest-level module in Mass2 (i.e., each python file) that you want documented and indexed for searching should be listed in this docstrings2.md file. The exception is the Core docstrings page, which covers the mass2.core modules.

Calibration

Energy to/from pulse heights

Objects to assist with calibration from pulse heights to absolute energies.

Created on May 16, 2011 Completely redesigned January 2025

Curvetypes

Bases: Enum

Enumerate the types of calibration curves supported by Mass2.

Source code in mass2/calibration/energy_calibration.py
24
25
26
27
28
29
30
31
32
class Curvetypes(Enum):
    """Enumerate the types of calibration curves supported by Mass2."""

    # Members use auto(), so only the names carry meaning, not the integer values.
    # The names appear to indicate which quantity is splined against pulse height
    # (energy, log energy, gain = PH/E, inverse gain, or log gain); the actual
    # mapping to transforms is applied elsewhere — see the _ecal_output_* helpers
    # on EnergyCalibration. TODO confirm the exact member-to-transform pairing.
    LINEAR = auto()
    LINEAR_PLUS_ZERO = auto()
    LOGLOG = auto()
    GAIN = auto()
    INVGAIN = auto()
    LOGGAIN = auto()

EnergyCalibration dataclass

An energy calibration object that can convert pulse heights to (estimated) energies.

Subclasses implement the math of either exact or approximating calibration curves. Methods allow you to convert between pulse heights and energies, estimate energy uncertainties, and estimate pulse heights for lines whose names are known, or estimate the cal curve slope. Methods allow you to plot the calibration curve with its anchor points.

Returns:
Raises:
  • ValueError

    If there is not at least one anchor point.

Source code in mass2/calibration/energy_calibration.py
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
@dataclass(frozen=True)
class EnergyCalibration:
    """An energy calibration object that can convert pulse heights to (estimated) energies.

    Subclasses implement the math of either exact or approximating calibration curves.
    Methods allow you to convert between pulse heights and energies, estimate energy uncertainties,
    and estimate pulse heights for lines whose names are known, or estimate the cal curve slope.
    Methods allow you to plot the calibration curve with its anchor points.

    Returns
    -------
    EnergyCalibration

    Raises
    ------
    ValueError
        If there is not at least one anchor point.
    """

    # Anchor points: pulse heights, energies (eV), and their 1-sigma uncertainties
    # (dph, de are used as error bars in plot()).
    ph: NDArray[np.float64]
    energy: NDArray[np.float64]
    dph: NDArray[np.float64]
    de: NDArray[np.float64]
    names: list[str]  # spectral-feature name for each anchor point
    curvename: Curvetypes
    approximating: bool  # when True, plot() draws ph2uncertainty bands around the curve
    spline: Callable[..., NDArray[np.float64]]  # y(x); accepts der= for derivative order
    energy2ph: Callable[[ArrayLike], NDArray[np.float64]]  # inverse mapping: energy -> pulse height
    ph2uncertainty: Callable[[ArrayLike], NDArray[np.float64]]  # cal uncertainty (eV) at a pulse height
    input_transform: Callable  # g(ph): maps pulse height to the spline's x coordinate
    output_transform: Callable | None = None  # f(ph, y): maps spline output to energy; None means E = y
    extra_info: dict[str, Any] | None = None

    def __post_init__(self) -> None:
        """Raise ValueError for inputs of length zero.

        Raises
        ------
        ValueError
            If there is not at least one anchor point.
        """
        # Raise the documented ValueError instead of asserting: the class docstring
        # promises ValueError, and an assert would vanish under `python -O`.
        if self.npts < 1:
            raise ValueError("EnergyCalibration requires at least one anchor point")

    def copy(self, **changes: Any) -> EnergyCalibration:
        """Make a copy of this object, optionally changing some attributes."""
        return dataclasses.replace(self, **changes)

    @property
    def npts(self) -> int:
        """Return the number of calibration anchor points."""
        return len(self.ph)

    @staticmethod
    def _ecal_input_identity(ph: NDArray, der: int = 0) -> NDArray:
        "Use ph as the argument to the spline"
        assert der >= 0
        if der == 0:
            return ph
        elif der == 1:
            return np.ones_like(ph)
        # All higher derivatives of the identity map are zero.
        return np.zeros_like(ph)

    @staticmethod
    def _ecal_input_log(ph: NDArray, der: int = 0) -> NDArray:
        "Use log(ph) as the argument to the spline"
        assert der >= 0
        if der == 0:
            return np.log(ph)
        elif der == 1:
            return 1.0 / ph
        raise ValueError(f"der={der}, should be one of (0,1)")

    @staticmethod
    def _ecal_output_identity(ph: NDArray, yspline: NDArray, der: int = 0, dery: int = 0) -> NDArray:
        "Use the spline result as E itself"
        assert der >= 0 and dery >= 0
        if der > 0:
            return np.zeros_like(ph)
        if dery == 0:
            return yspline
        elif dery == 1:
            return np.ones_like(ph)
        else:
            return np.zeros_like(ph)

    @staticmethod
    def _ecal_output_log(ph: NDArray, yspline: NDArray, der: int = 0, dery: int = 0) -> NDArray:
        "Use the spline result as log(E)"
        assert der >= 0 and dery >= 0
        if der == 0:
            # Any order of d/dy equals E(y) itself, or exp(y).
            return np.exp(yspline)
        else:
            return np.zeros_like(ph)

    @staticmethod
    def _ecal_output_gain(ph: NDArray, yspline: NDArray, der: int = 0, dery: int = 0) -> NDArray:
        "Use the spline result as gain = ph/E"
        assert der >= 0 and dery >= 0
        if dery == 0:
            if der == 0:
                return ph / yspline
            elif der == 1:
                return 1.0 / yspline
            else:
                return np.zeros_like(ph)
        assert dery == 1
        # d/dy of ph/y
        return -ph / yspline**2

    @staticmethod
    def _ecal_output_invgain(ph: NDArray, yspline: NDArray, der: int = 0, dery: int = 0) -> NDArray:
        "Use the spline result as the inverse gain = E/ph"
        assert der >= 0 and dery >= 0
        if dery == 0:
            if der == 0:
                return ph * yspline
            elif der == 1:
                return yspline
            else:
                return np.zeros_like(ph)
        assert dery == 1
        return ph

    @staticmethod
    def _ecal_output_loggain(ph: NDArray, yspline: NDArray, der: int = 0, dery: int = 0) -> NDArray:
        "Use the spline result as the log of the gain, or log(ph/E)"
        assert der >= 0 and dery >= 0
        if dery == 0:
            if der == 0:
                return ph * np.exp(-yspline)
            elif der == 1:
                return np.exp(-yspline)
            else:
                return np.zeros_like(ph)
        assert dery == 1
        return -ph * np.exp(-yspline)

    @property
    def ismonotonic(self) -> np.bool:
        """Is the curve monotonic from 0 to 1.05 times the max anchor point's pulse height?
        Test at 1001 points, equally spaced in pulse height."""
        nsamples = 1001
        ph = np.linspace(0, 1.05 * self.ph.max(), nsamples)
        e = self(ph)
        return np.all(np.diff(e) > 0)

    def name2ph(self, name: str) -> NDArray[np.float64]:
        """Convert a named energy feature to pulse height. `name` need not be a calibration point."""
        energy = STANDARD_FEATURES[name]
        return self.energy2ph(energy)

    def energy2dedph(self, energies: ArrayLike) -> NDArray[np.float64]:
        """Calculate the slope at the given energies."""
        return self.ph2dedph(self.energy2ph(energies))

    def energy2uncertainty(self, energies: ArrayLike) -> NDArray[np.float64]:
        """Cal uncertainty in eV at the given energies."""
        ph = self.energy2ph(energies)
        return self.ph2uncertainty(ph)

    def __str__(self) -> str:
        """A full description of the calibration."""
        seq = [f"EnergyCalibration({self.curvename})"]
        for name, pulse_ht, energy in zip(self.names, self.ph, self.energy):
            seq.append(f"  energy(ph={pulse_ht:7.2f}) --> {energy:9.2f} eV ({name})")
        return "\n".join(seq)

    def ph2energy(self, ph: ArrayLike) -> NDArray[np.float64]:
        """Apply the calibration, converting pulse heights `ph` to energies.

        Parameters
        ----------
        ph : ArrayLike
            The pulse heights to convert to energies.

        Returns
        -------
        NDArray[np.float64]
            Energies in eV.
        """
        ph = np.asarray(ph)
        x = self.input_transform(ph)
        y = self.spline(x, der=0)
        if self.output_transform is None:
            E = y
        else:
            E = self.output_transform(ph, y)
        return E

    __call__ = ph2energy

    def ph2dedph(self, ph: ArrayLike) -> NDArray[np.float64]:
        """Calculate the calibration curve's slope at pulse heights `ph`."""
        ph = np.asarray(ph)
        x = self.input_transform(ph)
        dgdP = self.input_transform(ph, der=1)
        dydx = self.spline(x, der=1)
        # Chain rule through E = f(ph, y(g(ph))): with no output transform,
        # dE/dP = (dy/dx)(dg/dP); otherwise add the explicit-ph partial of f.
        dEdP = dydx * dgdP
        if self.output_transform is not None:
            y = self.spline(x)
            dfdP = self.output_transform(ph, y, der=1)
            dfdy = self.output_transform(ph, y, dery=1)
            dEdP = dfdP + dfdy * dydx * dgdP
        return dEdP

    def energy2ph_exact(self, E: ArrayLike) -> NDArray[np.float64]:
        """An exact inversion of the calibration curve, converting energies `E` to pulse heights.
        This is still in TO DO status, as it simply uses the spline in the forward direction.

        Parameters
        ----------
        E : ArrayLike
            Energies in eV to be converted back to pulse heights

        Returns
        -------
        NDArray[np.float64]
            Pulse heights corresponding to the given energies.
        """
        # TODO use the spline as a starting point for Brent's method
        return self.energy2ph(E)

    def save_to_hdf5(self, hdf5_group: h5py.Group, name: str) -> None:
        """Save this calibration to an HDF5 group in a new subordinate group with the given name."""
        # Replace any existing group of the same name rather than failing.
        if name in hdf5_group:
            del hdf5_group[name]

        cal_group = hdf5_group.create_group(name)
        cal_group["name"] = [str(n).encode() for n in self.names]
        cal_group["ph"] = self.ph
        cal_group["energy"] = self.energy
        cal_group["dph"] = self.dph
        cal_group["de"] = self.de
        cal_group.attrs["curvetype"] = self.curvename.name
        cal_group.attrs["approximate"] = self.approximating

    @staticmethod
    def load_from_hdf5(hdf5_group: h5py.Group, name: str) -> EnergyCalibration:
        """Load a calibration from an HDF5 group with the given name."""
        cal_group = hdf5_group[name]

        # Fix a behavior of h5py for writing in py2, reading in py3.
        ctype = cal_group.attrs["curvetype"]
        if isinstance(ctype, bytes):
            ctype = ctype.decode("utf-8")
        curvetype = Curvetypes[ctype]

        maker = EnergyCalibrationMaker(
            cal_group["ph"][:], cal_group["energy"][:], cal_group["dph"][:], cal_group["de"][:], cal_group["name"][:]
        )
        approximate = cal_group.attrs["approximate"]
        return maker.make_calibration(curvetype, approximate=approximate)

    def plotgain(self, **kwargs: Any) -> None:
        """Plot the calibration curve as gain (PH/eV) vs pulse height."""
        kwargs["plottype"] = "gain"
        self.plot(**kwargs)

    def plotinvgain(self, **kwargs: Any) -> None:
        """Plot the calibration curve as inverse gain (eV/PH) vs pulse height."""
        kwargs["plottype"] = "invgain"
        self.plot(**kwargs)

    def plotloggain(self, **kwargs: Any) -> None:
        """Plot the calibration curve as log-gain log(PH/eV) vs pulse height."""
        kwargs["plottype"] = "loggain"
        self.plot(**kwargs)

    def plot(  # noqa: PLR0917
        self,
        axis: plt.Axes | None = None,
        color: str = "blue",
        markercolor: str = "red",
        plottype: str = "linear",
        ph_rescale_power: float = 0.0,
        removeslope: bool = False,
        energy_x: bool = False,
        showtext: bool = True,
        showerrors: bool = True,
        min_energy: float | None = None,
        max_energy: float | None = None,
    ) -> None:
        """Plot the calibration curve, with options.

        Raises
        ------
        ValueError
            If `plottype` is not one of ('linear', 'gain', 'loggain', 'invgain', 'loglog').
        """
        # Plot smooth curve over a range slightly wider than the anchor points,
        # unless the caller narrows it with min_energy/max_energy.
        minph, maxph = self.ph.min() * 0.9, self.ph.max() * 1.1
        if min_energy is not None:
            minph = self.energy2ph(min_energy)
        if max_energy is not None:
            maxph = self.energy2ph(max_energy)
        phplot = np.linspace(minph, maxph, 1000)
        eplot = self(phplot)
        gplot = phplot / eplot
        dyplot = None
        gains = self.ph / self.energy
        slope = 0.0
        xplot = phplot
        x = self.ph
        xerr = self.dph
        if energy_x:
            xplot = eplot
            x = self.energy
            xerr = self.de
 
        if axis is None:
            plt.clf()
            axis = plt.subplot(111)
            # axis.set_xlim([x[0], x[-1]*1.1])
        if energy_x:
            axis.set_xlabel("Energy (eV)")
        else:
            axis.set_xlabel("Pulse height")

        if plottype == "linear":
            yplot = self(phplot) / (phplot**ph_rescale_power)
            if self.approximating:
                dyplot = self.ph2uncertainty(phplot) / (phplot**ph_rescale_power)
            y = self.energy / (self.ph**ph_rescale_power)
            if ph_rescale_power == 0.0:
                ylabel = "Energy (eV)"
                axis.set_title("Energy calibration curve")
            else:
                ylabel = f"Energy (eV) / PH^{ph_rescale_power:.4f}"
                axis.set_title(f"Energy calibration curve, scaled by {ph_rescale_power:.4f} power of PH")
        elif plottype == "gain":
            yplot = gplot
            if self.approximating:
                dyplot = self.ph2uncertainty(phplot) / eplot * gplot
            y = gains
            ylabel = "Gain (PH/eV)"
            axis.set_title("Energy calibration curve, gain")
        elif plottype == "invgain":
            yplot = 1.0 / gplot
            if self.approximating:
                dyplot = self.ph2uncertainty(phplot) / phplot
            y = 1.0 / gains
            ylabel = "Inverse Gain (eV/PH)"
            axis.set_title("Energy calibration curve, inverse gain")
        elif plottype == "loggain":
            yplot = np.log(gplot)
            if self.approximating:
                dyplot = self.ph2uncertainty(phplot) / eplot
            y = np.log(gains)
            ylabel = "Log Gain: log(eV/PH)"
            axis.set_title("Energy calibration curve, log gain")
        elif plottype == "loglog":
            yplot = np.log(eplot)
            xplot = np.log(phplot)
            if self.approximating:
                dyplot = self.ph2uncertainty(phplot) / eplot
            y = np.log(self.energy)
            x = np.log(self.ph)
            xerr = self.dph / self.ph
            ylabel = "Log energy/1 eV"
            axis.set_xlabel("log(Pulse height/arbs)")
            # Title fixed: this is the log-log plot, not log gain.
            axis.set_title("Energy calibration curve, log-log")
        else:
            # Message fixed to include 'loglog', which is handled above.
            raise ValueError("plottype must be one of ('linear', 'gain', 'loggain', 'invgain', 'loglog').")

        if removeslope:
            slope = (y[-1] - y[0]) / (x[-1] - x[0])
            yplot -= slope * xplot

        axis.plot(xplot, yplot, color=color)
        if dyplot is not None and showerrors:
            axis.plot(xplot, yplot + dyplot, color=color, alpha=0.35)
            axis.plot(xplot, yplot - dyplot, color=color, alpha=0.35)

        # Plot and label cal points; combine relative ph and energy errors in quadrature.
        dy = ((self.de / self.energy) ** 2 + (self.dph / self.ph) ** 2) ** 0.5 * y
        axis.errorbar(x, y - slope * x, yerr=dy, xerr=xerr, fmt="o", mec="black", mfc=markercolor, capsize=0)
        axis.grid(True)
        if removeslope:
            ylabel = f"{ylabel} slope removed"
        axis.set_ylabel(ylabel)
        if showtext:
            for xval, name, yval in zip(x, self.names, y):
                axis.text(xval, yval - slope * xval, name + "  ", ha="right")

ismonotonic property

Is the curve monotonic from 0 to 1.05 times the max anchor point's pulse height? Test at 1001 points, equally spaced in pulse height.

npts property

Return the number of calibration anchor points.

__post_init__()

Fail for inputs of length zero.

Source code in mass2/calibration/energy_calibration.py
507
508
509
def __post_init__(self) -> None:
    """Fail for inputs of length zero."""
    assert self.npts > 0

__str__()

A full description of the calibration.

Source code in mass2/calibration/energy_calibration.py
628
629
630
631
632
633
def __str__(self) -> str:
    """A full description of the calibration."""
    seq = [f"EnergyCalibration({self.curvename})"]
    for name, pulse_ht, energy in zip(self.names, self.ph, self.energy):
        seq.append(f"  energy(ph={pulse_ht:7.2f}) --> {energy:9.2f} eV ({name})")
    return "\n".join(seq)

copy(**changes)

Make a copy of this object, optionally changing some attributes.

Source code in mass2/calibration/energy_calibration.py
511
512
513
def copy(self, **changes: Any) -> EnergyCalibration:
    """Make a copy of this object, optionally changing some attributes."""
    return dataclasses.replace(self, **changes)

energy2dedph(energies)

Calculate the slope at the given energies.

Source code in mass2/calibration/energy_calibration.py
619
620
621
def energy2dedph(self, energies: ArrayLike) -> NDArray[np.float64]:
    """Calculate the slope at the given energies."""
    return self.ph2dedph(self.energy2ph(energies))

energy2ph_exact(E)

An exact inversion of the calibration curve, converting energies E to pulse heights. This is still in TO DO status, as it simply uses the spline in the forward direction.

Parameters:
  • E (ArrayLike) –

    Energies in eV to be converted back to pulse heights

Returns:
  • NDArray[float64]

    Pulse heights corresponding to the given energies.

Source code in mass2/calibration/energy_calibration.py
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
def energy2ph_exact(self, E: ArrayLike) -> NDArray[np.float64]:
    """An exact inversion of the calibration curve, converting energies `E` to pulse heights.
    This is still in TO DO status, as it simply uses the spline in the forward direction.

    Parameters
    ----------
    E : ArrayLike
        Energies in eV to be converted back to pulse heights

    Returns
    -------
    NDArray[np.float64]
        Pulse heights corresponding to the given energies.
    """
    # TODO use the spline as a starting point for Brent's method
    return self.energy2ph(E)

energy2uncertainty(energies)

Cal uncertainty in eV at the given energies.

Source code in mass2/calibration/energy_calibration.py
623
624
625
626
def energy2uncertainty(self, energies: ArrayLike) -> NDArray[np.float64]:
    """Cal uncertainty in eV at the given energies."""
    ph = self.energy2ph(energies)
    return self.ph2uncertainty(ph)

load_from_hdf5(hdf5_group, name) staticmethod

Load a calibration from an HDF5 group with the given name.

Source code in mass2/calibration/energy_calibration.py
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
@staticmethod
def load_from_hdf5(hdf5_group: h5py.Group, name: str) -> EnergyCalibration:
    """Load a calibration from an HDF5 group with the given name."""
    cal_group = hdf5_group[name]

    # Fix a behavior of h5py for writing in py2, reading in py3.
    ctype = cal_group.attrs["curvetype"]
    if isinstance(ctype, bytes):
        ctype = ctype.decode("utf-8")
    curvetype = Curvetypes[ctype]

    maker = EnergyCalibrationMaker(
        cal_group["ph"][:], cal_group["energy"][:], cal_group["dph"][:], cal_group["de"][:], cal_group["name"][:]
    )
    approximate = cal_group.attrs["approximate"]
    return maker.make_calibration(curvetype, approximate=approximate)

name2ph(name)

Convert a named energy feature to pulse height. name need not be a calibration point.

Source code in mass2/calibration/energy_calibration.py
614
615
616
617
def name2ph(self, name: str) -> NDArray[np.float64]:
    """Convert a named energy feature to pulse height. `name` need not be a calibration point."""
    energy = STANDARD_FEATURES[name]
    return self.energy2ph(energy)

ph2dedph(ph)

Calculate the calibration curve's slope at pulse heights ph.

Source code in mass2/calibration/energy_calibration.py
659
660
661
662
663
664
665
666
667
668
669
670
671
def ph2dedph(self, ph: ArrayLike) -> NDArray[np.float64]:
    """Calculate the calibration curve's slope at pulse heights `ph`."""
    ph = np.asarray(ph)
    x = self.input_transform(ph)
    dgdP = self.input_transform(ph, der=1)
    dydx = self.spline(x, der=1)
    dEdP = dydx * dgdP
    if self.output_transform is not None:
        y = self.spline(x)
        dfdP = self.output_transform(ph, y, der=1)
        dfdy = self.output_transform(ph, y, dery=1)
        dEdP = dfdP + dfdy * dydx * dgdP
    return dEdP

ph2energy(ph)

Apply the calibration, converting pulse heights ph to energies.

Parameters:
  • ph (ArrayLike) –

    The pulse heights to convert to energies.

Returns:
  • NDArray[float64]

    Energies in eV.

Source code in mass2/calibration/energy_calibration.py
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
def ph2energy(self, ph: ArrayLike) -> NDArray[np.float64]:
    """Apply the calibration, converting pulse heights `ph` to energies.

    Parameters
    ----------
    ph : ArrayLike
        The pulse heights to convert to energies.

    Returns
    -------
    NDArray[np.float64]
        Energies in eV.
    """
    ph = np.asarray(ph)
    x = self.input_transform(ph)
    y = self.spline(x, der=0)
    if self.output_transform is None:
        E = y
    else:
        E = self.output_transform(ph, y)
    return E

plot(axis=None, color='blue', markercolor='red', plottype='linear', ph_rescale_power=0.0, removeslope=False, energy_x=False, showtext=True, showerrors=True, min_energy=None, max_energy=None)

Plot the calibration curve, with options.

Source code in mass2/calibration/energy_calibration.py
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
def plot(  # noqa: PLR0917
    self,
    axis: plt.Axes | None = None,
    color: str = "blue",
    markercolor: str = "red",
    plottype: str = "linear",
    ph_rescale_power: float = 0.0,
    removeslope: bool = False,
    energy_x: bool = False,
    showtext: bool = True,
    showerrors: bool = True,
    min_energy: float | None = None,
    max_energy: float | None = None,
) -> None:
    """Plot the calibration curve, with options."""
    # Plot smooth curve
    minph, maxph = self.ph.min() * 0.9, self.ph.max() * 1.1
    if min_energy is not None:
        minph = self.energy2ph(min_energy)
    if max_energy is not None:
        maxph = self.energy2ph(max_energy)
    phplot = np.linspace(minph, maxph, 1000)
    eplot = self(phplot)
    gplot = phplot / eplot
    dyplot = None
    gains = self.ph / self.energy
    slope = 0.0
    xplot = phplot
    x = self.ph
    xerr = self.dph
    if energy_x:
        xplot = eplot
        x = self.energy
        xerr = self.de

    if axis is None:
        plt.clf()
        axis = plt.subplot(111)
        # axis.set_xlim([x[0], x[-1]*1.1])
    if energy_x:
        axis.set_xlabel("Energy (eV)")
    else:
        axis.set_xlabel("Pulse height")

    if plottype == "linear":
        yplot = self(phplot) / (phplot**ph_rescale_power)
        if self.approximating:
            dyplot = self.ph2uncertainty(phplot) / (phplot**ph_rescale_power)
        y = self.energy / (self.ph**ph_rescale_power)
        if ph_rescale_power == 0.0:
            ylabel = "Energy (eV)"
            axis.set_title("Energy calibration curve")
        else:
            ylabel = f"Energy (eV) / PH^{ph_rescale_power:.4f}"
            axis.set_title(f"Energy calibration curve, scaled by {ph_rescale_power:.4f} power of PH")
    elif plottype == "gain":
        yplot = gplot
        if self.approximating:
            dyplot = self.ph2uncertainty(phplot) / eplot * gplot
        y = gains
        ylabel = "Gain (PH/eV)"
        axis.set_title("Energy calibration curve, gain")
    elif plottype == "invgain":
        yplot = 1.0 / gplot
        if self.approximating:
            dyplot = self.ph2uncertainty(phplot) / phplot
        y = 1.0 / gains
        ylabel = "Inverse Gain (eV/PH)"
        axis.set_title("Energy calibration curve, inverse gain")
    elif plottype == "loggain":
        yplot = np.log(gplot)
        if self.approximating:
            dyplot = self.ph2uncertainty(phplot) / eplot
        y = np.log(gains)
        ylabel = "Log Gain: log(eV/PH)"
        axis.set_title("Energy calibration curve, log gain")
    elif plottype == "loglog":
        yplot = np.log(eplot)
        xplot = np.log(phplot)
        if self.approximating:
            dyplot = self.ph2uncertainty(phplot) / eplot
        y = np.log(self.energy)
        x = np.log(self.ph)
        xerr = self.dph / self.ph
        ylabel = "Log energy/1 eV"
        axis.set_xlabel("log(Pulse height/arbs)")
        axis.set_title("Energy calibration curve, log gain")
    else:
        raise ValueError("plottype must be one of ('linear', 'gain','loggain','invgain').")

    if removeslope:
        slope = (y[-1] - y[0]) / (x[-1] - x[0])
        yplot -= slope * xplot

    axis.plot(xplot, yplot, color=color)
    if dyplot is not None and showerrors:
        axis.plot(xplot, yplot + dyplot, color=color, alpha=0.35)
        axis.plot(xplot, yplot - dyplot, color=color, alpha=0.35)

    # Plot and label cal points
    dy = ((self.de / self.energy) ** 2 + (self.dph / self.ph) ** 2) ** 0.5 * y
    axis.errorbar(x, y - slope * x, yerr=dy, xerr=xerr, fmt="o", mec="black", mfc=markercolor, capsize=0)
    axis.grid(True)
    if removeslope:
        ylabel = f"{ylabel} slope removed"
    axis.set_ylabel(ylabel)
    if showtext:
        for xval, name, yval in zip(x, self.names, y):
            axis.text(xval, yval - slope * xval, name + "  ", ha="right")

plotgain(**kwargs)

Plot the calibration curve as gain (PH/eV) vs pulse height.

Source code in mass2/calibration/energy_calibration.py
721
722
723
724
def plotgain(self, **kwargs: Any) -> None:
    """Plot the calibration curve as gain (PH/eV) vs pulse height."""
    kwargs["plottype"] = "gain"
    self.plot(**kwargs)

plotinvgain(**kwargs)

Plot the calibration curve as inverse gain (eV/PH) vs pulse height.

Source code in mass2/calibration/energy_calibration.py
726
727
728
729
def plotinvgain(self, **kwargs: Any) -> None:
    """Plot the calibration curve as inverse gain (eV/PH) vs pulse height."""
    kwargs["plottype"] = "invgain"
    self.plot(**kwargs)

plotloggain(**kwargs)

Plot the calibration curve as log-gain log(PH/eV) vs pulse height.

Source code in mass2/calibration/energy_calibration.py
731
732
733
734
def plotloggain(self, **kwargs: Any) -> None:
    """Plot the calibration curve as log-gain log(PH/eV) vs pulse height."""
    kwargs["plottype"] = "loggain"
    self.plot(**kwargs)

save_to_hdf5(hdf5_group, name)

Save this calibration to an HDF5 group in a new subordinate group with the given name.

Source code in mass2/calibration/energy_calibration.py
690
691
692
693
694
695
696
697
698
699
700
701
702
def save_to_hdf5(self, hdf5_group: h5py.Group, name: str) -> None:
    """Save this calibration to an HDF5 group in a new subordinate group with the given name."""
    if name in hdf5_group:
        del hdf5_group[name]

    cal_group = hdf5_group.create_group(name)
    cal_group["name"] = [str(n).encode() for n in self.names]
    cal_group["ph"] = self.ph
    cal_group["energy"] = self.energy
    cal_group["dph"] = self.dph
    cal_group["de"] = self.de
    cal_group.attrs["curvetype"] = self.curvename.name
    cal_group.attrs["approximate"] = self.approximating

EnergyCalibrationMaker dataclass

An object that can make energy calibration curves under various assumptions, but using a single set of calibration anchor points and uncertainties on them.

Returns:
  • EnergyCalibrationMaker

    A factory for making various EnergyCalibration objects from the same anchor points.

Raises:
  • ValueError

    When calibration data arrays have unequal length, or ph is not monotone in energy.

Source code in mass2/calibration/energy_calibration.py
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
@dataclass(frozen=True)
class EnergyCalibrationMaker:
    """An object that can make energy calibration curves under various assumptions,
    but using a single set of calibration anchor points and uncertainties on them.

    Returns
    -------
    EnergyCalibrationMaker
        A factory for making various `EnergyCalibration` objects from the same anchor points.

    Raises
    ------
    ValueError
        When calibration data arrays have unequal length, or `ph` is not monotone in `energy`.
    """

    ph: NDArray[np.float64]  # anchor-point pulse heights
    energy: NDArray[np.float64]  # anchor-point energies, in eV
    dph: NDArray[np.float64]  # 1-sigma uncertainty on each pulse height
    de: NDArray[np.float64]  # 1-sigma uncertainty on each energy, in eV
    names: list[str]  # one label per anchor point (e.g., "Mn Ka1")

    @classmethod
    def init(
        cls,
        ph: ArrayLike | None = None,
        energy: ArrayLike | None = None,
        dph: ArrayLike | None = None,
        de: ArrayLike | None = None,
        names: list[str] | None = None,
    ) -> EnergyCalibrationMaker:
        """Create an EnergyCalibrationMaker, filling in any missing requirements with empty arrays."""
        if ph is None:
            ph = np.array([], dtype=float)
        else:
            ph = np.asarray(ph)
        if energy is None:
            energy = np.array([], dtype=float)
        else:
            energy = np.asarray(energy)
        if dph is None:
            # Default pulse-height uncertainty: 0.1% of each value.
            dph = 1e-3 * ph
        else:
            dph = np.asarray(dph)
        if de is None:
            # Default energy uncertainty: 0.1% of each value.
            de = 1e-3 * energy
        else:
            de = np.asarray(de)
        if names is None:
            names = ["dummy"] * len(dph)
        return cls(ph, energy, dph, de, names)

    def __post_init__(self) -> None:
        """Check for inputs of unequal length. Check for monotone anchor points.
        Sort the input data by energy."""
        N = len(self.ph)
        assert N == len(self.energy)
        assert N == len(self.dph)
        assert N == len(self.de)
        assert N == len(self.names)

        # First sort according to energy of the calibration point
        # NOTE: element-wise assignment (arr[:] = ...) mutates the array/list *contents*,
        # which works even on a frozen dataclass because no attribute is rebound.
        if not np.all(np.diff(self.energy) > 0):
            sortkeys = np.argsort(self.energy)
            self.ph[:] = self.ph[sortkeys]
            self.energy[:] = self.energy[sortkeys]
            self.dph[:] = self.dph[sortkeys]
            self.de[:] = self.de[sortkeys]
            self.names[:] = [self.names[i] for i in sortkeys]

        # Then confirm that the pulse heights are also in order
        order_ph = self.ph.argsort()
        order_en = self.energy.argsort()
        if not np.all(order_ph == order_en):
            a = f"PH:     {self.ph[order_ph]}"
            b = f"Energy: {self.energy[order_ph]}"
            raise ValueError(f"Calibration points are not monotone:\n{a}\n{b}")

    @property
    def npts(self) -> int:
        """The number of calibration anchor points."""
        return len(self.ph)

    def _remove_cal_point_idx(self, idx: int) -> EnergyCalibrationMaker:
        """Remove calibration point number `idx` from the calibration. Return a new maker."""
        ph = np.delete(self.ph, idx)
        energy = np.delete(self.energy, idx)
        dph = np.delete(self.dph, idx)
        de = np.delete(self.de, idx)
        names = self.names.copy()
        names.pop(idx)
        return EnergyCalibrationMaker(ph, energy, dph, de, names)

    def remove_cal_point_name(self, name: str) -> EnergyCalibrationMaker:
        """Remove calibration point named `name`. Return a new maker."""
        idx = self.names.index(name)
        return self._remove_cal_point_idx(idx)

    def remove_cal_point_prefix(self, prefix: str) -> EnergyCalibrationMaker:
        """This removes all cal points whose name starts with `prefix`.  Return a new maker."""
        # Work recursively: remove the first match and make a new Maker, and repeat until none match.
        # This is clearly less efficient when removing N matches, as N copies are made. So what?
        # This feature is likely to be rarely used, and we favor clarity over performance here.
        for name in tuple(self.names):
            if name.startswith(prefix):
                return self.remove_cal_point_name(name).remove_cal_point_prefix(prefix)
        return self

    def remove_cal_point_energy(self, energy: float, de: float) -> EnergyCalibrationMaker:
        """Remove cal points at energies within ±`de` of `energy`. Return a new maker."""
        idxs = np.nonzero(np.abs(self.energy - energy) < de)[0]
        if len(idxs) == 0:
            return self
        # Also recursive and less efficient. See previous method's comment.
        return self._remove_cal_point_idx(idxs[0]).remove_cal_point_energy(energy, de)

    def add_cal_point(
        self,
        ph: float,
        energy: float | str,
        name: str = "",
        ph_error: float | None = None,
        e_error: float | None = None,
        replace: bool = True,
    ) -> EnergyCalibrationMaker:
        """Add a single energy calibration point.

        Can call as .add_cal_point(ph, energy, name) or if the "energy" is a line name, then
        .add_cal_point(ph, name) will find energy as `energy=mass2.STANDARD_FEATURES[name]`.
        Thus the following are equivalent:

            cal = cal.add_cal_point(12345.6, 5898.801, "Mn Ka1")
            cal = cal.add_cal_point(12456.6, "Mn Ka1")

        `ph` must be in units of the self.ph_field and `energy` is in eV.
        `ph_error` is the 1-sigma uncertainty on the pulse height.  If None
        (the default), then assign ph_error = `ph`/1000. `e_error` is the
        1-sigma uncertainty on the energy itself. If None (the default), then
        assign e_error=0.01 eV.

        Careful!  If you give a name that's already in the list, or you add an equivalent
        energy but do NOT give a name, then this value replaces the previous one.
        You can prevent overwriting (and instead raise an error) by setting `replace`=False.
        """

        # If <energy> is a string and a known spectral feature's name, use it as the name instead
        # Otherwise, it needs to be a numeric type convertible to float.
        try:
            energy = float(energy)
        except ValueError:
            try:
                if type(energy) is str:
                    name = energy
                else:
                    name = str(energy)
                energy = STANDARD_FEATURES[name]
            except Exception:
                raise ValueError("2nd argument must be an energy or a known name" + " from mass2.energy_calibration.STANDARD_FEATURES")

        if ph_error is None:
            ph_error = ph * 0.001
        if e_error is None:
            e_error = 0.01  # Assume 0.01 eV error if none given

        # update_index stays None for a brand-new anchor; otherwise it points at the
        # existing anchor to replace (matched by name, or by energy within e_error).
        update_index: int | None = None
        if self.npts > 0:
            if name and name in self.names:  # Update an existing point by name
                if not replace:
                    raise ValueError(f"Calibration point '{name}' is already known and overwrite is False")
                update_index = self.names.index(name)

            elif np.abs(energy - self.energy).min() <= e_error:  # Update existing point
                if not replace:
                    raise ValueError(f"Calibration point at energy {energy:.2f} eV is already known and overwrite is False")
                update_index = int(np.abs(energy - self.energy).argmin())

        if update_index is None:  # Add a new calibration anchor point
            new_ph = np.hstack((self.ph, ph))
            new_energy = np.hstack((self.energy, energy))
            new_dph = np.hstack((self.dph, ph_error))
            new_de = np.hstack((self.de, e_error))
            new_names = self.names + [name]
        else:  # Replace an existing calibration anchor point.
            new_ph = self.ph.copy()
            new_energy = self.energy.copy()
            new_dph = self.dph.copy()
            new_de = self.de.copy()
            new_names = self.names.copy()
            new_ph[update_index] = ph
            new_energy[update_index] = energy
            new_dph[update_index] = ph_error
            new_de[update_index] = e_error
            new_names[update_index] = name
        return EnergyCalibrationMaker(new_ph, new_energy, new_dph, new_de, new_names)

    @staticmethod
    def heuristic_samplepoints(anchors: ArrayLike) -> np.ndarray:
        """Given a set of calibration anchor points, return a few hundred
        sample points, reasonably spaced below, between, and above the anchor points.

        Parameters
        ----------
        anchors : ArrayLike
            The anchor points (in pulse height space)

        Returns
        -------
        np.ndarray
            Sample pulse-height points spaced below, between, and above the anchor points.
        """
        anchors = np.asarray(anchors)
        # Prescription is 50 points up to lowest anchor (but exclude 0):
        x = [np.linspace(0, anchors.min(), 51)[1:]]
        # Then one point, plus one extra per 1% spacing between (and at) each anchor
        for i in range(len(anchors) - 1):
            low, high = anchors[i : i + 2]
            n = 1 + int(100 * (high / low - 1) + 0.5)
            x.append(np.linspace(low, high, n + 1)[1:])
        # Finally, 100 more points between the highest anchor and 2x that.
        x.append(anchors.max() * np.linspace(1, 2, 101)[1:])
        return np.hstack(x)

    def make_calibration_loglog(
        self, approximate: bool = False, powerlaw: float = 1.15, extra_info: dict[str, Any] | None = None
    ) -> EnergyCalibration:
        """Create a calibration curve that is a spline in log(energy) vs log(pulse height)."""
        return self.make_calibration(Curvetypes.LOGLOG, approximate=approximate, powerlaw=powerlaw, extra_info=extra_info)

    def make_calibration_gain(self, approximate: bool = False, extra_info: dict[str, Any] | None = None) -> EnergyCalibration:
        """Create a calibration curve that is a spline in (pulse height/energy) vs pulse height."""
        return self.make_calibration(Curvetypes.GAIN, approximate=approximate, extra_info=extra_info)

    def make_calibration_invgain(self, approximate: bool = False, extra_info: dict[str, Any] | None = None) -> EnergyCalibration:
        """Create a calibration curve that is a spline in (energy/pulse height) vs pulse height."""
        return self.make_calibration(Curvetypes.INVGAIN, approximate=approximate, extra_info=extra_info)

    def make_calibration_loggain(self, approximate: bool = False, extra_info: dict[str, Any] | None = None) -> EnergyCalibration:
        """Create a calibration curve that is a spline in log(pulse height/energy) vs pulse height."""
        return self.make_calibration(Curvetypes.LOGGAIN, approximate=approximate, extra_info=extra_info)

    def make_calibration_linear(
        self, approximate: bool = False, addzero: bool = False, extra_info: dict[str, Any] | None = None
    ) -> EnergyCalibration:
        """Create a calibration curve that is a spline in energy vs pulse height. If `addzero` include a (0,0) anchor point."""
        curvename = Curvetypes.LINEAR_PLUS_ZERO if addzero else Curvetypes.LINEAR
        return self.make_calibration(curvename, approximate=approximate, extra_info=extra_info)

    def make_calibration(
        self,
        curvename: Curvetypes = Curvetypes.LOGLOG,
        approximate: bool = False,
        powerlaw: float = 1.15,
        extra_info: dict[str, Any] | None = None,
    ) -> EnergyCalibration:
        """Create an energy calibration curve of the specified type.

        Parameters
        ----------
        curvename : Curvetypes, optional
            Which curve type to use, by default Curvetypes.LOGLOG
        approximate : bool, optional
            Whether to approximate the anchor point data given the uncertainties, by default False
        powerlaw : float, optional
            An approximate powerlaw guess used by LOGLOG curves, by default 1.15
        extra_info : dict[str, Any] | None, optional
            Extra text to store in the result, by default None

        Returns
        -------
        EnergyCalibration
            The calibration object.

        Raises
        ------
        ValueError
            If there are too few anchor points for an approximating curve, or if `curvename` is not in `Curvetypes`.
        """
        if approximate and self.npts < 3:
            raise ValueError(f"approximating curves require 3 or more cal anchor points, have {self.npts}")
        if curvename not in Curvetypes:
            raise ValueError(f"{curvename=}, must be in {Curvetypes}")

        # Use a heuristic to repair negative uncertainties.
        def regularize_uncertainties(x: NDArray[np.float64]) -> np.ndarray:
            """Replace negative uncertainties with the minimum non-negative uncertainty, or zero."""
            if not np.any(x < 0):
                return x
            # NOTE(review): when any element is negative, x.min() < 0, so `target` is always 0.0
            # here and the docstring's "minimum non-negative uncertainty" branch never applies.
            # Confirm intent — perhaps x[x >= 0].min() was meant.
            target = max(0.0, x.min())
            x = x.copy()
            x[x < 0] = target
            return x

        dph = regularize_uncertainties(self.dph)
        de = regularize_uncertainties(self.de)

        # Each curve type defines (x, y) anchor data for an internal spline, with propagated
        # uncertainties (dx, dy), plus transforms mapping spline space <-> (ph, energy) space.
        if curvename == Curvetypes.LOGLOG:
            input_transform = EnergyCalibration._ecal_input_log
            output_transform = EnergyCalibration._ecal_output_log
            x = np.log(self.ph)
            y = np.log(self.energy)
            # When there's only one point, enhance it by a fake point to enforce power-law behavior
            if self.npts == 1:
                arboffset = 1.0
                x = np.hstack([x, x + arboffset])
                y = np.hstack([y, y + arboffset / powerlaw])
            dx = dph / self.ph
            dy = de / self.energy

        elif curvename == Curvetypes.GAIN:
            input_transform = EnergyCalibration._ecal_input_identity
            output_transform = EnergyCalibration._ecal_output_gain
            x = self.ph
            y = self.ph / self.energy
            # Estimate spline uncertainties using slope of best-fit line
            slope = np.polyfit(x, y, 1)[0]
            dy = y * (((slope * self.energy - 1) * dph / x) ** 2 + (de / self.energy) ** 2) ** 0.5
            dx = dph

        elif curvename == Curvetypes.INVGAIN:
            input_transform = EnergyCalibration._ecal_input_identity
            output_transform = EnergyCalibration._ecal_output_invgain
            x = self.ph
            y = self.energy / self.ph
            # Estimate spline uncertainties using slope of best-fit line
            slope = np.polyfit(x, y, 1)[0]
            dy = y * (((slope * self.ph / y + 1) * dph / x) ** 2 + (de / self.energy) ** 2) ** 0.5
            dx = dph

        elif curvename in {Curvetypes.LINEAR, Curvetypes.LINEAR_PLUS_ZERO}:
            input_transform = EnergyCalibration._ecal_input_identity
            output_transform = EnergyCalibration._ecal_output_identity
            x = self.ph
            y = self.energy
            dx = dph
            dy = de
            if (curvename == Curvetypes.LINEAR_PLUS_ZERO) and (0.0 not in x):
                # Add a "zero"-energy and -PH point. But to avoid numerical problems, actually just use
                # 1e-3 times the lowest value, giving ±100% uncertainty on the values.
                x = np.hstack(([x.min() * 1e-3], x))
                y = np.hstack(([y.min() * 1e-3], y))
                dx = np.hstack(([x[0] * 1e-3], dx))
                dy = np.hstack((y[0] * 1e-3, dy))

        elif curvename == Curvetypes.LOGGAIN:
            input_transform = EnergyCalibration._ecal_input_identity
            output_transform = EnergyCalibration._ecal_output_loggain
            x = self.ph
            y = np.log(self.ph / self.energy)
            # Estimate spline uncertainties using slope of best-fit line
            slope = np.polyfit(x, y, 1)[0]
            dy = y * (((slope * x - 1) * dph / x) ** 2 + (de / self.energy) ** 2) ** 0.5
            dx = dph

        else:
            raise ValueError(f"curvename='{curvename}' not recognized")

        if approximate:
            # GPRSpline approximates (smooths) the anchors given their uncertainties; it is
            # annotated as CubicSpline, so presumably shares that call interface — TODO confirm.
            internal_spline: CubicSpline = GPRSpline(x, y, dy, dx)
        elif len(x) > 1:
            internal_spline = CubicSpline(x, y)
        else:
            # Only one sample point: fabricate a second at (2x, 2y) so a (degenerate,
            # line-through-origin) spline can be constructed.
            internal_spline = CubicSpline(x * [1, 2], y * [1, 2])

        # Build the inverse map (energy -> ph) by densely sampling the forward curve.
        ph_samplepoints = EnergyCalibrationMaker.heuristic_samplepoints(self.ph)
        E_samplepoints = output_transform(ph_samplepoints, internal_spline(input_transform(ph_samplepoints)))
        energy2ph = CubicSpline(E_samplepoints, ph_samplepoints)

        if approximate:
            # Propagate spline variance into energy uncertainty; the Jacobian factor
            # depends on each curve type's output transform.
            dspline = internal_spline.variance(ph_samplepoints) ** 0.5
            if curvename == Curvetypes.LOGLOG:
                de_samplepoints = dspline * internal_spline(input_transform(ph_samplepoints))
            elif curvename == Curvetypes.GAIN:
                de_samplepoints = dspline * E_samplepoints**2 / ph_samplepoints
            elif curvename == Curvetypes.INVGAIN:
                de_samplepoints = dspline * ph_samplepoints
            elif curvename in {Curvetypes.LINEAR, Curvetypes.LINEAR_PLUS_ZERO}:
                de_samplepoints = dspline
            elif curvename == Curvetypes.LOGGAIN:
                abs_dfdp = np.abs(internal_spline(ph_samplepoints, der=1))
                de_samplepoints = dspline * E_samplepoints * abs_dfdp
            else:
                raise ValueError(f"curvename='{curvename}' not recognized")

            uncertainty_spline: Callable = CubicSpline(ph_samplepoints, de_samplepoints)
        else:
            # Exact (interpolating) curves report zero uncertainty everywhere.
            uncertainty_spline = np.zeros_like

        return EnergyCalibration(
            self.ph,
            self.energy,
            self.dph,
            self.de,
            self.names,
            curvename=curvename,
            approximating=approximate,
            spline=internal_spline,
            energy2ph=energy2ph,
            ph2uncertainty=uncertainty_spline,
            input_transform=input_transform,
            output_transform=output_transform,
            extra_info=extra_info,
        )

    def drop_one_errors(
        self, curvename: Curvetypes = Curvetypes.LOGLOG, approximate: bool = False, powerlaw: float = 1.15
    ) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
        """For each calibration point, calculate the difference between the 'correct' energy
        and the energy predicted by creating a calibration without that point and using
        ph2energy to calculate the predicted energy

        Parameters
        ----------
        curvename : Curvetypes, optional
            Calibration curve type to employ, by default Curvetypes.LOGLOG
        approximate : bool, optional
            Whether to approximate the anchor point data given the uncertainties, by default False
        powerlaw : float, optional
            An approximate powerlaw guess used by LOGLOG curves, by default 1.15

        Returns
        -------
        tuple[NDArray[np.float64], NDArray[np.float64]]
            An array of the anchor point energies, and an array of the differences between the predicted
            and actual energies for each anchor point.
        """
        drop_one_energy_diff = np.zeros(self.npts)
        for i in range(self.npts):
            dropped_pulseheight = self.ph[i]
            dropped_energy = self.energy[i]
            drop_one_maker = self._remove_cal_point_idx(i)
            drop_one_cal = drop_one_maker.make_calibration(curvename=curvename, approximate=approximate, powerlaw=powerlaw)
            predicted_energy = drop_one_cal.ph2energy(dropped_pulseheight).item(0)
            drop_one_energy_diff[i] = predicted_energy - dropped_energy
        return self.energy, drop_one_energy_diff

npts property

The number of calibration anchor points.

__post_init__()

Check for inputs of unequal length. Check for monotone anchor points. Sort the input data by energy.

Source code in mass2/calibration/energy_calibration.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def __post_init__(self) -> None:
    """Check for inputs of unequal length. Check for monotone anchor points.
    Sort the input data by energy."""
    N = len(self.ph)
    assert N == len(self.energy)
    assert N == len(self.dph)
    assert N == len(self.de)
    assert N == len(self.names)

    # First sort according to energy of the calibration point
    if not np.all(np.diff(self.energy) > 0):
        sortkeys = np.argsort(self.energy)
        self.ph[:] = self.ph[sortkeys]
        self.energy[:] = self.energy[sortkeys]
        self.dph[:] = self.dph[sortkeys]
        self.de[:] = self.de[sortkeys]
        self.names[:] = [self.names[i] for i in sortkeys]

    # Then confirm that the pulse heights are also in order
    order_ph = self.ph.argsort()
    order_en = self.energy.argsort()
    if not np.all(order_ph == order_en):
        a = f"PH:     {self.ph[order_ph]}"
        b = f"Energy: {self.energy[order_ph]}"
        raise ValueError(f"Calibration points are not monotone:\n{a}\n{b}")

add_cal_point(ph, energy, name='', ph_error=None, e_error=None, replace=True)

Add a single energy calibration point.

Can call as .add_cal_point(ph, energy, name) or if the "energy" is a line name, then .add_cal_point(ph, name) will find energy as energy=mass2.STANDARD_FEATURES[name]. Thus the following are equivalent:

cal = cal.add_cal_point(12345.6, 5898.801, "Mn Ka1")
cal = cal.add_cal_point(12456.6, "Mn Ka1")

ph must be in units of the self.ph_field and energy is in eV. ph_error is the 1-sigma uncertainty on the pulse height. If None (the default), then assign ph_error = ph/1000. e_error is the 1-sigma uncertainty on the energy itself. If None (the default), then assign e_error=0.01 eV.

Careful! If you give a name that's already in the list, or you add an equivalent energy but do NOT give a name, then this value replaces the previous one. You can prevent overwriting (and instead raise an error) by setting replace=False.

Source code in mass2/calibration/energy_calibration.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
def add_cal_point(
    self,
    ph: float,
    energy: float | str,
    name: str = "",
    ph_error: float | None = None,
    e_error: float | None = None,
    replace: bool = True,
) -> EnergyCalibrationMaker:
    """Add a single energy calibration point.

    Can call as .add_cal_point(ph, energy, name) or if the "energy" is a line name, then
    .add_cal_point(ph, name) will find energy as `energy=mass2.STANDARD_FEATURES[name]`.
    Thus the following are equivalent:

        cal = cal.add_cal_point(12345.6, 5898.801, "Mn Ka1")
        cal = cal.add_cal_point(12456.6, "Mn Ka1")

    `ph` must be in units of the self.ph_field and `energy` is in eV.
    `ph_error` is the 1-sigma uncertainty on the pulse height.  If None
    (the default), then assign ph_error = `ph`/1000. `e_error` is the
    1-sigma uncertainty on the energy itself. If None (the default), then
    assign e_error=0.01 eV.

    Careful!  If you give a name that's already in the list, or you add an equivalent
    energy but do NOT give a name, then this value replaces the previous one.
    You can prevent overwriting (and instead raise an error) by setting `replace`=False.
    """

    # If <energy> is a string and a known spectral feature's name, use it as the name instead
    # Otherwise, it needs to be a numeric type convertible to float.
    try:
        energy = float(energy)
    except ValueError:
        try:
            if type(energy) is str:
                name = energy
            else:
                name = str(energy)
            energy = STANDARD_FEATURES[name]
        except Exception:
            raise ValueError("2nd argument must be an energy or a known name" + " from mass2.energy_calibration.STANDARD_FEATURES")

    if ph_error is None:
        ph_error = ph * 0.001
    if e_error is None:
        e_error = 0.01  # Assume 0.01 eV error if none given

    update_index: int | None = None
    if self.npts > 0:
        if name and name in self.names:  # Update an existing point by name
            if not replace:
                raise ValueError(f"Calibration point '{name}' is already known and overwrite is False")
            update_index = self.names.index(name)

        elif np.abs(energy - self.energy).min() <= e_error:  # Update existing point
            if not replace:
                raise ValueError(f"Calibration point at energy {energy:.2f} eV is already known and overwrite is False")
            update_index = int(np.abs(energy - self.energy).argmin())

    if update_index is None:  # Add a new calibration anchor point
        new_ph = np.hstack((self.ph, ph))
        new_energy = np.hstack((self.energy, energy))
        new_dph = np.hstack((self.dph, ph_error))
        new_de = np.hstack((self.de, e_error))
        new_names = self.names + [name]
    else:  # Replace an existing calibration anchor point.
        new_ph = self.ph.copy()
        new_energy = self.energy.copy()
        new_dph = self.dph.copy()
        new_de = self.de.copy()
        new_names = self.names.copy()
        new_ph[update_index] = ph
        new_energy[update_index] = energy
        new_dph[update_index] = ph_error
        new_de[update_index] = e_error
        new_names[update_index] = name
    return EnergyCalibrationMaker(new_ph, new_energy, new_dph, new_de, new_names)

drop_one_errors(curvename=Curvetypes.LOGLOG, approximate=False, powerlaw=1.15)

For each calibration point, calculate the difference between the 'correct' energy and the energy predicted by creating a calibration without that point and using ph2energy to calculate the predicted energy

Parameters:
  • curvename (Curvetypes, default: LOGLOG ) –

    Calibration curve type to employ, by default Curvetypes.LOGLOG

  • approximate (bool, default: False ) –

    Whether to approximate the anchor point data given the uncertainties, by default False

  • powerlaw (float, default: 1.15 ) –

    An approximate powerlaw guess used by LOGLOG curves, by default 1.15

Returns:
  • tuple[NDArray[float64], NDArray[float64]]

    An array of the anchor point energies, and an array of the differences between the predicted and actual energies for each anchor point.

Source code in mass2/calibration/energy_calibration.py
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
def drop_one_errors(
    self, curvename: Curvetypes = Curvetypes.LOGLOG, approximate: bool = False, powerlaw: float = 1.15
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    """For each calibration point, calculate the difference between the 'correct' energy
    and the energy predicted by creating a calibration without that point and using
    ph2energy to calculate the predicted energy

    Parameters
    ----------
    curvename : Curvetypes, optional
        Calibration curve type to employ, by default Curvetypes.LOGLOG
    approximate : bool, optional
        Whether to approximate the anchor point data given the uncertainties, by default False
    powerlaw : float, optional
        An approximate powerlaw guess used by LOGLOG curves, by default 1.15

    Returns
    -------
    tuple[NDArray[np.float64], NDArray[np.float64]]
        An array of the anchor point energies, and an array of the differences between the predicted
        and actual energies for each anchor point.
    """
    # """For each calibration point, calculate the difference between the 'correct' energy
    # and the energy predicted by creating a calibration without that point and using
    # ph2energy to calculate the predicted energy, return (energies, drop_one_energy_diff)"""
    drop_one_energy_diff = np.zeros(self.npts)
    for i in range(self.npts):
        dropped_pulseheight = self.ph[i]
        dropped_energy = self.energy[i]
        drop_one_maker = self._remove_cal_point_idx(i)
        drop_one_cal = drop_one_maker.make_calibration(curvename=curvename, approximate=approximate, powerlaw=powerlaw)
        predicted_energy = drop_one_cal.ph2energy(dropped_pulseheight).item(0)
        drop_one_energy_diff[i] = predicted_energy - dropped_energy
    return self.energy, drop_one_energy_diff

heuristic_samplepoints(anchors) staticmethod

Given a set of calibration anchor points, return a few hundred sample points, reasonably spaced below, between, and above the anchor points.

Parameters:
  • anchors (ArrayLike) –

    The anchor points (in pulse height space)

Returns:
  • ndarray

    Sample pulse-height points spaced below, between, and above the anchor points.

Source code in mass2/calibration/energy_calibration.py
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
@staticmethod
def heuristic_samplepoints(anchors: ArrayLike) -> np.ndarray:
    """Given a set of calibration anchor points, return a few hundred
    sample points, reasonably spaced below, between, and above the anchor points.

    Parameters
    ----------
    anchors : ArrayLike
        The anchor points (in pulse height space)

    Returns
    -------
    np.ndarray
        Sample pulse-height points spaced below, between, and above the anchor points.
    """
    anchors = np.asarray(anchors)
    # Prescription is 50 points up to lowest anchor (but exclude 0):
    x = [np.linspace(0, anchors.min(), 51)[1:]]
    # Then one point, plus one extra per 1% spacing between (and at) each anchor
    for i in range(len(anchors) - 1):
        low, high = anchors[i : i + 2]
        n = 1 + int(100 * (high / low - 1) + 0.5)
        x.append(np.linspace(low, high, n + 1)[1:])
    # Finally, 100 more points between the highest anchor and 2x that.
    x.append(anchors.max() * np.linspace(1, 2, 101)[1:])
    return np.hstack(x)

init(ph=None, energy=None, dph=None, de=None, names=None) classmethod

Create an EnergyCalibrationMaker, filling in any missing requirements with empty arrays.

Source code in mass2/calibration/energy_calibration.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
@classmethod
def init(
    cls,
    ph: ArrayLike | None = None,
    energy: ArrayLike | None = None,
    dph: ArrayLike | None = None,
    de: ArrayLike | None = None,
    names: list[str] | None = None,
) -> EnergyCalibrationMaker:
    """Build an EnergyCalibrationMaker, substituting sensible defaults for any omitted argument.

    Omitted `ph`/`energy` become empty float arrays; omitted uncertainties default to
    0.1% of the corresponding values; omitted `names` become "dummy" placeholders.
    """
    ph = np.array([], dtype=float) if ph is None else np.asarray(ph)
    energy = np.array([], dtype=float) if energy is None else np.asarray(energy)
    # Default uncertainties: one part in a thousand of the corresponding value.
    dph = 1e-3 * ph if dph is None else np.asarray(dph)
    de = 1e-3 * energy if de is None else np.asarray(de)
    if names is None:
        names = ["dummy"] * len(dph)
    return cls(ph, energy, dph, de, names)

make_calibration(curvename=Curvetypes.LOGLOG, approximate=False, powerlaw=1.15, extra_info=None)

Create an energy calibration curve of the specified type.

Parameters:
  • curvename (Curvetypes, default: LOGLOG ) –

    Which curve type to use, by default Curvetypes.LOGLOG

  • approximate (bool, default: False ) –

    Whether to approximate the anchor point data given the uncertainties, by default False

  • powerlaw (float, default: 1.15 ) –

    An approximate powerlaw guess used by LOGLOG curves, by default 1.15

  • extra_info (dict[str, Any] | None, default: None ) –

    Extra text to store in the result, by default None

Returns:
Raises:
  • ValueError

    If there are too few anchor points for an approximating curve, or if curvename is not in Curvetypes.

Source code in mass2/calibration/energy_calibration.py
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
def make_calibration(
    self,
    curvename: Curvetypes = Curvetypes.LOGLOG,
    approximate: bool = False,
    powerlaw: float = 1.15,
    extra_info: dict[str, Any] | None = None,
) -> EnergyCalibration:
    """Create an energy calibration curve of the specified type.

    Parameters
    ----------
    curvename : Curvetypes, optional
        Which curve type to use, by default Curvetypes.LOGLOG
    approximate : bool, optional
        Whether to approximate the anchor point data given the uncertainties, by default False
    powerlaw : float, optional
        An approximate powerlaw guess used by LOGLOG curves, by default 1.15
    extra_info : dict[str, Any] | None, optional
        Extra text to store in the result, by default None

    Returns
    -------
    EnergyCalibration
        The calibration object.

    Raises
    ------
    ValueError
        If there are too few anchor points for an approximating curve, or if `curvename` is not in `Curvetypes`.
    """
    if approximate and self.npts < 3:
        raise ValueError(f"approximating curves require 3 or more cal anchor points, have {self.npts}")
    if curvename not in Curvetypes:
        raise ValueError(f"{curvename=}, must be in {Curvetypes}")

    # Use a heuristic to repair negative uncertainties.
    def regularize_uncertainties(x: NDArray[np.float64]) -> np.ndarray:
        """Replace negative uncertainties with the minimum non-negative uncertainty, or zero."""
        if not np.any(x < 0):
            return x
        # NOTE(review): when this branch runs, x.min() < 0, so target is always 0.0;
        # the "minimum non-negative uncertainty" described above never occurs — confirm intent.
        target = max(0.0, x.min())
        x = x.copy()
        x[x < 0] = target
        return x

    dph = regularize_uncertainties(self.dph)
    de = regularize_uncertainties(self.de)

    # Each branch below expresses the anchors in the curve's internal (x, y) coordinates
    # and propagates 1-sigma uncertainties (dx, dy) for use by the approximating spline.
    if curvename == Curvetypes.LOGLOG:
        input_transform = EnergyCalibration._ecal_input_log
        output_transform = EnergyCalibration._ecal_output_log
        x = np.log(self.ph)
        y = np.log(self.energy)
        # When there's only one point, enhance it by a fake point to enforce power-law behavior
        if self.npts == 1:
            arboffset = 1.0
            x = np.hstack([x, x + arboffset])
            y = np.hstack([y, y + arboffset / powerlaw])
        # Relative uncertainties, because x and y are logarithms of ph and energy.
        dx = dph / self.ph
        dy = de / self.energy

    elif curvename == Curvetypes.GAIN:
        input_transform = EnergyCalibration._ecal_input_identity
        output_transform = EnergyCalibration._ecal_output_gain
        x = self.ph
        y = self.ph / self.energy
        # Estimate spline uncertainties using slope of best-fit line
        # NOTE(review): heuristic error propagation for y = ph/E using the fitted slope;
        # verify the (slope*energy - 1) factor against the intended derivation.
        slope = np.polyfit(x, y, 1)[0]
        dy = y * (((slope * self.energy - 1) * dph / x) ** 2 + (de / self.energy) ** 2) ** 0.5
        dx = dph

    elif curvename == Curvetypes.INVGAIN:
        input_transform = EnergyCalibration._ecal_input_identity
        output_transform = EnergyCalibration._ecal_output_invgain
        x = self.ph
        y = self.energy / self.ph
        # Estimate spline uncertainties using slope of best-fit line
        slope = np.polyfit(x, y, 1)[0]
        dy = y * (((slope * self.ph / y + 1) * dph / x) ** 2 + (de / self.energy) ** 2) ** 0.5
        dx = dph

    elif curvename in {Curvetypes.LINEAR, Curvetypes.LINEAR_PLUS_ZERO}:
        input_transform = EnergyCalibration._ecal_input_identity
        output_transform = EnergyCalibration._ecal_output_identity
        x = self.ph
        y = self.energy
        dx = dph
        dy = de
        if (curvename == Curvetypes.LINEAR_PLUS_ZERO) and (0.0 not in x):
            # Add a "zero"-energy and -PH point. But to avoid numerical problems, actually just use
            # 1e-3 times the lowest value, giving ±100% uncertainty on the values.
            x = np.hstack(([x.min() * 1e-3], x))
            y = np.hstack(([y.min() * 1e-3], y))
            dx = np.hstack(([x[0] * 1e-3], dx))
            dy = np.hstack((y[0] * 1e-3, dy))

    elif curvename == Curvetypes.LOGGAIN:
        input_transform = EnergyCalibration._ecal_input_identity
        output_transform = EnergyCalibration._ecal_output_loggain
        x = self.ph
        y = np.log(self.ph / self.energy)
        # Estimate spline uncertainties using slope of best-fit line
        # NOTE(review): y = log(gain) can be zero or negative, in which case multiplying
        # by y can zero out or flip the sign of dy — confirm this is intended.
        slope = np.polyfit(x, y, 1)[0]
        dy = y * (((slope * x - 1) * dph / x) ** 2 + (de / self.energy) ** 2) ** 0.5
        dx = dph

    else:
        # Defensive: unreachable, because membership in Curvetypes was checked above.
        raise ValueError(f"curvename='{curvename}' not recognized")

    if approximate:
        # Smoothing spline honoring uncertainties (note the dy-before-dx argument order).
        internal_spline: CubicSpline = GPRSpline(x, y, dy, dx)
    elif len(x) > 1:
        internal_spline = CubicSpline(x, y)
    else:
        # Only one anchor: fabricate a second point at (2x, 2y) so a spline can be built.
        internal_spline = CubicSpline(x * [1, 2], y * [1, 2])

    # Sample the forward curve densely, then invert it by splining (E -> ph).
    # Assumes E_samplepoints is monotonically increasing, as CubicSpline requires.
    ph_samplepoints = EnergyCalibrationMaker.heuristic_samplepoints(self.ph)
    E_samplepoints = output_transform(ph_samplepoints, internal_spline(input_transform(ph_samplepoints)))
    energy2ph = CubicSpline(E_samplepoints, ph_samplepoints)

    if approximate:
        # Convert the spline's variance to an energy uncertainty; the Jacobian factor
        # depends on which internal y-coordinate the curve type uses.
        dspline = internal_spline.variance(ph_samplepoints) ** 0.5
        if curvename == Curvetypes.LOGLOG:
            de_samplepoints = dspline * internal_spline(input_transform(ph_samplepoints))
        elif curvename == Curvetypes.GAIN:
            de_samplepoints = dspline * E_samplepoints**2 / ph_samplepoints
        elif curvename == Curvetypes.INVGAIN:
            de_samplepoints = dspline * ph_samplepoints
        elif curvename in {Curvetypes.LINEAR, Curvetypes.LINEAR_PLUS_ZERO}:
            de_samplepoints = dspline
        elif curvename == Curvetypes.LOGGAIN:
            abs_dfdp = np.abs(internal_spline(ph_samplepoints, der=1))
            de_samplepoints = dspline * E_samplepoints * abs_dfdp
        else:
            # Defensive: unreachable given the membership test at the top of this method.
            raise ValueError(f"curvename='{curvename}' not recognized")

        uncertainty_spline: Callable = CubicSpline(ph_samplepoints, de_samplepoints)
    else:
        # Exact (interpolating) curves report zero uncertainty everywhere.
        uncertainty_spline = np.zeros_like

    return EnergyCalibration(
        self.ph,
        self.energy,
        self.dph,
        self.de,
        self.names,
        curvename=curvename,
        approximating=approximate,
        spline=internal_spline,
        energy2ph=energy2ph,
        ph2uncertainty=uncertainty_spline,
        input_transform=input_transform,
        output_transform=output_transform,
        extra_info=extra_info,
    )

make_calibration_gain(approximate=False, extra_info=None)

Create a calibration curve that is a spline in (pulse height/energy) vs pulse height.

Source code in mass2/calibration/energy_calibration.py
263
264
265
def make_calibration_gain(self, approximate: bool = False, extra_info: dict[str, Any] | None = None) -> EnergyCalibration:
    """Shorthand for `make_calibration` with a GAIN curve: a spline of (pulse height / energy) vs pulse height."""
    return self.make_calibration(curvename=Curvetypes.GAIN, approximate=approximate, extra_info=extra_info)

make_calibration_invgain(approximate=False, extra_info=None)

Create a calibration curve that is a spline in (energy/pulse height) vs pulse height.

Source code in mass2/calibration/energy_calibration.py
267
268
269
def make_calibration_invgain(self, approximate: bool = False, extra_info: dict[str, Any] | None = None) -> EnergyCalibration:
    """Shorthand for `make_calibration` with an INVGAIN curve: a spline of (energy / pulse height) vs pulse height."""
    return self.make_calibration(curvename=Curvetypes.INVGAIN, approximate=approximate, extra_info=extra_info)

make_calibration_linear(approximate=False, addzero=False, extra_info=None)

Create a calibration curve that is a spline in energy vs pulse height. If addzero include a (0,0) anchor point.

Source code in mass2/calibration/energy_calibration.py
275
276
277
278
279
280
def make_calibration_linear(
    self, approximate: bool = False, addzero: bool = False, extra_info: dict[str, Any] | None = None
) -> EnergyCalibration:
    """Shorthand for `make_calibration` with a LINEAR curve (energy vs pulse height).

    When `addzero` is true, use LINEAR_PLUS_ZERO so the curve is anchored near (0, 0).
    """
    if addzero:
        kind = Curvetypes.LINEAR_PLUS_ZERO
    else:
        kind = Curvetypes.LINEAR
    return self.make_calibration(kind, approximate=approximate, extra_info=extra_info)

make_calibration_loggain(approximate=False, extra_info=None)

Create a calibration curve that is a spline in log(pulse height/energy) vs pulse height.

Source code in mass2/calibration/energy_calibration.py
271
272
273
def make_calibration_loggain(self, approximate: bool = False, extra_info: dict[str, Any] | None = None) -> EnergyCalibration:
    """Shorthand for `make_calibration` with a LOGGAIN curve: a spline of log(pulse height / energy) vs pulse height."""
    return self.make_calibration(curvename=Curvetypes.LOGGAIN, approximate=approximate, extra_info=extra_info)

make_calibration_loglog(approximate=False, powerlaw=1.15, extra_info=None)

Create a calibration curve that is a spline in log(energy) vs log(pulse height).

Source code in mass2/calibration/energy_calibration.py
257
258
259
260
261
def make_calibration_loglog(
    self, approximate: bool = False, powerlaw: float = 1.15, extra_info: dict[str, Any] | None = None
) -> EnergyCalibration:
    """Shorthand for `make_calibration` with a LOGLOG curve: a spline of log(energy) vs log(pulse height)."""
    return self.make_calibration(curvename=Curvetypes.LOGLOG, approximate=approximate, powerlaw=powerlaw, extra_info=extra_info)

remove_cal_point_energy(energy, de)

Remove cal points at energies within ±de of energy. Return a new maker.

Source code in mass2/calibration/energy_calibration.py
143
144
145
146
147
148
149
def remove_cal_point_energy(self, energy: float, de: float) -> EnergyCalibrationMaker:
    """Drop all anchor points whose energy lies within ±`de` of `energy`. Return a new maker."""
    close = np.abs(self.energy - energy) < de
    if not close.any():
        return self
    # Remove the first match and recurse on the result until no matches remain.
    # (Recursive and less efficient, but clear; see the comment on the prefix-removal method.)
    first = np.nonzero(close)[0][0]
    return self._remove_cal_point_idx(first).remove_cal_point_energy(energy, de)

remove_cal_point_name(name)

Remove calibration point named name. Return a new maker.

Source code in mass2/calibration/energy_calibration.py
128
129
130
131
def remove_cal_point_name(self, name: str) -> EnergyCalibrationMaker:
    """Return a new maker with the calibration point called `name` removed.

    Raises ValueError (from list.index) if no point has that name.
    """
    return self._remove_cal_point_idx(self.names.index(name))

remove_cal_point_prefix(prefix)

This removes all cal points whose name starts with prefix. Return a new maker.

Source code in mass2/calibration/energy_calibration.py
133
134
135
136
137
138
139
140
141
def remove_cal_point_prefix(self, prefix: str) -> EnergyCalibrationMaker:
    """Return a new maker with every cal point whose name starts with `prefix` removed."""
    # Work recursively: peel off the first matching name, build a new Maker, and repeat
    # until nothing matches. Removing N points makes N copies — clarity over performance,
    # since this feature is likely to be rarely used.
    match = next((name for name in self.names if name.startswith(prefix)), None)
    if match is None:
        return self
    return self.remove_cal_point_name(match).remove_cal_point_prefix(prefix)

Fluorescence line shapes

fluorescence_lines.py

Tools for fitting and simulating X-ray fluorescence lines.

AmplitudeType

Bases: Enum

AmplitudeType: which form of amplitude is used in the reference data.

Source code in mass2/calibration/fluorescence_lines.py
76
77
78
79
80
81
class AmplitudeType(Enum):
    """AmplitudeType: which form of amplitude is used in the reference data."""

    # Amplitudes are the peak heights of the Lorentzian components.
    LORENTZIAN_PEAK_HEIGHT = "Peak height of Lorentzians"
    # Amplitudes are the integrated intensities of the Lorentzian components.
    LORENTZIAN_INTEGRAL_INTENSITY = "Integrated intensity of Lorentzians"
    # Amplitudes are the peak heights of Voigt profiles (Gaussian-convolved Lorentzians).
    VOIGT_PEAK_HEIGHT = "Peak height of Voigts"

LineshapeReference dataclass

Description of our source of information on a line shape. Might be a reference to the literature, or notes on conversations. They are stored in a YAML file mass2/data/fluorescence_line_references.yaml

Source code in mass2/calibration/fluorescence_lines.py
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
@dataclass(frozen=True)
class LineshapeReference:
    """Description of our source of information on a line shape. Might be a reference to the literature,
    or notes on conversations. They are stored in a YAML file mass2/data/fluorescence_line_references.yaml
    """

    tag: str
    description: str
    url: str

    @classmethod
    def load(cls, filename: pathlib.Path | str | None = None) -> dict:
        """Load the reference comments from a YAML file. If filename is None, load the default file in mass2/data.

        Parameters
        ----------
        filename : pathlib.Path | str | None, optional
            The file to read containing reference comments, by default None

        Returns
        -------
        dict
            A dictionary of LineshapeReference objects, keyed by their tag.
        """
        references = {"unknown": LineshapeReference("unknown", "unknown", "")}
        if filename is None:
            filename = str(pkg_resources.files("mass2") / "data" / "fluorescence_line_references.yaml")
        with open(filename, "r", encoding="utf-8") as file:
            d = yaml.safe_load(file)
            for item in d:
                url = item.get("URL", "")
                references[item["tag"]] = LineshapeReference(item["tag"], item["description"], url)
        return references

    def __str__(self) -> str:
        """The citation string for this reference."""
        lines = [f'lineshape_references["{self.tag}"]:']
        lines.append(self.description.rstrip("\n"))
        if len(self.url) > 1:
            lines.append(f"url: {self.url}")
        return "\n".join(lines)

__str__()

The citation string for this reference.

Source code in mass2/calibration/fluorescence_lines.py
427
428
429
430
431
432
433
def __str__(self) -> str:
    """The citation string for this reference."""
    lines = [f'lineshape_references["{self.tag}"]:']
    lines.append(self.description.rstrip("\n"))
    # Only append a url line when the url is more than a single character long.
    if len(self.url) > 1:
        lines.append(f"url: {self.url}")
    return "\n".join(lines)

load(filename=None) classmethod

Load the reference comments from a YAML file. If filename is None, load the default file in mass2/data.

Parameters:
  • filename (Path | str | None, default: None ) –

    The file to read containing reference comments, by default None

Returns:
  • dict

    A dictionary of LineshapeReference objects, keyed by their tag.

Source code in mass2/calibration/fluorescence_lines.py
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
@classmethod
def load(cls, filename: pathlib.Path | str | None = None) -> dict:
    """Load the reference comments from a YAML file. If filename is None, load the default file in mass2/data.

    Parameters
    ----------
    filename : pathlib.Path | str | None, optional
        The file to read containing reference comments, by default None

    Returns
    -------
    dict
        A dictionary of LineshapeReference objects, keyed by their tag.
    """
    # Seed with a placeholder so lookups of "unknown" always succeed.
    references = {"unknown": LineshapeReference("unknown", "unknown", "")}
    if filename is None:
        # Default: the reference file shipped inside the mass2 package data.
        filename = str(pkg_resources.files("mass2") / "data" / "fluorescence_line_references.yaml")
    with open(filename, "r", encoding="utf-8") as file:
        d = yaml.safe_load(file)  # safe_load: no arbitrary object construction from the YAML
        for item in d:
            url = item.get("URL", "")  # the URL field is optional in each YAML entry
            references[item["tag"]] = LineshapeReference(item["tag"], item["description"], url)
    return references

SpectralLine dataclass

An abstract base class for modeling spectral lines as a sum of Voigt profiles (i.e., Gaussian-convolved Lorentzians).

Call SpectralLine.addline(...) to create a new instance.

The API follows scipy.stats.rv_continuous and is kind of like rv_frozen. Calling this object with an argument evaluates the pdf at the argument; it does not return an rv_frozen. So far, we only define rvs and pdf.

Source code in mass2/calibration/fluorescence_lines.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
@dataclass(frozen=True)
class SpectralLine:
    """An abstract base class for modeling spectral lines as a sum
    of Voigt profiles (i.e., Gaussian-convolved Lorentzians).

    Call `SpectralLine.addline(...)` to create a new instance.

    The API follows scipy.stats.rv_continuous and is kind of like rv_frozen.
    Calling this object with an argument evaluates the pdf at the argument, it does not
    return an rv_frozen. So far, we only define `rvs` and `pdf`.
    """

    element: str
    material: str
    linetype: str
    nominal_peak_energy: float
    energies: NDArray[np.float64]  # center energies of the Lorentzian components (eV)
    lorentzian_fwhm: NDArray[np.float64]  # FWHM of each Lorentzian component (eV)
    reference_amplitude: NDArray[np.float64]  # component amplitudes, in the form named by reference_amplitude_type
    reference_amplitude_type: AmplitudeType = AmplitudeType.LORENTZIAN_INTEGRAL_INTENSITY
    reference_measurement_type: str | None = "unknown"
    intrinsic_sigma: float = 0.0  # Gaussian sigma built into the line shape itself (combined in quadrature with the instrument's)
    reference_plot_instrument_gaussian_fwhm: float | None = 0.0
    reference_short: str = "unknown"  # key into the module-level lineshape_references dict
    position_uncertainty: float = 0.0
    is_default_material: bool = True

    @cached_property
    def peak_energy(self) -> float:
        """Find the peak energy of the line shape assuming ideal instrument resolution."""
        try:
            # Maximize the zero-resolution pdf by Brent's method, bracketing around the nominal peak.
            peak_energy = sp.optimize.brent(
                lambda x: -self.pdf(x, instrument_gaussian_fwhm=0), brack=np.array((0.5, 1, 1.5)) * self.nominal_peak_energy
            )
        except ValueError:
            # Fall back to the nominal value if the optimizer rejects the bracket.
            peak_energy = self.nominal_peak_energy
        return peak_energy

    @property
    def cumulative_amplitudes(self) -> NDArray:
        """Cumulative sum of the Lorentzian integral intensities."""
        return self.lorentzian_integral_intensity.cumsum()

    @cached_property
    def lorentzian_integral_intensity(self) -> NDArray:
        """Return (and cache) computed integrated intensities of the Lorentzian components."""
        if self.reference_amplitude_type == AmplitudeType.VOIGT_PEAK_HEIGHT:
            # Convert Voigt peak heights using the instrument resolution of the reference measurement.
            sigma = self.reference_plot_instrument_gaussian_fwhm / FWHM_OVER_SIGMA
            return np.array([
                ph / voigt(0, 0, fwhm / 2.0, sigma) for (ph, fwhm) in zip(self.reference_amplitude, self.lorentzian_fwhm)
            ])
        if self.reference_amplitude_type == AmplitudeType.LORENTZIAN_PEAK_HEIGHT:
            # A unit-intensity Lorentzian peaks at 2/(pi*FWHM), so intensity = peak * (pi/2) * FWHM.
            return self.reference_amplitude * (0.5 * np.pi * self.lorentzian_fwhm)

        if self.reference_amplitude_type == AmplitudeType.LORENTZIAN_INTEGRAL_INTENSITY:
            return self.reference_amplitude
        # NOTE(review): implicit None fall-through if a new AmplitudeType member is ever added.

    @cached_property
    def normalized_lorentzian_integral_intensity(self) -> NDArray:
        """Return (and cache) computed integrated intensities of the Lorentzian components, normalized so sum=1."""
        x = self.lorentzian_integral_intensity
        return x / np.sum(x)

    @cached_property
    def lorentz_amplitude(self) -> NDArray:
        """Return (and cache) computed Lorentzian peak heights of the components."""
        return self.lorentzian_integral_intensity / self.lorentzian_fwhm

    def __call__(self, x: ArrayLike, instrument_gaussian_fwhm: float) -> NDArray:
        """Make the class callable, returning the same value as the self.pdf method."""
        return self.pdf(x, instrument_gaussian_fwhm)

    def pdf(self, x: ArrayLike, instrument_gaussian_fwhm: float) -> NDArray:
        """Spectrum (units of fraction per eV) as a function of <x>, the energy in eV"""
        gaussian_sigma = self._gaussian_sigma(instrument_gaussian_fwhm)
        x = np.asarray(x, dtype=float)
        result = np.zeros_like(x)
        # Sum of Voigt profiles with normalized weights, so the pdf integrates to 1.
        for energy, fwhm, ampl in zip(self.energies, self.lorentzian_fwhm, self.normalized_lorentzian_integral_intensity):
            result += ampl * voigt(x, energy, hwhm=fwhm * 0.5, sigma=gaussian_sigma)
            # mass2.voigt() is normalized to have unit integrated intensity
        return result

    def components(self, x: ArrayLike, instrument_gaussian_fwhm: float) -> list[NDArray]:
        """List of spectrum components as a function of <x>, the energy in eV"""
        gaussian_sigma = self._gaussian_sigma(instrument_gaussian_fwhm)
        x = np.asarray(x, dtype=float)
        components = []
        # Unlike pdf(), the weights here are the UNnormalized integral intensities.
        for energy, fwhm, ampl in zip(self.energies, self.lorentzian_fwhm, self.lorentzian_integral_intensity):
            components.append(ampl * voigt(x, energy, hwhm=fwhm * 0.5, sigma=gaussian_sigma))
        return components

    def plot(
        self,
        x: ArrayLike | None = None,
        instrument_gaussian_fwhm: float = 0,
        axis: plt.Axes | None = None,
        components: bool = True,
        label: str | None = None,
        setylim: bool = True,
        color: str | None = None,
    ) -> plt.Axes:
        """Plot the spectrum.
        x - np array of energy in eV to plot at (sensible default)
        axis - axis to plot on (default creates new figure)
        components - True plots each voigt component in addition to the spectrum
        label - a string to label the plot with (optional)"""
        gaussian_sigma = self._gaussian_sigma(instrument_gaussian_fwhm)
        if x is None:
            # Default energy range: pad the component energies by the larger of
            # 2x the Gaussian sigma or 3x the widest Lorentzian FWHM.
            width = max(2 * gaussian_sigma, 3 * float(np.amax(self.lorentzian_fwhm)))
            lo = np.amin(self.energies) - width
            hi = np.amax(self.energies) + width
            x = np.linspace(lo, hi, 500)
        x = np.asarray(x)
        if axis is None:
            plt.figure()
            axis = plt.gca()
        if components:
            # Dashed curves for the individual Voigt components.
            for component in self.components(x, instrument_gaussian_fwhm):
                axis.plot(x, component, "--")
        pdf = self.pdf(x, instrument_gaussian_fwhm)
        axis.plot(x, pdf, lw=2, label=label, color=color)
        axis.set_xlabel("Energy (eV)")
        axis.set_ylabel(f"Counts per {float(x[1] - x[0]):.2} eV bin")
        axis.set_xlim(x[0], x[-1])
        if setylim:
            axis.set_ylim(0, np.amax(pdf) * 1.05)
        axis.set_title(f"{self.shortname} with resolution {instrument_gaussian_fwhm:.2f} eV FWHM")
        return axis

    def plot_like_reference(self, axis: plt.Axes | None = None) -> plt.Axes:
        """Plot the spectrum to match the instrument resolution used in the reference data publication, if known."""
        if self.reference_plot_instrument_gaussian_fwhm is None:
            # Unknown reference resolution: use a near-zero (1 meV) FWHM.
            fwhm = 0.001
        else:
            fwhm = self.reference_plot_instrument_gaussian_fwhm
        axis = self.plot(axis=axis, instrument_gaussian_fwhm=fwhm)
        return axis

    def rvs(self, size: int | tuple[int] | None, instrument_gaussian_fwhm: float, rng: np.random.Generator | None = None) -> NDArray:
        """The CDF and PPF (cumulative distribution and percentile point functions) are hard to
        compute.  But it's easy enough to generate the random variates themselves, so we
        override that method."""
        if rng is None:
            rng = _rng
        gaussian_sigma = self._gaussian_sigma(instrument_gaussian_fwhm)
        # Choose from among the N Lorentzian lines in proportion to the line amplitudes
        iline = self.cumulative_amplitudes.searchsorted(rng.uniform(0, self.cumulative_amplitudes[-1], size=size))
        # Choose Lorentzian variates of the appropriate width (but centered on 0)
        lor = rng.standard_cauchy(size=size) * self.lorentzian_fwhm[iline] * 0.5
        # If necessary, add a Gaussian variate to mimic finite resolution
        if gaussian_sigma > 0.0:
            lor += rng.standard_normal(size=size) * gaussian_sigma
        # Finally, add the line centers.
        results = lor + self.energies[iline]
        # We must check for non-positive results and replace them by recursive call
        # to self.rvs().
        not_positive = results <= 0.0
        if np.any(not_positive):
            Nbad = not_positive.sum()
            # NOTE(review): the recursive call does not forward `rng`, so replacement draws
            # use the module default generator even when a custom rng was supplied — confirm intended.
            results[not_positive] = self.rvs(size=Nbad, instrument_gaussian_fwhm=instrument_gaussian_fwhm)
        return results

    @property
    def shortname(self) -> str:
        """A short name for the line, suitable for use as a dictionary key."""
        if self.is_default_material:
            return f"{self.element}{self.linetype}"
        else:
            # Non-default materials get the material appended to disambiguate.
            return f"{self.element}{self.linetype}_{self.material}"

    @property
    def reference(self) -> "LineshapeReference":
        """The full comment and/or citation for the reference data (a LineshapeReference)."""
        return lineshape_references[self.reference_short]

    def _gaussian_sigma(self, instrument_gaussian_fwhm: float) -> float:
        """Combine intrinsic_sigma and instrument_gaussian_fwhm (converted to sigma) in quadrature; return the result."""
        assert instrument_gaussian_fwhm >= 0
        return ((instrument_gaussian_fwhm / FWHM_OVER_SIGMA) ** 2 + self.intrinsic_sigma**2) ** 0.5

    def __repr__(self) -> str:
        """String representation of the SpectralLine."""
        return f"SpectralLine: {self.shortname}"

    def model(
        self, has_linear_background: bool = True, has_tails: bool = False, prefix: str = "", qemodel: Callable | None = None
    ) -> GenericLineModel:
        """Generate a LineModel instance from a SpectralLine"""
        model_class = GenericLineModel
        name = f"{self.element}{self.linetype}"
        m = model_class(
            name=name, spect=self, has_linear_background=has_linear_background, has_tails=has_tails, prefix=prefix, qemodel=qemodel
        )
        return m

    def fitter(self) -> GenericLineModel:
        """Generate a GenericLineModel instance for fitting this SpectralLine."""
        fitter_class = GenericLineModel
        f = fitter_class(self)
        f.name = f"{self.element}{self.linetype}"
        return f

    def minimum_fwhm(self, instrument_gaussian_fwhm: float) -> float:
        """For the narrowest Lorentzian in the line model, calculate the combined FWHM including
        the Lorentzian, intrinsic_sigma, and instrument_gaussian_fwhm (all added in quadrature)."""
        fwhm2 = np.amin(self.lorentzian_fwhm) ** 2 + instrument_gaussian_fwhm**2 + (self.intrinsic_sigma * FWHM_OVER_SIGMA) ** 2
        return np.sqrt(fwhm2)

    @classmethod
    def quick_monochromatic_line(
        cls, name: str, energy: float, lorentzian_fwhm: float, intrinsic_sigma: float = 0.0
    ) -> "SpectralLine":
        """
        Create a quick monochromatic line. Intended for use in calibration when we know a line energy, but not a lineshape model.
        Returns an instance of SpectralLine with most fields having contents like "unknown: quick_line". The line will have
        a single Lorentzian element with the given energy, fwhm, and intrinsic_sigma values.
        """
        energy = float(energy)
        element = name
        material = "unknown: quick_line"
        # Avoid a zero-width line: ensure at least a tiny Gaussian component.
        if lorentzian_fwhm <= 0 and intrinsic_sigma <= 1e-6:
            intrinsic_sigma = 1e-6
        linetype = "Gaussian"
        reference_short = "unknown: quick_line"
        reference_amplitude = np.array([1.0])
        reference_amplitude_type = AmplitudeType.LORENTZIAN_INTEGRAL_INTENSITY
        nominal_peak_energy = energy
        position_uncertainty = 0.0
        # NOTE(review): "unkown" typo below is in a runtime data value, left unchanged in case callers compare against it.
        reference_measurement_type = "unkown: quick_line"
        return cls(
            element=element,
            material=material,
            linetype=linetype,
            energies=np.array([energy]),
            lorentzian_fwhm=np.array([lorentzian_fwhm]),
            intrinsic_sigma=intrinsic_sigma,
            reference_short=reference_short,
            reference_amplitude=reference_amplitude,
            reference_amplitude_type=reference_amplitude_type,
            nominal_peak_energy=nominal_peak_energy,
            position_uncertainty=position_uncertainty,
            reference_measurement_type=reference_measurement_type,
        )

    @classmethod
    def addline(  # noqa: PLR0917
        cls,
        element: str,
        linetype: str,
        material: str,
        reference_short: str,
        reference_plot_instrument_gaussian_fwhm: float | None,
        nominal_peak_energy: float,
        energies: ArrayLike,
        lorentzian_fwhm: ArrayLike,
        reference_amplitude: ArrayLike,
        reference_amplitude_type: AmplitudeType,
        ka12_energy_diff: float | None = None,
        position_uncertainty: float = np.nan,
        intrinsic_sigma: float = 0,
        reference_measurement_type: str | None = None,
        is_default_material: bool = True,
        allow_replacement: bool = True,
    ) -> "SpectralLine":
        """Add a new SpectralLine to the `mass2.fluorescence_lines.spectra` dictionary, and as a variable in this module."""
        # require exactly one method of specifying the amplitude of each component
        assert reference_amplitude_type in {
            AmplitudeType.LORENTZIAN_PEAK_HEIGHT,
            AmplitudeType.LORENTZIAN_INTEGRAL_INTENSITY,
            AmplitudeType.VOIGT_PEAK_HEIGHT,
        }
        # require the reference exists in lineshape_references
        assert reference_short in lineshape_references

        # require kalpha lines to have ka12_energy_diff
        # NOTE(review): the code below only coerces ka12_energy_diff to float when present;
        # it does not actually enforce the requirement stated above — confirm intent.
        if linetype.startswith("KAlpha") and ka12_energy_diff is not None:
            ka12_energy_diff = float(ka12_energy_diff)
        # require reference_plot_instrument_gaussian_fwhm to be a float or None
        assert reference_plot_instrument_gaussian_fwhm is None or isinstance(reference_plot_instrument_gaussian_fwhm, float)

        line = cls(
            element=element,
            material=material,
            linetype=linetype,
            nominal_peak_energy=float(nominal_peak_energy),
            energies=np.array(energies),
            lorentzian_fwhm=np.array(lorentzian_fwhm),
            reference_amplitude=np.array(reference_amplitude),
            reference_amplitude_type=reference_amplitude_type,
            reference_measurement_type=reference_measurement_type,
            intrinsic_sigma=intrinsic_sigma,
            reference_plot_instrument_gaussian_fwhm=reference_plot_instrument_gaussian_fwhm,
            reference_short=reference_short,
            position_uncertainty=float(position_uncertainty),
            is_default_material=is_default_material,
        )
        name = line.shortname
        if name in spectra.keys() and (not allow_replacement):
            raise ValueError(f"spectrum {name} already exists")

        # Add this SpectralLine to spectra dict AND make it be a variable in the module
        spectra[name] = line
        globals()[name] = line
        return line

cumulative_amplitudes property

Cumulative sum of the Lorentzian integral intensities.

lorentz_amplitude cached property

Return (and cache) computed Lorentzian peak heights of the components.

lorentzian_integral_intensity cached property

Return (and cache) computed integrated intensities of the Lorentzian components.

normalized_lorentzian_integral_intensity cached property

Return (and cache) computed integrated intensities of the Lorentzian components, normalized so sum=1.

peak_energy cached property

Find the peak energy of the line shape assuming ideal instrument resolution.

reference property

The full comment and/or citation for the reference data.

shortname property

A short name for the line, suitable for use as a dictionary key.

__call__(x, instrument_gaussian_fwhm)

Make the class callable, returning the same value as the self.pdf method.

Source code in mass2/calibration/fluorescence_lines.py
152
153
154
def __call__(self, x: ArrayLike, instrument_gaussian_fwhm: float) -> NDArray:
    """Make the class callable, returning the same value as the self.pdf method."""
    return self.pdf(x, instrument_gaussian_fwhm)

__repr__()

String representation of the SpectralLine.

Source code in mass2/calibration/fluorescence_lines.py
264
265
266
def __repr__(self) -> str:
    """String representation of the SpectralLine."""
    return f"SpectralLine: {self.shortname}"

addline(element, linetype, material, reference_short, reference_plot_instrument_gaussian_fwhm, nominal_peak_energy, energies, lorentzian_fwhm, reference_amplitude, reference_amplitude_type, ka12_energy_diff=None, position_uncertainty=np.nan, intrinsic_sigma=0, reference_measurement_type=None, is_default_material=True, allow_replacement=True) classmethod

Add a new SpectralLine to the mass2.fluorescence_lines.spectra dictionary, and as a variable in this module.

Source code in mass2/calibration/fluorescence_lines.py
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
@classmethod
def addline(  # noqa: PLR0917
    cls,
    element: str,
    linetype: str,
    material: str,
    reference_short: str,
    reference_plot_instrument_gaussian_fwhm: float | None,
    nominal_peak_energy: float,
    energies: ArrayLike,
    lorentzian_fwhm: ArrayLike,
    reference_amplitude: ArrayLike,
    reference_amplitude_type: AmplitudeType,
    ka12_energy_diff: float | None = None,
    position_uncertainty: float = np.nan,
    intrinsic_sigma: float = 0,
    reference_measurement_type: str | None = None,
    is_default_material: bool = True,
    allow_replacement: bool = True,
) -> "SpectralLine":
    """Add a new SpectralLine to the `mass2.fluorescence_lines.spectra` dictionary, and as a variable in this module."""
    # require exactly one method of specifying the amplitude of each component
    assert reference_amplitude_type in {
        AmplitudeType.LORENTZIAN_PEAK_HEIGHT,
        AmplitudeType.LORENTZIAN_INTEGRAL_INTENSITY,
        AmplitudeType.VOIGT_PEAK_HEIGHT,
    }
    # require the reference exists in lineshape_references
    assert reference_short in lineshape_references

    # require kalpha lines to have ka12_energy_diff
    if linetype.startswith("KAlpha") and ka12_energy_diff is not None:
        ka12_energy_diff = float(ka12_energy_diff)
    # require reference_plot_instrument_gaussian_fwhm to be a float or None
    assert reference_plot_instrument_gaussian_fwhm is None or isinstance(reference_plot_instrument_gaussian_fwhm, float)

    line = cls(
        element=element,
        material=material,
        linetype=linetype,
        nominal_peak_energy=float(nominal_peak_energy),
        energies=np.array(energies),
        lorentzian_fwhm=np.array(lorentzian_fwhm),
        reference_amplitude=np.array(reference_amplitude),
        reference_amplitude_type=reference_amplitude_type,
        reference_measurement_type=reference_measurement_type,
        intrinsic_sigma=intrinsic_sigma,
        reference_plot_instrument_gaussian_fwhm=reference_plot_instrument_gaussian_fwhm,
        reference_short=reference_short,
        position_uncertainty=float(position_uncertainty),
        is_default_material=is_default_material,
    )
    name = line.shortname
    if name in spectra.keys() and (not allow_replacement):
        raise ValueError(f"spectrum {name} already exists")

    # Add this SpectralLine to spectra dict AND make it be a variable in the module
    spectra[name] = line
    globals()[name] = line
    return line

components(x, instrument_gaussian_fwhm)

List of spectrum components as a function of x, the energy in eV

Source code in mass2/calibration/fluorescence_lines.py
166
167
168
169
170
171
172
173
def components(self, x: ArrayLike, instrument_gaussian_fwhm: float) -> list[NDArray]:
    """List of spectrum components as a function of <x>, the energy in eV"""
    gaussian_sigma = self._gaussian_sigma(instrument_gaussian_fwhm)
    x = np.asarray(x, dtype=float)
    components = []
    for energy, fwhm, ampl in zip(self.energies, self.lorentzian_fwhm, self.lorentzian_integral_intensity):
        components.append(ampl * voigt(x, energy, hwhm=fwhm * 0.5, sigma=gaussian_sigma))
    return components

fitter()

Generate a GenericLineModel instance for fitting this SpectralLine.

Source code in mass2/calibration/fluorescence_lines.py
279
280
281
282
283
284
def fitter(self) -> GenericLineModel:
    """Generate a GenericLineModel instance for fitting this SpectralLine."""
    fitter_class = GenericLineModel
    f = fitter_class(self)
    f.name = f"{self.element}{self.linetype}"
    return f

minimum_fwhm(instrument_gaussian_fwhm)

for the narrowest lorentzian in the line model, calculate the combined fwhm including the lorentzian, intrinsic_sigma, and instrument_gaussian_fwhm

Source code in mass2/calibration/fluorescence_lines.py
286
287
288
289
290
def minimum_fwhm(self, instrument_gaussian_fwhm: float) -> float:
    """for the narrowest lorentzian in the line model, calculate the combined fwhm including
    the lorentzian, intrinstic_sigma, and instrument_gaussian_fwhm"""
    fwhm2 = np.amin(self.lorentzian_fwhm) ** 2 + instrument_gaussian_fwhm**2 + (self.intrinsic_sigma * FWHM_OVER_SIGMA) ** 2
    return np.sqrt(fwhm2)

model(has_linear_background=True, has_tails=False, prefix='', qemodel=None)

Generate a LineModel instance from a SpectralLine

Source code in mass2/calibration/fluorescence_lines.py
268
269
270
271
272
273
274
275
276
277
def model(
    self, has_linear_background: bool = True, has_tails: bool = False, prefix: str = "", qemodel: Callable | None = None
) -> GenericLineModel:
    """Generate a LineModel instance from a SpectralLine"""
    model_class = GenericLineModel
    name = f"{self.element}{self.linetype}"
    m = model_class(
        name=name, spect=self, has_linear_background=has_linear_background, has_tails=has_tails, prefix=prefix, qemodel=qemodel
    )
    return m

pdf(x, instrument_gaussian_fwhm)

Spectrum (units of fraction per eV) as a function of x, the energy in eV

Source code in mass2/calibration/fluorescence_lines.py
156
157
158
159
160
161
162
163
164
def pdf(self, x: ArrayLike, instrument_gaussian_fwhm: float) -> NDArray:
    """Spectrum (units of fraction per eV) as a function of <x>, the energy in eV"""
    gaussian_sigma = self._gaussian_sigma(instrument_gaussian_fwhm)
    x = np.asarray(x, dtype=float)
    result = np.zeros_like(x)
    for energy, fwhm, ampl in zip(self.energies, self.lorentzian_fwhm, self.normalized_lorentzian_integral_intensity):
        result += ampl * voigt(x, energy, hwhm=fwhm * 0.5, sigma=gaussian_sigma)
        # mass2.voigt() is normalized to have unit integrated intensity
    return result

plot(x=None, instrument_gaussian_fwhm=0, axis=None, components=True, label=None, setylim=True, color=None)

Plot the spectrum. x - np array of energy in eV to plot at (sensible default) axis - axis to plot on (default creates new figure) components - True plots each voigt component in addition to the spectrum label - a string to label the plot with (optional)

Source code in mass2/calibration/fluorescence_lines.py
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
def plot(
    self,
    x: ArrayLike | None = None,
    instrument_gaussian_fwhm: float = 0,
    axis: plt.Axes | None = None,
    components: bool = True,
    label: str | None = None,
    setylim: bool = True,
    color: str | None = None,
) -> plt.Axes:
    """Plot the spectrum.
    x - np array of energy in eV to plot at (sensible default)
    axis - axis to plot on (default creates new figure)
    components - True plots each voigt component in addition to the spectrum
    label - a string to label the plot with (optional)"""
    gaussian_sigma = self._gaussian_sigma(instrument_gaussian_fwhm)
    if x is None:
        width = max(2 * gaussian_sigma, 3 * float(np.amax(self.lorentzian_fwhm)))
        lo = np.amin(self.energies) - width
        hi = np.amax(self.energies) + width
        x = np.linspace(lo, hi, 500)
    x = np.asarray(x)
    if axis is None:
        plt.figure()
        axis = plt.gca()
    if components:
        for component in self.components(x, instrument_gaussian_fwhm):
            axis.plot(x, component, "--")
    pdf = self.pdf(x, instrument_gaussian_fwhm)
    axis.plot(x, pdf, lw=2, label=label, color=color)
    axis.set_xlabel("Energy (eV)")
    axis.set_ylabel(f"Counts per {float(x[1] - x[0]):.2} eV bin")
    axis.set_xlim(x[0], x[-1])
    if setylim:
        axis.set_ylim(0, np.amax(pdf) * 1.05)
    axis.set_title(f"{self.shortname} with resolution {instrument_gaussian_fwhm:.2f} eV FWHM")
    return axis

plot_like_reference(axis=None)

Plot the spectrum to match the instrument resolution used in the reference data publication, if known.

Source code in mass2/calibration/fluorescence_lines.py
213
214
215
216
217
218
219
220
def plot_like_reference(self, axis: plt.Axes | None = None) -> plt.Axes:
    """Plot the spectrum to match the instrument resolution used in the reference data publication, if known."""
    if self.reference_plot_instrument_gaussian_fwhm is None:
        fwhm = 0.001
    else:
        fwhm = self.reference_plot_instrument_gaussian_fwhm
    axis = self.plot(axis=axis, instrument_gaussian_fwhm=fwhm)
    return axis

quick_monochromatic_line(name, energy, lorentzian_fwhm, intrinsic_sigma=0.0) classmethod

Create a quick monochromatic line. Intended for use in calibration when we know a line energy, but not a lineshape model. Returns an instance of SpectralLine with most fields having contents like "unknown: quick_line". The line will have a single lorentzian element with the given energy, fwhm, and intrinsic_sigma values.

Source code in mass2/calibration/fluorescence_lines.py
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
@classmethod
def quick_monochromatic_line(
    cls, name: str, energy: float, lorentzian_fwhm: float, intrinsic_sigma: float = 0.0
) -> "SpectralLine":
    """
    Create a quick monochromatic line. Intended for use in calibration when we know a line energy, but not a lineshape model.
    Returns and instrance of SpectralLine with most fields having contents like "unknown: quick_line". The line will have
    a single lorentzian element with the given energy, fwhm, and intrinsic_sigma values.
    """
    energy = float(energy)
    element = name
    material = "unknown: quick_line"
    if lorentzian_fwhm <= 0 and intrinsic_sigma <= 1e-6:
        intrinsic_sigma = 1e-6
    linetype = "Gaussian"
    reference_short = "unknown: quick_line"
    reference_amplitude = np.array([1.0])
    reference_amplitude_type = AmplitudeType.LORENTZIAN_INTEGRAL_INTENSITY
    nominal_peak_energy = energy
    position_uncertainty = 0.0
    reference_measurement_type = "unkown: quick_line"
    return cls(
        element=element,
        material=material,
        linetype=linetype,
        energies=np.array([energy]),
        lorentzian_fwhm=np.array([lorentzian_fwhm]),
        intrinsic_sigma=intrinsic_sigma,
        reference_short=reference_short,
        reference_amplitude=reference_amplitude,
        reference_amplitude_type=reference_amplitude_type,
        nominal_peak_energy=nominal_peak_energy,
        position_uncertainty=position_uncertainty,
        reference_measurement_type=reference_measurement_type,
    )

rvs(size, instrument_gaussian_fwhm, rng=None)

The CDF and PPF (cumulative distribution and percentile point functions) are hard to compute. But it's easy enough to generate the random variates themselves, so we override that method.

Source code in mass2/calibration/fluorescence_lines.py
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def rvs(self, size: int | tuple[int] | None, instrument_gaussian_fwhm: float, rng: np.random.Generator | None = None) -> NDArray:
    """The CDF and PPF (cumulative distribution and percentile point functions) are hard to
    compute.  But it's easy enough to generate the random variates themselves, so we
    override that method."""
    if rng is None:
        rng = _rng
    gaussian_sigma = self._gaussian_sigma(instrument_gaussian_fwhm)
    # Choose from among the N Lorentzian lines in proportion to the line amplitudes
    iline = self.cumulative_amplitudes.searchsorted(rng.uniform(0, self.cumulative_amplitudes[-1], size=size))
    # Choose Lorentzian variates of the appropriate width (but centered on 0)
    lor = rng.standard_cauchy(size=size) * self.lorentzian_fwhm[iline] * 0.5
    # If necessary, add a Gaussian variate to mimic finite resolution
    if gaussian_sigma > 0.0:
        lor += rng.standard_normal(size=size) * gaussian_sigma
    # Finally, add the line centers.
    results = lor + self.energies[iline]
    # We must check for non-positive results and replace them by recursive call
    # to self.rvs().
    not_positive = results <= 0.0
    if np.any(not_positive):
        Nbad = not_positive.sum()
        results[not_positive] = self.rvs(size=Nbad, instrument_gaussian_fwhm=instrument_gaussian_fwhm)
    return results

LineEnergies()

A dictionary to know a lot of x-ray fluorescence line energies, based on Deslattes' database.

It is built on facts from mass2.calibration.nist_xray_database module.

It is a dictionary from peak name to energy, with several alternate names for the lines:

E = LineEnergies() print(E["MnKAlpha"]) print(E["MnKAlpha"], E["MnKA"], E["MnKA1"], E["MnKL3"])

Source code in mass2/calibration/fluorescence_lines.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def LineEnergies() -> dict[str, float]:
    """
    A dictionary to know a lot of x-ray fluorescence line energies, based on Deslattes' database.

    It is built on facts from mass2.calibration.nist_xray_database module.

    It is a dictionary from peak name to energy, with several alternate names
    for the lines:

    E = Energies()
    print E["MnKAlpha"]
    print E["MnKAlpha"], E["MnKA"], E["MnKA1"], E["MnKL3"]
    """
    db = NISTXrayDBFile()
    alternate_line_names = {v: k for (k, v) in db.LINE_NICKNAMES.items()}
    data = {}

    for fullname, L in db.lines.items():
        element, linename = fullname.split(" ", 1)
        allnames = [linename]
        if linename in alternate_line_names:
            siegbahn_linename = alternate_line_names[linename]
            long_linename = siegbahn_linename.replace("A", "Alpha").replace("B", "Beta").replace("G", "Gamma")

            allnames.append(siegbahn_linename)
            allnames.append(long_linename)

            if siegbahn_linename.endswith("1"):
                allnames.append(siegbahn_linename[:-1])
                allnames.append(long_linename[:-1])

        for name in allnames:
            key = "".join((element, name))
            data[key] = L.peak

    return data

plot_all_spectra(maxplots=10)

Makes plots showing the line shape and component parts for some lines. Intended to replicate plots in the literature giving spectral lineshapes.

Source code in mass2/calibration/fluorescence_lines.py
1632
1633
1634
1635
1636
1637
1638
def plot_all_spectra(maxplots: int = 10) -> None:
    """Makes plots showing the line shape and component parts for some lines.
    Intended to replicate plots in the literature giving spectral lineshapes."""
    keys = list(spectra.keys())[:maxplots]
    for name in keys:
        spectrum = spectra[name]
        spectrum.plot_like_reference()

Implements MLEModel, CompositeMLEModel, GenericLineModel

CompositeMLEModel

Bases: MLEModel, CompositeModel

A version of lmfit.CompositeModel that uses Maximum Likelihood weights in place of chisq, as described in: doi:10.1007/s10909-014-1098-4 "Maximum-Likelihood Fits to Histograms for Improved Parameter Estimation"

Source code in mass2/calibration/line_models.py
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
class CompositeMLEModel(MLEModel, lmfit.CompositeModel):
    """A version of lmfit.CompositeModel that uses Maximum Likelihood weights
    in place of chisq, as described in: doi:10.1007/s10909-014-1098-4
    "Maximum-Likelihood Fits to Histograms for Improved Parameter Estimation"
    """

    def _residual(self, params: lmfit.Parameters, data: NDArray | None, weights: NDArray | None, **kwargs: Any) -> NDArray:
        """Calculate the chi_MLE^2 value from Joe Fowler's Paper
        doi:10.1007/s10909-014-1098-4 Maximum-Likelihood Fits to Histograms for Improved Parameter Estimation
        """
        y = self.eval(params, **kwargs)
        if data is None:
            return y
        r2 = y - data
        nonzero = data > 0
        r2[nonzero] += data[nonzero] * np.log((data / y)[nonzero])
        vals = (2 * r2) ** 0.5
        vals[y < data] *= -1
        return vals

    def __add__(self, other: lmfit.Model) -> "CompositeMLEModel":
        """Sum of two models"""
        return CompositeMLEModel(self, other, lmfit.model.operator.add)

    def __sub__(self, other: lmfit.Model) -> "CompositeMLEModel":
        """Difference of two models"""
        return CompositeMLEModel(self, other, lmfit.model.operator.sub)

    def __mul__(self, other: lmfit.Model) -> "CompositeMLEModel":
        """Product of two models"""
        return CompositeMLEModel(self, other, lmfit.model.operator.mul)

    def __truediv__(self, other: lmfit.Model) -> "CompositeMLEModel":
        """Ratio of two models"""
        return CompositeMLEModel(self, other, lmfit.model.operator.truediv)

__add__(other)

Sum of two models

Source code in mass2/calibration/line_models.py
236
237
238
def __add__(self, other: lmfit.Model) -> "CompositeMLEModel":
    """Sum of two models"""
    return CompositeMLEModel(self, other, lmfit.model.operator.add)

__mul__(other)

Product of two models

Source code in mass2/calibration/line_models.py
244
245
246
def __mul__(self, other: lmfit.Model) -> "CompositeMLEModel":
    """Product of two models"""
    return CompositeMLEModel(self, other, lmfit.model.operator.mul)

__sub__(other)

Difference of two models

Source code in mass2/calibration/line_models.py
240
241
242
def __sub__(self, other: lmfit.Model) -> "CompositeMLEModel":
    """Difference of two models"""
    return CompositeMLEModel(self, other, lmfit.model.operator.sub)

__truediv__(other)

Ratio of two models

Source code in mass2/calibration/line_models.py
248
249
250
def __truediv__(self, other: lmfit.Model) -> "CompositeMLEModel":
    """Ratio of two models"""
    return CompositeMLEModel(self, other, lmfit.model.operator.truediv)

GenericLineModel

Bases: MLEModel

A generic line model for fitting spectral lines.

Source code in mass2/calibration/line_models.py
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
class GenericLineModel(MLEModel):
    """A generic line model for fitting spectral lines."""

    def __init__(
        self,
        spect: "SpectralLine",
        independent_vars: list[str] = ["bin_centers"],
        prefix: str = "",
        nan_policy: str = "raise",
        has_linear_background: bool = True,
        has_tails: bool = False,
        qemodel: Callable | None = None,
        **kwargs: Any,
    ):
        """Initialize a GenericLineModel

        Parameters
        ----------
        spect : SpectralLine
            The line or feature to be modeled
        independent_vars : list[str], optional
            List of independent variable names, by default ["bin_centers"]
        prefix : str, optional
            Model, by default ""
        nan_policy : str, optional
            How to handle NaN results in the computed spectrum, by default "raise"
        has_linear_background : bool, optional
            Whether the background model can have a nonzero slope, by default True
        has_tails : bool, optional
            Whether exponential tails are included in the model, by default False
        qemodel : Callable | None, optional
            A model for the quantum efficiency (which changes the expected line shape), by default None

        Returns
        -------
        GenericLineModel
            The initialized model

        Raises
        ------
        ValueError
            If the spectral model produces negative or NaN values
        """
        self.spect = spect
        self._has_tails = has_tails
        self._has_linear_background = has_linear_background

        param_names = ["fwhm", "peak_ph", "dph_de", "integral"]
        if self._has_linear_background:
            param_names += ["background", "bg_slope"]
        if self._has_tails:
            param_names += ["tail_frac", "tail_tau", "tail_share_hi", "tail_tau_hi"]
        kwargs.update({"prefix": prefix, "nan_policy": nan_policy, "independent_vars": independent_vars, "param_names": param_names})

        if has_tails:

            def modelfunctails(  # noqa: PLR0917
                bin_centers: ArrayLike,
                fwhm: float,
                peak_ph: float,
                dph_de: float,
                integral: float,
                background: float = 0,
                bg_slope: float = 0,
                tail_frac: float = 0,
                tail_tau: float = 8,
                tail_share_hi: float = 0,
                tail_tau_hi: float = 8,
            ) -> NDArray:
                bin_centers = np.asarray(bin_centers, dtype=float)
                bin_width = bin_centers[1] - bin_centers[0]
                energy = (bin_centers - peak_ph) / dph_de + self.spect.peak_energy

                def cleanspectrum_fn(x: ArrayLike) -> NDArray:
                    return self.spect.pdf(x, instrument_gaussian_fwhm=fwhm)

                # tail_tau* is in energy units but has to be converted to the same units as `bin_centers`
                tail_arbs_lo = tail_tau * dph_de
                tail_arbs_hi = tail_tau_hi * dph_de
                spectrum = _smear_exponential_tail(
                    cleanspectrum_fn, energy, fwhm, tail_frac, tail_arbs_lo, tail_share_hi, tail_arbs_hi
                )
                scale_factor = integral * bin_width * dph_de
                r = _scale_add_bg(spectrum, scale_factor, background, bg_slope)
                if any(np.isnan(r)) or any(r < 0):
                    raise ValueError("some entry in r is nan or negative")
                if qemodel is None:
                    return r
                return r * qemodel(energy)

            super().__init__(modelfunctails, **kwargs)

        else:

            def modelfunc(
                bin_centers: ArrayLike,
                fwhm: float,
                peak_ph: float,
                dph_de: float,
                integral: float,
                background: float = 0,
                bg_slope: float = 0,
            ) -> NDArray:
                bin_centers = np.asarray(bin_centers, dtype=float)
                bin_width = bin_centers[1] - bin_centers[0]
                energy = (bin_centers - peak_ph) / dph_de + self.spect.peak_energy
                spectrum = self.spect.pdf(energy, fwhm)
                scale_factor = integral * bin_width / dph_de
                r = _scale_add_bg(spectrum, scale_factor, background, bg_slope)
                if any(np.isnan(r)) or any(r < 0):
                    raise ValueError("some entry in r is nan or negative")
                if qemodel is None:
                    return r
                return r * qemodel(energy)

            super().__init__(modelfunc, **kwargs)

        self._set_paramhints_prefix()

    def _set_paramhints_prefix(self) -> None:
        """Set parameter hints with reasonable initial values and bounds."""
        nominal_peak_energy = self.spect.nominal_peak_energy
        self.set_param_hint("fwhm", value=nominal_peak_energy / 1000, min=nominal_peak_energy / 10000, max=nominal_peak_energy)
        self.set_param_hint("peak_ph", value=nominal_peak_energy, min=0)
        self.set_param_hint("dph_de", value=1, min=0.01, max=100)
        self.set_param_hint("integral", value=100, min=0)
        if self._has_linear_background:
            self.set_param_hint("background", value=1, min=0)
            self.set_param_hint("bg_slope", value=0, vary=False)
        if self._has_tails:
            self.set_param_hint("tail_frac", value=0.05, min=0, max=1, vary=True)
            self.set_param_hint("tail_tau", value=nominal_peak_energy / 200, min=0, max=nominal_peak_energy / 10, vary=True)
            self.set_param_hint("tail_share_hi", value=0, min=0, max=1, vary=False)
            self.set_param_hint("tail_tau_hi", value=nominal_peak_energy / 200, min=0, max=nominal_peak_energy / 10, vary=False)

    def guess(self, data: ArrayLike, bin_centers: ArrayLike, dph_de: float, **kwargs: Any) -> lmfit.Parameters:
        "Guess values for the peak_ph, integral, and background."
        data = np.asarray(data)
        bin_centers = np.asarray(bin_centers)
        order_stat = np.array(data.cumsum(), dtype=float) / data.sum()

        def percentiles(p: float) -> NDArray:
            """Find the p-th percentile of the data using histograms."""
            return bin_centers[(order_stat > p).argmax()]

        fwhm_arb = 0.7 * (percentiles(0.75) - percentiles(0.25))
        peak_ph = bin_centers[data.argmax()]
        if len(data) > 20:
            # Ensure baseline guess > 0 (see Issue #152). Guess at least 1 background across all bins
            baseline = max(data[0:10].mean(), 1.0 / len(data))
        else:
            baseline = 0.1
        tcounts_above_bg = data.sum() - baseline * len(data)
        if tcounts_above_bg < 0:
            tcounts_above_bg = data.sum()  # lets avoid negative estimates for the integral
        pars = self.make_params(peak_ph=peak_ph, background=baseline, integral=tcounts_above_bg, fwhm=fwhm_arb / dph_de, dph_de=dph_de)
        return lmfit.models.update_param_vals(pars, self.prefix, **kwargs)

__init__(spect, independent_vars=['bin_centers'], prefix='', nan_policy='raise', has_linear_background=True, has_tails=False, qemodel=None, **kwargs)

Initialize a GenericLineModel

Parameters:
  • spect (SpectralLine) –

    The line or feature to be modeled

  • independent_vars (list[str], default: ['bin_centers'] ) –

    List of independent variable names, by default ["bin_centers"]

  • prefix (str, default: '' ) –

    Model, by default ""

  • nan_policy (str, default: 'raise' ) –

    How to handle NaN results in the computed spectrum, by default "raise"

  • has_linear_background (bool, default: True ) –

    Whether the background model can have a nonzero slope, by default True

  • has_tails (bool, default: False ) –

    Whether exponential tails are included in the model, by default False

  • qemodel (Callable | None, default: None ) –

    A model for the quantum efficiency (which changes the expected line shape), by default None

Returns:
Raises:
  • ValueError

    If the spectral model produces negative or NaN values

Source code in mass2/calibration/line_models.py
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
def __init__(
    self,
    spect: "SpectralLine",
    independent_vars: list[str] = ["bin_centers"],
    prefix: str = "",
    nan_policy: str = "raise",
    has_linear_background: bool = True,
    has_tails: bool = False,
    qemodel: Callable | None = None,
    **kwargs: Any,
):
    """Initialize a GenericLineModel

    Parameters
    ----------
    spect : SpectralLine
        The line or feature to be modeled
    independent_vars : list[str], optional
        List of independent variable names, by default ["bin_centers"]
    prefix : str, optional
        Model, by default ""
    nan_policy : str, optional
        How to handle NaN results in the computed spectrum, by default "raise"
    has_linear_background : bool, optional
        Whether the background model can have a nonzero slope, by default True
    has_tails : bool, optional
        Whether exponential tails are included in the model, by default False
    qemodel : Callable | None, optional
        A model for the quantum efficiency (which changes the expected line shape), by default None

    Returns
    -------
    GenericLineModel
        The initialized model

    Raises
    ------
    ValueError
        If the spectral model produces negative or NaN values
    """
    self.spect = spect
    self._has_tails = has_tails
    self._has_linear_background = has_linear_background

    param_names = ["fwhm", "peak_ph", "dph_de", "integral"]
    if self._has_linear_background:
        param_names += ["background", "bg_slope"]
    if self._has_tails:
        param_names += ["tail_frac", "tail_tau", "tail_share_hi", "tail_tau_hi"]
    kwargs.update({"prefix": prefix, "nan_policy": nan_policy, "independent_vars": independent_vars, "param_names": param_names})

    if has_tails:

        def modelfunctails(  # noqa: PLR0917
            bin_centers: ArrayLike,
            fwhm: float,
            peak_ph: float,
            dph_de: float,
            integral: float,
            background: float = 0,
            bg_slope: float = 0,
            tail_frac: float = 0,
            tail_tau: float = 8,
            tail_share_hi: float = 0,
            tail_tau_hi: float = 8,
        ) -> NDArray:
            bin_centers = np.asarray(bin_centers, dtype=float)
            bin_width = bin_centers[1] - bin_centers[0]
            energy = (bin_centers - peak_ph) / dph_de + self.spect.peak_energy

            def cleanspectrum_fn(x: ArrayLike) -> NDArray:
                return self.spect.pdf(x, instrument_gaussian_fwhm=fwhm)

            # tail_tau* is in energy units but has to be converted to the same units as `bin_centers`
            tail_arbs_lo = tail_tau * dph_de
            tail_arbs_hi = tail_tau_hi * dph_de
            spectrum = _smear_exponential_tail(
                cleanspectrum_fn, energy, fwhm, tail_frac, tail_arbs_lo, tail_share_hi, tail_arbs_hi
            )
            scale_factor = integral * bin_width * dph_de
            r = _scale_add_bg(spectrum, scale_factor, background, bg_slope)
            if any(np.isnan(r)) or any(r < 0):
                raise ValueError("some entry in r is nan or negative")
            if qemodel is None:
                return r
            return r * qemodel(energy)

        super().__init__(modelfunctails, **kwargs)

    else:

        def modelfunc(
            bin_centers: ArrayLike,
            fwhm: float,
            peak_ph: float,
            dph_de: float,
            integral: float,
            background: float = 0,
            bg_slope: float = 0,
        ) -> NDArray:
            bin_centers = np.asarray(bin_centers, dtype=float)
            bin_width = bin_centers[1] - bin_centers[0]
            energy = (bin_centers - peak_ph) / dph_de + self.spect.peak_energy
            spectrum = self.spect.pdf(energy, fwhm)
            scale_factor = integral * bin_width / dph_de
            r = _scale_add_bg(spectrum, scale_factor, background, bg_slope)
            if any(np.isnan(r)) or any(r < 0):
                raise ValueError("some entry in r is nan or negative")
            if qemodel is None:
                return r
            return r * qemodel(energy)

        super().__init__(modelfunc, **kwargs)

    self._set_paramhints_prefix()

guess(data, bin_centers, dph_de, **kwargs)

Guess values for the peak_ph, integral, and background.

Source code in mass2/calibration/line_models.py
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
def guess(self, data: ArrayLike, bin_centers: ArrayLike, dph_de: float, **kwargs: Any) -> lmfit.Parameters:
    "Guess values for the peak_ph, integral, and background."
    data = np.asarray(data)
    bin_centers = np.asarray(bin_centers)
    order_stat = np.array(data.cumsum(), dtype=float) / data.sum()

    def percentiles(p: float) -> NDArray:
        """Find the p-th percentile of the data using histograms."""
        return bin_centers[(order_stat > p).argmax()]

    fwhm_arb = 0.7 * (percentiles(0.75) - percentiles(0.25))
    peak_ph = bin_centers[data.argmax()]
    if len(data) > 20:
        # Ensure baseline guess > 0 (see Issue #152). Guess at least 1 background across all bins
        baseline = max(data[0:10].mean(), 1.0 / len(data))
    else:
        baseline = 0.1
    tcounts_above_bg = data.sum() - baseline * len(data)
    if tcounts_above_bg < 0:
        tcounts_above_bg = data.sum()  # let's avoid negative estimates for the integral
    pars = self.make_params(peak_ph=peak_ph, background=baseline, integral=tcounts_above_bg, fwhm=fwhm_arb / dph_de, dph_de=dph_de)
    return lmfit.models.update_param_vals(pars, self.prefix, **kwargs)

LineModelResult

Bases: ModelResult

like lmfit.model.Model result, but with some convenient plotting functions for line spectra fits

Source code in mass2/calibration/line_models.py
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
class LineModelResult(lmfit.model.ModelResult):
    """like lmfit.model.Model result, but with some convenient plotting functions for line spectra fits"""

    def _compact_fit_report(self) -> str:
        """A compact fit report suitable for annotating a plot"""
        s = ""
        sn = {"background": "bg", "integral": "intgrl", "bg_slope": "bg_slp"}
        for k in sorted(self.params.keys()):
            v = self.params[k]
            if v.vary:
                if v.stderr is None:
                    sig_figs = 2
                    s += f"{sn.get(k, k):7} {v.value:.{sig_figs}g}±None\n"
                else:
                    sig_figs = int(np.ceil(np.log10(np.abs(v.value / v.stderr))) + 1)
                    sig_figs = max(1, sig_figs)
                    s += f"{sn.get(k, k):7} {v.value:.{sig_figs}g}±{v.stderr:.2g}\n"
            else:
                sig_figs = 2
                s += f"{sn.get(k, k):7} {v.value:.{sig_figs}g} HELD\n"
        s += f"redchi  {self.redchi:.2g}"
        return s

    def plotm(
        self, ax: plt.Axes | None = None, title: str | None = None, xlabel: str | None = None, ylabel: str | None = None
    ) -> None:
        """plot the data, the fit, and annotate the plot with the parameters"""
        title, xlabel, ylabel = self._handle_default_labels(title, xlabel, ylabel)
        if ax is None:
            plt.figure()
            ax = plt.gca()
        ax = lmfit.model.ModelResult.plot_fit(self, ax=ax, xlabel=xlabel, ylabel=ylabel)
        if title is not None:
            plt.title(title)
        ax.text(
            0.05,
            0.95,
            self._compact_fit_report(),
            transform=ax.transAxes,
            verticalalignment="top",
            bbox=dict(facecolor="w", alpha=0.5),
            family="monospace",
        )
        # ax.legend(["data", self._compact_fit_report()],loc='best', frameon=True, framealpha = 0.5)
        ax.legend(loc="upper right")

    def set_label_hints(
        self, binsize: float, ds_shortname: str, attr_str: str, unit_str: str, cut_hint: str, states_hint: str = ""
    ) -> None:
        """Set hints for axis labels and title for plotm()."""
        self._binsize = binsize
        self._ds_shortname = ds_shortname
        self._attr_str = attr_str
        self._unit_str = unit_str
        self._cut_hint = cut_hint
        self._states_hint = states_hint
        self._has_label_hints = True

    def _handle_default_labels(self, title: str | None, xlabel: str | None, ylabel: str | None) -> tuple[str, str, str]:
        """Handle default labels for plotm()."""
        if hasattr(self, "_has_label_hints"):
            if title is None:
                title = f"{self._ds_shortname}: {self.model.spect.shortname}"
            if ylabel is None:
                ylabel = f"counts per {self._binsize:g} {self._unit_str} bin"
                if len(self._states_hint) > 0:
                    ylabel += f"\nstates={self._states_hint}: {self._cut_hint}"
            if xlabel is None:
                xlabel = f"{self._attr_str} ({self._unit_str})"
        elif ylabel is None and "bin_centers" in self.userkws:
            binsize = self.userkws["bin_centers"][1] - self.userkws["bin_centers"][0]
            ylabel = f"counts per {binsize:g} unit bin"
        if title is None:
            title = ""
        if xlabel is None:
            xlabel = ""
        if ylabel is None:
            ylabel = ""
        return title, xlabel, ylabel

    def _validate_bins_per_fwhm(self, minimum_bins_per_fwhm: float) -> None:
        """Validate that the bin size is small enough compared to the fitted FWHM to prevent approximation problems."""
        if "bin_centers" not in self.userkws:
            return  # i guess someone used this for a non histogram fit
        if not VALIDATE_BIN_SIZE:
            return
        bin_centers = self.userkws["bin_centers"]
        bin_size = bin_centers[1] - bin_centers[0]
        for iComp in self.components:
            prefix = iComp.prefix
            dphde = f"{prefix}dph_de"
            fwhm = f"{prefix}fwhm"
            if (dphde in self.params) and (fwhm in self.params):
                bin_size_energy = bin_size / self.params[dphde]
                instrument_gaussian_fwhm = self.params[fwhm].value
                minimum_fwhm_energy = iComp.spect.minimum_fwhm(instrument_gaussian_fwhm)
                bins_per_fwhm = minimum_fwhm_energy / bin_size_energy
                if bins_per_fwhm < minimum_bins_per_fwhm:
                    msg = f"""bins are too large.
Bin size (energy units) = {bin_size_energy:.3g}, fit FWHM (energy units) = {instrument_gaussian_fwhm:.3g}
Minimum FWHM accounting for narrowest Lorentzian in spectrum (energy units) = {minimum_fwhm_energy:.3g}
Bins per FWHM = {bins_per_fwhm:.3g}, Minimum Bins per FWHM = {minimum_bins_per_fwhm:.3g}
To avoid this error:
1. use smaller bins, or
2. pass a smaller value of `minimum_bins_per_fwhm` to .fit, or
3. set `mass2.calibration.line_models.VALIDATE_BIN_SIZE = False`.
See https://github.com/usnistgov/mass/issues/162 for discussion on this issue"""
                    raise ValueError(msg)

plotm(ax=None, title=None, xlabel=None, ylabel=None)

plot the data, the fit, and annotate the plot with the parameters

Source code in mass2/calibration/line_models.py
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
def plotm(
    self, ax: plt.Axes | None = None, title: str | None = None, xlabel: str | None = None, ylabel: str | None = None
) -> None:
    """plot the data, the fit, and annotate the plot with the parameters"""
    title, xlabel, ylabel = self._handle_default_labels(title, xlabel, ylabel)
    if ax is None:
        plt.figure()
        ax = plt.gca()
    ax = lmfit.model.ModelResult.plot_fit(self, ax=ax, xlabel=xlabel, ylabel=ylabel)
    if title is not None:
        plt.title(title)
    ax.text(
        0.05,
        0.95,
        self._compact_fit_report(),
        transform=ax.transAxes,
        verticalalignment="top",
        bbox=dict(facecolor="w", alpha=0.5),
        family="monospace",
    )
    # ax.legend(["data", self._compact_fit_report()],loc='best', frameon=True, framealpha = 0.5)
    ax.legend(loc="upper right")

set_label_hints(binsize, ds_shortname, attr_str, unit_str, cut_hint, states_hint='')

Set hints for axis labels and title for plotm().

Source code in mass2/calibration/line_models.py
458
459
460
461
462
463
464
465
466
467
468
def set_label_hints(
    self, binsize: float, ds_shortname: str, attr_str: str, unit_str: str, cut_hint: str, states_hint: str = ""
) -> None:
    """Set hints for axis labels and title for plotm()."""
    self._binsize = binsize
    self._ds_shortname = ds_shortname
    self._attr_str = attr_str
    self._unit_str = unit_str
    self._cut_hint = cut_hint
    self._states_hint = states_hint
    self._has_label_hints = True

MLEModel

Bases: Model

A version of lmfit.Model that uses Maximum Likelihood weights in place of chisq, as described in: doi:10.1007/s10909-014-1098-4 "Maximum-Likelihood Fits to Histograms for Improved Parameter Estimation"

Source code in mass2/calibration/line_models.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
class MLEModel(lmfit.Model):
    """A version of lmfit.Model that uses Maximum Likelihood weights
    in place of chisq, as described in: doi:10.1007/s10909-014-1098-4
    "Maximum-Likelihood Fits to Histograms for Improved Parameter Estimation"
    """

    def _residual(self, params: lmfit.Parameters, data: NDArray | None, weights: NDArray | None, **kwargs: Any) -> NDArray:
        """Calculate the chi_MLE^2 value from Joe Fowler's Paper
        doi:10.1007/s10909-014-1098-4 "Maximum-Likelihood Fits to Histograms for Improved Parameter Estimation"
        """
        y = self.eval(params, **kwargs)
        if data is None:
            return y
        r2 = y - data
        nonzero = data > 0
        r2[nonzero] += data[nonzero] * np.log((data / y)[nonzero])
        # points that are zero do not affect the chisq value, so should not
        # be included in the calculation of the number of degrees of freedom, and therefore reduced chisq
        # GCO tried setting self.ndata here, but it doesn't persist
        # not clear how to calculate reduced chisq correctly

        # Calculate the sqrt(2*r2) in place into vals.
        # The mask for r2>0 avoids the problem found in MASS issue #217.
        vals = np.zeros_like(r2)
        nonneg = r2 > 0
        vals[nonneg] = np.sqrt(2 * r2[nonneg])
        vals[y < data] *= -1
        return vals

    def __repr__(self) -> str:
        """Return representation of Model."""
        return f"<{type(self).__name__}: {self.name}>"

    def _reprstring(self, long: bool = False) -> str:
        """Return a longer string representation of Model, with its options."""
        out = self._name
        opts = []
        if len(self._prefix) > 0:
            opts.append(f"prefix='{self._prefix}'")
        if long:
            for k, v in self.opts.items():
                opts.append(f"{k}='{v}'")
        if len(opts) > 0:
            out = "{}, {}".format(out, ", ".join(opts))
        return f"{type(self).__name__}({out})"

    def __add__(self, other: lmfit.Model) -> "CompositeMLEModel":
        """Sum of two models"""
        return CompositeMLEModel(self, other, lmfit.model.operator.add)

    def __sub__(self, other: lmfit.Model) -> "CompositeMLEModel":
        """Difference of two models"""
        return CompositeMLEModel(self, other, lmfit.model.operator.sub)

    def __mul__(self, other: lmfit.Model) -> "CompositeMLEModel":
        """Product of two models"""
        return CompositeMLEModel(self, other, lmfit.model.operator.mul)

    def __truediv__(self, other: lmfit.Model) -> "CompositeMLEModel":
        """Ratio of two models"""
        return CompositeMLEModel(self, other, lmfit.model.operator.truediv)

    def fit(self, *args: Any, minimum_bins_per_fwhm: float | None = 3, **kwargs: Any) -> "LineModelResult":
        """as lmfit.Model.fit except
        1. the default method is "least_squares" because it gives error bars more often at a 1.5-2.0X speed penalty
        2. supports "leastsq_refit" which uses "leastsq" to fit, but if there are no error bars, refits with
        "least_squares". Call result.set_label_hints(...) then result.plotm() for a nice plot.
        """
        if "method" not in kwargs:
            # change default method
            kwargs["method"] = "least_squares"
            # least_squares always gives uncertainties, while the normal default leastsq often does not
            # leastsq fails to give uncertainties if parameters are near bounds or at their initial value
            # least_squares is about 1.5X to 2.0X slower based on two test cases
        if minimum_bins_per_fwhm is None:
            minimum_bins_per_fwhm = 3  # provide default value
        if "weights" in kwargs and kwargs["weights"] is not None:
            msg = "MLEModel assumes Poisson-distributed data; cannot use weights other than None"
            raise Exception(msg)
        result = self._fit(*args, **kwargs)
        result.__class__ = LineModelResult
        result._validate_bins_per_fwhm(minimum_bins_per_fwhm)
        return result

    def _fit(self, *args: Any, **kwargs: Any) -> "LineModelResult":
        """internal implementation of fit to add support for "leastsq_refit" method"""
        if kwargs["method"] == "leastsq_refit":
            # First fit with leastsq (the fastest method)
            kwargs["method"] = "leastsq"
            result0 = lmfit.Model.fit(self, *args, **kwargs)
            if result0.success and result0.errorbars:
                return result0

            # If we didn't get uncertainties, fit again with least_squares
            kwargs["method"] = "least_squares"
            if "params" in kwargs:
                kwargs["params"] = result0.params
            elif len(args) > 1:
                args = tuple([result0.params if i == 1 else arg for (i, arg) in enumerate(args)])
            result = lmfit.Model.fit(self, *args, **kwargs)
        else:
            result = lmfit.Model.fit(self, *args, **kwargs)
        return result

__add__(other)

Sum of two models

Source code in mass2/calibration/line_models.py
156
157
158
def __add__(self, other: lmfit.Model) -> "CompositeMLEModel":
    """Sum of two models"""
    return CompositeMLEModel(self, other, lmfit.model.operator.add)

__mul__(other)

Product of two models

Source code in mass2/calibration/line_models.py
164
165
166
def __mul__(self, other: lmfit.Model) -> "CompositeMLEModel":
    """Product of two models"""
    return CompositeMLEModel(self, other, lmfit.model.operator.mul)

__repr__()

Return representation of Model.

Source code in mass2/calibration/line_models.py
139
140
141
def __repr__(self) -> str:
    """Return representation of Model."""
    return f"<{type(self).__name__}: {self.name}>"

__sub__(other)

Difference of two models

Source code in mass2/calibration/line_models.py
160
161
162
def __sub__(self, other: lmfit.Model) -> "CompositeMLEModel":
    """Difference of two models"""
    return CompositeMLEModel(self, other, lmfit.model.operator.sub)

__truediv__(other)

Ratio of two models

Source code in mass2/calibration/line_models.py
168
169
170
def __truediv__(self, other: lmfit.Model) -> "CompositeMLEModel":
    """Ratio of two models"""
    return CompositeMLEModel(self, other, lmfit.model.operator.truediv)

fit(*args, minimum_bins_per_fwhm=3, **kwargs)

as lmfit.Model.fit except 1. the default method is "least_squares" because it gives error bars more often at a 1.5-2.0X speed penalty 2. supports "leastsq_refit" which uses "leastsq" to fit, but if there are no error bars, refits with "least_squares". Call result.set_label_hints(...) then result.plotm() for a nice plot.

Source code in mass2/calibration/line_models.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
def fit(self, *args: Any, minimum_bins_per_fwhm: float | None = 3, **kwargs: Any) -> "LineModelResult":
    """as lmfit.Model.fit except
    1. the default method is "least_squares" because it gives error bars more often at a 1.5-2.0X speed penalty
    2. supports "leastsq_refit" which uses "leastsq" to fit, but if there are no error bars, refits with
    "least_squares". Call result.set_label_hints(...) then result.plotm() for a nice plot.
    """
    if "method" not in kwargs:
        # change default method
        kwargs["method"] = "least_squares"
        # least_squares always gives uncertainties, while the normal default leastsq often does not
        # leastsq fails to give uncertainties if parameters are near bounds or at their initial value
        # least_squares is about 1.5X to 2.0X slower based on two test cases
    if minimum_bins_per_fwhm is None:
        minimum_bins_per_fwhm = 3  # provide default value
    if "weights" in kwargs and kwargs["weights"] is not None:
        msg = "MLEModel assumes Poisson-distributed data; cannot use weights other than None"
        raise Exception(msg)
    result = self._fit(*args, **kwargs)
    result.__class__ = LineModelResult
    result._validate_bins_per_fwhm(minimum_bins_per_fwhm)
    return result

Calibration algorithms

This file is intended to include algorithms that could be generally useful for calibration. Mostly they are pulled out of the former mass.calibration.young module.

FailedToGetModelException

Bases: Exception

Exception raised when get_model() fails to find a model for a line

Source code in mass2/calibration/algorithms.py
176
177
178
179
class FailedToGetModelException(Exception):
    """Exception raised when get_model() fails to find a model for a line"""

    pass

build_fit_ranges(line_names, excluded_line_names, approx_ecal, fit_width_ev)

Returns a list of (lo,hi) where lo and hi have units of energy of ranges to fit in for each energy in line_names.

Args: line_names (list[str or float]): list of line names or energies excluded_line_names (list[str or float]): list of line_names or energies to avoid when making fit ranges approx_ecal: an EnergyCalibration object containing an approximate calibration fit_width_ev (float): full size in eV of fit ranges

Source code in mass2/calibration/algorithms.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def build_fit_ranges(
    line_names: Iterable[str | float], excluded_line_names: Iterable[str | float], approx_ecal: EnergyCalibration, fit_width_ev: float
) -> tuple[list[float], list[tuple[float, float]], list[float]]:
    """Returns a list of (lo,hi) where lo and hi have units of energy of
    ranges to fit in for each energy in line_names.

    Args:
        line_names (list[str or float]): list of line names or energies
        excluded_line_names (list[str or float]): list of line_names or energies to
            avoid when making fit ranges
        approx_ecal: an EnergyCalibration object containing an approximate calibration
        fit_width_ev (float): full size in eV of fit ranges
    """
    _names, e_e = line_names_and_energies(line_names)
    _excl_names, excl_e_e = line_names_and_energies(excluded_line_names)
    half_width_ev = fit_width_ev / 2.0
    all_e = np.sort(np.hstack((e_e, excl_e_e)))
    assert len(all_e) == len(np.unique(all_e))
    fit_lo_hi_energy = []
    slopes_de_dph = []

    for e in e_e:
        slope_de_dph = cast(float, approx_ecal.energy2dedph(e))
        if any(all_e < e):
            nearest_below = all_e[all_e < e][-1]
        else:
            nearest_below = -np.inf
        if any(all_e > e):
            nearest_above = all_e[all_e > e][0]
        else:
            nearest_above = np.inf
        lo = max(e - half_width_ev, (e + nearest_below) / 2.0)
        hi = min(e + half_width_ev, (e + nearest_above) / 2.0)
        fit_lo_hi_energy.append((lo, hi))
        slopes_de_dph.append(slope_de_dph)

    return e_e, fit_lo_hi_energy, slopes_de_dph

build_fit_ranges_ph(line_names, excluded_line_names, approx_ecal, fit_width_ev)

Call build_fit_ranges() to get (lo,hi) for fitranges in energy units, then convert to ph using approx_ecal

Source code in mass2/calibration/algorithms.py
122
123
124
125
126
127
128
129
130
131
132
133
134
def build_fit_ranges_ph(
    line_names: Iterable[str | float], excluded_line_names: Iterable[str | float], approx_ecal: EnergyCalibration, fit_width_ev: float
) -> tuple[list[float], list[tuple[float, float]], list[float]]:
    """Call build_fit_ranges() to get (lo,hi) for fitranges in energy units,
    then convert to ph using approx_ecal"""
    e_e, fit_lo_hi_energy, slopes_de_dph = build_fit_ranges(line_names, excluded_line_names, approx_ecal, fit_width_ev)
    fit_lo_hi_ph = []
    for lo, hi in fit_lo_hi_energy:
        lo_ph = cast(float, approx_ecal.energy2ph(lo))
        hi_ph = cast(float, approx_ecal.energy2ph(hi))
        fit_lo_hi_ph.append((lo_ph, hi_ph))

    return e_e, fit_lo_hi_ph, slopes_de_dph

find_local_maxima(pulse_heights, gaussian_fwhm)

Smears each pulse by a gaussian of gaussian_fwhm and finds local maxima, returns a list of their locations in pulse_height units (sorted by number of pulses in peak) AND their peak values as: (peak_locations, peak_intensities)

Args: pulse_heights (np.array(dtype=float)): a list of pulse heights (eg p_filt_value) gaussian_fwhm = fwhm of a gaussian that each pulse is smeared with, in same units as pulse heights

Source code in mass2/calibration/algorithms.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def find_local_maxima(pulse_heights: ArrayLike, gaussian_fwhm: float) -> tuple[NDArray, NDArray]:
    """Smears each pulse by a gaussian of gaussian_fhwm and finds local maxima,
    returns a list of their locations in pulse_height units (sorted by number of
    pulses in peak) AND their peak values as: (peak_locations, peak_intensities)

    Args:
        pulse_heights (np.array(dtype=float)): a list of pulse heights (eg p_filt_value)
        gaussian_fwhm = fwhm of a gaussian that each pulse is smeared with, in same units as pulse heights
    """
    # kernel density estimation (with a gaussian kernel)
    n = 128 * 1024
    gaussian_fwhm = float(gaussian_fwhm)
    # The above ensures that lo & hi are floats, so that (lo-hi)/n is always a float in python2
    sigma = gaussian_fwhm / (np.sqrt(np.log(2) * 2) * 2)
    tbw = 1.0 / sigma / (np.pi * 2)
    lo = np.min(pulse_heights) - 3 * gaussian_fwhm
    hi = np.max(pulse_heights) + 3 * gaussian_fwhm
    hist, bins = np.histogram(pulse_heights, np.linspace(lo, hi, n + 1))
    tx = np.fft.rfftfreq(n, (lo - hi) / n)
    ty = np.exp(-(tx**2) / 2 / tbw**2)
    x = (bins[1:] + bins[:-1]) / 2
    y = np.fft.irfft(np.fft.rfft(hist) * ty)

    flag = (y[1:-1] > y[:-2]) & (y[1:-1] > y[2:])
    lm = np.arange(1, n - 1)[flag]
    lm = lm[np.argsort(-y[lm])]

    return np.array(x[lm]), np.array(y[lm])

find_opt_assignment(peak_positions, line_names, nextra=2, nincrement=3, nextramax=8, maxacc=0.015)

Tries to find an assignment of peaks to line names that is reasonably self consistent and smooth

Args: peak_positions (np.array(dtype=float)): a list of peak locations in arb units, e.g. p_filt_value units line_names (list[str or float]): a list of calibration lines either as number (which is energies in eV), or name to be looked up in STANDARD_FEATURES nextra (int): the algorithm starts with the first len(line_names) + nextra peak_positions nincrement (int): each time the algorithm fails to find a satisfactory peak assignment, it uses nincrement more lines nextramax (int): the algorithm stops incrementing nextra past this value, instead failing with a ValueError saying "no peak assignment succeeded" maxacc (float): an empirical number that determines if an assignment is good enough. The default number works reasonably well for tupac data

Source code in mass2/calibration/algorithms.py
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def find_opt_assignment(
    peak_positions: ArrayLike,
    line_names: list[str | float],
    nextra: int = 2,
    nincrement: int = 3,
    nextramax: int = 8,
    maxacc: float = 0.015,
) -> tuple[list[str], NDArray, list[int]]:
    """Tries to find an assignment of peaks to line names that is reasonably self consistent and smooth

    Args:
        peak_positions (np.array(dtype=float)): a list of peak locations in arb units,
            e.g. p_filt_value units
        line_names (list[str or float]): a list of calibration lines either as number (which is
            energies in eV), or name to be looked up in STANDARD_FEATURES
        nextra (int): the algorithm starts with the first len(line_names) + nextra peak_positions
        nincrement (int): each time the algorithm fails to find a satisfactory peak assignment, it uses
            nincrement more lines
        nextramax (int): the algorithm stops incrementing nextra past this value, instead
            failing with a ValueError saying "no peak assignment succeeded"
        maxacc (float): an empirical number that determines if an assignment is good enough.
            The default number works reasonably well for tupac data
    """
    name_e, e_e = line_names_and_energies(line_names)

    n_sel_pp = len(line_names) + nextra  # number of peak_positions to use to line up to line_names
    nmax = len(line_names) + nextramax
    peak_positions = np.asarray(peak_positions)

    while True:
        sel_positions = np.asarray(peak_positions[:n_sel_pp], dtype="float")
        energies = np.asarray(e_e, dtype="float")
        assign = np.array(list(itertools.combinations(sel_positions, len(line_names))))
        assign.sort(axis=1)
        fracs = np.divide(energies[1:-1] - energies[:-2], energies[2:] - energies[:-2])
        est_pos = assign[:, :-2] * (1 - fracs) + assign[:, 2:] * fracs
        acc_est = np.linalg.norm(np.divide(est_pos - assign[:, 1:-1], assign[:, 2:] - assign[:, :-2]), axis=1)

        opt_assign_i = np.argmin(acc_est)
        acc = acc_est[opt_assign_i]
        opt_assign = assign[opt_assign_i]

        if acc > maxacc * np.sqrt(len(energies)):
            n_sel_pp += nincrement
            if n_sel_pp > nmax:
                msg = f"no peak assignment succeeded: acc {acc:g}, maxacc*sqrt(len(energies)) {maxacc * np.sqrt(len(energies)):g}"
                raise ValueError(msg)
            else:
                continue
        else:
            return name_e, energies, list(opt_assign)

get_model(lineNameOrEnergy, has_linear_background=True, has_tails=False, prefix='')

Get a GenericLineModel for a line, given either a line name or energy in eV

Parameters:
  • lineNameOrEnergy (GenericLineModel | SpectralLine | str | float) –

    A line name, or energy, or a SpectralLine, or a GenericLineModel

  • has_linear_background (bool, default: True ) –

    Whether to allow a background slope, by default True

  • has_tails (bool, default: False ) –

    Whether to allow exponential tails, by default False

  • prefix (str, default: '' ) –

    Line name prefix, by default ""

Returns:
Raises:
Source code in mass2/calibration/algorithms.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
def get_model(
    lineNameOrEnergy: GenericLineModel | SpectralLine | str | float,
    has_linear_background: bool = True,
    has_tails: bool = False,
    prefix: str = "",
) -> GenericLineModel:
    """Get a GenericLineModel for a line, given either a line name or energy in eV

    Parameters
    ----------
    lineNameOrEnergy : GenericLineModel | SpectralLine | str | float
        A line name, or energy, or a SpectralLine, or a GenericLineModel
    has_linear_background : bool, optional
        Whether to allow a background slope, by default True
    has_tails : bool, optional
        Whether to allow exponential tails, by default False
    prefix : str, optional
        Line name prefix, by default ""

    Returns
    -------
    GenericLineModel
        An appropriate line model

    Raises
    ------
    FailedToGetModelException
        When a matching line cannot be found
    """
    if isinstance(lineNameOrEnergy, GenericLineModel):
        line = lineNameOrEnergy.spect
    elif isinstance(lineNameOrEnergy, SpectralLine):
        line = lineNameOrEnergy
    elif isinstance(lineNameOrEnergy, str):
        if lineNameOrEnergy in mass2.spectra:
            line = mass2.spectra[lineNameOrEnergy]
        elif lineNameOrEnergy in mass2.STANDARD_FEATURES:
            energy = mass2.STANDARD_FEATURES[lineNameOrEnergy]
            line = SpectralLine.quick_monochromatic_line(lineNameOrEnergy, energy, 0.001, 0)
        else:
            raise FailedToGetModelException(f"failed to get line from lineNameOrEnergy={lineNameOrEnergy}")
    else:
        try:
            energy = float(lineNameOrEnergy)
        except Exception:
            raise FailedToGetModelException(
                f"lineNameOrEnergy = {lineNameOrEnergy} is not convertable"
                " to float or a str in mass2.spectra or mass2.STANDARD_FEATURES"
            )
        line = SpectralLine.quick_monochromatic_line(f"{lineNameOrEnergy}eV", float(lineNameOrEnergy), 0.001, 0)
    return line.model(has_linear_background=has_linear_background, has_tails=has_tails, prefix=prefix)

line_names_and_energies(line_names)

Given a list of line_names, return (names, energies) in eV.

Can also accept energies in eV directly and return (names, energies).

Source code in mass2/calibration/algorithms.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def line_names_and_energies(line_names: Iterable[str | float]) -> tuple[list[str], list[float]]:
    """Given a list of line_names, return (names, energies) in eV.

    Can also accept energies in eV directly and return (names, energies).
    """
    energies: list[float] = []
    for name_or_energy in line_names:
        if isinstance(name_or_energy, str):
            energies.append(STANDARD_FEATURES[name_or_energy])
        else:
            energies.append(float(name_or_energy))
    order: NDArray = np.argsort(energies)
    names = list(line_names)
    sorted_names = [str(names[i]) for i in order]
    energies.sort()
    return sorted_names, energies

multifit(ph, line_names, fit_lo_hi, binsize_ev, slopes_de_dph, hide_deprecation=False)

Args: ph (np.array(dtype=float)): list of pulse heights line_names: names of calibration lines fit_lo_hi (list[list[float]]): a list of (lo,hi) with units of ph, used as edges of histograms for fitting binsize_ev (list[float]): list of binsizes in eV for calibration lines slopes_de_dph (list[float]): list of slopes de_dph (e in eV) hide_deprecation: whether to suppress deprecation warnings

Source code in mass2/calibration/algorithms.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def multifit(
    ph: ArrayLike,
    line_names: Iterable[str],
    fit_lo_hi: list[list[float]],
    binsize_ev: list[float],
    slopes_de_dph: list[float],
    hide_deprecation: bool = False,
) -> dict[str, Any]:
    """Fit several calibration lines in pulse-height units, one `singlefit` per line.

    Args:
        ph (np.array(dtype=float)): list of pulse heights
        line_names: names of calibration lines
        fit_lo_hi (list[list[float]]): a list of (lo, hi) with units of ph, used as
            edges of histograms for fitting
        binsize_ev (list[float]): list of binsizes in eV for calibration lines
        slopes_de_dph (list[float]): list of slopes de_dph (e in eV), used to convert
            each eV binsize into a pulse-height binsize
        hide_deprecation: whether to suppress deprecation warnings (currently unused here)
    """
    sorted_names, sorted_energies = line_names_and_energies(line_names)
    fit_results = []
    peak_positions = []
    resolutions = []
    for idx, line_name in enumerate(sorted_names):
        lo, hi = fit_lo_hi[idx]
        dP_dE = 1 / slopes_de_dph[idx]
        # convert the requested eV binsize into pulse-height units for this line
        result = singlefit(ph, line_name, lo, hi, binsize_ev[idx] * dP_dE, dP_dE)
        fit_results.append(result)
        peak_positions.append(result.best_values["peak_ph"])
        resolutions.append(result.best_values["fwhm"])
    return {
        "results": fit_results,
        "peak_ph": peak_positions,
        "eres": resolutions,
        "line_names": sorted_names,
        "energies": sorted_energies,
    }

singlefit(ph, name, lo, hi, binsize_ph, approx_dP_dE)

Performs a fit to a single line in pulse height units

Parameters:
  • ph (ArrayLike) –

    Measured pulse heights

  • name (GenericLineModel | SpectralLine | str | float) –

    Spectral line to fit, either as a name, energy in eV, SpectralLine, or GenericLineModel

  • lo (float) –

    minimum pulse height to include in fit

  • hi (float) –

    maximum pulse height to include in fit

  • binsize_ph (float) –

    bin size in pulse height units

  • approx_dP_dE (float) –

    Estimate of the dph/dE at the line energy, used to constrain the fit

Returns:
Raises:
  • ValueError

    When too many bins would be used in the fit

Source code in mass2/calibration/algorithms.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
def singlefit(
    ph: ArrayLike, name: GenericLineModel | SpectralLine | str | float, lo: float, hi: float, binsize_ph: float, approx_dP_dE: float
) -> LineModelResult:
    """Performs a fit to a single line in pulse height units

    Parameters
    ----------
    ph : ArrayLike
        Measured pulse heights
    name : GenericLineModel | SpectralLine | str | float
        Spectral line to fit, either as a name, energy in eV, SpectralLine, or GenericLineModel
    lo : float
        minimum pulse height to include in fit
    hi : float
        maximum pulse height to include in fit
    binsize_ph : float
        bin size in pulse height units
    approx_dP_dE : float
        Estimate of the dph/dE at the line energy, used to constrain the fit

    Returns
    -------
    LineModelResult
        The best-fit result

    Raises
    ------
    ValueError
        When too many bins would be used in the fit
    """
    # Guard against pathological (lo, hi, binsize) combinations before allocating histograms.
    if (hi - lo) / binsize_ph > 5000:
        raise ValueError("too damn many bins, dont like running out of memory")
    counts, bin_edges = np.histogram(ph, np.arange(lo, hi, binsize_ph))
    half_bin = 0.5 * (bin_edges[1] - bin_edges[0])
    bin_centers = bin_edges[:-1] + half_bin
    model = get_model(name)
    params = model.guess(counts, bin_centers=bin_centers, dph_de=approx_dP_dE)
    # dph_de is pinned to the supplied estimate for every model except Gaussians.
    if "Gaussian" not in model.name:
        params["dph_de"].set(approx_dP_dE, vary=False)
    result = model.fit(counts, params, bin_centers=bin_centers, minimum_bins_per_fwhm=1.5)
    result.energies = bin_centers
    return result

Highly charged ions

hci_lines.py

Uses pickle file containing NIST ASD levels data to generate some commonly used HCI lines in mass. Meant to be a replacement for _highly_charged_ion_lines.py, which hard codes in line parameters.

The pickle file can be gzip-compressed, provided the compressed filename ends with ".gz".

February 2020 Paul Szypryt

NIST_ASD

Class for working with a pickled atomic spectra database

Source code in mass2/calibration/hci_lines.py
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
class NIST_ASD:
    """Class for working with a pickled atomic spectra database"""

    def __init__(self, pickleFilename: str | None = None):
        """Loads ASD pickle file (optionally gzipped)

        Parameters
        ----------
        pickleFilename : str | None, optional
            ASD pickle file name, as str, or if none then `mass2.calibration.hci_lines.DEFAULT_PICKLE_PATH` (default None)
        """

        if pickleFilename is None:
            pickleFilename = os.path.join(os.path.split(__file__)[0], str(DEFAULT_PICKLE_PATH))

        if pickleFilename.endswith(".gz"):
            with gzip.GzipFile(pickleFilename, "rb") as handle:
                self.NIST_ASD_Dict = pickle.load(handle)
        else:
            with open(pickleFilename, "rb") as handle:
                self.NIST_ASD_Dict = pickle.load(handle)

    def getAvailableElements(self) -> list[str]:
        """Returns a list of all available elements from the ASD pickle file"""

        return list(self.NIST_ASD_Dict.keys())

    def getAvailableSpectralCharges(self, element: str) -> list[int]:
        """For a given element, returns a list of all available charge states from the ASD pickle file

        Parameters
        ----------
        element : str
            atomic symbol of element, e.g. 'Ne'

        Returns
        -------
        list[int]
            Available charge states
        """

        return list(self.NIST_ASD_Dict[element].keys())

    def getAvailableLevels(
        self,
        element: str,
        spectralCharge: int,
        requiredConf: str | None = None,
        requiredTerm: str | None = None,
        requiredJVal: str | None = None,
        maxLevels: int | None = None,
        units: str = "eV",
        getUncertainty: bool = True,
    ) -> dict:
        """For a given element and spectral charge state, return a dict of all known levels from the ASD pickle file

        Parameters
        ----------
        element : str
            Elemental atomic symbol, e.g. 'Ne'
        spectralCharge : int
            spectral charge state, e.g. 1 for neutral atoms, 10 for H-like Ne
        requiredConf : str | None, optional
            if not None, limits results to those with `conf == requiredConf`, by default None
        requiredTerm : str | None, optional
            if not None, limits results to those with `term == requiredTerm`, by default None
        requiredJVal : str | None, optional
            if not None, limits results to those with `a == requiredJVal`, by default None
        maxLevels : int | None, optional
            the maximum number of levels (sorted by energy) to return, by default None
        units : str, optional
            'cm-1' or 'eV' for returned line position. If 'eV', converts from database 'cm-1' values, by default "eV"
        getUncertainty : bool, optional
            whether to return uncertain values, by default True

        Returns
        -------
        dict
            A dictionary of energy level strings to energy levels.
        """
        if units not in {"eV", "cm-1"}:
            raise ValueError("Unit type not supported, please use eV or cm-1")

        spectralCharge = int(spectralCharge)
        levelsDict: dict = {}
        numLevels = 0
        for iLevel in list(self.NIST_ASD_Dict[element][spectralCharge].keys()):
            try:
                # Check to see if we reached maximum number of levels to return
                if maxLevels is not None:
                    if numLevels == maxLevels:
                        return levelsDict
                # If required, check to see if level matches search conf, term, JVal
                includeTerm = False
                includeJVal = False
                conf, term, j_str = iLevel.split()
                JVal = j_str.split("=")[1]
                includeConf = (requiredConf is None) or conf == requiredConf
                includeTerm = (requiredTerm is None) or term == requiredTerm
                includeJVal = (requiredJVal is None) or JVal == requiredJVal

                # Include levels that match, in either cm-1 or eV
                if includeConf and includeTerm and includeJVal:
                    numLevels += 1
                    if units == "cm-1":
                        if getUncertainty:
                            levelsDict[iLevel] = self.NIST_ASD_Dict[element][spectralCharge][iLevel]
                        else:
                            levelsDict[iLevel] = self.NIST_ASD_Dict[element][spectralCharge][iLevel][0]
                    elif units == "eV":
                        if getUncertainty:
                            levelsDict[iLevel] = [
                                iValue * INVCM_TO_EV for iValue in self.NIST_ASD_Dict[element][spectralCharge][iLevel]
                            ]
                        else:
                            levelsDict[iLevel] = INVCM_TO_EV * self.NIST_ASD_Dict[element][spectralCharge][iLevel][0]
            except ValueError:
                f"Warning: cannot parse level: {iLevel}"
        return levelsDict

    def getSingleLevel(
        self, element: str, spectralCharge: int, conf: str, term: str, JVal: str, units: str = "eV", getUncertainty: bool = True
    ) -> float:
        """Return the level data for a fully defined element, charge state, conf, term, and JVal.

        Parameters
        ----------
        element : str
            atomic symbol of element, e.g. 'Ne'
        spectralCharge : int
            spectral charge state, e.g. 1 for neutral atoms, 10 for H-like Ne
        conf : str
            nuclear configuration, e.g. '2p'
        term : str
            nuclear term, e.g. '2P*'
        JVal : str
            total angular momentum J, e.g. '3/2'
        units : str, optional
            'cm-1' or 'eV' for returned line position. If 'eV', converts from database 'cm-1' values, by default "eV"
        getUncertainty : bool, optional
            includes uncertainties in list of levels, by default True

        Returns
        -------
        float
            _description_
        """

        levelString = f"{conf} {term} J={JVal}"
        if units == "cm-1":
            if getUncertainty:
                levelEnergy = self.NIST_ASD_Dict[element][spectralCharge][levelString]
            else:
                levelEnergy = self.NIST_ASD_Dict[element][spectralCharge][levelString][0]
        elif units == "eV":
            if getUncertainty:
                levelEnergy = [iValue * INVCM_TO_EV for iValue in self.NIST_ASD_Dict[element][spectralCharge][levelString]]
            else:
                levelEnergy = self.NIST_ASD_Dict[element][spectralCharge][levelString][0] * INVCM_TO_EV
        else:
            raise ValueError("Unit type not supported, please use eV or cm-1")
        return levelEnergy

__init__(pickleFilename=None)

Loads ASD pickle file (optionally gzipped)

Parameters:
  • pickleFilename (str | None, default: None ) –

    ASD pickle file name, as str, or if none then mass2.calibration.hci_lines.DEFAULT_PICKLE_PATH (default None)

Source code in mass2/calibration/hci_lines.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(self, pickleFilename: str | None = None):
    """Loads ASD pickle file (optionally gzipped)

    Parameters
    ----------
    pickleFilename : str | None, optional
        ASD pickle file name, as str, or if none then `mass2.calibration.hci_lines.DEFAULT_PICKLE_PATH` (default None)
    """

    if pickleFilename is None:
        directory = os.path.split(__file__)[0]
        pickleFilename = os.path.join(directory, str(DEFAULT_PICKLE_PATH))

    # Choose the opener from the filename suffix: ".gz" means gzip-compressed.
    opener = gzip.GzipFile if pickleFilename.endswith(".gz") else open
    with opener(pickleFilename, "rb") as handle:
        self.NIST_ASD_Dict = pickle.load(handle)

getAvailableElements()

Returns a list of all available elements from the ASD pickle file

Source code in mass2/calibration/hci_lines.py
52
53
54
55
def getAvailableElements(self) -> list[str]:
    """Returns a list of all available elements from the ASD pickle file"""

    # The top-level dict is keyed by elemental symbol.
    return [*self.NIST_ASD_Dict]

getAvailableLevels(element, spectralCharge, requiredConf=None, requiredTerm=None, requiredJVal=None, maxLevels=None, units='eV', getUncertainty=True)

For a given element and spectral charge state, return a dict of all known levels from the ASD pickle file

Parameters:
  • element (str) –

    Elemental atomic symbol, e.g. 'Ne'

  • spectralCharge (int) –

    spectral charge state, e.g. 1 for neutral atoms, 10 for H-like Ne

  • requiredConf (str | None, default: None ) –

    if not None, limits results to those with conf == requiredConf, by default None

  • requiredTerm (str | None, default: None ) –

    if not None, limits results to those with term == requiredTerm, by default None

  • requiredJVal (str | None, default: None ) –

    if not None, limits results to those with JVal == requiredJVal, by default None

  • maxLevels (int | None, default: None ) –

    the maximum number of levels (sorted by energy) to return, by default None

  • units (str, default: 'eV' ) –

    'cm-1' or 'eV' for returned line position. If 'eV', converts from database 'cm-1' values, by default "eV"

  • getUncertainty (bool, default: True ) –

    whether to return uncertain values, by default True

Returns:
  • dict

    A dictionary of energy level strings to energy levels.

Source code in mass2/calibration/hci_lines.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def getAvailableLevels(
    self,
    element: str,
    spectralCharge: int,
    requiredConf: str | None = None,
    requiredTerm: str | None = None,
    requiredJVal: str | None = None,
    maxLevels: int | None = None,
    units: str = "eV",
    getUncertainty: bool = True,
) -> dict:
    """For a given element and spectral charge state, return a dict of all known levels from the ASD pickle file

    Parameters
    ----------
    element : str
        Elemental atomic symbol, e.g. 'Ne'
    spectralCharge : int
        spectral charge state, e.g. 1 for neutral atoms, 10 for H-like Ne
    requiredConf : str | None, optional
        if not None, limits results to those with `conf == requiredConf`, by default None
    requiredTerm : str | None, optional
        if not None, limits results to those with `term == requiredTerm`, by default None
    requiredJVal : str | None, optional
        if not None, limits results to those with `a == requiredJVal`, by default None
    maxLevels : int | None, optional
        the maximum number of levels (sorted by energy) to return, by default None
    units : str, optional
        'cm-1' or 'eV' for returned line position. If 'eV', converts from database 'cm-1' values, by default "eV"
    getUncertainty : bool, optional
        whether to return uncertain values, by default True

    Returns
    -------
    dict
        A dictionary of energy level strings to energy levels.
    """
    if units not in {"eV", "cm-1"}:
        raise ValueError("Unit type not supported, please use eV or cm-1")

    spectralCharge = int(spectralCharge)
    levelsDict: dict = {}
    numLevels = 0
    for iLevel in list(self.NIST_ASD_Dict[element][spectralCharge].keys()):
        try:
            # Check to see if we reached maximum number of levels to return
            if maxLevels is not None:
                if numLevels == maxLevels:
                    return levelsDict
            # If required, check to see if level matches search conf, term, JVal
            includeTerm = False
            includeJVal = False
            conf, term, j_str = iLevel.split()
            JVal = j_str.split("=")[1]
            includeConf = (requiredConf is None) or conf == requiredConf
            includeTerm = (requiredTerm is None) or term == requiredTerm
            includeJVal = (requiredJVal is None) or JVal == requiredJVal

            # Include levels that match, in either cm-1 or eV
            if includeConf and includeTerm and includeJVal:
                numLevels += 1
                if units == "cm-1":
                    if getUncertainty:
                        levelsDict[iLevel] = self.NIST_ASD_Dict[element][spectralCharge][iLevel]
                    else:
                        levelsDict[iLevel] = self.NIST_ASD_Dict[element][spectralCharge][iLevel][0]
                elif units == "eV":
                    if getUncertainty:
                        levelsDict[iLevel] = [
                            iValue * INVCM_TO_EV for iValue in self.NIST_ASD_Dict[element][spectralCharge][iLevel]
                        ]
                    else:
                        levelsDict[iLevel] = INVCM_TO_EV * self.NIST_ASD_Dict[element][spectralCharge][iLevel][0]
        except ValueError:
            f"Warning: cannot parse level: {iLevel}"
    return levelsDict

getAvailableSpectralCharges(element)

For a given element, returns a list of all available charge states from the ASD pickle file

Parameters:
  • element (str) –

    atomic symbol of element, e.g. 'Ne'

Returns:
  • list[int]

    Available charge states

Source code in mass2/calibration/hci_lines.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def getAvailableSpectralCharges(self, element: str) -> list[int]:
    """For a given element, returns a list of all available charge states from the ASD pickle file

    Parameters
    ----------
    element : str
        atomic symbol of element, e.g. 'Ne'

    Returns
    -------
    list[int]
        Available charge states
    """

    # The per-element dict is keyed by spectral charge state.
    return [charge for charge in self.NIST_ASD_Dict[element]]

getSingleLevel(element, spectralCharge, conf, term, JVal, units='eV', getUncertainty=True)

Return the level data for a fully defined element, charge state, conf, term, and JVal.

Parameters:
  • element (str) –

    atomic symbol of element, e.g. 'Ne'

  • spectralCharge (int) –

    spectral charge state, e.g. 1 for neutral atoms, 10 for H-like Ne

  • conf (str) –

    nuclear configuration, e.g. '2p'

  • term (str) –

    nuclear term, e.g. '2P*'

  • JVal (str) –

    total angular momentum J, e.g. '3/2'

  • units (str, default: 'eV' ) –

    'cm-1' or 'eV' for returned line position. If 'eV', converts from database 'cm-1' values, by default "eV"

  • getUncertainty (bool, default: True ) –

    includes uncertainties in list of levels, by default True

Returns:
  • float

    The energy of the requested level, in the requested units (a [value, uncertainty] list when getUncertainty is True)

Source code in mass2/calibration/hci_lines.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def getSingleLevel(
    self, element: str, spectralCharge: int, conf: str, term: str, JVal: str, units: str = "eV", getUncertainty: bool = True
) -> float:
    """Return the level data for a fully defined element, charge state, conf, term, and JVal.

    Parameters
    ----------
    element : str
        atomic symbol of element, e.g. 'Ne'
    spectralCharge : int
        spectral charge state, e.g. 1 for neutral atoms, 10 for H-like Ne
    conf : str
        nuclear configuration, e.g. '2p'
    term : str
        nuclear term, e.g. '2P*'
    JVal : str
        total angular momentum J, e.g. '3/2'
    units : str, optional
        'cm-1' or 'eV' for returned line position. If 'eV', converts from database 'cm-1' values, by default "eV"
    getUncertainty : bool, optional
        includes uncertainties in list of levels, by default True

    Returns
    -------
    float
        The level energy in the requested units; a [value, uncertainty] list when getUncertainty is True.
    """

    # Validate units up front; the database stores values in cm-1.
    if units not in {"eV", "cm-1"}:
        raise ValueError("Unit type not supported, please use eV or cm-1")
    levelString = f"{conf} {term} J={JVal}"
    levelData = self.NIST_ASD_Dict[element][spectralCharge][levelString]
    if units == "cm-1":
        levelEnergy = levelData if getUncertainty else levelData[0]
    else:
        if getUncertainty:
            levelEnergy = [iValue * INVCM_TO_EV for iValue in levelData]
        else:
            levelEnergy = levelData[0] * INVCM_TO_EV
    return levelEnergy

add_H_like_lines_from_asd(asd, element, maxLevels=None)

Add all known H-like lines for a given element from the ASD database

Source code in mass2/calibration/hci_lines.py
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
def add_H_like_lines_from_asd(asd: NIST_ASD, element: str, maxLevels: int | None = None) -> list[SpectralLine]:
    """Add all known H-like lines for a given element from the ASD database"""
    # H-like: the spectral charge equals the atomic number.
    spectr_ch = xraydb.atomic_number(element)
    if maxLevels is not None:
        levelsDict = asd.getAvailableLevels(element, spectralCharge=spectr_ch, maxLevels=maxLevels + 1)
    else:
        levelsDict = asd.getAvailableLevels(element, spectralCharge=spectr_ch)
    added_lines = []
    for levelName, levelData in levelsDict.items():
        lineEnergy = levelData[0]
        # The ground state (energy 0) is not a line; skip it.
        if lineEnergy != 0.0:
            added_lines.append(
                add_hci_line(
                    element=element, spectr_ch=spectr_ch, line_identifier=levelName, energies=[lineEnergy], widths=[0.1], ratios=[1.0]
                )
            )
    return added_lines

add_He_like_lines_from_asd(asd, element, maxLevels=None)

Add all known He-like lines for a given element from the ASD database

Source code in mass2/calibration/hci_lines.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
def add_He_like_lines_from_asd(asd: NIST_ASD, element: str, maxLevels: int | None = None) -> list[SpectralLine]:
    """Add all known He-like lines for a given element from the ASD database"""
    # He-like: the spectral charge is the atomic number minus one.
    spectr_ch = xraydb.atomic_number(element) - 1
    if maxLevels is not None:
        levelsDict = asd.getAvailableLevels(element, spectralCharge=spectr_ch, maxLevels=maxLevels + 1)
    else:
        levelsDict = asd.getAvailableLevels(element, spectralCharge=spectr_ch)
    added_lines = []
    for levelName, levelData in levelsDict.items():
        lineEnergy = levelData[0]
        # The ground state (energy 0) is not a line; skip it.
        if lineEnergy != 0.0:
            added_lines.append(
                add_hci_line(
                    element=element, spectr_ch=spectr_ch, line_identifier=levelName, energies=[lineEnergy], widths=[0.1], ratios=[1.0]
                )
            )
    return added_lines

add_hci_line(element, spectr_ch, line_identifier, energies, widths, ratios, nominal_peak_energy=None)

Add a single HCI line to the fluorescence_lines database

Parameters:
  • element (str) –

    The element whose line is being added, e.g. 'Ne'

  • spectr_ch (int) –

    The charge state of the ion whose line is being added, e.g. 9 for H-like Ne

  • line_identifier (str) –

    The line identifier, e.g. '1s2S1/2 - 2p2P3/2'

  • energies (ArrayLike) –

    The energies of the components of the line, in eV

  • widths (ArrayLike) –

    The Lorentzian FWHM widths of the components of the line, in eV

  • ratios (ArrayLike) –

    The relative intensities of the components of the line

  • nominal_peak_energy (float | None, default: None ) –

    The nominal spectral peak in eV, by default None

Returns:
Source code in mass2/calibration/hci_lines.py
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def add_hci_line(
    element: str,
    spectr_ch: int,
    line_identifier: str,
    energies: ArrayLike,
    widths: ArrayLike,
    ratios: ArrayLike,
    nominal_peak_energy: float | None = None,
) -> SpectralLine:
    """Add a single HCI line to the fluorescence_lines database

    Parameters
    ----------
    element : str
        The element whose line is being added, e.g. 'Ne'
    spectr_ch : int
        The charge state of the ion whose line is being added, e.g. 9 for H-like Ne
    line_identifier : str
        The line identifier, e.g. '1s2S1/2 - 2p2P3/2'
    energies : ArrayLike
        The energies of the components of the line, in eV
    widths : ArrayLike
        The Lorentzian FWHM widths of the components of the line, in eV
    ratios : ArrayLike
        The relative intensities of the components of the line
    nominal_peak_energy : float | None, optional
        The nominal spectral peak in eV, by default None

    Returns
    -------
    SpectralLine
        The newly added SpectralLine object
    """
    energy_arr = np.asarray(energies)
    width_arr = np.asarray(widths)
    ratio_arr = np.asarray(ratios)
    if nominal_peak_energy is None:
        # Default to the intensity-weighted mean of the component energies.
        nominal_peak_energy = np.dot(energy_arr, ratio_arr) / np.sum(ratio_arr)

    return fluorescence_lines.addline(
        element=element,
        material="Highly Charged Ion",
        linetype=f"{int(spectr_ch)} {line_identifier}",
        reference_short="NIST ASD",
        reference_plot_instrument_gaussian_fwhm=0.5,
        nominal_peak_energy=nominal_peak_energy,
        energies=energy_arr,
        lorentzian_fwhm=width_arr,
        reference_amplitude=ratio_arr,
        reference_amplitude_type=AmplitudeType.LORENTZIAN_PEAK_HEIGHT,
        ka12_energy_diff=None,
    )

hci_models.py

Some useful methods for initializing GenericLineModel and CompositeMLEModel objects applied to HCI lines.

June 2020 Paul Szypryt

add_bg_model(generic_model, vary_slope=False)

Adds a LinearBackgroundModel to a generic lmfit model

Parameters:
  • generic_model (GenericLineModel) –

    object to which to add a linear background model

  • vary_slope (bool, default: False ) –

    allows a varying linear slope rather than just constant value, by default False

Returns:
Source code in mass2/calibration/hci_models.py
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
def add_bg_model(generic_model: GenericLineModel, vary_slope: bool = False) -> GenericLineModel:
    """Adds a LinearBackgroundModel to a generic lmfit model

    Parameters
    ----------
    generic_model : GenericLineModel
        object to which to add a linear background model
    vary_slope : bool, optional
        allows a varying linear slope rather than just constant value, by default False

    Returns
    -------
    GenericLineModel
        The input model, with background componets added
    """
    # composite_name = generic_model._name
    # bg_prefix = f"{composite_name}_".replace(" ", "_").replace("J=", "").replace("/", "_").replace("*", "").replace(".", "")
    raise NotImplementedError("No LinearBackgroundModel still exists in mass2")

initialize_HLike_2P_model(element, conf, has_linear_background=False, has_tails=False, vary_amp_ratio=False)

Initializes H-like 2P models consisting of J=1/2 and J=3/2 lines

Parameters:
  • element (str) –

    atomic symbol as str, e.g. 'Ne' or 'Ar'

  • conf (str) –

    nuclear configuration as str, e.g. '2p' or '3p'

  • has_linear_background (bool, default: False ) –

    include a single linear background on top of the 2 Lorentzians, by default False

  • has_tails (bool, default: False ) –

    include low energy tail in the model, by default False

  • vary_amp_ratio (bool, default: False ) –

    allow the ratio of the J=3/2 to J=1/2 states to vary away from 2, by default False

Returns:
Source code in mass2/calibration/hci_models.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def initialize_HLike_2P_model(
    element: str, conf: str, has_linear_background: bool = False, has_tails: bool = False, vary_amp_ratio: bool = False
) -> GenericLineModel:
    """Initializes H-like 2P models consisting of J=1/2 and J=3/2 lines

    Parameters
    ----------
    element : str
        atomic symbol as str, e.g. 'Ne' or 'Ar'
    conf : str
        nuclear configuration as str, e.g. '2p' or '3p'
    has_linear_background : bool, optional
        include a single linear background on top of the 2 Lorentzians, by default False
    has_tails : bool, optional
        include low energy tail in the model, by default False
    vary_amp_ratio : bool, optional
        allow the ratio of the J=3/2 to J=1/2 states to vary away from 2, by default False

    Returns
    -------
    GenericLineModel
        The new composite line
    """

    def _prefix_for(line_name: str) -> str:
        # lmfit parameter prefixes must avoid spaces and punctuation.
        return f"{line_name}_".replace(" ", "_").replace("J=", "").replace("/", "_").replace("*", "").replace(".", "")

    charge = int(xraydb.atomic_number(element))
    line_name_1_2 = f"{element}{charge} {conf} 2P* J=1/2"
    line_name_3_2 = f"{element}{charge} {conf} 2P* J=3/2"
    prefix_1_2 = _prefix_for(line_name_1_2)
    prefix_3_2 = _prefix_for(line_name_3_2)
    # Build one model per fine-structure component; the background (if any) is
    # attached once at the composite level below.
    model_1_2 = spectra[line_name_1_2].model(has_linear_background=False, has_tails=has_tails, prefix=prefix_1_2)
    model_3_2 = spectra[line_name_3_2].model(has_linear_background=False, has_tails=has_tails, prefix=prefix_3_2)
    composite_model = initialize_hci_composite_model(
        composite_name=f"{element}{charge} {conf}",
        individual_models=[model_1_2, model_3_2],
        has_linear_background=has_linear_background,
        peak_component_name=line_name_3_2,
    )
    # Tie the J=1/2 intensity to the J=3/2 intensity through a (possibly fixed) ratio.
    amp_ratio_param_name = f"{element}{charge}_{conf}_amp_ratio"
    composite_model.set_param_hint(name=amp_ratio_param_name, value=0.5, min=0.0, vary=vary_amp_ratio)
    composite_model.set_param_hint(f"{prefix_1_2}integral", expr=f"{prefix_3_2}integral * {amp_ratio_param_name}")
    return composite_model

initialize_HeLike_complex_model(element, has_linear_background=False, has_tails=False, additional_line_names=[])

Initializes 1s2s,2p He-like complexes for a given element.

By default, uses only the 1s.2s 3S J=1, 1s.2p 3P* J=1, and 1s.2p 1P* J=1 lines.

Parameters:
  • element (str) –

    atomic symbol as str, e.g. 'Ne' or 'Ar'

  • has_linear_background (bool, default: False ) –

    include a single linear background on top of the Lorentzian models, by default False

  • has_tails (bool, default: False ) –

    include low energy tail in the model, by default False

  • additional_line_names (list, default: [] ) –

    additional line names to include in model, e.g. low level Li/Be-like features, by default []

Returns:
Source code in mass2/calibration/hci_models.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def initialize_HeLike_complex_model(
    element: str, has_linear_background: bool = False, has_tails: bool = False, additional_line_names: list | None = None
) -> GenericLineModel:
    """Initializes 1s2s,2p He-like complexes for a given element.

    By default, uses only the 1s.2s 3S J=1, 1s.2p 3P* J=1, and 1s.2p 1P* J=1 lines.

    Parameters
    ----------
    element : str
        atomic symbol as str, e.g. 'Ne' or 'Ar'
    has_linear_background : bool, optional
        include a single linear background on top of the Lorentzian models, by default False
    has_tails : bool, optional
        include low energy tail in the model, by default False
    additional_line_names : list | None, optional
        additional line names to include in model, e.g. low level Li/Be-like features,
        by default None (treated as an empty list)

    Returns
    -------
    GenericLineModel
        A model of the given HCI complex.
    """
    # Avoid the mutable-default-argument pitfall: a shared [] default object
    # would persist (and could be mutated) across calls.
    if additional_line_names is None:
        additional_line_names = []

    # He-like charge state is Z - 1 (e.g. Ne -> "Ne9" in this naming scheme).
    charge = int(xraydb.atomic_number(element) - 1)
    line_name_1s2s_3S = f"{element}{charge} 1s.2s 3S J=1"
    line_name_1s2p_3P = f"{element}{charge} 1s.2p 3P* J=1"
    line_name_1s2p_1P = f"{element}{charge} 1s.2p 1P* J=1"
    line_names = np.hstack([[line_name_1s2s_3S, line_name_1s2p_3P, line_name_1s2p_1P], additional_line_names])
    # Build one single-Lorentzian model per line. The linear background (if any)
    # is added once to the composite, not to each component.
    individual_models = [
        initialize_hci_line_model(i_line_name, has_linear_background=False, has_tails=has_tails) for i_line_name in line_names
    ]
    # The 1s.2p 1P* resonance line serves as the reference peak of the composite.
    composite_name = f"{element}{charge} 1s2s_2p Complex"
    composite_model = initialize_hci_composite_model(
        composite_name=composite_name,
        individual_models=individual_models,
        has_linear_background=has_linear_background,
        peak_component_name=line_name_1s2p_1P,
    )
    return composite_model

initialize_hci_composite_model(composite_name, individual_models, has_linear_background=False, peak_component_name=None)

Initializes composite lmfit model from the sum of input models

Parameters:
  • composite_name (str) –

    name given to composite line model

  • individual_models (list[GenericLineModel]) –

    Models to sum into a composite

  • has_linear_background (bool, default: False ) –

    include a single linear background on top of group of lorentzians, by default False

  • peak_component_name (str | None, default: None ) –

    designate a component to be a peak for energy, all expressions are referenced to this component, by default None

Returns:
Source code in mass2/calibration/hci_models.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def initialize_hci_composite_model(
    composite_name: str,
    individual_models: list[GenericLineModel],
    has_linear_background: bool = False,
    peak_component_name: str | None = None,
) -> GenericLineModel:
    """Initializes composite lmfit model from the sum of input models

    All non-peak components are constrained to share the peak component's fwhm
    and dph_de, and to sit at a fixed pulse-height separation from it derived
    from the database peak energies, so the composite fits one resolution and
    one gain for the whole group.

    Parameters
    ----------
    composite_name : str
        name given to composite line model
    individual_models : list[GenericLineModel]
        Models to sum into a composite
    has_linear_background : bool, optional
        include a single linear background on top of group of lorentzians, by default False
    peak_component_name : str | None, optional
        designate a component to be a peak for energy, all expressions are referenced to this component, by default None
        (in which case the first GenericLineModel component is used)

    Returns
    -------
    GenericLineModel
        The new composite line
    """

    # np.sum folds the models together with the + operator, yielding an lmfit composite.
    composite_model: GenericLineModel = np.sum(individual_models)
    composite_model.name = composite_name
    if has_linear_background:
        composite_model = add_bg_model(composite_model)
    # Workaround for energy calibration using composite models, pick 1st GenericLineModel component
    # (background components, if added, are filtered out here).
    line_model_components = [
        i_comp for i_comp in composite_model.components if isinstance(i_comp, mass2.calibration.line_models.GenericLineModel)
    ]
    if peak_component_name is None:
        peak_component_name = line_model_components[0]._name
    peak_component_index = [i_comp._name for i_comp in line_model_components].index(peak_component_name)
    peak_component = line_model_components[peak_component_index]
    # Record which component anchors the energy scale; used by calibration code.
    composite_model.peak_prefix = peak_component.prefix
    composite_model.peak_energy = peak_component.spect.peak_energy
    # Set up some constraints relative to peak_component
    num_line_components = len(line_model_components)
    line_component_prefixes = [iComp.prefix for iComp in line_model_components]
    line_component_energies = [iComp.spect.peak_energy for iComp in line_model_components]
    for i in np.arange(num_line_components):
        if i != peak_component_index:
            # Single fwhm across model
            composite_model.set_param_hint(f"{line_component_prefixes[i]}fwhm", expr=f"{composite_model.peak_prefix}fwhm")
            # Single dph_de across model
            composite_model.set_param_hint(f"{line_component_prefixes[i]}dph_de", expr=f"{composite_model.peak_prefix}dph_de")
            # Fixed energy separation based on database values
            # (expressed in pulse-height units via the shared dph_de gain).
            separation = line_component_energies[i] - composite_model.peak_energy
            hint = f"({separation} * {composite_model.peak_prefix}dph_de) + {composite_model.peak_prefix}peak_ph"
            composite_model.set_param_hint(f"{line_component_prefixes[i]}peak_ph", expr=hint)
    composite_model.shortname = composite_name
    return composite_model

initialize_hci_line_model(line_name, has_linear_background=False, has_tails=False)

Initializes a single lorentzian HCI line model. Reformats line_name to create a lmfit valid prefix.

Parameters:
  • line_name (str) –

    name of line to use in mass2.spectra

  • has_linear_background (bool, default: False ) –

    include linear background in the model, by default False

  • has_tails (bool, default: False ) –

    include low-energy tail in the model, by default False

Returns:
Source code in mass2/calibration/hci_models.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def initialize_hci_line_model(line_name: str, has_linear_background: bool = False, has_tails: bool = False) -> GenericLineModel:
    """Build a single-Lorentzian HCI line model for the named line.

    The line name is munged into an lmfit-legal parameter prefix by substituting
    or dropping the characters lmfit cannot accept in parameter names.

    Parameters
    ----------
    line_name : str
        name of line to use in mass2.spectra
    has_linear_background : bool, optional
        include linear background in the model, by default False
    has_tails : bool, optional
        include low-energy tail in the model, by default False

    Returns
    -------
    GenericLineModel
        New HCI line.
    """
    hci_line = spectra[line_name]
    # Spaces and slashes become underscores; "J=", "*" and "." are removed.
    prefix = f"{line_name}_"
    for unwanted, replacement in ((" ", "_"), ("J=", ""), ("/", "_"), ("*", ""), (".", "")):
        prefix = prefix.replace(unwanted, replacement)
    model = hci_line.model(has_linear_background=has_linear_background, has_tails=has_tails, prefix=prefix)
    model.shortname = line_name
    return model

models(has_linear_background=False, has_tails=False, vary_Hlike_amp_ratio=False, additional_Helike_complex_lines=[])

Generates some commonly used HCI line models that can be used for energy calibration, etc.

Parameters:
  • has_linear_background (bool, default: False ) –

    include a single linear background on top of the 2 Lorentzians, by default False

  • has_tails (bool, default: False ) –

    include low-energy tail in the model, by default False

  • vary_Hlike_amp_ratio (bool, default: False ) –

    allow the ratio of the J=3/2 to J=1/2 H-like states to vary, by default False

  • additional_Helike_complex_lines (list, default: [] ) –

    additional line names to include in the He-like complex model, e.g. low level Li/Be-like features, by default []

Returns:
  • dict

    Dictionary of lmfit line models keyed by model name, containing commonly used HCI line models.

Source code in mass2/calibration/hci_models.py
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
def models(
    has_linear_background: bool = False,
    has_tails: bool = False,
    vary_Hlike_amp_ratio: bool = False,
    additional_Helike_complex_lines: list | None = None,
) -> dict:
    """Generates some commonly used HCI line models that can be used for energy calibration, etc.

    Parameters
    ----------
    has_linear_background : bool, optional
        include a single linear background on top of the 2 Lorentzians, by default False
    has_tails : bool, optional
        include low-energy tail in the model, by default False
    vary_Hlike_amp_ratio : bool, optional
        allow the ratio of the J=3/2 to J=1/2 H-like states to vary, by default False
    additional_Helike_complex_lines : list | None, optional
        additional line names to include in the He-like complex model, e.g. low level
        Li/Be-like features, by default None (treated as an empty list)

    Returns
    -------
    dict
        Dictionary of lmfit line models keyed by model name, containing commonly used HCI lines.
    """
    # Avoid the mutable-default-argument pitfall: a shared [] default object
    # would persist (and could be mutated) across calls.
    if additional_Helike_complex_lines is None:
        additional_Helike_complex_lines = []

    models_dict = {}
    # Make some common H-like 2P* models
    conf_Hlike_2P_dict = {
        "N": ["3p", "4p", "5p"],
        "O": ["3p", "4p", "5p"],
        "Ne": ["2p", "3p", "4p", "5p"],
        "Ar": ["2p", "3p", "4p", "5p"],
    }
    for i_element, i_confs in conf_Hlike_2P_dict.items():
        for i_conf in i_confs:
            Hlike_model = initialize_HLike_2P_model(
                i_element,
                i_conf,
                has_linear_background=has_linear_background,
                has_tails=has_tails,
                vary_amp_ratio=vary_Hlike_amp_ratio,
            )
            models_dict[Hlike_model._name] = Hlike_model

    # Make some common He-like 1s2s,2p complex and higher order 1p* models
    # He-like lines
    Helike_complex_elements = ["N", "O", "Ne", "Ar"]
    for i_element in Helike_complex_elements:
        Helike_model = initialize_HeLike_complex_model(
            i_element,
            has_linear_background=has_linear_background,
            has_tails=has_tails,
            additional_line_names=additional_Helike_complex_lines,
        )
        models_dict[Helike_model._name] = Helike_model
    # 1s.np 1P* lines for n>=3
    conf_Helike_1P_dict = {
        "N": ["1s.4p", "1s.5p"],
        "O": ["1s.4p", "1s.5p"],
        "Ne": ["1s.3p", "1s.4p", "1s.5p"],
        "Ar": ["1s.3p", "1s.4p", "1s.5p"],
    }
    for i_element, i_confs in conf_Helike_1P_dict.items():
        # He-like charge state is Z - 1 in this naming scheme.
        i_charge = int(xraydb.atomic_number(i_element) - 1)
        for i_conf in i_confs:
            Helike_line_name = f"{i_element}{i_charge} {i_conf} 1P* J=1"
            Helike_model = initialize_hci_line_model(
                Helike_line_name, has_linear_background=has_linear_background, has_tails=has_tails
            )
            models_dict[Helike_model._name] = Helike_model

    # Some more complicated cases
    # 500 eV region of H-/He-like N
    N6_1s3p_model = initialize_hci_line_model("N6 1s.3p 1P* J=1", has_linear_background=False, has_tails=has_tails)
    N7_2p_model = initialize_HLike_2P_model(
        "N", "2p", has_linear_background=False, has_tails=has_tails, vary_amp_ratio=vary_Hlike_amp_ratio
    )
    N_500eV_model = initialize_hci_composite_model(
        "N 500eV Region",
        [N6_1s3p_model, N7_2p_model],
        has_linear_background=has_linear_background,
        peak_component_name="N7 2p 2P* J=3/2",
    )
    models_dict[N_500eV_model._name] = N_500eV_model
    # 660 eV region of H-/He-like O
    O8_2p_model = initialize_HLike_2P_model(
        "O", "2p", has_linear_background=False, has_tails=has_tails, vary_amp_ratio=vary_Hlike_amp_ratio
    )
    O7_1s3p_model = initialize_hci_line_model("O7 1s.3p 1P* J=1", has_linear_background=False, has_tails=has_tails)
    O_660eV_model = initialize_hci_composite_model(
        "O 660eV Region",
        [O8_2p_model, O7_1s3p_model],
        has_linear_background=has_linear_background,
        peak_component_name="O8 2p 2P* J=3/2",
    )
    models_dict[O_660eV_model._name] = O_660eV_model

    return models_dict

Bookkeeping

import_asd.py

Tool for converting a NIST ASD levels sql dump into a pickle file

February 2020 Paul Szypryt

parseLine(energyLevelsDict, fieldNamesDict, formattedLine)

Parse a line from the ASD sql dump and add it to the energyLevelsDict

Parameters:
  • energyLevelsDict (dict[str, dict[int, dict[str, list[float]]]]) –

    output dictionary, updated in place: element -> charge state -> level name -> [energy, uncertainty] in cm^-1

  • fieldNamesDict (dict[str, Any]) –

    maps table names to their ordered column-name lists; only the "ASD_Levels" entry is used

  • formattedLine (str) –

    the VALUES portion of the SQL insert, reformatted as a Python literal suitable for ast.literal_eval

Source code in mass2/calibration/import_asd.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def parseLine(
    energyLevelsDict: dict[str, dict[int, dict[str, list[float]]]], fieldNamesDict: dict[str, Any], formattedLine: str
) -> None:
    """Parse one ASD_Levels VALUES line and merge its levels into energyLevelsDict.

    Parameters
    ----------
    energyLevelsDict : dict[str, dict[int, dict[str, list[float]]]]
        Output dictionary, updated in place: element -> spectroscopic charge ->
        "conf term J=j" -> [energy, uncertainty], both in cm^-1 (NaN when not parseable).
    fieldNamesDict : dict[str, Any]
        Maps table names to their ordered column-name lists; only "ASD_Levels" is used here.
    formattedLine : str
        The VALUES portion of an "INSERT INTO `ASD_Levels`" statement, already
        reformatted as a Python literal (a tuple of row tuples) for ast.literal_eval.
    """
    fields = fieldNamesDict["ASD_Levels"]
    # Hoist the column lookups out of the per-row loop (list.index is O(n) each call).
    idx = {name: fields.index(name) for name in ("element", "spectr_charge", "conf", "term", "j_val", "energy", "unc")}

    def _to_float(text: str) -> float:
        """Convert an ASD numeric string to float; non-numeric entries become NaN."""
        try:
            return float(text)
        except ValueError:
            return float("nan")

    lineAsArray = np.array(ast.literal_eval(formattedLine))
    for iEntry in lineAsArray:
        element = iEntry[idx["element"]]
        spectr_charge = int(iEntry[idx["spectr_charge"]])
        # Pull information that will be used to name dictionary keys
        conf = iEntry[idx["conf"]]
        term = iEntry[idx["term"]]
        j_val = iEntry[idx["j_val"]]
        # Pull energy and uncertainty (cm^-1, stored as strings)
        energy_inv_cm = _to_float(iEntry[idx["energy"]])
        unc_inv_cm = _to_float(iEntry[idx["unc"]])
        # Skip rows lacking a configuration or term; "*" marks an unassigned term.
        if conf and term and term != "*":
            levelName = f"{conf} {term} J={j_val}"
            energyLevelsDict.setdefault(element, {}).setdefault(spectr_charge, {})[levelName] = [energy_inv_cm, unc_inv_cm]

write_asd_pickle(inputFilename, outputFilename)

Write the levels from a NIST Atomic Spectra Database SQL dump to a pickle file

Parameters:
  • inputFilename (str) –

    The ASD's sql dump file name

  • outputFilename (str) –

    The pickle file name to write the output dictionary to

Source code in mass2/calibration/import_asd.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def write_asd_pickle(inputFilename: str, outputFilename: str) -> None:
    """Write the levels from a NIST Atomic Spectra Database SQL dump to a pickle file

    Parameters
    ----------
    inputFilename : str
        The ASD's sql dump file name
    outputFilename : str
        The pickle file name to write the output dictionary to
    """
    createTableString = "CREATE TABLE"
    valueSearchString = r"\`([^\`]*)\`"  # captures one backtick-quoted identifier
    tableName = ""
    fieldNamesDict: dict[str, Any] = {}
    energyLevelsDict: dict[str, dict[int, dict[str, list[float]]]] = {}
    with open(inputFilename, "r", encoding="utf-8") as ASD_file:
        for line in ASD_file:
            # Create dictionary of field names for various tables
            if line.startswith(createTableString):
                match = re.search(valueSearchString, line)
                if match is not None:
                    # Bug fix: remember which table's CREATE block we are inside, so
                    # the backtick-quoted column lines that follow get filed under it.
                    # (Previously tableName was never assigned, so the elif branch
                    # below was dead and no field names were ever collected.)
                    tableName = match.groups()[0]
                    fieldNamesDict[tableName] = []
            elif tableName and line.strip().startswith("`"):
                match = re.search(valueSearchString, line)
                if match is not None:
                    fieldNamesDict[tableName].append(match.groups()[0])
            # Parse Levels portion
            elif line.startswith("INSERT INTO `ASD_Levels` VALUES"):
                partitionedLine = line.partition(" VALUES ")[-1].strip()
                # NULL fields become empty strings so the row parses as a Python literal.
                nullReplacedLine = partitionedLine.replace("NULL", "''")
                # Drop the trailing SQL statement terminator, if present.
                formattedLine = nullReplacedLine.removesuffix(";")
                parseLine(energyLevelsDict, fieldNamesDict, formattedLine)

    # Sort levels within an element/charge state by energy
    outputDict: dict[str, dict[int, dict[str, list[float]]]] = {}
    for iElement, element in energyLevelsDict.items():
        for iCharge, chargestate in element.items():
            energyOrder = np.argsort(np.array(list(chargestate.values()))[:, 0])
            orderedKeys = np.array(list(chargestate.keys()))[energyOrder]
            orderedValues = np.array(list(chargestate.values()))[energyOrder]
            for i, iKey in enumerate(list(orderedKeys)):
                outputDict.setdefault(iElement, {}).setdefault(iCharge, {})[str(iKey)] = orderedValues[i].tolist()

    # Write dict to pickle file (protocol 2 for long-term compatibility)
    with open(outputFilename, "wb") as handle:
        pickle.dump(outputDict, handle, protocol=2)

nist_xray_database

Download the NIST x-ray line database from the website, and parse the downloaded data into useable form.

For loading a file (locally, from disk) and plotting some information: * NISTXrayDBFile * plot_line_uncertainties

For updating the data files: * NISTXrayDBRetrieve * GetAllLines

Basic usage (assuming you put the x-ray files in ${MASS_HOME}/mass2/calibration/nist_xray_data.dat):

J. Fowler, NIST February 2014

NISTXrayDBFile

A NIST X-ray database file, loaded from disk.

Source code in mass2/calibration/nist_xray_database.py
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
class NISTXrayDBFile:
    """A NIST X-ray database file, loaded from disk."""

    DEFAULT_FILENAMES = "nist_xray_data.dat", "low_z_xray_data.dat"

    def __init__(self, *filenames: str):
        """Initialize the database from 1 or more <filenames>, which point to
        files downloaded using NISTXrayDBRetrieve. If the list is empty (the
        default), then the files named by self.DEFAULT_FILENAMES will be used."""

        # name -> NISTXrayLine, plus a set of every line parsed.
        self.lines = {}
        self.alllines = set()

        if not filenames:
            # The default data files are shipped alongside this module.
            path = os.path.split(__file__)[0]
            filenames = tuple([os.path.join(path, df) for df in self.DEFAULT_FILENAMES])

        self.loaded_filenames = []
        for filename in filenames:
            try:
                fp = open(filename, "r", encoding="utf-8")
            except OSError:
                # Bug fix: report the actual offending filename (the previous
                # f-string contained no placeholder).
                print(f"'{filename}' is not a readable file with X-ray database info! Continuing...")
                continue

            with fp:
                # Skip forward past the column-header row. Bug fix: stop at EOF
                # (readline() returns ""), so a malformed file cannot hang here.
                while True:
                    line = fp.readline()
                    if not line:
                        break
                    if "Theory" in line and "Blend" in line and "Ref." in line:
                        break

                # Any line that fails to parse (blank lines, footnotes) is skipped.
                for textline in fp.readlines():
                    try:
                        xrayline = NISTXrayLine(textline)
                        self.lines[xrayline.name] = xrayline
                        self.alllines.add(xrayline)
                    except Exception:
                        continue

            self.loaded_filenames.append(filename)

    LINE_NICKNAMES = {
        "KA1": "KL3",
        "KA2": "KL2",
        "KB1": "KM3",
        "KB3": "KM2",
        "KB5": "KM5",
        "LA1": "L3M5",
        "LA2": "L3M4",
        "Ll": "L3M1",
        "LB3": "L1M3",
        "LB1": "L2M4",
        "LB2": "L3N5",
        "LG1": "L2N4",
    }

    def get_lines_by_type(self, linetype: str) -> tuple["NISTXrayLine"]:
        """Return a tuple containing all lines of a certain type, e.g., "KL3".
        See self.LINE_NICKNAMES for some known line "nicknames"."""
        linetype = linetype.upper()
        if "ALPHA" in linetype:
            linetype = linetype.replace("ALPHA", "A")
        elif "BETA" in linetype:
            linetype = linetype.replace("BETA", "B")
        elif "GAMMA" in linetype:
            linetype = linetype.replace("GAMMA", "G")
        linetype = self.LINE_NICKNAMES.get(linetype, linetype)
        lines = []
        for element in ELEMENTS:
            linename = f"{element} {linetype}"
            if linename in self.lines:
                lines.append(self.lines[linename])
        return tuple(lines)

    def __getitem__(self, key: str) -> "NISTXrayLine":
        """Get a line by its full name, e.g., "Fe KL3", or by a nickname, e.g., "Fe Kalpha1".

        Parameters
        ----------
        key : str
            The line name or nickname

        Returns
        -------
        NISTXrayLine
            The matching NISTXrayLine object

        Raises
        ------
        KeyError
            If not found
        """
        # Normalize "fe kl3" -> "Fe KL3" before the direct lookup.
        element, line = key.split()[:2]
        element = element.capitalize()
        line = line.upper()
        key = f"{element} {line}"
        if key in self.lines:
            return self.lines[key]
        # Fall back to nickname resolution, e.g. "kalpha1" -> "ka1" -> "KL3".
        lcline = line.lower()
        lcline = lcline.replace("alpha", "a")
        lcline = lcline.replace("beta", "b")
        lcline = lcline.replace("gamma", "g")
        # Bug fix: compare case-insensitively. Previously the lowercased nickname
        # was tested against the mostly-uppercase LINE_NICKNAMES keys, so the
        # documented nickname lookups (e.g. "Fe Kalpha1") always raised KeyError.
        nicknames = {nick.lower(): canonical for nick, canonical in self.LINE_NICKNAMES.items()}
        if lcline in nicknames:
            key = f"{element} {nicknames[lcline]}"
            return self.lines[key]
        raise KeyError(f"{key} is not a known line or line nickname")

__getitem__(key)

Get a line by its full name, e.g., "Fe KL3", or by a nickname, e.g., "Fe Kalpha1".

Parameters:
  • key (str) –

    The line name or nickname

Returns:
Raises:
  • KeyError

    If not found

Source code in mass2/calibration/nist_xray_database.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
def __getitem__(self, key: str) -> "NISTXrayLine":
    """Get a line by its full name, e.g., "Fe KL3", or by a nickname, e.g., "Fe Kalpha1".

    Parameters
    ----------
    key : str
        The line name or nickname

    Returns
    -------
    NISTXrayLine
        The matching NISTXrayLine object

    Raises
    ------
    KeyError
        If not found
    """
    # Normalize "fe kl3" -> "Fe KL3" before the direct lookup.
    element, line = key.split()[:2]
    element = element.capitalize()
    line = line.upper()
    key = f"{element} {line}"
    if key in self.lines:
        return self.lines[key]
    # Fall back to nickname resolution, e.g. "kalpha1" -> "ka1".
    lcline = line.lower()
    lcline = lcline.replace("alpha", "a")
    lcline = lcline.replace("beta", "b")
    lcline = lcline.replace("gamma", "g")
    # NOTE(review): lcline is lowercased here, but LINE_NICKNAMES keys are mostly
    # uppercase ("KA1", ...), so this membership test appears never to match and
    # nickname lookups like "Fe Kalpha1" would raise KeyError — verify intent.
    if lcline in self.LINE_NICKNAMES:
        key = f"{element} {self.LINE_NICKNAMES[lcline]}"
        return self.lines[key]
    raise KeyError(f"{key} is not a known line or line nickname")

__init__(*filenames)

Initialize the database from 1 or more filenames, which point to files downloaded using NISTXrayDBRetrieve. If the list is empty (the default), then the files named by self.DEFAULT_FILENAMES will be used.

Source code in mass2/calibration/nist_xray_database.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def __init__(self, *filenames: str):
    """Initialize the database from 1 or more <filenames>, which point to
    files downloaded using NISTXrayDBRetrieve. If the list is empty (the
    default), then the files named by self.DEFAULT_FILENAMES will be used."""

    # name -> NISTXrayLine, plus a set of every line parsed.
    self.lines = {}
    self.alllines = set()

    if not filenames:
        # The default data files are shipped alongside this module.
        path = os.path.split(__file__)[0]
        filenames = tuple([os.path.join(path, df) for df in self.DEFAULT_FILENAMES])

    self.loaded_filenames = []
    for filename in filenames:
        try:
            fp = open(filename, "r", encoding="utf-8")
        except OSError:
            # NOTE(review): this f-string has no placeholder, so it always prints
            # '(unknown)' rather than the offending filename — verify intent.
            print(f"'(unknown)' is not a readable file with X-ray database info! Continuing...")
            continue

        # Skip forward past the column-header row.
        # NOTE(review): readline() returns "" at EOF, so a file lacking this
        # header row would loop forever here — verify against the data files.
        while True:
            line = fp.readline()
            if "Theory" in line and "Blend" in line and "Ref." in line:
                break

        # Any line that fails to parse (blank lines, footnotes) is skipped.
        for textline in fp.readlines():
            try:
                xrayline = NISTXrayLine(textline)
                self.lines[xrayline.name] = xrayline
                self.alllines.add(xrayline)
            except Exception:
                continue

        self.loaded_filenames.append(filename)
        fp.close()

get_lines_by_type(linetype)

Return a tuple containing all lines of a certain type, e.g., "KL3". See self.LINE_NICKNAMES for some known line "nicknames".

Source code in mass2/calibration/nist_xray_database.py
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
def get_lines_by_type(self, linetype: str) -> tuple["NISTXrayLine"]:
    """Return a tuple containing all lines of a certain type, e.g., "KL3".
    See self.LINE_NICKNAMES for some known line "nicknames"."""
    linetype = linetype.upper()
    # Abbreviate the first spelled-out Greek letter found (only one, as before).
    for greek, abbrev in (("ALPHA", "A"), ("BETA", "B"), ("GAMMA", "G")):
        if greek in linetype:
            linetype = linetype.replace(greek, abbrev)
            break
    # Map nicknames (e.g. "KA1") onto canonical transition names.
    linetype = self.LINE_NICKNAMES.get(linetype, linetype)
    matches = [self.lines[name] for element in ELEMENTS if (name := f"{element} {linetype}") in self.lines]
    return tuple(matches)

NISTXrayLine

A single line from the NIST X-ray database.

Source code in mass2/calibration/nist_xray_database.py
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
class NISTXrayLine:
    """A single line from the NIST X-ray database."""

    DEFAULT_COLUMN_DEFS = {
        "element": (1, 4),
        "transition": (10, 16),
        "peak": (45, 59),
        "peak_unc": (61, 72),
        "blend": (74, 79),
        "ref": (81, 91),
    }

    def __init__(self, textline: str, column_defs: dict[str, tuple[int, int]] | None = None):
        """Initialize a NISTXrayLine from a line of text found in the NIST x-ray database file.

        Parameters
        ----------
        textline : str
            The text line from the database file
        column_defs : dict[str, tuple[int, int]] | None, optional
            The column boundaries of the relevant data, by default None
        """
        self.element = ""
        self.transition = ""
        self.peak = 0.0
        self.peak_unc = 0.0
        self.blend = ""
        self.ref = ""
        if column_defs is None:
            column_defs = self.DEFAULT_COLUMN_DEFS
        for name, colrange in column_defs.items():
            a = colrange[0] - 1
            b = colrange[1]
            self.__dict__[name] = textline[a:b].rstrip()
        self.peak = float(self.peak)
        self.peak_unc = float(self.peak_unc)
        self.name = f"{self.element} {self.transition}"
        self.raw = textline.rstrip()

    def __str__(self) -> str:
        """The user-friendly string representation of the line"""
        return f"{self.element} {self.transition} line: {self.peak:.3f} +- {self.peak_unc:.3f} eV"

    def __repr__(self) -> str:
        "The code representation of the line"
        return self.raw

__init__(textline, column_defs=None)

Initialize a NISTXrayLine from a line of text found in the NIST x-ray database file.

Parameters:
  • textline (str) –

    The text line from the database file

  • column_defs (dict[str, tuple[int, int]] | None, default: None ) –

    The column boundaries of the relevant data, by default None

Source code in mass2/calibration/nist_xray_database.py
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
def __init__(self, textline: str, column_defs: dict[str, tuple[int, int]] | None = None):
    """Initialize a NISTXrayLine from a line of text found in the NIST x-ray database file.

    Parameters
    ----------
    textline : str
        The text line from the database file
    column_defs : dict[str, tuple[int, int]] | None, optional
        The column boundaries of the relevant data (1-indexed, inclusive),
        by default None (use DEFAULT_COLUMN_DEFS)
    """
    # Placeholders; each is overwritten by the fixed-column extraction below.
    self.element = ""
    self.transition = ""
    self.peak = 0.0
    self.peak_unc = 0.0
    self.blend = ""
    self.ref = ""
    if column_defs is None:
        column_defs = self.DEFAULT_COLUMN_DEFS
    for name, colrange in column_defs.items():
        # Convert the 1-indexed inclusive column range to a Python slice.
        a = colrange[0] - 1
        b = colrange[1]
        self.__dict__[name] = textline[a:b].rstrip()
    # peak and peak_unc were extracted as strings; convert to eV floats.
    self.peak = float(self.peak)
    self.peak_unc = float(self.peak_unc)
    self.name = f"{self.element} {self.transition}"
    self.raw = textline.rstrip()

__repr__()

The code representation of the line

Source code in mass2/calibration/nist_xray_database.py
286
287
288
def __repr__(self) -> str:
    "The code representation of the line: the raw database text line it was parsed from"
    return self.raw

__str__()

The user-friendly string representation of the line

Source code in mass2/calibration/nist_xray_database.py
282
283
284
def __str__(self) -> str:
    """The user-friendly string representation of the line"""
    summary = "{} {} line: {:.3f} +- {:.3f} eV"
    return summary.format(self.element, self.transition, self.peak, self.peak_unc)

plot_line_energies()

Plot the energies of some common families of lines from the NIST X-ray database.

Source code in mass2/calibration/nist_xray_database.py
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
def plot_line_energies() -> None:
    """Plot the energies of some common families of lines from the NIST X-ray database.

    Produces a log-log plot of line energy vs atomic number, one curve per
    transition family, on the current matplotlib figure.
    """
    # Order matters: it fixes the legend ordering and the color assignments.
    transitions = (
        "KL2", "KL3", "KM5", "KM3", "KM2",
        "L3M5", "L3M4", "L3M1", "L2M4", "L2N4",
        "L3N5", "L1M3", "L3N7",
        "M5N7", "M5N6", "M4N6", "M3N5", "M3N4",
    )
    db = NISTXrayDBFile()
    plt.clf()
    cmap = plt.cm.nipy_spectral
    n_families = len(transitions)
    for idx, family in enumerate(transitions):
        lines = db.get_lines_by_type(family)
        atomic_numbers = [ATOMIC_NUMBERS[line.element] for line in lines]
        energies = [line.peak for line in lines]
        plt.loglog(atomic_numbers, energies, "o-", color=cmap(float(idx) / n_families), label=family)
    plt.legend(loc="upper left")
    plt.xlim([6, 100])
    plt.grid()
    # Tick spacing widens with Z so high-Z element labels stay readable.
    tickvals = list(range(6, 22)) + list(range(22, 43, 2)) + list(range(45, 75, 3)) + list(range(75, 100, 5))
    plt.xticks(tickvals, ["\n".join([ELEMENTS[z], str(z)]) for z in tickvals])

plot_line_uncertainties()

Plot the uncertainties of some common families of lines from the NIST X-ray database.

Source code in mass2/calibration/nist_xray_database.py
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
def plot_line_uncertainties() -> None:
    """Plot the uncertainties of some common families of lines from the NIST X-ray database.

    Makes one subplot per transition family, showing peak uncertainty vs peak
    energy (both log-scaled) for every matching line in the database.
    """
    db = NISTXrayDBFile()
    transitions = ("KL3", "KL2", "KM3", "KM5", "L3M5", "L3M4", "L2M4", "L3N5", "L2N4", "L1M3", "L3N7", "L3M1")
    # Subplot titles in Siegbahn notation. NOTE: "KM2" has a title but is not
    # in `transitions`, so it is not currently plotted.
    titles = {
        "KL3": "K$\\alpha_1$: Intense",
        "KL2": "K$\\alpha_2$: Intense, but not easily resolved",
        "KM3": "K$\\beta_1$: Intense",
        "KM2": "K$\\beta_3$: Intense, usually unresolvable",
        "KM5": "K$\\beta_5$: Weak line on high-E tail of K$\\beta_1$",
        "L3M5": "L$\\alpha_1$: Prominent",
        "L3M4": "L$\\alpha_2$: Small satellite",
        "L2M4": "L$\\beta_1$: Prominent",
        "L3N5": "L$\\beta_2$: Prominent",
        # BUG FIX: L2N4 is the L-series gamma-1 line (Siegbahn L-gamma-1), not a K line.
        "L2N4": "L$\\gamma_1$: Weaker",
        "L1M3": "L$\\beta_3$: Weaker",
        "L3N7": "Lu: barely visible",
        "L3M1": "L$\\ell$: very weak",
    }

    axes = {}
    NX, NY = 3, 4  # subplot grid: 3 columns x 4 rows
    plt.clf()
    for i, tr in enumerate(transitions):
        axes[i] = plt.subplot(NY, NX, i + 1)
        plt.loglog()
        plt.grid(True)
        plt.title(titles[tr])
        # Label only the outer edges of the grid of subplots.
        if i >= NX * (NY - 1):
            plt.xlabel("Line energy (eV)")
        if i % NX == 0:
            plt.ylabel("Line uncertainty (eV)")
        plt.ylim([1e-3, 10])
        plt.xlim([100, 3e4])

    for line in db.lines.values():
        if line.transition not in transitions:
            continue
        i = transitions.index(line.transition)
        plt.sca(axes[i])
        plt.plot(line.peak, line.peak_unc, "or")
        plt.text(line.peak, line.peak_unc, line.name)

Materials transmission

Models for X-ray filter and detector efficiency.

Filter dataclass

Represent a single material layer in a FilterStack

Source code in mass2/materials/efficiency_models.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
@dataclass(frozen=True)
class Filter:
    """Represent a single material layer in a FilterStack.

    `material`, `density_g_per_cm3`, and `thickness_cm` are parallel arrays:
    element i of each describes one constituent of the layer.
    """

    name: str
    material: NDArray
    atomic_number: NDArray
    density_g_per_cm3: NDArray[np.float64]
    thickness_cm: NDArray[np.float64]
    fill_fraction: Variable = ufloat(1.0, 1e-8)
    absorber: bool = False

    def get_efficiency(self, xray_energies_eV: ArrayLike, uncertain: bool = False) -> NDArray:
        """Return the efficiency of this Filter at the given x-ray energies.

        For an absorber, efficiency is the absorbed fraction scaled by coverage;
        otherwise it is the transmitted fraction, with the uncovered area
        (1 - fill_fraction) transmitting fully.
        """
        optical_depth = np.vstack([
            xraydb.material_mu(m, xray_energies_eV, density=d) * t
            for (m, d, t) in zip(self.material, self.density_g_per_cm3, self.thickness_cm)
        ])
        individual_transmittance = unp.exp(-optical_depth)
        transmittance = np.prod(individual_transmittance, axis=0)
        if self.absorber:
            efficiency = (1.0 - transmittance) * self.fill_fraction
        else:
            efficiency = (transmittance * self.fill_fraction) + (1.0 - self.fill_fraction)
        if uncertain:
            return efficiency
        else:
            return unp.nominal_values(efficiency)

    def __repr__(self) -> str:
        """Return a string representation of the Filter object."""
        s = f"{type(self)}("
        for material, density, thick in zip(self.material, self.density_g_per_cm3, self.thickness_cm):
            area_density = density * thick
            s += f"{material} {area_density:.3g} g/cm^2, "
        s += f"fill_fraction={self.fill_fraction:.3f}, absorber={self.absorber})"
        return s

    @classmethod
    def newfilter(
        cls,
        name: str,
        material: ArrayLike,
        area_density_g_per_cm2: ArrayLike | None = None,
        thickness_nm: ArrayLike | None = None,
        density_g_per_cm3: ArrayLike | None = None,
        fill_fraction: Variable = ufloat(1, 1e-8),
        absorber: bool = False,
    ) -> "Filter":
        """Create a Filter from the given parameters, filling in defaults as needed.

        Exactly one of `area_density_g_per_cm2` or `thickness_nm` must be given.
        """
        material = np.array(material, ndmin=1)
        atomic_number = np.array([xraydb.atomic_number(iMaterial) for iMaterial in material], ndmin=1)
        fill_fraction = ensure_uncertain(fill_fraction)

        # Save density, either default values for that element, or the given density.
        if density_g_per_cm3 is None:
            density_g_per_cm3 = np.array([xraydb.atomic_density(int(iAtomicNumber)) for iAtomicNumber in atomic_number], ndmin=1)
        else:
            density_g_per_cm3 = np.array(density_g_per_cm3, ndmin=1)
            assert len(material) == len(density_g_per_cm3)

        # Handle input value of areal density or thickness, but not both.
        assert np.logical_xor(area_density_g_per_cm2 is None, thickness_nm is None), (
            "must specify either areal density or thickness, not both"
        )
        if thickness_nm is not None:
            thickness_cm = np.array(thickness_nm, ndmin=1) * 1e-7
        elif area_density_g_per_cm2 is not None:
            area_density_g_per_cm2 = np.array(area_density_g_per_cm2, ndmin=1)
            thickness_cm = area_density_g_per_cm2 / density_g_per_cm3
            # BUG FIX: the original wrote `if np.ndim == 0:`, comparing the
            # numpy *function object* to 0 (always False). The intent is to
            # promote a scalar quotient to a 1-element array.
            if np.ndim(thickness_cm) == 0:
                thickness_cm = np.array(thickness_cm, ndmin=1)
        else:
            raise ValueError("must specify either areal density or thickness, not both")
        thickness_cm = ensure_uncertain(thickness_cm)
        assert len(thickness_cm) >= 1

        return cls(name, material, atomic_number, density_g_per_cm3, thickness_cm, fill_fraction, absorber)

__repr__()

Return a string representation of the Filter object.

Source code in mass2/materials/efficiency_models.py
133
134
135
136
137
138
139
140
def __repr__(self) -> str:
    """Return a string representation of the Filter object."""
    s = f"{type(self)}("
    for material, density, thick in zip(self.material, self.density_g_per_cm3, self.thickness_cm):
        area_density = density * thick
        s += f"{material} {area_density:.3g} g/cm^2, "
    s += f"fill_fraction={self.fill_fraction:.3f}, absorber={self.absorber})"
    return s

get_efficiency(xray_energies_eV, uncertain=False)

Return the efficiency of this Filter at the given x-ray energies.

Source code in mass2/materials/efficiency_models.py
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def get_efficiency(self, xray_energies_eV: ArrayLike, uncertain: bool = False) -> NDArray:
    """Return the efficiency of this Filter at the given x-ray energies."""
    depths = [
        xraydb.material_mu(mat, xray_energies_eV, density=rho) * thickness
        for mat, rho, thickness in zip(self.material, self.density_g_per_cm3, self.thickness_cm)
    ]
    transmittance = np.prod(unp.exp(-np.vstack(depths)), axis=0)
    if self.absorber:
        # Absorbers count what stops in the layer, scaled by areal coverage.
        efficiency = self.fill_fraction * (1.0 - transmittance)
    else:
        # Windows count what passes through; the uncovered area transmits fully.
        efficiency = self.fill_fraction * transmittance + (1.0 - self.fill_fraction)
    return efficiency if uncertain else unp.nominal_values(efficiency)

newfilter(name, material, area_density_g_per_cm2=None, thickness_nm=None, density_g_per_cm3=None, fill_fraction=ufloat(1, 1e-08), absorber=False) classmethod

Create a Filter from the given parameters, filling in defaults as needed.

Source code in mass2/materials/efficiency_models.py
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
@classmethod
def newfilter(
    cls,
    name: str,
    material: ArrayLike,
    area_density_g_per_cm2: ArrayLike | None = None,
    thickness_nm: ArrayLike | None = None,
    density_g_per_cm3: ArrayLike | None = None,
    fill_fraction: Variable = ufloat(1, 1e-8),
    absorber: bool = False,
) -> "Filter":
    """Create a Filter from the given parameters, filling in defaults as needed.

    Exactly one of `area_density_g_per_cm2` or `thickness_nm` must be given.
    """
    material = np.array(material, ndmin=1)
    atomic_number = np.array([xraydb.atomic_number(iMaterial) for iMaterial in material], ndmin=1)
    fill_fraction = ensure_uncertain(fill_fraction)

    # Save density, either default values for that element, or the given density.
    if density_g_per_cm3 is None:
        density_g_per_cm3 = np.array([xraydb.atomic_density(int(iAtomicNumber)) for iAtomicNumber in atomic_number], ndmin=1)
    else:
        density_g_per_cm3 = np.array(density_g_per_cm3, ndmin=1)
        assert len(material) == len(density_g_per_cm3)

    # Handle input value of areal density or thickness, but not both.
    assert np.logical_xor(area_density_g_per_cm2 is None, thickness_nm is None), (
        "must specify either areal density or thickness, not both"
    )
    if thickness_nm is not None:
        thickness_cm = np.array(thickness_nm, ndmin=1) * 1e-7
    elif area_density_g_per_cm2 is not None:
        area_density_g_per_cm2 = np.array(area_density_g_per_cm2, ndmin=1)
        thickness_cm = area_density_g_per_cm2 / density_g_per_cm3
        # BUG FIX: the original wrote `if np.ndim == 0:`, comparing the numpy
        # *function object* to 0 (always False). The intent is to promote a
        # scalar quotient to a 1-element array.
        if np.ndim(thickness_cm) == 0:
            thickness_cm = np.array(thickness_cm, ndmin=1)
    else:
        raise ValueError("must specify either areal density or thickness, not both")
    thickness_cm = ensure_uncertain(thickness_cm)
    assert len(thickness_cm) >= 1

    return cls(name, material, atomic_number, density_g_per_cm3, thickness_cm, fill_fraction, absorber)

FilterStack dataclass

Represent a sequence of named materials

Source code in mass2/materials/efficiency_models.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
@dataclass()
class FilterStack:
    """Represent a sequence of named materials (Filters and/or nested FilterStacks)."""

    name: str
    components: list["Filter | FilterStack"] = field(default_factory=list)

    def add(self, film: "Filter | FilterStack") -> None:
        """Add a Filter or FilterStack to this FilterStack."""
        self.components.append(film)

    def add_filter(
        self,
        name: str,
        material: str,
        area_density_g_per_cm2: float | None = None,
        thickness_nm: float | None = None,
        density_g_per_cm3: float | None = None,
        fill_fraction: Variable = ufloat(1, 1e-8),
        absorber: bool = False,
    ) -> None:
        """Create and add a Filter layer to this FilterStack."""
        self.add(
            Filter.newfilter(
                name,
                material,
                area_density_g_per_cm2=area_density_g_per_cm2,
                thickness_nm=thickness_nm,
                density_g_per_cm3=density_g_per_cm3,
                fill_fraction=fill_fraction,
                absorber=absorber,
            )
        )

    def get_efficiency(self, xray_energies_eV: ArrayLike, uncertain: bool = False) -> NDArray:
        """Return the overall efficiency of this FilterStack at the given x-ray energies.

        The total is the product over components of each component's efficiency.
        """
        assert len(self.components) > 0, f"{self.name} has no components of which to calculate efficiency"
        individual_efficiency = np.array([
            iComponent.get_efficiency(xray_energies_eV, uncertain=uncertain) for iComponent in self.components
        ])
        efficiency = np.prod(individual_efficiency, axis=0)
        if uncertain:
            return efficiency
        else:
            return unp.nominal_values(efficiency)

    def __call__(self, xray_energies_eV: ArrayLike, uncertain: bool = False) -> NDArray:
        """Equivalent to get_efficiency."""
        return self.get_efficiency(xray_energies_eV, uncertain=uncertain)

    def plot_efficiency(self, xray_energies_eV: ArrayLike, ax: plt.Axes | None = None) -> None:
        """Plot the total efficiency of this FilterStack and of each component."""
        efficiency = unp.nominal_values(self.get_efficiency(xray_energies_eV))
        if ax is None:
            fig = plt.figure()
            ax = fig.add_subplot(111)

        ax.plot(xray_energies_eV, efficiency * 100.0, label="total", lw=2)
        # BUG FIX: the x data are in eV (see the parameter name), so label the
        # axis in eV; also set the title once (the original set it twice, the
        # first call being immediately overwritten).
        ax.set_xlabel("Energy (eV)")
        ax.set_ylabel("Efficiency (%)")
        ax.set_title(f"{self.name} Efficiency")

        for v in self.components:
            efficiency = v.get_efficiency(xray_energies_eV)
            ax.plot(xray_energies_eV, efficiency * 100.0, "--", label=v.name)

        ax.legend()

    def __repr__(self) -> str:
        """Return a string representation of the FilterStack object."""
        s = f"{type(self)}(\n"
        for v in self.components:
            s += f"{v.name}: {v}\n"
        s += ")"
        return s

__call__(xray_energies_eV, uncertain=False)

Equivalent to get_efficiency.

Source code in mass2/materials/efficiency_models.py
72
73
74
def __call__(self, xray_energies_eV: ArrayLike, uncertain: bool = False) -> NDArray:
    """Equivalent to get_efficiency."""
    return self.get_efficiency(xray_energies_eV, uncertain=uncertain)

__repr__()

Return a string representation of the FilterStack object.

Source code in mass2/materials/efficiency_models.py
 95
 96
 97
 98
 99
100
101
def __repr__(self) -> str:
    """Return a string representation of the FilterStack object."""
    s = f"{type(self)}(\n"
    for v in self.components:
        s += f"{v.name}: {v}\n"
    s += ")"
    return s

add(film)

Add a Filter or FilterStack to this FilterStack.

Source code in mass2/materials/efficiency_models.py
33
34
35
def add(self, film: "Filter | FilterStack") -> None:
    """Append a Filter or FilterStack as the next component of this stack."""
    components = self.components
    components.append(film)

add_filter(name, material, area_density_g_per_cm2=None, thickness_nm=None, density_g_per_cm3=None, fill_fraction=ufloat(1, 1e-08), absorber=False)

Create and add a Filter layer to this FilterStack.

Source code in mass2/materials/efficiency_models.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def add_filter(
    self,
    name: str,
    material: str,
    area_density_g_per_cm2: float | None = None,
    thickness_nm: float | None = None,
    density_g_per_cm3: float | None = None,
    fill_fraction: Variable = ufloat(1, 1e-8),
    absorber: bool = False,
) -> None:
    """Construct a Filter from these parameters and append it to this stack.

    All arguments are forwarded unchanged to Filter.newfilter.
    """
    new_layer = Filter.newfilter(
        name,
        material,
        area_density_g_per_cm2=area_density_g_per_cm2,
        thickness_nm=thickness_nm,
        density_g_per_cm3=density_g_per_cm3,
        fill_fraction=fill_fraction,
        absorber=absorber,
    )
    self.add(new_layer)

get_efficiency(xray_energies_eV, uncertain=False)

Return the overall efficiency of this FilterStack at the given x-ray energies.

Source code in mass2/materials/efficiency_models.py
60
61
62
63
64
65
66
67
68
69
70
def get_efficiency(self, xray_energies_eV: ArrayLike, uncertain: bool = False) -> NDArray:
    """Return the overall efficiency of this FilterStack at the given x-ray energies.

    The total is the product over components of each component's efficiency.
    """
    assert len(self.components) > 0, f"{self.name} has no components of which to calculate efficiency"
    per_component = [component.get_efficiency(xray_energies_eV, uncertain=uncertain) for component in self.components]
    total = np.prod(np.array(per_component), axis=0)
    if not uncertain:
        return unp.nominal_values(total)
    return total

plot_efficiency(xray_energies_eV, ax=None)

Plot the efficiency of this FilterStack and its components.

Source code in mass2/materials/efficiency_models.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def plot_efficiency(self, xray_energies_eV: ArrayLike, ax: plt.Axes | None = None) -> None:
    """Plot the total efficiency of this FilterStack and of each component."""
    efficiency = unp.nominal_values(self.get_efficiency(xray_energies_eV))
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)

    ax.plot(xray_energies_eV, efficiency * 100.0, label="total", lw=2)
    # BUG FIX: the x data are in eV (see the parameter name), so label the
    # axis in eV; also set the title once (the original set it twice, the
    # first call being immediately overwritten).
    ax.set_xlabel("Energy (eV)")
    ax.set_ylabel("Efficiency (%)")
    ax.set_title(f"{self.name} Efficiency")

    for v in self.components:
        efficiency = v.get_efficiency(xray_energies_eV)
        ax.plot(xray_energies_eV, efficiency * 100.0, "--", label=v.name)

    ax.legend()

AlFilmWithOxide(name, Al_thickness_nm, Al_density_g_per_cm3=None, num_oxidized_surfaces=2, oxide_density_g_per_cm3=None)

Create a Filter made of an aluminum film with oxides on one or both surfaces

Args: name: name given to filter object, e.g. '50K Filter'. Al_thickness_nm: thickness, in nm, of Al film Al_density_g_per_cm3: Al film density, in g/cm3, defaults to xraydb value num_oxidized_surfaces: Number of film surfaces that contain a native oxide, default 2 oxide_density_g_per_cm3: Al2O3 oxide density, in g/cm3, defaults to bulk xraydb value

Source code in mass2/materials/efficiency_models.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
def AlFilmWithOxide(
    name: str,
    Al_thickness_nm: float,
    Al_density_g_per_cm3: float | None = None,
    num_oxidized_surfaces: int = 2,
    oxide_density_g_per_cm3: ArrayLike | None = None,
) -> Filter:
    """Create a Filter made of an aluminum film with native oxide on one or both surfaces.

    Args:
        name: name given to filter object, e.g. '50K Filter'.
        Al_thickness_nm: thickness, in nm, of Al film
        Al_density_g_per_cm3: Al film density, in g/cm3, defaults to xraydb value
        num_oxidized_surfaces: Number of film surfaces that contain a native oxide, default 2
        oxide_density_g_per_cm3: Al2O3 oxide density, in g/cm3, defaults to bulk xraydb value
    """
    assert num_oxidized_surfaces in {1, 2}, "only 1 or 2 oxidzed surfaces allowed"
    if Al_density_g_per_cm3 is None:
        Al_density_g_per_cm3 = float(xraydb.atomic_density("Al"))

    # Query xraydb at an arbitrary energy (5 keV) just to obtain sapphire's
    # elemental composition; the probe energy itself is not otherwise used.
    probe_energy_eV = 5000.0
    sapphire = xraydb.material_mu_components("sapphire", probe_energy_eV)
    oxide_elements = sapphire["elements"]
    n_oxide = len(oxide_elements)
    mass_fractions = [sapphire[el][0] * sapphire[el][1] / sapphire["mass"] for el in oxide_elements]

    # Assume each oxidized surface contributes 3 nm of oxide.
    oxide_layer_nm = np.repeat(num_oxidized_surfaces * 3.0, n_oxide)
    if oxide_density_g_per_cm3 is None:
        oxide_density_g_per_cm3 = np.repeat(sapphire["density"], n_oxide)
    else:
        oxide_density_g_per_cm3 = np.asarray(oxide_density_g_per_cm3)

    # Parallel arrays: the Al layer first, then one entry per oxide element,
    # with each oxide element's density scaled by its mass fraction.
    all_materials = np.hstack(["Al", oxide_elements])
    all_densities = np.hstack([Al_density_g_per_cm3, oxide_density_g_per_cm3 * mass_fractions])
    all_thicknesses_nm = np.hstack([Al_thickness_nm, oxide_layer_nm])
    return Filter.newfilter(name, all_materials, thickness_nm=all_thicknesses_nm, density_g_per_cm3=all_densities)

AlFilmWithPolymer(name, Al_thickness_nm, polymer_thickness_nm, Al_density_g_per_cm3=None, num_oxidized_surfaces=1, oxide_density_g_per_cm3=None, polymer_density_g_per_cm3=None)

Create a Filter made of an aluminum film with polymer backing

Args: name: name given to filter object, e.g. '50K Filter'. Al_thickness_nm: thickness, in nm, of Al film polymer_thickness_nm: thickness, in nm, of filter backside polymer Al_density_g_per_cm3: Al film density, in g/cm3, defaults to xraydb value num_oxidized_surfaces: Number of film surfaces that contain a native oxide, default 1 oxide_density_g_per_cm3: Al2O3 oxide density, in g/cm3, defaults to bulk xraydb value polymer_density_g_per_cm3: Polymer density, in g/cm3, defaults to Kapton

Source code in mass2/materials/efficiency_models.py
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
def AlFilmWithPolymer(
    name: str,
    Al_thickness_nm: float,
    polymer_thickness_nm: float,
    Al_density_g_per_cm3: float | None = None,
    num_oxidized_surfaces: int = 1,
    oxide_density_g_per_cm3: float | None = None,
    polymer_density_g_per_cm3: float | None = None,
) -> Filter:
    """Create a Filter made of an aluminum film with polymer backing

    Args:
        name: name given to filter object, e.g. '50K Filter'.
        Al_thickness_nm: thickness, in nm, of Al film
        polymer_thickness_nm: thickness, in nm, of filter backside polymer
        Al_density_g_per_cm3: Al film density, in g/cm3, defaults to xraydb value
        num_oxidized_surfaces: Number of film surfaces that contain a native oxide, default 1
        oxide_density_g_per_cm3: Al2O3 oxide density, in g/cm3, defaults to bulk xraydb value
        polymer_density_g_per_cm3: Polymer density, in g/cm3, defaults to Kapton
    """
    assert num_oxidized_surfaces in {1, 2}, "only 1 or 2 oxidzed surfaces allowed"
    if Al_density_g_per_cm3 is None:
        Al_density_g_per_cm3 = xraydb.atomic_density("Al")

    # The arbitrary 5 keV energy is only used to obtain composition dicts from
    # material_mu_components(); the energy itself does not affect the result.
    arbE = 5000.0  # an arbitrary energy (5 keV) is used to get answers from material_mu_components()
    oxide_dict = xraydb.material_mu_components("sapphire", arbE)
    oxide_thickness_nm = num_oxidized_surfaces * 3.0  # assume 3 nm per oxidized surface
    oxide_material = oxide_dict["elements"]
    # Per-element mass fractions of the oxide — presumably (count * atomic mass) / molar mass;
    # TODO(review): confirm the (index 0, index 1) field meanings against xraydb docs.
    oxide_mass_fractions = np.array([oxide_dict[x][0] * oxide_dict[x][1] / oxide_dict["mass"] for x in oxide_material])
    if oxide_density_g_per_cm3 is None:
        oxide_density_g_per_cm3 = oxide_dict["density"] * np.ones(len(oxide_material))

    # Model the polymer backing as kapton, one array entry per constituent element.
    polymer_dict = xraydb.material_mu_components("kapton", arbE)
    polymer_material = polymer_dict["elements"]
    polymer_thickness_nm_array = np.ones(len(polymer_material)) * polymer_thickness_nm
    polymer_mass_fractions = np.array([polymer_dict[x][0] * polymer_dict[x][1] / polymer_dict["mass"] for x in polymer_material])
    if polymer_density_g_per_cm3 is None:
        polymer_density_g_per_cm3 = polymer_dict["density"] * np.ones(len(polymer_material))

    # Build parallel arrays (Al layer, oxide elements, polymer elements); the
    # scalar oxide thickness is broadcast across oxide elements by hstack.
    material = np.hstack(["Al", oxide_material, polymer_material])
    density_g_per_cm3 = np.hstack([
        [Al_density_g_per_cm3],
        oxide_density_g_per_cm3 * oxide_mass_fractions,
        polymer_density_g_per_cm3 * polymer_mass_fractions,
    ])
    thickness_nm = np.hstack([Al_thickness_nm, oxide_thickness_nm, polymer_thickness_nm_array])

    return Filter.newfilter(name=name, material=material, thickness_nm=thickness_nm, density_g_per_cm3=density_g_per_cm3)

LEX_HT(name)

Create an Al film with polymer and stainless steel backing.

Models the LEX-HT vacuum window.

Args: name: name given to filter object, e.g. '50K Filter'.

Source code in mass2/materials/efficiency_models.py
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
def LEX_HT(name: str) -> FilterStack:
    """Create an Al film with polymer and stainless steel backing.

    Models the LEX-HT vacuum window.

    Args:
        name: name given to filter object, e.g. '50K Filter'.
    """
    # Aluminized polyimide membrane, specified by areal density of each element
    # (3% fractional uncertainty on each).
    membrane_elements = ["C", "H", "N", "O", "Al"]
    membrane_area_density = with_fractional_uncertainty(np.array([6.7e-5, 2.6e-6, 7.2e-6, 1.7e-5, 1.7e-5]), 0.03)
    membrane = Filter.newfilter(name="LEX_HT Film", material=membrane_elements, area_density_g_per_cm2=membrane_area_density)

    # Stainless-steel support mesh: 100 um thick, 8 g/cm^3, covering ~19% of the area.
    steel_elements = ["Fe", "Cr", "Ni", "Mn", "Si"]
    steel_weight_fractions = np.array([0.705, 0.19, 0.09, 0.01, 0.005])
    steel_area_density = steel_weight_fractions * 8.0 * 100.0e-4  # g/cm^2
    mesh = Filter.newfilter(
        name="LEX_HT Mesh",
        material=steel_elements,
        area_density_g_per_cm2=with_fractional_uncertainty(steel_area_density, 0.02),
        fill_fraction=ufloat(0.19, 0.01),
    )

    window = FilterStack(name)
    window.add(membrane)
    window.add(mesh)
    return window

get_filter_stacks_dict()

Create a dictionary with a few examples of FilterStack objects

Returns:
  • dict

    A dictionary of named FilterStacks

Source code in mass2/materials/efficiency_models.py
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
def get_filter_stacks_dict() -> dict[str, FilterStack]:
    """Create a dictionary with a few examples of FilterStack objects

    Each entry models the full x-ray path of one instrument: absorber,
    cryostat IR-blocking filters, and vacuum windows, keyed by instrument name.

    Returns
    -------
    dict
        A dictionary of named FilterStacks
    """
    fs_dict: dict[str, FilterStack] = {}

    # EBIT Instrument
    EBIT_filter_stack = FilterStack(name="EBIT 2018")
    EBIT_filter_stack.add_filter(
        name="Electroplated Au Absorber", material="Au", thickness_nm=with_fractional_uncertainty(965.5, 0.03), absorber=True
    )

    EBIT_filter_stack.add(AlFilmWithOxide(name="50mK Filter", Al_thickness_nm=with_fractional_uncertainty(112.5, 0.02)))
    EBIT_filter_stack.add(AlFilmWithOxide(name="3K Filter", Al_thickness_nm=with_fractional_uncertainty(108.5, 0.02)))

    # The 50K stage combines an oxidized Al film with a nickel support mesh.
    filter_50K = FilterStack(name="50K Filter")
    filter_50K.add(AlFilmWithOxide(name="Al Film", Al_thickness_nm=with_fractional_uncertainty(102.6, 0.02)))
    nickel = Filter.newfilter(name="Ni Mesh", material="Ni", thickness_nm=ufloat(15.0e3, 2e3), fill_fraction=ufloat(0.17, 0.01))
    filter_50K.add(nickel)
    EBIT_filter_stack.add(filter_50K)
    # Two LEX-HT windows in series: one on the TES cryostat, one on the EBIT.
    luxel1 = LEX_HT("Luxel Window TES")
    luxel2 = LEX_HT("Luxel Window EBIT")
    EBIT_filter_stack.add(luxel1)
    EBIT_filter_stack.add(luxel2)
    fs_dict[EBIT_filter_stack.name] = EBIT_filter_stack

    # RAVEN Instrument
    RAVEN1_fs = FilterStack(name="RAVEN1 2019")
    RAVEN1_fs.add_filter(name="Evaporated Bi Absorber", material="Bi", thickness_nm=4.4e3, absorber=True)
    RAVEN1_fs.add(AlFilmWithPolymer(name="50mK Filter", Al_thickness_nm=108.4, polymer_thickness_nm=206.4))
    RAVEN1_fs.add(AlFilmWithPolymer(name="3K Filter", Al_thickness_nm=108.4, polymer_thickness_nm=206.4))
    RAVEN1_fs.add(AlFilmWithOxide(name="50K Filter", Al_thickness_nm=1.0e3))
    RAVEN1_fs.add_filter(name="Be TES Vacuum Window", material="Be", thickness_nm=200.0e3)
    RAVEN1_fs.add(AlFilmWithOxide(name="e- Filter", Al_thickness_nm=5.0e3))
    RAVEN1_fs.add_filter(name="Be SEM Vacuum Window", material="Be", thickness_nm=200.0e3)
    fs_dict[RAVEN1_fs.name] = RAVEN1_fs

    # Horton spring 2018, for metrology campaign.
    Horton_filter_stack = FilterStack(name="Horton 2018")
    Horton_filter_stack.add_filter(name="Electroplated Au Absorber", material="Au", thickness_nm=965.5, absorber=True)
    Horton_filter_stack.add(AlFilmWithOxide(name="50mK Filter", Al_thickness_nm=5000))
    Horton_filter_stack.add(AlFilmWithOxide(name="3K Filter", Al_thickness_nm=5000))
    Horton_filter_stack.add(AlFilmWithOxide(name="50K Filter", Al_thickness_nm=12700))
    Horton_filter_stack.add(LEX_HT("Luxel Window TES"))
    fs_dict[Horton_filter_stack.name] = Horton_filter_stack

    return fs_dict

Math and statistics functions

These live in mass2.mathstat.

Entropy

entropy.py

Estimates of the distribution entropy computed using kernel-density estimates of the distribution.

  • laplace_entropy(x, w=1.0) - Compute the entropy H(p) of data set x where the kernel used to estimate p from x is the Laplace kernel k(x) \propto exp(-abs(x-x0)/w).
  • laplace_cross_entropy(x, y, w=1.0) - Compute the cross entropy of q from p, where q and p are the kernel-density estimates taken from data set y and data set x, and where the kernel is the Laplace kernel.
  • KL_divergence(x, y, w=1.0) - Compute the Kullback-Leibler Divergence of data set y from x, where the kernel is the Laplace kernel.

The K-L divergence of Q(x) from P(x) is defined as the integral over the full x domain of P(x) log[P(x)/Q(x)].

This equals the cross-entropy H(P,Q) - H(P). Note that cross-entropy and K-L divergence are not symmetric with respect to reversal of x and y.

laplace_KL_divergence(x, y, w=1.0, approx_mode='size')

Compute the Kullback-Leibler divergence of data set y from data set x.

Use kernel-density estimation, where the kernel is the Laplace kernel k(x) \propto exp(-abs(x-x0)/w).

The approx_mode can be one of: exact The exact integral is computed (can take ~0.25 sec per 10^6 values). approx The integral is approximated by histogramming the data, smoothing that, and using Simpson's rule on the PDF samples that result. size Uses "approx" if len(x)+len(y)>200000, or "exact" otherwise.

Source code in mass2/mathstat/entropy.py
254
255
256
257
258
259
260
261
262
263
264
265
266
def laplace_KL_divergence(x: ArrayLike, y: ArrayLike, w: float = 1.0, approx_mode: str = "size") -> float:
    r"""Compute the Kullback-Leibler divergence of data set `y` from data set `x`.

    Both distributions are estimated by kernel density, where the kernel is the
    Laplace kernel k(x) \propto exp(-abs(x-x0)/w). The divergence equals the
    cross-entropy H(P, Q) minus the entropy H(P).

    The `approx_mode` can be one of:
    ``exact``  The exact integral is computed (can take ~0.25 sec per 10^6 values).
    ``approx`` The integral is approximated by histogramming the data, smoothing
               that, and using Simpson's rule on the PDF samples that result.
    ``size``   Uses "approx" if len(x)+len(y)>200000, or "exact" otherwise.
    """
    cross = laplace_cross_entropy(x, y, w, approx_mode=approx_mode)
    self_entropy = laplace_entropy(x, w, approx_mode=approx_mode)
    return cross - self_entropy

laplace_cross_entropy(x, y, w=1.0, approx_mode='size')

laplace_cross_entropy(x, y, w: float = 1.0, approx_mode="size")

Compute the cross-entropy of data set x from data set y, where the kernel for x is the Laplace kernel k(x) \propto exp(-abs(x-x0)/w).

The kernel for the y data is the piecewise-constant (top-hat) kernel. We choose this because a Laplace kernel for y led to possible divergences when the y-distribution q is exceedingly small, but the x-distribution p nevertheless is non-zero because of a random x-value lying far from any random y-values. The constant kernel is given a non-zero floor value, so that q is never so small as to make any x-value impossible.

Args: x (array): One vector of data. y (array): The other vector of data. w (double): The width (exponential scale length) of the Laplace distribution to be used in kernel-density estimation. approx_mode (string): How to balance execution speed and accuracy (default "size").

The approx_mode can be one of: exact The exact integral is computed (can take ~0.25 sec per 10^6 values). approx The integral is approximated by histogramming the data, smoothing that, and using Simpson's rule on the PDF samples that result. size Uses "approx" if len(x)+len(y)>200000, or "exact" otherwise.

Source code in mass2/mathstat/entropy.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
def laplace_cross_entropy(x: ArrayLike, y: ArrayLike, w: float = 1.0, approx_mode: str = "size") -> float:
    r"""Compute the cross-entropy of data set `x` from data set `y`.

    The kernel for the `x` data is the Laplace kernel k(x) \propto exp(-abs(x-x0)/w).
    The kernel for the `y` data is the piecewise-constant (top-hat) kernel, chosen
    because a Laplace kernel for y led to possible divergences when the y-distribution
    q is exceedingly small but the x-distribution p is nevertheless non-zero. The
    constant kernel is given a non-zero floor value, so that q is never so small as
    to make any x-value impossible.

    Parameters
    ----------
    x : ArrayLike
        One vector of data.
    y : ArrayLike
        The other vector of data.
    w : float, optional
        The width (exponential scale length) of the Laplace distribution used in
        kernel-density estimation, by default 1.0.
    approx_mode : str, optional
        How to balance execution speed and accuracy, by default "size".
        ``exact``  The exact integral is computed (can take ~0.25 sec per 10^6 values).
        ``approx`` The integral is approximated by histogramming the data, smoothing
                   that, and using Simpson's rule on the PDF samples that result.
        ``size``   Uses "approx" if len(x)+len(y)>200000, or "exact" otherwise.

    Returns
    -------
    float
        The cross-entropy H(P, Q).

    Raises
    ------
    ValueError
        If `w` is not positive, or either input vector is empty.
    """
    if w <= 0.0:
        raise ValueError("laplace_cross_entropy(x, y, w) needs `w>0`.")
    xarr = np.asarray(x)
    yarr = np.asarray(y)
    if len(xarr) == 0 or len(yarr) == 0:
        raise ValueError("laplace_cross_entropy(x, y) needs at least 1 element apiece in `x` and `y`.")

    # Resolve the automatic choice of mode by total data size.
    mode = approx_mode
    if mode == "size":
        mode = "exact" if len(xarr) + len(yarr) <= 200000 else "approx"

    if not mode.startswith("exact"):
        return laplace_cross_entropy_approx(np.asarray(xarr, dtype=DTYPE), np.asarray(yarr, dtype=DTYPE), w)

    # Exact path: work in units of w with sorted inputs. Rescaling by w shifts the
    # cross-entropy by log(w), which is added back at the end.
    xsorted = np.asarray(np.sort(xarr) / w, dtype=DTYPE)
    ysorted = np.asarray(np.sort(yarr) / w, dtype=DTYPE)
    return laplace_cross_entropy_arrays(xsorted, ysorted) + np.log(w)

laplace_cross_entropy_approx(x, y, w=1.0)

Approximate the cross-entropy H(P, Q) between two empirical distributions P and Q, where P is estimated from data x and Q from data y using Laplace kernel-density estimation and binned histograms.

This method uses histograms and convolution with a Laplace kernel to estimate the probability distributions, then computes the cross-entropy using numerical integration.

Parameters:
  • x (ArrayLike) –

    Data points for the P distribution.

  • y (ArrayLike) –

    Data points for the Q distribution.

  • w (float, default: 1.0 ) –

    The width (exponential scale length) of the Laplace kernel, by default 1.0.

Returns:
  • float

    The approximate cross-entropy H(P, Q) between the two distributions.

Notes
  • This is an approximate method, suitable for large data sets.
  • The Laplace kernel is defined as k(x) ∝ exp(-abs(x-x0)/w).
  • Uses Simpson's rule for numerical integration.
Source code in mass2/mathstat/entropy.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
def laplace_cross_entropy_approx(x: ArrayLike, y: ArrayLike, w: float = 1.0) -> float:
    """
    Approximate the cross-entropy H(P, Q) between two empirical distributions P and Q,
    where P is estimated from data `x` and Q from data `y` using Laplace kernel-density
    estimation and binned histograms.

    This method uses histograms and convolution with a Laplace kernel to estimate the
    probability distributions, then computes the cross-entropy using numerical integration.

    Parameters
    ----------
    x : ArrayLike
        Data points for the P distribution.
    y : ArrayLike
        Data points for the Q distribution.
    w : float, optional
        The width (exponential scale length) of the Laplace kernel, by default 1.0.

    Returns
    -------
    float
        The approximate cross-entropy H(P, Q) between the two distributions.

    Notes
    -----
    - This is an approximate method, suitable for large data sets.
    - The Laplace kernel is defined as k(x) ∝ exp(-abs(x-x0)/w).
    - Uses Simpson's rule for numerical integration.
    """
    EXTEND_DATA = 5 * w
    BINS_PER_W = 20
    KERNEL_WIDTH_IN_WS = 15.0

    xmin = min(np.min(x), np.min(y)) - EXTEND_DATA
    xmax = max(np.max(x), np.max(y)) + EXTEND_DATA
    nbins = int(0.5 + (xmax - xmin) * BINS_PER_W / w)
    cx, b = np.histogram(x, nbins, (xmin, xmax))
    cy, b = np.histogram(y, nbins, (xmin, xmax))
    db = b[1] - b[0]
    nx = int(0.5 + KERNEL_WIDTH_IN_WS * w / db)

    kernel = np.zeros(2 * nx + 1)
    for i in range(2 * nx + 1):
        kx = (i - nx) * db
        kernel[i] = np.exp(-abs(kx / w))

    # kde = unnormalized kernel-density estimator.
    kde = sp.signal.fftconvolve(cx, kernel, mode="full")[nx:-nx]
    kde[kde < kernel.min()] = kernel.min()

    # p = normalized probability distribution.
    norm = 1.0 / sp.integrate.simpson(kde, dx=db)
    p = kde * norm

    kde = sp.signal.fftconvolve(cy, kernel, mode="full")[nx:-nx]
    kde[kde < kernel.min()] = kernel.min()
    norm = 1.0 / sp.integrate.simpson(kde, dx=db)
    q = kde * norm
    return -sp.integrate.simpson(p * np.log(q), dx=db)

laplace_cross_entropy_arrays(x, y)

Compute the cross-entropy H(P, Q) between two empirical distributions P and Q, where P is estimated from data x using a Laplace kernel, and Q is estimated from data y using a piecewise-constant (top-hat) kernel.

This function assumes both x and y are sorted and scaled by the kernel width. The cross-entropy is computed exactly by integrating over all points where the estimated densities change due to the presence of data points in x or y.

Parameters:
  • x (ArrayLike) –

    Sorted array of data points for the P distribution, scaled by kernel width.

  • y (ArrayLike) –

    Sorted array of data points for the Q distribution, scaled by kernel width.

Returns:
  • float

    The exact cross-entropy H(P, Q) between the two distributions.

Notes
  • The Laplace kernel is defined as k(x) ∝ exp(-abs(x-x0)/w).
  • The Q distribution uses a top-hat kernel with a nonzero floor to avoid divergences.
  • This function is intended for internal use; see laplace_cross_entropy for the public API.
Source code in mass2/mathstat/entropy.py
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
def laplace_cross_entropy_arrays(x: ArrayLike, y: ArrayLike) -> float:  # noqa: PLR0914
    """Compute the cross-entropy H(P, Q) between two empirical distributions P and Q,
    where P is estimated from data `x` using a Laplace kernel, and Q is estimated
    from data `y` using a piecewise-constant (top-hat) kernel.

    This function assumes both `x` and `y` are sorted and scaled by the kernel width.
    The cross-entropy is computed exactly by integrating over all points where the
    estimated densities change due to the presence of data points in `x` or `y`.

    Parameters
    ----------
    x : ArrayLike
        Sorted array of data points for the P distribution, scaled by kernel width.
    y : ArrayLike
        Sorted array of data points for the Q distribution, scaled by kernel width.

    Returns
    -------
    float
        The exact cross-entropy H(P, Q) between the two distributions.

    Notes
    -----
    - The Laplace kernel is defined as k(x) ∝ exp(-abs(x-x0)/w).
    - The Q distribution uses a top-hat kernel with a nonzero floor to avoid divergences.
    - This function is intended for internal use; see `laplace_cross_entropy` for the public API.
    """
    # List of all places where q(u) increases or decreases because of a y-point.
    # The top-hat width 2*sqrt(6) gives each kernel variance (2*sqrt(6))^2/12 = 2,
    # which equals the variance of a unit-scale Laplace kernel.
    Qstepwidth = 2 * np.sqrt(6)
    ynodes, qstep_is_up = _merge_orderedlists(y - 0.5 * Qstepwidth, y + 0.5 * Qstepwidth)

    # List of all places where p(u) or q(u) changes because of an x- or a y-point.
    nodes, isx = _merge_orderedlists(x, ynodes)

    x = np.asarray(x)
    y = np.asarray(y)
    Nx = len(x)
    Ny = len(y)
    N = Nx + Ny * 2  # each y-point contributes 2 nodes: a step up and a step down

    # Pretend q(u) is never lower than this value, and spread this probability across
    # the range 10 less than the lowest to 10 more than the highest node.
    Qmin_sum = 1.0 / np.sqrt(Ny + 3)
    Qmin = Qmin_sum / (nodes[-1] + 10 - (nodes[0] - 10))
    Qstep = (1.0 - Qmin_sum) / (Ny * Qstepwidth)

    # Initialize the vectors decayfactor, c, and d.
    # decayfactor[i] = exp(-(gap)) is the Laplace-kernel attenuation across the
    # interval between nodes i-1 and i.
    decayfactor = np.zeros(N, dtype=DTYPE)
    for i in range(1, N):
        decayfactor[i] = np.exp(nodes[i - 1] - nodes[i])

    # c requires a left-right pass over all nodes.
    # c[i] accumulates (attenuated) Laplace contributions from x-points at or left
    # of node i. `j` indexes the next unconsumed entry of qstep_is_up; when node 0
    # is a y-node its step is handled by net_up_qsteps' initial value below, so j
    # starts at 1 in that case.
    c = np.zeros(N, dtype=DTYPE)
    stepX = 1.0 / (2 * Nx)
    j = 0
    if isx[0]:
        c[0] = stepX
    else:
        j = 1
    for i in range(1, N):
        factor = decayfactor[i]
        c[i] = factor * c[i - 1]
        if isx[i]:
            c[i] += stepX

    # d requires a right-left pass over all nodes.
    # d[i] accumulates contributions from x-points at or right of node i.
    d = np.zeros(N, dtype=DTYPE)
    if isx[N - 1]:
        d[N - 1] = stepX
    for i in range(N - 2, -1, -1):
        factor = decayfactor[i + 1]
        d[i] = factor * d[i + 1]
        if isx[i]:
            d[i] += stepX

    # Now a left-right pass over all nodes to compute the H integral.
    # net_up_qsteps counts how many y top-hat kernels cover the current interval.
    net_up_qsteps = 0
    if not isx[0]:
        net_up_qsteps = 1

    H = -d[0] * np.log(Qmin)  # H due to the open first interval [-inf, nodes[0]]
    for i in range(1, N):
        factor = decayfactor[i]
        q = Qmin + Qstep * net_up_qsteps  # q is piecewise-constant over this interval
        # Integral of p over (nodes[i-1], nodes[i]) times -log(q); the (1 - factor)
        # term accounts for the exponential decay of p across the gap.
        H -= (c[i - 1] + d[i]) * (1 - factor) * np.log(q)

        if not isx[i]:
            if qstep_is_up[j]:
                net_up_qsteps += 1
            else:
                net_up_qsteps -= 1
            j += 1
    H -= c[-1] * np.log(Qmin)  # H due to the open last interval [nodes[-1], +inf]
    return H

laplace_entropy(x_in, w=1.0, approx_mode='size')

Compute the entropy of data set x where the kernel is the Laplace kernel, $k(x) \propto$ exp(-abs(x-x0)/w).

Parameters:
  • x_in (ArrayLike) –

    The vector of data of which we want the entropy.

  • w (float, default: 1.0 ) –

    The width (exponential scale length) of the Laplace distribution to be used in kernel-density estimation, by default 1.0

  • approx_mode (str, default: 'size' ) –

    How to balance execution speed and accuracy, by default "size" The approx_mode can be one of: exact The exact integral is computed (can take ~0.25 sec per 10^6 values). approx The integral is approximated by histogramming the data, smoothing that, and using Simpson's rule on the PDF samples that result. size Uses "approx" if len(x)>200000, or "exact" otherwise.

Returns:
  • float

    The Laplace-kernel entropy.

Raises:
  • ValueError

    If the input array x has no values, or w is not positive.

Source code in mass2/mathstat/entropy.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def laplace_entropy(x_in: ArrayLike, w: float = 1.0, approx_mode: str = "size") -> float:
    """Compute the entropy of data set `x_in`, using kernel-density estimation with
    the Laplace kernel $k(x) \\propto$ exp(-abs(x-x0)/w).

    Parameters
    ----------
    x_in : ArrayLike
        The vector of data of which we want the entropy.
    w : float, optional
        The width (exponential scale length) of the Laplace distribution used in
        kernel-density estimation, by default 1.0.
    approx_mode : str, optional
        How to balance execution speed and accuracy, by default "size".
        ``exact``  The exact integral is computed (can take ~0.25 sec per 10^6 values).
        ``approx`` The integral is approximated by histogramming the data, smoothing
                   that, and using Simpson's rule on the PDF samples that result.
        ``size``   Uses "approx" if len(x)>200000, or "exact" otherwise.

    Returns
    -------
    float
        The Laplace-kernel entropy.

    Raises
    ------
    ValueError
        If the input array has no values, or `w` is not positive.
    """
    data = np.asarray(x_in)
    if len(data) == 0:
        raise ValueError("laplace_entropy(x) needs at least 1 element in `x`.")
    if w <= 0.0:
        raise ValueError("laplace_entropy(x, w) needs `w>0`.")
    x = np.asarray(data, dtype=DTYPE)

    # Resolve the automatic choice of mode by data size.
    mode = approx_mode
    if mode == "size":
        mode = "exact" if len(data) <= 200000 else "approx"

    if mode.startswith("exact"):
        return laplace_entropy_array(x, w)
    return laplace_entropy_approx(x, w)

laplace_entropy_approx(x, w=1.0)

Approximate the entropy of data set x with a binned histogram and the Laplace-distribution kernel-density estimator of the probability distribution.

Parameters:
  • x (ArrayLike) –

    The vector of data of which we want the entropy.

  • w (float, default: 1.0 ) –

    The width (exponential scale length) of the Laplace distribution to be used in kernel-density estimation, by default 1.0

Returns:
  • float

    The approximate Laplace-kernel entropy.

Source code in mass2/mathstat/entropy.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
def laplace_entropy_approx(x: ArrayLike, w: float = 1.0) -> float:
    """Approximage the entropy of data set `x` with a binned histogram and the Laplace-distribution
    kernel-density estimator of the probability distribtion.

    Parameters
    ----------
    x : ArrayLike
        The vector of data of which we want the entropy.
    w : float, optional
        The width (exponential scale length) of the Laplace distribution
            to be used in kernel-density estimation, by default 1.0
    Returns
    -------
    float
        The approximate Laplace-kernel entropy.
    """
    EXTEND_DATA = 5 * w
    BINS_PER_W = 20
    KERNEL_WIDTH_IN_WS = 15.0

    xmin = np.min(x) - EXTEND_DATA
    xmax = np.max(x) + EXTEND_DATA
    nbins = int(0.5 + (xmax - xmin) * BINS_PER_W / w)
    c, b = np.histogram(x, nbins, (xmin, xmax))
    db = b[1] - b[0]
    nx = int(0.5 + KERNEL_WIDTH_IN_WS * w / db)

    kernel = np.zeros(2 * nx + 1)
    for i in range(2 * nx + 1):
        kx = (i - nx) * db
        kernel[i] = np.exp(-np.abs(kx / w))

    # kde = unnormalized kernel-density estimator.
    kde = sp.signal.fftconvolve(c, kernel, mode="full")[nx:-nx]
    minkern = kernel.min()
    kde[kde < minkern] = minkern

    # p = normalized probability distribution.
    norm = 1.0 / sp.integrate.simpson(kde, dx=db)
    p = kde * norm
    return -sp.integrate.simpson(p * np.log(p), dx=db)

laplace_entropy_array(x, w=1.0)

Compute the entropy of data set x where the kernel is the Laplace kernel, $k(x) \propto$ exp(-abs(x-x0)/w).

Parameters:
  • x (ArrayLike) –

    The vector of data of which we want the entropy.

  • w (float, default: 1.0 ) –

    The width (exponential scale length) of the Laplace distribution to be used in kernel-density estimation, by default 1.0

Returns:
  • float

    The exact Laplace-kernel entropy, regardless of the input array size.

Source code in mass2/mathstat/entropy.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
@njit
def laplace_entropy_array(x: ArrayLike, w: float = 1.0) -> float:
    """Compute the entropy of data set `x` where the kernel is the Laplace kernel,
    $k(x) \\propto$ exp(-abs(x-x0)/w).

    The integral of -p*log(p) is evaluated exactly, in closed form, interval by
    interval between consecutive sorted data points.

    Parameters
    ----------
    x : ArrayLike
        The vector of data of which we want the entropy.
    w : float, optional
        The width (exponential scale length) of the Laplace distribution
            to be used in kernel-density estimation, by default 1.0
    Returns
    -------
    float
        The exact Laplace-kernel entropy, regardless of the input array size.
    """
    x = np.asarray(x)
    N = len(x)
    c = np.zeros(N, dtype=DTYPE)
    d = np.zeros(N, dtype=DTYPE)
    y = np.sort(x) / w  # work in units of the kernel width

    # c[i] and d[i] accumulate kernel contributions from points at-or-below and
    # at-or-above y[i], built with one pass in each direction; e[i] is the
    # attenuation exp(-gap) across each gap between consecutive points.
    e = np.exp(-np.diff(y))
    stepsize = 1.0 / (2 * w * N)
    c[0] = stepsize
    for i in range(1, N):
        c[i] = e[i - 1] * c[i - 1] + stepsize
    d[N - 1] = stepsize
    for i in range(N - 2, -1, -1):
        d[i] = e[i] * d[i + 1] + stepsize

    # Contributions of the two open outer intervals, then each inner interval.
    H = w * d[0] * (1 - np.log(d[0])) + w * c[N - 1] * (1 - np.log(c[N - 1]))
    for i in range(N - 1):
        dp = d[i + 1] * e[i]
        r1 = c[i] / d[i + 1]
        e2 = np.sqrt(e[i])
        # BUG FIX: use np.arctan, not np.atan. The `np.atan` alias only exists in
        # NumPy >= 2.0; np.arctan is the long-standing name, gives identical
        # results, and is supported under numba's @njit.
        H += 4 * w * np.sqrt(c[i] * dp) * np.arctan((e2 - 1.0 / e2) * r1**0.5 / (r1 + 1.0))
        H += w * (dp - c[i]) * (np.log(c[i] + dp) - 1)
        A, B = d[i + 1], c[i] * e[i]
        H -= w * (A - B) * (np.log(A + B) - 1)
    return H

Fitting

mass2.mathstat.fitting

Model-fitting utilities.

Joe Fowler, NIST

fit_kink_model(x, y, kbounds=None)

Find the linear least-squares solution for a kinked-linear model.

The model is f(x) = a+b(x-k) for x<k and f(x) = a+c(x-k) for x>=k, where the 4 parameters are {k,a,b,c}, representing the kink at (x,y)=(k,a) and slopes of b and c for x<k and x>=k.

Given k, the model is linear in the other parameters, which can thus be found exactly by linear algebra. The best value of k is found by use of the Bounded method of the sp.optimize.minimize_scalar() routine.

Parameters:
  • x (ArrayLike) –

    The input data x-values

  • y (ArrayLike) –

    The input data y-values

  • kbounds (Optional[tuple[float, float]], default: None ) –

    Bounds on k, by default None. If (u,v), then the minimize_scalar is used to find the best k strictly in u<=k<=v. If None, then use the Brent method, which will start with (b1,b2) as a search bracket where b1 and b2 are the 2nd lowest and 2nd highest values of x.

Returns:
  • model_y, abc, X2) where:

    model_y : NDArray[float] an array of the model y-values; kabc : NDArray[float] the best-fit values of the kink location and the 3 linear parameters; X2 : float is the sum of square differences between y and model_y.

Raises:
  • ValueError

    if k doesn't satisfy x.min() < k < x.max()

Examples:

x = np.arange(10, dtype=float) y = np.array(x) truek = 4.6 y[x>truek] = truek y += np.random.default_rng().standard_normal(len(x))*.15 model, (kbest,a,b,c), X2 = fit_kink_model(x, y, kbounds=(3,6)) plt.clf() plt.plot(x, y, "or", label="Noisy data to be fit") xi = np.linspace(x[0], kbest, 200) xj = np.linspace(kbest, x[-1], 200) plt.plot(xi, a+b*(xi-kbest), "--k", label="Best-fit kinked model") plt.plot(xj, a+c*(xj-kbest), "--k") plt.legend()

Source code in mass2/mathstat/fitting.py
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def fit_kink_model(x: ArrayLike, y: ArrayLike, kbounds: tuple[float, float] | None = None) -> tuple[NDArray, NDArray, float]:
    """Find the linear least-squares solution for a kinked-linear model.

    The model is f(x) = a+b(x-k) for x<k and f(x)=a+c(x-k) for x>=k, where
    the 4 parameters are {k,a,b,c}, representing the kink at (x,y)=(k,a) and
    slopes of b and c for x<k and x>= k.

    Given k, the model is linear in the other parameters, which can thus be
    found exactly by linear algebra. The best value of k is found by use of
    the Bounded method of the sp.optimize.minimize_scalar() routine.

    Parameters
    ----------
    x : ArrayLike
        The input data x-values
    y : ArrayLike
        The input data y-values
    kbounds : Optional[tuple[float, float]], optional
        Bounds on k, by default None.
        If (u,v), then minimize_scalar is used to find the best k strictly in u<=k<=v.
        If None, the bounds default to (x.min(), x.max()), and the same Bounded
        search is performed over the full range of the x data.

    Returns
    -------
    (model_y, kabc, X2) where:
        model_y : NDArray[float]
            an array of the model y-values;
        kabc : NDArray[float]
            the best-fit values of the kink location and the 3 linear parameters;
        X2 : float
            is the sum of square differences between y and model_y.

    Raises
    ------
    ValueError
        if `kbounds` extends outside the range [x.min(), x.max()] of the x data

    Examples
    --------
    x = np.arange(10, dtype=float)
    y = np.array(x)
    truek = 4.6
    y[x>truek] = truek
    y += np.random.default_rng().standard_normal(len(x))*.15
    model, (kbest,a,b,c), X2 = fit_kink_model(x, y, kbounds=(3,6))
    plt.clf()
    plt.plot(x, y, "or", label="Noisy data to be fit")
    xi = np.linspace(x[0], kbest, 200)
    xj = np.linspace(kbest, x[-1], 200)
    plt.plot(xi, a+b*(xi-kbest), "--k", label="Best-fit kinked model")
    plt.plot(xj, a+c*(xj-kbest), "--k")
    plt.legend()
    """
    x = np.asarray(x)
    y = np.asarray(y)

    def penalty(k: float, x: NDArray, y: NDArray) -> float:
        "Extract only the cost function from kink_model()"
        _, _, X2 = kink_model(k, x, y)
        return X2

    if kbounds is None:
        kbounds = (x.min(), x.max())
    elif kbounds[0] < x.min() or kbounds[1] > x.max():
        raise ValueError(f"kbounds ({kbounds}) must be within the range of x data")
    # 1-D bounded minimization over k; the 3 linear parameters are solved exactly
    # inside kink_model() for each trial k.
    optimum = sp.optimize.minimize_scalar(penalty, args=(x, y), method="Bounded", bounds=kbounds)
    kbest = optimum.x
    model, abc, X2 = kink_model(kbest, x, y)
    return model, np.hstack([kbest, abc]), X2

kink_model(k, x, y)

Compute a kinked-linear model on data {x,y} with kink at x=k.

The model is f(x) = a+b(x-k) for x<k and f(x) = a+c(x-k) for x>=k, where the 4 parameters are {k,a,b,c}, representing the kink at (x,y)=(k,a) and slopes of b and c for x<k and x>=k.

For a fixed k, the model is linear in the other parameters, whose linear least-squares values can thus be found exactly by linear algebra. This function computes them.

Returns (model_y, (a,b,c), X2) where: model_y is an array of the model y-values; (a,b,c) are the best-fit values of the linear parameters; X2 is the sum of square differences between y and model_y.

Parameters:
  • k (float) –

    Location of the kink, in x coordinates

  • x (ArrayLike) –

    The input data x-values

  • y (ArrayLike) –

    The input data y-values

Returns:
  • model_y, abc, X2) where:

    model_y : NDArray[float] an array of the model y-values; abc : NDArray[float] the best-fit values of the linear parameters; X2 : float is the sum of square differences between y and model_y.

Raises:
  • ValueError

    if k doesn't satisfy x.min() < k < x.max()

Source code in mass2/mathstat/fitting.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def kink_model(k: float, x: ArrayLike, y: ArrayLike) -> tuple[NDArray, NDArray, float]:
    """Compute a kinked-linear model on data {x,y} with kink at x=k.

    The model is f(x) = a+b(x-k) for x<k and f(x)=a+c(x-k) for x>=k, where
    the 4 parameters are {k,a,b,c}, representing the kink at (x,y)=(k,a) and
    slopes of b and c for x<k and x>= k.

    For a fixed k, the model is linear in the other parameters, whose linear
    least-squares values can thus be found exactly by linear algebra. This
    function computes them.

    Returns (model_y, (a,b,c), X2) where:
    model_y is an array of the model y-values, in the same order as the input x;
    (a,b,c) are the best-fit values of the linear parameters;
    X2 is the sum of square differences between y and model_y.

    Parameters
    ----------
    k : float
        Location of the kink, in x coordinates
    x : ArrayLike
        The input data x-values (need not be sorted)
    y : ArrayLike
        The input data y-values

    Returns
    -------
    (model_y, abc, X2) where:
        model_y : NDArray[float]
            an array of the model y-values, in the same order as the input x;
        abc : NDArray[float]
            the best-fit values of the linear parameters;
        X2 : float
            is the sum of square differences between y and model_y.

    Raises
    ------
    ValueError
        if k doesn't satisfy x.min() < k < x.max()
    """
    x = np.asarray(x)
    y = np.asarray(y)
    below = x < k
    xi = x[below]
    yi = y[below]
    xj = x[~below]
    yj = y[~below]
    N = len(x)
    if len(xi) == 0 or len(xj) == 0:
        xmin = x.min()
        xmax = x.max()
        raise ValueError(f"k={k:g} should be in range [xmin,xmax], or [{xmin:g},{xmax:g}].")

    # Normal equations for the 3 linear parameters (a, b, c), given fixed k.
    dxi = xi - k
    dxj = xj - k
    si = dxi.sum()
    sj = dxj.sum()
    si2 = (dxi**2).sum()
    sj2 = (dxj**2).sum()
    A = np.array([[N, si, sj], [si, si2, 0], [sj, 0, sj2]])
    v = np.array([y.sum(), (yi * dxi).sum(), (yj * dxj).sum()])
    abc = np.linalg.solve(A, v)

    # BUG FIX: place model values back in the original order of `x`. Previously
    # the model was built as hstack([below-k values, at/above-k values]) and then
    # compared element-wise against `y` in its original order, which misaligned
    # model vs. y (wrong model_y and inflated X2) whenever x was not sorted.
    # For sorted x the result is identical to the old behavior.
    model = np.empty(N, dtype=float)
    model[below] = abc[0] + abc[1] * dxi
    model[~below] = abc[0] + abc[2] * dxj
    X2 = ((model - y) ** 2).sum()
    return model, abc, X2

Interpolation

interpolate.py

Module mass2.mathstat.interpolate

Contains interpolations functions not readily available elsewhere.

CubicSpline - Perform an exact cubic spline through the data, with either specified slope at the end of the interval or 'natural boundary conditions' (y''=0 at ends).

GPRSpline - Create a smoothing spline based on the theory of Gaussian process regression. Finds the curvature penalty by maximizing the Bayesian marginal likelihood. Intended to supersede SmoothingSpline, but very similar. Differs in how the curvature and data fidelity are balanced.

SmoothingSpline - Create a smoothing spline that does not exactly interpolate the data, but finds the cubic spline with lowest "curvature energy" among all splines that meet the maximum allowed value of chi-squared.

SmoothingSplineLog - Create a SmoothingSpline using the log of the x,y points.

NaturalBsplineBasis - A tool for expressing a spline basis using B-splines but also enforcing 'natural boundary conditions'.

Joe Fowler, NIST Created Feb 2014

CubicSpline

An exact cubic spline, with either a specified slope or 'natural boundary conditions' (y''=0) at ends of interval.

Note that the interface is similar to scipy.interpolate.InterpolatedUnivariateSpline, but the behavior is different. The scipy version will remove the 2nd and 2nd-to-last data points from the set of knots as a way of using the 2 extra degrees of freedom. This class instead sets the 1st or 2nd derivatives at the end of the interval to use the extra degrees of freedom.

This code is inspired by section 3.3. of Numerical Recipes, 3rd Edition.

Usage: x=np.linspace(4,12,20) y=(x-6)**2+np.random.standard_normal(20) cs = mass2.CubicSpline(x, y) plt.clf() plt.plot(x,y,'ok') xa = np.linspace(0,16,200) plt.plot(xa, cs(xa), 'b-')

Source code in mass2/mathstat/interpolate.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
class CubicSpline:
    """An exact cubic spline, with either a specified slope or 'natural boundary
    conditions' (y''=0) at ends of interval.

    Note that the interface is similar to
    scipy.interpolate.InterpolatedUnivariateSpline, but the behavior is
    different. The scipy version will remove the 2nd and 2nd-to-last data points
    from the set of knots as a way of using the 2 extra degrees of freedom. This
    class instead sets the 1st or 2nd derivatives at the end of the interval to
    use the extra degrees of freedom.

    This code is inspired by section 3.3. of Numerical Recipes, 3rd Edition.

    Usage:
    x=np.linspace(4,12,20)
    y=(x-6)**2+np.random.standard_normal(20)
    cs = mass2.CubicSpline(x, y)
    plt.clf()
    plt.plot(x,y,'ok')
    xa = np.linspace(0,16,200)
    plt.plot(xa, cs(xa), 'b-')
    """

    def __init__(self, x: ArrayLike, y: ArrayLike, yprime1: float | None = None, yprimeN: float | None = None):
        """Create an exact cubic spline representation for the function y(x).

        Parameters
        ----------
        x : ArrayLike
            Indepdendent variable values. Will be sorted if not increasing.
        y : ArrayLike
            Dependent variable values.
        yprime1 : float | None, optional
            First derivative at the lower-x boundary, by default None
        yprimeN : float | None, optional
            First derivative at the upper-x boundary, by default None.
            slope of None means to use 'natural boundary conditions' by fixing the
            second derivative to zero at that boundary.
        """
        argsort = np.argsort(x)
        self._x = np.array(x, dtype=float)[argsort]
        self._y = np.array(y, dtype=float)[argsort]
        self._n = len(argsort)
        self._y2 = np.zeros(self._n, dtype=float)
        self.yprime1 = yprime1
        self.yprimeN = yprimeN
        self._compute_y2()

    def _compute_y2(self) -> None:
        """Compute the second derivatives at the knots."""
        self.ystep = self._y[1:] - self._y[:-1]
        self.xstep = self._x[1:] - self._x[:-1]

        u = self.ystep / self.xstep
        u[1:] -= u[:-1]

        # For natural boundary conditions, u[0]=y2[0]=0.
        if self.yprime1 is None:
            u[0] = 0
            self._y2[0] = 0
        else:
            u[0] = (3.0 / self.xstep[0]) * (self.ystep[0] / self.xstep[0] - self.yprime1)
            self._y2[0] = -0.5

        for i in range(1, self._n - 1):
            sig = self.xstep[i - 1] / (self._x[i + 1] - self._x[i - 1])
            p = sig * self._y2[i - 1] + 2.0
            self._y2[i] = (sig - 1.0) / p
            u[i] = (6 * u[i] / (self._x[i + 1] - self._x[i - 1]) - sig * u[i - 1]) / p

        # Again, the following is only for natural boundary conditions
        if self.yprimeN is None:
            qn = un = 0.0
        else:
            qn = 0.5
            un = (3.0 / self.xstep[-1]) * (self.yprimeN - self.ystep[-1] / self.xstep[-1])
        self._y2[self._n - 1] = (un - qn * u[self._n - 2]) / (qn * self._y2[self._n - 2] + 1.0)

        # Backsubstitution:
        for k in range(self._n - 2, -1, -1):
            self._y2[k] = self._y2[k] * self._y2[k + 1] + u[k]

        if self.yprime1 is None:
            self.yprime1 = self.ystep[0] / self.xstep[0] - self.xstep[0] * (self._y2[0] / 3.0 + self._y2[1] / 6.0)
        if self.yprimeN is None:
            self.yprimeN = self.ystep[-1] / self.xstep[-1] + self.xstep[-1] * (self._y2[-2] / 6.0 + self._y2[-1] / 3.0)

    def __call__(self, x: ArrayLike | float, der: int = 0) -> NDArray:
        """Return the value of the cubic spline (or its derivative) at x.

        Parameters
        ----------
        x : ArrayLike | float
            Independent variable value(s) at which to evaluate the spline.
        der : int, optional
            Derivative order, by default 0

        Returns
        -------
        NDArray
            Spline result
        """
        scalar = np.isscalar(x)
        x = np.asarray(x)
        if x.size == 0:
            return np.array([])
        elif x.size == 1:
            x.shape = (1,)
        result = np.zeros_like(x, dtype=float)

        # Find which interval 0,...self._n-2 contains the points (or extrapolates to the points)
        position = np.searchsorted(self._x, x) - 1

        # Here, position == -1 means extrapolate below the first interval.
        extrap_low = position < 0
        if extrap_low.any():
            if der == 0:
                h = x[extrap_low] - self._x[0]  # will be negative
                result[extrap_low] = self._y[0] + h * self.yprime1
            elif der == 1:
                result[extrap_low] = self.yprime1
            elif der > 1:
                result[extrap_low] = 0.0

        # position = self._n-1 means extrapolate above the last interval.
        extrap_hi = position >= self._n - 1
        if extrap_hi.any():
            if der == 0:
                h = x[extrap_hi] - self._x[-1]  # will be positive
                result[extrap_hi] = self._y[-1] + h * self.yprimeN
            elif der == 1:
                result[extrap_hi] = self.yprimeN
            elif der > 1:
                result[extrap_hi] = 0.0

        interp = np.logical_and(position >= 0, position < self._n - 1)
        if interp.any():
            klo = position[interp]
            khi = klo + 1
            dx = self.xstep[klo]
            a = (self._x[khi] - x[interp]) / dx
            b = (x[interp] - self._x[klo]) / dx

            if der == 0:
                result[interp] = (
                    a * self._y[klo] + b * self._y[khi] + ((a**3 - a) * self._y2[klo] + (b**3 - b) * self._y2[khi]) * dx * dx / 6.0
                )
            elif der == 1:
                result[interp] = (
                    -self._y[klo] / dx
                    + self._y[khi] / dx
                    + ((-(a**2) + 1.0 / 3) * self._y2[klo] + (b**2 - 1.0 / 3) * self._y2[khi]) * dx / 2.0
                )
            elif der == 2:
                result[interp] = a * self._y2[klo] + b * self._y2[khi]
            elif der == 3:
                result[interp] = (-self._y2[klo] + self._y2[khi]) * dx
            elif der > 3:
                result[interp] = 0.0

        if scalar:
            result = result[0]
        return result

    def variance(self, xtest: ArrayLike) -> NDArray:  # noqa: PLR6301
        """Return a dummy estimate of the variance at points `xtest`."""
        return np.zeros_like(xtest)

__call__(x, der=0)

Return the value of the cubic spline (or its derivative) at x.

Parameters:
  • x (ArrayLike | float) –

    Independent variable value(s) at which to evaluate the spline.

  • der (int, default: 0 ) –

    Derivative order, by default 0

Returns:
  • NDArray

    Spline result

Source code in mass2/mathstat/interpolate.py
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
def __call__(self, x: ArrayLike | float, der: int = 0) -> NDArray:
    """Return the value of the cubic spline (or its derivative) at x.

    Parameters
    ----------
    x : ArrayLike | float
        Independent variable value(s) at which to evaluate the spline.
    der : int, optional
        Derivative order, by default 0

    Returns
    -------
    NDArray
        Spline result
    """
    scalar = np.isscalar(x)
    x = np.asarray(x)
    if x.size == 0:
        return np.array([])
    elif x.size == 1:
        x.shape = (1,)
    result = np.zeros_like(x, dtype=float)

    # Find which interval 0,...self._n-2 contains the points (or extrapolates to the points)
    position = np.searchsorted(self._x, x) - 1

    # Here, position == -1 means extrapolate below the first interval.
    extrap_low = position < 0
    if extrap_low.any():
        if der == 0:
            h = x[extrap_low] - self._x[0]  # will be negative
            result[extrap_low] = self._y[0] + h * self.yprime1
        elif der == 1:
            result[extrap_low] = self.yprime1
        elif der > 1:
            result[extrap_low] = 0.0

    # position = self._n-1 means extrapolate above the last interval.
    extrap_hi = position >= self._n - 1
    if extrap_hi.any():
        if der == 0:
            h = x[extrap_hi] - self._x[-1]  # will be positive
            result[extrap_hi] = self._y[-1] + h * self.yprimeN
        elif der == 1:
            result[extrap_hi] = self.yprimeN
        elif der > 1:
            result[extrap_hi] = 0.0

    interp = np.logical_and(position >= 0, position < self._n - 1)
    if interp.any():
        klo = position[interp]
        khi = klo + 1
        dx = self.xstep[klo]
        a = (self._x[khi] - x[interp]) / dx
        b = (x[interp] - self._x[klo]) / dx

        if der == 0:
            result[interp] = (
                a * self._y[klo] + b * self._y[khi] + ((a**3 - a) * self._y2[klo] + (b**3 - b) * self._y2[khi]) * dx * dx / 6.0
            )
        elif der == 1:
            result[interp] = (
                -self._y[klo] / dx
                + self._y[khi] / dx
                + ((-(a**2) + 1.0 / 3) * self._y2[klo] + (b**2 - 1.0 / 3) * self._y2[khi]) * dx / 2.0
            )
        elif der == 2:
            result[interp] = a * self._y2[klo] + b * self._y2[khi]
        elif der == 3:
            result[interp] = (-self._y2[klo] + self._y2[khi]) * dx
        elif der > 3:
            result[interp] = 0.0

    if scalar:
        result = result[0]
    return result

__init__(x, y, yprime1=None, yprimeN=None)

Create an exact cubic spline representation for the function y(x).

Parameters:
  • x (ArrayLike) –

    Independent variable values. Will be sorted if not increasing.

  • y (ArrayLike) –

    Dependent variable values.

  • yprime1 (float | None, default: None ) –

    First derivative at the lower-x boundary, by default None

  • yprimeN (float | None, default: None ) –

    First derivative at the upper-x boundary, by default None. A slope of None means to use 'natural boundary conditions' by fixing the second derivative to zero at that boundary.

Source code in mass2/mathstat/interpolate.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def __init__(self, x: ArrayLike, y: ArrayLike, yprime1: float | None = None, yprimeN: float | None = None):
    """Create an exact cubic spline representation for the function y(x).

    Parameters
    ----------
    x : ArrayLike
        Indepdendent variable values. Will be sorted if not increasing.
    y : ArrayLike
        Dependent variable values.
    yprime1 : float | None, optional
        First derivative at the lower-x boundary, by default None
    yprimeN : float | None, optional
        First derivative at the upper-x boundary, by default None.
        slope of None means to use 'natural boundary conditions' by fixing the
        second derivative to zero at that boundary.
    """
    argsort = np.argsort(x)
    self._x = np.array(x, dtype=float)[argsort]
    self._y = np.array(y, dtype=float)[argsort]
    self._n = len(argsort)
    self._y2 = np.zeros(self._n, dtype=float)
    self.yprime1 = yprime1
    self.yprimeN = yprimeN
    self._compute_y2()

variance(xtest)

Return a dummy estimate of the variance at points xtest.

Source code in mass2/mathstat/interpolate.py
201
202
203
def variance(self, xtest: ArrayLike) -> NDArray:  # noqa: PLR6301
    """Return a dummy estimate of the variance at points `xtest`."""
    return np.zeros_like(xtest)

GPRSpline

Bases: CubicSpline

A callable object that performs a smoothing cubic spline operation

The smoothing spline is the cubic spline minimizing the "curvature energy" subject to a constraint that the maximum allowed chi-squared is equal to the number of data points. Here curvature energy is defined as the integral of the square of the second derivative from the lowest to the highest knots.

The value of sigmaf fixes the square root of the "function variance". Small values of sigmaf correspond to large penalties on the curvature, so they emphasize low curvature. Large sigmaf places emphasis on fidelity to the data and will have relatively higher curvature (and a higher uncertainty on the derived curve). Setting sigmaf=None (the default) will choose the value that maximizes the Bayesian marginal likelihood of the data and is probably smart.

For further discussion, see Sections 2.2, 2.7, and 6.3 of Rasmussen, C. E., & Williams, K. I. (2006). Gaussian Processes for Machine Learning. Retrieved from http://www.gaussianprocess.org/gpml/chapters/

This object is very similar to SmoothingSpline in this module but is based on Gaussian Process Regression theory. It improves on SmoothingSpline in that: 1. The curvature/data fidelity trade-off is chosen by more principled, Bayesian means. 2. The uncertainty in the spline curve is estimated by GPR theory.

Source code in mass2/mathstat/interpolate.py
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
class GPRSpline(CubicSpline):
    """A callable object that performs a smoothing cubic spline operation

    The smoothing spline is the cubic spline minimizing the "curvature
    energy" subject to a constraint that the maximum allowed chi-squared is
    equal to the number of data points. Here curvature energy is defined as
    the integral of the square of the second derivative from the lowest to
    the highest knots.

    The value of `sigmaf` fixes the square root of the "function variance".
    Small values of `sigmaf` correspond to large penalties on the curvature,
    so they emphasize low curvature. Large `sigmaf` places emphasis on fidelity to
    the data and will have relatively higher curvature (and a higher uncertainty on
    the derived curve). Setting `sigmaf=None` (the default) will choose the value that
    maximizes the Bayesian marginal likelihood of the data and is probably smart.

    For further discussion, see Sections 2.2, 2.7, and 6.3 of
    Rasmussen, C. E., & Williams, K. I. (2006). Gaussian Processes for Machine Learning.
    Retrieved from http://www.gaussianprocess.org/gpml/chapters/

    This object is very similar to `SmoothingSpline` in this module but is based on
    Gaussian Process Regression theory. It improves on `SmoothingSpline` in that:
    1. The curvature/data fidelity trade-off is chosen by more principaled, Bayesian means.
    2. The uncertainty in the spline curve is estimated by GPR theory.
    """

    def __init__(self, x: ArrayLike, y: ArrayLike, dy: ArrayLike, dx: ArrayLike | None = None, sigmaf: float | None = None):
        """Set up the Gaussian Process Regression spline.

        Parameters
        ----------
        x : ArrayLike
            Indepdendent variable values. Will be sorted if not increasing.
        y : ArrayLike
            Dependent variable values.
        dy : ArrayLike
            Uncertainties in y values.
        dx : ArrayLike | None, optional
            Uncertainties in x values, by default None
        sigmaf : float | None, optional
            Allowed function variance, or None to maximize the data likelihood, by default None
        """
        self.x = np.array(x)
        self.y = np.array(y)
        self.dy = np.array(dy)
        self.Nk = len(self.x)
        assert self.Nk == len(self.y)
        assert self.Nk == len(self.dy)

        if dx is None:
            self.dx = np.zeros_like(dy)
            self.err = np.array(np.abs(dy))
        else:
            self.dx = np.array(dx)
            roughfit = np.polyfit(self.x, self.y, 2)
            slope = np.poly1d(np.polyder(roughfit, 1))(self.x)
            self.err = np.sqrt((self.dx * slope) ** 2 + self.dy**2)
        assert self.Nk == len(self.dx)
        assert self.Nk == len(self.err)

        if sigmaf is None:
            sigmaf = self.best_sigmaf()
        self.sigmaf = sigmaf

        H = np.vstack((np.ones_like(self.x), self.x))
        K = np.zeros((self.Nk, self.Nk), dtype=float)
        sf2 = sigmaf**2
        for i in range(self.Nk):
            K[i, i] = sf2 * k_spline(self.x[i], self.x[i])
            for j in range(i + 1, self.Nk):
                K[i, j] = K[j, i] = sf2 * k_spline(self.x[i], self.x[j])
        Ky = K + np.diag(self.err**2)
        L = np.linalg.cholesky(Ky)
        LH = np.linalg.solve(L, H.T)
        A = LH.T.dot(LH)
        KinvHT = np.linalg.solve(L.T, LH)
        self.L = L
        self.A = A
        self.KinvHT = KinvHT
        beta = np.linalg.solve(A, KinvHT.T).dot(self.y)

        # Compute at test points = self.x
        # We know that these are the knots of a natural cubic spline
        R = H - KinvHT.T.dot(K)
        fbar = np.linalg.solve(L.T, np.linalg.solve(L, K)).T.dot(y)
        gbar = fbar + R.T.dot(beta)
        CubicSpline.__init__(self, self.x, gbar)

    def best_sigmaf(self) -> float:
        """Return the sigmaf value that maximizes the marginal Bayesian likelihood."""
        guess = np.median(self.err / self.y)
        result = sp.optimize.minimize_scalar(lambda x: -self._marginal_like(x), [guess / 1e4, guess * 1e4])
        if result.success:
            # _marginal_like depends only on the abs(argument), so take minimizer as positive.
            return np.abs(result.x)
        raise (ValueError("Could not maximimze the marginal likelihood"))

    def _marginal_like(self, sigmaf: float) -> float:
        """Compute the marginal likelihood of the data given sigmaf.

        Parameters
        ----------
        sigmaf : float
            The square root of the function variance

        Returns
        -------
        float
            The marginal likelihood (up to an additive constant)
        """
        H = np.vstack((np.ones_like(self.x), self.x))
        K = np.zeros((self.Nk, self.Nk), dtype=float)
        sf2 = sigmaf**2
        for i in range(self.Nk):
            K[i, i] = sf2 * k_spline(self.x[i], self.x[i])
            for j in range(i + 1, self.Nk):
                K[i, j] = K[j, i] = sf2 * k_spline(self.x[i], self.x[j])
        Ky = K + np.diag(self.err**2)
        L = np.linalg.cholesky(Ky)
        LH = np.linalg.solve(L, H.T)
        A = LH.T.dot(LH)
        KinvHT = np.linalg.solve(L.T, LH)
        C = KinvHT.dot(np.linalg.solve(A, KinvHT.T))
        yCy = self.y.dot(C.dot(self.y))
        Linvy = np.linalg.solve(L, self.y)
        yKinvy = Linvy.dot(Linvy)
        return -0.5 * ((self.Nk - 2) * np.log(2 * np.pi) + np.linalg.slogdet(A)[1] + np.linalg.slogdet(Ky)[1] - yCy + yKinvy)

    def variance(self, xtest: ArrayLike) -> NDArray:
        """Returns the variance for function evaluations at the test points `xtest`.

        This equals the diagonal of `self.covariance(xtest)`, but for large test sets,
        this method computes only the diagonal and should therefore be faster."""
        v = []
        xtest = np.asarray(xtest)
        for x in np.asarray(xtest):
            Ktest = self.sigmaf**2 * k_spline(x, self.x)
            LinvKtest = np.linalg.solve(self.L, Ktest)
            cov_ftest = self.sigmaf**2 * k_spline(x, x) - (LinvKtest**2).sum()
            R = np.array((1, x)) - self.KinvHT.T.dot(Ktest)
            v.append(cov_ftest + R.dot(np.linalg.solve(self.A, R)))
        if np.isscalar(xtest):
            return v[0]
        return np.array(v)

    def covariance(self, xtest: ArrayLike) -> NDArray:
        """Returns the covariance between function evaluations at the test points `xtest`."""
        if np.isscalar(xtest):
            return self.variance(xtest)
        xtest = np.asarray(xtest)

        Ktest = self.sigmaf**2 * np.vstack([k_spline(x, self.x) for x in xtest]).T
        LinvKtest = np.linalg.solve(self.L, Ktest)
        cov_ftest = self.sigmaf**2 * np.vstack([k_spline(x, xtest) for x in xtest])
        cov_ftest -= LinvKtest.T.dot(LinvKtest)
        R = np.vstack((np.ones(len(xtest)), xtest))
        R -= self.KinvHT.T.dot(Ktest)
        return cov_ftest + R.T.dot(np.linalg.solve(self.A, R))

__init__(x, y, dy, dx=None, sigmaf=None)

Set up the Gaussian Process Regression spline.

Parameters:
  • x (ArrayLike) –

    Independent variable values. Will be sorted if not increasing.

  • y (ArrayLike) –

    Dependent variable values.

  • dy (ArrayLike) –

    Uncertainties in y values.

  • dx (ArrayLike | None, default: None ) –

    Uncertainties in x values, by default None

  • sigmaf (float | None, default: None ) –

    Allowed function variance, or None to maximize the data likelihood, by default None

Source code in mass2/mathstat/interpolate.py
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
def __init__(self, x: ArrayLike, y: ArrayLike, dy: ArrayLike, dx: ArrayLike | None = None, sigmaf: float | None = None):
    """Set up the Gaussian Process Regression spline.

    Parameters
    ----------
    x : ArrayLike
        Indepdendent variable values. Will be sorted if not increasing.
    y : ArrayLike
        Dependent variable values.
    dy : ArrayLike
        Uncertainties in y values.
    dx : ArrayLike | None, optional
        Uncertainties in x values, by default None
    sigmaf : float | None, optional
        Allowed function variance, or None to maximize the data likelihood, by default None
    """
    self.x = np.array(x)
    self.y = np.array(y)
    self.dy = np.array(dy)
    self.Nk = len(self.x)
    assert self.Nk == len(self.y)
    assert self.Nk == len(self.dy)

    if dx is None:
        self.dx = np.zeros_like(dy)
        self.err = np.array(np.abs(dy))
    else:
        self.dx = np.array(dx)
        roughfit = np.polyfit(self.x, self.y, 2)
        slope = np.poly1d(np.polyder(roughfit, 1))(self.x)
        self.err = np.sqrt((self.dx * slope) ** 2 + self.dy**2)
    assert self.Nk == len(self.dx)
    assert self.Nk == len(self.err)

    if sigmaf is None:
        sigmaf = self.best_sigmaf()
    self.sigmaf = sigmaf

    H = np.vstack((np.ones_like(self.x), self.x))
    K = np.zeros((self.Nk, self.Nk), dtype=float)
    sf2 = sigmaf**2
    for i in range(self.Nk):
        K[i, i] = sf2 * k_spline(self.x[i], self.x[i])
        for j in range(i + 1, self.Nk):
            K[i, j] = K[j, i] = sf2 * k_spline(self.x[i], self.x[j])
    Ky = K + np.diag(self.err**2)
    L = np.linalg.cholesky(Ky)
    LH = np.linalg.solve(L, H.T)
    A = LH.T.dot(LH)
    KinvHT = np.linalg.solve(L.T, LH)
    self.L = L
    self.A = A
    self.KinvHT = KinvHT
    beta = np.linalg.solve(A, KinvHT.T).dot(self.y)

    # Compute at test points = self.x
    # We know that these are the knots of a natural cubic spline
    R = H - KinvHT.T.dot(K)
    fbar = np.linalg.solve(L.T, np.linalg.solve(L, K)).T.dot(y)
    gbar = fbar + R.T.dot(beta)
    CubicSpline.__init__(self, self.x, gbar)

best_sigmaf()

Return the sigmaf value that maximizes the marginal Bayesian likelihood.

Source code in mass2/mathstat/interpolate.py
300
301
302
303
304
305
306
307
def best_sigmaf(self) -> float:
    """Return the sigmaf value that maximizes the marginal Bayesian likelihood."""
    guess = np.median(self.err / self.y)
    result = sp.optimize.minimize_scalar(lambda x: -self._marginal_like(x), [guess / 1e4, guess * 1e4])
    if result.success:
        # _marginal_like depends only on the abs(argument), so take minimizer as positive.
        return np.abs(result.x)
    raise (ValueError("Could not maximimze the marginal likelihood"))

covariance(xtest)

Returns the covariance between function evaluations at the test points xtest.

Source code in mass2/mathstat/interpolate.py
357
358
359
360
361
362
363
364
365
366
367
368
369
def covariance(self, xtest: ArrayLike) -> NDArray:
    """Returns the covariance between function evaluations at the test points `xtest`."""
    if np.isscalar(xtest):
        return self.variance(xtest)
    xtest = np.asarray(xtest)

    Ktest = self.sigmaf**2 * np.vstack([k_spline(x, self.x) for x in xtest]).T
    LinvKtest = np.linalg.solve(self.L, Ktest)
    cov_ftest = self.sigmaf**2 * np.vstack([k_spline(x, xtest) for x in xtest])
    cov_ftest -= LinvKtest.T.dot(LinvKtest)
    R = np.vstack((np.ones(len(xtest)), xtest))
    R -= self.KinvHT.T.dot(Ktest)
    return cov_ftest + R.T.dot(np.linalg.solve(self.A, R))

variance(xtest)

Returns the variance for function evaluations at the test points xtest.

This equals the diagonal of self.covariance(xtest), but for large test sets, this method computes only the diagonal and should therefore be faster.

Source code in mass2/mathstat/interpolate.py
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
def variance(self, xtest: ArrayLike) -> NDArray:
    """Returns the variance for function evaluations at the test points `xtest`.

    This equals the diagonal of `self.covariance(xtest)`, but for large test sets,
    this method computes only the diagonal and should therefore be faster."""
    v = []
    xtest = np.asarray(xtest)
    for x in np.asarray(xtest):
        Ktest = self.sigmaf**2 * k_spline(x, self.x)
        LinvKtest = np.linalg.solve(self.L, Ktest)
        cov_ftest = self.sigmaf**2 * k_spline(x, x) - (LinvKtest**2).sum()
        R = np.array((1, x)) - self.KinvHT.T.dot(Ktest)
        v.append(cov_ftest + R.dot(np.linalg.solve(self.A, R)))
    if np.isscalar(xtest):
        return v[0]
    return np.array(v)

NaturalBsplineBasis

Represent a cubic B-spline basis in 1D with natural boundary conditions.

That is, f''(x)=0 at the first and last knots. This constraint reduces the effective number of basis functions from (2+Nknots) to Nknots.

Usage: knots = [0,5,8,9,10,12] basis = NaturalBsplineBasis(knots) x = np.linspace(0, 12, 200) plt.clf() for id in range(len(knots)): plt.plot(x, basis(x, id))

Source code in mass2/mathstat/interpolate.py
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
class NaturalBsplineBasis:
    """Represent a cubic B-spline basis in 1D with natural boundary conditions.

    That is, f''(x)=0 at the first and last knots. This constraint reduces the
    effective number of basis functions from (2+Nknots) to Nknots.

    Usage:
    knots = [0,5,8,9,10,12]
    basis = NaturalBsplineBasis(knots)
    x = np.linspace(0, 12, 200)
    plt.clf()
    for id in range(len(knots)):
        plt.plot(x, basis(x, id))
    """

    def __init__(self, knots: ArrayLike):
        """Initialization requires only the list of knots."""
        knots = np.asarray(knots)
        Nk = len(knots)
        b, e = knots[0], knots[-1]
        padknots = np.hstack([[b, b, b], knots, [e, e, e]])

        # Combinations of basis function #1 into 2 and 3 (and #N+2 into N+1
        # and N) are used to enforce the natural B.C. of f''(x)=0 at the ends.
        lowfpp = np.zeros(3, dtype=float)
        hifpp = np.zeros(3, dtype=float)
        for i in (0, 1, 2):
            scoef = np.zeros(Nk + 2, dtype=float)
            scoef[i] = 1.0
            lowfpp[i] = splev(b, sp.interpolate.BSpline(padknots, scoef, 3), der=2)
        for i in (0, 1, 2):
            scoef = np.zeros(Nk + 2, dtype=float)
            scoef[Nk + 1 - i] = 1.0  # go from last to 3rd-to-last
            hifpp[i] = splev(e, sp.interpolate.BSpline(padknots, scoef, 3), der=2)
        self.coef_b = -lowfpp[1:3] / lowfpp[0]
        self.coef_e = -hifpp[1:3] / hifpp[0]

        self.Nk = Nk
        self.knots = np.array(knots)
        self.padknots = padknots

    def __call__(self, x: ArrayLike, id: int, der: int = 0) -> NDArray:
        """Compute the Basis-spline at the points `x` for basis function `id`, or its derivative of degree `der`.

        Parameters
        ----------
        x : ArrayLike
            Independent variable values at which to evaluate the basis function.
        id : int
            Which basis function to evaluate, 0 <= id < Nk
        der : int, optional
            Derivative degree, by default 0

        Returns
        -------
        NDArray
            Basis function values (or derivative values) at `x`.

        Raises
        ------
        ValueError
            If `id` is not in the range 0 <= id < Nk
        """
        if id < 0 or id >= self.Nk:
            raise ValueError(f"Require 0 <= id < Nk={self.Nk}")
        coef = np.zeros(self.Nk + 2, dtype=float)
        coef[id + 1] = 1.0
        if id < 2:
            coef[0] = self.coef_b[id]
        elif id >= self.Nk - 2:
            coef[-1] = self.coef_e[self.Nk - id - 1]
        return splev(x, (self.padknots, coef, 3), der=der)

    def values_matrix(self, der: int = 0) -> NDArray:
        """Return matrix M where M_ij = value at knot i for basis function j.
        If der>0, then return the derivative of that order instead of the value."""
        # Note the array is naturally built by vstack as the Transpose of what we want.
        return np.vstack([self(self.knots, id, der=der) for id in range(self.Nk)]).T

    def expand_coeff(self, beta: NDArray) -> NDArray:
        """Given coefficients of this length-Nk basis, return the coefficients
        needed by FITPACK, which are of length Nk+2."""
        first = beta[0] * self.coef_b[0] + beta[1] * self.coef_b[1]
        last = beta[-1] * self.coef_e[0] + beta[-2] * self.coef_e[1]
        return np.hstack([first, beta, last])

__call__(x, id, der=0)

Compute the Basis-spline at the points x for basis function id, or its derivative of degree der.

Parameters:
  • x (ArrayLike) –

    Independent variable values at which to evaluate the basis function.

  • id (int) –

    Which basis function to evaluate, 0 <= id < Nk

  • der (int, default: 0 ) –

    Derivative degree, by default 0

Returns:
  • NDArray

    Basis function values (or derivative values) at x.

Raises:
  • ValueError

    If id is not in the range 0 <= id < Nk

Source code in mass2/mathstat/interpolate.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
def __call__(self, x: ArrayLike, id: int, der: int = 0) -> NDArray:
    """Compute the Basis-spline at the points `x` for basis function `id`, or its derivative of degree `der`.

    Parameters
    ----------
    x : ArrayLike
        Independent variable values at which to evaluate the basis function.
    id : int
        Which basis function to evaluate, 0 <= id < Nk
    der : int, optional
        Derivative degree, by default 0

    Returns
    -------
    NDArray
        Basis function values (or derivative values) at `x`.

    Raises
    ------
    ValueError
        If `id` is not in the range 0 <= id < Nk
    """
    if id < 0 or id >= self.Nk:
        raise ValueError(f"Require 0 <= id < Nk={self.Nk}")
    coef = np.zeros(self.Nk + 2, dtype=float)
    coef[id + 1] = 1.0
    if id < 2:
        coef[0] = self.coef_b[id]
    elif id >= self.Nk - 2:
        coef[-1] = self.coef_e[self.Nk - id - 1]
    return splev(x, (self.padknots, coef, 3), der=der)

__init__(knots)

Initialization requires only the list of knots.

Source code in mass2/mathstat/interpolate.py
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
def __init__(self, knots: ArrayLike):
    """Initialization requires only the list of knots."""
    knots = np.asarray(knots)
    nknots = len(knots)
    lo, hi = knots[0], knots[-1]
    # Triple-pad each end so the cubic B-spline basis is complete on [lo, hi].
    padknots = np.hstack([[lo, lo, lo], knots, [hi, hi, hi]])

    # The natural boundary condition f''(x)=0 at each end is enforced by
    # folding basis function #0 into #1 and #2 (and #N+1 into #N and #N-1).
    # Measure the second derivative of the three end-most basis functions
    # at each boundary to compute the folding coefficients.
    d2_lo = np.zeros(3, dtype=float)
    d2_hi = np.zeros(3, dtype=float)
    for k in (0, 1, 2):
        c = np.zeros(nknots + 2, dtype=float)
        c[k] = 1.0
        d2_lo[k] = splev(lo, sp.interpolate.BSpline(padknots, c, 3), der=2)
        c = np.zeros(nknots + 2, dtype=float)
        c[nknots + 1 - k] = 1.0  # count back from the last basis function
        d2_hi[k] = splev(hi, sp.interpolate.BSpline(padknots, c, 3), der=2)
    self.coef_b = -d2_lo[1:3] / d2_lo[0]
    self.coef_e = -d2_hi[1:3] / d2_hi[0]

    self.Nk = nknots
    self.knots = np.array(knots)
    self.padknots = padknots

expand_coeff(beta)

Given coefficients of this length-Nk basis, return the coefficients needed by FITPACK, which are of length Nk+2.

Source code in mass2/mathstat/interpolate.py
451
452
453
454
455
456
def expand_coeff(self, beta: NDArray) -> NDArray:
    """Convert coefficients in this length-Nk natural basis into the
    length-(Nk+2) coefficient vector that FITPACK expects, where the two
    extra end coefficients carry the natural-boundary-condition folding."""
    head = self.coef_b[0] * beta[0] + self.coef_b[1] * beta[1]
    tail = self.coef_e[0] * beta[-1] + self.coef_e[1] * beta[-2]
    return np.concatenate([[head], beta, [tail]])

values_matrix(der=0)

Return matrix M where M_ij = value at knot i for basis function j. If der>0, then return the derivative of that order instead of the value.

Source code in mass2/mathstat/interpolate.py
445
446
447
448
449
def values_matrix(self, der: int = 0) -> NDArray:
    """Return the matrix M with M_ij = value of basis function j at knot i.
    When der > 0, return derivatives of that order instead of values."""
    rows_per_basis = [self(self.knots, j, der=der) for j in range(self.Nk)]
    # vstack assembles one row per basis function; transpose so knots run
    # along rows and basis functions along columns.
    return np.vstack(rows_per_basis).T

SmoothingSpline

A callable object that performs a smoothing cubic spline operation, using the NaturalBsplineBasis object for the basis representation of splines.

The smoothing spline is the cubic spline minimizing the "curvature energy" subject to a constraint that the maximum allowed chi-squared is equal to the number of data points. Here curvature energy is defined as the integral of the square of the second derivative from the lowest to the highest knots.

For a proof see Reinsch, C. H. (1967). "Smoothing by spline functions." Numerische Mathematik, 10(3), 177-183. http://doi.org/10.1007/BF02162161

Source code in mass2/mathstat/interpolate.py
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
class SmoothingSpline:
    """A callable object that performs a smoothing cubic spline operation, using
    the NaturalBsplineBasis object for the basis representation of splines.

    The smoothing spline is the cubic spline minimizing the "curvature
    energy" subject to a constraint that the maximum allowed chi-squared is
    equal to the number of data points. Here curvature energy is defined as
    the integral of the square of the second derivative from the lowest to
    the highest knots.

    For a proof see Reinsch, C. H. (1967). "Smoothing by spline functions."
    Numerische Mathematik, 10(3), 177-183. http://doi.org/10.1007/BF02162161
    """

    def __init__(self, x: ArrayLike, y: ArrayLike, dy: ArrayLike, dx: ArrayLike | None = None, maxchisq: float | None = None):
        """Smoothing spline for data {x,y} with errors {dy} on the y values
        and {dx} on the x values (or zero if not given).

        If dx errors are given, a global quadratic fit is done to the data to
        estimate the local slope. If that's a poor choice, then you should
        combine your dx and dy errors to create a sensible single error list,
        and you should pass that in as dy.

        maxchisq specifies the largest allowed value of chi-squared (the sum of
        the squares of the differences y_i-f(x_i), divided by the variance v_i).
        If not given, this defaults to the number of data values. When a
        (weighted) least squares fit of a line to the data meets the maxchisq
        constraint, then the actual chi-squared will be less than maxchisq.
        """
        self.x = np.array(x)  # copy; rescaled in place below
        self.y = np.array(y)
        self.dy = np.array(dy)
        if dx is None:
            err = np.array(np.abs(dy))
            dx = np.zeros_like(err)
        else:
            # Convert x errors into equivalent y errors via the slope of a
            # global quadratic fit, then add in quadrature with dy.
            roughfit = np.polyfit(self.x, self.y, 2)
            slope = np.poly1d(np.polyder(roughfit, 1))(x)
            err = np.sqrt((np.asarray(dx) * slope) ** 2 + self.dy**2)

        # Rescale x to RMS=1 for numerical conditioning.
        self.xscale = (self.x**2).mean() ** 0.5
        self.x /= self.xscale
        self.dx = np.array(dx) / self.xscale
        self.err = err
        self.Nk = len(self.x)
        if maxchisq is None:
            self.maxchisq = float(self.Nk)
        else:
            self.maxchisq = maxchisq

        self.basis = NaturalBsplineBasis(self.x)
        self.N0 = self.basis.values_matrix(0)
        self.N2 = self.basis.values_matrix(2)
        self.Omega = self._compute_Omega(self.x, self.N2)
        self.smooth(chisq=self.maxchisq)

    @staticmethod
    def _compute_Omega(knots: NDArray, N2: NDArray) -> NDArray:
        """Given the matrix M2 of second derivates at the knots (that is, M2_ij is
        the value of B_j''(x_i), second derivative of basis function #j at knot i),
        compute the matrix Omega, where Omega_ij is the integral over the entire
        domain of the product B_i''(x) B_j''(x). This can be done because each B_i''(x)
        is piecewise linear, with the slope changes at each knot location."""

        Nk = len(knots)
        assert N2.shape[0] == Nk
        assert N2.shape[1] == Nk
        Omega = np.zeros_like(N2)
        for i in range(Nk):
            for j in range(i + 1):
                # Exact integral of the product of two piecewise-linear
                # functions over each inter-knot interval (trapezoid pieces).
                for k in range(Nk - 1):
                    Omega[i, j] += (N2[k + 1, i] * N2[k, j] + N2[k + 1, j] * N2[k, i]) * (knots[k + 1] - knots[k]) / 6.0
                for k in range(Nk):
                    Omega[i, j] += N2[k, i] * N2[k, j] * (knots[min(k + 1, Nk - 1)] - knots[max(0, k - 1)]) / 3.0
                Omega[j, i] = Omega[i, j]  # Omega is symmetric
        return Omega

    def smooth(self, chisq: float | None = None) -> None:
        """Choose the value of the curve at the knots so as to achieve the
        smallest possible curvature subject to the constraint that the
        sum over all {x,y} pairs S = [(y-f(x))/dy]^2 <= chisq"""
        if chisq is None:
            chisq = self.Nk

        Dinv = self.err ** (-2)  # Vector but stands for diagonals of a diagonal matrix.
        NTDinv = self.N0.T * Dinv
        # Normal equations of the weighted least-squares fit: lhs @ beta = rhs.
        lhs = np.dot(NTDinv, self.N0)
        rhs = np.dot(self.N0.T, Dinv * self.y)

        def best_params(p: NDArray) -> NDArray:
            """Return the best-fit parameters for a given curvature penalty p."""
            # Reinsch parameterization: p=1 gives the pure least-squares fit;
            # p->0 weights curvature (Omega) ever more heavily.
            return np.linalg.solve(p * (lhs - self.Omega) + self.Omega, p * rhs)

        def chisq_difference(p: NDArray, target_chisq: float) -> float:
            """Return the difference between the chi-squared for curvature penalty p
            and the target chi-squared."""
            # If curvature is too small, the computation can become singular.
            # Avoid this by returning a crazy-high chisquared, as needed.
            try:
                beta = best_params(p)
            except np.linalg.LinAlgError:
                return 1e99
            ys = np.dot(self.N0, beta)
            chisq = np.sum(((self.y - ys) / self.err) ** 2)
            return chisq - target_chisq

        # Root-find the penalty p at which chi-squared equals the target.
        mincurvature = 1e-20
        pbest = sp.optimize.brentq(chisq_difference, mincurvature, 1, args=(chisq,))
        beta = best_params(pbest)
        self.coeff = self.basis.expand_coeff(beta)
        ys = np.dot(self.N0, beta)
        self.actualchisq = np.sum(((self.y - ys) / self.err) ** 2)

        # Store the linear extrapolation outside the knotted region.
        endpoints = np.array([self.x[0], self.x[-1]]) * self.xscale
        val = self.__eval(endpoints, 0, allow_extrapolate=False)
        slope = self.__eval(endpoints, 1, allow_extrapolate=False) * self.xscale
        self.lowline = np.poly1d([slope[0], val[0]])
        self.highline = np.poly1d([slope[1], val[1]])

    def __eval(self, x: ArrayLike, der: int = 0, allow_extrapolate: bool = True) -> NDArray:
        """Return the value of (the `der`-th derivative of) the smoothing spline
        at data points `x`. Outside the knot range, use the stored linear
        extrapolations when `allow_extrapolate` is True."""
        scalar = np.isscalar(x)
        # BUG FIX: `np.asarray` can return the caller's own ndarray, so the
        # former in-place `x /= self.xscale` silently corrupted the caller's
        # data (and raised TypeError for integer arrays). Divide into a fresh
        # float array instead.
        x = np.asarray(x, dtype=float) / self.xscale
        splresult = splev(x, (self.basis.padknots, self.coeff, 3), der=der)
        low = x < self.x[0]
        high = x > self.x[-1]
        if np.any(low) and allow_extrapolate:
            if der == 0:
                splresult[low] = self.lowline(x[low] - self.x[0])
            elif der == 1:
                splresult[low] = self.lowline.coeffs[0]
            elif der >= 2:
                splresult[low] = 0.0
        if np.any(high) and allow_extrapolate:
            if der == 0:
                splresult[high] = self.highline(x[high] - self.x[-1])
            elif der == 1:
                splresult[high] = self.highline.coeffs[0]
            elif der >= 2:
                splresult[high] = 0.0
        if der > 0:
            # Undo the internal x-rescaling for derivative units.
            splresult /= self.xscale**der
        if scalar:
            splresult = splresult[()]
        return splresult

    def __call__(self, x: ArrayLike, der: int = 0) -> NDArray:
        """Return the value of (the `der`-th derivative of) the smoothing spline
        at data points `x`."""
        return self.__eval(x, der=der)

__call__(x, der=0)

Return the value of (the `der`-th derivative of) the smoothing spline at data points `x`.

Source code in mass2/mathstat/interpolate.py
608
609
610
611
def __call__(self, x: ArrayLike, der: int = 0) -> NDArray:
    """Evaluate the smoothing spline (or its `der`-th derivative) at `x`."""
    result = self.__eval(x, der=der)
    return result

__eval(x, der=0, allow_extrapolate=True)

Return the value of (the `der`-th derivative of) the smoothing spline at data points `x`.

Source code in mass2/mathstat/interpolate.py
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
def __eval(self, x: ArrayLike, der: int = 0, allow_extrapolate: bool = True) -> NDArray:
    """Return the value of (the `der`-th derivative of) the smoothing spline
    at data points `x`.

    Parameters
    ----------
    x : ArrayLike
        Points (in the original, unscaled units) at which to evaluate.
    der : int, optional
        Derivative order, by default 0
    allow_extrapolate : bool, optional
        If True, use the stored linear extrapolations outside the knot range.

    Returns
    -------
    NDArray
        Spline (or derivative) values; a 0-d result when `x` was a scalar.
    """
    scalar = np.isscalar(x)
    # BUG FIX: `np.asarray` can return the caller's own ndarray, so the former
    # in-place `x /= self.xscale` silently corrupted the caller's data (and
    # raised TypeError for integer arrays). Divide into a fresh float array.
    x = np.asarray(x, dtype=float) / self.xscale
    splresult = splev(x, (self.basis.padknots, self.coeff, 3), der=der)
    low = x < self.x[0]
    high = x > self.x[-1]
    if np.any(low) and allow_extrapolate:
        # Linear extrapolation below the first knot.
        if der == 0:
            splresult[low] = self.lowline(x[low] - self.x[0])
        elif der == 1:
            splresult[low] = self.lowline.coeffs[0]
        elif der >= 2:
            splresult[low] = 0.0
    if np.any(high) and allow_extrapolate:
        # Linear extrapolation above the last knot.
        if der == 0:
            splresult[high] = self.highline(x[high] - self.x[-1])
        elif der == 1:
            splresult[high] = self.highline.coeffs[0]
        elif der >= 2:
            splresult[high] = 0.0
    if der > 0:
        # Undo the internal x-rescaling for derivative units.
        splresult /= self.xscale**der
    if scalar:
        splresult = splresult[()]
    return splresult

__init__(x, y, dy, dx=None, maxchisq=None)

Smoothing spline for data {x,y} with errors {dy} on the y values and {dx} on the x values (or zero if not given).

If dx errors are given, a global quadratic fit is done to the data to estimate the local slope. If that's a poor choice, then you should combine your dx and dy errors to create a sensible single error list, and you should pass that in as dy.

maxchisq specifies the largest allowed value of chi-squared (the sum of the squares of the differences y_i-f(x_i), divided by the variance v_i). If not given, this defaults to the number of data values. When a (weighted) least squares fit of a line to the data meets the maxchisq constraint, then the actual chi-squared will be less than maxchisq.

Source code in mass2/mathstat/interpolate.py
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
def __init__(self, x: ArrayLike, y: ArrayLike, dy: ArrayLike, dx: ArrayLike | None = None, maxchisq: float | None = None):
    """Build a smoothing spline through data {x, y} with errors {dy} on the
    y values and optional errors {dx} on the x values (zero when omitted).

    When dx is supplied, the x errors are converted into equivalent y errors
    using the slope of a global quadratic fit to the data. If that quadratic
    is a poor model for your data, combine the dx and dy errors yourself into
    one sensible error list and pass the combination in as dy.

    maxchisq caps the total chi-squared (sum of squares of y_i - f(x_i),
    divided by the variance). It defaults to the number of data points. When
    a weighted least-squares line already satisfies the cap, the achieved
    chi-squared will come in below maxchisq.
    """
    self.x = np.array(x)  # private copies; self.x is rescaled below
    self.y = np.array(y)
    self.dy = np.array(dy)
    if dx is None:
        combined_err = np.array(np.abs(dy))
        dx = np.zeros_like(combined_err)
    else:
        # Fold x errors into y errors via the slope of a quadratic fit.
        quadfit = np.polyfit(self.x, self.y, 2)
        local_slope = np.poly1d(np.polyder(quadfit, 1))(x)
        combined_err = np.sqrt((np.asarray(dx) * local_slope) ** 2 + self.dy**2)

    # Rescale x to RMS=1 for numerical conditioning of the spline problem.
    self.xscale = (self.x**2).mean() ** 0.5
    self.x /= self.xscale
    self.dx = np.array(dx) / self.xscale
    self.err = combined_err
    self.Nk = len(self.x)
    self.maxchisq = float(self.Nk) if maxchisq is None else maxchisq

    self.basis = NaturalBsplineBasis(self.x)
    self.N0 = self.basis.values_matrix(0)
    self.N2 = self.basis.values_matrix(2)
    self.Omega = self._compute_Omega(self.x, self.N2)
    self.smooth(chisq=self.maxchisq)

smooth(chisq=None)

Choose the value of the curve at the knots so as to achieve the smallest possible curvature subject to the constraint that the sum over all {x,y} pairs S = [(y-f(x))/dy]^2 <= chisq

Source code in mass2/mathstat/interpolate.py
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
def smooth(self, chisq: float | None = None) -> None:
    """Choose the value of the curve at the knots so as to achieve the
    smallest possible curvature subject to the constraint that the
    sum over all {x,y} pairs S = [(y-f(x))/dy]^2 <= chisq

    Parameters
    ----------
    chisq : float | None, optional
        Target chi-squared for the fit; defaults to the number of knots.

    Side effects: sets self.coeff (FITPACK coefficients), self.actualchisq,
    and self.lowline/self.highline (linear extrapolations beyond the knots).
    """
    if chisq is None:
        chisq = self.Nk

    Dinv = self.err ** (-2)  # Vector but stands for diagonals of a diagonal matrix.
    NTDinv = self.N0.T * Dinv
    # Normal equations of the weighted least-squares problem: lhs @ beta = rhs.
    lhs = np.dot(NTDinv, self.N0)
    rhs = np.dot(self.N0.T, Dinv * self.y)

    def best_params(p: NDArray) -> NDArray:
        """Return the best-fit parameters for a given curvature penalty p."""
        # Reinsch parameterization: p=1 reproduces the pure least-squares fit,
        # while p->0 weights the curvature matrix Omega ever more heavily.
        return np.linalg.solve(p * (lhs - self.Omega) + self.Omega, p * rhs)

    def chisq_difference(p: NDArray, target_chisq: float) -> float:
        """Return the difference between the chi-squared for curvature penalty p
        and the target chi-squared."""
        # If curvature is too small, the computation can become singular.
        # Avoid this by returning a crazy-high chisquared, as needed.
        try:
            beta = best_params(p)
        except np.linalg.LinAlgError:
            return 1e99
        ys = np.dot(self.N0, beta)
        chisq = np.sum(((self.y - ys) / self.err) ** 2)
        return chisq - target_chisq

    # Root-find the penalty p in (mincurvature, 1] where chisq hits the target.
    mincurvature = 1e-20
    pbest = sp.optimize.brentq(chisq_difference, mincurvature, 1, args=(chisq,))
    beta = best_params(pbest)
    self.coeff = self.basis.expand_coeff(beta)
    ys = np.dot(self.N0, beta)
    self.actualchisq = np.sum(((self.y - ys) / self.err) ** 2)

    # Store the linear extrapolation outside the knotted region.
    endpoints = np.array([self.x[0], self.x[-1]]) * self.xscale
    val = self.__eval(endpoints, 0, allow_extrapolate=False)
    slope = self.__eval(endpoints, 1, allow_extrapolate=False) * self.xscale
    self.lowline = np.poly1d([slope[0], val[0]])
    self.highline = np.poly1d([slope[1], val[1]])

SmoothingSplineLog

A smoothing spline in log-log space.

Source code in mass2/mathstat/interpolate.py
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
class SmoothingSplineLog:
    """A smoothing spline fitted in log-log space."""

    def __init__(self, x: ArrayLike, y: ArrayLike, dy: ArrayLike, dx: ArrayLike | None = None, maxchisq: float | None = None):
        """Set up a smoothing spline in log-log space.

        Parameters
        ----------
        x : ArrayLike
            Independent variable values; every element must be positive.
        y : ArrayLike
            Dependent variable values; every element must be positive.
        dy : ArrayLike
            Uncertainties on the y values.
        dx : ArrayLike | None, optional
            Uncertainties on the x values, by default None
        maxchisq : float | None, optional
            Largest permitted chi^2 value, by default None

        Raises
        ------
        ValueError
            If any x or y value is zero or negative.
        """
        xv = np.asarray(x)
        yv = np.asarray(y)
        dyv = np.asarray(dy)
        if np.any(xv <= 0) or np.any(yv <= 0):
            raise ValueError("The x and y data must all be positive to use a SmoothingSplineLog")
        # Relative errors become absolute errors on the logarithms.
        dxv = None if dx is None else np.asarray(dx) / xv
        self.linear_model = SmoothingSpline(np.log(xv), np.log(yv), dyv / yv, dxv, maxchisq=maxchisq)

    def __call__(self, x: ArrayLike, der: int = 0) -> NDArray:
        """Evaluate the log-log smoothing spline (or its `der`-th log-space
        derivative) at the points `x`.

        Parameters
        ----------
        x : ArrayLike
            Points at which to evaluate; must be positive.
        der : int, optional
            Derivative degree passed to the underlying log-space model, by default 0

        Returns
        -------
        NDArray
            exp of the log-space model evaluated at log(x).
        """
        logx = np.log(x)
        return np.exp(self.linear_model(logx, der=der))

__call__(x, der=0)

Compute the log-log smoothing spline or its derivative at the points x.

Parameters:
  • x (ArrayLike) –

    Independent variable values at which to evaluate the spline.

  • der (int, default: 0 ) –

    Derivative degree, by default 0

Returns:
  • NDArray

    Smoothing spline values (or derivative values) at x.

Source code in mass2/mathstat/interpolate.py
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
def __call__(self, x: ArrayLike, der: int = 0) -> NDArray:
    """Evaluate the log-log smoothing spline (or its `der`-th log-space
    derivative) at the points `x`.

    Parameters
    ----------
    x : ArrayLike
        Points at which to evaluate; must be positive (their log is taken).
    der : int, optional
        Derivative degree passed to the underlying log-space model, by default 0

    Returns
    -------
    NDArray
        exp of the log-space model evaluated at log(x).
    """
    logx = np.log(x)
    return np.exp(self.linear_model(logx, der=der))

__init__(x, y, dy, dx=None, maxchisq=None)

Set up a smoothing spline in log-log space.

Parameters:
  • x (ArrayLike) –

    Independent variable values. Must be positive and will be sorted if not increasing.

  • y (ArrayLike) –

    Dependent variable values. Must be positive.

  • dy (ArrayLike) –

    Uncertainties in y values.

  • dx (ArrayLike | None, default: None ) –

    Uncertainties in x values, by default None

  • maxchisq (float | None, default: None ) –

    Maximum allowed chi^2 value, by default None

Raises:
  • ValueError

    If any x or y values are not positive.

Source code in mass2/mathstat/interpolate.py
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
def __init__(self, x: ArrayLike, y: ArrayLike, dy: ArrayLike, dx: ArrayLike | None = None, maxchisq: float | None = None):
    """Construct a smoothing spline operating in log-log space.

    Parameters
    ----------
    x : ArrayLike
        Independent variable values; every element must be positive.
    y : ArrayLike
        Dependent variable values; every element must be positive.
    dy : ArrayLike
        Uncertainties on the y values.
    dx : ArrayLike | None, optional
        Uncertainties on the x values, by default None
    maxchisq : float | None, optional
        Largest permitted chi^2 value, by default None

    Raises
    ------
    ValueError
        If any x or y value is zero or negative.
    """
    xv = np.asarray(x)
    yv = np.asarray(y)
    dyv = np.asarray(dy)
    if np.any(xv <= 0) or np.any(yv <= 0):
        raise ValueError("The x and y data must all be positive to use a SmoothingSplineLog")
    # Relative errors become absolute errors on the logarithms.
    dxv = None if dx is None else np.asarray(dx) / xv
    self.linear_model = SmoothingSpline(np.log(xv), np.log(yv), dyv / yv, dxv, maxchisq=maxchisq)

k_spline(x, y)

Compute the spline covariance kernel, R&W eq 6.28.

Source code in mass2/mathstat/interpolate.py
206
207
208
209
def k_spline(x: NDArray, y: NDArray) -> NDArray:
    """Spline covariance kernel of Rasmussen & Williams, eq. 6.28."""
    nearer = np.minimum(x, y)
    separation = np.abs(x - y)
    return nearer**3 / 3 + nearer**2 / 2 * separation

Power spectra

A class and functions to compute a power spectrum using some of the sophistications given in Numerical Recipes, including windowing and overlapping data segments.

Use the class PowerSpectrum in the case that you are compute-limited and PowerSpectrumOverlap in the case that you are data-limited. The latter uses k segments of data two segments at a time to make (k-1) estimates and makes fuller use of all data (except in the first and last segment).

Joe Fowler, NIST

October 13, 2010

Usage:

import power_spectrum as ps import pylab as plt N=1024 M=N/4 data=np.random.default_rng().standard_normal(N) spec = ps.PowerSpectrum(M, dt=1e-6) window = ps.hann(2*M) for i in range(3): spec.addDataSegment(data[i*M : (i+2)*M], window=window) plt.plot(spec.frequencies(), spec.spectrum())

Or you can use the convenience function that hides the class objects from you and simply returns a (frequency,spectrum) pair of arrays:

N=1024 data=np.random.default_rng().standard_normal(N) plt.clf() for i in (2,4,8,1): f,s = ps.computeSpectrum(data, segfactor=i, dt=1e-6, window=np.hanning) plt.plot(f, s)

Window choices are: bartlett - Triangle shape hann - Sine-squared hamming - 0.08 + 0.92*(sine-squared) welch - Parabolic None - Square (no windowing) *** - Any other vector of length 2m OR any callable accepting 2m as an argument and returning a sequence of that length.

Each window take an argument (n), the number of data points per segment. When using the PowerSpectrum or PowerSpectrumOverlap classes or the convenience function computeSpectrum, you have a choice. You can call the window and pass in the resulting vector, or you can pass in the callable function itself. It is allowed to use different windows on different data segments, though honestly that would be really weird.

PowerSpectrum

Object for accumulating power spectrum estimates from one or more data segments.

If you want to use multiple overlapping segments, use class PowerSpectrumOverlap.

Based on Num Rec 3rd Ed section 13.4

Source code in mass2/mathstat/power_spectrum.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
class PowerSpectrum:
    """Object for accumulating power spectrum estimates from one or more data segments.

    If you want to use multiple overlapping segments, use class
    PowerSpectrumOverlap.

    Based on Num Rec 3rd Ed section 13.4"""

    def __init__(self, m: int, dt: float | None = 1.0):
        """Sets up to estimate PSD at m+1 frequencies (counting DC) given
        data segments of length 2m.  Optional dt is the time step Delta"""
        self.m = m
        self.m2 = 2 * m
        self.nsegments = 0
        self.specsum = np.zeros(m + 1, dtype=float)
        if dt is None:
            self.dt = 1.0
        else:
            self.dt = dt

    def copy(self) -> "PowerSpectrum":
        """Return a copy of the object.

        Handy when coding and you don't want to recompute everything, but
        you do want to update the method definitions."""
        c = PowerSpectrum(self.m, dt=self.dt)
        c.__dict__.update(self.__dict__)
        return c

    def addDataSegment(self, data: NDArray, window: Callable | NDArray | None = None) -> None:
        """Process a data segment of length 2m using the window function
        given.  window can be None (square window), a callable taking the
        length and returning a sequence, or a sequence."""
        if len(data) != self.m2:
            raise ValueError(f"wrong size data segment.  len(data)={len(data)} but require {self.m2}")
        if np.isnan(data).any():
            raise ValueError("data contains NaN")
        if isinstance(window, np.ndarray):
            assert len(window) == self.m2
            w = window
        elif window is None:
            w = np.ones(self.m2)
        elif callable(window):
            w = window(self.m2)
        else:
            raise TypeError("Window not understood")
        wksp = w * data
        sum_window = (w**2).sum()

        # Normalize by the window power; the dt*m2 factor converts to
        # physical (per-Hz) units.  Net factor: 2*dt/sum_window.
        scale_factor = 2.0 / (sum_window * self.m2)
        scale_factor *= self.dt * self.m2  # we want real units
        wksp = np.fft.rfft(wksp)

        # NOTE(review): this uniform factor of 2 over-weights the DC and
        # Nyquist bins (which have no negative-frequency partner) by 2x.
        ps = np.abs(wksp) ** 2
        self.specsum += scale_factor * ps
        self.nsegments += 1

    def addLongData(self, data: NDArray, window: Callable | NDArray | None = None) -> None:
        """Process a long vector of data as non-overlapping segments of length 2m."""
        data = np.asarray(data)
        nt = len(data)
        nk = nt // self.m2
        for k in range(nk):
            noff = k * self.m2
            self.addDataSegment(data[noff : noff + self.m2], window=window)

    def spectrum(self, nbins: int | None = None) -> NDArray:
        """Return the accumulated average PSD.

        If <nbins> is given, the data are averaged into <nbins> bins.

        Raises ValueError if nbins exceeds m."""
        if nbins is None:
            return self.specsum / self.nsegments
        if nbins > self.m:
            raise ValueError(f"Cannot rebin into more than m={self.m} bins")

        # Map each of the m+1 raw bins onto one of the nbins+1 output bins.
        newbin = np.asarray(0.5 + np.arange(self.m + 1, dtype=float) / (self.m + 1) * nbins, dtype=int)
        result = np.zeros(nbins + 1, dtype=float)
        for i in range(nbins + 1):
            result[i] = self.specsum[newbin == i].mean()
        return result / self.nsegments

    def autocorrelation(self) -> None:
        """Return the autocorrelation (the DFT of this power spectrum)"""
        raise NotImplementedError("The autocorrelation method is not yet implemented.")

    def frequencies(self, nbins: int | None = None) -> NDArray:
        """Return the frequency axis corresponding to spectrum().

        If <nbins> is given, the data are averaged into <nbins> bins."""
        if nbins is None:
            nbins = self.m
        if nbins > self.m:
            raise ValueError(f"Cannot rebin into more than m={self.m} bins")
        return np.arange(nbins + 1, dtype=float) / (2 * self.dt * nbins)

    def plot(
        self,
        axis: plt.Axes | None = None,
        arb_to_unit_scale_and_label: tuple[float, str] = (1, "arb"),
        sqrt_psd: bool = True,
        **plotkwarg: Any,
    ) -> None:
        """Plot the power spectrum (or its square root) on a log-log plot.

        Parameters
        ----------
        axis : plt.Axes | None, optional
            Axes to plot on, or if None create a new figure, by default None
        arb_to_unit_scale_and_label : tuple[int, str], optional
            rescale the sqrt(PSD) by this amount and label it such, by default (1, "arb")
        sqrt_psd : bool, optional
            Whether to take the square root of the PSD, by default True
        """
        if axis is None:
            plt.figure()
            axis = plt.gca()
        arb_to_unit_scale, unit_label = arb_to_unit_scale_and_label
        psd = self.spectrum()[1:] * (arb_to_unit_scale**2)
        freq = self.frequencies()[1:]
        # BUG FIX: the two y-axis labels were swapped.  sqrt(PSD) is the
        # amplitude spectral density (units/sqrt(Hz)); the un-rooted PSD is
        # the power spectral density (units^2/Hz).  Units were already right.
        if sqrt_psd:
            axis.plot(freq, np.sqrt(psd), **plotkwarg)
            axis.set_ylabel(f"Amplitude Spectral Density ({unit_label}$/\\sqrt{{Hz}}$)")
        else:
            axis.plot(freq, psd, **plotkwarg)
            axis.set_ylabel(f"Power Spectral Density ({unit_label}$^2$ Hz$^{{-1}}$)")
        plt.loglog()
        axis.grid()
        axis.set_xlabel("Frequency (Hz)")

__init__(m, dt=1.0)

Sets up to estimate PSD at m+1 frequencies (counting DC) given data segments of length 2m. Optional dt is the time step Delta

Source code in mass2/mathstat/power_spectrum.py
74
75
76
77
78
79
80
81
82
83
84
def __init__(self, m: int, dt: float | None = 1.0):
    """Prepare to accumulate PSD estimates at m+1 frequencies (including DC)
    from data segments of length 2m.  Optional `dt` is the time step Delta."""
    self.m = m
    self.m2 = 2 * m
    self.nsegments = 0
    self.specsum = np.zeros(m + 1, dtype=float)
    # A dt of None means "use unit sample spacing".
    self.dt = 1.0 if dt is None else dt

addDataSegment(data, window=None)

Process a data segment of length 2m using the window function given. window can be None (square window), a callable taking the length and returning a sequence, or a sequence.

Source code in mass2/mathstat/power_spectrum.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
def addDataSegment(self, data: NDArray, window: Callable | NDArray | None = None) -> None:
    """Process a data segment of length 2m using the window function
    given.  window can be None (square window), a callable taking the
    length and returning a sequence, or a sequence."""
    if len(data) != self.m2:
        raise ValueError(f"wrong size data segment.  len(data)={len(data)} but require {self.m2}")
    if np.isnan(data).any():
        raise ValueError("data contains NaN")
    if isinstance(window, np.ndarray):
        assert len(window) == self.m2
        w = window
    elif window is None:
        w = np.ones(self.m2)
    elif callable(window):
        w = window(self.m2)
    else:
        raise TypeError("Window not understood")
    wksp = w * data
    sum_window = (w**2).sum()

    scale_factor = 2.0 / (sum_window * self.m2)
    if True:  # we want real units
        scale_factor *= self.dt * self.m2
    wksp = np.fft.rfft(wksp)

    # The first line adds 2x too much to the first/last bins.
    ps = np.abs(wksp) ** 2
    self.specsum += scale_factor * ps
    self.nsegments += 1

addLongData(data, window=None)

Process a long vector of data as non-overlapping segments of length 2m.

Source code in mass2/mathstat/power_spectrum.py
125
126
127
128
129
130
131
132
def addLongData(self, data: NDArray, window: Callable | NDArray | None = None) -> None:
    """Process a long vector of data as non-overlapping segments of length 2m."""
    data = np.asarray(data)
    nt = len(data)
    nk = nt // self.m2
    for k in range(nk):
        noff = k * self.m2
        self.addDataSegment(data[noff : noff + self.m2], window=window)

autocorrelation()

Return the autocorrelation (the DFT of this power spectrum)

Source code in mass2/mathstat/power_spectrum.py
147
148
149
def autocorrelation(self) -> None:
    """Return the autocorrelation (the DFT of this power spectrum)"""
    # Placeholder: the inverse transform of the PSD has not been written yet.
    msg = "The autocorrelation method is not yet implemented."
    raise NotImplementedError(msg)

copy()

Return a copy of the object.

Handy when coding and you don't want to recompute everything, but you do want to update the method definitions.

Source code in mass2/mathstat/power_spectrum.py
86
87
88
89
90
91
92
93
def copy(self) -> "PowerSpectrum":
    """Return an independent copy of the object.

    Handy when coding and you don't want to recompute everything, but
    you do want to update the method definitions."""
    c = PowerSpectrum(self.m, dt=self.dt)
    c.__dict__.update(self.__dict__)
    # BUGFIX: __dict__.update copies only the *reference* to specsum, but
    # addDataSegment mutates it in place with "+=", so the original and the
    # copy would corrupt each other.  Duplicate the accumulator array.
    c.specsum = self.specsum.copy()
    return c

frequencies(nbins=None)

If nbins is given, the data are averaged into nbins bins.

Source code in mass2/mathstat/power_spectrum.py
151
152
153
154
155
156
157
def frequencies(self, nbins: int | None = None) -> NDArray:
    """If <nbins> is given, the data are averaged into <nbins> bins."""
    if nbins is None:
        nbins = self.m
    if nbins > self.m:
        raise ValueError(f"Cannot rebin into more than m={self.m} bins")
    return np.arange(nbins + 1, dtype=float) / (2 * self.dt * nbins)

plot(axis=None, arb_to_unit_scale_and_label=(1, 'arb'), sqrt_psd=True, **plotkwarg)

Plot the power spectrum (or its square root) on a log-log plot.

Parameters:
  • axis (Axes | None, default: None ) –

    Axes to plot on, or if None create a new figure, by default None

  • arb_to_unit_scale_and_label (tuple[int, str], default: (1, 'arb') ) –

    rescale the sqrt(PSD) by this amount and label it such, by default (1, "arb")

  • sqrt_psd (bool, default: True ) –

    Whether to take the square root of the PSD, by default True

Source code in mass2/mathstat/power_spectrum.py
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def plot(
    self,
    axis: plt.Axes | None = None,
    arb_to_unit_scale_and_label: tuple[float, str] = (1, "arb"),
    sqrt_psd: bool = True,
    **plotkwarg: Any,
) -> None:
    """Plot the power spectrum (or its square root) on a log-log plot.

    Parameters
    ----------
    axis : plt.Axes | None, optional
        Axes to plot on, or if None create a new figure, by default None
    arb_to_unit_scale_and_label : tuple[float, str], optional
        rescale the sqrt(PSD) by this amount and label it such, by default (1, "arb")
    sqrt_psd : bool, optional
        Whether to take the square root of the PSD, by default True
    """
    if axis is None:
        plt.figure()
        axis = plt.gca()
    arb_to_unit_scale, unit_label = arb_to_unit_scale_and_label
    # Skip the DC bin (index 0) so the log-log axes remain well defined.
    psd = self.spectrum()[1:] * (arb_to_unit_scale**2)
    freq = self.frequencies()[1:]
    if sqrt_psd:
        # BUGFIX: the two ylabel names were swapped.  sqrt(PSD) has units of
        # signal/sqrt(Hz), which is an *amplitude* spectral density.
        axis.plot(freq, np.sqrt(psd), **plotkwarg)
        axis.set_ylabel(f"Amplitude Spectral Density ({unit_label}$/\\sqrt{{Hz}}$)")
    else:
        # The PSD itself has units of signal^2 per Hz: a *power* spectral density.
        axis.plot(freq, psd, **plotkwarg)
        axis.set_ylabel(f"Power Spectral Density ({unit_label}$^2$ Hz$^{{-1}}$)")
    plt.loglog()
    axis.grid()
    axis.set_xlabel("Frequency (Hz)")

spectrum(nbins=None)

If nbins is given, the data are averaged into nbins bins.

Source code in mass2/mathstat/power_spectrum.py
134
135
136
137
138
139
140
141
142
143
144
145
def spectrum(self, nbins: int | None = None) -> NDArray:
    """If <nbins> is given, the data are averaged into <nbins> bins."""
    if nbins is None:
        return self.specsum / self.nsegments
    if nbins > self.m:
        raise ValueError(f"Cannot rebin into more than m={self.m} bins")

    newbin = np.asarray(0.5 + np.arange(self.m + 1, dtype=float) / (self.m + 1) * nbins, dtype=int)
    result = np.zeros(nbins + 1, dtype=float)
    for i in range(nbins + 1):
        result[i] = self.specsum[newbin == i].mean()
    return result / self.nsegments

PowerSpectrumOverlap

Bases: PowerSpectrum

Object for power spectral estimation using overlapping data segments.

User sends non-overlapping segments of length m, and they are processed in pairs of length 2m with overlap (except on the first and last segment).

Source code in mass2/mathstat/power_spectrum.py
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
class PowerSpectrumOverlap(PowerSpectrum):
    """Power spectral estimation from overlapping data segments.

    The user supplies non-overlapping segments of length m; consecutive
    pairs are joined into length-2m windows (so each segment after the
    first overlaps its neighbors) before being handed to PowerSpectrum.
    """

    def __init__(self, m: int, dt: float | None = 1.0):
        """Create an accumulator for an overlapped-segment PSD estimate.

        Parameters
        ----------
        m : int
            Create a PSD estimate with m+1 frequency bins (counting DC)
        dt : float | None, optional
            Time sample period in seconds, by default 1.0
        """
        super().__init__(m, dt=dt)
        # The first segment is only buffered, never processed alone.
        self.first = True

    def addDataSegment(self, data: NDArray, window: Callable | NDArray | None = None) -> None:
        "Process a data segment of length m using window."
        if self.first:
            # Buffer the very first segment behind a block of zeros.
            self.first = False
            self.fullseg = np.concatenate((np.zeros_like(data), np.array(data)))
        else:
            # Slide the stored window: old upper half becomes the lower half,
            # the new data fills the upper half, then process all 2m samples.
            half = self.m
            self.fullseg[:half] = self.fullseg[half:]
            self.fullseg[half:] = data
            PowerSpectrum.addDataSegment(self, self.fullseg, window=window)

    def addLongData(self, data: NDArray, window: Callable | NDArray | None = None) -> None:
        """Process a long vector of data as overlapping segments of
        length 2m."""
        n = len(data)
        nseg = (n - 1) // self.m
        # Spread the nseg starting offsets evenly across the data vector.
        step = (n - self.m2) / (nseg - 1.0) if nseg > 1 else 0.0
        for k in range(nseg):
            start = int(k * step + 0.5)
            PowerSpectrum.addDataSegment(self, data[start : start + self.m2], window=window)

__init__(m, dt=1.0)

Sets up an object to accumulate a power spectrum estimate.

Parameters:
  • m (int) –

    Create a PSD estimate with m+1 frequency bins (counting DC)

  • dt (float | None, default: 1.0 ) –

    Time sample period in seconds, by default 1.0

Source code in mass2/mathstat/power_spectrum.py
202
203
204
205
206
207
208
209
210
211
212
213
def __init__(self, m: int, dt: float | None = 1.0):
    """Create an accumulator for an overlapped-segment PSD estimate.

    Parameters
    ----------
    m : int
        Create a PSD estimate with m+1 frequency bins (counting DC)
    dt : float | None, optional
        Time sample period in seconds, by default 1.0
    """
    # No segment has been seen yet; the first one is only buffered.
    self.first = True
    PowerSpectrum.__init__(self, m, dt=dt)

addDataSegment(data, window=None)

Process a data segment of length m using window.

Source code in mass2/mathstat/power_spectrum.py
215
216
217
218
219
220
221
222
223
def addDataSegment(self, data: NDArray, window: Callable | NDArray | None = None) -> None:
    "Process a data segment of length m using window."
    if self.first:
        self.first = False
        self.fullseg = np.concatenate((np.zeros_like(data), np.array(data)))
    else:
        self.fullseg[0 : self.m] = self.fullseg[self.m :]
        self.fullseg[self.m :] = data
        PowerSpectrum.addDataSegment(self, self.fullseg, window=window)

addLongData(data, window=None)

Process a long vector of data as overlapping segments of length 2m.

Source code in mass2/mathstat/power_spectrum.py
225
226
227
228
229
230
231
232
233
234
235
236
def addLongData(self, data: NDArray, window: Callable | NDArray | None = None) -> None:
    """Process a long data vector as overlapping segments of length 2m."""
    n = len(data)
    nseg = (n - 1) // self.m
    # Spread the nseg starting offsets evenly so segments tile the vector.
    step = 0.0 if nseg <= 1 else (n - self.m2) / (nseg - 1.0)
    for k in range(nseg):
        start = int(k * step + 0.5)
        PowerSpectrum.addDataSegment(self, data[start : start + self.m2], window=window)

bartlett(n)

A Bartlett window (triangle shape) of length n

Source code in mass2/mathstat/power_spectrum.py
242
243
244
def bartlett(n: int) -> NDArray:
    """Return a length-n Bartlett window (triangular shape)."""
    return np.bartlett(n)

computeSpectrum(data, segfactor=1, dt=None, window=None)

Convenience function to compute the power spectrum of a single data array.

Args: data: Data for finding the spectrum. segfactor: How many segments to break up the data into. The spectrum will be found on each consecutive pair of segments and will be averaged over all pairs.

dt: The sample spacing, in time. window: The window function to apply. Should be a function that accepts a number of samples and returns an array of that length. Possible values are bartlett, welch, hann, and hamming in this module, or use a function of your choosing.

Returns: Either the PSD estimate as an array (non-negative frequencies only), OR the tuple (frequencies, PSD). The latter returns when dt

is not None.

Source code in mass2/mathstat/power_spectrum.py
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
def computeSpectrum(
    data: ArrayLike, segfactor: int = 1, dt: float | None = None, window: Callable | ArrayLike | None = None
) -> tuple[NDArray, NDArray]:
    """Convenience function to compute the power spectrum of a single data array.

    Args:
        data:  Data for finding the spectrum
        segfactor:  How many segments to break up the data into.  The spectrum
            will be found on each consecutive pair of segments and
            will be averaged over all pairs.
        dt:  The sample spacing, in time.  When None, frequencies cannot be
            computed and an all-zeros frequency array is returned instead.
        window:  The window function to apply.  Should be a function that accepts
            a number of samples and returns an array of that length. Possible
            values are bartlett, welch, hann, and hamming in this module, or use
            a function of your choosing.

    Returns:
        The tuple (frequencies, PSD), both of length M+1 where
        M = len(data) // (2*segfactor).  When <dt> is None, the frequencies
        array is all zeros (the true frequencies are unknowable without dt).
    """
    data = np.asarray(data)
    N = len(data)
    M = N // (2 * segfactor)
    window_length = 2 * M
    if isinstance(window, np.ndarray):
        assert len(window) == window_length
        w = np.array(window)
    elif window is None:
        w = np.ones(window_length, dtype=float)
    elif callable(window):
        w = window(window_length)
    else:
        raise TypeError("Window not understood")

    if segfactor == 1:
        spec = PowerSpectrum(M, dt=dt)
        # Ensure that the data segment has even length
        spec.addDataSegment(data[: 2 * M], window=w)
    else:
        spec = PowerSpectrumOverlap(M, dt=dt)
        for i in range(2 * segfactor - 1):
            spec.addDataSegment(data[i * M : (i + 1) * M], window=w)

    if dt is None:
        # BUGFIX: was np.zeros(M), one element shorter than spec.spectrum()
        # (length M+1), so the two returned arrays could not be plotted together.
        return np.zeros(M + 1), spec.spectrum()
    return spec.frequencies(), spec.spectrum()

demo(N=1024, window=np.hanning)

Plot a demonstration power spectrum with different segmentations.

Parameters:
  • N (int, default: 1024 ) –

    Length of the white-noise random data vector, by default 1024

  • window (Callable | ArrayLike | None, default: hanning ) –

    Window function to apply, by default np.hanning

Source code in mass2/mathstat/power_spectrum.py
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
def demo(N: int = 1024, window: Callable | ArrayLike | None = np.hanning) -> None:
    """Plot a demonstration power spectrum with different segmentations.

    Parameters
    ----------
    N : int, optional
        Length of the white-noise random data vector, by default 1024
    window : Callable | ArrayLike | None, optional
        Window function to apply, by default np.hanning
    """
    noise = np.random.default_rng().standard_normal(N)
    plt.clf()
    # Overlay spectra from several segmentations of the same noise record.
    for segfactor in (2, 4, 8, 1):
        freq, psd = computeSpectrum(noise, segfactor=segfactor, dt=1.0, window=window)
        plt.plot(freq, psd)

hamming(n)

A Hamming window (0.08 + 0.92*sine-squared) of length n

Source code in mass2/mathstat/power_spectrum.py
260
261
262
def hamming(n: int) -> NDArray:
    """Return a length-n Hamming window (0.08 + 0.92*sine-squared)."""
    return np.hamming(n)

hann(n)

A Hann window (sine-squared) of length n

Source code in mass2/mathstat/power_spectrum.py
252
253
254
255
256
257
def hann(n: int) -> NDArray:
    """Return a length-n Hann window (sine-squared shape)."""
    return np.hanning(n)

welch(n)

A Welch window (parabolic) of length n

Source code in mass2/mathstat/power_spectrum.py
247
248
249
def welch(n: int) -> NDArray:
    """Return a length-n Welch window (parabolic shape)."""
    # Map indices onto [0, 1], then evaluate the parabola 1 - (2u - 1)^2.
    u = np.arange(n, dtype=float) / (n - 1.0)
    return 1 - (2 * u - 1) ** 2

Robust statistics

mass2.mathstat.robust

Functions from the field of robust statistics.

Location estimators: bisquare_weighted_mean - Mean with weights given by the bisquare rho function. huber_weighted_mean - Mean with weights given by Huber's rho function. trimean - Tukey's trimean, the average of the median and the midhinge. shorth_range - Primarily a dispersion estimator, but location=True gives a (poor) location.

Dispersion estimators: median_abs_dev - Median absolute deviation from the median. shorth_range - Length of the shortest closed interval containing at least half the data. Qscale - Normalized Rousseeuw & Croux Q statistic, from the 25%ile of all 2-point distances.

Utility functions: high_median - Weighted median

Recommendations: For location, suggest the bisquare_weighted_mean with k=3.9*sigma, if you can make any reasonable guess as to the Gaussian-like width sigma. If not, trimean is a good second choice, though less efficient.

For dispersion, the Qscale is very efficient for nearly Gaussian data. The median_abs_dev is the most robust though less efficient. If Qscale doesn't work, then short_range is a good second choice.

Created on Feb 9, 2012 Rewritten with Numba Jan 23, 2025

@author: fowlerj

Qscale(x, sort_inplace=False)

Compute the robust estimator of scale Q of Rousseeuw and Croux using only O(n log n) memory and computations.

A naive implementation is O(n^2) in both memory and computations.

Args: x: The data set, an unsorted sequence of values. sort_inplace: Whether it is okay for the function to reorder the set x. If True, x must be a np.ndarray (or ValueError is raised).

Q is defined as d_n * 2.2219 * {|xi-xj|; i<j}_k, where

{a}_k means the kth order-statistic of the set {a},
this set is that of the distances between all (n 2) possible pairs of data in {x}
n=# of observations in set {x},
k = (n choose 2)/4,
2.2219 makes Q consistent for sigma in normal distributions as n-->infinity,
and d_n is a correction factor to the 2.2219 when n is not large.

This function does apply the correction factors to make Q consistent with sigma for a Gaussian distribution.

Technique from C. Croux & P. Rousseeuw in Comp. Stat Vol 1 (1992) ed. Dodge & Whittaker, Heidelberg: Physica-Verlag pages 411-428. Available at ftp://ftp.win.ua.ac.be/pub/preprints/92/Timeff92.pdf

The estimator is further studied in Rousseeuw & Croux, J Am. Stat. Assoc 88 (1993), pp 1273-1283.

Source code in mass2/mathstat/robust.py
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
def Qscale(x: ArrayLike, sort_inplace: bool = False) -> float:
    """Compute the robust scale estimator Q of Rousseeuw and Croux in only
    O(n log n) memory and computations (a naive implementation is O(n^2) in both).

    Args:
        x: The data set, an unsorted sequence of values.
        sort_inplace: Whether it is okay for the function to reorder the set <x>.
            If True, <x> must be a np.ndarray (or ValueError is raised).

    Q is defined as d_n * 2.2219 * {|xi-xj|; i<j}_k, where

        {a}_k means the kth order-statistic of the set {a},
        this set is that of the distances between all (n 2) possible pairs of data in {x}
        n=# of observations in set {x},
        k = (n choose 2)/4,
        2.2219 makes Q consistent for sigma in normal distributions as n-->infinity,
        and d_n is a correction factor to the 2.2219 when n is not large.

    The correction factors making Q consistent with sigma for a Gaussian
    distribution ARE applied here.

    Technique from C. Croux & P. Rousseeuw in Comp. Stat Vol 1 (1992) ed. Dodge & Whittaker,
    Heidelberg: Physica-Verlag pages 411-428.  Available at
    ftp://ftp.win.ua.ac.be/pub/preprints/92/Timeff92.pdf

    The estimator is further studied in Rousseeuw & Croux, J Am. Stat. Assoc 88 (1993), pp 1273-1283.
    """
    if sort_inplace:
        if not isinstance(x, np.ndarray):
            raise ValueError("sort_inplace cannot be True unless the data set x is a np.ndarray.")
    else:
        x = np.array(x)
    x.sort()

    n = len(x)
    if n < 2:
        raise ValueError("Data set <x> must contain at least 2 values!")
    h = n // 2 + 1
    # -1 so the order count starts at 0 instead of the conventional 1,2,3...
    target_k = h * (h - 1) // 2 - 1

    # n-dependent prefactor making Q consistent with sigma of a Gaussian.
    prefactor = 2.2219
    small_n_factors = [0, 0, 0.399, 0.994, 0.512, 0.844, 0.611, 0.857, 0.669, 0.872]
    if n <= 9:
        prefactor *= small_n_factors[n]
    elif n % 2 == 1:
        prefactor *= n / (n + 1.4)
    else:
        prefactor *= n / (n + 3.8)

    # We need the kth order statistic of |xi - xj| over i<j.  Conceptually this
    # is drawn from the upper triangle of the matrix Aij = xj - xi on sorted data.

    # Tiny data sets hit too many boundary cases in the fast algorithm; brute-force them.
    if n <= 5:
        gaps = np.hstack([x[j] - x[:j] for j in range(1, n)])
        assert len(gaps) == (n * (n - 1)) // 2
        gaps.sort()
        return gaps[target_k] * prefactor

    q, npasses = _Qscale_subroutine(x, n, target_k)

    if npasses > n:
        raise RuntimeError(f"Qscale tried {npasses} distances, which is too many")
    return q * prefactor

bisquare_weighted_mean(x, k, center=None, tol=None)

The bisquare weighted mean of the data x with a k-value of k.

Args: x (array): data values to be summarized k (number): give zero weight to values at least distance k from the weighted mean. center (number): an initial guess at the weighted mean. If None, then the data median will be used (default None). tol (number): tolerance on the estimator (see below; default None)

A sensible choice of k is 3 to 5 times the rms width or 1.3 to 2 times the full width at half max of a peak. For strictly Gaussian data, the choices of k= 3.14, 3.88, and 4.68 times sigma will be 80%, 90%, and 95% efficient.

The answer is found iteratively, revised until it changes by less than tol. If tol is None (the default), then tol will use 1e-5 times the median absolute deviation of x about its median.

Data values a distance of more than k from the weighted mean are given no weight.

Source code in mass2/mathstat/robust.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def bisquare_weighted_mean(x: ArrayLike, k: float, center: float | None = None, tol: float | None = None) -> float:
    """The bisquare weighted mean of the data <x> with a k-value of <k>.

    Args:
        x (array): data values to be summarized
        k (number): give zero weight to values at least distance k from the weighted
            mean.
        center (number): an initial guess at the weighted mean.
            If None, then the data median will be used (default None).
        tol (number): tolerance on the estimator (see below; default None)

    A sensible choice of <k> is 3 to 5 times the rms width or 1.3 to 2 times the
    full width at half max of a peak.  For strictly Gaussian data, the choices of
    k= 3.14, 3.88, and 4.68 times sigma will be 80%, 90%, and 95% efficient.

    The answer is found iteratively, revised until it changes by less than <tol>.  If
    <tol> is None (the default), then <tol> will use 1e-5 times the median absolute
    deviation of <x> about its median.

    Data values a distance of more than <k> from the weighted mean are given no weight.
    """

    x = np.asarray(x)
    if center is None:
        center = np.median(x)
    if tol is None:
        tol = 1e-5 * median_abs_dev(x, normalize=True)

    for _iteration in range(100):
        weights = (1 - ((x - center) / k) ** 2.0) ** 2.0
        weights[np.abs(x - center) > k] = 0.0
        newcenter = (weights * x).sum() / weights.sum()
        if abs(newcenter - center) < tol:
            return newcenter
        center = newcenter
    raise RuntimeError(
        "bisquare_weighted_mean used too many iterations.\n"
        + "Consider using higher <tol> or better <center>, or change to trimean(x)."
    )

high_median(x, weights=None, return_index=False)

Compute the weighted high median of data set x with the given weights.

Returns: The smallest x[j] such that the sum of all weights for data with x[i] <= x[j] is strictly greater than half the total weight.

If return_index is True, then the chosen index is returned also as (highmed, index).

Source code in mass2/mathstat/robust.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
def high_median(x: ArrayLike, weights: ArrayLike | None = None, return_index: bool = False) -> float | tuple[float, int]:
    """Compute the weighted high median of data set x with the given weights.

    Returns:
        The smallest x[j] such that the sum of all weights for data
        with x[i] <= x[j] is strictly greater than half the total weight.

    If return_index is True, then the chosen index is returned also as (highmed, index).
    """
    data = np.asarray(x)
    order = data.argsort()  # data[order] is sorted ascending
    n = len(data)
    wts = np.ones(n, dtype=float) if weights is None else np.asarray(weights, dtype=float)

    # The compiled helper walks the sorted order to find the high-median index.
    chosen = _high_median(order, wts, n)

    if return_index:
        return data[chosen], chosen
    return data[chosen]

huber_weighted_mean(x, k, center=None, tol=None)

Huber's weighted mean of the data x with a k-value of k.

Args: x (array): data values to be summarized k (number): give zero weight to values at least distance k from the weighted mean. center (number): an initial guess at the weighted mean. If None, then the data median will be used (default None). tol (number): tolerance on the estimator (see below; default None)

A sensible choice of k is 1 to 1.5 times the rms width or 0.4 to 0.6 times the full width at half max of a peak. For strictly Gaussian data, the choices of k=1.0 and 1.4 sigma give ...

The answer is found iteratively, revised until it changes by less than tol. If tol is None (the default), then tol will use 1e-5 times the median absolute deviation of x about its median.

Data values a distance of more than k from the weighted mean are given no weight.

Source code in mass2/mathstat/robust.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def huber_weighted_mean(x: ArrayLike, k: float, center: float | None = None, tol: float | None = None) -> float:
    """Huber's weighted mean of the data <x> with a k-value of <k>.

    Args:
        x (array): data values to be summarized
        k (number): give zero weight to values at least distance k from the weighted
            mean.
        center (number): an initial guess at the weighted mean.
            If None, then the data median will be used (default None).
        tol (number): tolerance on the estimator (see below; default None)

    A sensible choice of <k> is 1 to 1.5 times the rms width or 0.4 to 0.6 times the
    full width at half max of a peak.  For strictly Gaussian data, the choices of
    k=1.0 and 1.4 sigma give ...

    The answer is found iteratively, revised until it changes by less than <tol>.  If
    <tol> is None (the default), then <tol> will use 1e-5 times the median absolute
    deviation of <x> about its median.

    Data values a distance of more than <k> from the weighted mean are given no weight.
    """

    x = np.asarray(x)
    if center is None:
        center = np.median(x)
    if tol is None:
        tol = 1e-5 * median_abs_dev(x, normalize=True)

    for _iteration in range(100):
        weights = np.asarray((1.0 * k) / np.abs(x - center))
        weights[weights > 1.0] = 1.0
        newcenter = (weights * x).sum() / weights.sum()
        if abs(newcenter - center) < tol:
            return newcenter
        center = newcenter
    raise RuntimeError(
        "huber_weighted_mean used too many iterations.\n" + "Consider using higher <tol> or better <center>, or change to trimean(x)."
    )

median_abs_dev(x, normalize=False)

Median absolute deviation (from the median) of data vector.

Args: x (array): data to be summarized. normalize (bool): if True, then return MAD/0.675, which scaling makes the statistic consistent with the standard deviation for an asymptotically large sample of Gaussian deviates (default False).

Source code in mass2/mathstat/robust.py
134
135
136
137
138
139
140
141
142
143
144
145
146
147
def median_abs_dev(x: ArrayLike, normalize: bool = False) -> float:
    """Return the median absolute deviation (from the median) of a data vector.

    Args:
        x (array): data to be summarized.
        normalize (bool): if True, then return MAD/0.675, which scaling makes
            the statistic consistent with the standard deviation for an asymptotically large
            sample of Gaussian deviates (default False).
    """
    values = np.asarray(x)
    mad = np.median(np.abs(values - np.median(values)))
    if not normalize:
        return mad
    # 0.674480 = inverse normal CDF at 0.75: half of a normal distribution
    # satisfies abs(x - mu) < 0.674480*sigma.
    return mad / 0.674480

shorth_range(x, normalize=False, sort_inplace=False, location=False)

Returns the Shortest Half (shorth) Range, a robust estimator of dispersion.

The Shortest Half of a data set {x} means that closed interval [a,b] where (1) a and b are both elements of the data set, (2) at least half of the elements are in the closed interval, and (3) which minimizes the length of the closed interval (b-a). The shorth range is (b-a). See mass2.mathstat.robust.shorth_information for further explanation and references in the literature.

Args: x (array): The data set under study. Must be a sequence of values. normalize (bool): If False (default), then return the actual range b-a. If True, then the range will be divided by 1.348960, which normalizes the range to be a consistent estimator of the parameter sigma in the case of an exact Gaussian distribution. (A small correction of order 1/N is applied, too, which mostly corrects for bias at modest values of the sample size N.) sort_inplace - Permit this function to reorder the data set x. If False (default), then x will be copied and the copy will be sorted. (Note that if x is not a np.ndarray, an error will be raised if sort_inplace is True.) location - Whether to return two location estimators in addition to the dispersion estimator. (default False).

Returns: shorth range if location evaluates to False; otherwise returns: (shorth range, shorth mean, shorth center)

In this, shorth mean is the mean of all samples in the closed range [a,b], and shorth center = (a+b)/2. Beware that both of these location estimators have the undesirable property that their asymptotic standard deviation improves only as N^(-1/3) rather than the more usual N^(-1/2). So it is not a very good idea to use them as location estimators. They are really only included here for testing just how useless they are.

Source code in mass2/mathstat/robust.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def shorth_range(
    x: ArrayLike, normalize: bool = False, sort_inplace: bool = False, location: bool = False
) -> float | tuple[float, float, float]:
    """Returns the Shortest Half (shorth) Range, a robust estimator of dispersion.

    The Shortest Half of a data set {x} means that closed interval [a,b] where (1) a and b are both
    elements of the data set, (2) at least half of the elements are in the closed interval, and (3)
    which minimizes the length of the closed interval (b-a).  The shorth range is (b-a). See
    mass2.mathstat.robust.shorth_information for further explanation and references in the
    literature.

    Args:
        x (array): The data set under study.  Must be a sequence of values.
        normalize (bool): If False (default), then return the actual range b-a.  If True, then the
            range will be divided by 1.348960, which normalizes the range to be a consistent estimator
            of the parameter sigma in the case of an exact Gaussian distribution.  (A small correction
            of order 1/N is applied, too, which mostly corrects for bias at modest values of the sample
            size N.)
        sort_inplace - Permit this function to reorder the data set <x>.  If False (default), then x will be
            copied and the copy will be sorted.  (Note that if <x> is not a np.ndarray, an error
            will be raised if <sort_inplace> is True.)
        location     - Whether to return two location estimators in addition to the dispersion estimator.
            (default False).

    Returns:
        shorth range   if <location> evaluates to False; otherwise returns:
        (shorth range, shorth mean, shorth center)

    In this, shorth mean is the mean of all samples in the closed range [a,b], and
    shorth center = (a+b)/2.  Beware that both of these location estimators have the
    undesirable property that their asymptotic standard deviation improves only as
    N^(-1/3) rather than the more usual N^(-1/2).  So it is not a very good idea to
    use them as location estimators.  They are really only included here for testing
    just how useless they are.
    """

    if not sort_inplace:
        x = np.array(x)
    elif not isinstance(x, np.ndarray):
        raise ValueError("sort_inplace cannot be True unless the data set x is a np.ndarray.")
    x.sort()

    n = len(x)  # Number of data values
    nhalves = int((n + 1) / 2)  # Number of minimal intervals containing at least half the data
    nobs = 1 + int(n / 2)  # Number of data values in each minimal interval

    range_each_half = x[n - nhalves : n] - x[0:nhalves]
    idxa = range_each_half.argmin()
    a, b = x[idxa], x[idxa + nobs - 1]
    shorth_range = b - a

    if normalize:
        shorth_range /= 2 * 0.674480
        # Asymptotic expectation for normal data: sigma*2*0.674480
        # The value 2*0.674480 is twice the inverse cumulative normal distribution at 0.75. That is,
        # the middle 50% of a normal distribution are within ±0.674480*sigma of the mean.

        # The small-n corrections depend on n mod 4.  See Rousseeuw & Lerow 1988.
        # These are not at all clear from the text of the paper (see table on p. 115
        # if you want to try to decode them).
        if n % 4 == 0:
            shorth_range *= (n + 1.0) / n
        elif n % 4 == 1:
            shorth_range *= (n + 1.0) / (n - 1.0)
        elif n % 4 == 2:
            shorth_range *= (n + 1.0) / n
        else:
            shorth_range *= (n + 1.0) / (n - 1.0)

    if location:
        return shorth_range, x[idxa : idxa + nobs].mean(), 0.5 * (a + b)
    return shorth_range

trimean(x)

Return Tukey's trimean for a data set x, a measure of its central tendency ("location" or "center").

If (q1,q2,q3) are the quartiles (i.e., the 25%ile, median, and 75 %ile), the trimean is (q1+q3)/4 + q2/2.

Source code in mass2/mathstat/robust.py
121
122
123
124
125
126
127
128
129
130
131
def trimean(x: ArrayLike) -> float:
    """Return Tukey's trimean for a data set <x>, a measure of its central tendency
    ("location" or "center").

    If (q1,q2,q3) are the quartiles (i.e., the 25%ile, median, and 75 %ile),
    the trimean is (q1+q3)/4 + q2/2.
    """
    q1, q2, q3 = np.percentile(np.asarray(x), [25, 50, 75])
    return 0.5 * q2 + 0.25 * (q1 + q3)