Merge pull request #87 from Harry-Rich/scipp

Scippified Predict posterior
bjmorgan · Dec 11, 2024 · f9adcbf · f9adcbf
2 parents ac68556 + 9ccc711
commit f9adcbf
Show file tree

Hide file tree

Showing 2 changed files with 63 additions and 1 deletion.
diff --git a/kinisi/analyzer.py b/kinisi/analyzer.py
@@ -118,6 +118,20 @@ def _from_universe(cls,
                                  progress=progress)
             return cls(p)
 
+    def posterior_predictive(self, n_posterior_samples: int = None,
+                             n_predictive_samples: int = 256,
+                             progress: bool = True):
+        """
+        Sample  the posterior predictive distribution. The shape of the resulting array will be
+        `(n_posterior_samples * n_predictive_samples, start_dt)`.
+        
+        :params posterior_predictive_params: Parameters for the :py:func:`diffusion.posterior_predictive` method. 
+            See the appropriate documentation for more guidence on this dictionary.
+
+        :return: Samples from the posterior predictive distribution. 
+        """
+        return self.diff.posterior_predictive(n_posterior_samples,n_predictive_samples,progress)
+
     @property
     def n_atoms(self) -> int:
         """

diff --git a/kinisi/diffusion.py b/kinisi/diffusion.py
@@ -10,9 +10,10 @@
 import scipp as sc
 from statsmodels.stats.correlation_tools import cov_nearest
 from scipy.linalg import pinvh
-from scipy.stats import linregress
+from scipy.stats import linregress, multivariate_normal
 from scipy.optimize import minimize
 from emcee import EnsembleSampler
+from tqdm import tqdm
 
 
 class Diffusion:
@@ -205,6 +206,53 @@ def compute_covariance_matrix(self) -> sc.Variable:
                             minimum_eigenvalue_method(cov[self.diff_regime:, self.diff_regime:], self._cond_max)),
                         unit=self.msd.unit**2)
 
+    def posterior_predictive(self,
+                             n_posterior_samples: int = None,
+                             n_predictive_samples: int = 256,
+                             progress: bool = True) -> sc.Variable:
+        """
+        Sample the posterior predictive distribution. The shape of the resulting array will be
+        `(n_posterior_samples * n_predictive_samples, start_dt)`.
+        
+        :param n_posterior_samples: Number of samples from the posterior distribution.
+            Optional, default is the number of posterior samples.
+        :param n_predictive_samples: Number of random samples per sample from the posterior distribution.
+            Optional, default is :py:attr:`256`.
+        :param progress: Show tqdm progress for sampling. Optional, default is :py:attr:`True`.
+
+        :return: Samples from the posterior predictive distribution. 
+        """
+        if n_posterior_samples is None:
+            n_posterior_samples = self.gradient.size
+
+        ppd_unit = self.gradient.unit * self.msd.coords['time interval'].unit + self.intercept.unit
+
+        diff_regime = np.argwhere(self.msd.coords['time interval'] >= self._start_dt)[0][0]
+        ppd = sc.zeros(
+            dims=['posterior samples', 'predictive samples', 'time interval'],
+            shape=[n_posterior_samples, n_predictive_samples, self.msd.coords['time interval'][diff_regime:].size],
+            unit=ppd_unit)
+        samples_to_draw = list(enumerate(np.random.randint(0, self.gradient.size, size=n_posterior_samples)))
+
+        if progress:
+            iterator = tqdm(samples_to_draw, desc='Calculating Posterior Predictive')
+        else:
+            iterator = samples_to_draw
+
+        # Checking unit consistency for mu and covariance
+        try:
+            ppd_unit**2 == self._covariance_matrix.unit
+        except:
+            'Units of the covariance matrix and mu do not align correctly'
+
+        for i, n in iterator:
+            mu = self.gradient[n] * self.msd.coords['time interval'][diff_regime:] + self.intercept[n]
+            mv = multivariate_normal(mean=mu.values, cov=self._covariance_matrix.values, allow_singular=True)
+            ppd.values[i] = mv.rvs(n_predictive_samples)
+
+        ppd = sc.flatten(ppd, dims=['posterior samples', 'predictive samples'], to='samples')
+        return ppd
+
 
 def minimum_eigenvalue_method(cov: np.ndarray, cond_max=1e16) -> np.ndarray:
     """