Source code for plenoptic.process.stats

"""Functions for computing image statistics on multi-dimensional tensors."""

# numpydoc ignore=ES01
import torch
from torch import Tensor

__all__ = [
    "kurtosis",
    "skew",
    "variance",
]


def __dir__() -> list[str]:
    return __all__


[docs] def variance( x: Tensor, mean: float | Tensor | None = None, dim: int | list[int] | None = None, keepdim: bool = False, ) -> Tensor: r""" Calculate sample variance. Note that this is the uncorrected, or sample, variance, corresponding to ``torch.var(*, correction=0)``. Parameters ---------- x The input tensor. mean Reuse a precomputed mean. dim The dimension or dimensions to reduce. keepdim Whether to retain the reduced dimensions (as singletons) or not. Returns ------- out The variance tensor. See Also -------- skew Calculate sample skewness. kurtosis Calculate sample kurtosis. Examples -------- .. plot:: >>> import plenoptic as po >>> import matplotlib.pyplot as plt >>> import torch >>> po.set_seed(42) >>> x = torch.randn(10000) >>> v = po.process.variance(x) >>> x_more = x * 3 >>> v_more = po.process.variance(x_more) >>> x_less = x * 0.3 >>> v_less = po.process.variance(x_less) >>> fig, (ax_less, ax, ax_more) = plt.subplots( ... 1, 3, sharex=True, sharey=True, figsize=(12, 4) ... ) >>> _ = ax_less.hist(x_less, bins=50) >>> _ = ax_less.set(title=f"σ=0.3\nVariance: {v_less:.4f}", ylabel="Frequency") >>> _ = ax.hist(x, bins=50) >>> _ = ax.set(title=f"Standard Gaussian, σ=1\nVariance: {v:.4f}") >>> _ = ax_more.hist(x_more, bins=50) >>> _ = ax_more.set(title=f"σ=3\nVariance: {v_more:.4f}") If you have precomputed the mean, you can pass it and avoid recomputing it: >>> precomputed_mean = torch.mean(x) >>> v = po.process.variance(x, mean=precomputed_mean) >>> v tensor(1.0088) If you want to compute along a specific dimension, you can specify it: >>> x = torch.randn(10000, 2) >>> v = po.process.variance(x, dim=0) >>> v tensor([1.0127, 1.0045]) This function differs from :func:`torch.var` in that it does not apply a correction: >>> plenoptic_v_corrected = v * x.shape[0] / (x.shape[0] - 1) >>> torch_v = torch.var(x, dim=0) >>> torch.isclose(plenoptic_v_corrected, torch_v) tensor([True, True]) """ if dim is None: dim = tuple(range(x.ndim)) if mean is None: mean = torch.mean(x, dim=dim, keepdim=True) return torch.mean((x - mean).pow(2), dim=dim, keepdim=keepdim)
[docs] def skew( x: Tensor, mean: float | Tensor | None = None, var: float | Tensor | None = None, dim: int | list[int] | None = None, keepdim: bool = False, ) -> Tensor: r""" Calculate sample estimate of *asymmetry* about input's mean. To help with interpretation: - Skew of normal distribution is 0. - Negative skew, also known as left-skewed: the left tail is longer. Distribution appears as a right-leaning curve. - Positive skew, also known as right-skewed: the right tail is longer. Distribution appears as a left-leaning curve. Parameters ---------- x The input tensor. mean Reuse a precomputed mean. var Reuse a precomputed variance. dim The dimension or dimensions to reduce. keepdim Whether to retain the reduced dimensions (as singletons) or not. Returns ------- out The skewness tensor. See Also -------- variance Calculate sample variance. kurtosis Calculate sample kurtosis. Examples -------- .. plot:: >>> import plenoptic as po >>> import matplotlib.pyplot as plt >>> import torch >>> po.set_seed(42) >>> x = torch.randn(10000) >>> s = po.process.skew(x) >>> x_right = torch.exp(x / 2) >>> s_right = po.process.skew(x_right) >>> x_left = -torch.exp(x / 2) >>> s_left = po.process.skew(x_left) >>> fig, (ax_left, ax, ax_right) = plt.subplots( ... 1, 3, sharex=True, figsize=(12, 4) ... ) >>> _ = ax_left.hist(x_left, bins=50) >>> _ = ax_left.set( ... title=f"Left skew: {s_left:.4f}", ylabel="Frequency", xlim=(-5, 5) ... ) >>> _ = ax.hist(x, bins=50) >>> _ = ax.set(title=f"Standard Gaussian\nSkew: {s:.4f}") >>> _ = ax_right.hist(x_right, bins=50) >>> _ = ax_right.set(title=f"Right skew: {s_right:.4f}") If you have precomputed the mean and/or variance, you can pass them and avoid recomputing: >>> precomputed_mean = torch.mean(x) >>> precomputed_var = variance(x) >>> s = po.process.skew(x, mean=precomputed_mean, var=precomputed_var) >>> s tensor(-0.0010) If you want to compute along a specific dimension, you can specify it: >>> x = torch.randn(10000, 2) >>> s = po.process.skew(x, dim=0) >>> s tensor([-0.0257, -0.0063]) """ if dim is None: dim = tuple(range(x.ndim)) if mean is None: mean = torch.mean(x, dim=dim, keepdim=True) if var is None: var = variance(x, mean=mean, dim=dim, keepdim=keepdim) return torch.mean((x - mean).pow(3), dim=dim, keepdim=keepdim) / var.pow(1.5)
[docs] def kurtosis( x: Tensor, mean: float | Tensor | None = None, var: float | Tensor | None = None, dim: int | list[int] | None = None, keepdim: bool = False, ) -> Tensor: r""" Calculate sample estimate of *tailedness* (presence of outliers). To help with interpretation: - Kurtosis of univariate normal is 3. - Smaller than 3: *platykurtic* (e.g. uniform distribution). - Greater than 3: *leptokurtic* (e.g. Laplace distribution). Parameters ---------- x The input tensor. mean Reuse a precomputed mean. var Reuse a precomputed variance. dim The dimension or dimensions to reduce. keepdim Whether to retain the reduced dimensions (as singletons) or not. Returns ------- out The kurtosis tensor. See Also -------- variance Calculate sample variance. skew Calculate sample skewness. Examples -------- .. plot:: >>> import plenoptic as po >>> import matplotlib.pyplot as plt >>> import torch >>> po.set_seed(42) >>> x = torch.randn(10000) >>> k = po.process.kurtosis(x) >>> x_platy = torch.rand(10000) * 10 - 5 >>> k_platy = po.process.kurtosis(x_platy) >>> x_lepto = torch.distributions.Laplace(loc=0.0, scale=1.0).sample((10000,)) >>> k_lepto = po.process.kurtosis(x_lepto) >>> fig, (ax_platy, ax, ax_lepto) = plt.subplots( ... 1, 3, sharex=True, figsize=(12, 4) ... ) >>> _ = ax_platy.hist(x_platy.numpy(), bins=50) >>> _ = ax_platy.set( ... title=f"Platykurtic (Uniform)\nKurtosis: {k_platy:.4f}", ... ylabel="Frequency", ... xlim=(-5, 5), ... ) >>> _ = ax.hist(x.numpy(), bins=50) >>> _ = ax.set(title=f"Standard Gaussian\nKurtosis: {k:.4f}") >>> _ = ax_lepto.hist(x_lepto.numpy(), bins=50) >>> _ = ax_lepto.set(title=f"Leptokurtic (Laplace)\nKurtosis: {k_lepto:.4f}") If you have precomputed the mean and/or variance, you can pass them and avoid recomputing: >>> precomputed_mean = torch.mean(x) >>> precomputed_var = variance(x) >>> k = po.process.kurtosis(x, mean=precomputed_mean, var=precomputed_var) >>> k tensor(2.9354) If you want to compute along a specific dimension, you can specify it: >>> x = torch.randn(10000, 2) >>> k = po.process.kurtosis(x, dim=0) >>> k tensor([3.0057, 2.9506]) """ if dim is None: dim = tuple(range(x.ndim)) if mean is None: mean = torch.mean(x, dim=dim, keepdim=True) if var is None: var = variance(x, mean=mean, dim=dim, keepdim=keepdim) return torch.mean(torch.abs(x - mean).pow(4), dim=dim, keepdim=keepdim) / var.pow(2)