import warnings
from typing import Optional

import torch
from torch import Tensor
def jacobian(y: Tensor, x: Tensor) -> Tensor:
"""Explicitly compute the full Jacobian matrix.
N.B. This is only recommended for small input sizes (e.g. <100x100 image)
Parameters
----------
y
Model output with gradient attached
x
Tensor with gradient function model input with gradient attached
Returns
-------
J
Jacobian matrix with ``torch.Size([len(y), len(x)])``
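
    Examples
    --------
    A minimal usage sketch (hypothetical, not from the package's tests): for a
    linear map ``y = A @ x``, the Jacobian should recover ``A``.

    >>> import torch
    >>> A = torch.randn(3, 2)
    >>> x = torch.randn(2, 1, requires_grad=True)
    >>> y = A @ x
    >>> J = jacobian(y, x)
    >>> torch.allclose(J, A)
    True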
"""
if x.numel() > 1e4:
warnings.warn(
"Calculation of Jacobian with input dimensionality greater than"
" 1E4 may take too long; consideran iterative method (e.g. power"
" method, randomized svd) instead."
)
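    # build the Jacobian row by row: each backward call computes the gradient of one
    # output element with respect to x, i.e. one row of J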
J = (
torch.stack(
[
torch.autograd.grad(
[y[i].sum()], [x], retain_graph=True, create_graph=True
)[0]
for i in range(y.size(0))
],
dim=-1,
)
.squeeze()
.t()
)
if y.shape[0] == 1: # need to return a 2D tensor even if y dimensionality is 1
J = J.unsqueeze(0)
return J.detach()
def vector_jacobian_product(
y: Tensor,
x: Tensor,
U: Tensor,
retain_graph: bool = True,
create_graph: bool = True,
detach: bool = False,
) -> Tensor:
r"""Compute vector Jacobian product: :math:`\text{vjp} = u^T(\partial y/\partial x)`
Backward Mode Auto-Differentiation (`Lop` in Theano)
Note on efficiency: When this function is used in the context of power iteration for
computing eigenvectors, the vector output will be repeatedly fed back into :meth:
`vector_jacobian_product()` and :meth:`jacobian_vector_product()`.
To prevent the accumulation of gradient history in this vector (especially on GPU),
we need to ensure the computation graph is not kept in memory after each iteration.
We can do this by detaching the output, as well as carefully specifying where/when
to retain the created graph.
Parameters
----------
y
Output with gradient attached, ``torch.Size([m, 1])``.
x
Input with gradient attached, ``torch.Size([n, 1])``.
U
Direction, shape is ``torch.Size([m, k])``, i.e. same dim as output tensor.
retain_graph
Whether or not to keep graph after doing one :meth:`vector_jacobian_product`.
Must be set to True if k>1.
create_graph
Whether or not to create computational graph. Usually should be set to True
unless you're reusing the graph like in the second step
of :meth:`jacobian_vector_product`.
detach
As with ``create_graph``, only necessary to be True when reusing the output
like we do in the 2nd step of :meth:`jacobian_vector_product`.
Returns
-------
vJ
        vector-Jacobian product, ``torch.Size([n, k])``.
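
    Examples
    --------
    A minimal usage sketch (hypothetical): for ``y = A @ x``, the vector-Jacobian
    product equals ``A.T @ u``.

    >>> import torch
    >>> A = torch.randn(3, 2)
    >>> x = torch.randn(2, 1, requires_grad=True)
    >>> y = A @ x
    >>> u = torch.randn(3, 1)
    >>> vJ = vector_jacobian_product(y, x, u, detach=True)
    >>> torch.allclose(vJ, A.T @ u)
    True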
"""
assert y.shape[-1] == 1
assert U.shape[0] == y.shape[0]
k = U.shape[-1]
product = []
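    # loop over the k columns of U: each backward call contracts one column with the
    # Jacobian, producing one column of the (n, k) result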
for i in range(k):
(grad_x,) = torch.autograd.grad(
y,
x,
U[:, i].unsqueeze(-1),
retain_graph=retain_graph,
create_graph=create_graph,
allow_unused=True,
)
product.append(grad_x.reshape(x.shape))
vJ = torch.cat(product, dim=1)
if detach:
return vJ.detach()
else:
return vJ
def jacobian_vector_product(
    y: Tensor, x: Tensor, V: Tensor, dummy_vec: Optional[Tensor] = None
) -> Tensor:
r"""Compute Jacobian Vector Product: :math:`\text{jvp} = (\partial y/\partial x) v`
Forward Mode Auto-Differentiation (``Rop`` in Theano). PyTorch does not natively
support this operation; this function essentially calls backward mode autodiff
twice, as described in [1].
See :meth:`vector_jacobian_product()` docstring on why we and pass arguments for
``retain_graph`` and ``create_graph``.
Parameters
----------
    y
        Model output with gradient attached, ``torch.Size([m, 1])``.
    x
        Model input with gradient attached, ``torch.Size([n, 1])``.
    V
        Directions in which to compute the product, ``torch.Size([n, k])`` where k is
        the number of vectors to compute; same dim as the input tensor.
    dummy_vec
        Vector with which to do the jvp trick [1]. If provided, use this
        pre-allocated, cached vector; otherwise create a new one and move it to the
        device in this method.

    Returns
    -------
    Jv
        Jacobian-vector product, ``torch.Size([m, k])``.

Notes
-----
[1] https://j-towns.github.io/2017/06/12/A-new-trick.html
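
    Examples
    --------
    A minimal usage sketch (hypothetical): for ``y = A @ x``, the Jacobian-vector
    product equals ``A @ v``.

    >>> import torch
    >>> A = torch.randn(3, 2)
    >>> x = torch.randn(2, 1, requires_grad=True)
    >>> y = A @ x
    >>> v = torch.randn(2, 1)
    >>> Jv = jacobian_vector_product(y, x, v)
    >>> torch.allclose(Jv, A @ v)
    True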
"""
assert y.shape[-1] == 1
assert V.shape[0] == x.shape[0]
if dummy_vec is None:
dummy_vec = torch.ones_like(y, requires_grad=True)
# do vjp twice to get jvp; set detach = False first; dummy_vec must be non-zero and
# is only there as a helper
g = vector_jacobian_product(
y, x, dummy_vec, retain_graph=True, create_graph=True, detach=False
)
Jv = vector_jacobian_product(
g, dummy_vec, V, retain_graph=True, create_graph=False, detach=True
)
return Jv
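

# Hypothetical sketch (not part of this module): the power-iteration pattern that the
# vector_jacobian_product / jacobian_vector_product docstrings refer to, estimating the
# top singular value of the Jacobian without ever materializing the full matrix.
def _power_iteration_sketch(y: Tensor, x: Tensor, n_steps: int = 50) -> Tensor:
    # start from a random unit vector in the input space
    v = torch.randn(x.shape[0], 1, device=x.device)
    v = v / v.norm()
    for _ in range(n_steps):
        # forward product Jv, then backward product J^T (Jv); the detached outputs keep
        # the computation graph from accumulating across iterations
        Jv = jacobian_vector_product(y, x, v)
        v = vector_jacobian_product(y, x, Jv, detach=True)
        v = v / v.norm()
    # after convergence, ||Jv|| approximates the largest singular value of J
    return jacobian_vector_product(y, x, v).norm()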