Source code for plenoptic.synthesize.autodiff

import warnings

import torch
from torch import Tensor


def jacobian(y: Tensor, x: Tensor) -> Tensor:
    """Explicitly compute the full Jacobian matrix.

    N.B. This is only recommended for small input sizes (e.g. <100x100 image).

    Parameters
    ----------
    y
        Model output with gradient attached.
    x
        Model input with gradient attached.

    Returns
    -------
    J
        Jacobian matrix with ``torch.Size([len(y), len(x)])``.
    """
    if x.numel() > 1e4:
        warnings.warn(
            "Calculation of Jacobian with input dimensionality greater than"
            " 1E4 may take too long; consider an iterative method (e.g. power"
            " method, randomized svd) instead."
        )

    J = (
        torch.stack(
            [
                torch.autograd.grad(
                    [y[i].sum()], [x], retain_graph=True, create_graph=True
                )[0]
                for i in range(y.size(0))
            ],
            dim=-1,
        )
        .squeeze()
        .t()
    )

    if y.shape[0] == 1:
        # need to return a 2D tensor even if y dimensionality is 1
        J = J.unsqueeze(0)

    return J.detach()
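
A minimal usage sketch (not part of the module source): a hypothetical linear model y = A @ x, whose Jacobian is exactly A, makes it easy to illustrate the call signature and check the ``[len(y), len(x)]`` output shape. The names A, x, y are illustrative only.

import torch
from plenoptic.synthesize.autodiff import jacobian

A = torch.randn(3, 5)                      # known linear map
x = torch.randn(5, 1, requires_grad=True)  # input column vector with gradient attached
y = A @ x                                  # output column vector, shape [3, 1]
J = jacobian(y, x)                         # explicit Jacobian, shape [3, 5]
assert torch.allclose(J, A)                # the Jacobian of A @ x is A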


def vector_jacobian_product(
    y: Tensor,
    x: Tensor,
    U: Tensor,
    retain_graph: bool = True,
    create_graph: bool = True,
    detach: bool = False,
) -> Tensor:
    r"""Compute vector-Jacobian product: :math:`\text{vjp} = u^T(\partial y/\partial x)`.

    Backward Mode Auto-Differentiation (``Lop`` in Theano).

    Note on efficiency: When this function is used in the context of power
    iteration for computing eigenvectors, the vector output will be repeatedly
    fed back into :meth:`vector_jacobian_product` and
    :meth:`jacobian_vector_product`. To prevent the accumulation of gradient
    history in this vector (especially on GPU), we need to ensure the
    computation graph is not kept in memory after each iteration. We do this by
    detaching the output, as well as carefully specifying where/when to retain
    the created graph.

    Parameters
    ----------
    y
        Output with gradient attached, ``torch.Size([m, 1])``.
    x
        Input with gradient attached, ``torch.Size([n, 1])``.
    U
        Directions, shape is ``torch.Size([m, k])``, i.e. same dim as output
        tensor.
    retain_graph
        Whether or not to keep the graph after doing one
        :meth:`vector_jacobian_product`. Must be set to True if ``k > 1``.
    create_graph
        Whether or not to create a computational graph. Usually should be set
        to True unless you're reusing the graph, as in the second step of
        :meth:`jacobian_vector_product`.
    detach
        As with ``create_graph``, only necessary to be True when reusing the
        output, as we do in the 2nd step of :meth:`jacobian_vector_product`.

    Returns
    -------
    vJ
        Vector-Jacobian product, ``torch.Size([n, k])``.
    """
    assert y.shape[-1] == 1
    assert U.shape[0] == y.shape[0]

    k = U.shape[-1]
    product = []

    for i in range(k):
        (grad_x,) = torch.autograd.grad(
            y,
            x,
            U[:, i].unsqueeze(-1),
            retain_graph=retain_graph,
            create_graph=create_graph,
            allow_unused=True,
        )
        product.append(grad_x.reshape(x.shape))
    vJ = torch.cat(product, dim=1)

    if detach:
        return vJ.detach()
    else:
        return vJ
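
A similar sketch for the backward-mode product, again using a hypothetical linear model so the result can be checked against the explicit Jacobian: for y = A @ x, stacking u^T (∂y/∂x) over the columns of U gives A^T @ U, with shape [n, k]. The setup (A, x, y, U) is illustrative only; detach=True follows the efficiency note above.

import torch
from plenoptic.synthesize.autodiff import vector_jacobian_product

A = torch.randn(3, 5)                      # known linear map
x = torch.randn(5, 1, requires_grad=True)  # input, shape [5, 1]
y = A @ x                                  # output, shape [3, 1]
U = torch.randn(3, 2)                      # k=2 directions in output space
vJ = vector_jacobian_product(y, x, U, detach=True)  # shape [5, 2]
assert torch.allclose(vJ, A.t() @ U)       # matches A^T U for a linear model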


def jacobian_vector_product(
    y: Tensor, x: Tensor, V: Tensor, dummy_vec: Tensor = None
) -> Tensor:
    r"""Compute Jacobian-vector product: :math:`\text{jvp} = (\partial y/\partial x) v`.

    Forward Mode Auto-Differentiation (``Rop`` in Theano). PyTorch does not
    natively support this operation; this function essentially calls backward
    mode autodiff twice, as described in [1].

    See the :meth:`vector_jacobian_product` docstring on why we pass arguments
    for ``retain_graph`` and ``create_graph``.

    Parameters
    ----------
    y
        Model output with gradient attached, shape is ``torch.Size([m, 1])``.
    x
        Model input with gradient attached, shape is ``torch.Size([n, 1])``.
    V
        Directions in which to compute product, shape is ``torch.Size([n, k])``,
        where k is the number of vectors to compute.
    dummy_vec
        Vector with which to do the jvp trick [1]. If this argument is given,
        use the pre-allocated, cached vector; otherwise create a new one and
        move it to the device in this method.

    Returns
    -------
    Jv
        Jacobian-vector product, ``torch.Size([m, k])``.

    Notes
    -----
    [1] https://j-towns.github.io/2017/06/12/A-new-trick.html
    """
    assert y.shape[-1] == 1
    assert V.shape[0] == x.shape[0]

    if dummy_vec is None:
        dummy_vec = torch.ones_like(y, requires_grad=True)

    # do vjp twice to get jvp; set detach=False first; dummy_vec must be
    # non-zero and is only there as a helper
    g = vector_jacobian_product(
        y, x, dummy_vec, retain_graph=True, create_graph=True, detach=False
    )
    Jv = vector_jacobian_product(
        g, dummy_vec, V, retain_graph=True, create_graph=False, detach=True
    )
    return Jv
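
And a sketch for the forward-mode product built from the two vjp calls: for the same hypothetical linear model, (∂y/∂x) @ V is A @ V, with shape [m, k]. As before, A, x, y, V are illustrative assumptions, not part of the module.

import torch
from plenoptic.synthesize.autodiff import jacobian_vector_product

A = torch.randn(3, 5)                      # known linear map
x = torch.randn(5, 1, requires_grad=True)  # input, shape [5, 1]
y = A @ x                                  # output, shape [3, 1]
V = torch.randn(5, 2)                      # k=2 directions in input space
Jv = jacobian_vector_product(y, x, V)      # shape [3, 2]
assert torch.allclose(Jv, A @ V)           # matches A V for a linear model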