# Source code for neuralnet_pytorch.utils.tensor_utils

from .. import cuda_ext_available

import torch as T
import numpy as np
import numbers

__all__ = ['dimshuffle', 'shape_padleft', 'shape_padright', 'swapaxes', 'ravel_index', 'tile', 'repeat', 'block_diag',
           'block_diag_sparse', 'get_bilinear_weights', 'interpolate_bilinear', 'batch_pairwise_sqdist', 'gram_matrix',
           'var', 'std', 'break_dim']


def dimshuffle(x: T.Tensor, pattern):
    """
    Reorders the dimensions of this variable, optionally inserting broadcasted dimensions.
    Inspired by `Theano's dimshuffle`_.

    .. _Theano's dimshuffle:
        https://github.com/Theano/Theano/blob/d395439aec5a6ddde8ef5c266fd976412a5c5695/theano/tensor/var.py#L323-L356

    :param x:
        Input tensor.
    :param pattern:
        List/tuple of int mixed with ``'x'`` for broadcastable dimensions.
    :return:
        a tensor whose shape matches `pattern`.

    Examples
    --------
    To create a 3D view of a 2D matrix, call ``dimshuffle(x, (0, 'x', 1))``.
    This will create a 3D view such that the middle dimension is an implicit
    broadcasted dimension. To do the same thing on the transpose of that
    matrix, call ``dimshuffle(x, (1, 'x', 0))``.

    See Also
    --------
    :func:`~neuralnet_pytorch.utils.shape_padleft`
    :func:`~neuralnet_pytorch.utils.shape_padright`
    :func:`~neuralnet_pytorch.utils.swapaxes`
    """

    assert isinstance(pattern, (list, tuple)), 'pattern must be a list/tuple'
    no_expand_pattern = [d for d in pattern if d != 'x']
    y = x.permute(*no_expand_pattern)
    shape = list(y.shape)
    for idx, e in enumerate(pattern):
        if e == 'x':
            shape.insert(idx, 1)

    return y.view(*shape)

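# A minimal doctest-style sketch of ``dimshuffle``:
#
# >>> import torch as T
# >>> x = T.ones(2, 3)
# >>> dimshuffle(x, (0, 'x', 1)).shape
# torch.Size([2, 1, 3])
# >>> dimshuffle(x, (1, 'x', 0)).shape
# torch.Size([3, 1, 2])
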
def shape_padleft(x: T.Tensor, n_ones=1):
    """
    Reshape `x` by left-padding the shape with `n_ones` 1s.
    Inspired by `Theano's shape_padleft`_.

    .. _Theano's shape_padleft:
        https://github.com/Theano/Theano/blob/d395439aec5a6ddde8ef5c266fd976412a5c5695/theano/tensor/basic.py#L4539-L4553

    :param x:
        variable to be reshaped.
    :param n_ones:
        number of 1s to pad.

    See Also
    --------
    :func:`~neuralnet_pytorch.utils.dimshuffle`
    :func:`~neuralnet_pytorch.utils.shape_padright`
    :func:`~neuralnet_pytorch.utils.swapaxes`
    """

    pattern = ('x',) * n_ones + tuple(range(x.ndimension()))
    return dimshuffle(x, pattern)

def shape_padright(x: T.Tensor, n_ones=1):
    """
    Reshape `x` by right-padding the shape with `n_ones` 1s.
    Inspired by `Theano's shape_padright`_.

    .. _Theano's shape_padright:
        https://github.com/Theano/Theano/blob/d395439aec5a6ddde8ef5c266fd976412a5c5695/theano/tensor/basic.py#L4557

    :param x:
        variable to be reshaped.
    :param n_ones:
        number of 1s to pad.

    See Also
    --------
    :func:`~neuralnet_pytorch.utils.dimshuffle`
    :func:`~neuralnet_pytorch.utils.shape_padleft`
    :func:`~neuralnet_pytorch.utils.swapaxes`
    """

    pattern = tuple(range(x.ndimension())) + ('x',) * n_ones
    return dimshuffle(x, pattern)

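# Quick shape checks for the two padding helpers:
#
# >>> import torch as T
# >>> shape_padleft(T.ones(3, 4), 2).shape
# torch.Size([1, 1, 3, 4])
# >>> shape_padright(T.ones(3, 4)).shape
# torch.Size([3, 4, 1])
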
def swapaxes(y: T.Tensor, axis1, axis2):
    """
    Swaps two given axes in the input tensor.
    If the input is of shape :math:`(n_1, n_2, ..., n_{axis1}, ..., n_{axis2}, ...)`,
    the output will be :math:`(n_1, n_2, ..., n_{axis2}, ..., n_{axis1}, ...)`.
    Can be seen as a generalization of transpose.
    Taken from `Theano's swapaxes`_.

    .. _Theano's swapaxes:
        http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor._tensor_py_operators.swapaxes

    :param y:
        a tensor.
    :param axis1:
        an axis to be swapped.
    :param axis2:
        another axis to be swapped.
    :return:
        the axis-swapped tensor.

    See Also
    --------
    :func:`~neuralnet_pytorch.utils.dimshuffle`
    :func:`~neuralnet_pytorch.utils.shape_padleft`
    :func:`~neuralnet_pytorch.utils.shape_padright`
    """

    ndim = y.ndimension()
    li = list(range(0, ndim))
    li[axis1], li[axis2] = li[axis2], li[axis1]
    return dimshuffle(y, li)

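# For instance, swapping the first and last axes of a rank-3 tensor:
#
# >>> import torch as T
# >>> swapaxes(T.zeros(2, 3, 5), 0, 2).shape
# torch.Size([5, 3, 2])
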
def ravel_index(indices: T.Tensor, shape):
    """
    Finds the linear indices of `indices` in a tensor of `shape` when it is flattened.

    :param indices:
        a tensor containing indices to be linearized.
    :param shape:
        shape of the tensor w.r.t. the index tensor.
    :return:
        the linear indices of the elements at `indices`.

    Examples
    --------
    >>> import torch as T
    >>> import numpy as np
    >>> import neuralnet_pytorch as nnt
    >>> shape = (2, 3, 5)
    >>> a = T.arange(np.prod(shape)).view(*shape)
    >>> indices = T.tensor([[1, 0, 1, 1], [0, 1, 2, 1], [1, 0, 4, 3]]).long()
    >>> print(a[tuple(indices)])
    tensor([16,  5, 29, 23])
    >>> linear_indices = nnt.utils.ravel_index(indices, shape)
    >>> print(linear_indices)
    tensor([16,  5, 29, 23])
    >>> print(a.flatten()[linear_indices])
    tensor([16,  5, 29, 23])
    """

    assert indices.shape[0] == len(shape), 'indices and shape must have the same length'
    shape = T.tensor(shape).to(indices.device)
    return sum([indices[i].long() * T.prod(shape[i + 1:]) for i in range(len(shape))])

def tile(x: T.Tensor, dims):
    """
    Repeats `x` along `dims`.
    Behaves like :func:`numpy.tile`.

    :param x:
        a :class:`torch.Tensor`.
    :param dims:
        the number of times to tile this tensor along each dimension.
    :return:
        the tiled tensor.
    """

    return x.repeat(*dims)

def repeat(input: T.Tensor, repeats, dim=None):
    """
    Repeats elements of a tensor like :func:`numpy.repeat`.

    :param input:
        a :class:`torch.Tensor`.
    :param repeats:
        the number of times to repeat this tensor along `dim`.
    :param dim:
        the dimension to repeat.
        If not specified, the method is applied to the flattened tensor.
        Default: ``None``.
    :return:
        the repeated tensor.
    """

    return T.repeat_interleave(input, repeats, dim)

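# ``tile`` and ``repeat`` differ the same way their NumPy namesakes do:
# ``tile`` repeats the whole tensor, while ``repeat`` repeats each element.
#
# >>> import torch as T
# >>> tile(T.tensor([1, 2, 3]), (2,))
# tensor([1, 2, 3, 1, 2, 3])
# >>> repeat(T.tensor([1, 2, 3]), 2)
# tensor([1, 1, 2, 2, 3, 3])
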
def moveaxis(x: T.Tensor, source, destination):
    """
    Adapted from Numpy.
    Moves axes of an array to new positions.
    Other axes remain in their original order.

    :param x:
        the array whose axes should be reordered.
    :param source:
        original positions of the axes to move. These must be unique.
    :param destination:
        destination positions for each of the original axes. These must also be unique.
    :return:
        array with moved axes. This array is a view of the input array.

    See Also
    --------
    :func:`~neuralnet_pytorch.utils.dimshuffle`
    :func:`~neuralnet_pytorch.utils.swapaxes`

    Examples
    --------
    >>> x = T.zeros((3, 4, 5))
    >>> nnt.utils.moveaxis(x, 0, -1).shape
    torch.Size([4, 5, 3])
    >>> nnt.utils.moveaxis(x, -1, 0).shape
    torch.Size([5, 3, 4])

    These all achieve the same result:

    >>> x.permute(2, 1, 0).shape
    torch.Size([5, 4, 3])
    >>> nnt.utils.swapaxes(x, 0, -1).shape
    torch.Size([5, 4, 3])
    >>> nnt.utils.moveaxis(x, [0, 1], [-1, -2]).shape
    torch.Size([5, 4, 3])
    >>> nnt.utils.moveaxis(x, [0, 1, 2], [-1, -2, -3]).shape
    torch.Size([5, 4, 3])
    """

    # `normalize_axis_tuple` is not exposed at the top level of numpy
    from numpy.core.numeric import normalize_axis_tuple

    n = x.ndim
    source = normalize_axis_tuple(source, n, 'source')
    destination = normalize_axis_tuple(destination, n, 'destination')
    if len(source) != len(destination):
        raise ValueError('`source` and `destination` arguments must have '
                         'the same number of elements')

    order = [ax for ax in range(n) if ax not in source]
    for dest, src in sorted(zip(destination, source)):
        order.insert(dest, src)

    return x.permute(*order)

def block_diag(*blocks):
    """
    Modified from scipy.linalg.block_diag.
    Creates a block diagonal matrix from provided arrays.
    Given the inputs `A`, `B` and `C`, the output will have these
    arrays arranged on the diagonal::

        [[A, 0, 0],
         [0, B, 0],
         [0, 0, C]]

    :param blocks:
        an iterator of tensors, up to 2-D.
        A 1-D tensor of length `n` is treated as a 2-D array with shape `(1,n)`.
    :return:
        a tensor with `A`, `B`, `C`, ... on the diagonal.
        Has the same dtype as `A`.

    Notes
    -----
    If all the input arrays are square, the output is known as a block diagonal matrix.

    See Also
    --------
    :func:`~neuralnet_pytorch.utils.block_diag_sparse`

    Examples
    --------
    >>> from neuralnet_pytorch.utils import block_diag
    >>> A = T.tensor([[1, 0],
    ...               [0, 1]])
    >>> B = T.tensor([[3, 4, 5],
    ...               [6, 7, 8]])
    >>> C = T.tensor([[7]])
    >>> block_diag(A, B, C)
    tensor([[1, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0],
            [0, 0, 3, 4, 5, 0],
            [0, 0, 6, 7, 8, 0],
            [0, 0, 0, 0, 0, 7]])
    >>> block_diag(T.tensor([1.0]), T.tensor([2, 3]), T.tensor([[4, 5], [6, 7]]))
    tensor([[1., 0., 0., 0., 0.],
            [0., 2., 3., 0., 0.],
            [0., 0., 0., 4., 5.],
            [0., 0., 0., 6., 7.]])
    """

    assert all(a.ndimension() <= 2 for a in blocks), 'All tensors must be at most of rank 2'
    # promote 1-D tensors to shape (1, n), as documented
    blocks = [a if a.ndimension() == 2 else a.view(1, -1) for a in blocks]

    shapes = np.array([a.shape for a in blocks])
    out = T.zeros(*np.sum(shapes, axis=0).tolist(), dtype=blocks[0].dtype, device=blocks[0].device)

    r, c = 0, 0
    for i, (rr, cc) in enumerate(shapes):
        out[r:r + rr, c:c + cc] = blocks[i]
        r = r + rr
        c = c + cc

    return out

def block_diag_sparse(a: T.Tensor, dense=False):
    """
    Creates a sparse block diagonal matrix from the provided array.
    Given the input tensor of size ``(n, r, c)``, the output will have
    the matrices of the last two indices arranged on the diagonal::

        [[a[0], 0, 0],
         [0, a[1], 0],
         [0, 0, a[2]]]

    :param a:
        a tensor of size ``(n, r, c)``.
    :param dense:
        whether to return a dense matrix. Default: ``False``.
    :return:
        a tensor with `a[0]`, `a[1]`, `a[2]`, ... on the diagonal.
        Has the same dtype as `a`.

    Notes
    -----
    All blocks must share the same shape ``(r, c)``; they need not be square.
    For blocks of varying shapes, use :func:`~neuralnet_pytorch.utils.block_diag`.

    See Also
    --------
    :func:`~neuralnet_pytorch.utils.block_diag`

    Examples
    --------
    >>> from neuralnet_pytorch.utils import block_diag_sparse
    >>> a = T.arange(3 * 2 * 4).view(3, 2, 4)
    >>> block_diag_sparse(a)
    tensor(indices=tensor([[ 0,  0,  0,  0,  1,  1,  1,  1,  2,  2,  2,  2,
                             3,  3,  3,  3,  4,  4,  4,  4,  5,  5,  5,  5],
                           [ 0,  1,  2,  3,  0,  1,  2,  3,  4,  5,  6,  7,
                             4,  5,  6,  7,  8,  9, 10, 11,  8,  9, 10, 11]]),
           values=tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,
                          12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]),
           size=(6, 12), nnz=24, layout=torch.sparse_coo)
    >>> block_diag_sparse(a, dense=True)
    tensor([[ 0,  1,  2,  3,  0,  0,  0,  0,  0,  0,  0,  0],
            [ 4,  5,  6,  7,  0,  0,  0,  0,  0,  0,  0,  0],
            [ 0,  0,  0,  0,  8,  9, 10, 11,  0,  0,  0,  0],
            [ 0,  0,  0,  0, 12, 13, 14, 15,  0,  0,  0,  0],
            [ 0,  0,  0,  0,  0,  0,  0,  0, 16, 17, 18, 19],
            [ 0,  0,  0,  0,  0,  0,  0,  0, 20, 21, 22, 23]])
    """

    assert len(a.shape) == 3, \
        'Input tensor must have 3 dimensions with the last two being matrices, got {}'.format(len(a.shape))

    n, r, c = a.shape

    # build the (row, col) coordinates of every entry of every block,
    # translated to its position on the diagonal
    y = T.arange(r)
    x = T.arange(c)
    yy, xx = T.meshgrid(y, x)
    xxs = T.stack([xx] * n)
    yys = T.stack([yy] * n)
    transl_x = T.arange(n) * c
    transl_y = T.arange(n) * r
    xxs_transl = xxs + transl_x[..., None, None]
    yys_transl = yys + transl_y[..., None, None]

    x_flat = xxs_transl.flatten()
    y_flat = yys_transl.flatten()
    indices = T.stack((y_flat, x_flat))

    a_sp = T.sparse_coo_tensor(indices.long(), a.flatten(), size=T.Size((n * r, n * c)), dtype=a.dtype)
    return a_sp.to_dense() if dense else a_sp

def get_bilinear_weights(x: T.Tensor, y: T.Tensor, h: int, w: int, border_mode='nearest'):
    """
    Returns bilinear weights used in bilinear interpolation.

    :param x:
        floating point coordinates along the x-axis.
    :param y:
        floating point coordinates along the y-axis.
    :param h:
        height of the 2D array.
    :param w:
        width of the 2D array.
    :param border_mode:
        strategy to deal with borders.
        Choices are ``'nearest'`` (default), ``'mirror'``, and ``'wrap'``.
    :return:
        the weights for bilinear interpolation and the integer coordinates.
    """

    x0_f = T.floor(x)
    y0_f = T.floor(y)
    x1_f = x0_f + 1
    y1_f = y0_f + 1

    if border_mode == 'nearest':
        x0 = T.clamp(x0_f, 0, w - 1)
        x1 = T.clamp(x1_f, 0, w - 1)
        y0 = T.clamp(y0_f, 0, h - 1)
        y1 = T.clamp(y1_f, 0, h - 1)
    elif border_mode == 'mirror':
        w = 2 * (w - 1)
        x0 = T.min(x0_f % w, -x0_f % w)
        x1 = T.min(x1_f % w, -x1_f % w)
        h = 2 * (h - 1)
        y0 = T.min(y0_f % h, -y0_f % h)
        y1 = T.min(y1_f % h, -y1_f % h)
    elif border_mode == 'wrap':
        x0 = T.fmod(x0_f, w)
        x1 = T.fmod(x1_f, w)
        y0 = T.fmod(y0_f, h)
        y1 = T.fmod(y1_f, h)
    else:
        raise ValueError("border_mode must be one of 'nearest', 'mirror', 'wrap'")

    x0, x1, y0, y1 = [v.long() for v in (x0, x1, y0, y1)]

    wxy = dimshuffle((x1_f - x) * (y1_f - y), (0, 'x'))
    wx1y = dimshuffle((x1_f - x) * (1. - (y1_f - y)), (0, 'x'))
    w1xy = dimshuffle((1. - (x1_f - x)) * (y1_f - y), (0, 'x'))
    w1x1y = dimshuffle((1. - (x1_f - x)) * (1. - (y1_f - y)), (0, 'x'))
    return wxy, wx1y, w1xy, w1x1y, x0, x1, y0, y1

def interpolate_bilinear(im: T.Tensor, x: T.Tensor, y: T.Tensor, output_shape=None, border_mode='nearest'):
    """
    Returns a batch of interpolated images. Used for Spatial Transformer Network.
    Works like :func:`torch.nn.functional.grid_sample`.

    :param im:
        a batch of input images.
    :param x:
        floating point coordinates along the x-axis.
        Should be in the range [-1, 1].
    :param y:
        floating point coordinates along the y-axis.
        Should be in the range [-1, 1].
    :param output_shape:
        output shape. A tuple of height and width.
        If not specified, output will have the same shape as input.
    :param border_mode:
        strategy to deal with borders.
        Choices are ``'nearest'`` (default), ``'mirror'``, and ``'wrap'``.
    :return:
        the bilinear interpolated batch of images.
    """

    if im.ndimension() != 4:
        raise TypeError('im should be a 4D Tensor image, got %dD' % im.ndimension())

    output_shape = output_shape if output_shape else im.shape[2:]
    x, y = x.flatten(), y.flatten()
    n, c, h, w = im.shape
    h_out, w_out = output_shape

    # scale coordinates from [-1, 1] to [0, width/height - 1]
    x = (x + 1) / 2 * (w - 1)
    y = (y + 1) / 2 * (h - 1)

    wxy, wx1y, w1xy, w1x1y, x0, x1, y0, y1 = get_bilinear_weights(x, y, h, w, border_mode=border_mode)

    # offset of each batch item in the flattened image tensor,
    # tiled so that every sampled coordinate carries its batch offset
    base = T.arange(n) * w * h
    base = T.reshape(base, (-1, 1))
    base = tile(base, (1, h_out * w_out))
    base = base.flatten()

    base_y0 = base + y0 * w
    base_y1 = base + y1 * w
    idx_a = base_y0 + x0
    idx_b = base_y1 + x0
    idx_c = base_y0 + x1
    idx_d = base_y1 + x1

    # gather the four corner pixels and blend them with the bilinear weights
    im_flat = T.reshape(dimshuffle(im, (0, 2, 3, 1)), (-1, c))
    pixel_a = im_flat[idx_a]
    pixel_b = im_flat[idx_b]
    pixel_c = im_flat[idx_c]
    pixel_d = im_flat[idx_d]

    output = wxy * pixel_a + wx1y * pixel_b + w1xy * pixel_c + w1x1y * pixel_d
    output = T.reshape(output, (n, h_out, w_out, c))
    return dimshuffle(output, (0, 3, 1, 2))

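# An identity-mapping sketch: sampling with a full [-1, 1] meshgrid should
# reproduce the input up to floating-point error (assumes a torch version
# where ``T.meshgrid`` defaults to 'ij' indexing, as used elsewhere in this
# module):
#
# >>> import torch as T
# >>> im = T.arange(2 * 3 * 4 * 4.).view(2, 3, 4, 4)
# >>> ys, xs = T.meshgrid(T.linspace(-1, 1, 4), T.linspace(-1, 1, 4))
# >>> x, y = xs.flatten().repeat(2), ys.flatten().repeat(2)  # one grid per batch item
# >>> out = interpolate_bilinear(im, x, y)
# >>> T.allclose(out, im, atol=1e-4)
# True
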
def batch_pairwise_sqdist(x: T.Tensor, y: T.Tensor, c_code=cuda_ext_available):
    """
    Calculates the pair-wise square distance between two sets of points.
    To get the Euclidean distance, explicit square root needs to be applied
    to the output.

    :param x:
        a tensor of shape ``(..., nx, d)``.
        If the tensor dimension is 2, the tensor batch dim is broadcasted.
    :param y:
        a tensor of shape ``(..., ny, d)``.
        If the tensor dimension is 2, the tensor batch dim is broadcasted.
    :param c_code:
        whether to use a C++ implementation.
        Default: ``True`` when the CUDA extension is installed. ``False`` otherwise.
    :return:
        a tensor containing the exhaustive square distance between every pair
        of points in `x` and `y` from the same batch.
    """

    if c_code:
        from ..extensions import batch_pairwise_dist
        return batch_pairwise_dist(x, y)
    else:
        P = T.cdist(x, y)
        return P ** 2

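# Shape sketch with the pure-PyTorch path (``c_code=False``):
#
# >>> import torch as T
# >>> x = T.rand(8, 100, 3)
# >>> y = T.rand(8, 50, 3)
# >>> batch_pairwise_sqdist(x, y, c_code=False).shape
# torch.Size([8, 100, 50])
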
def gram_matrix(x: T.Tensor) -> T.Tensor:
    """
    Computes the Gram matrix given a 4D tensor.

    :param x:
        a 4D tensor.
    :return:
        the Gram matrix of `x`.
    """

    b, c, h, w = x.shape
    features = x.view(b, c, -1)
    G = T.bmm(features, features.transpose(-1, -2))
    return G.div(np.prod(x.shape[1:]))

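# The Gram matrix of a ``(b, c, h, w)`` batch has shape ``(b, c, c)``,
# normalized by ``c * h * w``:
#
# >>> import torch as T
# >>> gram_matrix(T.rand(2, 3, 4, 4)).shape
# torch.Size([2, 3, 3])
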
def var(x: T.Tensor, dim=None, unbiased=True, keepdim=False):
    """
    Calculates the variance of `x` along `dim`.
    Exists because :func:`torch.var` sometimes causes some error in backward pass.

    :param x:
        a tensor.
    :param dim:
        the dimension along which to calculate the variance.
        Can be ``int``/``list``/``tuple``.
    :param unbiased:
        whether to use an unbiased estimate. Default: ``True``.
    :param keepdim:
        whether to keep the reduced dims as ``1``. Default: ``False``.
    :return:
        the variance of `x`.
    """

    if dim is None:
        dim = tuple(i for i in range(len(x.shape)))

    if isinstance(dim, numbers.Number):
        dim = (int(dim),)

    mean = T.mean(x, dim, keepdim=True)
    dim_prod = np.prod([x.shape[i] for i in dim])
    if unbiased:
        dim_prod -= 1

    var = T.sum((x - mean) ** 2, dim, keepdim=keepdim) / dim_prod
    return var

def std(x: T.Tensor, dim=None, unbiased=True, keepdim=False):
    """
    Calculates the standard deviation of `x` along `dim`.
    Exists because :func:`torch.std` sometimes causes some error in backward pass.

    :param x:
        a tensor.
    :param dim:
        the dimension along which to calculate the standard deviation.
        Can be ``int``/``list``/``tuple``.
    :param unbiased:
        whether to use an unbiased estimate. Default: ``True``.
    :param keepdim:
        whether to keep the reduced dims as ``1``. Default: ``False``.
    :return:
        the standard deviation of `x`.
    """

    # a small epsilon keeps the square root differentiable at zero variance
    return T.sqrt(var(x, dim, unbiased, keepdim) + 1e-8)

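# ``var`` should agree with :func:`torch.var`; ``std`` differs from
# :func:`torch.std` only by the 1e-8 epsilon added for backward stability.
#
# >>> import torch as T
# >>> x = T.rand(4, 5)
# >>> T.allclose(var(x, 1), T.var(x, 1))
# True
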
def break_dim(x: T.Tensor, dim: int, sizes=(-1,)):
    """
    Breaks the input tensor at `dim` into `sizes`.

    :param x:
        an input tensor.
    :param dim:
        position at which the tensor is broken.
    :param sizes:
        sizes that the broken dimension is reshaped into.
    :return:
        a tensor whose shape at `dim` is replaced by `sizes`.
    """

    if dim < 0:
        dim += x.ndim

    shape = tuple(x.shape)
    new_shape = shape[:dim] + tuple(sizes) + shape[dim + 1:]
    return x.view(*new_shape)
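
# For example, splitting a dimension of size 6 into (2, 3):
#
# >>> import torch as T
# >>> x = T.rand(2, 6, 5)
# >>> break_dim(x, 1, (2, 3)).shape
# torch.Size([2, 2, 3, 5])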