Source code for neuralnet_pytorch.utils.cv_utils

import torch as T

from .. import cuda_ext_available
from . import tensor_utils as tutils

__all__ = ['rgb2gray', 'rgb2ycbcr', 'rgba2rgb', 'ycbcr2rgb', 'pc2vox', 'pc2vox_fast']


def rgb2gray(img: T.Tensor):
    """
    Converts a batch of RGB images to gray.

    :param img:
        a batch of RGB image tensors.
    :return:
        a batch of gray images.
    """
    if len(img.shape) != 4:
        raise ValueError('Input images must have four dimensions, not %d' % len(img.shape))

    return (0.299 * img[:, 0] + 0.587 * img[:, 1] + 0.114 * img[:, 2]).unsqueeze(1)
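# A minimal usage sketch (not part of the library source): rgb2gray expects a
# channel-first NCHW batch, as the ``img[:, c]`` slicing above implies.
#
# >>> img = T.rand(4, 3, 64, 64)  # a hypothetical batch of four RGB images
# >>> rgb2gray(img).shape
# torch.Size([4, 1, 64, 64])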
def rgb2ycbcr(img: T.Tensor):
    """
    Converts a batch of RGB images to YCbCr.

    :param img:
        a batch of RGB image tensors.
    :return:
        a batch of YCbCr images.
    """
    if len(img.shape) != 4:
        raise ValueError('Input images must have four dimensions, not %d' % len(img.shape))

    Y = 0. + .299 * img[:, 0] + .587 * img[:, 1] + .114 * img[:, 2]
    Cb = 128. - .169 * img[:, 0] - .331 * img[:, 1] + .5 * img[:, 2]
    Cr = 128. + .5 * img[:, 0] - .419 * img[:, 1] - .081 * img[:, 2]
    return T.cat((Y.unsqueeze(1), Cb.unsqueeze(1), Cr.unsqueeze(1)), 1)
def ycbcr2rgb(img: T.Tensor):
    """
    Converts a batch of YCbCr images to RGB.

    :param img:
        a batch of YCbCr image tensors.
    :return:
        a batch of RGB images.
    """
    if len(img.shape) != 4:
        raise ValueError('Input images must have four dimensions, not %d' % len(img.shape))

    R = img[:, 0] + 1.4 * (img[:, 2] - 128.)
    G = img[:, 0] - .343 * (img[:, 1] - 128.) - .711 * (img[:, 2] - 128.)
    B = img[:, 0] + 1.765 * (img[:, 1] - 128.)
    return T.cat((R.unsqueeze(1), G.unsqueeze(1), B.unsqueeze(1)), 1)
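# A hedged round-trip sketch (not part of the library source): the two
# conversions above are approximate inverses for 8-bit intensities stored as
# floats in [0, 255]. The coefficients are rounded (e.g. 1.4 vs. the JPEG
# constant 1.402), so expect agreement only up to a small tolerance.
#
# >>> img = T.randint(0, 256, (2, 3, 32, 32)).float()
# >>> T.allclose(img, ycbcr2rgb(rgb2ycbcr(img)), atol=2.)
# True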
def rgba2rgb(img: T.Tensor):
    """
    Converts a batch of RGBA images to RGB.

    :param img:
        a batch of RGBA image tensors.
    :return:
        a batch of RGB images.
    """
    r = img[..., 0, :, :]
    g = img[..., 1, :, :]
    b = img[..., 2, :, :]
    a = img[..., 3, :, :]

    shape = img.shape[:-3] + (3,) + img.shape[-2:]
    out = T.zeros(*shape).to(img.device)

    # alpha-composite each channel over a white background
    # (assumes channel and alpha values normalized to [0, 1])
    out[..., 0, :, :] = (1 - a) + a * r
    out[..., 1, :, :] = (1 - a) + a * g
    out[..., 2, :, :] = (1 - a) + a * b
    return out
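# A hedged usage sketch (not part of the library source): rgba2rgb drops the
# alpha channel of a (..., 4, H, W) batch by compositing over white, so the
# input is assumed to be normalized to [0, 1].
#
# >>> rgba = T.rand(4, 4, 16, 16)
# >>> rgba2rgb(rgba).shape
# torch.Size([4, 3, 16, 16])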
def pc2vox(pc: T.Tensor, vox_size=32, sigma=.005, analytical_gauss_norm=True):
    """
    Converts a centered point cloud to voxel representation.

    :param pc:
        a batch of centered point clouds.
    :param vox_size:
        resolution of the voxel field. Default: 32.
    :param sigma:
        std of the Gaussian blur that determines the area of effect of each point.
    :param analytical_gauss_norm:
        whether to use an analytically precomputed normalization term.
    :return:
        the voxel representation of input.
    """
    assert pc.ndimension() in (2, 3), 'Point cloud must be a 2D or a 3D tensor'
    if pc.ndimension() == 2:
        pc = pc[None]

    x = pc[..., 0]
    y = pc[..., 1]
    z = pc[..., 2]

    rng = T.linspace(-1.0, 1.0, vox_size).to(pc.device)
    xg, yg, zg = T.meshgrid(rng, rng, rng)  # [G,G,G]

    x_big = tutils.shape_padright(x, 3)  # [B,N,1,1,1]
    y_big = tutils.shape_padright(y, 3)  # [B,N,1,1,1]
    z_big = tutils.shape_padright(z, 3)  # [B,N,1,1,1]

    xg = tutils.shape_padleft(xg, 2)  # [1,1,G,G,G]
    yg = tutils.shape_padleft(yg, 2)  # [1,1,G,G,G]
    zg = tutils.shape_padleft(zg, 2)  # [1,1,G,G,G]

    # squared distance from every point to every grid cell
    sq_distance = (x_big - xg) ** 2. + (y_big - yg) ** 2. + (z_big - zg) ** 2.

    # compute gaussian
    func = T.exp(-sq_distance / (2. * sigma ** 2))  # [B,N,G,G,G]

    # normalise gaussian
    if analytical_gauss_norm:
        # should work with any grid sizes
        magic_factor = 1.78984352254  # see estimate_gauss_normaliser
        sigma_normalised = sigma * vox_size
        normaliser = 1. / (magic_factor * (sigma_normalised ** 3.))
        func *= normaliser
    else:
        normaliser = T.sum(func, (2, 3, 4), keepdim=True)
        func /= normaliser

    summed = T.sum(func, dim=1)  # [B,G,G,G]
    voxels = T.clamp(summed, 0., 1.)
    return voxels
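# A minimal sketch (not part of the library source): voxelize a synthetic
# cloud. Points are assumed centered in [-1, 1]^3 to match the
# ``T.linspace(-1., 1., vox_size)`` grid above; memory grows as
# B * N * vox_size^3, so keep N small for a quick check.
#
# >>> pc = T.rand(2, 128, 3) * 2. - 1.  # two clouds of 128 points each
# >>> pc2vox(pc, vox_size=32).shape
# torch.Size([2, 32, 32, 32])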
def pc2vox_fast(pc: T.Tensor, voxel_size=32, grid_size=1., filter_outlier=True, c_code=cuda_ext_available):
    """
    A fast conversion from a centered point cloud to voxel representation.

    :param pc:
        a batch of centered point clouds.
    :param voxel_size:
        resolution of the voxel field. Default: 32.
    :param grid_size:
        range of the point clouds. Default: 1.
    :param filter_outlier:
        whether to filter points outside of `grid_size`. Default: ``True``.
    :param c_code:
        whether to use a C++ implementation. Default: ``True`` when the compiled extension is available.
    :return:
        the voxel representation of input.
    """
    assert pc.ndimension() in (2, 3), 'Point cloud must be a 2D or a 3D tensor'
    if pc.ndimension() == 2:
        pc = pc[None]

    if c_code:
        from ..extensions import pc2vox
        voxel = pc2vox(pc, voxel_size, grid_size, filter_outlier)
    else:
        b, n, _ = pc.shape
        half_size = grid_size / 2.
        valid = (pc >= -half_size) & (pc <= half_size)
        valid = T.all(valid, 2)

        # map point coordinates to continuous grid coordinates in [0, voxel_size - 1]
        pc_grid = (pc + half_size) * (voxel_size - 1.)
        indices_floor = T.floor(pc_grid)
        indices = indices_floor.long()

        batch_indices = T.arange(b).to(pc.device)
        batch_indices = tutils.shape_padright(batch_indices, 2)
        batch_indices = tutils.tile(batch_indices, (1, n, 1))
        indices = T.cat((batch_indices, indices), 2)
        indices = T.reshape(indices, (-1, 4))

        # trilinear weights for the eight neighbouring voxel corners
        r = pc_grid - indices_floor
        rr = (1. - r, r)
        if filter_outlier:
            valid = valid.flatten()
            indices = indices[valid]

        def interpolate_scatter3d(pos):
            updates = rr[pos[0]][..., 0] * rr[pos[1]][..., 1] * rr[pos[2]][..., 2]
            updates = updates.flatten()

            if filter_outlier:
                updates = updates[valid]

            indices_shift = T.tensor([[0] + pos]).to(pc.device)
            indices_loc = indices + indices_shift

            out_shape = (b,) + (voxel_size,) * 3
            voxels = T.zeros(*out_shape).to(pc.device).flatten()
            voxels.scatter_add_(0, tutils.ravel_index(indices_loc.t(), out_shape), updates)
            return voxels.view(*out_shape)

        # splat every point into its eight surrounding voxels and accumulate
        voxel = [interpolate_scatter3d([k, j, i]) for k in range(2) for j in range(2) for i in range(2)]
        voxel = sum(voxel)
        voxel = T.clamp(voxel, 0., 1.)

    return voxel
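# A hedged sketch (not part of the library source): force the pure-PyTorch
# fallback with ``c_code=False`` so it runs without the compiled extension.
# With the default ``grid_size=1.``, points are assumed to lie in [-0.5, 0.5]^3;
# anything outside is dropped while ``filter_outlier`` is ``True``.
#
# >>> pc = T.rand(2, 1024, 3) - .5
# >>> pc2vox_fast(pc, voxel_size=32, c_code=False).shape
# torch.Size([2, 32, 32, 32])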