import torch as T
import torch.nn.functional as F
import numpy as np
import torch.nn as nn
from . import layers
from . import utils
from . import cuda_ext_available
__all__ = ['huber_loss', 'first_derivative_loss', 'lp_loss', 'ssim', 'psnr', 'chamfer_loss', 'emd_loss', 'tv_reg',
'spectral_norm']
[docs]def huber_loss(x, y, reduce='mean'):
"""
An alias for :func:`torch.nn.functional.smooth_l1_loss`.
"""
return F.smooth_l1_loss(x, y, reduce=reduce)
[docs]def first_derivative_loss(x, y, p=2):
"""
Calculates lp loss between the first derivatives of the inputs.
:param x:
a :class:`torch.Tensor`.
:param y:
a :class:`torch.Tensor` of the same shape as x.
:param p:
order of the norm.
:return:
the scalar loss between the first derivatives of the inputs.
"""
if x.ndimension() != 4 and y.ndimension() != 4:
raise TypeError('y and y_pred should have four dimensions')
kern_x = T.from_numpy(np.array([[1, 0, -1], [2, 0, -2], [1, 0, -1]], dtype='float32')).requires_grad_(False)
kern_x = T.flip(kern_x.expand(y.shape[1], y.shape[1], 3, 3), (0, 1)).to(x.device)
kern_y = T.from_numpy(np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]], dtype='float32')).requires_grad_(False)
kern_y = T.flip(kern_y.expand(y.shape[1], y.shape[1], 3, 3), (0, 1)).to(x.device)
x_grad_x = F.conv2d(x, kern_x, padding=1)
x_grad_y = F.conv2d(x, kern_y, padding=1)
x_grad = T.sqrt(x_grad_x ** 2 + x_grad_y ** 2 + 1e-10)
y_grad_x = F.conv2d(y, kern_x, padding=1)
y_grad_y = F.conv2d(y, kern_y, padding=1)
y_grad = T.sqrt(y_grad_x ** 2 + y_grad_y ** 2 + 1e-10)
return lp_loss(x_grad, y_grad, p)
[docs]def lp_loss(x, y, p=2, reduction='mean'):
"""
Calculates p-norm of (x - y).
:param x:
a :class:`torch.Tensor`.
:param y:
a :class:`torch.Tensor` of the same shape as x.
:param p:
order of the norm.
:param reduction:
``'mean'`` or ``'sum'``.
:return:
the p-norm of (x - y).
"""
assert reduction in ('sum', 'mean'), 'Unknown choice of reduction'
if y.ndimension() != x.ndimension():
raise TypeError('y should have the same shape as y_pred', ('y', y.data.type(), 'y_pred', x.data.type()))
if p == 1:
return F.l1_loss(x, y, reduction=reduction)
elif p == 2:
return F.mse_loss(x, y, reduction=reduction)
else:
reduce = T.mean if reduction == 'mean' else T.sum
return reduce(T.abs(x - y) ** p)
[docs]def chamfer_loss(xyz1, xyz2, reduce='mean', c_code=cuda_ext_available):
"""
Calculates the Chamfer distance between two batches of point clouds.
The Pytorch code is adapted from DenseLidarNet_.
The CUDA code is adapted from AtlasNet_.
.. _DenseLidarNet: https://github.com/345ishaan/DenseLidarNet/blob/master/code/chamfer_loss.py
.. _AtlasNet: https://github.com/ThibaultGROUEIX/AtlasNet/tree/master/extension
:param xyz1:
a point cloud of shape ``(b, n1, k)`` or ``(n1, k)``.
:param xyz2:
a point cloud of shape (b, n2, k) or (n2, k).
:param reduce:
``'mean'`` or ``'sum'``. Default: ``'mean'``.
:param c_code:
whether to use CUDA implementation.
This version is much more memory-friendly and slightly faster.
:return:
the Chamfer distance between the inputs.
"""
assert len(xyz1.shape) in (2, 3) and len(xyz2.shape) in (2, 3), 'Unknown shape of tensors'
if xyz1.dim() == 2:
xyz1 = xyz1.unsqueeze(0)
if xyz2.dim() == 2:
xyz2 = xyz2.unsqueeze(0)
assert reduce in ('mean', 'sum'), 'Unknown reduce method'
reduce = T.sum if reduce == 'sum' else T.mean
if c_code:
from .extensions import chamfer_distance
dist1, dist2 = chamfer_distance(xyz1, xyz2)
else:
P = utils.batch_pairwise_sqdist(xyz1, xyz2, c_code=c_code)
dist2, _ = T.min(P, 1)
dist1, _ = T.min(P, 2)
loss_2 = reduce(dist2)
loss_1 = reduce(dist1)
return loss_1 + loss_2
[docs]def emd_loss(xyz1, xyz2, reduce='mean', sinkhorn=False):
"""
Calculates the Earth Mover Distance (or Wasserstein metric) between two sets
of points.
:param xyz1:
a point cloud of shape ``(b, n1, k)`` or ``(n1, k)``.
:param xyz2:
a point cloud of shape (b, n2, k) or (n2, k).
:param reduce:
``'mean'`` or ``'sum'``. Default: ``'mean'``.
:param sinkhorn:
whether to use the Sinkhorn approximation of the Wasserstein distance.
``False`` will fall back to a CUDA implementation, which is only available
if the CUDA-extended neuralnet-pytorch is installed.
Default: ``True``.
:return:
the EMD between the inputs.
"""
assert reduce in ('mean', 'sum'), 'Reduce method should be mean or sum'
if sinkhorn:
import geomloss
return geomloss.SamplesLoss()(xyz1, xyz2)
else:
from .extensions import earth_mover_distance as emd
emd_dist = (emd(xyz1, xyz2) + emd(xyz2, xyz1)) / 2.
return T.mean(emd_dist) if reduce == 'mean' else T.sum(emd_dist)
def _fspecial_gauss(size, sigma):
x, y = np.mgrid[-size // 2 + 1:size // 2 + 1, -size // 2 + 1:size // 2 + 1]
g = np.exp(-((x ** 2 + y ** 2) / (2.0 * sigma ** 2)))
return g / np.sum(g)
[docs]def ssim(img1, img2, max_val=1., filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03, cs_map=False):
"""
Returns the Structural Similarity Map between `img1` and `img2`.
This function attempts to match the functionality of ssim_index_new.m by
Zhou Wang: http://www.cns.nyu.edu/~lcv/ssim/msssim.zip
:param img1:
a 4D :class:`torch.Tensor`.
:param img2:
a 4D :class:`torch.Tensor` of the same shape as `img1`.
:param max_val:
the dynamic range of the images (i.e., the difference between the
maximum the and minimum allowed values).
:param filter_size:
size of blur kernel to use (will be reduced for small images).
:param filter_sigma:
standard deviation for Gaussian blur kernel (will be reduced
for small images).
:param k1:
constant used to maintain stability in the SSIM calculation (0.01 in
the original paper).
:param k2:
constant used to maintain stability in the SSIM calculation (0.03 in
the original paper).
:return:
pair containing the mean SSIM and contrast sensitivity between `img1` and `img2`.
:raise:
RuntimeError: If input images don't have the same shape or don't have four
dimensions: [batch_size, height, width, depth].
"""
if img1.ndimension() != 4:
raise RuntimeError('Input images must have four dimensions, not %d', img1.ndimension())
_, _, height, width = img1.shape
# Filter size can't be larger than height or width of images.
size = min((filter_size, height, width))
# Scale down sigma if a smaller filter size is used.
sigma = (size * filter_sigma / filter_size) if filter_size else 1.
if filter_size:
window = T.flip(T.tensor(_fspecial_gauss(size, sigma)), (0, 1)).view(1, 1, size, size)\
.requires_grad_(False).to(device=img1.device, dtype=img1.dtype)
mu1 = F.conv2d(img1, window)
mu2 = F.conv2d(img2, window)
sigma11 = F.conv2d(img1 * img1, window)
sigma22 = F.conv2d(img2 * img2, window)
sigma12 = F.conv2d(img1 * img2, window)
else:
# Empty blur kernel so no need to convolve.
mu1, mu2 = img1, img2
sigma11 = img1 * img1
sigma22 = img2 * img2
sigma12 = img1 * img2
mu1 = mu1 * mu1
mu2 = mu2 * mu2
mu12 = mu1 * mu2
sigma11 -= mu1
sigma22 -= mu2
sigma12 -= mu12
# Calculate intermediate values used by both ssim and cs_map.
c1 = (k1 * max_val) ** 2
c2 = (k2 * max_val) ** 2
v1 = 2.0 * sigma12 + c2
v2 = sigma11 + sigma22 + c2
ssim = T.mean((((2.0 * mu12 + c1) * v1) / ((mu1 + mu2 + c1) * v2 + 1e-10)))
output = ssim if not cs_map else (ssim, T.mean(v1 / v2))
return output
[docs]def psnr(x, y):
"""
Peak-signal-to-noise ratio for [0,1] images.
:param x:
a :class:`torch.Tensor`.
:param y:
a :class:`torch.Tensor` of the same shape as `x`.
"""
return -10 * T.log(T.mean((y - x) ** 2)) / np.log(10.)
[docs]def tv_reg(y):
"""
Total variation regularization.
:param y:
a tensor of at least 2D.
The last 2 dimensions will be regularized.
:return:
the total variation loss.
"""
return T.sum(T.abs(y[..., :-1] - y[..., 1:])) + T.sum(T.abs(y[..., :-1, :] - y[..., 1:, :]))
[docs]def spectral_norm(module, name='weight', n_power_iterations=1, eps=1e-12, dim=None):
"""
Applies :func:`torch.nn.utils.spectral_norm` recursively to `module` and all of
its submodules.
:param module:
containing module.
:param name:
name of weight parameter.
Default: ``'weight'``.
:param n_power_iterations:
number of power iterations to calculate spectral norm.
:param eps:
epsilon for numerical stability in calculating norms.
:param dim:
dimension corresponding to number of outputs,
the default is ``0``, except for modules that are instances of
ConvTranspose{1,2,3}d, when it is ``1``.
:return:
the original module with the spectral norm hook.
"""
if hasattr(module, 'weight'):
if dim is None:
dim = 1 if isinstance(module, layers.ConvTranspose2d) else 0
if not isinstance(module, (nn.modules.batchnorm._BatchNorm,
nn.GroupNorm,
nn.LayerNorm)):
module = nn.utils.spectral_norm(module, name, n_power_iterations, eps, dim)
return module
else:
for mod_name, mod in module.named_children():
mod = spectral_norm(mod, name, n_power_iterations, eps, dim)
module.__setattr__(mod_name, mod)
return module