import numpy as np
import torch as T
import torch.nn as nn
from torch.nn.modules.utils import _pair
from .abstract import _LayerMethod, Sequential
from .. import utils
from ..utils import _image_shape, _matrix_shape
__all__ = ['Conv2d', 'ConvTranspose2d', 'FC', 'Softmax', 'DepthwiseSepConv2D']
@utils.add_simple_repr
class Conv2d(nn.Conv2d, _LayerMethod):
    """
    Extends :class:`torch.nn.Conv2d` with :class:`~neuralnet_pytorch.layers.layers._LayerMethod`.

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size
        size of the convolving kernel.
    stride
        stride of the convolution. Default: 1.
    padding
        zero-padding added to both sides of the input.
        Besides tuple/list/integer, it accepts ``str`` such as ``'half'`` and ``'valid'``,
        which is similar to Theano, and ``'ref'`` and ``'rep'``, which are common padding schemes.
        Default: ``'half'``.
    dilation
        spacing between kernel elements. Default: 1.
    groups : int
        number of blocked connections from input channels to output channels. Default: 1.
    bias : bool
        if ``True``, adds a learnable bias to the output. Default: ``True``.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function
        as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_channels, kernel_size, stride=1, padding='half', dilation=1, groups=1,
                 bias=True, activation=None, weights_init=None, bias_init=None, **kwargs):
        input_shape = _image_shape(input_shape)
        assert input_shape[1] is not None, 'Shape at dimension 1 (zero-based index) must be known'

        self.input_shape = input_shape
        kernel_size = _pair(kernel_size)
        # NOTE(review): stores ``bias`` unchanged despite the name -- looks inverted; confirm intended semantics
        self.no_bias = bias
        self.activation = utils.function(activation, **kwargs)
        self.weights_init = weights_init
        self.bias_init = bias_init
        self.border_mode = padding
        dilation = _pair(dilation)
        # effective (dilated) kernel extent per spatial dimension
        self.ks = [fs + (fs - 1) * (d - 1) for fs, d in zip(kernel_size, dilation)]

        if isinstance(padding, str):
            if padding == 'half':
                padding = [k >> 1 for k in self.ks]
            elif padding in ('valid', 'ref', 'rep'):
                # 'ref'/'rep' pad explicitly in :meth:`forward`, so no implicit zero-padding here
                padding = (0,) * len(self.ks)
            elif padding == 'full':
                padding = [k - 1 for k in self.ks]
            else:
                raise NotImplementedError
        elif isinstance(padding, int):
            padding = _pair(padding)
        elif isinstance(padding, (list, tuple)):
            # bug fix: tuples/lists previously fell into the error branch even though
            # the error message (and the docstring) promise tuple/list support
            padding = tuple(padding)
        else:
            raise ValueError('padding must be a str/tuple/int, got %s' % type(padding))

        super().__init__(int(input_shape[1]), out_channels, kernel_size, stride, tuple(padding), dilation, bias=bias,
                         groups=groups)

    def forward(self, input, *args, **kwargs):
        """Apply (optionally reflect/replicate-padded) convolution followed by the activation."""
        if self.border_mode in ('ref', 'rep'):
            # pad order is (left, right, top, bottom) as expected by the Pad2d modules
            padding = (self.ks[1] // 2, self.ks[1] // 2, self.ks[0] // 2, self.ks[0] // 2)
            pad = nn.ReflectionPad2d(padding) if self.border_mode == 'ref' else nn.ReplicationPad2d(padding)
        else:
            pad = lambda x: x  # noqa: E731

        input = pad(input)
        input = self.activation(super().forward(input))
        return input

    @property
    @utils.validate
    def output_shape(self):
        """Return the 4D output shape; unknown sizes propagate as ``nan``."""
        shape = [np.nan if s is None else s for s in self.input_shape]
        # 'ref'/'rep' padding happens in forward, so account for it here explicitly
        padding = (self.ks[0] >> 1, self.ks[1] >> 1) if self.border_mode in ('ref', 'rep') else self.padding
        # self.ks already includes dilation, hence the plain conv arithmetic below
        shape[2:] = [(s - self.ks[idx] + 2 * padding[idx]) // self.stride[idx] + 1 for idx, s in enumerate(shape[2:])]
        shape[1] = self.out_channels
        return tuple(shape)

    def reset_parameters(self):
        """Reset with PyTorch defaults, then apply the user-provided initializers if any."""
        super().reset_parameters()
        if self.weights_init:
            self.weights_init(self.weight)

        if self.bias is not None and self.bias_init:
            self.bias_init(self.bias)

    def extra_repr(self):
        s = super().extra_repr()
        s += ', activation={}'.format(self.activation.__name__)
        return s
@utils.add_simple_repr
class ConvTranspose2d(nn.ConvTranspose2d, _LayerMethod):
    """
    Extends :class:`torch.nn.ConvTranspose2d` with
    :class:`~neuralnet_pytorch.layers.layers._LayerMethod`.

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size
        size of the convolving kernel.
    stride
        stride of the convolution. Default: 1.
    padding
        ``dilation * (kernel_size - 1) - padding`` zero-padding
        will be added to both sides of each dimension in the input.
        Besides tuple/list/integer, it accepts ``str`` values
        ``'half'``, ``'valid'`` and ``'full'``, which are similar to Theano.
        Default: ``'half'``.
    output_padding
        additional size added to one side of each dimension in the output shape. Default: 0
    groups : int
        number of blocked connections from input channels to output channels. Default: 1.
    bias : bool
        if ``True``, adds a learnable bias to the output. Default: ``True``.
    dilation
        spacing between kernel elements. Default: 1.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function
        as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    output_size
        size of the output tensor. If ``None``, the shape is automatically determined.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_channels, kernel_size, stride=1, padding='half', output_padding=0, groups=1,
                 bias=True, dilation=1, activation='linear', weights_init=None, bias_init=None, output_size=None,
                 **kwargs):
        input_shape = _image_shape(input_shape)
        self.input_shape = input_shape
        self.weights_init = weights_init
        self.bias_init = bias_init
        self.activation = utils.function(activation, **kwargs)
        self.output_size = _pair(output_size) if output_size is not None else None

        kernel_size = _pair(kernel_size)
        if isinstance(padding, str):
            # NOTE: unlike Conv2d, 'ref'/'rep' schemes are not implemented here
            if padding == 'half':
                padding = (kernel_size[0] // 2, kernel_size[1] // 2)
            elif padding == 'valid':
                padding = (0, 0)
            elif padding == 'full':
                padding = (kernel_size[0] - 1, kernel_size[1] - 1)
            else:
                raise NotImplementedError
        elif isinstance(padding, int):
            padding = (padding, padding)

        super().__init__(int(input_shape[1]), out_channels, kernel_size, stride, padding, output_padding, groups, bias,
                         dilation)

    def forward(self, input, output_size=None, *args, **kwargs):
        """Apply the transposed convolution and the activation; a per-call ``output_size`` overrides the stored one."""
        output = self.activation(super().forward(
            input, output_size=self.output_size if output_size is None else output_size))
        return output

    @property
    @utils.validate
    def output_shape(self):
        """Return the 4D output shape; unknown sizes propagate as ``nan``."""
        if self.output_size is not None:
            assert len(self.output_size) == 2, \
                'output_size should have exactly 2 elements, got %d' % len(self.output_size)
            return (self.input_shape[0], self.out_channels) + tuple(self.output_size)

        shape = [np.nan if s is None else s for s in self.input_shape]
        _, _, h_in, w_in = shape
        # standard ConvTranspose2d output-size formula (see torch.nn.ConvTranspose2d docs)
        h_out = (h_in - 1) * self.stride[0] - 2 * self.padding[0] + self.dilation[0] * \
            (self.kernel_size[0] - 1) + self.output_padding[0] + 1
        w_out = (w_in - 1) * self.stride[1] - 2 * self.padding[1] + self.dilation[1] * \
            (self.kernel_size[1] - 1) + self.output_padding[1] + 1
        return self.input_shape[0], self.out_channels, h_out, w_out

    def reset_parameters(self):
        """Reset with PyTorch defaults, then apply the user-provided initializers if any."""
        super().reset_parameters()
        if self.weights_init:
            self.weights_init(self.weight)

        if self.bias is not None and self.bias_init:
            self.bias_init(self.bias)

    def extra_repr(self):
        s = 'activation={}'.format(self.activation.__name__)
        return s
@utils.add_simple_repr
class FC(nn.Linear, _LayerMethod):
    """
    AKA fully connected layer in deep learning literature.
    This class extends :class:`torch.nn.Linear` by :class:`~neuralnet_pytorch.layers.layers._LayerMethod`.

    Parameters
    ----------
    input_shape
        shape of the input tensor.
        If an integer is passed, it is treated as the size of each input sample.
    out_features : int
        size of each output sample.
    bias : bool
        if set to ``False``, the layer will not learn an additive bias.
        Default: ``True``.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function
        as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    flatten : bool
        whether to flatten input tensor into 2D matrix. Default: ``False``.
    keepdim : bool
        whether to keep the output dimension when `out_features` is 1.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_features, bias=True, activation=None, weights_init=None, bias_init=None,
                 flatten=False, keepdim=True, **kwargs):
        input_shape = _matrix_shape(input_shape)
        assert input_shape[-1] is not None, 'Shape at the last position (zero-based index) must be known'

        self.input_shape = input_shape
        self.weights_init = weights_init
        self.bias_init = bias_init
        self.flatten = flatten
        self.keepdim = keepdim
        self.activation = utils.function(activation, **kwargs)
        # when flattening, all non-batch dims collapse into the feature dim
        super().__init__(int(np.prod(self.input_shape[1:])) if flatten else self.input_shape[-1], out_features, bias)

    def forward(self, input, *args, **kwargs):
        """Linear transform plus activation; optionally flattens input and squeezes a singleton output."""
        if self.flatten:
            input = T.flatten(input, 1)

        output = self.activation(super().forward(input))
        # merge the trailing singleton feature dim into its neighbor when not keeping dims
        return output.flatten(-2) if self.out_features == 1 and not self.keepdim else output

    @property
    @utils.validate
    def output_shape(self):
        # NOTE(review): the flatten+keepdim branch reports a trailing 1 that forward does not
        # produce for out_features > 1 -- verify against callers before relying on this shape
        if self.flatten:
            if self.keepdim:
                return self.input_shape[0], self.out_features, 1
            else:
                return self.input_shape[0], self.out_features
        else:
            return self.input_shape[:-1] + (self.out_features,)

    def reset_parameters(self):
        """Reset with PyTorch defaults, then apply the user-provided initializers if any."""
        super().reset_parameters()
        if self.weights_init:
            self.weights_init(self.weight)

        if self.bias is not None and self.bias_init:
            self.bias_init(self.bias)

    def extra_repr(self):
        s = super().extra_repr()
        s += ', activation={}'.format(self.activation.__name__)
        return s
class Softmax(FC):
    """
    A special case of :class:`~neuralnet_pytorch.layers.FC` with softmax activation function.

    Parameters
    ----------
    input_shape
        shape of the input tensor.
        If an integer is passed, it is treated as the size of each input sample.
    out_features : int
        size of each output sample.
    dim : int
        dimension to apply softmax. Default: 1.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_features, dim=1, weights_init=None, bias_init=None, **kwargs):
        self.dim = dim
        # forward ``dim`` through kwargs so utils.function builds softmax along the right axis
        kwargs['dim'] = dim
        super().__init__(input_shape, out_features, activation='softmax', weights_init=weights_init,
                         bias_init=bias_init, **kwargs)
@utils.add_custom_repr
class DepthwiseSepConv2D(Sequential):
    """
    Performs depthwise separable convolution in image processing.

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size : int
        size of the convolving kernel.
    depth_mul
        depth multiplier for intermediate result of depthwise convolution
    stride
        stride of the depthwise convolution. Default: 1.
    padding
        zero-padding added to both sides of the input.
        Besides tuple/list/integer, it accepts ``str`` such as ``'half'`` and ``'valid'``,
        which is similar to Theano, and ``'ref'`` and ``'rep'``, which are common padding schemes.
        Default: ``'half'``.
    dilation
        spacing between kernel elements. Default: 1.
    bias : bool
        if ``True``, adds a learnable bias to the output. Default: ``True``.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function
        as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_channels, kernel_size, depth_mul=1, stride=1, padding='half', dilation=1,
                 bias=True, activation=None, **kwargs):
        input_shape = _image_shape(input_shape)
        super().__init__(input_shape=input_shape)
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.depth_mul = depth_mul
        self.padding = padding
        # bug fix: normalize to a pair so extra_repr's ``len(self.dilation)`` no longer
        # raises TypeError when an int dilation (including the default 1) is given
        self.dilation = _pair(dilation)
        self.activation = utils.function(activation, **kwargs)

        # grouped conv over each input channel separately, then a 1x1 conv mixes channels;
        # self.output_shape tracks the shape as submodules are registered on this Sequential
        self.depthwise = Conv2d(self.output_shape, input_shape[1] * depth_mul, kernel_size, stride=stride,
                                padding=padding, dilation=dilation, groups=input_shape[1], bias=bias)
        self.pointwise = Conv2d(self.output_shape, out_channels, 1, activation=activation, padding=padding,
                                dilation=dilation, bias=False, **kwargs)

    def extra_repr(self):
        s = ('{input_shape}, {out_channels}, kernel_size={kernel_size}'
             ', depth_mul={depth_mul}')

        if self.padding != 'half':
            s += ', padding={padding}'

        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'

        s = s.format(**self.__dict__)
        s += ', activation={}'.format(self.activation.__name__)
        return s