Source code for neuralnet_pytorch.layers.convolution

import numpy as np
import torch as T
import torch.nn as nn
from torch.nn.modules.utils import _pair

from .abstract import _LayerMethod, Sequential
from .. import utils
from ..utils import _image_shape, _matrix_shape

__all__ = ['Conv2d', 'ConvTranspose2d', 'FC', 'Softmax', 'DepthwiseSepConv2D']


@utils.add_simple_repr
class Conv2d(nn.Conv2d, _LayerMethod):
    """
    Extends :class:`torch.nn.Conv2d` with :class:`~neuralnet_pytorch.layers.layers._LayerMethod`.

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size
        size of the convolving kernel.
    stride
        stride of the convolution. Default: 1.
    padding
        zero-padding added to both sides of the input.
        Besides tuple/list/integer, it accepts ``str`` such as ``'half'`` and ``'valid'``,
        which are similar to those in Theano, and ``'ref'`` and ``'rep'``,
        which are common padding schemes.
        Default: ``'half'``.
    dilation
        spacing between kernel elements. Default: 1.
    groups : int
        number of blocked connections from input channels to output channels. Default: 1.
    bias : bool
        if ``True``, adds a learnable bias to the output. Default: ``True``.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_channels, kernel_size, stride=1, padding='half', dilation=1, groups=1,
                 bias=True, activation=None, weights_init=None, bias_init=None, **kwargs):
        input_shape = _image_shape(input_shape)
        assert input_shape[1] is not None, 'Shape at dimension 1 (zero-based index) must be known'

        self.input_shape = input_shape
        kernel_size = _pair(kernel_size)
        self.no_bias = bias
        self.activation = utils.function(activation, **kwargs)
        self.weights_init = weights_init
        self.bias_init = bias_init
        self.border_mode = padding
        dilation = _pair(dilation)

        self.ks = [fs + (fs - 1) * (d - 1) for fs, d in zip(kernel_size, dilation)]
        if isinstance(padding, str):
            if padding == 'half':
                padding = [k >> 1 for k in self.ks]
            elif padding in ('valid', 'ref', 'rep'):
                padding = (0,) * len(self.ks)
            elif padding == 'full':
                padding = [k - 1 for k in self.ks]
            else:
                raise NotImplementedError
        elif isinstance(padding, int):
            padding = _pair(padding)
        else:
            raise ValueError('padding must be a str/tuple/int, got %s' % type(padding))

        super().__init__(int(input_shape[1]), out_channels, kernel_size, stride, tuple(padding), dilation,
                         bias=bias, groups=groups)

    def forward(self, input, *args, **kwargs):
        if self.border_mode in ('ref', 'rep'):
            padding = (self.ks[1] // 2, self.ks[1] // 2, self.ks[0] // 2, self.ks[0] // 2)
            pad = nn.ReflectionPad2d(padding) if self.border_mode == 'ref' else nn.ReplicationPad2d(padding)
        else:
            pad = lambda x: x  # noqa: E731

        input = pad(input)
        input = self.activation(super().forward(input))
        return input

    @property
    @utils.validate
    def output_shape(self):
        shape = [np.nan if s is None else s for s in self.input_shape]
        padding = (self.ks[0] >> 1, self.ks[1] >> 1) if self.border_mode in ('ref', 'rep') else self.padding
        shape[2:] = [(s - self.ks[idx] + 2 * padding[idx]) // self.stride[idx] + 1 for idx, s in enumerate(shape[2:])]
        shape[1] = self.out_channels
        return tuple(shape)

    def reset_parameters(self):
        super().reset_parameters()
        if self.weights_init:
            self.weights_init(self.weight)

        if self.bias is not None and self.bias_init:
            self.bias_init(self.bias)

    def extra_repr(self):
        s = super().extra_repr()
        s += ', activation={}'.format(self.activation.__name__)
        return s
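
# Illustrative usage sketch (not part of the original module): constructs Conv2d as
# defined above with the default 'half' padding, which pads by kernel_size // 2 so that
# stride-1 convolutions preserve spatial size, i.e. (32 - 3 + 2*1) // 1 + 1 = 32.
# The 'relu' string is assumed to be one of the activation names recognized by
# utils.function.
def _demo_conv2d():
    conv = Conv2d(input_shape=(8, 3, 32, 32), out_channels=16, kernel_size=3,
                  stride=1, padding='half', activation='relu')
    x = T.rand(8, 3, 32, 32)
    y = conv(x)
    print(y.shape)            # expected: torch.Size([8, 16, 32, 32])
    print(conv.output_shape)  # expected: (8, 16, 32, 32)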
@utils.add_simple_repr
class ConvTranspose2d(nn.ConvTranspose2d, _LayerMethod):
    """
    Extends :class:`torch.nn.ConvTranspose2d` with :class:`~neuralnet_pytorch.layers.layers._LayerMethod`.

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size
        size of the convolving kernel.
    stride
        stride of the convolution. Default: 1.
    padding
        ``dilation * (kernel_size - 1) - padding`` zero-padding will be added to both sides
        of each dimension in the input.
        Besides tuple/list/integer, it accepts ``str`` such as ``'half'`` and ``'valid'``,
        which are similar to those in Theano.
        Default: ``'half'``.
    output_padding
        additional size added to one side of each dimension in the output shape. Default: 0.
    groups : int
        number of blocked connections from input channels to output channels. Default: 1.
    bias : bool
        if ``True``, adds a learnable bias to the output. Default: ``True``.
    dilation
        spacing between kernel elements. Default: 1.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    output_size
        size of the output tensor. If ``None``, the shape is automatically determined.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_channels, kernel_size, stride=1, padding='half', output_padding=0, groups=1,
                 bias=True, dilation=1, activation='linear', weights_init=None, bias_init=None, output_size=None,
                 **kwargs):
        input_shape = _image_shape(input_shape)
        self.input_shape = input_shape
        self.weights_init = weights_init
        self.bias_init = bias_init
        self.activation = utils.function(activation, **kwargs)
        self.output_size = _pair(output_size) if output_size is not None else None

        kernel_size = _pair(kernel_size)
        if isinstance(padding, str):
            if padding == 'half':
                padding = (kernel_size[0] // 2, kernel_size[1] // 2)
            elif padding == 'valid':
                padding = (0, 0)
            elif padding == 'full':
                padding = (kernel_size[0] - 1, kernel_size[1] - 1)
            else:
                raise NotImplementedError
        elif isinstance(padding, int):
            padding = (padding, padding)

        super().__init__(int(input_shape[1]), out_channels, kernel_size, stride, padding, output_padding, groups,
                         bias, dilation)

    def forward(self, input, output_size=None, *args, **kwargs):
        output = self.activation(super().forward(
            input, output_size=self.output_size if output_size is None else output_size))
        return output

    @property
    @utils.validate
    def output_shape(self):
        if self.output_size is not None:
            assert len(self.output_size) == 2, \
                'output_size should have exactly 2 elements, got %d' % len(self.output_size)
            return (self.input_shape[0], self.out_channels) + tuple(self.output_size)

        shape = [np.nan if s is None else s for s in self.input_shape]
        _, _, h_in, w_in = shape
        h_out = (h_in - 1) * self.stride[0] - 2 * self.padding[0] + self.dilation[0] * \
            (self.kernel_size[0] - 1) + self.output_padding[0] + 1
        w_out = (w_in - 1) * self.stride[1] - 2 * self.padding[1] + self.dilation[1] * \
            (self.kernel_size[1] - 1) + self.output_padding[1] + 1
        return self.input_shape[0], self.out_channels, h_out, w_out

    def reset_parameters(self):
        super().reset_parameters()
        if self.weights_init:
            self.weights_init(self.weight)

        if self.bias is not None and self.bias_init:
            self.bias_init(self.bias)

    def extra_repr(self):
        s = 'activation={}'.format(self.activation.__name__)
        return s
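
# Illustrative usage sketch (not part of the original module): with kernel_size=3,
# padding='half' (i.e. padding=1), stride=2 and output_padding=1, the formula used in
# output_shape gives (16 - 1)*2 - 2*1 + 1*(3 - 1) + 1 + 1 = 32, i.e. a clean 2x upsampling.
def _demo_conv_transpose2d():
    deconv = ConvTranspose2d(input_shape=(4, 16, 16, 16), out_channels=8, kernel_size=3,
                             stride=2, padding='half', output_padding=1)
    x = T.rand(4, 16, 16, 16)
    y = deconv(x)
    print(y.shape)              # expected: torch.Size([4, 8, 32, 32])
    print(deconv.output_shape)  # expected: (4, 8, 32, 32)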
@utils.add_simple_repr
class FC(nn.Linear, _LayerMethod):
    """
    AKA fully connected layer in deep learning literature.
    This class extends :class:`torch.nn.Linear` by :class:`~neuralnet_pytorch.layers.layers._LayerMethod`.

    Parameters
    ----------
    input_shape
        shape of the input tensor.
        If an integer is passed, it is treated as the size of each input sample.
    out_features : int
        size of each output sample.
    bias : bool
        if set to ``False``, the layer will not learn an additive bias. Default: ``True``.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    flatten : bool
        whether to flatten input tensor into 2D matrix. Default: ``False``.
    keepdim : bool
        whether to keep the output dimension when `out_features` is 1.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_features, bias=True, activation=None, weights_init=None, bias_init=None,
                 flatten=False, keepdim=True, **kwargs):
        input_shape = _matrix_shape(input_shape)
        assert input_shape[-1] is not None, 'Shape at the last position (zero-based index) must be known'

        self.input_shape = input_shape
        self.weights_init = weights_init
        self.bias_init = bias_init
        self.flatten = flatten
        self.keepdim = keepdim
        self.activation = utils.function(activation, **kwargs)

        super().__init__(int(np.prod(self.input_shape[1:])) if flatten else self.input_shape[-1], out_features, bias)

    def forward(self, input, *args, **kwargs):
        if self.flatten:
            input = T.flatten(input, 1)

        output = self.activation(super().forward(input))
        return output.flatten(-2) if self.out_features == 1 and not self.keepdim else output

    @property
    @utils.validate
    def output_shape(self):
        if self.flatten:
            if self.keepdim:
                return self.input_shape[0], self.out_features, 1
            else:
                return self.input_shape[0], self.out_features
        else:
            return self.input_shape[:-1] + (self.out_features,)

    def reset_parameters(self):
        super().reset_parameters()
        if self.weights_init:
            self.weights_init(self.weight)

        if self.bias is not None and self.bias_init:
            self.bias_init(self.bias)

    def extra_repr(self):
        s = super().extra_repr()
        s += ', activation={}'.format(self.activation.__name__)
        return s
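
# Illustrative usage sketch (not part of the original module): with flatten=True the
# 4D feature map is reshaped to (batch, C*H*W) before the affine transform, matching
# the int(np.prod(input_shape[1:])) in-features computation in __init__ above. The
# 'tanh' string is assumed to be recognized by utils.function, and the 4D input_shape
# is assumed to pass through _matrix_shape unchanged.
def _demo_fc():
    fc = FC(input_shape=(8, 16, 4, 4), out_features=10, activation='tanh', flatten=True)
    x = T.rand(8, 16, 4, 4)
    y = fc(x)
    print(y.shape)  # expected: torch.Size([8, 10])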
class Softmax(FC):
    """
    A special case of :class:`~neuralnet_pytorch.layers.FC` with softmax activation function.

    Parameters
    ----------
    input_shape
        shape of the input tensor.
        If an integer is passed, it is treated as the size of each input sample.
    out_features : int
        size of each output sample.
    dim : int
        dimension to apply softmax. Default: 1.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_features, dim=1, weights_init=None, bias_init=None, **kwargs):
        self.dim = dim
        kwargs['dim'] = dim
        super().__init__(input_shape, out_features, activation='softmax', weights_init=weights_init,
                         bias_init=bias_init, **kwargs)
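
# Illustrative usage sketch (not part of the original module): Softmax is just FC with
# activation='softmax' applied along `dim`, so each row of the output sums to one.
def _demo_softmax():
    clf = Softmax(input_shape=(8, 64), out_features=5, dim=1)
    x = T.rand(8, 64)
    y = clf(x)
    print(y.shape)       # expected: torch.Size([8, 5])
    print(y.sum(dim=1))  # expected: a vector of ones (up to floating-point error)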
@utils.add_custom_repr
class DepthwiseSepConv2D(Sequential):
    """
    Performs depthwise separable convolution in image processing.

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size : int
        size of the convolving kernel.
    depth_mul
        depth multiplier for the intermediate result of the depthwise convolution. Default: 1.
    stride
        stride of the convolution. Default: 1.
    padding
        zero-padding added to both sides of the input.
        Besides tuple/list/integer, it accepts ``str`` such as ``'half'`` and ``'valid'``,
        which are similar to those in Theano, and ``'ref'`` and ``'rep'``,
        which are common padding schemes.
        Default: ``'half'``.
    dilation
        spacing between kernel elements. Default: 1.
    bias : bool
        if ``True``, adds a learnable bias to the output. Default: ``True``.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_channels, kernel_size, depth_mul=1, stride=1, padding='half', dilation=1,
                 bias=True, activation=None, **kwargs):
        input_shape = _image_shape(input_shape)
        super().__init__(input_shape=input_shape)
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.depth_mul = depth_mul
        self.padding = padding
        self.dilation = dilation
        self.activation = utils.function(activation, **kwargs)

        self.depthwise = Conv2d(self.output_shape, input_shape[1] * depth_mul, kernel_size, stride=stride,
                                padding=padding, dilation=dilation, groups=input_shape[1], bias=bias)
        self.pointwise = Conv2d(self.output_shape, out_channels, 1, activation=activation, padding=padding,
                                dilation=dilation, bias=False, **kwargs)

    def extra_repr(self):
        s = ('{input_shape}, {out_channels}, kernel_size={kernel_size}'
             ', depth_mul={depth_mul}')

        if self.padding != 'half':
            s += ', padding={padding}'

        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'

        s = s.format(**self.__dict__)
        s += ', activation={}'.format(self.activation.__name__)
        return s
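
# Illustrative usage sketch (not part of the original module): the layer chains a
# grouped (depthwise) Conv2d with groups=in_channels and a 1x1 pointwise Conv2d, as
# built in __init__ above. It assumes Sequential forwards its registered child layers
# in order and that 'relu' is recognized by utils.function; with 'half' padding the
# spatial size is preserved.
def _demo_depthwise_sep_conv2d():
    sep = DepthwiseSepConv2D(input_shape=(2, 32, 64, 64), out_channels=64,
                             kernel_size=3, depth_mul=2, activation='relu')
    x = T.rand(2, 32, 64, 64)
    y = sep(x)
    print(y.shape)  # expected: torch.Size([2, 64, 64, 64])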