# Source code for neuralnet_pytorch.layers.blocks

from functools import partial
from torch.nn.modules.utils import _pair

from .abstract import Sequential, Module, Lambda
from .convolution import Conv2d, FC
from .normalization import BatchNorm2d, InstanceNorm2d, LayerNorm, BatchNorm1d, InstanceNorm1d, FeatureNorm1d
from .. import utils
from ..utils import _image_shape

__all__ = ['ConvNormAct', 'StackingConv', 'ResNetBasicBlock', 'ResNetBottleneckBlock', 'FCNormAct']


@utils.add_custom_repr
class ConvNormAct(Sequential):
    """
    Fuses convolution, normalization and activation together.

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size
        size of the convolving kernel.
    stride
        stride of the convolution. Default: 1.
    padding
        zero-padding added to both sides of the input.
        Besides tuple/list/integer, it accepts ``str`` such as ``'half'`` and ``'valid'``,
        which are similar to Theano's, and ``'ref'`` and ``'rep'``, which are common
        padding schemes. Default: ``'half'``.
    dilation
        spacing between kernel elements. Default: 1.
    groups : int
        number of blocked connections from input channels to output channels. Default: 1.
    bias : bool
        if ``True``, adds a learnable bias to the output. Default: ``True``.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    eps
        a value added to the denominator for numerical stability. Default: 1e-5.
    momentum : float
        the value used for the running_mean and running_var computation.
        Can be set to ``None`` for cumulative moving average (i.e. simple average).
        Default: 0.1.
    affine
        a boolean value that when set to ``True``, this module has learnable affine
        parameters. Default: ``True``.
    track_running_stats
        a boolean value that when set to ``True``, this module tracks the running mean
        and variance, and when set to ``False``, this module does not track such
        statistics and always uses batch statistics in both training and eval modes.
        Default: ``True``.
    no_scale : bool
        whether to remove the trainable scale parameter from the normalization layer.
        Default: ``False``.
    norm_layer
        normalization method to be used. Choices are ``'bn'``, ``'in'``, ``'ln'``
        or a callable. Default: ``'bn'``.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_channels, kernel_size, stride=1, padding='half', dilation=1, groups=1,
                 bias=True, activation='relu', weights_init=None, bias_init=None, eps=1e-5, momentum=0.1,
                 affine=True, track_running_stats=True, no_scale=False, norm_layer='bn', **kwargs):
        super().__init__(input_shape=input_shape)
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.dilation = dilation
        self.activation = utils.function(activation, **kwargs)
        self.norm_layer = norm_layer

        self.conv = Conv2d(input_shape, out_channels, kernel_size, weights_init=weights_init, bias=bias,
                           bias_init=bias_init, padding=padding, stride=stride, dilation=dilation,
                           activation=None, groups=groups, **kwargs)
        if isinstance(norm_layer, str):
            # resolve the normalization layer from its shorthand name
            norm_layer = BatchNorm2d if norm_layer == 'bn' \
                else InstanceNorm2d if norm_layer == 'in' \
                else LayerNorm if norm_layer == 'ln' \
                else norm_layer
        else:
            assert callable(norm_layer), 'norm_layer must be an instance of `str` or callable'

        self.norm = norm_layer(self.conv.output_shape, eps, momentum, affine, track_running_stats,
                               no_scale=no_scale, activation=self.activation, **kwargs)

    def extra_repr(self):
        s = '{in_channels}, {out_channels}, kernel_size={kernel_size}, stride={stride}'
        if self.conv.padding != (0,) * len(self.conv.padding):
            s += ', padding={padding}'
        if self.conv.dilation != (1,) * len(self.conv.dilation):
            s += ', dilation={dilation}'
        if self.conv.output_padding != (0,) * len(self.conv.output_padding):
            s += ', output_padding={output_padding}'
        if self.conv.groups != 1:
            s += ', groups={groups}'
        if self.conv.bias is None:
            s += ', bias=False'
        s = s.format(**self.conv.__dict__)
        s += ', activation={}'.format(self.activation.__name__)
        return s
@utils.add_custom_repr
class StackingConv(Sequential):
    """
    Stacks multiple convolution layers together.

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size
        size of the convolving kernel.
    num_layers : int
        number of convolutional layers.
    stride
        stride of the convolution. Default: 1.
    padding
        zero-padding added to both sides of the input.
        Besides tuple/list/integer, it accepts ``str`` such as ``'half'`` and ``'valid'``,
        which are similar to Theano's, and ``'ref'`` and ``'rep'``, which are common
        padding schemes. Default: ``'half'``.
    dilation
        spacing between kernel elements. Default: 1.
    bias : bool
        if ``True``, adds a learnable bias to the output. Default: ``True``.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    norm_method
        normalization method to be used. Choices are ``'bn'``, ``'in'``, and ``'ln'``.
        Default: ``None``.
    groups : int
        number of blocked connections from input channels to output channels. Default: 1.
    kwargs
        extra keyword arguments to pass to activation.
    """

    def __init__(self, input_shape, out_channels, kernel_size, num_layers, stride=1, padding='half', dilation=1,
                 bias=True, activation='relu', weights_init=None, bias_init=None, norm_method=None, groups=1,
                 **kwargs):
        assert num_layers > 1, 'num_layers must be greater than 1, got %d' % num_layers

        input_shape = _image_shape(input_shape)
        super().__init__(input_shape=input_shape)
        self.num_filters = out_channels
        self.filter_size = kernel_size
        self.stride = stride
        self.dilation = _pair(dilation)
        self.groups = groups
        self.activation = utils.function(activation, **kwargs)
        self.num_layers = num_layers
        self.norm_method = norm_method

        shape = tuple(input_shape)
        # ConvNormAct expects the normalization method via its `norm_layer` argument
        conv_layer = partial(ConvNormAct, norm_layer=norm_method) if norm_method is not None else Conv2d
        # every layer except the last uses stride 1; only the final layer applies
        # the requested stride
        for num in range(num_layers - 1):
            layer = conv_layer(input_shape=shape, out_channels=out_channels, kernel_size=kernel_size,
                               weights_init=weights_init, bias_init=bias_init, stride=1, padding=padding,
                               dilation=dilation, activation=activation, groups=groups, bias=bias, **kwargs)
            self.add_module('stacking_conv_%d' % (num + 1), layer)
            shape = layer.output_shape
        self.add_module('stacking_conv_%d' % num_layers,
                        conv_layer(input_shape=shape, out_channels=out_channels, bias=bias, groups=groups,
                                   kernel_size=kernel_size, weights_init=weights_init, stride=stride,
                                   padding=padding, dilation=dilation, activation=activation,
                                   bias_init=bias_init, **kwargs))

    def extra_repr(self):
        s = '{input_shape}, {num_filters}, kernel_size={filter_size}, stride={stride}, num_layers={num_layers}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.groups != 1:
            s += ', groups={groups}'
        s = s.format(**self.__dict__)
        s += ', activation={}'.format(self.activation.__name__)
        return s
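
# An illustrative sketch of StackingConv; the helper is not part of the original module
# and the shapes are assumptions. As noted in __init__, every layer except the last
# uses stride 1, so only the final convolution downsamples.
def _example_stacking_conv():
    import torch

    stack = StackingConv(3, out_channels=64, kernel_size=3, num_layers=3,
                         stride=2, padding='half', activation='relu', norm_method='bn')
    x = torch.randn(4, 3, 32, 32)
    return stack(x)  # -> (4, 64, 16, 16): only 'stacking_conv_3' applies stride 2
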
@utils.add_custom_repr
class FCNormAct(Sequential):
    """
    Fuses fully connected, normalization and activation together.

    Parameters
    ----------
    input_shape
        shape of the input tensor.
        If an integer is passed, it is treated as the size of each input sample.
    out_features : int
        size of each output sample.
    bias : bool
        if set to ``False``, the layer will not learn an additive bias. Default: ``True``.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    bias_init
        a bias initialization method from :mod:`torch.nn.init`.
    flatten : bool
        whether to flatten the input tensor into a 2D matrix. Default: ``False``.
    keepdim : bool
        whether to keep the output dimension when ``out_features`` is 1.
    eps
        a value added to the denominator for numerical stability. Default: 1e-5.
    momentum : float
        the value used for the running_mean and running_var computation.
        Can be set to ``None`` for cumulative moving average (i.e. simple average).
        Default: 0.1.
    affine
        a boolean value that when set to ``True``, this module has learnable affine
        parameters. Default: ``True``.
    track_running_stats
        a boolean value that when set to ``True``, this module tracks the running mean
        and variance, and when set to ``False``, this module does not track such
        statistics and always uses batch statistics in both training and eval modes.
        Default: ``True``.
    no_scale : bool
        whether to remove the trainable scale parameter from the normalization layer.
        Default: ``False``.
    norm_layer
        normalization method to be used. Choices are ``'bn'``, ``'in'``, ``'ln'``
        and ``'fn'``, or a callable. Default: ``'bn'``.
    kwargs
        extra keyword arguments to pass to activation and norm_layer.
    """

    def __init__(self, input_shape, out_features, bias=True, activation=None, weights_init=None, bias_init=None,
                 flatten=False, keepdim=True, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True,
                 no_scale=False, norm_layer='bn', **kwargs):
        super().__init__(input_shape=input_shape)
        self.out_features = out_features
        self.flatten = flatten
        self.keepdim = keepdim
        self.activation = utils.function(activation, **kwargs)

        self.fc = FC(self.input_shape, out_features, bias, weights_init=weights_init, bias_init=bias_init,
                     flatten=flatten, keepdim=keepdim)
        if isinstance(norm_layer, str):
            # resolve the normalization layer from its shorthand name
            norm_layer = BatchNorm1d if norm_layer == 'bn' \
                else InstanceNorm1d if norm_layer == 'in' \
                else LayerNorm if norm_layer == 'ln' \
                else FeatureNorm1d if norm_layer == 'fn' \
                else norm_layer
        else:
            assert callable(norm_layer), 'norm_layer must be an instance of `str` or callable'

        self.norm = norm_layer(self.fc.output_shape, eps, momentum, affine, track_running_stats,
                               no_scale=no_scale, activation=self.activation, **kwargs)

    def extra_repr(self):
        s = '{in_features}, {out_features}'
        if self.flatten:
            s += ', flatten={flatten}'
        if not self.keepdim:
            s += ', keepdim={keepdim}'
        s = s.format(**self.fc.__dict__)
        s += ', activation={}'.format(self.activation.__name__)
        return s
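
# An illustrative sketch of FCNormAct; the helper is not part of the original module
# and the sizes are assumptions: a linear layer on 256-d features with fused batch
# norm and ReLU.
def _example_fc_norm_act():
    import torch

    fc = FCNormAct(256, out_features=128, activation='relu', norm_layer='bn')
    x = torch.randn(16, 256)
    return fc(x)  # -> (16, 128)
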
@utils.add_custom_repr
class ResNetBasicBlock(Module):
    """
    A basic block to build ResNet (https://arxiv.org/abs/1512.03385).

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size
        size of the convolving kernel.
    stride
        stride of the convolution. Default: 1.
    padding
        zero-padding added to both sides of the input.
        Besides tuple/list/integer, it accepts ``str`` such as ``'half'`` and ``'valid'``,
        which are similar to Theano's, and ``'ref'`` and ``'rep'``, which are common
        padding schemes. Default: ``'half'``.
    dilation
        spacing between kernel elements. Default: 1.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    base_width : int
        base number of channels per group; the block width is computed as
        ``int(out_channels * (base_width / 64)) * groups``. Default: 64.
    downsample
        a module to process the residual branch when the output shape differs from the
        input shape. If ``None``, a simple :class:`ConvNormAct` is used.
    groups : int
        number of blocked connections from input channels to output channels. Default: 1.
    block
        a function to construct the main branch of the block.
        If ``None``, a simple block as described in the paper is used.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    norm_layer
        normalization method to be used. Choices are ``'bn'``, ``'in'``, and ``'ln'``.
        Default: ``'bn'``.
    kwargs
        extra keyword arguments to pass to activation.

    Attributes
    ----------
    expansion : int
        expansion coefficient of the number of output channels. Default: 1.
    """
    expansion = 1

    def __init__(self, input_shape, out_channels, kernel_size=3, stride=1, padding='half', dilation=1,
                 activation='relu', base_width=64, downsample=None, groups=1, block=None, weights_init=None,
                 norm_layer='bn', **kwargs):
        input_shape = _image_shape(input_shape)
        super().__init__(input_shape=input_shape)
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.dilation = _pair(dilation)
        self.activation = utils.function(activation, **kwargs)
        self.base_width = base_width
        self.width = int(out_channels * (base_width / 64)) * groups
        self.groups = groups
        self.weights_init = weights_init
        self.norm_layer = norm_layer
        self.kwargs = kwargs

        self.block = self._make_block() if block is None else block(**kwargs)
        if downsample is not None:
            assert isinstance(downsample, (Module, Sequential)), \
                'downsample must be an instance of Module, got %s' % type(downsample)
            self.downsample = downsample
        else:
            if stride > 1 or input_shape[1] != out_channels * self.expansion:
                # the two branches disagree in shape -> project the shortcut
                self.downsample = ConvNormAct(input_shape, out_channels * self.expansion, 1, stride=stride,
                                              bias=False, padding=padding, weights_init=weights_init,
                                              activation='linear')
            else:
                # identity shortcut
                self.downsample = Lambda(lambda x: x, output_shape=input_shape, input_shape=input_shape)

    def _make_block(self):
        block = Sequential(input_shape=self.input_shape)
        out_channels = self.out_channels if self.expansion == 1 else self.width
        if self.expansion != 1:
            # bottleneck: reduce the number of channels with a 1x1 convolution first
            block.add_module('conv1x1', ConvNormAct(block.output_shape, out_channels, 1, bias=False,
                                                    padding=self.padding, weights_init=self.weights_init,
                                                    activation=self.activation))
        block.add_module('conv_norm_act_1',
                         ConvNormAct(block.output_shape, out_channels, self.kernel_size, bias=False,
                                     padding=self.padding, weights_init=self.weights_init, stride=self.stride,
                                     activation=self.activation, groups=self.groups,
                                     norm_layer=self.norm_layer, **self.kwargs))
        block.add_module('conv_norm_act_2',
                         ConvNormAct(block.output_shape, self.out_channels * self.expansion,
                                     1 if self.expansion != 1 else self.kernel_size, bias=False,
                                     padding=self.padding, activation=None, weights_init=self.weights_init,
                                     norm_layer=self.norm_layer, **self.kwargs))
        return block

    def forward(self, input, *args, **kwargs):
        res = input
        out = self.block(input)
        out += self.downsample(res)
        return self.activation(out)

    def extra_repr(self):
        s = '{input_shape}, {out_channels}, kernel_size={kernel_size}, stride={stride}'
        if self.dilation != (1,) * len(self.dilation):
            s += ', dilation={dilation}'
        if self.groups != 1:
            s += ', groups={groups}'
        s = s.format(**self.__dict__)
        s += ', activation={}'.format(self.activation.__name__)
        return s

    @property
    @utils.validate
    def output_shape(self):
        return self.downsample.output_shape
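
# An illustrative sketch of ResNetBasicBlock; the helper is not part of the original
# module and the shapes are assumptions. With stride 1 and unchanged channels, the
# shortcut is the identity Lambda built in __init__; otherwise a strided 1x1
# ConvNormAct projection is used.
def _example_resnet_basic_block():
    import torch

    block = ResNetBasicBlock(64, out_channels=64, kernel_size=3, stride=1)
    x = torch.randn(2, 64, 56, 56)
    return block(x)  # -> (2, 64, 56, 56): residual sum followed by ReLU
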
class ResNetBottleneckBlock(ResNetBasicBlock):
    """
    A bottleneck block to build ResNet (https://arxiv.org/abs/1512.03385).

    Parameters
    ----------
    input_shape
        shape of the 4D input image.
        If a single integer is passed, it is treated as the number of input channels
        and other sizes are unknown.
    out_channels : int
        number of channels produced by the convolution.
    kernel_size
        size of the convolving kernel.
    stride
        stride of the convolution. Default: 1.
    padding
        zero-padding added to both sides of the input.
        Besides tuple/list/integer, it accepts ``str`` such as ``'half'`` and ``'valid'``,
        which are similar to Theano's, and ``'ref'`` and ``'rep'``, which are common
        padding schemes. Default: ``'half'``.
    dilation
        spacing between kernel elements. Default: 1.
    activation
        non-linear function to activate the linear result.
        It accepts any callable function as well as a recognizable ``str``.
        A list of possible ``str`` is in :func:`~neuralnet_pytorch.utils.function`.
    base_width : int
        base number of channels per group; the block width is computed as
        ``int(out_channels * (base_width / 64)) * groups``. Default: 64.
    downsample
        a module to process the residual branch when the output shape differs from the
        input shape. If ``None``, a simple :class:`ConvNormAct` is used.
    groups : int
        number of blocked connections from input channels to output channels. Default: 1.
    block
        a function to construct the main branch of the block.
        If ``None``, a simple block as described in the paper is used.
    weights_init
        a kernel initialization method from :mod:`torch.nn.init`.
    norm_layer
        normalization method to be used. Choices are ``'bn'``, ``'in'``, and ``'ln'``.
        Default: ``'bn'``.
    kwargs
        extra keyword arguments to pass to activation.

    Attributes
    ----------
    expansion : int
        expansion coefficient of the number of output channels. Default: 4.
    """
    expansion = 4

    def __init__(self, input_shape, out_channels, kernel_size=3, stride=1, padding='half', dilation=1,
                 activation='relu', base_width=64, downsample=None, groups=1, block=None, weights_init=None,
                 norm_layer='bn', **kwargs):
        super().__init__(input_shape, out_channels, kernel_size, stride=stride, padding=padding,
                         dilation=dilation, activation=activation, base_width=base_width, downsample=downsample,
                         groups=groups, block=block, weights_init=weights_init, norm_layer=norm_layer, **kwargs)
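
# An illustrative sketch of ResNetBottleneckBlock; the helper is not part of the
# original module and the shapes are assumptions. With expansion = 4, the main branch
# is a 1x1 reduction, a strided 3x3 convolution, and a 1x1 expansion to
# out_channels * 4; the shortcut becomes a strided 1x1 ConvNormAct because the
# output shape differs from the input shape.
def _example_resnet_bottleneck_block():
    import torch

    block = ResNetBottleneckBlock(256, out_channels=64, stride=2)
    x = torch.randn(2, 256, 56, 56)
    return block(x)  # -> (2, 256, 28, 28)
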