From 603de2bc9f518e5b3203e60c772c34419dacba93 Mon Sep 17 00:00:00 2001
From: Piv <18462828+Piv200@users.noreply.github.com>
Date: Sun, 18 Jul 2021 18:59:25 +0930
Subject: [PATCH] Start adding packnet model

---
 group_norm.py         | 209 ++++++++++++++++++++++++++++++++++++++++++
 packnet_functional.py |  98 ++++++++++++++++++++
 2 files changed, 307 insertions(+)
 create mode 100644 group_norm.py
 create mode 100644 packnet_functional.py

diff --git a/group_norm.py b/group_norm.py
new file mode 100644
index 0000000..11ccd0c
--- /dev/null
+++ b/group_norm.py
@@ -0,0 +1,209 @@
+# MIT License
+#
+# Copyright (c) 2019 Somshubra Majumdar
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Taken from: https://github.com/titu1994/Keras-Group-Normalization/blob/master/group_norm.py
+
+from tensorflow.keras import backend as K
+from tensorflow.keras import constraints
+from tensorflow.keras import initializers
+from tensorflow.keras import regularizers
+from tensorflow.keras.layers import Layer, InputSpec
+
+
+class GroupNormalization(Layer):
+    """Group normalization layer.
+    Group Normalization divides the channels into groups and computes within each group
+    the mean and variance for normalization. GN's computation is independent of batch sizes,
+    and its accuracy is stable over a wide range of batch sizes.
+    # Arguments
+        groups: Integer, the number of groups for Group Normalization.
+        axis: Integer, the axis that should be normalized
+            (typically the features axis).
+            For instance, after a `Conv2D` layer with
+            `data_format="channels_first"`,
+            set `axis=1` in `GroupNormalization`.
+        epsilon: Small float added to variance to avoid dividing by zero.
+        center: If True, add offset of `beta` to normalized tensor.
+            If False, `beta` is ignored.
+        scale: If True, multiply by `gamma`.
+            If False, `gamma` is not used.
+            When the next layer is linear (also e.g. `nn.relu`),
+            this can be disabled since the scaling
+            will be done by the next layer.
+        beta_initializer: Initializer for the beta weight.
+        gamma_initializer: Initializer for the gamma weight.
+        beta_regularizer: Optional regularizer for the beta weight.
+        gamma_regularizer: Optional regularizer for the gamma weight.
+        beta_constraint: Optional constraint for the beta weight.
+        gamma_constraint: Optional constraint for the gamma weight.
+    # Input shape
+        Arbitrary. Use the keyword argument `input_shape`
+        (tuple of integers, does not include the samples axis)
+        when using this layer as the first layer in a model.
+    # Output shape
+        Same shape as input.
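+    # Example
+        For instance, a 64-channel `Conv2D` feature map can be normalized in
+        16 groups of 4 channels each (with `conv_output` standing in for that
+        layer's output):
+            x = GroupNormalization(groups=16, axis=-1)(conv_output)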
+    # References
+        - [Group Normalization](https://arxiv.org/abs/1803.08494)
+    """
+
+    def __init__(self,
+                 groups=32,
+                 axis=-1,
+                 epsilon=1e-5,
+                 center=True,
+                 scale=True,
+                 beta_initializer='zeros',
+                 gamma_initializer='ones',
+                 beta_regularizer=None,
+                 gamma_regularizer=None,
+                 beta_constraint=None,
+                 gamma_constraint=None,
+                 **kwargs):
+        super(GroupNormalization, self).__init__(**kwargs)
+        self.supports_masking = True
+        self.groups = groups
+        self.axis = axis
+        self.epsilon = epsilon
+        self.center = center
+        self.scale = scale
+        self.beta_initializer = initializers.get(beta_initializer)
+        self.gamma_initializer = initializers.get(gamma_initializer)
+        self.beta_regularizer = regularizers.get(beta_regularizer)
+        self.gamma_regularizer = regularizers.get(gamma_regularizer)
+        self.beta_constraint = constraints.get(beta_constraint)
+        self.gamma_constraint = constraints.get(gamma_constraint)
+        self.gamma = None
+        self.beta = None
+
+    def build(self, input_shape):
+        dim = input_shape[self.axis]
+
+        if dim is None:
+            raise ValueError('Axis ' + str(self.axis) + ' of '
+                             'input tensor should have a defined dimension '
+                             'but the layer received an input with shape ' +
+                             str(input_shape) + '.')
+
+        if dim < self.groups:
+            raise ValueError('Number of groups (' + str(self.groups) + ') cannot be '
+                             'more than the number of channels (' +
+                             str(dim) + ').')
+
+        if dim % self.groups != 0:
+            raise ValueError('Number of channels (' + str(dim) + ') must be a '
+                             'multiple of the number of groups (' +
+                             str(self.groups) + ').')
+
+        self.input_spec = InputSpec(ndim=len(input_shape),
+                                    axes={self.axis: dim})
+        shape = (dim,)
+
+        if self.scale:
+            self.gamma = self.add_weight(shape=shape,
+                                         name='gamma',
+                                         initializer=self.gamma_initializer,
+                                         regularizer=self.gamma_regularizer,
+                                         constraint=self.gamma_constraint)
+        if self.center:
+            self.beta = self.add_weight(shape=shape,
+                                        name='beta',
+                                        initializer=self.beta_initializer,
+                                        regularizer=self.beta_regularizer,
+                                        constraint=self.beta_constraint)
+        self.built = True
+
+    def call(self, inputs, **kwargs):
+        input_shape = K.int_shape(inputs)
+        tensor_input_shape = K.shape(inputs)
+
+        # Prepare broadcasting shape.
+        reduction_axes = list(range(len(input_shape)))
+        del reduction_axes[self.axis]
+        broadcast_shape = [1] * len(input_shape)
+        broadcast_shape[self.axis] = input_shape[self.axis] // self.groups
+        broadcast_shape.insert(1, self.groups)
+
+        reshape_group_shape = K.shape(inputs)
+        group_axes = [reshape_group_shape[i] for i in range(len(input_shape))]
+        group_axes[self.axis] = input_shape[self.axis] // self.groups
+        group_axes.insert(1, self.groups)
+
+        # reshape inputs to new group shape
+        group_shape = [group_axes[0], self.groups] + group_axes[2:]
+        group_shape = K.stack(group_shape)
+        inputs = K.reshape(inputs, group_shape)
+
+        group_reduction_axes = list(range(len(group_axes)))
+        group_reduction_axes = group_reduction_axes[2:]
+
+        mean = K.mean(inputs, axis=group_reduction_axes, keepdims=True)
+        variance = K.var(inputs, axis=group_reduction_axes, keepdims=True)
+
+        inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))
+
+        # prepare broadcast shape
+        inputs = K.reshape(inputs, group_shape)
+        outputs = inputs
+
+        # In this case we must explicitly broadcast all parameters.
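+        # For example, with the default axis=-1 and an NHWC input with C
+        # channels and G groups, gamma/beta of shape (C,) are reshaped to
+        # (1, G, 1, 1, C // G) so that they broadcast against the grouped
+        # tensor of shape (batch, G, H, W, C // G).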
+        if self.scale:
+            broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
+            outputs = outputs * broadcast_gamma
+
+        if self.center:
+            broadcast_beta = K.reshape(self.beta, broadcast_shape)
+            outputs = outputs + broadcast_beta
+
+        outputs = K.reshape(outputs, tensor_input_shape)
+
+        return outputs
+
+    def get_config(self):
+        config = {
+            'groups': self.groups,
+            'axis': self.axis,
+            'epsilon': self.epsilon,
+            'center': self.center,
+            'scale': self.scale,
+            'beta_initializer': initializers.serialize(self.beta_initializer),
+            'gamma_initializer': initializers.serialize(self.gamma_initializer),
+            'beta_regularizer': regularizers.serialize(self.beta_regularizer),
+            'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
+            'beta_constraint': constraints.serialize(self.beta_constraint),
+            'gamma_constraint': constraints.serialize(self.gamma_constraint)
+        }
+        base_config = super(GroupNormalization, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    def compute_output_shape(self, input_shape):
+        return input_shape
+
+
+if __name__ == '__main__':
+    from tensorflow.keras.layers import Input
+    from tensorflow.keras.models import Model
+
+    ip = Input(shape=(None, None, 4))
+    # ip = Input(batch_shape=(100, None, None, 2))
+    x = GroupNormalization(groups=2, axis=-1, epsilon=0.1)(ip)
+    model = Model(ip, x)
+    model.summary()
diff --git a/packnet_functional.py b/packnet_functional.py
new file mode 100644
index 0000000..52e815c
--- /dev/null
+++ b/packnet_functional.py
@@ -0,0 +1,98 @@
+import tensorflow as tf
+import tensorflow.keras as keras
+import tensorflow.keras.layers as layers
+from tensorflow import nn
+
+import group_norm
+
+
+def pack_layer():
+    pass
+
+
+def residual_layer(inputs, out_channels, stride, dropout=None):
+    """
+    Keras implementation of the residual block (ResNet) as used in PackNet.
+    :param inputs: Input tensor.
+    :param out_channels: Number of output channels per convolution.
+    :param stride: Stride of the strided convolutions.
+    :param dropout: Optional spatial dropout rate applied to the shortcut branch.
+    :return: Output tensor of the block.
+    """
+    x = layers.Conv2D(out_channels, 3, padding='same', strides=stride)(inputs)
+    x = layers.Conv2D(out_channels, 3, padding='same')(x)
+    shortcut = layers.Conv2D(out_channels, 3, padding='same', strides=stride)(inputs)
+    if dropout:
+        shortcut = keras.layers.SpatialDropout2D(dropout)(shortcut)
+    x = keras.layers.Concatenate()([x, shortcut])
+    x = group_norm.GroupNormalization(16)(x)
+    return keras.layers.ELU()(x)
+
+
+# PackNet usually expects more than one layer per block (2, 2, 3, 3)
+def residual_block(inputs, out_channels, residual_layers, stride, dropout=None):
+    pass
+
+
+def packnet_conv2d(inputs, out_channels, kernel_size, stride):
+    x = keras.layers.Conv2D(out_channels, kernel_size, stride, padding='same')(inputs)
+    x = group_norm.GroupNormalization(16)(x)
+    return keras.layers.ELU()(x)
+
+
+def packnet_inverse_depth(inputs, out_channels=1, min_depth=0.5):
+    x = packnet_conv2d(inputs, out_channels, kernel_size=3, stride=1)
+    return keras.activations.sigmoid(x) / min_depth
+
+
+def pack_3d(inputs, kernel_size, r=2, features_3d=8):
+    """
+    Implementation of the 3D packing block proposed here: https://arxiv.org/abs/1905.02693
+    :param inputs: Input tensor.
+    :param kernel_size: Kernel size of the final 2D convolution.
+    :param r: space_to_depth block size.
+    :param features_3d: Number of 3D convolution filters.
+    :return: Packed (spatially downsampled) tensor.
+    """
+    # Data format for single image in nyu is HWC (space_to_depth uses NHWC as default)
+    x = nn.space_to_depth(inputs, r)
+    x = tf.expand_dims(x, 1)
+    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
+    b, c, d, h, w = x.shape
+    x = tf.reshape(x, (-1, c * d, h, w))  # -1 keeps the batch dimension dynamic
+    return packnet_conv2d(x, inputs.shape[1], kernel_size, 1)  # note: shape[1] is the spatial height for NHWC inputs
+
+
+def unpack_3d(inputs, out_channels, kernel_size, r=3, features_3d=8):
+    x = packnet_conv2d(inputs, out_channels * (r ** 2) // features_3d, kernel_size, 1)
+    x = tf.expand_dims(x, 1)  # B x D x 4(out)/D x H/2 x W/2
+    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
+    b, c, d, h, w = x.shape
+    x = tf.reshape(x, [-1, c * d, h, w])  # -1 keeps the batch dimension dynamic
+    return nn.depth_to_space(x, r)
+
+
+# TODO: Support different size packnet for scaling up/down
+def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4):
+    """
+    Make the PackNet depth network.
+    :param shape: Input shape of the image
+    :param skip_add: Set to use add rather than concat skip connections, defaults to True
+    :param features_3d: Number of 3D convolution filters in the packing blocks
+    :return: The PackNet Keras model
+    """
+    input = keras.layers.Input(shape=shape)
+    x = packnet_conv2d(input, 32, 5, 1)
+    skip_1 = x
+    x = packnet_conv2d(input, 32, 7, 1)
+    x = pack_3d(x, 5, features_3d=features_3d)
+    x = residual_layer(x, 64, )
+
+    # TODO: Skip connection
+    if skip_add:
+        x = keras.layers.Add([x, ])
+    else:
+        x = keras.layers.Concatenate([x, ])
+
+    x = packnet_conv2d(x, 32, 3, 1)
+    x = packnet_inverse_depth(x)
+    return keras.Model(inputs=input, outputs=x, name="PackNet")
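
A quick smoke test of the building blocks might look like the following sketch. It is not part of the patch: make_packnet still has open TODOs, so it only exercises packnet_conv2d, residual_layer and pack_3d, and the input size and strides are assumed example values.

    import tensorflow.keras as keras

    import packnet_functional as pn

    # Stack the helper blocks into a tiny encoder and inspect the resulting shapes.
    images = keras.layers.Input(shape=(224, 224, 3))
    x = pn.packnet_conv2d(images, 32, kernel_size=5, stride=1)  # (None, 224, 224, 32)
    x = pn.residual_layer(x, 64, stride=2)                      # (None, 112, 112, 128) after the concat shortcut
    x = pn.pack_3d(x, kernel_size=3)                            # spatial resolution halved again by the packing block
    model = keras.Model(images, x)
    model.summary()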