Refactor load/util, start fixing packnet to support NHWC format

2021-07-19 12:32:56 +09:30
parent d8bf493999
commit 38e7ad069e
6 changed files with 85 additions and 93 deletions
--- a/packnet_functional.py
+++ b/packnet_functional.py
@@ -21,7 +21,8 @@ def residual_layer(inputs, out_channels, stride, dropout=None):
    """
    x = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
    x = layers.Conv2D(out_channels, 3, padding='same')(x)
-    shortcut = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
+    shortcut = layers.Conv2D(
+        out_channels, 3, padding='same', stride=stride)(inputs)
    if dropout:
        shortcut = keras.layers.SpatialDropout2D(dropout)(shortcut)
    x = keras.layers.Concatenate()([x, shortcut])
@@ -38,7 +39,8 @@ def residual_block(inputs, out_channels, residual_layers, stride, dropout=None):


 def packnet_conv2d(inputs, out_channels, kernel_size, stride):
-    x = keras.layers.Conv2D(out_channels, kernel_size, stride, padding='same')
+    x = keras.layers.Conv2D(out_channels, kernel_size,
+                            stride, padding='same')(inputs)
    x = group_norm.GroupNormalization(16)(x)
    return keras.layers.ELU()(x)

@@ -59,23 +61,25 @@ def pack_3d(inputs, kernel_size, r=2, features_3d=8):
    """
    # Data format for single image in nyu is HWC (space_to_depth uses NHWC as default)
    x = nn.space_to_depth(inputs, r)
-    x = tf.expand_dims(x, 1)
-    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')
-    b, c, d, h, w = x.shape
-    x = tf.reshape(x, (b, c * d, h, w))
-    return packnet_conv2d(x, inputs.shape[1], kernel_size, 1)
+    x = tf.expand_dims(x, 4)
+    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
+    b, h, w, c, d = x.shape
+    x = tf.reshape(x, (b, h, w, c * d))
+    return packnet_conv2d(x, inputs.shape[3], kernel_size, 1)


 def unpack_3d(inputs, out_channels, kernel_size, r=3, features_3d=8):
-    x = packnet_conv2d(inputs, out_channels * (r ** 2) // features_3d, kernel_size, 1)
-    x = tf.expand_dims(x, 1)  # B x D x 4(out)/D x H/2 x W/2
+    x = packnet_conv2d(inputs, out_channels * (r ** 2) //
+                       features_3d, kernel_size, 1)
+    x = tf.expand_dims(x, 4)  # B x H/2 x W/2 x 4(out)/D x D
    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')
    b, c, d, h, w = x.shape
-    x = tf.reshape(x, [b, c * d, h, w])
+    x = tf.reshape(x, [b, h, w, c * d])
    return nn.depth_to_space(x, r)


 # TODO: Support different size packnet for scaling up/down
+# TODO: Support different channel format (right now we're supporting NHWC, we should also support NCHW)
 def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None):
    """
    Make the PackNet depth network.
@@ -105,10 +109,12 @@ def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None
    # ================ ENCODER =================

    x = unpack_3d(x, 512, 3, features_3d=features_3d)
-    x = keras.layers.Add([x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5])
+    x = keras.layers.Add(
+        [x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5])
    x = packnet_conv2d(x, 512, 3, 1)
    x = unpack_3d(x, 256, 3, features_3d=features_3d)
-    x = keras.layers.Add([x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4])
+    x = keras.layers.Add(
+        [x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4])
    x = packnet_conv2d(x, 256, 3, 1)
    # TODO: This is wrong, look at the paper
    x = packnet_inverse_depth(x, 1)