Refactor load/util, start fixing packnet to support NHWC format

2021-07-19 12:32:56 +09:30
parent d8bf493999
commit 38e7ad069e
6 changed files with 85 additions and 93 deletions
--- a/dense_depth_functional.py
+++ b/dense_depth_functional.py
@@ -1,4 +1,3 @@
-import tensorflow as tf
 import tensorflow.keras as keras
 import tensorflow_datasets as tfds

@@ -25,7 +24,8 @@ def dense_depth(size, weights=None, shape=(224, 224, 3)):
    densenet_output_channels = densenet.layers[-1].output.shape[-1]

    # Reduce the feature set (pointwise)
-    decoder = keras.layers.Conv2D(filters=densenet_output_channels, kernel_size=1, padding='same')(densenet.output)
+    decoder = keras.layers.Conv2D(
+        filters=densenet_output_channels, kernel_size=1, padding='same')(densenet.output)

    # The actual decoder
    decoder = dense_upsample_block(
@@ -87,30 +87,6 @@ def dense_nnconv5(size, weights=None, shape=(224, 224, 3), half_features=True):
    return keras.Model(inputs=input, outputs=decoder, name="fast_dense_depth")


-def load_nyu(download_dir='../nyu'):
-    """
-    Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
-    :return: nyu_v2 dataset builder
-    """
-    builder = tfds.builder('nyu_depth_v2')
-    builder.download_and_prepare(download_dir=download_dir)
-    return builder \
-        .as_dataset(split='train', shuffle_files=True) \
-        .shuffle(buffer_size=1024) \
-        .batch(8) \
-        .map(lambda x: fd.crop_and_resize(x))
-
-
-def load_nyu_evaluate(download_dir='../nyu'):
-    """
-    Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
-    :return: nyu_v2 dataset builder
-    """
-    builder = tfds.builder('nyu_depth_v2')
-    builder.download_and_prepare(download_dir=download_dir)
-    return builder.as_dataset(split='validation').batch(1).map(lambda x: fd.crop_and_resize(x))
-
-
 if __name__ == '__main__':
    model = dense_depth(169, 'imagenet')
    model.summary()
--- a/fast_depth_functional.py
+++ b/fast_depth_functional.py
@@ -1,6 +1,8 @@
 import tensorflow as tf
 import tensorflow.keras as keras
 import tensorflow_datasets as tfds
+from load import load_nyu, load_nyu_evaluate
+from util import crop_and_resize
 # Needed for the kitti dataset, don't delete

 """
@@ -160,56 +162,6 @@ def load_model(file):
                                                         'delta2_metric': delta2_metric,
                                                         'delta3_metric': delta3_metric})

-
-def crop_and_resize(x):
-    shape = tf.shape(x['depth'])
-    img_shape = tf.shape(x['image'])
-    # Ensure we get a square for when we resize is later.
-    # For horizontal images this is basically just cropping the sides off
-    center_shape = min(shape[1], shape[2], img_shape[1], img_shape[2])
-
-    def layer():
-        return keras.Sequential([
-            keras.layers.experimental.preprocessing.CenterCrop(
-                center_shape, center_shape),
-            keras.layers.experimental.preprocessing.Resizing(
-                224, 224, interpolation='nearest')
-        ])
-
-    # Reshape label to 4d, can't use array unwrap as it's unsupported by tensorflow
-    return layer()(x['image']), layer()(tf.reshape(x['depth'], [shape[0], shape[1], shape[2], 1]))
-
-
-def load_nyu(download_dir='../nyu'):
-    """
-    Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
-    :return: nyu_v2 dataset builder
-    """
-    builder = tfds.builder('nyu_depth_v2')
-    builder.download_and_prepare(download_dir=download_dir)
-    return builder \
-        .as_dataset(split='train', shuffle_files=True) \
-        .shuffle(buffer_size=1024) \
-        .batch(8) \
-        .map(lambda x: crop_and_resize(x))
-
-
-def load_nyu_evaluate(download_dir='../nyu'):
-    """
-    Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
-    :return: nyu_v2 dataset builder
-    """
-    builder = tfds.builder('nyu_depth_v2')
-    builder.download_and_prepare(download_dir=download_dir)
-    return builder.as_dataset(split='validation').batch(1).map(lambda x: crop_and_resize(x))
-
-
-def load_kitti(download_dir='../kitti'):
-    ds = tfds.builder('kitti_depth')
-    ds.download_and_prepare(download_dir=download_dir)
-    return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x))
-
-
 if __name__ == '__main__':
    model = mobilenet_nnconv5()
    model.summary()
--- a/load.py
+++ b/load.py
@@ -0,0 +1,33 @@
+from util import crop_and_resize
+import tensorflow_datasets as tfds
+import tensorflow.keras as keras
+
+
+def load_nyu(download_dir='../nyu', out_shape=(224, 224)):
+    """
+    Load the nyu_v2 dataset train split. Will be downloaded to download_dir (default ../nyu)
+    :return: shuffled, batched train dataset mapped through crop_and_resize (not the builder)
+    """
+    builder = tfds.builder('nyu_depth_v2')
+    builder.download_and_prepare(download_dir=download_dir)
+    return builder \
+        .as_dataset(split='train', shuffle_files=True) \
+        .shuffle(buffer_size=1024) \
+        .batch(8) \
+        .map(lambda x: crop_and_resize(x, out_shape))
+
+
+def load_nyu_evaluate(download_dir='../nyu', out_shape=(224, 224)):
+    """
+    Load the nyu_v2 dataset validation split. Will be downloaded to download_dir (default ../nyu)
+    :return: validation dataset in batches of 1, mapped through crop_and_resize (not the builder)
+    """
+    builder = tfds.builder('nyu_depth_v2')
+    builder.download_and_prepare(download_dir=download_dir)
+    return builder.as_dataset(split='validation').batch(1).map(lambda x: crop_and_resize(x, out_shape))
+
+
+def load_kitti(download_dir='../kitti', out_shape=(224, 224)):
+    ds = tfds.builder('kitti_depth')
+    ds.download_and_prepare(download_dir=download_dir)
+    return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x, out_shape))
--- a/packnet_functional.py
+++ b/packnet_functional.py
@@ -21,7 +21,8 @@ def residual_layer(inputs, out_channels, stride, dropout=None):
    """
    x = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
    x = layers.Conv2D(out_channels, 3, padding='same')(x)
-    shortcut = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
+    shortcut = layers.Conv2D(
+        out_channels, 3, padding='same', stride=stride)(inputs)
    if dropout:
        shortcut = keras.layers.SpatialDropout2D(dropout)(shortcut)
    x = keras.layers.Concatenate()([x, shortcut])
@@ -38,7 +39,8 @@ def residual_block(inputs, out_channels, residual_layers, stride, dropout=None):


 def packnet_conv2d(inputs, out_channels, kernel_size, stride):
-    x = keras.layers.Conv2D(out_channels, kernel_size, stride, padding='same')
+    x = keras.layers.Conv2D(out_channels, kernel_size,
+                            stride, padding='same')(inputs)
    x = group_norm.GroupNormalization(16)(x)
    return keras.layers.ELU()(x)

@@ -59,23 +61,25 @@ def pack_3d(inputs, kernel_size, r=2, features_3d=8):
    """
    # Data format for single image in nyu is HWC (space_to_depth uses NHWC as default)
    x = nn.space_to_depth(inputs, r)
-    x = tf.expand_dims(x, 1)
-    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')
-    b, c, d, h, w = x.shape
-    x = tf.reshape(x, (b, c * d, h, w))
-    return packnet_conv2d(x, inputs.shape[1], kernel_size, 1)
+    x = tf.expand_dims(x, 4)
+    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
+    b, h, w, c, d = x.shape
+    x = tf.reshape(x, (b, h, w, c * d))
+    return packnet_conv2d(x, inputs.shape[3], kernel_size, 1)


 def unpack_3d(inputs, out_channels, kernel_size, r=3, features_3d=8):
-    x = packnet_conv2d(inputs, out_channels * (r ** 2) // features_3d, kernel_size, 1)
-    x = tf.expand_dims(x, 1)  # B x D x 4(out)/D x H/2 x W/2
+    x = packnet_conv2d(inputs, out_channels * (r ** 2) //
+                       features_3d, kernel_size, 1)
+    x = tf.expand_dims(x, 4)  # B x H/2 x W/2 x 4(out)/D x D
    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')
    b, c, d, h, w = x.shape
-    x = tf.reshape(x, [b, c * d, h, w])
+    x = tf.reshape(x, [b, h, w, c * d])
    return nn.depth_to_space(x, r)


 # TODO: Support different size packnet for scaling up/down
+# TODO: Support different channel format (right now we're supporting NHWC, we should also support NCHW)
 def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None):
    """
    Make the PackNet depth network.
@@ -105,10 +109,12 @@ def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None
    # ================ ENCODER =================

    x = unpack_3d(x, 512, 3, features_3d=features_3d)
-    x = keras.layers.Add([x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5])
+    x = keras.layers.Add(
+        [x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5])
    x = packnet_conv2d(x, 512, 3, 1)
    x = unpack_3d(x, 256, 3, features_3d=features_3d)
-    x = keras.layers.Add([x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4])
+    x = keras.layers.Add(
+        [x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4])
    x = packnet_conv2d(x, 256, 3, 1)
    # TODO: This is wrong, look at the paper
    x = packnet_inverse_depth(x, 1)
--- a/unsupervised/warp.py
+++ b/unsupervised/warp.py
@@ -5,6 +5,10 @@ def projective_inverse_warp(target_img, source_img, depth, pose, intrinsics):
    SFM Learner inverse warp step
        ps ~ K.T(t->s).Dt(pt).K^-1.pt

+    The idea is to map the pixel coordinates of the target image into 3D space (Dt(pt).K^-1.pt), then project these
+    onto the source image in pixel coordinates (K.T(t->s).{3d coord}), then use the projected coordinates to sample
+    the pixels in the source image (ps) to reconstruct the target image.
+
    :param target_img: Tensor (batch, height, width, 3)
    :param source_img: Tensor, same shape as target_img
    :param depth: Tensor, (batch, height, width, 1)
--- a/util.py
+++ b/util.py
@@ -0,0 +1,21 @@
+import tensorflow as tf
+import tensorflow.keras as keras
+
+
+def crop_and_resize(x, out_shape=(224, 224)):
+    shape = tf.shape(x['depth'])
+    img_shape = tf.shape(x['image'])
+    # Ensure we get a square for when we resize it later.
+    # For horizontal images this is basically just cropping the sides off
+    center_shape = min(shape[1], shape[2], img_shape[1], img_shape[2])
+
+    def layer():
+        return keras.Sequential([
+            keras.layers.experimental.preprocessing.CenterCrop(
+                center_shape, center_shape),
+            keras.layers.experimental.preprocessing.Resizing(
+                out_shape[0], out_shape[1], interpolation='nearest')
+        ])
+
+    # Reshape label to 4d, can't use array unwrap as it's unsupported by tensorflow
+    return layer()(x['image']), layer()(tf.reshape(x['depth'], [shape[0], shape[1], shape[2], 1]))