Add compiling packnet model, refactor modules to not duplicate loaders and trainers

2021-07-23 22:41:46 +09:30
parent 66cbc7faf6
commit 3254eef4bf
8 changed files with 135 additions and 96 deletions
--- a/fast_depth_functional.py
+++ b/fast_depth_functional.py
@@ -1,9 +1,8 @@
-import tensorflow as tf
 import tensorflow.keras as keras
-import tensorflow_datasets as tfds
-from load import load_nyu, load_nyu_evaluate
+
+from load import load_nyu_evaluate
+from metric import *
 from util import crop_and_resize
-# Needed for the kitti dataset, don't delete

 """
 Unofficial tensorflow keras implementation of FastDepth (mobilenet_nnconv5).
@@ -76,59 +75,6 @@ def mobilenet_nnconv5(weights=None, shape=(224, 224, 3)):
    return keras.Model(inputs=input, outputs=x, name="fast_depth")


-def delta1_metric(y_true, y_pred):
-    maxRatio = tf.maximum(y_pred / y_true, y_true / y_pred)
-    return tf.nn.moments(tf.cast(maxRatio < tf.convert_to_tensor(1.25), tf.float32), axes=None)[0]
-
-
-def delta2_metric(y_true, y_pred):
-    maxRatio = tf.maximum(y_pred / y_true, y_true / y_pred)
-    return tf.nn.moments(tf.cast(maxRatio < tf.convert_to_tensor(1.25 ** 2), tf.float32), axes=None)[0]
-
-
-def delta3_metric(y_true, y_pred):
-    maxRatio = tf.maximum(y_pred / y_true, y_true / y_pred)
-    return tf.nn.moments(tf.cast(maxRatio < tf.convert_to_tensor(1.25 ** 3), tf.float32), axes=None)[0]
-
-
-def compile(model, optimiser=keras.optimizers.SGD(), loss=keras.losses.MeanSquaredError(), custom_metrics=None):
-    """
-    Compile FastDepth model with relevant metrics
-    :param model: Model to compile
-    :param optimiser: Custom optimiser to use
-    :param loss: Loss function to use
-    :param include_metrics: Whether to include metrics (RMSE, MSE, a1,2,3)
-    """
-    model.compile(optimizer=optimiser,
-                  loss=loss,
-                  metrics=[keras.metrics.RootMeanSquaredError(),
-                           keras.metrics.MeanSquaredError(),
-                           delta1_metric,
-                           delta2_metric,
-                           delta3_metric] if custom_metrics is None else custom_metrics)
-
-
-def train(existing_model=None, pretrained_weights='imagenet', epochs=4, save_file=None, dataset=None):
-    """
-    Compile, train and save (if a save file is specified) a Fast Depth model.
-    :param existing_model: Existing FastDepth model to train. None will create
-    :param pretrained_weights: Weights to use if existing_model is not specified. See keras.applications.MobileNet
-        weights parameter for options here.
-    :param epochs: Number of epochs to run for
-    :param save_file: File/directory to save to after training. By default the model won't be saved
-    :param dataset: Train dataset to use. By default will DOWNLOAD and use tensorflow nyu_v2 dataset
-    """
-    if not existing_model:
-        existing_model = mobilenet_nnconv5(pretrained_weights)
-        compile(existing_model)
-    if not dataset:
-        dataset = load_nyu()
-    existing_model.fit(dataset, epochs=epochs)
-    if save_file:
-        existing_model.save(save_file)
-    return existing_model
-
-
 def evaluate(compiled_model, dataset=None):
    """
    Evaluate the model using rmse, delta1/2/3 metrics
@@ -152,16 +98,6 @@ def forward(model, image):
    return model(crop_and_resize(image))


-def load_model(file):
-    """
-    Load previously trained FastDepth model from disk. Will include relevant metrics (custom objects)
-    :param file: File/directory to load the model from
-    :return:
-    """
-    return keras.models.load_model(file, custom_objects={'delta1_metric': delta1_metric,
-                                                         'delta2_metric': delta2_metric,
-                                                         'delta3_metric': delta3_metric})
-
 if __name__ == '__main__':
    model = mobilenet_nnconv5()
    model.summary()
--- a/load.py
+++ b/load.py
@@ -1,6 +1,9 @@
-from util import crop_and_resize
-import tensorflow_datasets as tfds
 import tensorflow.keras as keras
+import tensorflow_datasets as tfds
+
+from losses import dense_depth_loss_function
+from metric import *
+from util import crop_and_resize


 def load_nyu(download_dir='../nyu', out_shape=(224, 224)):
@@ -31,3 +34,15 @@ def load_kitti(download_dir='../kitti', out_shape=(224, 224)):
    ds = tfds.builder('kitti_depth')
    ds.download_and_prepare(download_dir=download_dir)
    return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x, out_shape))
+
+
+def load_model(file):
+    """
+    Load a previously trained model from disk, registering the custom metrics and loss it may reference.
+    :param file: File/directory to load the model from
+    :return: The model returned by keras.models.load_model
+    """
+    current = {fn.__name__: fn for fn in (delta1_metric, delta2, delta3, dense_depth_loss_function)}
+    # Models saved before delta2/delta3 were renamed still refer to them by their old *_metric names
+    legacy = {'delta2_metric': delta2, 'delta3_metric': delta3}
+    return keras.models.load_model(file, custom_objects={**legacy, **current})
--- a/losses.py
+++ b/losses.py
@@ -6,15 +6,15 @@ def dense_depth_loss_function(y, y_pred):
    Implementation of the loss from the dense depth paper https://arxiv.org/pdf/1812.11941.pdf
    """
    # Point-wise L1 loss
-    l_depth = tf.reduce_mean(tf.math.abs(y_pred - y), axis=-1)
+    l1_depth = tf.reduce_mean(tf.math.abs(y_pred - y), axis=-1)

    # L1 loss over image gradients
    dy, dx = tf.image.image_gradients(y)
    dy_pred, dx_pred = tf.image.image_gradients(y_pred)
-    l_grad = tf.reduce_mean(tf.math.abs(dy_pred - dy) +
+    gradient = tf.reduce_mean(tf.math.abs(dy_pred - dy) +
                              tf.math.abs(dx_pred - dx), axis=-1)

    #  Structural Similarity (SSIM)
-    l_ssim = (1 - tf.image.ssim(y, y_pred, 500)) / 2
+    ssim = (1 - tf.image.ssim(y, y_pred, 500)) / 2

-    return 0.1 * tf.reduce_mean(l_depth) + tf.reduce_mean(l_grad) + l_ssim
+    return 0.1 * tf.reduce_mean(l1_depth) + tf.reduce_mean(gradient) + ssim
--- a/metric.py
+++ b/metric.py
@@ -0,0 +1,28 @@
+import tensorflow as tf
+
+
+def _threshold_accuracy(y_true, y_pred, threshold):
+    """
+    Fraction of elements for which max(y_pred / y_true, y_true / y_pred) is below the threshold
+    :param y_true: Ground truth values
+    :param y_pred: Predicted values
+    :param threshold: Exclusive upper bound on the ratio for an element to count as accurate
+    :return: Scalar float32 tensor, the mean over all elements
+    """
+    max_ratio = tf.maximum(y_pred / y_true, y_true / y_pred)
+    return tf.reduce_mean(tf.cast(max_ratio < tf.convert_to_tensor(threshold), tf.float32))
+
+
+def delta1_metric(y_true, y_pred):
+    """Threshold accuracy with a threshold of 1.25"""
+    return _threshold_accuracy(y_true, y_pred, 1.25)
+
+
+def delta2(y_true, y_pred):
+    """Threshold accuracy with a threshold of 1.25 ** 2"""
+    return _threshold_accuracy(y_true, y_pred, 1.25 ** 2)
+
+
+def delta3(y_true, y_pred):
+    """Threshold accuracy with a threshold of 1.25 ** 3"""
+    return _threshold_accuracy(y_true, y_pred, 1.25 ** 3)
--- a/packnet_functional.py
+++ b/packnet_functional.py
@@ -19,10 +19,10 @@ def residual_layer(inputs, out_channels, stride, dropout=None):
    :param dropout:
    :return:
    """
-    x = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
+    x = layers.Conv2D(out_channels, 3, padding='same', strides=stride)(inputs)
    x = layers.Conv2D(out_channels, 3, padding='same')(x)
    shortcut = layers.Conv2D(
-        out_channels, 3, padding='same', stride=stride)(inputs)
+        out_channels, 3, padding='same', strides=stride)(inputs)
    if dropout:
        shortcut = keras.layers.SpatialDropout2D(dropout)(shortcut)
    x = keras.layers.Concatenate()([x, shortcut])
@@ -46,7 +46,7 @@ def packnet_conv2d(inputs, out_channels, kernel_size, stride):


 def packnet_inverse_depth(inputs, out_channels=1, min_depth=0.5):
-    x = packnet_conv2d(inputs, out_channels, kernel_size=3, stride=1)
+    x = layers.Conv2D(out_channels, 3, padding='same')(inputs)  # raw 3x3 conv; the sigmoid is applied in the return
    return keras.activations.sigmoid(x) / min_depth


@@ -64,7 +64,7 @@ def pack_3d(inputs, kernel_size, r=2, features_3d=8):
    x = tf.expand_dims(x, 4)
    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
    b, h, w, c, d = x.shape
-    x = tf.reshape(x, (b, h, w, c * d))
+    x = keras.layers.Reshape((h, w, c * d))(x)
    return packnet_conv2d(x, inputs.shape[3], kernel_size, 1)


@@ -74,7 +74,7 @@ def unpack_3d(inputs, out_channels, kernel_size, r=2, features_3d=8):
    x = tf.expand_dims(x, 4)  # B x H/2 x W/2 x 4(out)/D x D
    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
    b, h, w, c, d = x.shape
-    x = tf.reshape(x, [b, h, w, c * d])
+    x = keras.layers.Reshape([h, w, c * d])(x)
    return nn.depth_to_space(x, r)


@@ -92,7 +92,7 @@ def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None
    input = keras.layers.Input(shape=shape)
    x = packnet_conv2d(input, 32, 5, 1)
    skip_1 = x
-    x = packnet_conv2d(x, 32, 7, 1)
+    x = packnet_conv2d(x, 64, 7, 1)
    x = pack_3d(x, 5, features_3d=features_3d)
    skip_2 = x
    x = residual_block(x, 64, 2, 1, dropout)
@@ -108,24 +108,43 @@ def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None
    x = pack_3d(x, 3, features_3d=features_3d)
    # ================ ENCODER =================

+    # ================ DECODER =================
+    # layer 7
    x = unpack_3d(x, 512, 3, features_3d=features_3d)
-    x = keras.layers.Add(
-        [x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5])
+    x = keras.layers.Add()(
+        [x, skip_5]) if skip_add else keras.layers.Concatenate()([x, skip_5])
    x = packnet_conv2d(x, 512, 3, 1)
+    # layer 8
    x = unpack_3d(x, 256, 3, features_3d=features_3d)
-    x = keras.layers.Add(
-        [x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4])
+    x = keras.layers.Add()(
+        [x, skip_4]) if skip_add else keras.layers.Concatenate()([x, skip_4])
    x = packnet_conv2d(x, 256, 3, 1)
-    # TODO: This is wrong, look at the paper
+    layer_8 = x
+    # layer 9
    x = packnet_inverse_depth(x, 1)
-    x = keras.layers.UpSampling2D()
-
-    # TODO: Skip connection
-    if skip_add:
-        x = keras.layers.Add([x, ])
-    else:
-        x = keras.layers.Concatenate([x, ])
-
-    x = packnet_conv2d(x, 32, 3, 1)
+    # layer 10
+    u_layer_8 = unpack_3d(layer_8, 128, 3, features_3d=features_3d)
+    x = keras.layers.UpSampling2D()(x)
+    x = keras.layers.Add()([u_layer_8, skip_3, x]) if skip_add else keras.layers.Concatenate()([u_layer_8, skip_3, x])
+    x = packnet_conv2d(x, 128, 3, 1)
+    layer_10 = x
+    # layer 11
+    x = packnet_inverse_depth(x, 1)
+    # layer 12
+    u_layer_10 = unpack_3d(layer_10, 64, 3, features_3d=features_3d)
+    x = keras.layers.UpSampling2D()(x)
+    x = keras.layers.Add()([u_layer_10, skip_2, x]) if skip_add else keras.layers.Concatenate()([u_layer_10, skip_2, x])
+    x = packnet_conv2d(x, 64, 3, 1)
+    layer_12 = x
+    # layer 13
    x = packnet_inverse_depth(x)
+    # layer 14
+    u_layer_12 = unpack_3d(layer_12, 32, 3, features_3d=features_3d)
+    x = keras.layers.UpSampling2D()(x)
+    x = keras.layers.Add()([u_layer_12, skip_1, x]) if skip_add else keras.layers.Concatenate()([u_layer_12, skip_1, x])
+    x = packnet_conv2d(x, 32, 3, 1)
+    # layer 15
+    x = packnet_inverse_depth(x)
+    # ================ DECODER =================
+
    return keras.Model(inputs=input, outputs=x, name="PackNet")
--- a/packnet_tests.py
+++ b/packnet_tests.py
@@ -28,6 +28,10 @@ class PacknetTests(unittest.TestCase):
        # TODO: Anything else we can test here for validity?
        self.assertEqual(y.shape, out_shape)

+    def test_packnet(self):
+        model = p.make_packnet()  # smoke test: building the default model must not raise
+        self.assertIsNotNone(model)
+

 if __name__ == '__main__':
    unittest.main()
--- a/train.py
+++ b/train.py
@@ -0,0 +1,61 @@
+"""
+Collection of functions to train the various models, and use different losses
+"""
+import tensorflow.keras as keras
+
+from load import load_nyu
+from metric import *
+
+
+def compile(model, optimiser=None, loss=None, custom_metrics=None):
+    """
+    Compile a model with the relevant depth metrics
+    :param model: Model to compile
+    :param optimiser: Custom optimiser to use. None creates a new keras.optimizers.SGD()
+    :param loss: Loss function to use. None creates a new keras.losses.MeanSquaredError()
+    :param custom_metrics: Metrics to compile with instead of the defaults (RMSE, MSE, delta1/2/3, MAPE, MAE)
+    """
+    # The defaults are created per call rather than in the signature: default argument values are evaluated
+    # once at definition time, so every model compiled with the defaults would share one optimiser instance.
+    if optimiser is None:
+        optimiser = keras.optimizers.SGD()
+    if loss is None:
+        loss = keras.losses.MeanSquaredError()
+    if custom_metrics is None:
+        custom_metrics = [keras.metrics.RootMeanSquaredError(),
+                          keras.metrics.MeanSquaredError(),
+                          delta1_metric,
+                          delta2,
+                          delta3,
+                          keras.metrics.MeanAbsolutePercentageError(),
+                          keras.metrics.MeanAbsoluteError()]
+    model.compile(optimizer=optimiser, loss=loss, metrics=custom_metrics)
+
+
+def train(existing_model=None, pretrained_weights='imagenet', epochs=4, save_file=None, dataset=None,
+          checkpoint='ckpt'):
+    """
+    Compile (if no model is given), train and save (if a save file is specified) a Fast Depth model.
+    :param existing_model: Existing model to train; it is not compiled here. None will create and compile one
+    :param pretrained_weights: Weights to use if existing_model is not specified. See keras.applications.MobileNet
+        weights parameter for options here.
+    :param epochs: Number of epochs to run for
+    :param save_file: File/directory to save to after training. By default the model won't be saved
+    :param dataset: Train dataset to use. By default will DOWNLOAD and use tensorflow nyu_v2 dataset
+    :param checkpoint: Path handed to keras.callbacks.ModelCheckpoint (weights only). Falsy disables checkpoints
+    :return: The trained model
+    """
+    if existing_model is None:
+        # Imported lazily so the FastDepth module is only needed when this function has to build the model
+        from fast_depth_functional import mobilenet_nnconv5
+        existing_model = mobilenet_nnconv5(pretrained_weights)
+        compile(existing_model)
+    callbacks = []
+    if checkpoint:
+        callbacks.append(keras.callbacks.ModelCheckpoint(checkpoint, save_weights_only=True))
+    if dataset is None:
+        dataset = load_nyu()
+    existing_model.fit(dataset, epochs=epochs, callbacks=callbacks)
+    if save_file:
+        existing_model.save(save_file)
+    return existing_model
--- a/util.py
+++ b/util.py
@@ -5,9 +5,9 @@ import tensorflow.keras as keras
 def crop_and_resize(x, out_shape=(224, 224)):
    shape = tf.shape(x['depth'])
    img_shape = tf.shape(x['image'])
-    # Ensure we get a square for when we resize is later.
+    # Ensure we get a square for when we resize it later.
    # For horizontal images this is basically just cropping the sides off
-    center_shape = min(shape[1], shape[2], img_shape[1], img_shape[2])
+    center_shape = tf.minimum(shape[1], tf.minimum(shape[2], tf.minimum(img_shape[1], img_shape[2])))

    def layer():
        return keras.Sequential([