diff --git a/fast_depth_functional.py b/fast_depth_functional.py index 65ca9bb..672f224 100644 --- a/fast_depth_functional.py +++ b/fast_depth_functional.py @@ -1,9 +1,8 @@ -import tensorflow as tf import tensorflow.keras as keras -import tensorflow_datasets as tfds -from load import load_nyu, load_nyu_evaluate + +from load import load_nyu_evaluate +from metric import * from util import crop_and_resize -# Needed for the kitti dataset, don't delete """ Unofficial tensorflow keras implementation of FastDepth (mobilenet_nnconv5). @@ -76,59 +75,6 @@ def mobilenet_nnconv5(weights=None, shape=(224, 224, 3)): return keras.Model(inputs=input, outputs=x, name="fast_depth") -def delta1_metric(y_true, y_pred): - maxRatio = tf.maximum(y_pred / y_true, y_true / y_pred) - return tf.nn.moments(tf.cast(maxRatio < tf.convert_to_tensor(1.25), tf.float32), axes=None)[0] - - -def delta2_metric(y_true, y_pred): - maxRatio = tf.maximum(y_pred / y_true, y_true / y_pred) - return tf.nn.moments(tf.cast(maxRatio < tf.convert_to_tensor(1.25 ** 2), tf.float32), axes=None)[0] - - -def delta3_metric(y_true, y_pred): - maxRatio = tf.maximum(y_pred / y_true, y_true / y_pred) - return tf.nn.moments(tf.cast(maxRatio < tf.convert_to_tensor(1.25 ** 3), tf.float32), axes=None)[0] - - -def compile(model, optimiser=keras.optimizers.SGD(), loss=keras.losses.MeanSquaredError(), custom_metrics=None): - """ - Compile FastDepth model with relevant metrics - :param model: Model to compile - :param optimiser: Custom optimiser to use - :param loss: Loss function to use - :param include_metrics: Whether to include metrics (RMSE, MSE, a1,2,3) - """ - model.compile(optimizer=optimiser, - loss=loss, - metrics=[keras.metrics.RootMeanSquaredError(), - keras.metrics.MeanSquaredError(), - delta1_metric, - delta2_metric, - delta3_metric] if custom_metrics is None else custom_metrics) - - -def train(existing_model=None, pretrained_weights='imagenet', epochs=4, save_file=None, dataset=None): - """ - Compile, train and save (if a save file is specified) a Fast Depth model. - :param existing_model: Existing FastDepth model to train. None will create - :param pretrained_weights: Weights to use if existing_model is not specified. See keras.applications.MobileNet - weights parameter for options here. - :param epochs: Number of epochs to run for - :param save_file: File/directory to save to after training. By default the model won't be saved - :param dataset: Train dataset to use. By default will DOWNLOAD and use tensorflow nyu_v2 dataset - """ - if not existing_model: - existing_model = mobilenet_nnconv5(pretrained_weights) - compile(existing_model) - if not dataset: - dataset = load_nyu() - existing_model.fit(dataset, epochs=epochs) - if save_file: - existing_model.save(save_file) - return existing_model - - def evaluate(compiled_model, dataset=None): """ Evaluate the model using rmse, delta1/2/3 metrics @@ -152,16 +98,6 @@ def forward(model, image): return model(crop_and_resize(image)) -def load_model(file): - """ - Load previously trained FastDepth model from disk. Will include relevant metrics (custom objects) - :param file: File/directory to load the model from - :return: - """ - return keras.models.load_model(file, custom_objects={'delta1_metric': delta1_metric, - 'delta2_metric': delta2_metric, - 'delta3_metric': delta3_metric}) - if __name__ == '__main__': model = mobilenet_nnconv5() model.summary() diff --git a/load.py b/load.py index 0cce7cf..4860143 100644 --- a/load.py +++ b/load.py @@ -1,6 +1,9 @@ -from util import crop_and_resize -import tensorflow_datasets as tfds import tensorflow.keras as keras +import tensorflow_datasets as tfds + +from losses import dense_depth_loss_function +from metric import * +from util import crop_and_resize def load_nyu(download_dir='../nyu', out_shape=(224, 224)): @@ -31,3 +34,15 @@ def load_kitti(download_dir='../kitti', out_shape=(224, 224)): ds = tfds.builder('kitti_depth') ds.download_and_prepare(download_dir=download_dir) return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x, out_shape)) + + +def load_model(file): + """ + Load previously trained FastDepth model from disk. Will include relevant metrics (custom objects) + :param file: File/directory to load the model from + :return: + """ + return keras.models.load_model(file, custom_objects={'delta1_metric': delta1_metric, + 'delta2_metric': delta2, + 'delta3_metric': delta3, + 'dense_depth_loss_function': dense_depth_loss_function}) diff --git a/losses.py b/losses.py index 0e47ede..6051246 100644 --- a/losses.py +++ b/losses.py @@ -6,15 +6,15 @@ def dense_depth_loss_function(y, y_pred): Implementation of the loss from the dense depth paper https://arxiv.org/pdf/1812.11941.pdf """ # Point-wise L1 loss - l_depth = tf.reduce_mean(tf.math.abs(y_pred - y), axis=-1) + l1_depth = tf.reduce_mean(tf.math.abs(y_pred - y), axis=-1) # L1 loss over image gradients dy, dx = tf.image.image_gradients(y) dy_pred, dx_pred = tf.image.image_gradients(y_pred) - l_grad = tf.reduce_mean(tf.math.abs(dy_pred - dy) + - tf.math.abs(dx_pred - dx), axis=-1) + gradient = tf.reduce_mean(tf.math.abs(dy_pred - dy) + + tf.math.abs(dx_pred - dx), axis=-1) # Structural Similarity (SSIM) - l_ssim = (1 - tf.image.ssim(y, y_pred, 500)) / 2 + ssim = (1 - tf.image.ssim(y, y_pred, 500)) / 2 - return 0.1 * tf.reduce_mean(l_depth) + tf.reduce_mean(l_grad) + l_ssim + return 0.1 * tf.reduce_mean(l1_depth) + tf.reduce_mean(gradient) + ssim diff --git a/metric.py b/metric.py new file mode 100644 index 0000000..3a80ac1 --- /dev/null +++ b/metric.py @@ -0,0 +1,16 @@ +import tensorflow as tf + + +def delta1_metric(y_true, y_pred): + max_ratio = tf.maximum(y_pred / y_true, y_true / y_pred) + return tf.reduce_mean(tf.cast(max_ratio < tf.convert_to_tensor(1.25), tf.float32)) + + +def delta2(y_true, y_pred): + max_ratio = tf.maximum(y_pred / y_true, y_true / y_pred) + return tf.reduce_mean(tf.cast(max_ratio < tf.convert_to_tensor(1.25 ** 2), tf.float32)) + + +def delta3(y_true, y_pred): + max_ratio = tf.maximum(y_pred / y_true, y_true / y_pred) + return tf.reduce_mean(tf.cast(max_ratio < tf.convert_to_tensor(1.25 ** 3), tf.float32)) diff --git a/packnet_functional.py b/packnet_functional.py index ae32229..c404c07 100644 --- a/packnet_functional.py +++ b/packnet_functional.py @@ -19,10 +19,10 @@ def residual_layer(inputs, out_channels, stride, dropout=None): :param dropout: :return: """ - x = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs) + x = layers.Conv2D(out_channels, 3, padding='same', strides=stride)(inputs) x = layers.Conv2D(out_channels, 3, padding='same')(x) shortcut = layers.Conv2D( - out_channels, 3, padding='same', stride=stride)(inputs) + out_channels, 3, padding='same', strides=stride)(inputs) if dropout: shortcut = keras.layers.SpatialDropout2D(dropout)(shortcut) x = keras.layers.Concatenate()([x, shortcut]) @@ -46,7 +46,7 @@ def packnet_conv2d(inputs, out_channels, kernel_size, stride): def packnet_inverse_depth(inputs, out_channels=1, min_depth=0.5): - x = packnet_conv2d(inputs, out_channels, kernel_size=3, stride=1) + x = layers.Conv2D(out_channels, 3, padding='same')(inputs) return keras.activations.sigmoid(x) / min_depth @@ -64,7 +64,7 @@ def pack_3d(inputs, kernel_size, r=2, features_3d=8): x = tf.expand_dims(x, 4) x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x) b, h, w, c, d = x.shape - x = tf.reshape(x, (b, h, w, c * d)) + x = keras.layers.Reshape((h, w, c * d))(x) return packnet_conv2d(x, inputs.shape[3], kernel_size, 1) @@ -74,7 +74,7 @@ def unpack_3d(inputs, out_channels, kernel_size, r=2, features_3d=8): x = tf.expand_dims(x, 4) # B x H/2 x W/2 x 4(out)/D x D x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x) b, h, w, c, d = x.shape - x = tf.reshape(x, [b, h, w, c * d]) + x = keras.layers.Reshape([h, w, c * d])(x) return nn.depth_to_space(x, r) @@ -92,7 +92,7 @@ def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None input = keras.layers.Input(shape=shape) x = packnet_conv2d(input, 32, 5, 1) skip_1 = x - x = packnet_conv2d(x, 32, 7, 1) + x = packnet_conv2d(x, 64, 7, 1) x = pack_3d(x, 5, features_3d=features_3d) skip_2 = x x = residual_block(x, 64, 2, 1, dropout) @@ -108,24 +108,43 @@ def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None x = pack_3d(x, 3, features_3d=features_3d) # ================ ENCODER ================= + # ================ DECODER ================= + # layer 7 x = unpack_3d(x, 512, 3, features_3d=features_3d) - x = keras.layers.Add( - [x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5]) + x = keras.layers.Add()( + [x, skip_5]) if skip_add else keras.layers.Concatenate()([x, skip_5]) x = packnet_conv2d(x, 512, 3, 1) + # layer 8 x = unpack_3d(x, 256, 3, features_3d=features_3d) - x = keras.layers.Add( - [x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4]) + x = keras.layers.Add()( + [x, skip_4]) if skip_add else keras.layers.Concatenate()([x, skip_4]) x = packnet_conv2d(x, 256, 3, 1) - # TODO: This is wrong, look at the paper + layer_8 = x + # layer 9 x = packnet_inverse_depth(x, 1) - x = keras.layers.UpSampling2D() - - # TODO: Skip connection - if skip_add: - x = keras.layers.Add([x, ]) - else: - x = keras.layers.Concatenate([x, ]) - - x = packnet_conv2d(x, 32, 3, 1) + # layer 10 + u_layer_8 = unpack_3d(layer_8, 128, 3, features_3d=features_3d) + x = keras.layers.UpSampling2D()(x) + x = keras.layers.Add()([u_layer_8, skip_3, x]) if skip_add else keras.layers.Concatenate()([u_layer_8, skip_3, x]) + x = packnet_conv2d(x, 128, 3, 1) + layer_10 = x + # layer 11 + x = packnet_inverse_depth(x, 1) + # layer 12 + u_layer_10 = unpack_3d(layer_10, 64, 3, features_3d=features_3d) + x = keras.layers.UpSampling2D()(x) + x = keras.layers.Add()([u_layer_10, skip_2, x]) if skip_add else keras.layers.Concatenate()([u_layer_10, skip_2, x]) + x = packnet_conv2d(x, 64, 3, 1) + layer_12 = x + # layer 13 x = packnet_inverse_depth(x) + # layer 14 + u_layer_12 = unpack_3d(layer_12, 32, 3, features_3d=features_3d) + x = keras.layers.UpSampling2D()(x) + x = keras.layers.Add()([u_layer_12, skip_1, x]) if skip_add else keras.layers.Concatenate()([u_layer_12, skip_1, x]) + x = packnet_conv2d(x, 32, 3, 1) + # layer 15 + x = packnet_inverse_depth(x) + # ================ DECODER ================= + return keras.Model(inputs=input, outputs=x, name="PackNet") diff --git a/packnet_tests.py b/packnet_tests.py index 22b3feb..eb80577 100644 --- a/packnet_tests.py +++ b/packnet_tests.py @@ -28,6 +28,10 @@ class PacknetTests(unittest.TestCase): # TODO: Anything else we can test here for validity? self.assertEqual(y.shape, out_shape) + def test_packnet(self): + packnet = p.make_packnet() + self.assertIsNotNone(packnet) + if __name__ == '__main__': unittest.main() diff --git a/train.py b/train.py new file mode 100644 index 0000000..474d495 --- /dev/null +++ b/train.py @@ -0,0 +1,49 @@ +""" +Collection of functions to train the various models, and use different losses +""" +import tensorflow.keras as keras + +from load import load_nyu +from metric import * + + +def compile(model, optimiser=keras.optimizers.SGD(), loss=keras.losses.MeanSquaredError(), custom_metrics=None): + """ + Compile FastDepth model with relevant metrics + :param model: Model to compile + :param optimiser: Custom optimiser to use + :param loss: Loss function to use + :param include_metrics: Whether to include metrics (RMSE, MSE, a1,2,3) + """ + model.compile(optimizer=optimiser, + loss=loss, + metrics=[keras.metrics.RootMeanSquaredError(), + keras.metrics.MeanSquaredError(), + delta1_metric, + delta2, + delta3, + keras.metrics.MeanAbsolutePercentageError(), + keras.metrics.MeanAbsoluteError()] if custom_metrics is None else custom_metrics) + + +def train(existing_model=None, pretrained_weights='imagenet', epochs=4, save_file=None, dataset=None, + checkpoint='ckpt'): + """ + Compile, train and save (if a save file is specified) a Fast Depth model. + :param existing_model: Existing FastDepth model to train. None will create + :param pretrained_weights: Weights to use if existing_model is not specified. See keras.applications.MobileNet + weights parameter for options here. + :param epochs: Number of epochs to run for + :param save_file: File/directory to save to after training. By default the model won't be saved + :param dataset: Train dataset to use. By default will DOWNLOAD and use tensorflow nyu_v2 dataset + :param checkpoint: Checkpoint to save to + """ + callbacks = [] + if checkpoint: + callbacks.append(keras.callbacks.ModelCheckpoint(checkpoint, save_weights_only=True)) + if not dataset: + dataset = load_nyu() + existing_model.fit(dataset, epochs=epochs, callbacks=callbacks) + if save_file: + existing_model.save(save_file) + return existing_model diff --git a/util.py b/util.py index dbd1c8c..e4bc29a 100644 --- a/util.py +++ b/util.py @@ -5,9 +5,9 @@ import tensorflow.keras as keras def crop_and_resize(x, out_shape=(224, 224)): shape = tf.shape(x['depth']) img_shape = tf.shape(x['image']) - # Ensure we get a square for when we resize is later. + # Ensure we get a square for when we resize it later. # For horizontal images this is basically just cropping the sides off - center_shape = min(shape[1], shape[2], img_shape[1], img_shape[2]) + center_shape = tf.minimum(shape[1], tf.minimum(shape[2], tf.minimum(img_shape[1], img_shape[2]))) def layer(): return keras.Sequential([