diff --git a/dense_depth_functional.py b/dense_depth_functional.py
index 416d372..183de6c 100644
--- a/dense_depth_functional.py
+++ b/dense_depth_functional.py
@@ -1,4 +1,3 @@
-import tensorflow as tf
 import tensorflow.keras as keras
 import tensorflow_datasets as tfds
 
@@ -25,7 +24,8 @@ def dense_depth(size, weights=None, shape=(224, 224, 3)):
     densenet_output_channels = densenet.layers[-1].output.shape[-1]
 
     # Reduce the feature set (pointwise)
-    decoder = keras.layers.Conv2D(filters=densenet_output_channels, kernel_size=1, padding='same')(densenet.output)
+    decoder = keras.layers.Conv2D(
+        filters=densenet_output_channels, kernel_size=1, padding='same')(densenet.output)
 
     # The actual decoder
     decoder = dense_upsample_block(
@@ -66,19 +66,19 @@ def dense_nnconv5(size, weights=None, shape=(224, 224, 3), half_features=True):
 
     # Reduce the feature set (pointwise)
     decoder = keras.layers.Conv2D(filters=int(densenet_output_shape[-1]), kernel_size=1, padding='same',
-            input_shape=densenet_output_shape, name='conv2')(densenet.output)
+                                  input_shape=densenet_output_shape, name='conv2')(densenet.output)
 
     # TODO: More intermediate layers here?
 
     # Fast Depth Decoder
     decoder = fd.nnconv5(decoder, densenet.get_layer('pool3_pool').output_shape[3], 1,
-            skip_connection=densenet.get_layer('pool3_pool').output)
+                         skip_connection=densenet.get_layer('pool3_pool').output)
     decoder = fd.nnconv5(decoder, densenet.get_layer('pool2_pool').output_shape[3], 2,
-            skip_connection=densenet.get_layer('pool2_pool').output)
+                         skip_connection=densenet.get_layer('pool2_pool').output)
     decoder = fd.nnconv5(decoder, densenet.get_layer('pool1').output_shape[3], 3,
-            skip_connection=densenet.get_layer('pool1').output)
+                         skip_connection=densenet.get_layer('pool1').output)
     decoder = fd.nnconv5(decoder, densenet.get_layer('conv1/relu').output_shape[3], 4,
-            skip_connection=densenet.get_layer('conv1/relu').output)
+                         skip_connection=densenet.get_layer('conv1/relu').output)
 
     # Final Pointwise for depth extraction
     decoder = keras.layers.Conv2D(1, 1, padding='same')(decoder)
@@ -87,30 +87,6 @@ def dense_nnconv5(size, weights=None, shape=(224, 224, 3), half_features=True):
     return keras.Model(inputs=input, outputs=decoder, name="fast_dense_depth")
 
 
-def load_nyu(download_dir='../nyu'):
-    """
-    Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
-    :return: nyu_v2 dataset builder
-    """
-    builder = tfds.builder('nyu_depth_v2')
-    builder.download_and_prepare(download_dir=download_dir)
-    return builder \
-        .as_dataset(split='train', shuffle_files=True) \
-        .shuffle(buffer_size=1024) \
-        .batch(8) \
-        .map(lambda x: fd.crop_and_resize(x))
-
-
-def load_nyu_evaluate(download_dir='../nyu'):
-    """
-    Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
-    :return: nyu_v2 dataset builder
-    """
-    builder = tfds.builder('nyu_depth_v2')
-    builder.download_and_prepare(download_dir=download_dir)
-    return builder.as_dataset(split='validation').batch(1).map(lambda x: fd.crop_and_resize(x))
-
-
 if __name__ == '__main__':
     model = dense_depth(169, 'imagenet')
     model.summary()
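
A quick smoke test may help when reviewing the file above. This is a minimal sketch, assuming dense_depth_functional.py is importable from the repo root and that the imagenet weights can be fetched; the dummy batch and the shape comment are illustrative, not from the repo.

    import numpy as np
    from dense_depth_functional import dense_depth

    # Build the DenseNet-169-backed model exactly as __main__ does above
    model = dense_depth(169, 'imagenet')
    # One dummy forward pass to sanity-check input/output shapes
    dummy = np.random.rand(1, 224, 224, 3).astype('float32')
    depth = model(dummy, training=False)
    print(depth.shape)  # expected (1, H, W, 1); H and W depend on the upsampling blocks
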
diff --git a/fast_depth_functional.py b/fast_depth_functional.py
index 376eee8..65ca9bb 100644
--- a/fast_depth_functional.py
+++ b/fast_depth_functional.py
@@ -1,6 +1,8 @@
 import tensorflow as tf
 import tensorflow.keras as keras
 import tensorflow_datasets as tfds
+from load import load_nyu, load_nyu_evaluate
+from util import crop_and_resize  # Needed for the kitti dataset, don't delete
 
 
 """
@@ -160,56 +162,6 @@ def load_model(file):
                              'delta2_metric': delta2_metric,
                              'delta3_metric': delta3_metric})
 
-
-def crop_and_resize(x):
-    shape = tf.shape(x['depth'])
-    img_shape = tf.shape(x['image'])
-    # Ensure we get a square for when we resize is later.
-    # For horizontal images this is basically just cropping the sides off
-    center_shape = min(shape[1], shape[2], img_shape[1], img_shape[2])
-
-    def layer():
-        return keras.Sequential([
-            keras.layers.experimental.preprocessing.CenterCrop(
-                center_shape, center_shape),
-            keras.layers.experimental.preprocessing.Resizing(
-                224, 224, interpolation='nearest')
-        ])
-
-    # Reshape label to 4d, can't use array unwrap as it's unsupported by tensorflow
-    return layer()(x['image']), layer()(tf.reshape(x['depth'], [shape[0], shape[1], shape[2], 1]))
-
-
-def load_nyu(download_dir='../nyu'):
-    """
-    Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
-    :return: nyu_v2 dataset builder
-    """
-    builder = tfds.builder('nyu_depth_v2')
-    builder.download_and_prepare(download_dir=download_dir)
-    return builder \
-        .as_dataset(split='train', shuffle_files=True) \
-        .shuffle(buffer_size=1024) \
-        .batch(8) \
-        .map(lambda x: crop_and_resize(x))
-
-
-def load_nyu_evaluate(download_dir='../nyu'):
-    """
-    Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
-    :return: nyu_v2 dataset builder
-    """
-    builder = tfds.builder('nyu_depth_v2')
-    builder.download_and_prepare(download_dir=download_dir)
-    return builder.as_dataset(split='validation').batch(1).map(lambda x: crop_and_resize(x))
-
-
-def load_kitti(download_dir='../kitti'):
-    ds = tfds.builder('kitti_depth')
-    ds.download_and_prepare(download_dir=download_dir)
-    return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x))
-
-
 if __name__ == '__main__':
     model = mobilenet_nnconv5()
     model.summary()
diff --git a/load.py b/load.py
new file mode 100644
index 0000000..0cce7cf
--- /dev/null
+++ b/load.py
@@ -0,0 +1,33 @@
+from util import crop_and_resize
+import tensorflow_datasets as tfds
+import tensorflow.keras as keras
+
+
+def load_nyu(download_dir='../nyu', out_shape=(224, 224)):
+    """
+    Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
+    :return: nyu_v2 train dataset of (image, depth) batches
+    """
+    builder = tfds.builder('nyu_depth_v2')
+    builder.download_and_prepare(download_dir=download_dir)
+    return builder \
+        .as_dataset(split='train', shuffle_files=True) \
+        .shuffle(buffer_size=1024) \
+        .batch(8) \
+        .map(lambda x: crop_and_resize(x, out_shape))
+
+
+def load_nyu_evaluate(download_dir='../nyu', out_shape=(224, 224)):
+    """
+    Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
+    :return: nyu_v2 validation dataset of (image, depth) batches
+    """
+    builder = tfds.builder('nyu_depth_v2')
+    builder.download_and_prepare(download_dir=download_dir)
+    return builder.as_dataset(split='validation').batch(1).map(lambda x: crop_and_resize(x, out_shape))
+
+
+def load_kitti(download_dir='../kitti', out_shape=(224, 224)):
+    ds = tfds.builder('kitti_depth')
+    ds.download_and_prepare(download_dir=download_dir)
+    return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x, out_shape))
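
A sketch of the call pattern the new load.py enables, assuming the nyu_depth_v2 TFDS download succeeds at the given path; the take(1) loop and the printed shapes are illustrative. Each dataset element is an (image, depth) pair produced by util.crop_and_resize, batched to 8 for training and 1 for evaluation, as defined above.

    from load import load_nyu, load_nyu_evaluate

    # Train split: shuffled, batched to 8, center-cropped and resized to 224x224
    train_ds = load_nyu(download_dir='../nyu', out_shape=(224, 224))
    # Validation split: batch size 1
    val_ds = load_nyu_evaluate(download_dir='../nyu', out_shape=(224, 224))

    for images, depths in train_ds.take(1):
        print(images.shape, depths.shape)  # (8, 224, 224, 3) and (8, 224, 224, 1)
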
diff --git a/packnet_functional.py b/packnet_functional.py
index c2d9d31..5bcb578 100644
--- a/packnet_functional.py
+++ b/packnet_functional.py
@@ -21,7 +21,8 @@ def residual_layer(inputs, out_channels, stride, dropout=None):
     """
     x = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
    x = layers.Conv2D(out_channels, 3, padding='same')(x)
-    shortcut = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
+    shortcut = layers.Conv2D(
+        out_channels, 3, padding='same', stride=stride)(inputs)
     if dropout:
         shortcut = keras.layers.SpatialDropout2D(dropout)(shortcut)
     x = keras.layers.Concatenate()([x, shortcut])
@@ -38,7 +39,8 @@ def residual_block(inputs, out_channels, residual_layers, stride, dropout=None):
 
 
 def packnet_conv2d(inputs, out_channels, kernel_size, stride):
-    x = keras.layers.Conv2D(out_channels, kernel_size, stride, padding='same')
+    x = keras.layers.Conv2D(out_channels, kernel_size,
+                            stride, padding='same')(inputs)
     x = group_norm.GroupNormalization(16)(x)
     return keras.layers.ELU()(x)
 
@@ -59,23 +61,25 @@ def pack_3d(inputs, kernel_size, r=2, features_3d=8):
     """
     # Data format for single image in nyu is HWC (space_to_depth uses NHWC as default)
     x = nn.space_to_depth(inputs, r)
-    x = tf.expand_dims(x, 1)
-    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')
-    b, c, d, h, w = x.shape
-    x = tf.reshape(x, (b, c * d, h, w))
-    return packnet_conv2d(x, inputs.shape[1], kernel_size, 1)
+    x = tf.expand_dims(x, 4)
+    x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
+    b, h, w, c, d = x.shape
+    x = tf.reshape(x, (b, h, w, c * d))
+    return packnet_conv2d(x, inputs.shape[3], kernel_size, 1)
 
 
 def unpack_3d(inputs, out_channels, kernel_size, r=3, features_3d=8):
-    x = packnet_conv2d(inputs, out_channels * (r ** 2) // features_3d, kernel_size, 1)
-    x = tf.expand_dims(x, 1)  # B x D x 4(out)/D x H/2 x W/2
+    x = packnet_conv2d(inputs, out_channels * (r ** 2) //
+                       features_3d, kernel_size, 1)
+    x = tf.expand_dims(x, 4)  # B x H/2 x W/2 x 4(out)/D x D
     x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')
     b, c, d, h, w = x.shape
-    x = tf.reshape(x, [b, c * d, h, w])
+    x = tf.reshape(x, [b, h, w, c * d])
     return nn.depth_to_space(x, r)
 
 
 # TODO: Support different size packnet for scaling up/down
+# TODO: Support different channel format (right now we're supporting NHWC, we should also support NCHW)
 def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None):
     """
     Make the PackNet depth network.
@@ -105,10 +109,12 @@ def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None):
 
     # ================ ENCODER =================
     x = unpack_3d(x, 512, 3, features_3d=features_3d)
-    x = keras.layers.Add([x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5])
+    x = keras.layers.Add(
+        [x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5])
     x = packnet_conv2d(x, 512, 3, 1)
     x = unpack_3d(x, 256, 3, features_3d=features_3d)
-    x = keras.layers.Add([x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4])
+    x = keras.layers.Add(
+        [x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4])
     x = packnet_conv2d(x, 256, 3, 1)
     # TODO: This is wrong, look at the paper
     x = packnet_inverse_depth(x, 1)
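
The pack_3d changes are easiest to check by walking the NHWC shapes. Below is a minimal standalone sketch with assumed sizes (a 224x224x3 input, r=2, features_3d=8); the numbers are illustrative only. Note that the unchanged context lines in unpack_3d still construct their Conv3D without applying it to x and still unpack the shape as b, c, d, h, w, so that function likely needs the same treatment in a follow-up.

    import tensorflow as tf

    x = tf.zeros([1, 224, 224, 3])   # B x H x W x C
    x = tf.nn.space_to_depth(x, 2)   # (1, 112, 112, 12): each r x r neighbourhood folded into channels
    x = tf.expand_dims(x, 4)         # (1, 112, 112, 12, 1): trailing axis for the 3D convolution
    x = tf.keras.layers.Conv3D(8, kernel_size=3, padding='same')(x)  # (1, 112, 112, 12, 8)
    b, h, w, c, d = x.shape
    x = tf.reshape(x, (b, h, w, c * d))  # (1, 112, 112, 96): 3D features folded back into channels
    print(x.shape)
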
diff --git a/unsupervised/warp.py b/unsupervised/warp.py
index 22eb535..7cc19b3 100644
--- a/unsupervised/warp.py
+++ b/unsupervised/warp.py
@@ -5,6 +5,10 @@ def projective_inverse_warp(target_img, source_img, depth, pose, intrinsics):
     SFM Learner inverse warp step
     ps ~ K.T(t->s).Dt(pt).K^-1.pt
 
+    The idea is to map the pixel coordinates of the target image into 3d space (Dt(pt).K^-1.pt), then map these
+    onto the source image in pixel coordinates (K.T(t->s).{3d coord}), and finally sample the source image at the
+    projected coordinates (ps) to reconstruct the target image.
+
     :param target_img: Tensor (batch, height, width, 3)
     :param source_img: Tensor, same shape as target_img
     :param depth: Tensor, (batch, height, width, 1)
diff --git a/util.py b/util.py
new file mode 100644
index 0000000..dbd1c8c
--- /dev/null
+++ b/util.py
@@ -0,0 +1,21 @@
+import tensorflow as tf
+import tensorflow.keras as keras
+
+
+def crop_and_resize(x, out_shape=(224, 224)):
+    shape = tf.shape(x['depth'])
+    img_shape = tf.shape(x['image'])
+    # Ensure we get a square for when we resize it later.
+    # For horizontal images this is basically just cropping the sides off
+    center_shape = min(shape[1], shape[2], img_shape[1], img_shape[2])
+
+    def layer():
+        return keras.Sequential([
+            keras.layers.experimental.preprocessing.CenterCrop(
+                center_shape, center_shape),
+            keras.layers.experimental.preprocessing.Resizing(
+                out_shape[0], out_shape[1], interpolation='nearest')
+        ])
+
+    # Reshape label to 4d, can't use array unwrap as it's unsupported by tensorflow
+    return layer()(x['image']), layer()(tf.reshape(x['depth'], [shape[0], shape[1], shape[2], 1]))
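
The new docstring in unsupervised/warp.py is worth pinning down with numbers. This is a minimal sketch of ps ~ K.T(t->s).Dt(pt).K^-1.pt for a single pixel, with a made-up pinhole intrinsics matrix and an identity target-to-source pose; projective_inverse_warp does the same thing densely and then bilinearly samples the source image.

    import numpy as np

    K = np.array([[200., 0., 112.],   # assumed intrinsics: focal lengths and principal point
                  [0., 200., 112.],
                  [0., 0., 1.]])
    T = np.eye(4)                     # T(t->s): identity camera motion for illustration

    pt = np.array([150., 100., 1.])   # homogeneous target pixel pt
    depth = 2.5                       # Dt(pt)

    cam = depth * (np.linalg.inv(K) @ pt)    # Dt(pt).K^-1.pt: pixel -> 3d camera point
    cam_src = (T @ np.append(cam, 1.0))[:3]  # T(t->s): move the point into the source frame
    ps = K @ cam_src                         # K.{3d coord}: project back to pixels
    print(ps[:2] / ps[2])                    # [150. 100.], unchanged since T is identity
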