Refactor load/util, start fixing packnet to support NHWC format
This commit is contained in:
@@ -1,4 +1,3 @@
|
|||||||
import tensorflow as tf
|
|
||||||
import tensorflow.keras as keras
|
import tensorflow.keras as keras
|
||||||
import tensorflow_datasets as tfds
|
import tensorflow_datasets as tfds
|
||||||
|
|
||||||
@@ -25,7 +24,8 @@ def dense_depth(size, weights=None, shape=(224, 224, 3)):
|
|||||||
densenet_output_channels = densenet.layers[-1].output.shape[-1]
|
densenet_output_channels = densenet.layers[-1].output.shape[-1]
|
||||||
|
|
||||||
# Reduce the feature set (pointwise)
|
# Reduce the feature set (pointwise)
|
||||||
decoder = keras.layers.Conv2D(filters=densenet_output_channels, kernel_size=1, padding='same')(densenet.output)
|
decoder = keras.layers.Conv2D(
|
||||||
|
filters=densenet_output_channels, kernel_size=1, padding='same')(densenet.output)
|
||||||
|
|
||||||
# The actual decoder
|
# The actual decoder
|
||||||
decoder = dense_upsample_block(
|
decoder = dense_upsample_block(
|
||||||
@@ -87,30 +87,6 @@ def dense_nnconv5(size, weights=None, shape=(224, 224, 3), half_features=True):
|
|||||||
return keras.Model(inputs=input, outputs=decoder, name="fast_dense_depth")
|
return keras.Model(inputs=input, outputs=decoder, name="fast_dense_depth")
|
||||||
|
|
||||||
|
|
||||||
def load_nyu(download_dir='../nyu'):
|
|
||||||
"""
|
|
||||||
Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
|
|
||||||
:return: nyu_v2 dataset builder
|
|
||||||
"""
|
|
||||||
builder = tfds.builder('nyu_depth_v2')
|
|
||||||
builder.download_and_prepare(download_dir=download_dir)
|
|
||||||
return builder \
|
|
||||||
.as_dataset(split='train', shuffle_files=True) \
|
|
||||||
.shuffle(buffer_size=1024) \
|
|
||||||
.batch(8) \
|
|
||||||
.map(lambda x: fd.crop_and_resize(x))
|
|
||||||
|
|
||||||
|
|
||||||
def load_nyu_evaluate(download_dir='../nyu'):
|
|
||||||
"""
|
|
||||||
Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
|
|
||||||
:return: nyu_v2 dataset builder
|
|
||||||
"""
|
|
||||||
builder = tfds.builder('nyu_depth_v2')
|
|
||||||
builder.download_and_prepare(download_dir=download_dir)
|
|
||||||
return builder.as_dataset(split='validation').batch(1).map(lambda x: fd.crop_and_resize(x))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
model = dense_depth(169, 'imagenet')
|
model = dense_depth(169, 'imagenet')
|
||||||
model.summary()
|
model.summary()
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import tensorflow.keras as keras
|
import tensorflow.keras as keras
|
||||||
import tensorflow_datasets as tfds
|
import tensorflow_datasets as tfds
|
||||||
|
from load import load_nyu, load_nyu_evaluate
|
||||||
|
from util import crop_and_resize
|
||||||
# Needed for the kitti dataset, don't delete
|
# Needed for the kitti dataset, don't delete
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -160,56 +162,6 @@ def load_model(file):
|
|||||||
'delta2_metric': delta2_metric,
|
'delta2_metric': delta2_metric,
|
||||||
'delta3_metric': delta3_metric})
|
'delta3_metric': delta3_metric})
|
||||||
|
|
||||||
|
|
||||||
def crop_and_resize(x):
|
|
||||||
shape = tf.shape(x['depth'])
|
|
||||||
img_shape = tf.shape(x['image'])
|
|
||||||
# Ensure we get a square for when we resize is later.
|
|
||||||
# For horizontal images this is basically just cropping the sides off
|
|
||||||
center_shape = min(shape[1], shape[2], img_shape[1], img_shape[2])
|
|
||||||
|
|
||||||
def layer():
|
|
||||||
return keras.Sequential([
|
|
||||||
keras.layers.experimental.preprocessing.CenterCrop(
|
|
||||||
center_shape, center_shape),
|
|
||||||
keras.layers.experimental.preprocessing.Resizing(
|
|
||||||
224, 224, interpolation='nearest')
|
|
||||||
])
|
|
||||||
|
|
||||||
# Reshape label to 4d, can't use array unwrap as it's unsupported by tensorflow
|
|
||||||
return layer()(x['image']), layer()(tf.reshape(x['depth'], [shape[0], shape[1], shape[2], 1]))
|
|
||||||
|
|
||||||
|
|
||||||
def load_nyu(download_dir='../nyu'):
|
|
||||||
"""
|
|
||||||
Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
|
|
||||||
:return: nyu_v2 dataset builder
|
|
||||||
"""
|
|
||||||
builder = tfds.builder('nyu_depth_v2')
|
|
||||||
builder.download_and_prepare(download_dir=download_dir)
|
|
||||||
return builder \
|
|
||||||
.as_dataset(split='train', shuffle_files=True) \
|
|
||||||
.shuffle(buffer_size=1024) \
|
|
||||||
.batch(8) \
|
|
||||||
.map(lambda x: crop_and_resize(x))
|
|
||||||
|
|
||||||
|
|
||||||
def load_nyu_evaluate(download_dir='../nyu'):
|
|
||||||
"""
|
|
||||||
Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
|
|
||||||
:return: nyu_v2 dataset builder
|
|
||||||
"""
|
|
||||||
builder = tfds.builder('nyu_depth_v2')
|
|
||||||
builder.download_and_prepare(download_dir=download_dir)
|
|
||||||
return builder.as_dataset(split='validation').batch(1).map(lambda x: crop_and_resize(x))
|
|
||||||
|
|
||||||
|
|
||||||
def load_kitti(download_dir='../kitti'):
|
|
||||||
ds = tfds.builder('kitti_depth')
|
|
||||||
ds.download_and_prepare(download_dir=download_dir)
|
|
||||||
return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
model = mobilenet_nnconv5()
|
model = mobilenet_nnconv5()
|
||||||
model.summary()
|
model.summary()
|
||||||
|
|||||||
33
load.py
Normal file
33
load.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
from util import crop_and_resize
|
||||||
|
import tensorflow_datasets as tfds
|
||||||
|
import tensorflow.keras as keras
|
||||||
|
|
||||||
|
|
||||||
|
def load_nyu(download_dir='../nyu', out_shape=(224, 224)):
|
||||||
|
"""
|
||||||
|
Load the nyu_v2 dataset train split. Will be downloaded to download_dir (default: ../nyu)
|
||||||
|
:return: nyu_v2 train split as a shuffled dataset, batched by 8, with each batch center-cropped and resized to out_shape
|
||||||
|
"""
|
||||||
|
builder = tfds.builder('nyu_depth_v2')
|
||||||
|
builder.download_and_prepare(download_dir=download_dir)
|
||||||
|
return builder \
|
||||||
|
.as_dataset(split='train', shuffle_files=True) \
|
||||||
|
.shuffle(buffer_size=1024) \
|
||||||
|
.batch(8) \
|
||||||
|
.map(lambda x: crop_and_resize(x, out_shape))
|
||||||
|
|
||||||
|
|
||||||
|
def load_nyu_evaluate(download_dir='../nyu', out_shape=(224, 224)):
|
||||||
|
"""
|
||||||
|
Load the nyu_v2 dataset validation split. Will be downloaded to download_dir (default: ../nyu)
|
||||||
|
:return: nyu_v2 validation split as a dataset, batched by 1, with each batch center-cropped and resized to out_shape
|
||||||
|
"""
|
||||||
|
builder = tfds.builder('nyu_depth_v2')
|
||||||
|
builder.download_and_prepare(download_dir=download_dir)
|
||||||
|
return builder.as_dataset(split='validation').batch(1).map(lambda x: crop_and_resize(x, out_shape))
|
||||||
|
|
||||||
|
|
||||||
|
def load_kitti(download_dir='../kitti', out_shape=(224, 224)):
|
||||||
|
ds = tfds.builder('kitti_depth')
|
||||||
|
ds.download_and_prepare(download_dir=download_dir)
|
||||||
|
return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x, out_shape))
|
||||||
@@ -21,7 +21,8 @@ def residual_layer(inputs, out_channels, stride, dropout=None):
|
|||||||
"""
|
"""
|
||||||
x = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
|
x = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
|
||||||
x = layers.Conv2D(out_channels, 3, padding='same')(x)
|
x = layers.Conv2D(out_channels, 3, padding='same')(x)
|
||||||
shortcut = layers.Conv2D(out_channels, 3, padding='same', stride=stride)(inputs)
|
shortcut = layers.Conv2D(
|
||||||
|
out_channels, 3, padding='same', stride=stride)(inputs)
|
||||||
if dropout:
|
if dropout:
|
||||||
shortcut = keras.layers.SpatialDropout2D(dropout)(shortcut)
|
shortcut = keras.layers.SpatialDropout2D(dropout)(shortcut)
|
||||||
x = keras.layers.Concatenate()([x, shortcut])
|
x = keras.layers.Concatenate()([x, shortcut])
|
||||||
@@ -38,7 +39,8 @@ def residual_block(inputs, out_channels, residual_layers, stride, dropout=None):
|
|||||||
|
|
||||||
|
|
||||||
def packnet_conv2d(inputs, out_channels, kernel_size, stride):
|
def packnet_conv2d(inputs, out_channels, kernel_size, stride):
|
||||||
x = keras.layers.Conv2D(out_channels, kernel_size, stride, padding='same')
|
x = keras.layers.Conv2D(out_channels, kernel_size,
|
||||||
|
stride, padding='same')(inputs)
|
||||||
x = group_norm.GroupNormalization(16)(x)
|
x = group_norm.GroupNormalization(16)(x)
|
||||||
return keras.layers.ELU()(x)
|
return keras.layers.ELU()(x)
|
||||||
|
|
||||||
@@ -59,23 +61,25 @@ def pack_3d(inputs, kernel_size, r=2, features_3d=8):
|
|||||||
"""
|
"""
|
||||||
# Data format for single image in nyu is HWC (space_to_depth uses NHWC as default)
|
# Data format for single image in nyu is HWC (space_to_depth uses NHWC as default)
|
||||||
x = nn.space_to_depth(inputs, r)
|
x = nn.space_to_depth(inputs, r)
|
||||||
x = tf.expand_dims(x, 1)
|
x = tf.expand_dims(x, 4)
|
||||||
x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')
|
x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
|
||||||
b, c, d, h, w = x.shape
|
b, h, w, c, d = x.shape
|
||||||
x = tf.reshape(x, (b, c * d, h, w))
|
x = tf.reshape(x, (b, h, w, c * d))
|
||||||
return packnet_conv2d(x, inputs.shape[1], kernel_size, 1)
|
return packnet_conv2d(x, inputs.shape[3], kernel_size, 1)
|
||||||
|
|
||||||
|
|
||||||
def unpack_3d(inputs, out_channels, kernel_size, r=3, features_3d=8):
|
def unpack_3d(inputs, out_channels, kernel_size, r=3, features_3d=8):
|
||||||
x = packnet_conv2d(inputs, out_channels * (r ** 2) // features_3d, kernel_size, 1)
|
x = packnet_conv2d(inputs, out_channels * (r ** 2) //
|
||||||
x = tf.expand_dims(x, 1) # B x D x 4(out)/D x H/2 x W/2
|
features_3d, kernel_size, 1)
|
||||||
|
x = tf.expand_dims(x, 4) # B x H/2 x W/2 x 4(out)/D x D
|
||||||
x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')
|
x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')
|
||||||
b, c, d, h, w = x.shape
|
b, c, d, h, w = x.shape
|
||||||
x = tf.reshape(x, [b, c * d, h, w])
|
x = tf.reshape(x, [b, h, w, c * d])
|
||||||
return nn.depth_to_space(x, r)
|
return nn.depth_to_space(x, r)
|
||||||
|
|
||||||
|
|
||||||
# TODO: Support different size packnet for scaling up/down
|
# TODO: Support different size packnet for scaling up/down
|
||||||
|
# TODO: Support different channel format (right now we're supporting NHWC, we should also support NCHW)
|
||||||
def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None):
|
def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None):
|
||||||
"""
|
"""
|
||||||
Make the PackNet depth network.
|
Make the PackNet depth network.
|
||||||
@@ -105,10 +109,12 @@ def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None
|
|||||||
# ================ ENCODER =================
|
# ================ ENCODER =================
|
||||||
|
|
||||||
x = unpack_3d(x, 512, 3, features_3d=features_3d)
|
x = unpack_3d(x, 512, 3, features_3d=features_3d)
|
||||||
x = keras.layers.Add([x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5])
|
x = keras.layers.Add(
|
||||||
|
[x, skip_5]) if skip_add else keras.layers.Concatenate([x, skip_5])
|
||||||
x = packnet_conv2d(x, 512, 3, 1)
|
x = packnet_conv2d(x, 512, 3, 1)
|
||||||
x = unpack_3d(x, 256, 3, features_3d=features_3d)
|
x = unpack_3d(x, 256, 3, features_3d=features_3d)
|
||||||
x = keras.layers.Add([x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4])
|
x = keras.layers.Add(
|
||||||
|
[x, skip_4]) if skip_add else keras.layers.Concatenate([x, skip_4])
|
||||||
x = packnet_conv2d(x, 256, 3, 1)
|
x = packnet_conv2d(x, 256, 3, 1)
|
||||||
# TODO: This is wrong, look at the paper
|
# TODO: This is wrong, look at the paper
|
||||||
x = packnet_inverse_depth(x, 1)
|
x = packnet_inverse_depth(x, 1)
|
||||||
|
|||||||
@@ -5,6 +5,10 @@ def projective_inverse_warp(target_img, source_img, depth, pose, intrinsics):
|
|||||||
SFM Learner inverse warp step
|
SFM Learner inverse warp step
|
||||||
ps ~ K.T(t->s).Dt(pt).K^-1.pt
|
ps ~ K.T(t->s).Dt(pt).K^-1.pt
|
||||||
|
|
||||||
|
Idea is to map the pixel coordinates of the target image to 3d space (Dt(pt).K^-1.pt), then map these onto
|
||||||
|
the source image in pixel coordinates (K.T(t->s).{3d coord}), then using the projected coordinates we sample
|
||||||
|
the pixels in the source image (ps) to reconstruct the target image.
|
||||||
|
|
||||||
:param target_img: Tensor (batch, height, width, 3)
|
:param target_img: Tensor (batch, height, width, 3)
|
||||||
:param source_img: Tensor, same shape as target_img
|
:param source_img: Tensor, same shape as target_img
|
||||||
:param depth: Tensor, (batch, height, width, 1)
|
:param depth: Tensor, (batch, height, width, 1)
|
||||||
|
|||||||
21
util.py
Normal file
21
util.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
import tensorflow.keras as keras
|
||||||
|
|
||||||
|
|
||||||
|
def crop_and_resize(x, out_shape=(224, 224)):
|
||||||
|
shape = tf.shape(x['depth'])
|
||||||
|
img_shape = tf.shape(x['image'])
|
||||||
|
# Ensure we get a square for when we resize it later.
|
||||||
|
# For horizontal images this is basically just cropping the sides off
|
||||||
|
center_shape = min(shape[1], shape[2], img_shape[1], img_shape[2])
|
||||||
|
|
||||||
|
def layer():
|
||||||
|
return keras.Sequential([
|
||||||
|
keras.layers.experimental.preprocessing.CenterCrop(
|
||||||
|
center_shape, center_shape),
|
||||||
|
keras.layers.experimental.preprocessing.Resizing(
|
||||||
|
out_shape[0], out_shape[1], interpolation='nearest')
|
||||||
|
])
|
||||||
|
|
||||||
|
# Reshape the depth label to 4d by appending a trailing axis of size 1; can't use array unwrap as it's unsupported by tensorflow
|
||||||
|
return layer()(x['image']), layer()(tf.reshape(x['depth'], [shape[0], shape[1], shape[2], 1]))
|
||||||
Reference in New Issue
Block a user