Merge branch 'unsupervised' into 'main'

Packnet

See merge request vato007/fast-depth-tf!4

@@ -1,4 +1,3 @@
import tensorflow as tf
|
||||
import tensorflow.keras as keras
|
||||
import tensorflow_datasets as tfds
|
||||
|
||||
@@ -25,7 +24,8 @@ def dense_depth(size, weights=None, shape=(224, 224, 3)):
|
||||
densenet_output_channels = densenet.layers[-1].output.shape[-1]
|
||||
|
||||
# Reduce the feature set (pointwise)
|
||||
decoder = keras.layers.Conv2D(filters=densenet_output_channels, kernel_size=1, padding='same')(densenet.output)
|
||||
decoder = keras.layers.Conv2D(
|
||||
filters=densenet_output_channels, kernel_size=1, padding='same')(densenet.output)
|
||||
|
||||
# The actual decoder
|
||||
decoder = dense_upsample_block(
|
||||
@@ -66,19 +66,19 @@ def dense_nnconv5(size, weights=None, shape=(224, 224, 3), half_features=True):
|
||||
|
||||
# Reduce the feature set (pointwise)
|
||||
decoder = keras.layers.Conv2D(filters=int(densenet_output_shape[-1]), kernel_size=1, padding='same',
|
||||
input_shape=densenet_output_shape, name='conv2')(densenet.output)
|
||||
input_shape=densenet_output_shape, name='conv2')(densenet.output)
|
||||
|
||||
# TODO: More intermediate layers here?
|
||||
|
||||
# Fast Depth Decoder
|
||||
decoder = fd.nnconv5(decoder, densenet.get_layer('pool3_pool').output_shape[3], 1,
|
||||
skip_connection=densenet.get_layer('pool3_pool').output)
|
||||
skip_connection=densenet.get_layer('pool3_pool').output)
|
||||
decoder = fd.nnconv5(decoder, densenet.get_layer('pool2_pool').output_shape[3], 2,
|
||||
skip_connection=densenet.get_layer('pool2_pool').output)
|
||||
skip_connection=densenet.get_layer('pool2_pool').output)
|
||||
decoder = fd.nnconv5(decoder, densenet.get_layer('pool1').output_shape[3], 3,
|
||||
skip_connection=densenet.get_layer('pool1').output)
|
||||
skip_connection=densenet.get_layer('pool1').output)
|
||||
decoder = fd.nnconv5(decoder, densenet.get_layer('conv1/relu').output_shape[3], 4,
|
||||
skip_connection=densenet.get_layer('conv1/relu').output)
|
||||
skip_connection=densenet.get_layer('conv1/relu').output)
|
||||
|
||||
# Final Pointwise for depth extraction
|
||||
decoder = keras.layers.Conv2D(1, 1, padding='same')(decoder)
|
||||
@@ -87,30 +87,6 @@ def dense_nnconv5(size, weights=None, shape=(224, 224, 3), half_features=True):
|
||||
return keras.Model(inputs=input, outputs=decoder, name="fast_dense_depth")
|
||||
|
||||
|
||||
def load_nyu(download_dir='../nyu'):
|
||||
"""
|
||||
Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
|
||||
:return: nyu_v2 dataset builder
|
||||
"""
|
||||
builder = tfds.builder('nyu_depth_v2')
|
||||
builder.download_and_prepare(download_dir=download_dir)
|
||||
return builder \
|
||||
.as_dataset(split='train', shuffle_files=True) \
|
||||
.shuffle(buffer_size=1024) \
|
||||
.batch(8) \
|
||||
.map(lambda x: fd.crop_and_resize(x))
|
||||
|
||||
|
||||
def load_nyu_evaluate(download_dir='../nyu'):
|
||||
"""
|
||||
Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
|
||||
:return: nyu_v2 dataset builder
|
||||
"""
|
||||
builder = tfds.builder('nyu_depth_v2')
|
||||
builder.download_and_prepare(download_dir=download_dir)
|
||||
return builder.as_dataset(split='validation').batch(1).map(lambda x: fd.crop_and_resize(x))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
model = dense_depth(169, 'imagenet')
|
||||
model.summary()
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
import tensorflow as tf
|
||||
import tensorflow.keras as keras
|
||||
import tensorflow_datasets as tfds
|
||||
# Needed for the kitti dataset, don't delete
|
||||
|
||||
from load import load_nyu_evaluate
|
||||
from metric import *
|
||||
from util import crop_and_resize
|
||||
|
||||
"""
|
||||
Unofficial tensorflow keras implementation of FastDepth (mobilenet_nnconv5).
|
||||
@@ -74,59 +75,6 @@ def mobilenet_nnconv5(weights=None, shape=(224, 224, 3)):
|
||||
return keras.Model(inputs=input, outputs=x, name="fast_depth")
|
||||
|
||||
|
||||
def delta1_metric(y_true, y_pred):
|
||||
maxRatio = tf.maximum(y_pred / y_true, y_true / y_pred)
|
||||
return tf.nn.moments(tf.cast(maxRatio < tf.convert_to_tensor(1.25), tf.float32), axes=None)[0]
|
||||
|
||||
|
||||
def delta2_metric(y_true, y_pred):
|
||||
maxRatio = tf.maximum(y_pred / y_true, y_true / y_pred)
|
||||
return tf.nn.moments(tf.cast(maxRatio < tf.convert_to_tensor(1.25 ** 2), tf.float32), axes=None)[0]
|
||||
|
||||
|
||||
def delta3_metric(y_true, y_pred):
|
||||
maxRatio = tf.maximum(y_pred / y_true, y_true / y_pred)
|
||||
return tf.nn.moments(tf.cast(maxRatio < tf.convert_to_tensor(1.25 ** 3), tf.float32), axes=None)[0]
|
||||
|
||||
|
||||
def compile(model, optimiser=keras.optimizers.SGD(), loss=keras.losses.MeanSquaredError(), custom_metrics=None):
|
||||
"""
|
||||
Compile FastDepth model with relevant metrics
|
||||
:param model: Model to compile
|
||||
:param optimiser: Custom optimiser to use
|
||||
:param loss: Loss function to use
|
||||
:param include_metrics: Whether to include metrics (RMSE, MSE, a1,2,3)
|
||||
"""
|
||||
model.compile(optimizer=optimiser,
|
||||
loss=loss,
|
||||
metrics=[keras.metrics.RootMeanSquaredError(),
|
||||
keras.metrics.MeanSquaredError(),
|
||||
delta1_metric,
|
||||
delta2_metric,
|
||||
delta3_metric] if custom_metrics is None else custom_metrics)
|
||||
|
||||
|
||||
def train(existing_model=None, pretrained_weights='imagenet', epochs=4, save_file=None, dataset=None):
|
||||
"""
|
||||
Compile, train and save (if a save file is specified) a Fast Depth model.
|
||||
:param existing_model: Existing FastDepth model to train. None will create
|
||||
:param pretrained_weights: Weights to use if existing_model is not specified. See keras.applications.MobileNet
|
||||
weights parameter for options here.
|
||||
:param epochs: Number of epochs to run for
|
||||
:param save_file: File/directory to save to after training. By default the model won't be saved
|
||||
:param dataset: Train dataset to use. By default will DOWNLOAD and use tensorflow nyu_v2 dataset
|
||||
"""
|
||||
if not existing_model:
|
||||
existing_model = mobilenet_nnconv5(pretrained_weights)
|
||||
compile(existing_model)
|
||||
if not dataset:
|
||||
dataset = load_nyu()
|
||||
existing_model.fit(dataset, epochs=epochs)
|
||||
if save_file:
|
||||
existing_model.save(save_file)
|
||||
return existing_model
|
||||
|
||||
|
||||
def evaluate(compiled_model, dataset=None):
|
||||
"""
|
||||
Evaluate the model using rmse, delta1/2/3 metrics
|
||||
@@ -150,66 +98,6 @@ def forward(model, image):
|
||||
return model(crop_and_resize(image))
|
||||
|
||||
|
||||
def load_model(file):
|
||||
"""
|
||||
Load previously trained FastDepth model from disk. Will include relevant metrics (custom objects)
|
||||
:param file: File/directory to load the model from
|
||||
:return:
|
||||
"""
|
||||
return keras.models.load_model(file, custom_objects={'delta1_metric': delta1_metric,
|
||||
'delta2_metric': delta2_metric,
|
||||
'delta3_metric': delta3_metric})
|
||||
|
||||
|
||||
def crop_and_resize(x):
|
||||
shape = tf.shape(x['depth'])
|
||||
img_shape = tf.shape(x['image'])
|
||||
# Ensure we get a square for when we resize is later.
|
||||
# For horizontal images this is basically just cropping the sides off
|
||||
center_shape = min(shape[1], shape[2], img_shape[1], img_shape[2])
|
||||
|
||||
def layer():
|
||||
return keras.Sequential([
|
||||
keras.layers.experimental.preprocessing.CenterCrop(
|
||||
center_shape, center_shape),
|
||||
keras.layers.experimental.preprocessing.Resizing(
|
||||
224, 224, interpolation='nearest')
|
||||
])
|
||||
|
||||
# Reshape label to 4d, can't use array unwrap as it's unsupported by tensorflow
|
||||
return layer()(x['image']), layer()(tf.reshape(x['depth'], [shape[0], shape[1], shape[2], 1]))
|
||||
|
||||
|
||||
def load_nyu(download_dir='../nyu'):
|
||||
"""
|
||||
Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
|
||||
:return: nyu_v2 dataset builder
|
||||
"""
|
||||
builder = tfds.builder('nyu_depth_v2')
|
||||
builder.download_and_prepare(download_dir=download_dir)
|
||||
return builder \
|
||||
.as_dataset(split='train', shuffle_files=True) \
|
||||
.shuffle(buffer_size=1024) \
|
||||
.batch(8) \
|
||||
.map(lambda x: crop_and_resize(x))
|
||||
|
||||
|
||||
def load_nyu_evaluate(download_dir='../nyu'):
|
||||
"""
|
||||
Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
|
||||
:return: nyu_v2 dataset builder
|
||||
"""
|
||||
builder = tfds.builder('nyu_depth_v2')
|
||||
builder.download_and_prepare(download_dir=download_dir)
|
||||
return builder.as_dataset(split='validation').batch(1).map(lambda x: crop_and_resize(x))
|
||||
|
||||
|
||||
def load_kitti(download_dir='../kitti'):
|
||||
ds = tfds.builder('kitti_depth')
|
||||
ds.download_and_prepare(download_dir=download_dir)
|
||||
return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
model = mobilenet_nnconv5()
|
||||
model.summary()
|
||||
209  group_norm.py  Normal file
@@ -0,0 +1,209 @@
# MIT License
|
||||
#
|
||||
# Copyright (c) 2019 Somshubra Majumdar
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
# Taken from: https://github.com/titu1994/Keras-Group-Normalization/blob/master/group_norm.py
|
||||
|
||||
from tensorflow.keras import backend as K
|
||||
from tensorflow.keras import constraints
|
||||
from tensorflow.keras import initializers
|
||||
from tensorflow.keras import regularizers
|
||||
from tensorflow.keras.layers import Layer, InputSpec
|
||||
|
||||
|
||||
class GroupNormalization(Layer):
|
||||
"""Group normalization layer
|
||||
Group Normalization divides the channels into groups and computes within each group
|
||||
the mean and variance for normalization. GN's computation is independent of batch sizes,
|
||||
and its accuracy is stable in a wide range of batch sizes
|
||||
# Arguments
|
||||
groups: Integer, the number of groups for Group Normalization.
|
||||
axis: Integer, the axis that should be normalized
|
||||
(typically the features axis).
|
||||
For instance, after a `Conv2D` layer with
|
||||
`data_format="channels_first"`,
|
||||
set `axis=1` in `BatchNormalization`.
|
||||
epsilon: Small float added to variance to avoid dividing by zero.
|
||||
center: If True, add offset of `beta` to normalized tensor.
|
||||
If False, `beta` is ignored.
|
||||
scale: If True, multiply by `gamma`.
|
||||
If False, `gamma` is not used.
|
||||
When the next layer is linear (also e.g. `nn.relu`),
|
||||
this can be disabled since the scaling
|
||||
will be done by the next layer.
|
||||
beta_initializer: Initializer for the beta weight.
|
||||
gamma_initializer: Initializer for the gamma weight.
|
||||
beta_regularizer: Optional regularizer for the beta weight.
|
||||
gamma_regularizer: Optional regularizer for the gamma weight.
|
||||
beta_constraint: Optional constraint for the beta weight.
|
||||
gamma_constraint: Optional constraint for the gamma weight.
|
||||
# Input shape
|
||||
Arbitrary. Use the keyword argument `input_shape`
|
||||
(tuple of integers, does not include the samples axis)
|
||||
when using this layer as the first layer in a model.
|
||||
# Output shape
|
||||
Same shape as input.
|
||||
# References
|
||||
- [Group Normalization](https://arxiv.org/abs/1803.08494)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
groups=32,
|
||||
axis=-1,
|
||||
epsilon=1e-5,
|
||||
center=True,
|
||||
scale=True,
|
||||
beta_initializer='zeros',
|
||||
gamma_initializer='ones',
|
||||
beta_regularizer=None,
|
||||
gamma_regularizer=None,
|
||||
beta_constraint=None,
|
||||
gamma_constraint=None,
|
||||
**kwargs):
|
||||
super(GroupNormalization, self).__init__(**kwargs)
|
||||
self.supports_masking = True
|
||||
self.groups = groups
|
||||
self.axis = axis
|
||||
self.epsilon = epsilon
|
||||
self.center = center
|
||||
self.scale = scale
|
||||
self.beta_initializer = initializers.get(beta_initializer)
|
||||
self.gamma_initializer = initializers.get(gamma_initializer)
|
||||
self.beta_regularizer = regularizers.get(beta_regularizer)
|
||||
self.gamma_regularizer = regularizers.get(gamma_regularizer)
|
||||
self.beta_constraint = constraints.get(beta_constraint)
|
||||
self.gamma_constraint = constraints.get(gamma_constraint)
|
||||
self.gamma = None
|
||||
self.beta = None
|
||||
|
||||
def build(self, input_shape):
|
||||
dim = input_shape[self.axis]
|
||||
|
||||
if dim is None:
|
||||
raise ValueError('Axis ' + str(self.axis) + ' of '
|
||||
'input tensor should have a defined dimension '
|
||||
'but the layer received an input with shape ' +
|
||||
str(input_shape) + '.')
|
||||
|
||||
if dim < self.groups:
|
||||
raise ValueError('Number of groups (' + str(self.groups) + ') cannot be '
|
||||
'more than the number of channels (' +
|
||||
str(dim) + ').')
|
||||
|
||||
if dim % self.groups != 0:
|
||||
raise ValueError('Number of groups (' + str(self.groups) + ') must be a '
|
||||
'multiple of the number of channels (' +
|
||||
str(dim) + ').')
|
||||
|
||||
self.input_spec = InputSpec(ndim=len(input_shape),
|
||||
axes={self.axis: dim})
|
||||
shape = (dim,)
|
||||
|
||||
if self.scale:
|
||||
self.gamma = self.add_weight(shape=shape,
|
||||
name='gamma',
|
||||
initializer=self.gamma_initializer,
|
||||
regularizer=self.gamma_regularizer,
|
||||
constraint=self.gamma_constraint)
|
||||
if self.center:
|
||||
self.beta = self.add_weight(shape=shape,
|
||||
name='beta',
|
||||
initializer=self.beta_initializer,
|
||||
regularizer=self.beta_regularizer,
|
||||
constraint=self.beta_constraint)
|
||||
self.built = True
|
||||
|
||||
def call(self, inputs, **kwargs):
|
||||
input_shape = K.int_shape(inputs)
|
||||
tensor_input_shape = K.shape(inputs)
|
||||
|
||||
# Prepare broadcasting shape.
|
||||
reduction_axes = list(range(len(input_shape)))
|
||||
del reduction_axes[self.axis]
|
||||
broadcast_shape = [1] * len(input_shape)
|
||||
broadcast_shape[self.axis] = input_shape[self.axis] // self.groups
|
||||
broadcast_shape.insert(1, self.groups)
|
||||
|
||||
reshape_group_shape = K.shape(inputs)
|
||||
group_axes = [reshape_group_shape[i] for i in range(len(input_shape))]
|
||||
group_axes[self.axis] = input_shape[self.axis] // self.groups
|
||||
group_axes.insert(1, self.groups)
|
||||
|
||||
# reshape inputs to new group shape
|
||||
group_shape = [group_axes[0], self.groups] + group_axes[2:]
|
||||
group_shape = K.stack(group_shape)
|
||||
inputs = K.reshape(inputs, group_shape)
|
||||
|
||||
group_reduction_axes = list(range(len(group_axes)))
|
||||
group_reduction_axes = group_reduction_axes[2:]
|
||||
|
||||
mean = K.mean(inputs, axis=group_reduction_axes, keepdims=True)
|
||||
variance = K.var(inputs, axis=group_reduction_axes, keepdims=True)
|
||||
|
||||
inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))
|
||||
|
||||
# prepare broadcast shape
|
||||
inputs = K.reshape(inputs, group_shape)
|
||||
outputs = inputs
|
||||
|
||||
# In this case we must explicitly broadcast all parameters.
|
||||
if self.scale:
|
||||
broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
|
||||
outputs = outputs * broadcast_gamma
|
||||
|
||||
if self.center:
|
||||
broadcast_beta = K.reshape(self.beta, broadcast_shape)
|
||||
outputs = outputs + broadcast_beta
|
||||
|
||||
outputs = K.reshape(outputs, tensor_input_shape)
|
||||
|
||||
return outputs
|
||||
|
||||
def get_config(self):
|
||||
config = {
|
||||
'groups': self.groups,
|
||||
'axis': self.axis,
|
||||
'epsilon': self.epsilon,
|
||||
'center': self.center,
|
||||
'scale': self.scale,
|
||||
'beta_initializer': initializers.serialize(self.beta_initializer),
|
||||
'gamma_initializer': initializers.serialize(self.gamma_initializer),
|
||||
'beta_regularizer': regularizers.serialize(self.beta_regularizer),
|
||||
'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
|
||||
'beta_constraint': constraints.serialize(self.beta_constraint),
|
||||
'gamma_constraint': constraints.serialize(self.gamma_constraint)
|
||||
}
|
||||
base_config = super(GroupNormalization, self).get_config()
|
||||
return dict(list(base_config.items()) + list(config.items()))
|
||||
|
||||
def compute_output_shape(self, input_shape):
|
||||
return input_shape
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
from tensorflow.keras.layers import Input
|
||||
from tensorflow.keras.models import Model
|
||||
|
||||
ip = Input(shape=(None, None, 4))
|
||||
# ip = Input(batch_shape=(100, None, None, 2))
|
||||
x = GroupNormalization(groups=2, axis=-1, epsilon=0.1)(ip)
|
||||
model = Model(ip, x)
|
||||
model.summary()
48  load.py  Normal file
@@ -0,0 +1,48 @@
import tensorflow.keras as keras
|
||||
import tensorflow_datasets as tfds
|
||||
|
||||
from losses import dense_depth_loss_function
|
||||
from metric import *
|
||||
from util import crop_and_resize
|
||||
|
||||
|
||||
def load_nyu(download_dir='../nyu', out_shape=(224, 224)):
|
||||
"""
|
||||
Load the nyu_v2 dataset train split. Will be downloaded to ../nyu
|
||||
:return: nyu_v2 dataset builder
|
||||
"""
|
||||
builder = tfds.builder('nyu_depth_v2')
|
||||
builder.download_and_prepare(download_dir=download_dir)
|
||||
return builder \
|
||||
.as_dataset(split='train', shuffle_files=True) \
|
||||
.shuffle(buffer_size=1024) \
|
||||
.batch(8) \
|
||||
.map(lambda x: crop_and_resize(x, out_shape))
|
||||
|
||||
|
||||
def load_nyu_evaluate(download_dir='../nyu', out_shape=(224, 224)):
|
||||
"""
|
||||
Load the nyu_v2 dataset validation split. Will be downloaded to ../nyu
|
||||
:return: nyu_v2 dataset builder
|
||||
"""
|
||||
builder = tfds.builder('nyu_depth_v2')
|
||||
builder.download_and_prepare(download_dir=download_dir)
|
||||
return builder.as_dataset(split='validation').batch(1).map(lambda x: crop_and_resize(x, out_shape))
|
||||
|
||||
|
||||
def load_kitti(download_dir='../kitti', out_shape=(224, 224)):
|
||||
ds = tfds.builder('kitti_depth')
|
||||
ds.download_and_prepare(download_dir=download_dir)
|
||||
return ds.as_dataset(tfds.Split.TRAIN).batch(8).map(lambda x: crop_and_resize(x, out_shape))
|
||||
|
||||
|
||||
def load_model(file):
|
||||
"""
|
||||
Load previously trained FastDepth model from disk. Will include relevant metrics (custom objects)
|
||||
:param file: File/directory to load the model from
|
||||
:return:
|
||||
"""
|
||||
return keras.models.load_model(file, custom_objects={'delta1_metric': delta1_metric,
|
||||
'delta2_metric': delta2,
|
||||
'delta3_metric': delta3,
|
||||
'dense_depth_loss_function': dense_depth_loss_function})
10  losses.py
@@ -6,15 +6,15 @@ def dense_depth_loss_function(y, y_pred):
Implementation of the loss from the dense depth paper https://arxiv.org/pdf/1812.11941.pdf
|
||||
"""
|
||||
# Point-wise L1 loss
|
||||
l_depth = tf.reduce_mean(tf.math.abs(y_pred - y), axis=-1)
|
||||
l1_depth = tf.reduce_mean(tf.math.abs(y_pred - y), axis=-1)
|
||||
|
||||
# L1 loss over image gradients
|
||||
dy, dx = tf.image.image_gradients(y)
|
||||
dy_pred, dx_pred = tf.image.image_gradients(y_pred)
|
||||
l_grad = tf.reduce_mean(tf.math.abs(dy_pred - dy) +
|
||||
tf.math.abs(dx_pred - dx), axis=-1)
|
||||
gradient = tf.reduce_mean(tf.math.abs(dy_pred - dy) +
|
||||
tf.math.abs(dx_pred - dx), axis=-1)
|
||||
|
||||
# Structural Similarity (SSIM)
|
||||
l_ssim = (1 - tf.image.ssim(y, y_pred, 500)) / 2
|
||||
ssim = (1 - tf.image.ssim(y, y_pred, 500)) / 2
|
||||
|
||||
return 0.1 * tf.reduce_mean(l_depth) + tf.reduce_mean(l_grad) + l_ssim
|
||||
return 0.1 * tf.reduce_mean(l1_depth) + tf.reduce_mean(gradient) + ssim
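
Usage sketch (not part of the diff): a quick shape check of the combined loss on random depth maps. The values here are arbitrary; the 500 passed to tf.image.ssim above sets the assumed dynamic range of the depth values.

import tensorflow as tf
from losses import dense_depth_loss_function

y_true = tf.random.uniform([2, 224, 224, 1], minval=0.5, maxval=10.0)
y_pred = tf.random.uniform([2, 224, 224, 1], minval=0.5, maxval=10.0)
print(dense_depth_loss_function(y_true, y_pred))  # one value per image in the batch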
16  metric.py  Normal file
@@ -0,0 +1,16 @@
import tensorflow as tf


def delta1_metric(y_true, y_pred):
    max_ratio = tf.maximum(y_pred / y_true, y_true / y_pred)
    return tf.reduce_mean(tf.cast(max_ratio < tf.convert_to_tensor(1.25), tf.float32))


def delta2(y_true, y_pred):
    max_ratio = tf.maximum(y_pred / y_true, y_true / y_pred)
    return tf.reduce_mean(tf.cast(max_ratio < tf.convert_to_tensor(1.25 ** 2), tf.float32))


def delta3(y_true, y_pred):
    max_ratio = tf.maximum(y_pred / y_true, y_true / y_pred)
    return tf.reduce_mean(tf.cast(max_ratio < tf.convert_to_tensor(1.25 ** 3), tf.float32))
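
Usage sketch (not part of the diff): the delta metrics report the fraction of predictions whose ratio to the ground truth stays under the threshold, e.g.:

import tensorflow as tf
from metric import delta1_metric, delta3

# Two of the four predictions are within a 1.25x ratio of the ground truth and
# all four are within 1.25^3, so delta1 comes out at 0.5 and delta3 at 1.0.
y_true = tf.constant([[1.0, 2.0, 4.0, 8.0]])
y_pred = tf.constant([[1.1, 2.1, 6.0, 4.5]])
print(delta1_metric(y_true, y_pred).numpy(), delta3(y_true, y_pred).numpy())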
74  openvino_inference.py  Normal file
@@ -0,0 +1,74 @@
import argparse
|
||||
import time
|
||||
|
||||
import cv2
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from openvino.inference_engine import IECore
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
|
||||
"""Parse and return command line arguments"""
|
||||
parser = argparse.ArgumentParser(add_help=False)
|
||||
args = parser.add_argument_group('Options')
|
||||
# fmt: off
|
||||
args.add_argument('-h', '--help', action='help', help='Show this help message and exit.')
|
||||
args.add_argument('-m', '--model', required=True, type=str,
|
||||
help='Required. Path to an .xml or .onnx file with a trained model.')
|
||||
args.add_argument('-i', '--input', required=True, type=str, help='Required. Path to an image file.')
|
||||
args.add_argument('-d', '--device', default='CPU', type=str,
|
||||
help='Optional. Specify the target device to infer on; CPU, GPU, MYRIAD, HDDL or HETERO: '
|
||||
'is acceptable. The sample will look for a suitable plugin for device specified. '
|
||||
'Default value is CPU.')
|
||||
# fmt: on
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def sample(model_location, image_location, device='CPU'):
|
||||
ie = IECore()
|
||||
|
||||
net = ie.read_network(model=model_location)
|
||||
input_blob = next(iter(net.input_info))
|
||||
output_blob = next(iter(net.outputs))
|
||||
|
||||
b, c, h, w = net.input_info[input_blob].input_data.shape
|
||||
image = cv2.imread(image_location)
|
||||
input_ratio = h / w
|
||||
target_ratio = image.shape[0] / image.shape[1]
|
||||
crop_axis = 0 if target_ratio > input_ratio else 1
|
||||
crop_factor = input_ratio * target_ratio / 2
|
||||
center = [image.shape[0] / 2, image.shape[1] / 2]
|
||||
x1 = int(center[0] - image.shape[0] * crop_factor) if crop_axis == 0 else 0
|
||||
x2 = int(center[0] + image.shape[0] * crop_factor) if crop_axis == 0 else image.shape[0]
|
||||
y1 = int(center[1] - image.shape[1] * crop_factor) if crop_axis == 1 else 0
|
||||
y2 = int(center[1] + image.shape[1] * crop_factor) if crop_axis == 1 else image.shape[1]
|
||||
# Crop to target aspect ratio
|
||||
image = image[x1:x2, y1:y2]
|
||||
if image.shape[:-1] != (h, w):
|
||||
image = cv2.resize(image, (w, h))
|
||||
|
||||
image = image.transpose((2, 0, 1))
|
||||
# For batching
|
||||
image = np.expand_dims(image, axis=0)
|
||||
|
||||
exec_net = ie.load_network(network=net, device_name=device)
|
||||
start = time.time()
|
||||
res = exec_net.infer(inputs={input_blob: image})
|
||||
print('First Inference Time Seconds: ' + str(time.time() - start))
|
||||
start = time.time()
|
||||
res = exec_net.infer(inputs={input_blob: image})
|
||||
print('Second Inference Time Seconds: ' + str(time.time() - start))
|
||||
start = time.time()
|
||||
res = exec_net.infer(inputs={input_blob: image})
|
||||
print('Third Inference Time Seconds: ' + str(time.time() - start))
|
||||
res = res[output_blob]
|
||||
depth = res[0][0]
|
||||
fig = plt.figure()
|
||||
ii = plt.imshow(depth, interpolation='nearest')
|
||||
fig.colorbar(ii)
|
||||
plt.show()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parsed_args = parse_args()
|
||||
sample(parsed_args.model, parsed_args.input, parsed_args.device)
150  packnet_functional.py  Normal file
@@ -0,0 +1,150 @@
import tensorflow as tf
|
||||
import tensorflow.keras as keras
|
||||
import tensorflow.keras.layers as layers
|
||||
from tensorflow import nn
|
||||
|
||||
import group_norm
|
||||
|
||||
|
||||
def pack_layer():
|
||||
pass
|
||||
|
||||
|
||||
def residual_layer(inputs, out_channels, stride, dropout=None):
|
||||
"""
|
||||
Keras implementation of the Residual block (ResNet) as used in PackNet
|
||||
:param inputs:
|
||||
:param out_channels:
|
||||
:param stride:
|
||||
:param dropout:
|
||||
:return:
|
||||
"""
|
||||
x = layers.Conv2D(out_channels, 3, padding='same', strides=stride)(inputs)
|
||||
x = layers.Conv2D(out_channels, 3, padding='same')(x)
|
||||
shortcut = layers.Conv2D(
|
||||
out_channels, 3, padding='same', strides=stride)(inputs)
|
||||
if dropout:
|
||||
shortcut = keras.layers.SpatialDropout2D(dropout)(shortcut)
|
||||
x = keras.layers.Concatenate()([x, shortcut])
|
||||
x = group_norm.GroupNormalization(16)(x)
|
||||
return keras.layers.ELU()(x)
|
||||
|
||||
|
||||
# Packnet usually expects more than one layer per block (2,2,3,3)
|
||||
def residual_block(inputs, out_channels, residual_layers, stride, dropout=None):
|
||||
x = inputs
|
||||
for i in range(0, residual_layers):
|
||||
x = residual_layer(x, out_channels, stride, dropout)
|
||||
return x
|
||||
|
||||
|
||||
def packnet_conv2d(inputs, out_channels, kernel_size, stride):
|
||||
x = keras.layers.Conv2D(out_channels, kernel_size,
|
||||
stride, padding='same')(inputs)
|
||||
x = group_norm.GroupNormalization(16)(x)
|
||||
return keras.layers.ELU()(x)
|
||||
|
||||
|
||||
def packnet_inverse_depth(inputs, out_channels=1, min_depth=0.5):
|
||||
x = layers.Conv2D(out_channels, 3, padding='same')(inputs)
|
||||
return keras.activations.sigmoid(x) / min_depth
|
||||
|
||||
|
||||
def pack_3d(inputs, kernel_size, r=2, features_3d=8):
|
||||
"""
|
||||
Implementation of the 3d packing block proposed here: https://arxiv.org/abs/1905.02693
|
||||
:param inputs:
|
||||
:param kernel_size:
|
||||
:param r:
|
||||
:param features_3d:
|
||||
:return:
|
||||
"""
|
||||
# Data format for single image in nyu is HWC (space_to_depth uses NHWC as default)
|
||||
x = nn.space_to_depth(inputs, r)
|
||||
x = tf.expand_dims(x, 4)
|
||||
x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
|
||||
b, h, w, c, d = x.shape
|
||||
x = keras.layers.Reshape((h, w, c * d))(x)
|
||||
return packnet_conv2d(x, inputs.shape[3], kernel_size, 1)
|
||||
|
||||
|
||||
def unpack_3d(inputs, out_channels, kernel_size, r=2, features_3d=8):
|
||||
x = packnet_conv2d(inputs, out_channels * (r ** 2) //
|
||||
features_3d, kernel_size, 1)
|
||||
x = tf.expand_dims(x, 4) # B x H/2 x W/2 x 4(out)/D x D
|
||||
x = keras.layers.Conv3D(features_3d, kernel_size=3, padding='same')(x)
|
||||
b, h, w, c, d = x.shape
|
||||
x = keras.layers.Reshape([h, w, c * d])(x)
|
||||
return nn.depth_to_space(x, r)
|
||||
|
||||
|
||||
# TODO: Support different size packnet for scaling up/down
|
||||
# TODO: Support different channel format (right now we're supporting NHWC, we should also support NCHW)
|
||||
def make_packnet(shape=(224, 224, 3), skip_add=True, features_3d=4, dropout=None):
|
||||
"""
|
||||
Make the PackNet depth network.
|
||||
:param shape: Input shape of the image
|
||||
:param skip_add: Set to use add rather than concat skip connections, defaults to True
|
||||
:return:
|
||||
"""
|
||||
|
||||
# ================ ENCODER =================
|
||||
input = keras.layers.Input(shape=shape)
|
||||
x = packnet_conv2d(input, 32, 5, 1)
|
||||
skip_1 = x
|
||||
x = packnet_conv2d(x, 64, 7, 1)
|
||||
x = pack_3d(x, 5, features_3d=features_3d)
|
||||
skip_2 = x
|
||||
x = residual_block(x, 64, 2, 1, dropout)
|
||||
x = pack_3d(x, 3, features_3d=features_3d)
|
||||
skip_3 = x
|
||||
x = residual_block(x, 128, 2, 1, dropout)
|
||||
x = pack_3d(x, 3, features_3d=features_3d)
|
||||
skip_4 = x
|
||||
x = residual_block(x, 256, 3, 1, dropout)
|
||||
x = pack_3d(x, 3, features_3d=features_3d)
|
||||
skip_5 = x
|
||||
x = residual_block(x, 512, 3, 1, dropout)
|
||||
x = pack_3d(x, 3, features_3d=features_3d)
|
||||
# ================ ENCODER =================
|
||||
|
||||
# ================ DECODER =================
|
||||
# layer 7
|
||||
x = unpack_3d(x, 512, 3, features_3d=features_3d)
|
||||
x = keras.layers.Add()(
|
||||
[x, skip_5]) if skip_add else keras.layers.Concatenate()([x, skip_5])
|
||||
x = packnet_conv2d(x, 512, 3, 1)
|
||||
# layer 8
|
||||
x = unpack_3d(x, 256, 3, features_3d=features_3d)
|
||||
x = keras.layers.Add()(
|
||||
[x, skip_4]) if skip_add else keras.layers.Concatenate()([x, skip_4])
|
||||
x = packnet_conv2d(x, 256, 3, 1)
|
||||
layer_8 = x
|
||||
# layer 9
|
||||
x = packnet_inverse_depth(x, 1)
|
||||
# layer 10
|
||||
u_layer_8 = unpack_3d(layer_8, 128, 3, features_3d=features_3d)
|
||||
x = keras.layers.UpSampling2D()(x)
|
||||
x = keras.layers.Add()([u_layer_8, skip_3, x]) if skip_add else keras.layers.Concatenate()([u_layer_8, skip_3, x])
|
||||
x = packnet_conv2d(x, 128, 3, 1)
|
||||
layer_10 = x
|
||||
# layer 11
|
||||
x = packnet_inverse_depth(x, 1)
|
||||
# layer 12
|
||||
u_layer_10 = unpack_3d(layer_10, 64, 3, features_3d=features_3d)
|
||||
x = keras.layers.UpSampling2D()(x)
|
||||
x = keras.layers.Add()([u_layer_10, skip_2, x]) if skip_add else keras.layers.Concatenate()([u_layer_10, skip_2, x])
|
||||
x = packnet_conv2d(x, 64, 3, 1)
|
||||
layer_12 = x
|
||||
# layer 13
|
||||
x = packnet_inverse_depth(x)
|
||||
# layer 14
|
||||
u_layer_12 = unpack_3d(layer_12, 32, 3, features_3d=features_3d)
|
||||
x = keras.layers.UpSampling2D()(x)
|
||||
x = keras.layers.Add()([u_layer_12, skip_1, x]) if skip_add else keras.layers.Concatenate()([u_layer_12, skip_1, x])
|
||||
x = packnet_conv2d(x, 32, 3, 1)
|
||||
# layer 15
|
||||
x = packnet_inverse_depth(x)
|
||||
# ================ DECODER =================
|
||||
|
||||
return keras.Model(inputs=input, outputs=x, name="PackNet")
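
Usage sketch (not part of the diff): the functional model above can be built and inspected directly, mirroring the __main__ blocks in the other model files.

from packnet_functional import make_packnet

model = make_packnet(shape=(224, 224, 3), features_3d=4)
model.summary()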
37  packnet_tests.py  Normal file
@@ -0,0 +1,37 @@
import unittest
|
||||
|
||||
import tensorflow as tf
|
||||
|
||||
import packnet_functional as p
|
||||
|
||||
|
||||
class PacknetTests(unittest.TestCase):
|
||||
|
||||
def test_pack_3d_layer(self):
|
||||
# 3d packing expects a multiple of 16 for channels due to using 16 groups in group normalisation
|
||||
test_input = tf.random.normal([4, 224, 224, 32])
|
||||
y = p.pack_3d(test_input, 3, features_3d=4)
|
||||
out_shape = [i for i in test_input.shape]
|
||||
out_shape[1] = out_shape[1] // 2
|
||||
out_shape[2] = out_shape[2] // 2
|
||||
# TODO: Anything else we can test here for validity?
|
||||
self.assertEqual(y.shape, out_shape)
|
||||
|
||||
def test_unpack_3d_layer(self):
|
||||
num_output_channels = 32
|
||||
test_input = tf.random.normal([4, 112, 112, 64])
|
||||
y = p.unpack_3d(test_input, num_output_channels, 3, features_3d=4)
|
||||
out_shape = [i for i in test_input.shape]
|
||||
out_shape[1] = out_shape[1] * 2
|
||||
out_shape[2] = out_shape[2] * 2
|
||||
out_shape[3] = num_output_channels
|
||||
# TODO: Anything else we can test here for validity?
|
||||
self.assertEqual(y.shape, out_shape)
|
||||
|
||||
def test_packnet(self):
|
||||
packnet = p.make_packnet()
|
||||
self.assertIsNotNone(packnet)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
49  train.py  Normal file
@@ -0,0 +1,49 @@
"""
|
||||
Collection of functions to train the various models, and use different losses
|
||||
"""
|
||||
import tensorflow.keras as keras
|
||||
|
||||
from load import load_nyu
|
||||
from metric import *
|
||||
|
||||
|
||||
def compile(model, optimiser=keras.optimizers.SGD(), loss=keras.losses.MeanSquaredError(), custom_metrics=None):
|
||||
"""
|
||||
Compile FastDepth model with relevant metrics
|
||||
:param model: Model to compile
|
||||
:param optimiser: Custom optimiser to use
|
||||
:param loss: Loss function to use
|
||||
:param custom_metrics: Metrics to use instead of the defaults (RMSE, MSE, delta 1/2/3, MAPE, MAE)
|
||||
"""
|
||||
model.compile(optimizer=optimiser,
|
||||
loss=loss,
|
||||
metrics=[keras.metrics.RootMeanSquaredError(),
|
||||
keras.metrics.MeanSquaredError(),
|
||||
delta1_metric,
|
||||
delta2,
|
||||
delta3,
|
||||
keras.metrics.MeanAbsolutePercentageError(),
|
||||
keras.metrics.MeanAbsoluteError()] if custom_metrics is None else custom_metrics)
|
||||
|
||||
|
||||
def train(existing_model=None, pretrained_weights='imagenet', epochs=4, save_file=None, dataset=None,
|
||||
checkpoint='ckpt'):
|
||||
"""
|
||||
Compile, train and save (if a save file is specified) a Fast Depth model.
|
||||
:param existing_model: Existing, already-compiled model to train (see compile above); must be provided
:param pretrained_weights: Weights to use if existing_model is not specified. See the keras.applications.MobileNet
weights parameter for options here. Currently unused by this function.
|
||||
:param epochs: Number of epochs to run for
|
||||
:param save_file: File/directory to save to after training. By default the model won't be saved
|
||||
:param dataset: Train dataset to use. By default will DOWNLOAD and use tensorflow nyu_v2 dataset
|
||||
:param checkpoint: Checkpoint to save to
|
||||
"""
|
||||
callbacks = []
|
||||
if checkpoint:
|
||||
callbacks.append(keras.callbacks.ModelCheckpoint(checkpoint, save_weights_only=True))
|
||||
if not dataset:
|
||||
dataset = load_nyu()
|
||||
existing_model.fit(dataset, epochs=epochs, callbacks=callbacks)
|
||||
if save_file:
|
||||
existing_model.save(save_file)
|
||||
return existing_model
53  unsupervised/loss.py  Normal file
@@ -0,0 +1,53 @@
import tensorflow as tf


def l1_loss(target_img, reprojected_img):
    """
    Calculates the l1 norm between the target and reprojected image

    :param target_img: Tensor (batch, height, width, 3)
    :param reprojected_img: Tensor, same shape as target_img
    :return: The per-pixel l1 norm -> Tensor (batch, height, width)
    """
    return tf.reduce_mean(tf.abs(target_img - reprojected_img), axis=3)


def l2_loss(target_img, reprojected_img):
    """
    Calculates the l2 norm between the target and reprojected image

    :param target_img: Tensor (batch, height, width, 3)
    :param reprojected_img: Tensor, same shape as target_img
    :return: The per-pixel l2 norm -> Tensor (batch, height, width)
    """
    return tf.sqrt(tf.reduce_mean(tf.square(target_img - reprojected_img), axis=3))


def make_combined_ssim_l1_loss(ssim_weight: float = 0.85, other_loss_fn=l1_loss):
    """
    Create a loss function that will calculate ssim for the two images, and use the other_loss_fn to calculate the
    per pixel loss
    :param ssim_weight: Weighting that should be applied to the SSIM term vs the l1 difference between target and
        reprojected image
    :param other_loss_fn: Function to combine with the ssim
    :return: Function to calculate the per-pixel combined ssim with other loss function
    """

    def combined_ssim_loss(target_img, reprojected_img):
        """
        Calculates the per-pixel photometric reconstruction loss for each source image,
        combined with the SSIM between the reconstructed image and the actual image.

        Calculates the following:
        ssim_weight * (1 - SSIM(target_img, reprojected_img)) / 2 + (1 - ssim_weight) * other_loss_fn(target_img, reprojected_img)

        :param target_img: Tensor with shape (batch, height, width, 3) - current image we're training on
        :param reprojected_img: Tensor with same shape as target_img, Reprojected from some source image that
            should be as close as possible to the target image
        :return: Per-pixel loss -> Tensor with shape (batch, height, width), where height and width match target_img
            height and width
        """
        # tf.image.ssim returns one score per image (max_val=1.0 assumes inputs scaled to [0, 1]);
        # use the DSSIM form so identical images give zero loss, and broadcast it over the spatial dims.
        ssim = tf.reshape((1.0 - tf.image.ssim(target_img, reprojected_img, max_val=1.0)) / 2.0, [-1, 1, 1])
        return ssim_weight * ssim + (1 - ssim_weight) * other_loss_fn(target_img, reprojected_img)

    return combined_ssim_loss
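
Usage sketch (not part of the diff), assuming images normalised to [0, 1] to match the max_val used above:

import tensorflow as tf
from loss import make_combined_ssim_l1_loss  # run from the unsupervised/ directory

loss_fn = make_combined_ssim_l1_loss(ssim_weight=0.85)
target = tf.random.uniform([4, 224, 224, 3])
reprojected = tf.random.uniform([4, 224, 224, 3])
per_pixel = loss_fn(target, reprojected)  # shape (4, 224, 224)
print(float(tf.reduce_mean(per_pixel)))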
166  unsupervised/third-party/train.py  vendored  Normal file
@@ -0,0 +1,166 @@
"""
|
||||
Trainer to learn depth information on unlabeled data (raw images/videos)
|
||||
|
||||
Allows pluggable depth networks for differing performance (including fast-depth)
|
||||
"""
|
||||
|
||||
import numpy as np
import tensorflow as tf
|
||||
|
||||
|
||||
def compute_smooth_loss(self, pred_disp):
|
||||
def gradient(pred):
|
||||
D_dy = pred[:, 1:, :, :] - pred[:, :-1, :, :]
|
||||
D_dx = pred[:, :, 1:, :] - pred[:, :, :-1, :]
|
||||
return D_dx, D_dy
|
||||
dx, dy = gradient(pred_disp)
|
||||
dx2, dxdy = gradient(dx)
|
||||
dydx, dy2 = gradient(dy)
|
||||
return tf.reduce_mean(tf.abs(dx2)) + \
|
||||
tf.reduce_mean(tf.abs(dxdy)) + \
|
||||
tf.reduce_mean(tf.abs(dydx)) + \
|
||||
tf.reduce_mean(tf.abs(dy2))
|
||||
|
||||
|
||||
def get_reference_explain_mask(self, downscaling):
|
||||
opt = self.opt
|
||||
tmp = np.array([0, 1])
|
||||
ref_exp_mask = np.tile(tmp,
|
||||
(opt.batch_size,
|
||||
int(opt.img_height/(2**downscaling)),
|
||||
int(opt.img_width/(2**downscaling)),
|
||||
1))
|
||||
ref_exp_mask = tf.constant(ref_exp_mask, dtype=tf.float32)
|
||||
return ref_exp_mask
|
||||
|
||||
|
||||
def get_sfm_loss_fn(opt):
|
||||
def sfm_loss_fn(y, y_pred):
|
||||
# TODO: Correctly format a batch that is required for this loss function
|
||||
pixel_loss = 0
|
||||
exp_loss = 0
|
||||
smooth_loss = 0
|
||||
tgt_image_all = []
|
||||
src_image_stack_all = []
|
||||
proj_image_stack_all = []
|
||||
proj_error_stack_all = []
|
||||
exp_mask_stack_all = []
|
||||
for s in range(opt.num_scales):
|
||||
if opt.explain_reg_weight > 0:
|
||||
# Construct a reference explainability mask (i.e. all
|
||||
# pixels are explainable)
|
||||
ref_exp_mask = get_reference_explain_mask(s)
|
||||
# Scale the source and target images for computing loss at the
|
||||
# according scale.
|
||||
curr_tgt_image = tf.image.resize_area(tgt_image,
|
||||
[int(opt.img_height/(2**s)), int(opt.img_width/(2**s))])
|
||||
curr_src_image_stack = tf.image.resize_area(src_image_stack,
|
||||
[int(opt.img_height/(2**s)), int(opt.img_width/(2**s))])
|
||||
|
||||
if opt.smooth_weight > 0:
|
||||
smooth_loss += opt.smooth_weight/(2**s) * \
|
||||
compute_smooth_loss(pred_disp[s])
|
||||
|
||||
for i in range(opt.num_source):
|
||||
# Inverse warp the source image to the target image frame
|
||||
curr_proj_image = projective_inverse_warp(
|
||||
curr_src_image_stack[:, :, :, 3*i:3*(i+1)],
|
||||
tf.squeeze(pred_depth[s], axis=3),
|
||||
pred_poses[:, i, :],
|
||||
intrinsics[:, s, :, :])
|
||||
curr_proj_error = tf.abs(curr_proj_image - curr_tgt_image)
|
||||
# Cross-entropy loss as regularization for the
|
||||
# explainability prediction
|
||||
if opt.explain_reg_weight > 0:
|
||||
curr_exp_logits = tf.slice(pred_exp_logits[s],
|
||||
[0, 0, 0, i*2],
|
||||
[-1, -1, -1, 2])
|
||||
exp_loss += opt.explain_reg_weight * \
|
||||
self.compute_exp_reg_loss(curr_exp_logits,
|
||||
ref_exp_mask)
|
||||
curr_exp = tf.nn.softmax(curr_exp_logits)
|
||||
# Photo-consistency loss weighted by explainability
|
||||
if opt.explain_reg_weight > 0:
|
||||
pixel_loss += tf.reduce_mean(curr_proj_error *
|
||||
tf.expand_dims(curr_exp[:, :, :, 1], -1))
|
||||
else:
|
||||
pixel_loss += tf.reduce_mean(curr_proj_error)
|
||||
# Prepare images for tensorboard summaries
|
||||
if i == 0:
|
||||
proj_image_stack = curr_proj_image
|
||||
proj_error_stack = curr_proj_error
|
||||
if opt.explain_reg_weight > 0:
|
||||
exp_mask_stack = tf.expand_dims(
|
||||
curr_exp[:, :, :, 1], -1)
|
||||
else:
|
||||
proj_image_stack = tf.concat([proj_image_stack,
|
||||
curr_proj_image], axis=3)
|
||||
proj_error_stack = tf.concat([proj_error_stack,
|
||||
curr_proj_error], axis=3)
|
||||
if opt.explain_reg_weight > 0:
|
||||
exp_mask_stack = tf.concat([exp_mask_stack,
|
||||
tf.expand_dims(curr_exp[:, :, :, 1], -1)], axis=3)
|
||||
tgt_image_all.append(curr_tgt_image)
|
||||
src_image_stack_all.append(curr_src_image_stack)
|
||||
proj_image_stack_all.append(proj_image_stack)
|
||||
proj_error_stack_all.append(proj_error_stack)
|
||||
if opt.explain_reg_weight > 0:
|
||||
exp_mask_stack_all.append(exp_mask_stack)
|
||||
total_loss = pixel_loss + smooth_loss + exp_loss
|
||||
return total_loss
|
||||
return sfm_loss_fn
|
||||
|
||||
|
||||
def photometric_reconstruction_loss(tgt_img, ref_imgs, intrinsics,
|
||||
depth, explainability_mask, pose,
|
||||
rotation_mode='euler', padding_mode='zeros'):
|
||||
def one_scale(d, mask):
|
||||
assert(mask is None or d.size()[2:] == mask.size()[2:])
|
||||
assert(pose.size(1) == len(ref_imgs))
|
||||
|
||||
reconstruction_loss = 0
|
||||
b, _, h, w = d.size()
|
||||
downscale = tgt_img.size(2)/h
|
||||
|
||||
tgt_img_scaled = F.interpolate(tgt_img, (h, w), mode='area')
|
||||
ref_imgs_scaled = [F.interpolate(
|
||||
ref_img, (h, w), mode='area') for ref_img in ref_imgs]
|
||||
intrinsics_scaled = tf.concat(
|
||||
(intrinsics[:, 0:2]/downscale, intrinsics[:, 2:]), dim=1)
|
||||
|
||||
warped_imgs = []
|
||||
diff_maps = []
|
||||
|
||||
for i, ref_img in enumerate(ref_imgs_scaled):
|
||||
current_pose = pose[:, i]
|
||||
|
||||
ref_img_warped, valid_points = inverse_warp(ref_img, depth[:, 0], current_pose,
|
||||
intrinsics_scaled,
|
||||
rotation_mode, padding_mode)
|
||||
diff = (tgt_img_scaled - ref_img_warped) * \
|
||||
valid_points.unsqueeze(1).float()
|
||||
|
||||
if explainability_mask is not None:
|
||||
diff = diff * explainability_mask[:, i:i+1].expand_as(diff)
|
||||
|
||||
reconstruction_loss += diff.abs().mean()
|
||||
assert((reconstruction_loss == reconstruction_loss).item() == 1)
|
||||
|
||||
warped_imgs.append(ref_img_warped[0])
|
||||
diff_maps.append(diff[0])
|
||||
|
||||
return reconstruction_loss, warped_imgs, diff_maps
|
||||
|
||||
warped_results, diff_results = [], []
|
||||
if type(explainability_mask) not in [tuple, list]:
|
||||
explainability_mask = [explainability_mask]
|
||||
if type(depth) not in [list, tuple]:
|
||||
depth = [depth]
|
||||
|
||||
total_loss = 0
|
||||
for d, mask in zip(depth, explainability_mask):
|
||||
loss, warped, diff = one_scale(d, mask)
|
||||
total_loss += loss
|
||||
warped_results.append(warped)
|
||||
diff_results.append(diff)
|
||||
return total_loss, warped_results, diff_results
354  unsupervised/third-party/utils.py  vendored  Normal file
@@ -0,0 +1,354 @@
"""
|
||||
Utils to load and split image/video data.
|
||||
"""
|
||||
|
||||
from __future__ import division
|
||||
import math
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
def euler2mat(z, y, x):
|
||||
"""Converts euler angles to rotation matrix
|
||||
TODO: remove the dimension for 'N' (deprecated for converting all source
|
||||
poses altogether)
|
||||
Reference: https://github.com/pulkitag/pycaffe-utils/blob/master/rot_utils.py#L174
|
||||
Args:
|
||||
z: rotation angle along z axis (in radians) -- size = [B, N]
|
||||
y: rotation angle along y axis (in radians) -- size = [B, N]
|
||||
x: rotation angle along x axis (in radians) -- size = [B, N]
|
||||
Returns:
|
||||
Rotation matrix corresponding to the euler angles -- size = [B, N, 3, 3]
|
||||
"""
|
||||
B = tf.shape(z)[0]
|
||||
N = 1
|
||||
z = tf.clip_by_value(z, -math.pi, math.pi)
|
||||
y = tf.clip_by_value(y, -math.pi, math.pi)
|
||||
x = tf.clip_by_value(x, -math.pi, math.pi)
|
||||
|
||||
# Expand to B x N x 1 x 1
|
||||
z = tf.expand_dims(tf.expand_dims(z, -1), -1)
|
||||
y = tf.expand_dims(tf.expand_dims(y, -1), -1)
|
||||
x = tf.expand_dims(tf.expand_dims(x, -1), -1)
|
||||
|
||||
zeros = tf.zeros([B, N, 1, 1])
|
||||
ones = tf.ones([B, N, 1, 1])
|
||||
|
||||
cosz = tf.cos(z)
|
||||
sinz = tf.sin(z)
|
||||
rotz_1 = tf.concat([cosz, -sinz, zeros], axis=3)
|
||||
rotz_2 = tf.concat([sinz, cosz, zeros], axis=3)
|
||||
rotz_3 = tf.concat([zeros, zeros, ones], axis=3)
|
||||
zmat = tf.concat([rotz_1, rotz_2, rotz_3], axis=2)
|
||||
|
||||
cosy = tf.cos(y)
|
||||
siny = tf.sin(y)
|
||||
roty_1 = tf.concat([cosy, zeros, siny], axis=3)
|
||||
roty_2 = tf.concat([zeros, ones, zeros], axis=3)
|
||||
roty_3 = tf.concat([-siny, zeros, cosy], axis=3)
|
||||
ymat = tf.concat([roty_1, roty_2, roty_3], axis=2)
|
||||
|
||||
cosx = tf.cos(x)
|
||||
sinx = tf.sin(x)
|
||||
rotx_1 = tf.concat([ones, zeros, zeros], axis=3)
|
||||
rotx_2 = tf.concat([zeros, cosx, -sinx], axis=3)
|
||||
rotx_3 = tf.concat([zeros, sinx, cosx], axis=3)
|
||||
xmat = tf.concat([rotx_1, rotx_2, rotx_3], axis=2)
|
||||
|
||||
rotMat = tf.matmul(tf.matmul(xmat, ymat), zmat)
|
||||
return rotMat
|
||||
|
||||
|
||||
def pose_vec2mat(vec):
|
||||
"""Converts 6DoF parameters to transformation matrix
|
||||
Args:
|
||||
vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6]
|
||||
Returns:
|
||||
A transformation matrix -- [B, 4, 4]
|
||||
"""
|
||||
batch_size, _ = vec.get_shape().as_list()
|
||||
translation = tf.slice(vec, [0, 0], [-1, 3])
|
||||
translation = tf.expand_dims(translation, -1)
|
||||
rx = tf.slice(vec, [0, 3], [-1, 1])
|
||||
ry = tf.slice(vec, [0, 4], [-1, 1])
|
||||
rz = tf.slice(vec, [0, 5], [-1, 1])
|
||||
rot_mat = euler2mat(rz, ry, rx)
|
||||
rot_mat = tf.squeeze(rot_mat, axis=[1])
|
||||
filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
|
||||
filler = tf.tile(filler, [batch_size, 1, 1])
|
||||
transform_mat = tf.concat([rot_mat, translation], axis=2)
|
||||
transform_mat = tf.concat([transform_mat, filler], axis=1)
|
||||
return transform_mat
|
||||
|
||||
|
||||
def pixel2cam(depth, pixel_coords, intrinsics, is_homogeneous=True):
|
||||
"""Transforms coordinates in the pixel frame to the camera frame.
|
||||
|
||||
Args:
|
||||
depth: [batch, height, width]
|
||||
pixel_coords: homogeneous pixel coordinates [batch, 3, height, width]
|
||||
intrinsics: camera intrinsics [batch, 3, 3]
|
||||
is_homogeneous: return in homogeneous coordinates
|
||||
Returns:
|
||||
Coords in the camera frame [batch, 3 (4 if homogeneous), height, width]
|
||||
"""
|
||||
batch, height, width = depth.get_shape().as_list()
|
||||
depth = tf.reshape(depth, [batch, 1, -1])
|
||||
pixel_coords = tf.reshape(pixel_coords, [batch, 3, -1])
|
||||
cam_coords = tf.matmul(tf.matrix_inverse(intrinsics), pixel_coords) * depth
|
||||
if is_homogeneous:
|
||||
ones = tf.ones([batch, 1, height*width])
|
||||
cam_coords = tf.concat([cam_coords, ones], axis=1)
|
||||
cam_coords = tf.reshape(cam_coords, [batch, -1, height, width])
|
||||
return cam_coords
|
||||
|
||||
|
||||
def cam2pixel(cam_coords, proj):
|
||||
"""Transforms coordinates in a camera frame to the pixel frame.
|
||||
|
||||
Args:
|
||||
cam_coords: [batch, 4, height, width]
|
||||
proj: [batch, 4, 4]
|
||||
Returns:
|
||||
Pixel coordinates projected from the camera frame [batch, height, width, 2]
|
||||
"""
|
||||
batch, _, height, width = cam_coords.get_shape().as_list()
|
||||
cam_coords = tf.reshape(cam_coords, [batch, 4, -1])
|
||||
unnormalized_pixel_coords = tf.matmul(proj, cam_coords)
|
||||
x_u = tf.slice(unnormalized_pixel_coords, [0, 0, 0], [-1, 1, -1])
|
||||
y_u = tf.slice(unnormalized_pixel_coords, [0, 1, 0], [-1, 1, -1])
|
||||
z_u = tf.slice(unnormalized_pixel_coords, [0, 2, 0], [-1, 1, -1])
|
||||
x_n = x_u / (z_u + 1e-10)
|
||||
y_n = y_u / (z_u + 1e-10)
|
||||
pixel_coords = tf.concat([x_n, y_n], axis=1)
|
||||
pixel_coords = tf.reshape(pixel_coords, [batch, 2, height, width])
|
||||
return tf.transpose(pixel_coords, perm=[0, 2, 3, 1])
|
||||
|
||||
|
||||
def meshgrid(batch, height, width, is_homogeneous=True):
|
||||
"""Construct a 2D meshgrid.
|
||||
|
||||
Args:
|
||||
batch: batch size
|
||||
height: height of the grid
|
||||
width: width of the grid
|
||||
is_homogeneous: whether to return in homogeneous coordinates
|
||||
Returns:
|
||||
x,y grid coordinates [batch, 2 (3 if homogeneous), height, width]
|
||||
"""
|
||||
x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])),
|
||||
tf.transpose(tf.expand_dims(
|
||||
tf.linspace(-1.0, 1.0, width), 1), [1, 0]))
|
||||
y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1),
|
||||
tf.ones(shape=tf.stack([1, width])))
|
||||
x_t = (x_t + 1.0) * 0.5 * tf.cast(width - 1, tf.float32)
|
||||
y_t = (y_t + 1.0) * 0.5 * tf.cast(height - 1, tf.float32)
|
||||
if is_homogeneous:
|
||||
ones = tf.ones_like(x_t)
|
||||
coords = tf.stack([x_t, y_t, ones], axis=0)
|
||||
else:
|
||||
coords = tf.stack([x_t, y_t], axis=0)
|
||||
coords = tf.tile(tf.expand_dims(coords, 0), [batch, 1, 1, 1])
|
||||
return coords
|
||||
|
||||
|
||||
def projective_inverse_warp(img, depth, pose, intrinsics):
|
||||
"""Inverse warp a source image to the target image plane based on projection.
|
||||
|
||||
Args:
|
||||
img: the source image [batch, height_s, width_s, 3]
|
||||
depth: depth map of the target image [batch, height_t, width_t]
|
||||
pose: target to source camera transformation matrix [batch, 6], in the
|
||||
order of tx, ty, tz, rx, ry, rz
|
||||
intrinsics: camera intrinsics [batch, 3, 3]
|
||||
Returns:
|
||||
Source image inverse warped to the target image plane [batch, height_t,
|
||||
width_t, 3]
|
||||
"""
|
||||
batch, height, width, _ = img.get_shape().as_list()
|
||||
# Convert pose vector to matrix
|
||||
pose = pose_vec2mat(pose)
|
||||
# Construct pixel grid coordinates
|
||||
pixel_coords = meshgrid(batch, height, width)
|
||||
# Convert pixel coordinates to the camera frame
|
||||
cam_coords = pixel2cam(depth, pixel_coords, intrinsics)
|
||||
# Construct a 4x4 intrinsic matrix (TODO: can it be 3x4?)
|
||||
filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
|
||||
filler = tf.tile(filler, [batch, 1, 1])
|
||||
intrinsics = tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2)
|
||||
intrinsics = tf.concat([intrinsics, filler], axis=1)
|
||||
# Get a 4x4 transformation matrix from 'target' camera frame to 'source'
|
||||
# pixel frame.
|
||||
proj_tgt_cam_to_src_pixel = tf.matmul(intrinsics, pose)
|
||||
src_pixel_coords = cam2pixel(cam_coords, proj_tgt_cam_to_src_pixel)
|
||||
output_img = bilinear_sampler(img, src_pixel_coords)
|
||||
return output_img
|
||||
|
||||
|
||||
def bilinear_sampler(imgs, coords):
|
||||
"""Construct a new image by bilinear sampling from the input image.
|
||||
|
||||
Points falling outside the source image boundary have value 0.
|
||||
|
||||
Args:
|
||||
imgs: source image to be sampled from [batch, height_s, width_s, channels]
|
||||
coords: coordinates of source pixels to sample from [batch, height_t,
|
||||
width_t, 2]. height_t/width_t correspond to the dimensions of the output
|
||||
image (don't need to be the same as height_s/width_s). The two channels
|
||||
correspond to x and y coordinates respectively.
|
||||
Returns:
|
||||
A new sampled image [batch, height_t, width_t, channels]
|
||||
"""
|
||||
def _repeat(x, n_repeats):
|
||||
rep = tf.transpose(
|
||||
tf.expand_dims(tf.ones(shape=tf.stack([
|
||||
n_repeats,
|
||||
])), 1), [1, 0])
|
||||
rep = tf.cast(rep, 'float32')
|
||||
x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
|
||||
return tf.reshape(x, [-1])
|
||||
|
||||
with tf.name_scope('image_sampling'):
|
||||
coords_x, coords_y = tf.split(coords, [1, 1], axis=3)
|
||||
inp_size = imgs.get_shape()
|
||||
coord_size = coords.get_shape()
|
||||
out_size = coords.get_shape().as_list()
|
||||
out_size[3] = imgs.get_shape().as_list()[3]
|
||||
|
||||
coords_x = tf.cast(coords_x, 'float32')
|
||||
coords_y = tf.cast(coords_y, 'float32')
|
||||
|
||||
x0 = tf.floor(coords_x)
|
||||
x1 = x0 + 1
|
||||
y0 = tf.floor(coords_y)
|
||||
y1 = y0 + 1
|
||||
|
||||
y_max = tf.cast(tf.shape(imgs)[1] - 1, 'float32')
|
||||
x_max = tf.cast(tf.shape(imgs)[2] - 1, 'float32')
|
||||
zero = tf.zeros([1], dtype='float32')
|
||||
|
||||
x0_safe = tf.clip_by_value(x0, zero, x_max)
|
||||
y0_safe = tf.clip_by_value(y0, zero, y_max)
|
||||
x1_safe = tf.clip_by_value(x1, zero, x_max)
|
||||
y1_safe = tf.clip_by_value(y1, zero, y_max)
|
||||
|
||||
# bilinear interp weights, with points outside the grid having weight 0
|
||||
# wt_x0 = (x1 - coords_x) * tf.cast(tf.equal(x0, x0_safe), 'float32')
|
||||
# wt_x1 = (coords_x - x0) * tf.cast(tf.equal(x1, x1_safe), 'float32')
|
||||
# wt_y0 = (y1 - coords_y) * tf.cast(tf.equal(y0, y0_safe), 'float32')
|
||||
# wt_y1 = (coords_y - y0) * tf.cast(tf.equal(y1, y1_safe), 'float32')
|
||||
|
||||
wt_x0 = x1_safe - coords_x
|
||||
wt_x1 = coords_x - x0_safe
|
||||
wt_y0 = y1_safe - coords_y
|
||||
wt_y1 = coords_y - y0_safe
|
||||
|
||||
# indices in the flat image to sample from
|
||||
dim2 = tf.cast(inp_size[2], 'float32')
|
||||
dim1 = tf.cast(inp_size[2] * inp_size[1], 'float32')
|
||||
base = tf.reshape(
|
||||
_repeat(
|
||||
tf.cast(tf.range(coord_size[0]), 'float32') * dim1,
|
||||
coord_size[1] * coord_size[2]),
|
||||
[out_size[0], out_size[1], out_size[2], 1])
|
||||
|
||||
base_y0 = base + y0_safe * dim2
|
||||
base_y1 = base + y1_safe * dim2
|
||||
idx00 = tf.reshape(x0_safe + base_y0, [-1])
|
||||
idx01 = x0_safe + base_y1
|
||||
idx10 = x1_safe + base_y0
|
||||
idx11 = x1_safe + base_y1
|
||||
|
||||
# sample from imgs
|
||||
imgs_flat = tf.reshape(imgs, tf.stack([-1, inp_size[3]]))
|
||||
imgs_flat = tf.cast(imgs_flat, 'float32')
|
||||
im00 = tf.reshape(
|
||||
tf.gather(imgs_flat, tf.cast(idx00, 'int32')), out_size)
|
||||
im01 = tf.reshape(
|
||||
tf.gather(imgs_flat, tf.cast(idx01, 'int32')), out_size)
|
||||
im10 = tf.reshape(
|
||||
tf.gather(imgs_flat, tf.cast(idx10, 'int32')), out_size)
|
||||
im11 = tf.reshape(
|
||||
tf.gather(imgs_flat, tf.cast(idx11, 'int32')), out_size)
|
||||
|
||||
w00 = wt_x0 * wt_y0
|
||||
w01 = wt_x0 * wt_y1
|
||||
w10 = wt_x1 * wt_y0
|
||||
w11 = wt_x1 * wt_y1
|
||||
|
||||
output = tf.add_n([
|
||||
w00 * im00, w01 * im01,
|
||||
w10 * im10, w11 * im11
|
||||
])
|
||||
return output
|
||||
|
||||
# Spatial transformer network bilinear sampler, taken from https://github.com/kevinzakka/spatial-transformer-network/blob/master/stn/transformer.py
|
||||
|
||||
|
||||
def stn_bilinear_sampler(img, x, y):
|
||||
"""
|
||||
Performs bilinear sampling of the input images according to the
|
||||
normalized coordinates provided by the sampling grid. Note that
|
||||
the sampling is done identically for each channel of the input.
|
||||
To test if the function works properly, output image should be
|
||||
identical to input image when theta is initialized to identity
|
||||
transform.
|
||||
Input
|
||||
-----
|
||||
- img: batch of images in (B, H, W, C) layout.
|
||||
- grid: x, y which is the output of affine_grid_generator.
|
||||
Returns
|
||||
-------
|
||||
- out: interpolated images according to grids. Same size as grid.
|
||||
"""
|
||||
H = tf.shape(img)[1]
|
||||
W = tf.shape(img)[2]
|
||||
max_y = tf.cast(H - 1, 'int32')
|
||||
max_x = tf.cast(W - 1, 'int32')
|
||||
zero = tf.zeros([], dtype='int32')
|
||||
|
||||
# rescale x and y to [0, W-1/H-1]
|
||||
x = tf.cast(x, 'float32')
|
||||
y = tf.cast(y, 'float32')
|
||||
x = 0.5 * ((x + 1.0) * tf.cast(max_x-1, 'float32'))
|
||||
y = 0.5 * ((y + 1.0) * tf.cast(max_y-1, 'float32'))
|
||||
|
||||
# grab 4 nearest corner points for each (x_i, y_i)
|
||||
x0 = tf.cast(tf.floor(x), 'int32')
|
||||
x1 = x0 + 1
|
||||
y0 = tf.cast(tf.floor(y), 'int32')
|
||||
y1 = y0 + 1
|
||||
|
||||
# clip to range [0, H-1/W-1] to not violate img boundaries
|
||||
x0 = tf.clip_by_value(x0, zero, max_x)
|
||||
x1 = tf.clip_by_value(x1, zero, max_x)
|
||||
y0 = tf.clip_by_value(y0, zero, max_y)
|
||||
y1 = tf.clip_by_value(y1, zero, max_y)
|
||||
|
||||
# get pixel value at corner coords
|
||||
Ia = get_pixel_value(img, x0, y0)
|
||||
Ib = get_pixel_value(img, x0, y1)
|
||||
Ic = get_pixel_value(img, x1, y0)
|
||||
Id = get_pixel_value(img, x1, y1)
|
||||
|
||||
# recast as float for delta calculation
|
||||
x0 = tf.cast(x0, 'float32')
|
||||
x1 = tf.cast(x1, 'float32')
|
||||
y0 = tf.cast(y0, 'float32')
|
||||
y1 = tf.cast(y1, 'float32')
|
||||
|
||||
# calculate deltas
|
||||
wa = (x1-x) * (y1-y)
|
||||
wb = (x1-x) * (y-y0)
|
||||
wc = (x-x0) * (y1-y)
|
||||
wd = (x-x0) * (y-y0)
|
||||
|
||||
# add dimension for addition
|
||||
wa = tf.expand_dims(wa, axis=3)
|
||||
wb = tf.expand_dims(wb, axis=3)
|
||||
wc = tf.expand_dims(wc, axis=3)
|
||||
wd = tf.expand_dims(wd, axis=3)
|
||||
|
||||
# compute output
|
||||
out = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id])
|
||||
|
||||
return out
20  unsupervised/train.py  Normal file
@@ -0,0 +1,20 @@
"""
|
||||
Trainer to learn depth information on unlabeled data (raw images/videos)
|
||||
|
||||
Allows pluggable depth networks for differing performance (including fast-depth)
|
||||
"""
|
||||
|
||||
import tensorflow.keras as keras
|
||||
|
||||
|
||||
class SFMLearner(keras.Model):
|
||||
|
||||
def __init__(depth_model, pose_model):
|
||||
pass
|
||||
|
||||
def train_step(self, data):
|
||||
pass
|
||||
|
||||
|
||||
def make_sfm_learner_pose_net(input_shape=(224, 224, 3)):
|
||||
pass
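
A possible shape for the missing pieces (a hedged sketch, not part of the diff): it assumes the projective_inverse_warp in unsupervised/warp.py is implemented and that each dataset element is a (target, source, intrinsics) tuple.

import tensorflow as tf
import tensorflow.keras as keras

from warp import projective_inverse_warp  # still a stub in this merge request


class SFMLearnerSketch(keras.Model):
    """Hypothetical fleshed-out version of the SFMLearner skeleton above."""

    def __init__(self, depth_model, pose_model, reprojection_loss_fn):
        super().__init__()
        self.depth_model = depth_model
        self.pose_model = pose_model
        self.reprojection_loss_fn = reprojection_loss_fn

    def train_step(self, data):
        target_img, source_img, intrinsics = data
        with tf.GradientTape() as tape:
            depth = self.depth_model(target_img, training=True)
            pose = self.pose_model(tf.concat([target_img, source_img], axis=-1), training=True)
            reprojected = projective_inverse_warp(target_img, source_img, depth, pose, intrinsics)
            loss = tf.reduce_mean(self.reprojection_loss_fn(target_img, reprojected))
        gradients = tape.gradient(loss, self.trainable_variables)
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        return {'loss': loss}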
19  unsupervised/warp.py  Normal file
@@ -0,0 +1,19 @@
def projective_inverse_warp(target_img, source_img, depth, pose, intrinsics):
    """
    Calculate the reprojected image from the source to the target, based on the given depth, pose and intrinsics

    SFM Learner inverse warp step
    ps ~ K.T(t->s).Dt(pt).K^-1.pt

    The idea is to map the pixel coordinates of the target image to 3d space (Dt(pt).K^-1.pt), then map these onto
    the source image in pixel coordinates (K.T(t->s).{3d coord}), and then use the projected coordinates to sample
    the pixels in the source image (ps) to reconstruct the target image.

    :param target_img: Tensor (batch, height, width, 3)
    :param source_img: Tensor, same shape as target_img
    :param depth: Tensor, (batch, height, width, 1)
    :param pose: (batch, 3, 3)
    :param intrinsics: (batch, 3, 3)
    :return: The source image reprojected to the target
    """
    pass
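
A hedged sketch (not part of the diff) of the coordinate mapping described in the docstring, assuming depth has been squeezed to (batch, height, width) and the pose is given as a full 4x4 target-to-source transform. Bilinear sampling of source_img at the returned coordinates (e.g. the sampler in unsupervised/third-party/utils.py) would complete the warp.

import tensorflow as tf


def project_target_to_source(depth, pose, intrinsics):
    """Map homogeneous target pixel coordinates pt through Dt(pt).K^-1 and K.T(t->s)."""
    batch, height, width = depth.shape
    xs, ys = tf.meshgrid(tf.range(width, dtype=tf.float32),
                         tf.range(height, dtype=tf.float32))
    grid = tf.reshape(tf.stack([xs, ys, tf.ones_like(xs)], axis=0), [1, 3, -1])
    grid = tf.tile(grid, [batch, 1, 1])
    # Dt(pt) . K^-1 . pt -> 3D points in the target camera frame (made homogeneous below)
    cam = tf.linalg.inv(intrinsics) @ grid * tf.reshape(depth, [batch, 1, -1])
    cam = tf.concat([cam, tf.ones([batch, 1, height * width])], axis=1)
    # K . T(t->s) . {3d coord} -> unnormalised source pixel coordinates
    k4 = tf.concat([tf.concat([intrinsics, tf.zeros([batch, 3, 1])], axis=2),
                    tf.tile(tf.constant([[[0.0, 0.0, 0.0, 1.0]]]), [batch, 1, 1])], axis=1)
    projected = k4 @ pose @ cam
    src_pixels = projected[:, :2] / (projected[:, 2:3] + 1e-10)
    return tf.reshape(src_pixels, [batch, 2, height, width])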
21  util.py  Normal file
@@ -0,0 +1,21 @@
import tensorflow as tf
import tensorflow.keras as keras


def crop_and_resize(x, out_shape=(224, 224)):
    shape = tf.shape(x['depth'])
    img_shape = tf.shape(x['image'])
    # Ensure we get a square for when we resize it later.
    # For horizontal images this is basically just cropping the sides off
    center_shape = tf.minimum(shape[1], tf.minimum(shape[2], tf.minimum(img_shape[1], img_shape[2])))

    def layer():
        return keras.Sequential([
            keras.layers.experimental.preprocessing.CenterCrop(
                center_shape, center_shape),
            keras.layers.experimental.preprocessing.Resizing(
                out_shape[0], out_shape[1], interpolation='nearest')
        ])

    # Reshape label to 4d, can't use array unwrap as it's unsupported by tensorflow
    return layer()(x['image']), layer()(tf.reshape(x['depth'], [shape[0], shape[1], shape[2], 1]))
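
Usage sketch (not part of the diff), mirroring how load.py maps this preprocessing over the tfds splits:

import tensorflow_datasets as tfds
from util import crop_and_resize

builder = tfds.builder('nyu_depth_v2')
builder.download_and_prepare(download_dir='../nyu')
ds = builder.as_dataset(split='validation').batch(4).map(lambda x: crop_and_resize(x, (224, 224)))
for image, depth in ds.take(1):
    print(image.shape, depth.shape)  # (4, 224, 224, 3) and (4, 224, 224, 1)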