Start adding unsupervised train loop
This commit is contained in:
@@ -9,8 +9,8 @@ def wrap_mobilenet_nnconv5_for_utrain(model):
|
|||||||
|
|
||||||
This just exposes the lower disparity layers as outputs, so they can be used to train at different scales/image
|
This just exposes the lower disparity layers as outputs, so they can be used to train at different scales/image
|
||||||
resolutions.
|
resolutions.
|
||||||
:param model:
|
:param model: Fast Depth model to wrap
|
||||||
:return:
|
:return: Keras model that takes same input as model and outputs the model output plus 3 disparity layers
|
||||||
"""
|
"""
|
||||||
input = model.input
|
input = model.input
|
||||||
disp_1 = model.get_layer('conv_pw_%d_relu' % 15).output
|
disp_1 = model.get_layer('conv_pw_%d_relu' % 15).output
|
||||||
|
|||||||
@@ -5,16 +5,71 @@ Allows pluggable depth networks for differing performance (including fast-depth)
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import tensorflow.keras as keras
|
import tensorflow.keras as keras
|
||||||
|
import warp
|
||||||
|
import unsupervised.loss as loss
|
||||||
|
|
||||||
|
|
||||||
class SFMLearner(keras.Model):
|
class UnsupervisedPoseDepthLearner(keras.Model):
|
||||||
|
"""
|
||||||
|
Keras model to learn simultaneous depth + pose from image/video sequences.
|
||||||
|
|
||||||
|
To train this, the datasource should yield 3 frames and camera intrinsics.
|
||||||
|
Optionally velocity + timestamp per frame to train to real scale
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, depth_model, pose_model, num_scales=3, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
self.depth_model = depth_model
|
||||||
|
self.pose_model = pose_model
|
||||||
|
self.num_scales = num_scales
|
||||||
|
|
||||||
def __init__(depth_model, pose_model):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def train_step(self, data):
|
def train_step(self, data):
|
||||||
|
"""
|
||||||
|
|
||||||
|
:param data: Format: {frames: Mat[3], intrinsics: Tensor}
|
||||||
|
"""
|
||||||
|
# Pass through depth for target image
|
||||||
|
# TODO: Convert frame to tensor (or do this in the dataloader)
|
||||||
|
# TODO: Ensure the depth output includes enough outputs for each scale
|
||||||
|
depth = self.depth_model(data.frames[1])
|
||||||
|
|
||||||
|
# Pass through depth -> pose for both source images
|
||||||
|
# TODO: Concat these poses using tf.concat
|
||||||
|
pose1 = self.pose_model(data.frames[1], data.frames[0])
|
||||||
|
pose2 = self.pose_model(data.frames[1], data.frames[2])
|
||||||
|
|
||||||
|
shape = depth[0].shape
|
||||||
|
|
||||||
|
# TODO: Pull coords out of train step into initialiser, then it only needs to be created once.
|
||||||
|
# Ideally the size/batch size will still be calculated automatically
|
||||||
|
coords = warp.image_coordinate(shape[0], shape[1], shape[2])
|
||||||
|
|
||||||
|
scale_losses = []
|
||||||
|
# For each scale, do the projective inverse warp step and calculate losses
|
||||||
|
for i in range(self.num_scales):
|
||||||
|
# TODO: Could simplify this by stacking the source images (see sfmlearner)
|
||||||
|
# It isn't too much of an issue right now since we're only using 2 images like in monodepth
|
||||||
|
# For each depth output (scale), do the projective inverse warp on each input image and calculate the losses
|
||||||
|
# Only take the min loss between the two warped images (from monodepth2)
|
||||||
|
warp1 = warp.projective_inverse_warp(data.frames[0], depth[i], pose1, data.intrinsics, coords)
|
||||||
|
warp2 = warp.projective_inverse_warp(data.frames[2], depth[i], pose2, data.intrinsics, coords)  # pose2 is the pose estimated against frames[2]
|
||||||
|
|
||||||
|
# Per pixel loss is just the difference in pixel intensities?
|
||||||
|
# Something like l1 plus ssim
|
||||||
|
loss1 = loss.make_combined_ssim_l1_loss(data.frames[1], warp1)
|
||||||
|
loss2 = loss.make_combined_ssim_l1_loss(data.frames[1], warp2)
|
||||||
|
|
||||||
|
# Take the min from these? Or min after auto-masking? I think after auto masking
|
||||||
|
|
||||||
|
|
||||||
|
# Also do the auto masking from monodepth2 (compare the per-pixel error of the warped image against the error
|
||||||
|
# of the unwarped source; if the unwarped source matches the target at least as well, ignore the pixel).
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Collect losses, average them out
|
||||||
|
|
||||||
|
# Calculate smooth losses
|
||||||
|
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
def make_sfm_learner_pose_net(input_shape=(224, 224, 3)):
|
|
||||||
pass
|
|
||||||
|
|||||||
Reference in New Issue
Block a user