Update for python 2.10, add general training algorithm step
This commit is contained in:
@@ -4,8 +4,9 @@ Trainer to learn depth information on unlabeled data (raw images/videos)
|
|||||||
Allows pluggable depth networks for differing performance (including fast-depth)
|
Allows pluggable depth networks for differing performance (including fast-depth)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import tensorflow.keras as keras
|
import tensorflow as tf
|
||||||
import warp
|
import tensorflow.python.keras as keras
|
||||||
|
from unsupervised import warp
|
||||||
import unsupervised.loss as loss
|
import unsupervised.loss as loss
|
||||||
|
|
||||||
|
|
||||||
@@ -21,8 +22,9 @@ class UnsupervisedPoseDepthLearner(keras.Model):
|
|||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.depth_model = depth_model
|
self.depth_model = depth_model
|
||||||
self.pose_model = pose_model
|
self.pose_model = pose_model
|
||||||
|
# TODO: I think num_scales should be something defined on the depth model itself
|
||||||
self.num_scales = num_scales
|
self.num_scales = num_scales
|
||||||
|
self.smoothness = 1e-3
|
||||||
|
|
||||||
def train_step(self, data):
|
def train_step(self, data):
|
||||||
"""
|
"""
|
||||||
@@ -44,32 +46,45 @@ class UnsupervisedPoseDepthLearner(keras.Model):
|
|||||||
# TODO: Pull coords out of train step into initialiser, then it only needs to be created once.
|
# TODO: Pull coords out of train step into initialiser, then it only needs to be created once.
|
||||||
# Ideally the size/batch size will still be calculated automatically
|
# Ideally the size/batch size will still be calculated automatically
|
||||||
coords = warp.image_coordinate(shape[0], shape[1], shape[2])
|
coords = warp.image_coordinate(shape[0], shape[1], shape[2])
|
||||||
|
total_loss = 0
|
||||||
|
|
||||||
scale_losses = []
|
scale_losses = []
|
||||||
# For each scale, do the projective inverse warp step and calculate losses
|
# For each scale, do the projective inverse warp step and calculate losses
|
||||||
for i in range(self.num_scales):
|
for scale in range(self.num_scales):
|
||||||
# TODO: Could simplify this by stacking the source images (see sfmlearner)
|
# TODO: Could simplify this by stacking the source images (see sfmlearner)
|
||||||
# It isn't too much of an issue right now since we're only using 2 images like in monodepth
|
# It isn't too much of an issue right now since we're only using 2 images (left/right)
|
||||||
# For each depth output (scale), do the projective inverse warp on each input image and calculate the losses
|
# For each depth output (scale), do the projective inverse warp on each input image and calculate the losses
|
||||||
# Only take the min loss between the two warped images (from monodepth2)
|
# Only take the min loss between the two warped images (from monodepth2)
|
||||||
warp1 = warp.projective_inverse_warp(data.frames[0], depth[i], pose1, data.intrinsics, coords)
|
warp1 = warp.projective_inverse_warp(data.frames[0], depth[scale], pose1, data.intrinsics, coords)
|
||||||
warp2 = warp.projective_inverse_warp(data.frames[2], depth[i], pose1, data.intrinsics, coords)
|
warp2 = warp.projective_inverse_warp(data.frames[2], depth[scale], pose1, data.intrinsics, coords)
|
||||||
|
|
||||||
# Per pixel loss is just the difference in pixel intensities?
|
# Per pixel loss is just the difference in pixel intensities?
|
||||||
# Something like l1 plus ssim
|
# Something like l1 plus ssim
|
||||||
loss1 = loss.make_combined_ssim_l1_loss(data.frames[1], warp1)
|
warp_loss1 = loss.make_combined_ssim_l1_loss(data.frames[1], warp1)
|
||||||
loss2 = loss.make_combined_ssim_l1_loss(data.frames[1], warp2)
|
warp_loss2 = loss.make_combined_ssim_l1_loss(data.frames[1], warp2)
|
||||||
|
|
||||||
# Take the min from these? Or min after auto-masking? I think after auto masking
|
# Compute the loss between the target (data.frames[1]) and the unwarped (pre-warp) source images
|
||||||
|
source_loss1 = loss.make_combined_ssim_l1_loss(data.frames[1], data.frames[0])
|
||||||
|
source_loss2 = loss.make_combined_ssim_l1_loss(data.frames[1], data.frames[2])
|
||||||
|
|
||||||
|
# Take the per-pixel minimum across the four loss maps (two warped and two unwarped source images)
|
||||||
# Also do the auto masking from monodepth2 (compare pixel difference between warped with difference
|
# TODO: Verify the axes are correct
|
||||||
# in source, if source is more different then ignore the pixel).
|
reprojection_loss = tf.reduce_mean(
|
||||||
pass
|
tf.reduce_min(tf.concat([warp_loss1, warp_loss2, source_loss1, source_loss2], axis=3), axis=3))
|
||||||
|
|
||||||
# Collect losses, average them out
|
|
||||||
|
|
||||||
# Calculate smooth losses
|
# Calculate smooth losses
|
||||||
|
smooth_loss = loss.smooth_loss(depth[scale], data.frames[1])
|
||||||
|
|
||||||
|
# TODO: Monodepth also divides the smooth loss by 2 ** scale, why?
|
||||||
|
smoothed_reprojection_loss = self.smoothness * smooth_loss / (2 ** scale)
|
||||||
|
|
||||||
|
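# Add this scale's loss to the total: pixel (reprojection) loss plus the smoothness loss with its weighting applied. NOTE(review): the weighted term computed above is not the one added below - confirm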
|
||||||
|
total_loss += reprojection_loss + smooth_loss
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Collect losses, average them out (divide by number of scales)
|
||||||
|
total_loss /= self.num_scales
|
||||||
|
|
||||||
|
# Apply optimise step on total loss
|
||||||
|
|
||||||
pass
|
pass
|
||||||
Reference in New Issue
Block a user