Start adding optimise step to unsupervised train, add demo to main
main.py
@@ -1,4 +1,6 @@
 import fast_depth_functional as fd
+from unsupervised.models import pose_net, wrap_mobilenet_nnconv5_for_utrain
+from unsupervised.train import UnsupervisedPoseDepthLearner
 
 if __name__ == '__main__':
     fd.fix_windows_gpu()
@@ -9,3 +11,11 @@ if __name__ == '__main__':
 
     # Save in Tensorflow SavedModel format
     # tf.saved_model.save(model, 'fast_depth_nyu_v2_224_224_3_e1_saved_model')
+
+    # Unsupervised
+    depth_model = fd.mobilenet_nnconv5()
+    pose_model = pose_net()
+    model = UnsupervisedPoseDepthLearner(wrap_mobilenet_nnconv5_for_utrain(depth_model), pose_model)
+    model.compile(optimizer='adam')
+    # TODO: Incorporate data generator
+    # model.fit()
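
The TODO above leaves the input pipeline open and model.fit() commented out. A minimal sketch of a generator yielding the {frames: Mat[3], intrinsics: Tensor} structure that train_step reads; Sample and random_triplets are hypothetical names, and real samples would come from consecutive video frames rather than noise:

import collections
import numpy as np

# Hypothetical container matching the train_step docstring's
# {frames: Mat[3], intrinsics: Tensor} format.
Sample = collections.namedtuple('Sample', ['frames', 'intrinsics'])

def random_triplets(num_batches=8, batch=4, h=224, w=224):
    # Placeholder intrinsics: focal length 200px, principal point at the centre.
    k = np.array([[200., 0., w / 2.],
                  [0., 200., h / 2.],
                  [0., 0., 1.]], dtype=np.float32)
    for _ in range(num_batches):
        # Three consecutive frames; frames[1] is the target, 0 and 2 the sources.
        frames = [np.random.rand(batch, h, w, 3).astype(np.float32)
                  for _ in range(3)]
        yield Sample(frames, np.tile(k, (batch, 1, 1)))

# for sample in random_triplets():
#     model.train_step(sample)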

unsupervised/train.py
@@ -6,8 +6,7 @@ Allows pluggable depth networks for differing performance (including fast-depth)
 
 import tensorflow as tf
 import tensorflow.python.keras as keras
-from unsupervised import warp
-import unsupervised.loss as loss
+from unsupervised import warp, loss
 
 
 class UnsupervisedPoseDepthLearner(keras.Model):
@@ -31,16 +30,26 @@ class UnsupervisedPoseDepthLearner(keras.Model):
         :param data: Format: {frames: Mat[3], intrinsics: Tensor}
         """
-        # Pass through depth for target image
-        # TODO: Convert frame to tensor (or do this in the dataloader)
-        # TODO: Ensure the depth output includes enough outputs for each scale
-        depth = self.depth_model(data.frames[1])
-
-        # Pass through depth -> pose for both source images
-        # TODO: Concat these poses using tf.concat
-        pose1 = self.pose_model(data.frames[1], data.frames[0])
-        pose2 = self.pose_model(data.frames[1], data.frames[2])
-
-
-        shape = depth[0].shape
+        with tf.GradientTape() as tape:
+            # Pass through depth for target image
+            # TODO: Convert frame to tensor (or do this in the dataloader)
+            # TODO: Ensure the depth output includes enough outputs for each scale
+            depth = self.depth_model(data.frames[1])
+
+            # Pass through depth -> pose for both source images
+            # TODO: Concat these poses using tf.concat
+            pose1 = self.pose_model(data.frames[1], data.frames[0])
+            pose2 = self.pose_model(data.frames[1], data.frames[2])
+
+            loss = self.calculate_loss(depth, pose1, pose2, data)
+
+        # Apply optimise step on total loss
+        # TODO: Do these need to be separate for depth/pose model?
+        weights = self.depth_model.trainable_weights + self.pose_model.trainable_weights
+        grads = tape.gradient(loss, weights)
+        self.optimizer.apply_gradients(zip(grads, weights))
+
+    def calculate_loss(self, depth, pose1, pose2, data):
+        shape = depth[0].shape
 
         # TODO: Pull coords out of train step into initialiser, then it only needs to be created once.
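
The joint update above can be sanity-checked in isolation: one tape, one concatenated trainable_weights list, one optimizer updating both networks. A self-contained toy version, with two Sequential stand-ins for the real depth and pose models (note that a Keras train_step override is also expected to return a metrics dict such as {'loss': loss} so fit() can report progress):

import tensorflow as tf

# Two toy networks stand in for the depth and pose models.
net_a = tf.keras.Sequential([tf.keras.layers.Dense(4)])
net_b = tf.keras.Sequential([tf.keras.layers.Dense(6)])
opt = tf.keras.optimizers.Adam()

x = tf.random.normal((2, 8))
with tf.GradientTape() as tape:
    # Forward pass through both networks inside the tape, as in train_step.
    out = tf.reduce_mean(net_a(x)) + tf.reduce_mean(net_b(x))

# One flat variable list lets a single optimizer step update both networks.
weights = net_a.trainable_weights + net_b.trainable_weights
grads = tape.gradient(out, weights)
opt.apply_gradients(zip(grads, weights))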
@@ -57,7 +66,7 @@ class UnsupervisedPoseDepthLearner(keras.Model):
             # Only take the min loss between the two warped images (from monodepth2)
             # TODO: Need to bilinear resize the depth at each scale up to the size of image
             warp1 = warp.projective_inverse_warp(data.frames[0], depth[scale], pose1, data.intrinsics, coords)
-            warp2 = warp.projective_inverse_warp(data.frames[2], depth[scale], pose1, data.intrinsics, coords)
+            warp2 = warp.projective_inverse_warp(data.frames[2], depth[scale], pose2, data.intrinsics, coords)
 
             # Per pixel loss is just the difference in pixel intensities?
             # Something like l1 plus ssim
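
The "l1 plus ssim" question above matches monodepth2's photometric error: 0.85 * (1 - SSIM) / 2 + 0.15 * L1, with a per-pixel minimum over the two source warps. A hedged sketch; ssim_map and photometric_loss are illustrative names, not this repo's unsupervised.loss API, and monodepth2 uses reflection padding where 'SAME' is used here:

import tensorflow as tf

def ssim_map(x, y):
    # Per-pixel SSIM via 3x3 average pooling (monodepth2-style SSIM block).
    c1, c2 = 0.01 ** 2, 0.03 ** 2
    mu_x = tf.nn.avg_pool2d(x, 3, 1, 'SAME')
    mu_y = tf.nn.avg_pool2d(y, 3, 1, 'SAME')
    sigma_x = tf.nn.avg_pool2d(x * x, 3, 1, 'SAME') - mu_x ** 2
    sigma_y = tf.nn.avg_pool2d(y * y, 3, 1, 'SAME') - mu_y ** 2
    sigma_xy = tf.nn.avg_pool2d(x * y, 3, 1, 'SAME') - mu_x * mu_y
    num = (2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2)
    den = (mu_x ** 2 + mu_y ** 2 + c1) * (sigma_x + sigma_y + c2)
    return tf.clip_by_value(num / den, 0.0, 1.0)

def photometric_loss(target, warped, alpha=0.85):
    # Blend of per-pixel L1 and SSIM dissimilarity, averaged over channels.
    l1 = tf.reduce_mean(tf.abs(target - warped), axis=3, keepdims=True)
    ssim = tf.reduce_mean((1.0 - ssim_map(target, warped)) / 2.0, axis=3, keepdims=True)
    return alpha * ssim + (1.0 - alpha) * l1

# Min over the two warps, per pixel, then mean over the image (monodepth2's
# minimum reprojection, which discounts pixels occluded in one source frame):
# per_pixel = tf.minimum(photometric_loss(target, warp1),
#                        photometric_loss(target, warp2))
# scale_loss = tf.reduce_mean(per_pixel)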
@@ -89,7 +98,4 @@ class UnsupervisedPoseDepthLearner(keras.Model):
 
         # Collect losses, average them out (divide by number of scales)
         total_loss /= self.num_scales
-
-        # Apply optimise step on total loss
-
-        pass
+        return total_loss
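
The bilinear-resize TODO in calculate_loss maps onto tf.image.resize directly. A small sketch, assuming depth is the list of per-scale maps indexed as depth[scale] above:

import tensorflow as tf

def upsample_depths(depths, image_hw):
    # Resize each coarse depth map (B, h_s, w_s, 1) up to the full image size,
    # so the warp and photometric loss always run at full resolution
    # (monodepth2 likewise upsamples depth rather than downscaling the images).
    return [tf.image.resize(d, image_hw, method='bilinear') for d in depths]

# e.g. depth_full = upsample_depths(depth, (224, 224)) before the warp calls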