Start adding optimise step to unsupervised train, add demo to main
main.py
@@ -1,4 +1,6 @@
 import fast_depth_functional as fd
+from unsupervised.models import pose_net, wrap_mobilenet_nnconv5_for_utrain
+from unsupervised.train import UnsupervisedPoseDepthLearner
 
 if __name__ == '__main__':
     fd.fix_windows_gpu()
@@ -9,3 +11,11 @@ if __name__ == '__main__':
 
     # Save in Tensorflow SavedModel format
     # tf.saved_model.save(model, 'fast_depth_nyu_v2_224_224_3_e1_saved_model')
+
+    # Unsupervised
+    depth_model = fd.mobilenet_nnconv5()
+    pose_model = pose_net()
+    model = UnsupervisedPoseDepthLearner(wrap_mobilenet_nnconv5_for_utrain(depth_model), pose_model)
+    model.compile(optimizer='adam')
+    # TODO: Incorporate data generator
+    # model.fit()
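The data-generator TODO above is left open; a minimal sketch of one possible pipeline, assuming three consecutive 224x224 video frames per sample and a namedtuple matching the `data.frames` / `data.intrinsics` access used in `train_step` (all names and shapes here are hypothetical):

```python
import collections
import numpy as np
import tensorflow as tf

# Hypothetical sample structure matching the attribute access in train_step.
Sample = collections.namedtuple('Sample', ['frames', 'intrinsics'])

def frame_triplets(video, intrinsics):
    """Yield (previous, target, next) frame windows from a video sequence."""
    for i in range(1, len(video) - 1):
        frames = np.stack(video[i - 1:i + 2]).astype(np.float32) / 255.0
        yield Sample(frames=frames, intrinsics=intrinsics)

# Wrapped as a tf.data pipeline it could feed model.fit directly, e.g.:
# dataset = tf.data.Dataset.from_generator(
#     lambda: frame_triplets(video, intrinsics),
#     output_signature=Sample(
#         frames=tf.TensorSpec((3, 224, 224, 3), tf.float32),
#         intrinsics=tf.TensorSpec((3, 3), tf.float32)))
# model.fit(dataset.batch(8))
```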
unsupervised/train.py

@@ -6,8 +6,7 @@ Allows pluggable depth networks for differing performance (including fast-depth)
 
 import tensorflow as tf
 import tensorflow.python.keras as keras
-from unsupervised import warp
-import unsupervised.loss as loss
+from unsupervised import warp, loss
 
 
 class UnsupervisedPoseDepthLearner(keras.Model):
@@ -31,16 +30,26 @@ class UnsupervisedPoseDepthLearner(keras.Model):
 
         :param data: Format: {frames: Mat[3], intrinsics: Tensor}
         """
-        # Pass through depth for target image
-        # TODO: Convert frame to tensor (or do this in the dataloader)
-        # TODO: Ensure the depth output includes enough outputs for each scale
-        depth = self.depth_model(data.frames[1])
+        with tf.GradientTape() as tape:
+            # Pass through depth for target image
+            # TODO: Convert frame to tensor (or do this in the dataloader)
+            # TODO: Ensure the depth output includes enough outputs for each scale
+            depth = self.depth_model(data.frames[1])
 
-        # Pass through depth -> pose for both source images
-        # TODO: Concat these poses using tf.concat
-        pose1 = self.pose_model(data.frames[1], data.frames[0])
-        pose2 = self.pose_model(data.frames[1], data.frames[2])
+            # Pass through depth -> pose for both source images
+            # TODO: Concat these poses using tf.concat
+            pose1 = self.pose_model(data.frames[1], data.frames[0])
+            pose2 = self.pose_model(data.frames[1], data.frames[2])
+
+            loss = self.calculate_loss(depth, pose1, pose2, data)
+
+        # Apply optimise step on total loss
+        # TODO: Do these need to be separate for depth/pose model?
+        trainable_vars = self.depth_model.trainable_weights + self.pose_model.trainable_weights
+        grads = tape.gradient(loss, trainable_vars)
+        self.optimizer.apply_gradients(zip(grads, trainable_vars))
 
     def calculate_loss(self, depth, pose1, pose2, data):
         shape = depth[0].shape
 
         # TODO: Pull coords out of train step into initialiser, then it only needs to be created once.
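On the TODO above: a single optimizer over the concatenated weight lists is the common choice, since one loss drives both networks. If depth and pose ever do need separate optimizers, a persistent tape allows one gradient call per model. A minimal sketch, assuming hypothetical `depth_optimizer` and `pose_optimizer` attributes on the learner:

```python
import tensorflow as tf

def separate_optimizer_step(learner, data):
    """Sketch of a train step with per-model optimizers (hypothetical
    depth_optimizer / pose_optimizer attributes on the learner)."""
    with tf.GradientTape(persistent=True) as tape:
        depth = learner.depth_model(data.frames[1])
        pose1 = learner.pose_model(data.frames[1], data.frames[0])
        pose2 = learner.pose_model(data.frames[1], data.frames[2])
        # Per the TODO above, the two poses could also be fused here, e.g.
        # poses = tf.concat([pose1, pose2], axis=0)
        loss = learner.calculate_loss(depth, pose1, pose2, data)

    # One gradient call per model, each applied by its own optimizer.
    for model, opt in ((learner.depth_model, learner.depth_optimizer),
                       (learner.pose_model, learner.pose_optimizer)):
        grads = tape.gradient(loss, model.trainable_weights)
        opt.apply_gradients(zip(grads, model.trainable_weights))
    del tape  # a persistent tape holds resources until released
```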
@@ -57,7 +66,7 @@ class UnsupervisedPoseDepthLearner(keras.Model):
 
             # Only take the min loss between the two warped images (from monodepth2)
             # TODO: Need to bilinear resize the depth at each scale up to the size of image
             warp1 = warp.projective_inverse_warp(data.frames[0], depth[scale], pose1, data.intrinsics, coords)
-            warp2 = warp.projective_inverse_warp(data.frames[2], depth[scale], pose1, data.intrinsics, coords)
+            warp2 = warp.projective_inverse_warp(data.frames[2], depth[scale], pose2, data.intrinsics, coords)
 
             # Per pixel loss is just the difference in pixel intensities?
             # Something like l1 plus ssim
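The two comments above point at the usual photometric term; a sketch of a monodepth2-style version, combining a local SSIM map with L1 and taking the per-pixel minimum over the two reprojections. The 0.85 weight follows monodepth2; the SAME-padded pooling here is an approximation of its reflection padding:

```python
import tensorflow as tf

def ssim_map(x, y):
    """Local SSIM computed with 3x3 average pooling over NHWC images."""
    c1, c2 = 0.01 ** 2, 0.03 ** 2
    pool = lambda t: tf.nn.avg_pool2d(t, ksize=3, strides=1, padding='SAME')
    mu_x, mu_y = pool(x), pool(y)
    sigma_x = pool(x * x) - mu_x ** 2
    sigma_y = pool(y * y) - mu_y ** 2
    sigma_xy = pool(x * y) - mu_x * mu_y
    num = (2 * mu_x * mu_y + c1) * (2 * sigma_xy + c2)
    den = (mu_x ** 2 + mu_y ** 2 + c1) * (sigma_x + sigma_y + c2)
    return tf.clip_by_value(num / den, 0.0, 1.0)

def photometric_error(target, warped, alpha=0.85):
    """Per-pixel weighted SSIM + L1 error (monodepth2 uses alpha = 0.85)."""
    l1 = tf.reduce_mean(tf.abs(target - warped), axis=-1, keepdims=True)
    dssim = tf.reduce_mean((1.0 - ssim_map(target, warped)) / 2.0,
                           axis=-1, keepdims=True)
    return alpha * dssim + (1.0 - alpha) * l1

# Per-pixel minimum over the two warps (monodepth2), then mean over the image:
# error = tf.minimum(photometric_error(target, warp1),
#                    photometric_error(target, warp2))
# loss_at_scale = tf.reduce_mean(error)
```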
@@ -89,7 +98,4 @@ class UnsupervisedPoseDepthLearner(keras.Model):
 
         # Collect losses, average them out (divide by number of scales)
         total_loss /= self.num_scales
-
-        # Apply optimise step on total loss
-
-        pass
+        return total_loss
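The bilinear-resize TODO and the averaging above combine naturally into one per-scale loop; a minimal sketch, assuming `depths` is the list of per-scale predictions and `loss_at_scale` is a hypothetical callable computing the photometric loss for one full-resolution depth map:

```python
import tensorflow as tf

def multi_scale_loss(depths, image_hw, loss_at_scale):
    """Average a photometric loss over scales, upsampling each depth map."""
    total_loss = 0.0
    for d in depths:
        # tf.image.resize defaults to bilinear interpolation, matching the
        # "bilinear resize ... up to the size of image" TODO above.
        d_full = tf.image.resize(d, image_hw)
        total_loss += loss_at_scale(d_full)
    # Collect losses and average (divide by the number of scales).
    return total_loss / len(depths)
```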