From 950dd4582e33fc884243b50ffe3fc7eb62a7d72a Mon Sep 17 00:00:00 2001
From: Piv <18462828+Piv200@users.noreply.github.com>
Date: Tue, 3 May 2022 19:51:34 +0930
Subject: [PATCH] Start adding optimise step to unsupervised train, add demo
 to main

---
 main.py               | 10 ++++++++++
 unsupervised/train.py | 38 +++++++++++++++++++++++---------------
 2 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/main.py b/main.py
index d72959e..73342ad 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,6 @@
 import fast_depth_functional as fd
+from unsupervised.models import pose_net, wrap_mobilenet_nnconv5_for_utrain
+from unsupervised.train import UnsupervisedPoseDepthLearner
 
 if __name__ == '__main__':
     fd.fix_windows_gpu()
@@ -9,3 +11,11 @@ if __name__ == '__main__':
 
     # Save in Tensorflow SavedModel format
     # tf.saved_model.save(model, 'fast_depth_nyu_v2_224_224_3_e1_saved_model')
+
+    # Unsupervised
+    depth_model = fd.mobilenet_nnconv5()
+    pose_model = pose_net()
+    model = UnsupervisedPoseDepthLearner(wrap_mobilenet_nnconv5_for_utrain(depth_model), pose_model)
+    model.compile(optimizer='adam')
+    # TODO: Incorporate data generator
+    # model.fit()

diff --git a/unsupervised/train.py b/unsupervised/train.py
index 0100814..e0f661c 100644
--- a/unsupervised/train.py
+++ b/unsupervised/train.py
@@ -6,8 +6,7 @@ Allows pluggable depth networks for differing performance (including fast-depth)
 import tensorflow as tf
 import tensorflow.python.keras as keras
 
-from unsupervised import warp
-import unsupervised.loss as loss
+from unsupervised import warp, loss
 
 
 class UnsupervisedPoseDepthLearner(keras.Model):
@@ -31,16 +30,28 @@ class UnsupervisedPoseDepthLearner(keras.Model):
 
         :param data: Format: {frames: Mat[3], intrinsics: Tensor}
         """
-        # Pass through depth for target image
-        # TODO: Convert frame to tensor (or do this in the dataloader)
-        # TODO: Ensure the depth output includes enough outputs for each scale
-        depth = self.depth_model(data.frames[1])
+        with tf.GradientTape() as tape:
+            # Pass through depth for target image
+            # TODO: Convert frame to tensor (or do this in the dataloader)
+            # TODO: Ensure the depth output includes enough outputs for each scale
+            depth = self.depth_model(data.frames[1])
 
-        # Pass through depth -> pose for both source images
-        # TODO: Concat these poses using tf.concat
-        pose1 = self.pose_model(data.frames[1], data.frames[0])
-        pose2 = self.pose_model(data.frames[1], data.frames[2])
+            # Pass through depth -> pose for both source images
+            # TODO: Concat these poses using tf.concat
+            pose1 = self.pose_model(data.frames[1], data.frames[0])
+            pose2 = self.pose_model(data.frames[1], data.frames[2])
+
+            # Named total_loss so the imported loss module is not shadowed
+            total_loss = self.calculate_loss(depth, pose1, pose2, data)
 
+        # Apply optimise step on total loss; one concatenated variable list
+        # gives a single joint update, so the models do not need separate steps
+        trainable_weights = self.depth_model.trainable_weights + self.pose_model.trainable_weights
+        grads = tape.gradient(total_loss, trainable_weights)
+        self.optimizer.apply_gradients(zip(grads, trainable_weights))
+        return {'loss': total_loss}
+
+    def calculate_loss(self, depth, pose1, pose2, data):
         shape = depth[0].shape
         # TODO: Pull coords out of train step into initialiser, then it only needs to be created once.
@@ -57,7 +68,7 @@ class UnsupervisedPoseDepthLearner(keras.Model):
             # Only take the min loss between the two warped images (from monodepth2)
             # TODO: Need to bilinear resize the depth at each scale up to the size of image
             warp1 = warp.projective_inverse_warp(data.frames[0], depth[scale], pose1, data.intrinsics, coords)
-            warp2 = warp.projective_inverse_warp(data.frames[2], depth[scale], pose1, data.intrinsics, coords)
+            warp2 = warp.projective_inverse_warp(data.frames[2], depth[scale], pose2, data.intrinsics, coords)
 
             # Per pixel loss is just the difference in pixel intensities?
             # Something like l1 plus ssim
@@ -89,7 +100,4 @@ class UnsupervisedPoseDepthLearner(keras.Model):
 
         # Collect losses, average them out (divide by number of scales)
         total_loss /= self.num_scales
-
-        # Apply optimise step on total loss
-
-        pass
+        return total_loss
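
As a standalone check of the joint optimise step above (one GradientTape over
both networks, gradients taken against the two models' concatenated variable
lists), here is a minimal runnable sketch. The two Sequential models and the
stand-in loss are placeholders, not the real mobilenet_nnconv5/pose_net:

import tensorflow as tf

# Placeholder networks purely so the sketch runs; the real models are the
# wrapped mobilenet_nnconv5 depth network and pose_net
depth_model = tf.keras.Sequential([tf.keras.layers.Conv2D(1, 3, padding='same')])
pose_model = tf.keras.Sequential([tf.keras.layers.Flatten(),
                                  tf.keras.layers.Dense(6)])
optimizer = tf.keras.optimizers.Adam()

images = tf.random.uniform((4, 224, 224, 3))
with tf.GradientTape() as tape:
    depth = depth_model(images)
    pose = pose_model(images)
    loss = tf.reduce_mean(depth) + tf.reduce_mean(pose)  # stand-in loss

# One concatenated list means one gradient pass and one apply_gradients call,
# so the depth and pose models do not need separate optimise steps
trainable = depth_model.trainable_variables + pose_model.trainable_variables
grads = tape.gradient(loss, trainable)
optimizer.apply_gradients(zip(grads, trainable))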
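On the "Per pixel loss" and min-of-two-warps comments: monodepth2's per-pixel
photometric error is a weighted DSSIM + L1 (0.85/0.15), reduced by taking the
per-pixel minimum over the two source-frame warps. A sketch under those
assumptions, with a 3x3 mean-pooled SSIM so the similarity map stays per-pixel;
the names here are illustrative, not the unsupervised.loss API:

import tensorflow as tf

def local_ssim(x, y, c1=0.01 ** 2, c2=0.03 ** 2):
    # Per-pixel SSIM map via 3x3 mean pooling, as in monodepth2's SSIM layer
    def pool(t):
        return tf.nn.avg_pool2d(t, ksize=3, strides=1, padding='SAME')
    mu_x, mu_y = pool(x), pool(y)
    sigma_x = pool(x * x) - mu_x * mu_x
    sigma_y = pool(y * y) - mu_y * mu_y
    sigma_xy = pool(x * y) - mu_x * mu_y
    num = (2.0 * mu_x * mu_y + c1) * (2.0 * sigma_xy + c2)
    den = (mu_x * mu_x + mu_y * mu_y + c1) * (sigma_x + sigma_y + c2)
    return tf.clip_by_value(num / den, -1.0, 1.0)

def min_reprojection_loss(target, warp1, warp2, alpha=0.85):
    def per_pixel(warped):
        # alpha * DSSIM + (1 - alpha) * L1, computed per pixel
        l1 = tf.reduce_mean(tf.abs(target - warped), axis=-1, keepdims=True)
        dssim = tf.reduce_mean((1.0 - local_ssim(target, warped)) / 2.0,
                               axis=-1, keepdims=True)
        return alpha * dssim + (1.0 - alpha) * l1
    # Per-pixel minimum over the two warps masks pixels occluded in one of
    # the source frames (monodepth2's minimum reprojection)
    return tf.reduce_mean(tf.minimum(per_pixel(warp1), per_pixel(warp2)))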
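And for the "# TODO: Incorporate data generator" in main.py, a tf.data pipeline
yielding the {frames, intrinsics} structure from the train_step docstring could
drive model.fit directly. Random tensors stand in for a real video loader, and
make_dataset is a hypothetical name; note that Keras hands each batch to
train_step as a dict, so data['frames'] rather than data.frames would be the
usual access pattern:

import tensorflow as tf

def make_dataset(num_samples=8, height=224, width=224, batch_size=4):
    # Hypothetical loader: random frame triples purely so the pipeline runs;
    # a real loader would slice consecutive frames from video sequences
    frames = tf.random.uniform((num_samples, 3, height, width, 3))
    intrinsics = tf.eye(3, batch_shape=[num_samples])
    return tf.data.Dataset.from_tensor_slices(
        {'frames': frames, 'intrinsics': intrinsics}).batch(batch_size)

# model.fit(make_dataset()) would then invoke train_step once per batch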