Compare commits

18 Commits

Author SHA1 Message Date
Piv
950dd4582e Start adding optimise step to unsupervised train, add demo to main 2022-05-03 19:51:34 +09:30
Piv
909c38cf56 Fix total loss calculation, add some more todos 2022-05-03 19:05:29 +09:30
piv
188c55d1c8 Update for python 2.10, add general training algorithm step 2022-05-03 16:56:15 +09:30
Piv
aa423cc38a Start adding unsupervised train loop 2021-11-20 13:37:26 +10:30
Piv
2bb37b2722 Fix up generator to include intrinsics 2021-08-29 19:26:15 +09:30
Piv
90b73bf420 Start adding generators for unsupervised training 2021-08-29 18:06:37 +09:30
Piv
42fcf5554a Update warp test to verify output shape 2021-08-24 21:39:10 +09:30
Piv
c164c9720a Finish Projective Inverse Warp algorithm 2021-08-24 20:13:30 +09:30
Piv
b7917ec465 More warp implementation 2021-08-21 17:32:16 +09:30
Piv
df1ac89a81 Add euler to rotation matrix, grid flattening 2021-08-10 20:39:52 +09:30
Piv
8016f0f945 Add coordinates generation implementation 2021-08-08 22:11:50 +09:30
Piv
ece37843ce Merge branch 'main' into unsupervised 2021-08-08 18:58:39 +09:30
Piv
cd278e683f Start adding pose warp conversions 2021-08-07 17:18:06 +09:30
Piv
5996d6eaf0 Merge branch 'main' into unsupervised 2021-08-05 17:49:48 +09:30
Piv
8be4ce4e6d Add smooth loss 2021-08-05 17:48:44 +09:30
Piv
b95442bb23 Finish off pose net 2021-08-04 20:51:46 +09:30
Piv
a111f89722 Start adding pose decoder 2021-08-03 20:25:19 +09:30
Piv
2372b906df Add resnet18 2021-08-01 10:44:33 +09:30
7 changed files with 479 additions and 25 deletions

10
main.py
View File

@@ -1,4 +1,6 @@
import fast_depth_functional as fd
from unsupervised.models import pose_net, wrap_mobilenet_nnconv5_for_utrain
from unsupervised.train import UnsupervisedPoseDepthLearner
if __name__ == '__main__':
fd.fix_windows_gpu()
@@ -9,3 +11,11 @@ if __name__ == '__main__':
# Save in Tensorflow SavedModel format
# tf.saved_model.save(model, 'fast_depth_nyu_v2_224_224_3_e1_saved_model')
# Unsupervised
depth_model = fd.mobilenet_nnconv5()
pose_model = pose_net()
model = UnsupervisedPoseDepthLearner(wrap_mobilenet_nnconv5_for_utrain(depth_model), pose_model)
model.compile(optimizer='adam')
# TODO: Incorporate data generator
# model.fit()

50
unsupervised/load.py Normal file
View File

@@ -0,0 +1,50 @@
import os
import cv2
def video_generator(video_path_or_folder, intrinsics, allowed_extensions=('mp4', 'mkv', 'mov')):
"""
Create a generator for unsupervised training on depth sequences from a video file or folder of video files
:param video_path_or_folder: Video file or folder with list of video files to iterate through
:param intrinsics: Intrinsics for the videos TODO: Intrinsics per video
:param allowed_extensions: Allowed video extensions, to not accidentally pick files that aren't videos
:return: generator that yields dict of {frames: [frame1, frame2, frame3], intrinsics: [fx, fy, tx, ty]}
"""
if os.path.isfile(video_path_or_folder):
# This function is itself a generator (it contains yield below), so a plain return would not hand the
# inner generator back to the caller; delegate with yield from instead, passing the intrinsics through
yield from _single_video_generator(video_path_or_folder, intrinsics)
else:
for root, dirs, files in os.walk(video_path_or_folder):
for file in files:
# os.path.splitext returns the extension with a leading dot, so strip it before comparing
if os.path.splitext(file)[1].lstrip('.').lower() in allowed_extensions:
yield from _single_video_generator(os.path.join(root, file), intrinsics)
def _single_video_generator(video_file, intrinsics):
# Single video file
video = cv2.VideoCapture(video_file)
try:
# Buffer to store 3 frames, yield when this fills up
current_frames = []
while video.grab():
# retrieve() returns a (success, frame) tuple; keep only the decoded frame
current_frames.append(video.retrieve()[1])
if len(current_frames) == 3:
temp_frames = current_frames
current_frames = []
# TODO: Consider converting frames to tensor
yield {'frames': temp_frames, 'intrinsics': intrinsics}
finally:
video.release()
def image_generator(root_folder):
"""
Create an image generator for unsupervised training
:param root_folder:
:return:
"""
pass
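A minimal sketch of how video_generator above could be consumed (not part of the diff); the video path and intrinsics values are placeholders:

from unsupervised.load import video_generator

intrinsics = [1000.0, 1000.0, 320.0, 240.0]  # fx, fy, tx, ty as described in the docstring
for sample in video_generator('videos/drive.mp4', intrinsics):
    prev_frame, target_frame, next_frame = sample['frames']
    print(prev_frame.shape, sample['intrinsics'])
    break  # only inspect the first frame triplet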

View File

@@ -9,8 +9,8 @@ def wrap_mobilenet_nnconv5_for_utrain(model):
This just exposes the lower disparity layers as outputs, so they can be used to train at different scales/image
resolutions.
:param model:
:return:
:param model: Fast Depth model to wrap
:return: Keras model that takes same input as model and outputs the model output plus 3 disparity layers
"""
input = model.input
disp_1 = model.get_layer('conv_pw_%d_relu' % 15).output
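The diff for this file is truncated here. As a general illustration of the wrapping idea described in the docstring above, a hedged sketch (expose_intermediate_outputs and the second layer name are hypothetical, not the repository's code):

import tensorflow.keras as keras

def expose_intermediate_outputs(model, layer_names):
    # Build a new model that returns the original prediction plus the named intermediate layers,
    # so losses can be computed at several scales/resolutions
    extra_outputs = [model.get_layer(name).output for name in layer_names]
    return keras.Model(inputs=model.input, outputs=[model.output] + extra_outputs)

# e.g. expose_intermediate_outputs(depth_model, ['conv_pw_15_relu', 'conv_pw_13_relu'])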

View File

@@ -3,7 +3,9 @@ Utils to load and split image/video data.
"""
from __future__ import division
import math
import tensorflow as tf
@@ -58,6 +60,49 @@ def euler2mat(z, y, x):
return rotMat
def euler2mat_noNDim(x, y, z):
"""
:param x: Tensor of shape (B, 1) - x axis rotation
:param y: Tensor of shape (B, 1) - y axis rotation
:param z: Tensor of shape (B, 1) - z axis rotation
:return: Rotation matrix for the given Euler angles, applied in the order rotation(x) -> rotation(y) -> rotation(z), i.e. R = Rz.Ry.Rx
"""
batch_size = tf.shape(z)[0]
# Euler angles should be between -pi and pi, clip so the pose network is coerced to this range
z = tf.clip_by_value(z, -math.pi, math.pi)
y = tf.clip_by_value(y, -math.pi, math.pi)
x = tf.clip_by_value(x, -math.pi, math.pi)
zeros = tf.zeros([batch_size, 1])
ones = tf.ones([batch_size, 1])
cosx = tf.cos(x)
sinx = tf.sin(x)
rotx_1 = tf.concat([ones, zeros, zeros], axis=1)
rotx_2 = tf.concat([zeros, cosx, -sinx], axis=1)
rotx_3 = tf.concat([zeros, sinx, cosx], axis=1)
xmat = tf.reshape(tf.concat([rotx_1, rotx_2, rotx_3], axis=1), [batch_size, 3, 3])
cosz = tf.cos(z)
sinz = tf.sin(z)
rotz_1 = tf.concat([cosz, -sinz, zeros], axis=1)
rotz_2 = tf.concat([sinz, cosz, zeros], axis=1)
rotz_3 = tf.concat([zeros, zeros, ones], axis=1)
zmat = tf.reshape(tf.concat([rotz_1, rotz_2, rotz_3], axis=1), [batch_size, 3, 3])
cosy = tf.cos(y)
siny = tf.sin(y)
roty_1 = tf.concat([cosy, zeros, siny], axis=1)
roty_2 = tf.concat([zeros, ones, zeros], axis=1)
roty_3 = tf.concat([-siny, zeros, cosy], axis=1)
ymat = tf.reshape(tf.concat([roty_1, roty_2, roty_3], axis=1), [batch_size, 3, 3])
rotMat = tf.matmul(tf.matmul(zmat, ymat), xmat)
return rotMat
def pose_vec2mat(vec):
"""Converts 6DoF parameters to transformation matrix
Args:
@@ -281,6 +326,7 @@ def bilinear_sampler(imgs, coords):
])
return output
# Spatial transformer network bilinear sampler, taken from https://github.com/kevinzakka/spatial-transformer-network/blob/master/stn/transformer.py
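A quick sanity check for euler2mat_noNDim (not part of the diff), assuming SciPy is installed and the snippet runs in the same module that defines the function. SciPy's lower-case 'xyz' order means extrinsic rotations about the fixed x, then y, then z axes, which composes to Rz.Ry.Rx, matching the order used above:

import numpy as np
import tensorflow as tf
from scipy.spatial.transform import Rotation

x, y, z = tf.constant([[0.1]]), tf.constant([[0.2]]), tf.constant([[0.3]])
tf_mat = euler2mat_noNDim(x, y, z).numpy()[0]
scipy_mat = Rotation.from_euler('xyz', [0.1, 0.2, 0.3]).as_matrix()
print(np.allclose(tf_mat, scipy_mat, atol=1e-5))  # expected: True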

View File

@@ -4,17 +4,98 @@ Trainer to learn depth information on unlabeled data (raw images/videos)
Allows pluggable depth networks for differing performance (including fast-depth)
"""
import tensorflow.keras as keras
import tensorflow as tf
import tensorflow.python.keras as keras
from unsupervised import warp, loss
class SFMLearner(keras.Model):
class UnsupervisedPoseDepthLearner(keras.Model):
"""
Keras model to learn simultaneous depth + pose from image/video sequences.
def __init__(depth_model, pose_model):
pass
To train this, the datasource should yield 3 frames and camera intrinsics.
Optionally, velocity + timestamp per frame can be supplied to train to real scale.
"""
def __init__(self, depth_model, pose_model, num_scales=3, *args, **kwargs):
super().__init__(*args, **kwargs)
self.depth_model = depth_model
self.pose_model = pose_model
# TODO: I think num_scales should be something defined on the depth model itself
self.num_scales = num_scales
self.smoothness = 1e-3
def train_step(self, data):
"""
:param data: Format: {frames: Mat[3], intrinsics: Tensor}
"""
with tf.GradientTape() as tape:
# Pass through depth for target image
# TODO: Convert frame to tensor (or do this in the dataloader)
# TODO: Ensure the depth output includes enough outputs for each scale
depth = self.depth_model(data['frames'][1])
# Pass through depth -> pose for both source images
# TODO: Concat these poses using tf.concat
pose1 = self.pose_model(data['frames'][1], data['frames'][0])
pose2 = self.pose_model(data['frames'][1], data['frames'][2])
# Named total_loss rather than loss so the imported loss module is not shadowed
total_loss = self.calculate_loss(depth, pose1, pose2, data)
# Apply optimise step on total loss
# TODO: Do these need to be separate for depth/pose model?
trainable_weights = self.depth_model.trainable_weights + self.pose_model.trainable_weights
grads = tape.gradient(total_loss, trainable_weights)
self.optimizer.apply_gradients(zip(grads, trainable_weights))
# Keras expects train_step to return a dict of metric values
return {'loss': total_loss}
def calculate_loss(self, depth, pose1, pose2, data):
shape = depth[0].shape
# TODO: Pull coords out of train step into initialiser, then it only needs to be created once.
# Ideally the size/batch size will still be calculated automatically
coords = warp.image_coordinate(shape[0], shape[1], shape[2])
# projective_inverse_warp expects coordinates shaped (batch, 3, height * width), as in the warp test
coords = tf.transpose(tf.reshape(coords, [shape[0], shape[1] * shape[2], 3]), [0, 2, 1])
total_loss = 0
scale_losses = []
# For each scale, do the projective inverse warp step and calculate losses
for scale in range(self.num_scales):
# TODO: Could simplify this by stacking the source images (see sfmlearner)
# It isn't too much of an issue right now since we're only using 2 images (left/right)
# For each depth output (scale), do the projective inverse warp on each input image and calculate the losses
# Only take the min loss between the two warped images (from monodepth2)
# TODO: Need to bilinear resize the depth at each scale up to the size of image
warp1 = warp.projective_inverse_warp(data['frames'][0], depth[scale], pose1, data['intrinsics'], coords)
warp2 = warp.projective_inverse_warp(data['frames'][2], depth[scale], pose2, data['intrinsics'], coords)
# Per-pixel photometric loss between the target image and each warped source: combined SSIM + L1
warp_loss1 = loss.make_combined_ssim_l1_loss(data['frames'][1], warp1)
warp_loss2 = loss.make_combined_ssim_l1_loss(data['frames'][1], warp2)
# Also take the loss between the target (frames[1]) and the unwarped source images; including these in
# the per-pixel minimum below masks out pixels that do not move between frames (monodepth2 auto-masking)
source_loss1 = loss.make_combined_ssim_l1_loss(data['frames'][1], data['frames'][0])
source_loss2 = loss.make_combined_ssim_l1_loss(data['frames'][1], data['frames'][2])
# Take the min (per pixel) of the losses of warped/unwarped images (so min across pixels of 4 images)
# TODO: Verify the axes are correct
reprojection_loss = tf.reduce_mean(
tf.reduce_min(tf.concat([warp_loss1, warp_loss2, source_loss1, source_loss2], axis=3), axis=3))
# Calculate smooth losses
# TODO: Since smooth loss is calculated directly on the depth at the scale, we need
# to resize the target image to the same dimensions as the depth map at the current scale
# Can do this by just inspecting the shape of the depth and resizing to match that (but
# with 3 colour channels)
smooth_loss = loss.smooth_loss(depth[scale], data['frames'][1])
# SfMLearner down-weights the smooth loss at coarser scales
weighted_smooth_loss = self.smoothness * smooth_loss / (2 ** scale)
# Add this scale's reprojection loss and weighted smooth loss to the running total
total_loss += reprojection_loss + weighted_smooth_loss
pass
def make_sfm_learner_pose_net(input_shape=(224, 224, 3)):
pass
# Collect losses, average them out (divide by number of scales)
total_loss /= self.num_scales
return total_loss
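The per-pixel minimum used in calculate_loss can be checked in isolation. A tiny sketch (not part of the diff), assuming each loss map has shape (batch, height, width, 1):

import tensorflow as tf

loss_a = tf.fill([1, 2, 2, 1], 3.0)  # stand-in for warp_loss1
loss_b = tf.fill([1, 2, 2, 1], 1.0)  # stand-in for warp_loss2
# Concatenate the candidate losses along the channel axis and take the minimum per pixel
per_pixel_min = tf.reduce_min(tf.concat([loss_a, loss_b], axis=3), axis=3)
print(tf.reduce_mean(per_pixel_min).numpy())  # 1.0 - the smaller loss wins at every pixel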

View File

@@ -1,19 +1,223 @@
def projective_inverse_warp(target_img, source_img, depth, pose, intrinsics):
import math
import tensorflow as tf
def euler_to_matrix(x, y, z):
"""
:param x: Tensor of shape (B, 1) - x axis rotation
:param y: Tensor of shape (B, 1) - y axis rotation
:param z: Tensor of shape (B, 1) - z axis rotation
:return: Rotation matrix for the given Euler angles, applied in the order rotation(x) -> rotation(y) -> rotation(z), i.e. R = Rz.Ry.Rx
"""
batch_size = tf.shape(z)[0]
# Euler angles should be between -pi and pi, clip so the pose network is coerced to this range
z = tf.clip_by_value(z, -math.pi, math.pi)
y = tf.clip_by_value(y, -math.pi, math.pi)
x = tf.clip_by_value(x, -math.pi, math.pi)
cosx = tf.cos(x)
sinx = tf.sin(x)
cosy = tf.cos(y)
siny = tf.sin(y)
cosz = tf.cos(z)
sinz = tf.sin(z)
# Rotate about x, then y, then z. Rz appears first because the rotation matrix multiplies the (column)
# coordinate vector from the left; with row vectors this order would need to be reversed
# R = Rz(φ)Ry(θ)Rx(ψ)
# = | cos(θ)cos(φ)   sin(ψ)sin(θ)cos(φ) - cos(ψ)sin(φ)   cos(ψ)sin(θ)cos(φ) + sin(ψ)sin(φ) |
#   | cos(θ)sin(φ)   sin(ψ)sin(θ)sin(φ) + cos(ψ)cos(φ)   cos(ψ)sin(θ)sin(φ) - sin(ψ)cos(φ) |
#   | -sin(θ)        sin(ψ)cos(θ)                        cos(ψ)cos(θ)                      |
row_1 = tf.concat([cosy * cosz, sinx * siny * cosz - cosx * sinz, cosx * siny * cosz + sinx * sinz], 1)
row_2 = tf.concat([cosy * sinz, sinx * siny * sinz + cosx * cosz, cosx * siny * sinz - sinx * cosz], 1)
row_3 = tf.concat([-siny, sinx * cosy, cosx * cosy], 1)
return tf.reshape(tf.concat([row_1, row_2, row_3], axis=1), [batch_size, 3, 3])
def pose_vec2mat(vec):
"""Converts 6DoF parameters to transformation matrix
Args:
vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6]
Returns:
A transformation matrix -- [B, 3, 4]
"""
batch_size, _ = vec.get_shape().as_list()
translation = tf.slice(vec, [0, 0], [-1, 3])
translation = tf.expand_dims(translation, -1)
rx = tf.slice(vec, [0, 3], [-1, 1])
ry = tf.slice(vec, [0, 4], [-1, 1])
rz = tf.slice(vec, [0, 5], [-1, 1])
rot_mat = euler_to_matrix(rx, ry, rz)
transform_mat = tf.concat([rot_mat, translation], axis=2)
return transform_mat
def image_coordinate(batch, height, width):
"""
Construct a tensor for the given height/width with elements the homogenous coordinates for the pixel
:param batch: Number of images in a batch
:param height: Height of image
:param width: Width of image
:return: Tensor of shape (B, height, width, 3), homogenous coordinates for an image.
Coordinates are in order [x, y, 1]
"""
x_coords = tf.range(width)
y_coords = tf.range(height)
x_mesh, y_mesh = tf.meshgrid(x_coords, y_coords)
ones_mesh = tf.cast(tf.ones([height, width]), tf.int32)
stacked = tf.stack([x_mesh, y_mesh, ones_mesh], axis=2)
return tf.cast(tf.repeat(tf.expand_dims(stacked, axis=0), batch, axis=0), dtype=tf.float32)
def projective_inverse_warp(source_img, depth, pose, intrinsics, coordinates):
"""
Calculate the reprojected image from the source to the target, based on the given depth, pose and intrinsics
SFM Learner inverse warp step
ps ~ K.T(t->s).Dt(pt).K^-1.pt
ps ~ K.T(t->s).Dt(pt)*K^-1.pt
Note that the depth pixel Dt(pt) is multiplied by every coordinate value (just element-wise, not matrix multiplication)
Idea is to map the pixel coordinates of the target image to 3d space (Dt(pt).K^-1.pt), then map these onto
the source image in pixel coordinates (K.T(t->s).{3d coord}), then using the projected coordinates we sample
the pixels in the source image (ps) to reconstruct the target image.
:param target_img: Tensor (batch, height, width, 3)
:param source_img: Tensor, same shape as target_img
:param depth: Tensor, (batch, height, width, 1)
:param pose: (batch, 3, 3)
:param intrinsics: (batch, 3, 3)
:param source_img: Tensor (batch, height, width, 3)
:param depth: Tensor, (batch, height, width)
:param pose: (batch, 6)
:param intrinsics: (batch, 3, 3) TODO: Intrinsics per image (per source/target image)?
:param coordinates: (batch, 3, height * width) - coordinates for the image. Pass this in so it doesn't need to be
calculated on every warp step
:return: The source image reprojected to the target
"""
pass
# Convert pose vector (output of pose net) to pose matrix (4x4)
pose_3x4 = pose_vec2mat(pose)
# The 3x4 pose matrix is applied directly to homogeneous 3D points (a row of ones is concatenated
# below), so the intrinsics can stay 3x3
# Calculate the inverse of the 3x3 intrinsics matrix
intrinsics_inverse = tf.linalg.inv(intrinsics)
depth_flat = tf.reshape(depth, [depth.shape[0], depth.shape[1] * depth.shape[2]])
# Apply the reprojection: K . T(t->s) . [ Dt(pt) * K^-1 . pt ; 1 ]
sample_coordinates = tf.matmul(tf.matmul(intrinsics, pose_3x4),
tf.concat([depth_flat * tf.matmul(intrinsics_inverse, coordinates),
tf.ones([depth_flat.shape[0], 1, depth_flat.shape[1]])], axis=1))
# Normalise the x/y axes (divide by the z axis); keep the z slice 3-dimensional so it broadcasts per batch
sample_coordinates = sample_coordinates[:, 0:2] / sample_coordinates[:, 2:3]
# Reshape back to image coordinates
sample_coordinates = tf.reshape(tf.transpose(sample_coordinates, [0, 2, 1]),
[depth.shape[0], depth.shape[1], depth.shape[2], 2])
# Sample from the source image at the projected coordinates to reconstruct the target view
return bilinear_sampler(source_img, sample_coordinates)
def bilinear_sampler(imgs, coords):
"""Construct a new image by bilinear sampling from the input image.
Points falling outside the source image boundary have value 0.
Args:
imgs: source image to be sampled from [batch, height_s, width_s, channels]
coords: coordinates of source pixels to sample from [batch, height_t,
width_t, 2]. height_t/width_t correspond to the dimensions of the output
image (don't need to be the same as height_s/width_s). The two channels
correspond to x and y coordinates respectively.
Returns:
A new sampled image [batch, height_t, width_t, channels]
"""
def _repeat(x, n_repeats):
rep = tf.transpose(
tf.expand_dims(tf.ones(shape=tf.stack([
n_repeats,
])), 1), [1, 0])
rep = tf.cast(rep, 'float32')
x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
return tf.reshape(x, [-1])
coords_x, coords_y = tf.split(coords, [1, 1], axis=3)
inp_size = imgs.get_shape()
coord_size = coords.get_shape()
out_size = coords.get_shape().as_list()
out_size[3] = imgs.get_shape().as_list()[3]
coords_x = tf.cast(coords_x, 'float32')
coords_y = tf.cast(coords_y, 'float32')
x0 = tf.floor(coords_x)
x1 = x0 + 1
y0 = tf.floor(coords_y)
y1 = y0 + 1
y_max = tf.cast(tf.shape(imgs)[1] - 1, 'float32')
x_max = tf.cast(tf.shape(imgs)[2] - 1, 'float32')
zero = tf.zeros([1], dtype='float32')
x0_safe = tf.clip_by_value(x0, zero, x_max)
y0_safe = tf.clip_by_value(y0, zero, y_max)
x1_safe = tf.clip_by_value(x1, zero, x_max)
y1_safe = tf.clip_by_value(y1, zero, y_max)
# bilinear interp weights, with points outside the grid having weight 0
# wt_x0 = (x1 - coords_x) * tf.cast(tf.equal(x0, x0_safe), 'float32')
# wt_x1 = (coords_x - x0) * tf.cast(tf.equal(x1, x1_safe), 'float32')
# wt_y0 = (y1 - coords_y) * tf.cast(tf.equal(y0, y0_safe), 'float32')
# wt_y1 = (coords_y - y0) * tf.cast(tf.equal(y1, y1_safe), 'float32')
wt_x0 = x1_safe - coords_x
wt_x1 = coords_x - x0_safe
wt_y0 = y1_safe - coords_y
wt_y1 = coords_y - y0_safe
# indices in the flat image to sample from
dim2 = tf.cast(inp_size[2], 'float32')
dim1 = tf.cast(inp_size[2] * inp_size[1], 'float32')
base = tf.reshape(
_repeat(
tf.cast(tf.range(coord_size[0]), 'float32') * dim1,
coord_size[1] * coord_size[2]),
[out_size[0], out_size[1], out_size[2], 1])
base_y0 = base + y0_safe * dim2
base_y1 = base + y1_safe * dim2
idx00 = tf.reshape(x0_safe + base_y0, [-1])
idx01 = x0_safe + base_y1
idx10 = x1_safe + base_y0
idx11 = x1_safe + base_y1
# sample from imgs
imgs_flat = tf.reshape(imgs, tf.stack([-1, inp_size[3]]))
imgs_flat = tf.cast(imgs_flat, 'float32')
im00 = tf.reshape(
tf.gather(imgs_flat, tf.cast(idx00, 'int32')), out_size)
im01 = tf.reshape(
tf.gather(imgs_flat, tf.cast(idx01, 'int32')), out_size)
im10 = tf.reshape(
tf.gather(imgs_flat, tf.cast(idx10, 'int32')), out_size)
im11 = tf.reshape(
tf.gather(imgs_flat, tf.cast(idx11, 'int32')), out_size)
w00 = wt_x0 * wt_y0
w01 = wt_x0 * wt_y1
w10 = wt_x1 * wt_y0
w11 = wt_x1 * wt_y1
output = tf.add_n([
w00 * im00, w01 * im01,
w10 * im10, w11 * im11
])
return output
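A small usage sketch for the warp pipeline above (not part of the diff), mirroring the shape handling in the test below: image_coordinate returns (batch, height, width, 3) and is reshaped to (batch, 3, height * width) before being passed to projective_inverse_warp. Assumes the snippet runs in the same module as these functions:

import tensorflow as tf

batch, height, width = 1, 4, 6
coords = image_coordinate(batch, height, width)                                    # (1, 4, 6, 3)
coords = tf.transpose(tf.reshape(coords, [batch, height * width, 3]), [0, 2, 1])   # (1, 3, 24)

source = tf.random.uniform([batch, height, width, 3])
depth = tf.ones([batch, height, width])
pose = tf.zeros([batch, 6])                     # zero translation and rotation (identity pose)
intrinsics = tf.eye(3, batch_shape=[batch])     # identity intrinsics
warped = projective_inverse_warp(source, depth, pose, intrinsics, coords)
print(warped.shape)                             # (1, 4, 6, 3) - same shape as the source image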

View File

@@ -0,0 +1,63 @@
import unittest
import numpy as np
import tensorflow as tf
import warp
class MyTestCase(unittest.TestCase):
def test_euler_to_rotation_matrix(self):
# Quarter rotation (pi/2) about every axis, plus an eighth rotation (pi/4) as a second batch element
x = y = z = tf.expand_dims(tf.expand_dims(tf.constant(np.pi / 2), 0), 0)
x2 = y2 = z2 = tf.expand_dims(tf.expand_dims(tf.constant(np.pi / 4), 0), 0)
x_batch = tf.concat([x, x2], 0)
y_batch = tf.concat([y, y2], 0)
z_batch = tf.concat([z, z2], 0)
# TODO: Construct expected final rotation matrix, just 3x3 using numpy, so that we can do an
# elementwise comparison later. Probably also want to check the
rotation_matrices = warp.euler_to_matrix(x_batch, y_batch, z_batch)
# old_rot = utils.euler2mat_noNDim(x_batch, y_batch, z_batch)
self.assertEqual(rotation_matrices.shape, [2, 3, 3])
def test_coordinates(self):
height = 1000
width = 2000
coords = warp.image_coordinate(8, height, width)
self.assertEqual(coords.shape, [8, height, width, 3])
self.assertEqual(coords[0, 0, 0, 0], 0)
self.assertEqual(coords[0, 0, 0, 1], 0)
self.assertEqual(coords[0, 0, 0, 2], 1)
self.assertEqual(coords[0, height - 1, 0, 0], 0)
self.assertEqual(coords[0, height - 1, 0, 1], height - 1)
self.assertEqual(coords[0, height - 1, 0, 2], 1)
self.assertEqual(coords[0, height - 1, width - 1, 0], width - 1)
self.assertEqual(coords[0, height - 1, width - 1, 1], height - 1)
self.assertEqual(coords[0, height - 1, width - 1, 2], 1)
def test_warp(self):
height = 1000
width = 2000
coords = warp.image_coordinate(1, height, width)
coords = tf.reshape(coords, [1, height * width, 3])
coords = tf.transpose(coords, [0, 2, 1])
# source image to sample from
img = tf.random.uniform([1, height, width, 3]) * 255
intrinsics = tf.constant([[[1, 0, 0], [0, 1, 0], [0, 0, 1]]], dtype=tf.float32)
disp = tf.random.uniform([1, height, width]) * 255
pose = tf.random.uniform([1, 6])
self.assertEqual(warp.projective_inverse_warp(img, disp, pose, intrinsics, coords).shape, img.shape)
if __name__ == '__main__':
unittest.main()