Compare commits

18 Commits

Author SHA1 Message Date
Piv
950dd4582e Start adding optimise step to unsupervised train, add demo to main 2022-05-03 19:51:34 +09:30
Piv
909c38cf56 Fix total loss calculation, add some more todos 2022-05-03 19:05:29 +09:30
piv
188c55d1c8 Update for python 2.10, add general training algorithm step 2022-05-03 16:56:15 +09:30
Piv
aa423cc38a Start adding unsupervised train loop 2021-11-20 13:37:26 +10:30
Piv
2bb37b2722 Fix up generator to include intrinsics 2021-08-29 19:26:15 +09:30
Piv
90b73bf420 Start adding generators for unsupervised training 2021-08-29 18:06:37 +09:30
Piv
42fcf5554a Update warp test to verify output shape 2021-08-24 21:39:10 +09:30
Piv
c164c9720a Finish Projective Inverse Warp algorithm 2021-08-24 20:13:30 +09:30
Piv
b7917ec465 More warp implementation 2021-08-21 17:32:16 +09:30
Piv
df1ac89a81 Add euler to rotation matrix, grid flattening 2021-08-10 20:39:52 +09:30
Piv
8016f0f945 Add coordinates generation implementation 2021-08-08 22:11:50 +09:30
Piv
ece37843ce Merge branch 'main' into unsupervised 2021-08-08 18:58:39 +09:30
Piv
cd278e683f Start adding pose warp conversions 2021-08-07 17:18:06 +09:30
Piv
5996d6eaf0 Merge branch 'main' into unsupervised 2021-08-05 17:49:48 +09:30
Piv
8be4ce4e6d Add smooth loss 2021-08-05 17:48:44 +09:30
Piv
b95442bb23 Finish off pose net 2021-08-04 20:51:46 +09:30
Piv
a111f89722 Start adding pose decoder 2021-08-03 20:25:19 +09:30
Piv
2372b906df Add resnet18 2021-08-01 10:44:33 +09:30
7 changed files with 479 additions and 25 deletions

10
main.py
View File

@@ -1,4 +1,6 @@
import fast_depth_functional as fd
from unsupervised.models import pose_net, wrap_mobilenet_nnconv5_for_utrain
from unsupervised.train import UnsupervisedPoseDepthLearner
if __name__ == '__main__':
fd.fix_windows_gpu()
@@ -9,3 +11,11 @@ if __name__ == '__main__':
# Save in Tensorflow SavedModel format
# tf.saved_model.save(model, 'fast_depth_nyu_v2_224_224_3_e1_saved_model')
# Unsupervised
depth_model = fd.mobilenet_nnconv5()
pose_model = pose_net()
model = UnsupervisedPoseDepthLearner(wrap_mobilenet_nnconv5_for_utrain(depth_model), pose_model)
model.compile(optimizer='adam')
# TODO: Incorporate data generator
# model.fit()
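As a rough sketch of the data generator TODO above (an editor's illustration, not part of this diff): the video_generator from unsupervised/load.py could be wrapped in a tf.data.Dataset. The helper name, placeholder intrinsics values, path and 224x224 frame size below are assumptions.

import tensorflow as tf
from unsupervised.load import video_generator

def _as_training_example(sample, size=(224, 224)):
    # Assumed preprocessing: stack the 3 frames and resize them to the model input size
    frames = tf.image.resize(tf.cast(tf.stack(sample['frames']), tf.float32), size)
    return {'frames': frames, 'intrinsics': tf.constant(sample['intrinsics'], tf.float32)}

dataset = tf.data.Dataset.from_generator(
    # 'videos/' and the intrinsics values are placeholders
    lambda: map(_as_training_example, video_generator('videos/', intrinsics=[525.0, 525.0, 112.0, 112.0])),
    output_signature={
        'frames': tf.TensorSpec(shape=(3, 224, 224, 3), dtype=tf.float32),
        'intrinsics': tf.TensorSpec(shape=(4,), dtype=tf.float32),
    })
model.fit(dataset.batch(4))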

50
unsupervised/load.py Normal file
View File

@@ -0,0 +1,50 @@
import os
import cv2
def video_generator(video_path_or_folder, intrinsics, allowed_extensions=('mp4', 'mkv', 'mov')):
"""
Create a generator for unsupervised training on depth sequences from a video file or folder of video files
:param video_path_or_folder: Video file or folder with list of video files to iterate through
:param intrinsics: Intrinsics for the videos TODO: Intrinsics per video
:param allowed_extensions: Allowed video extensions, to not accidentally pick files that aren't videos
:return: generator that yields dict of {frames: [frame1, frame2, frame3], intrinsics: [fx, fy, tx, ty]}
"""
if os.path.isfile(video_path_or_folder):
# This function is itself a generator (it contains yield below), so delegate with
# yield from rather than return, otherwise the single-file case would produce nothing
yield from _single_video_generator(video_path_or_folder, intrinsics)
else:
for root, dirs, files in os.walk(video_path_or_folder):
for file in files:
# splitext keeps the leading '.', so strip it before comparing against allowed_extensions
if os.path.splitext(file)[1].lstrip('.') in allowed_extensions:
yield from _single_video_generator(os.path.join(root, file), intrinsics)
def _single_video_generator(video_file, intrinsics):
# Single video file
video = cv2.VideoCapture(video_file)
try:
# Buffer to store 3 frames, yield when this fills up
current_frames = []
while video.grab():
# retrieve() returns a (success, frame) tuple; keep only the decoded frame
_, frame = video.retrieve()
current_frames.append(frame)
if len(current_frames) == 3:
temp_frames = current_frames
current_frames = []
# TODO: Consider converting frames to tensor
yield {'frames': temp_frames, 'intrinsics': intrinsics}
finally:
video.release()
def image_generator(root_folder):
"""
Create an image generator for unsupervised training
:param root_folder:
:return:
"""
pass
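One possible shape for the image_generator stub above (an editor's sketch only; the intrinsics parameter and the extension filter are assumptions, not taken from this diff):

def image_generator(root_folder, intrinsics, allowed_extensions=('jpg', 'jpeg', 'png')):
    # Sketch: yield overlapping triplets of consecutive frames stored as sorted image files,
    # in the same {'frames': [...], 'intrinsics': [...]} format as video_generator
    files = sorted(
        os.path.join(root_folder, f) for f in os.listdir(root_folder)
        if os.path.splitext(f)[1].lstrip('.').lower() in allowed_extensions)
    for i in range(len(files) - 2):
        yield {'frames': [cv2.imread(f) for f in files[i:i + 3]], 'intrinsics': intrinsics}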

View File

@@ -9,8 +9,8 @@ def wrap_mobilenet_nnconv5_for_utrain(model):
This just exposes the lower disparity layers as outputs, so they can be used to train at different scales/image
resolutions.
:param model: Fast Depth model to wrap
:return: Keras model that takes same input as model and outputs the model output plus 3 disparity layers
"""
input = model.input
disp_1 = model.get_layer('conv_pw_%d_relu' % 15).output
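For context (an editor's sketch; the output ordering is assumed from the docstring and not shown in this hunk), the wrapped model is called like any Keras model and returns the full-resolution prediction together with the lower-resolution disparity outputs:

wrapped = wrap_mobilenet_nnconv5_for_utrain(fd.mobilenet_nnconv5())
outputs = wrapped(tf.zeros([1, 224, 224, 3]))  # full-resolution output plus 3 disparity tensors (order assumed)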

View File

@@ -3,7 +3,9 @@ Utils to load and split image/video data.
""" """
from __future__ import division from __future__ import division
import math import math
import tensorflow as tf import tensorflow as tf
@@ -58,6 +60,49 @@ def euler2mat(z, y, x):
return rotMat return rotMat
def euler2mat_noNDim(x, y, z):
"""
:param x: Tensor of shape (B, 1) - x axis rotation
:param y: Tensor of shape (B, 1) - y axis rotation
:param z: Tensor of shape (B, 1) - z axis rotation
:return: Rotation matrix for the given euler angles, in the order rotation(x).rotation(y).rotation(z)
"""
batch_size = tf.shape(z)[0]
# Euler angles should be between -pi and pi, clip so the pose network is coerced to this range
z = tf.clip_by_value(z, -math.pi, math.pi)
y = tf.clip_by_value(y, -math.pi, math.pi)
x = tf.clip_by_value(x, -math.pi, math.pi)
zeros = tf.zeros([batch_size, 1])
ones = tf.ones([batch_size, 1])
cosx = tf.cos(x)
sinx = tf.sin(x)
rotx_1 = tf.concat([ones, zeros, zeros], axis=1)
rotx_2 = tf.concat([zeros, cosx, -sinx], axis=1)
rotx_3 = tf.concat([zeros, sinx, cosx], axis=1)
xmat = tf.reshape(tf.concat([rotx_1, rotx_2, rotx_3], axis=1), [batch_size, 3, 3])
cosz = tf.cos(z)
sinz = tf.sin(z)
rotz_1 = tf.concat([cosz, -sinz, zeros], axis=1)
rotz_2 = tf.concat([sinz, cosz, zeros], axis=1)
rotz_3 = tf.concat([zeros, zeros, ones], axis=1)
zmat = tf.reshape(tf.concat([rotz_1, rotz_2, rotz_3], axis=1), [batch_size, 3, 3])
cosy = tf.cos(y)
siny = tf.sin(y)
roty_1 = tf.concat([cosy, zeros, siny], axis=1)
roty_2 = tf.concat([zeros, ones, zeros], axis=1)
roty_3 = tf.concat([-siny, zeros, cosy], axis=1)
ymat = tf.reshape(tf.concat([roty_1, roty_2, roty_3], axis=1), [batch_size, 3, 3])
rotMat = tf.matmul(tf.matmul(zmat, ymat), xmat)
return rotMat
def pose_vec2mat(vec):
"""Converts 6DoF parameters to transformation matrix
Args:
@@ -281,6 +326,7 @@ def bilinear_sampler(imgs, coords):
])
return output
# Spatial transformer network bilinear sampler, taken from https://github.com/kevinzakka/spatial-transformer-network/blob/master/stn/transformer.py
@@ -309,8 +355,8 @@ def stn_bilinear_sampler(img, x, y):
# rescale x and y to [0, W-1/H-1]
x = tf.cast(x, 'float32')
y = tf.cast(y, 'float32')
x = 0.5 * ((x + 1.0) * tf.cast(max_x - 1, 'float32'))
y = 0.5 * ((y + 1.0) * tf.cast(max_y - 1, 'float32'))
# grab 4 nearest corner points for each (x_i, y_i)
x0 = tf.cast(tf.floor(x), 'int32')
@@ -337,10 +383,10 @@ def stn_bilinear_sampler(img, x, y):
y1 = tf.cast(y1, 'float32')
# calculate deltas
wa = (x1 - x) * (y1 - y)
wb = (x1 - x) * (y - y0)
wc = (x - x0) * (y1 - y)
wd = (x - x0) * (y - y0)
# add dimension for addition
wa = tf.expand_dims(wa, axis=3)
@@ -349,6 +395,6 @@ def stn_bilinear_sampler(img, x, y):
wd = tf.expand_dims(wd, axis=3)
# compute output
out = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id])
return out

View File

@@ -4,17 +4,98 @@ Trainer to learn depth information on unlabeled data (raw images/videos)
Allows pluggable depth networks for differing performance (including fast-depth)
"""
import tensorflow as tf
import tensorflow.python.keras as keras
from unsupervised import warp, loss
class UnsupervisedPoseDepthLearner(keras.Model):
"""
Keras model to learn simultaneous depth + pose from image/video sequences.
To train this, the datasource should yield 3 frames and camera intrinsics.
Optionally velocity + timestamp per frame to train to real scale
"""
def __init__(self, depth_model, pose_model, num_scales=3, *args, **kwargs):
super().__init__(*args, **kwargs)
self.depth_model = depth_model
self.pose_model = pose_model
# TODO: I think num_scales should be something defined on the depth model itself
self.num_scales = num_scales
self.smoothness = 1e-3
def train_step(self, data):
"""
:param data: Format: {frames: Mat[3], intrinsics: Tensor}
"""
with tf.GradientTape() as tape:
# Pass through depth for target image
# TODO: Convert frame to tensor (or do this in the dataloader)
# TODO: Ensure the depth output includes enough outputs for each scale
depth = self.depth_model(data.frames[1])
# Pass through depth -> pose for both source images
# TODO: Concat these poses using tf.concat
pose1 = self.pose_model(data.frames[1], data.frames[0])
pose2 = self.pose_model(data.frames[1], data.frames[2])
total_loss = self.calculate_loss(depth, pose1, pose2, data)
# Apply optimise step on total loss
# TODO: Do these need to be separate for depth/pose model?
trainable_weights = self.depth_model.trainable_weights + self.pose_model.trainable_weights
grads = tape.gradient(total_loss, trainable_weights)
self.optimizer.apply_gradients(zip(grads, trainable_weights))
# Keras expects train_step to return a dict of metrics
return {'loss': total_loss}
def calculate_loss(self, depth, pose1, pose2, data):
shape = depth[0].shape
# TODO: Pull coords out of train step into initialiser, then it only needs to be created once.
# Ideally the size/batch size will still be calculated automatically
coords = warp.image_coordinate(shape[0], shape[1], shape[2])
# projective_inverse_warp expects coordinates shaped (batch, 3, height * width), as in the warp test
coords = tf.transpose(tf.reshape(coords, [shape[0], shape[1] * shape[2], 3]), [0, 2, 1])
total_loss = 0
scale_losses = []
# For each scale, do the projective inverse warp step and calculate losses
for scale in range(self.num_scales):
# TODO: Could simplify this by stacking the source images (see sfmlearner)
# It isn't too much of an issue right now since we're only using 2 images (left/right)
# For each depth output (scale), do the projective inverse warp on each input image and calculate the losses
# Only take the min loss between the two warped images (from monodepth2)
# TODO: Need to bilinear resize the depth at each scale up to the size of image
warp1 = warp.projective_inverse_warp(data.frames[0], depth[scale], pose1, data.intrinsics, coords)
warp2 = warp.projective_inverse_warp(data.frames[2], depth[scale], pose2, data.intrinsics, coords)
# Per pixel loss is just the difference in pixel intensities?
# Something like l1 plus ssim
warp_loss1 = loss.make_combined_ssim_l1_loss(data.frames[1], warp1)
warp_loss2 = loss.make_combined_ssim_l1_loss(data.frames[1], warp2)
# Take loss between target (data.frames[1]) and source images (pre-warp)
source_loss1 = loss.make_combined_ssim_l1_loss(data.frames[1], data.frames[0])
source_loss2 = loss.make_combined_ssim_l1_loss(data.frames[1], data.frames[2])
# Take the min (per pixel) of the losses of warped/unwarped images (so min across pixels of 4 images)
# TODO: Verify the axes are correct
reprojection_loss = tf.reduce_mean(
tf.reduce_min(tf.concat([warp_loss1, warp_loss2, source_loss1, source_loss2], axis=3), axis=3))
# Calculate smooth losses
# TODO: Since smooth loss is calculated directly on the depth at the scale, we need
# to resize the target image to the same dimensions as the depth map at the current scale
# Can do this by just inspecting the shape of the depth and resizing to match that (but
# with 3 colour channels)
smooth_loss = loss.smooth_loss(depth[scale], data.frames[1])
# SFM Learner downscales smoothing loss depending on the scale
smoothed_reprojection_loss = self.smoothness * smooth_loss / (2 ** scale)
# Add to total loss (with smooth loss + smooth loss weighting applied to pixel losses)
total_loss += reprojection_loss + smoothed_reprojection_loss
# Collect losses, average them out (divide by number of scales)
total_loss /= self.num_scales
return total_loss
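A sketch of how the two resize TODOs in calculate_loss could be handled (an editor's illustration with assumed shapes, not the author's implementation): upsample the coarse depth map to the frame resolution before warping, and downsample the target frame to the depth resolution for the smooth loss.

# Assumes depth[scale] has shape (batch, h, w) and data.frames[1] has shape (batch, H, W, 3)
depth_full = tf.image.resize(
    tf.expand_dims(depth[scale], axis=-1),   # (batch, h, w) -> (batch, h, w, 1)
    size=tf.shape(data.frames[1])[1:3],      # resize up to the frame's H, W
    method='bilinear')[..., 0]
target_small = tf.image.resize(data.frames[1], size=tf.shape(depth[scale])[1:3])
smooth = loss.smooth_loss(depth[scale], target_small)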

View File

@@ -1,19 +1,223 @@
import math
import tensorflow as tf
def euler_to_matrix(x, y, z):
"""
:param x: Tensor of shape (B, 1) - x axis rotation
:param y: Tensor of shape (B, 1) - y axis rotation
:param z: Tensor of shape (B, 1) - z axis rotation
:return: Rotation matrix for the given euler angles, in the order rotation(x) -> rotation(y) -> rotation(z)
"""
batch_size = tf.shape(z)[0]
# Euler angles should be between -pi and pi, clip so the pose network is coerced to this range
z = tf.clip_by_value(z, -math.pi, math.pi)
y = tf.clip_by_value(y, -math.pi, math.pi)
x = tf.clip_by_value(x, -math.pi, math.pi)
cosx = tf.cos(x)
sinx = tf.sin(x)
cosy = tf.cos(y)
siny = tf.sin(y)
cosz = tf.cos(z)
sinz = tf.sin(z)
# Rotate about x, then y, then z; Rz is written first below because rotations are applied from the left
# of the coordinate vector (for the opposite convention the multiplication order would need to be reversed)
# R = Rz(φ)Ry(θ)Rx(ψ)
#   = | cos(θ)cos(φ)   sin(ψ)sin(θ)cos(φ) - cos(ψ)sin(φ)   cos(ψ)sin(θ)cos(φ) + sin(ψ)sin(φ) |
#     | cos(θ)sin(φ)   sin(ψ)sin(θ)sin(φ) + cos(ψ)cos(φ)   cos(ψ)sin(θ)sin(φ) - sin(ψ)cos(φ) |
#     | -sin(θ)        sin(ψ)cos(θ)                        cos(ψ)cos(θ)                      |
row_1 = tf.concat([cosy * cosz, sinx * siny * cosz - cosx * sinz, cosx * siny * cosz + sinx * sinz], 1)
row_2 = tf.concat([cosy * sinz, sinx * siny * sinz + cosx * cosz, cosx * siny * sinz - sinx * cosz], 1)
row_3 = tf.concat([-siny, sinx * cosy, cosx * cosy], 1)
return tf.reshape(tf.concat([row_1, row_2, row_3], axis=1), [batch_size, 3, 3])
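# Editor's sketch (not part of this diff): sanity-check the matrix above. A rotation of
# pi/2 about y alone should map the x axis onto -z, i.e. the last row is [-1, 0, 0].
check = euler_to_matrix(tf.zeros([1, 1]), tf.fill([1, 1], math.pi / 2), tf.zeros([1, 1]))
# check[0] is approximately [[0, 0, 1], [0, 1, 0], [-1, 0, 0]]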
def pose_vec2mat(vec):
"""Converts 6DoF parameters to transformation matrix
Args:
vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6]
Returns:
A transformation matrix -- [B, 3, 4]
"""
batch_size, _ = vec.get_shape().as_list()
translation = tf.slice(vec, [0, 0], [-1, 3])
translation = tf.expand_dims(translation, -1)
rx = tf.slice(vec, [0, 3], [-1, 1])
ry = tf.slice(vec, [0, 4], [-1, 1])
rz = tf.slice(vec, [0, 5], [-1, 1])
rot_mat = euler_to_matrix(rx, ry, rz)
transform_mat = tf.concat([rot_mat, translation], axis=2)
return transform_mat
def image_coordinate(batch, height, width):
"""
Construct a tensor for the given height/width whose elements are the homogeneous coordinates of each pixel
:param batch: Number of images in a batch
:param height: Height of image
:param width: Width of image
:return: Tensor of shape (B, height, width, 3), homogeneous coordinates for an image.
Coordinates are in order [x, y, 1]
"""
x_coords = tf.range(width)
y_coords = tf.range(height)
x_mesh, y_mesh = tf.meshgrid(x_coords, y_coords)
ones_mesh = tf.cast(tf.ones([height, width]), tf.int32)
stacked = tf.stack([x_mesh, y_mesh, ones_mesh], axis=2)
return tf.cast(tf.repeat(tf.expand_dims(stacked, axis=0), batch, axis=0), dtype=tf.float32)
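# Editor's note (not part of this diff), for illustration: image_coordinate(1, 2, 2) evaluates to
# [[[[0., 0., 1.], [1., 0., 1.]],
#   [[0., 1., 1.], [1., 1., 1.]]]]  - shape (1, 2, 2, 3), each entry being [x, y, 1]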
def projective_inverse_warp(source_img, depth, pose, intrinsics, coordinates):
""" """
Calculate the reprojected image from the source to the target, based on the given depth, pose and intrinsics Calculate the reprojected image from the source to the target, based on the given depth, pose and intrinsics
SFM Learner inverse warp step SFM Learner inverse warp step
ps ~ K.T(t->s).Dt(pt).K^-1.pt ps ~ K.T(t->s).Dt(pt)*K^-1.pt
Note that the depth pixel Dt(pt) is multiplied by every coordinate value (just element-wise, not matrix multiplication)
Idea is to map the pixel coordinates of the target image to 3d space (Dt(pt).K^-1.pt), then map these onto Idea is to map the pixel coordinates of the target image to 3d space (Dt(pt).K^-1.pt), then map these onto
the source image in pixel coordinates (K.T(t->s).{3d coord}), then using the projected coordinates we sample the source image in pixel coordinates (K.T(t->s).{3d coord}), then using the projected coordinates we sample
the pixels in the source image (ps) to reconstruct the target image. the pixels in the source image (ps) to reconstruct the target image.
:param target_img: Tensor (batch, height, width, 3) :param source_img: Tensor (batch, height, width, 3)
:param source_img: Tensor, same shape as target_img :param depth: Tensor, (batch, height, width)
:param depth: Tensor, (batch, height, width, 1) :param pose: (batch, 6)
:param pose: (batch, 3, 3) :param intrinsics: (batch, 3, 3) TODO: Intrinsics per image (per source/target image)?
:param intrinsics: (batch, 3, 3) :param coordinates: (batch, 3, height * width) - coordinates for the image. Pass this in so it doesn't need to be
calculated on every warp step
:return: The source image reprojected to the target :return: The source image reprojected to the target
""" """
# Convert pose vector (output of pose net) to pose matrix (3x4)
pose_3x4 = pose_vec2mat(pose)
# The 3x3 intrinsics multiply the 3x4 pose directly (the coordinates are made homogeneous below),
# so no 4x4 conversion of the intrinsics is needed
# Calculate inverse of the 3x3 intrinsics matrix
intrinsics_inverse = tf.linalg.inv(intrinsics)
# Flatten depth to (batch, 1, height * width) so it broadcasts over the 3 coordinate rows
depth_flat = tf.reshape(depth, [depth.shape[0], 1, depth.shape[1] * depth.shape[2]])
# Apply ps ~ K.T(t->s).Dt(pt)*K^-1.pt: (batch, 3, 4) x (batch, 4, height * width) -> (batch, 3, height * width)
sample_coordinates = tf.matmul(tf.matmul(intrinsics, pose_3x4),
tf.concat([depth_flat * tf.matmul(intrinsics_inverse, coordinates),
tf.ones([depth_flat.shape[0], 1, depth_flat.shape[2]])], axis=1))
# Normalise the x/y axes (divide by z axis); the 2:3 slice keeps the axis so it broadcasts over both rows
sample_coordinates = sample_coordinates[:, 0:2] / sample_coordinates[:, 2:3]
# Reshape back to image coordinates
sample_coordinates = tf.reshape(tf.transpose(sample_coordinates, [0, 2, 1]),
[depth.shape[0], depth.shape[1], depth.shape[2], 2])
# sample from the source image using the coordinates applied by the function
return bilinear_sampler(source_img, sample_coordinates)
def bilinear_sampler(imgs, coords):
"""Construct a new image by bilinear sampling from the input image.
Points falling outside the source image boundary have value 0.
Args:
imgs: source image to be sampled from [batch, height_s, width_s, channels]
coords: coordinates of source pixels to sample from [batch, height_t,
width_t, 2]. height_t/width_t correspond to the dimensions of the output
image (don't need to be the same as height_s/width_s). The two channels
correspond to x and y coordinates respectively.
Returns:
A new sampled image [batch, height_t, width_t, channels]
"""
def _repeat(x, n_repeats):
rep = tf.transpose(
tf.expand_dims(tf.ones(shape=tf.stack([
n_repeats,
])), 1), [1, 0])
rep = tf.cast(rep, 'float32')
x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
return tf.reshape(x, [-1])
coords_x, coords_y = tf.split(coords, [1, 1], axis=3)
inp_size = imgs.get_shape()
coord_size = coords.get_shape()
out_size = coords.get_shape().as_list()
out_size[3] = imgs.get_shape().as_list()[3]
coords_x = tf.cast(coords_x, 'float32')
coords_y = tf.cast(coords_y, 'float32')
x0 = tf.floor(coords_x)
x1 = x0 + 1
y0 = tf.floor(coords_y)
y1 = y0 + 1
y_max = tf.cast(tf.shape(imgs)[1] - 1, 'float32')
x_max = tf.cast(tf.shape(imgs)[2] - 1, 'float32')
zero = tf.zeros([1], dtype='float32')
x0_safe = tf.clip_by_value(x0, zero, x_max)
y0_safe = tf.clip_by_value(y0, zero, y_max)
x1_safe = tf.clip_by_value(x1, zero, x_max)
y1_safe = tf.clip_by_value(y1, zero, y_max)
# bilinear interp weights, with points outside the grid having weight 0
# wt_x0 = (x1 - coords_x) * tf.cast(tf.equal(x0, x0_safe), 'float32')
# wt_x1 = (coords_x - x0) * tf.cast(tf.equal(x1, x1_safe), 'float32')
# wt_y0 = (y1 - coords_y) * tf.cast(tf.equal(y0, y0_safe), 'float32')
# wt_y1 = (coords_y - y0) * tf.cast(tf.equal(y1, y1_safe), 'float32')
wt_x0 = x1_safe - coords_x
wt_x1 = coords_x - x0_safe
wt_y0 = y1_safe - coords_y
wt_y1 = coords_y - y0_safe
# indices in the flat image to sample from
dim2 = tf.cast(inp_size[2], 'float32')
dim1 = tf.cast(inp_size[2] * inp_size[1], 'float32')
base = tf.reshape(
_repeat(
tf.cast(tf.range(coord_size[0]), 'float32') * dim1,
coord_size[1] * coord_size[2]),
[out_size[0], out_size[1], out_size[2], 1])
base_y0 = base + y0_safe * dim2
base_y1 = base + y1_safe * dim2
idx00 = tf.reshape(x0_safe + base_y0, [-1])
idx01 = x0_safe + base_y1
idx10 = x1_safe + base_y0
idx11 = x1_safe + base_y1
# sample from imgs
imgs_flat = tf.reshape(imgs, tf.stack([-1, inp_size[3]]))
imgs_flat = tf.cast(imgs_flat, 'float32')
im00 = tf.reshape(
tf.gather(imgs_flat, tf.cast(idx00, 'int32')), out_size)
im01 = tf.reshape(
tf.gather(imgs_flat, tf.cast(idx01, 'int32')), out_size)
im10 = tf.reshape(
tf.gather(imgs_flat, tf.cast(idx10, 'int32')), out_size)
im11 = tf.reshape(
tf.gather(imgs_flat, tf.cast(idx11, 'int32')), out_size)
w00 = wt_x0 * wt_y0
w01 = wt_x0 * wt_y1
w10 = wt_x1 * wt_y0
w11 = wt_x1 * wt_y1
output = tf.add_n([
w00 * im00, w01 * im01,
w10 * im10, w11 * im11
])
return output

View File

@@ -0,0 +1,63 @@
import unittest
import numpy as np
import tensorflow as tf
import warp
class MyTestCase(unittest.TestCase):
def test_euler_to_rotation_matrix(self):
# quarter rotation about every axis (and an eighth rotation for the second batch element)
x = y = z = tf.expand_dims(tf.expand_dims(tf.constant(np.pi / 2), 0), 0)
x2 = y2 = z2 = tf.expand_dims(tf.expand_dims(tf.constant(np.pi / 4), 0), 0)
x_batch = tf.concat([x, x2], 0)
y_batch = tf.concat([y, y2], 0)
z_batch = tf.concat([z, z2], 0)
# TODO: Construct expected final rotation matrix, just 3x3 using numpy, so that we can do an
# elementwise comparison later. Probably also want to check the
rotation_matrices = warp.euler_to_matrix(x_batch, y_batch, z_batch)
# old_rot = utils.euler2mat_noNDim(x_batch, y_batch, z_batch)
self.assertEqual(rotation_matrices.shape, [2, 3, 3])
def test_coordinates(self):
height = 1000
width = 2000
coords = warp.image_coordinate(8, height, width)
self.assertEqual(coords.shape, [8, height, width, 3])
self.assertEqual(coords[0, 0, 0, 0], 0)
self.assertEqual(coords[0, 0, 0, 1], 0)
self.assertEqual(coords[0, 0, 0, 2], 1)
self.assertEqual(coords[0, height - 1, 0, 0], 0)
self.assertEqual(coords[0, height - 1, 0, 1], height - 1)
self.assertEqual(coords[0, height - 1, 0, 2], 1)
self.assertEqual(coords[0, height - 1, width - 1, 0], width - 1)
self.assertEqual(coords[0, height - 1, width - 1, 1], height - 1)
self.assertEqual(coords[0, height - 1, width - 1, 2], 1)
def test_warp(self):
height = 1000
width = 2000
coords = warp.image_coordinate(1, height, width)
coords = tf.reshape(coords, [1, height * width, 3])
coords = tf.transpose(coords, [0, 2, 1])
# source image to sample from
img = tf.random.uniform([1, height, width, 3]) * 255
intrinsics = tf.constant([[[1, 0, 0], [0, 1, 0], [0, 0, 1]]], dtype=tf.float32)
disp = tf.random.uniform([1, height, width]) * 255
pose = tf.random.uniform([1, 6])
self.assertEqual(warp.projective_inverse_warp(img, disp, pose, intrinsics, coords).shape, img.shape)
if __name__ == '__main__':
unittest.main()