# fast-depth-tf/unsupervised/warp.py
import math
import tensorflow as tf


def euler_to_matrix(x, y, z):
    """
    :param x: Tensor of shape (B, 1) - x axis rotation
    :param y: Tensor of shape (B, 1) - y axis rotation
    :param z: Tensor of shape (B, 1) - z axis rotation
    :return: Tensor of shape (B, 3, 3) - rotation matrix for the given Euler angles, composed
        in the order rotation(x) -> rotation(y) -> rotation(z)
    """
    batch_size = tf.shape(z)[0]
    # Euler angles should be between -pi and pi; clip so the pose network's output is coerced into this range
    z = tf.clip_by_value(z, -math.pi, math.pi)
    y = tf.clip_by_value(y, -math.pi, math.pi)
    x = tf.clip_by_value(x, -math.pi, math.pi)
    cosx = tf.cos(x)
    sinx = tf.sin(x)
    cosy = tf.cos(y)
    siny = tf.sin(y)
    cosz = tf.cos(z)
    sinz = tf.sin(z)
    # Rotate about x, then y, then z; Rz is leftmost because each rotation is applied on the
    # left of the coordinates (reverse the composition if the opposite convention is needed):
    # R = Rz(z)Ry(y)Rx(x)
    #   = | cos(y)cos(z)   sin(x)sin(y)cos(z) - cos(x)sin(z)   cos(x)sin(y)cos(z) + sin(x)sin(z) |
    #     | cos(y)sin(z)   sin(x)sin(y)sin(z) + cos(x)cos(z)   cos(x)sin(y)sin(z) - sin(x)cos(z) |
    #     | -sin(y)        sin(x)cos(y)                        cos(x)cos(y)                      |
    row_1 = tf.concat([cosy * cosz, sinx * siny * cosz - cosx * sinz, cosx * siny * cosz + sinx * sinz], 1)
    row_2 = tf.concat([cosy * sinz, sinx * siny * sinz + cosx * cosz, cosx * siny * sinz - sinx * cosz], 1)
    row_3 = tf.concat([-siny, sinx * cosy, cosx * cosy], 1)
    # Each row is (B, 3); concatenating gives (B, 9), which reshapes row-major to (B, 3, 3)
    return tf.reshape(tf.concat([row_1, row_2, row_3], axis=1), [batch_size, 3, 3])
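

# Quick sanity check (illustrative, not from the original file): all-zero angles give identity
# rotations, e.g. euler_to_matrix(tf.zeros([2, 1]), tf.zeros([2, 1]), tf.zeros([2, 1]))
# evaluates to a (2, 3, 3) batch of 3x3 identity matrices.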


def pose_vec2mat(vec):
    """Converts 6DoF parameters to a transformation matrix
    Args:
        vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6]
    Returns:
        A transformation matrix -- [B, 4, 4]
    """
    # Use the dynamic batch size so this also works when the static batch dimension is unknown
    batch_size = tf.shape(vec)[0]
    translation = tf.slice(vec, [0, 0], [-1, 3])
    translation = tf.expand_dims(translation, -1)
    rx = tf.slice(vec, [0, 3], [-1, 1])
    ry = tf.slice(vec, [0, 4], [-1, 1])
    rz = tf.slice(vec, [0, 5], [-1, 1])
    # euler_to_matrix already returns (B, 3, 3), so no squeeze is needed
    rot_mat = euler_to_matrix(rx, ry, rz)
    # Append the translation column, then the bottom row [0, 0, 0, 1] to make the matrix homogeneous
    filler = tf.constant([0.0, 0.0, 0.0, 1.0], shape=[1, 1, 4])
    filler = tf.tile(filler, [batch_size, 1, 1])
    transform_mat = tf.concat([rot_mat, translation], axis=2)
    transform_mat = tf.concat([transform_mat, filler], axis=1)
    return transform_mat
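

# Illustrative example (not from the original file): a zero 6DoF vector yields the identity
# transform, e.g. pose_vec2mat(tf.zeros([1, 6])) evaluates to a (1, 4, 4) identity matrix
# (rotation I, zero translation, bottom row [0, 0, 0, 1]).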


def image_coordinate(batch, height, width):
    """
    Construct a tensor for the given height/width whose elements are the homogeneous coordinates of each pixel
    :param batch: Number of images in a batch
    :param height: Height of image
    :param width: Width of image
    :return: Tensor of shape (B, height, width, 3), homogeneous coordinates for an image.
        Coordinates are in order [x, y, 1]
    """
    x_coords = tf.range(width)
    y_coords = tf.range(height)
    x_mesh, y_mesh = tf.meshgrid(x_coords, y_coords)
    ones_mesh = tf.ones([height, width], dtype=tf.int32)
    stacked = tf.stack([x_mesh, y_mesh, ones_mesh], axis=2)
    # Tile the (height, width, 3) grid across the batch dimension
    return tf.repeat(tf.expand_dims(stacked, axis=0), batch, axis=0)
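

# Illustrative example (not from the original file): for a single 2x2 image,
#   image_coordinate(1, 2, 2)
# evaluates to [[[[0, 0, 1], [1, 0, 1]],
#                [[0, 1, 1], [1, 1, 1]]]]   # shape (1, 2, 2, 3), [x, y, 1] per pixel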


def intrinsics_vector_to_matrix(intrinsics):
    """
    Convert a 4-element intrinsics vector (fx, fy, px, py) into a homogeneous 4x4 matrix
    :param intrinsics: Tensor of shape (B, 4), intrinsics for each image
    :return: Tensor of shape (B, 4, 4), intrinsics matrix for each image in the batch
    """
    fx, fy, px, py = tf.unstack(intrinsics, num=4, axis=1)
    zeros, ones = tf.zeros_like(fx), tf.ones_like(fx)
    # Pad the usual 3x3 K to 4x4 so it composes directly with the 4x4 pose matrix
    rows = [[fx, zeros, px, zeros], [zeros, fy, py, zeros], [zeros, zeros, ones, zeros], [zeros, zeros, zeros, ones]]
    return tf.stack([tf.stack(row, axis=1) for row in rows], axis=1)
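

# Illustrative example (not from the original file): for fx=100, fy=100, px=50, py=40,
#   intrinsics_vector_to_matrix(tf.constant([[100., 100., 50., 40.]]))
# evaluates to [[[100., 0., 50., 0.],
#                [0., 100., 40., 0.],
#                [0., 0., 1., 0.],
#                [0., 0., 0., 1.]]]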


def projective_inverse_warp(target_img, source_img, depth, pose, intrinsics, coordinates):
    """
    Calculate the reprojected image from the source to the target, based on the given depth, pose and intrinsics.
    SfM Learner inverse warp step:
        ps ~ K . T(t->s) . Dt(pt) . K^-1 . pt
    Note that the depth value Dt(pt) scales every coordinate element-wise (it is not a matrix multiplication).
    The idea is to map the pixel coordinates of the target image to 3D space (Dt(pt).K^-1.pt), then map these onto
    the source image in pixel coordinates (K.T(t->s).{3d coord}), then use the projected coordinates to sample
    the pixels in the source image (ps) to reconstruct the target image.
    :param target_img: Tensor (batch, height, width, 3)
    :param source_img: Tensor, same shape as target_img
    :param depth: Tensor, (batch, height, width, 1)
    :param pose: (batch, 6)
    :param intrinsics: (batch, 4) (fx, fy, px, py) TODO: Intrinsics per image (per source/target image)?
    :param coordinates: (batch, height, width, 3) - homogeneous coordinates from image_coordinate(). Passed in so
        they don't need to be recalculated on every warp step
    :return: The source image reprojected to the target
    """
    batch_size, height, width = tf.shape(depth)[0], tf.shape(depth)[1], tf.shape(depth)[2]
    # Convert pose vector (output of pose net) to pose matrix (4x4)
    pose_4x4 = pose_vec2mat(pose)
    # Convert the intrinsics vector to a 4x4 matrix so it can be composed with the pose matrix
    intrinsics_4x4 = intrinsics_vector_to_matrix(intrinsics)
    # Inverse of the 4x4 intrinsics matrix; its top-left 3x3 block is K^-1
    intrinsics_inv = tf.linalg.inv(intrinsics_4x4)
    # Flatten the homogeneous pixel grid to (B, 3, height * width) so each pixel is a column vector
    grid_coords = tf.cast(coordinates, tf.float32)
    grid_coords = tf.transpose(tf.reshape(grid_coords, [batch_size, height * width, 3]), [0, 2, 1])
    # Back-project to camera space: Dt(pt) . K^-1 . pt (depth scales each column element-wise)
    cam_coords = tf.matmul(intrinsics_inv[:, :3, :3], grid_coords)
    cam_coords = cam_coords * tf.reshape(depth, [batch_size, 1, height * width])
    # Make the 3D points homogeneous and project into the source frame: K . T(t->s) . {3d coord}
    cam_coords = tf.concat([cam_coords, tf.ones([batch_size, 1, height * width])], axis=1)
    proj = tf.matmul(tf.matmul(intrinsics_4x4, pose_4x4), cam_coords)
    # Dehomogenise: divide x and y by z to get pixel coordinates in the source image
    x_src = proj[:, 0, :] / (proj[:, 2, :] + 1e-10)
    y_src = proj[:, 1, :] / (proj[:, 2, :] + 1e-10)
    src_pixel_coords = tf.reshape(tf.stack([x_src, y_src], axis=2), [batch_size, height, width, 2])
    # Sample the source image at the projected coordinates (bilinear_sample is sketched below)
    return bilinear_sample(source_img, src_pixel_coords)
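

# NOTE: bilinear_sample below is not part of the original module. It is a minimal sketch of the
# differentiable (spatial-transformer style) sampler that the warp step above relies on, assuming
# coordinates come as [x, y] in pixel units; out-of-range lookups are clipped to the image border.
def bilinear_sample(img, coords):
    """
    :param img: Tensor (B, H, W, C) to sample from
    :param coords: Tensor (B, H, W, 2) of [x, y] pixel coordinates into img
    :return: Tensor (B, H, W, C), img sampled at coords with bilinear interpolation
    """
    batch_size, height, width = tf.shape(img)[0], tf.shape(img)[1], tf.shape(img)[2]
    x, y = coords[..., 0], coords[..., 1]
    x0, y0 = tf.floor(x), tf.floor(y)
    x1, y1 = x0 + 1.0, y0 + 1.0
    # Interpolation weights for the four surrounding pixels
    wa = (x1 - x) * (y1 - y)  # top-left (x0, y0)
    wb = (x1 - x) * (y - y0)  # bottom-left (x0, y1)
    wc = (x - x0) * (y1 - y)  # top-right (x1, y0)
    wd = (x - x0) * (y - y0)  # bottom-right (x1, y1)
    # Clip the lookup indices to valid pixel positions
    max_x, max_y = tf.cast(width - 1, tf.float32), tf.cast(height - 1, tf.float32)
    x0, x1 = tf.clip_by_value(x0, 0.0, max_x), tf.clip_by_value(x1, 0.0, max_x)
    y0, y1 = tf.clip_by_value(y0, 0.0, max_y), tf.clip_by_value(y1, 0.0, max_y)

    def gather(xs, ys):
        # Look up img[b, y, x] for every pixel via gather_nd
        b = tf.tile(tf.reshape(tf.range(batch_size), [-1, 1, 1]), [1, tf.shape(xs)[1], tf.shape(xs)[2]])
        indices = tf.stack([b, tf.cast(ys, tf.int32), tf.cast(xs, tf.int32)], axis=-1)
        return tf.gather_nd(img, indices)

    return (wa[..., None] * gather(x0, y0) + wb[..., None] * gather(x0, y1) +
            wc[..., None] * gather(x1, y0) + wd[..., None] * gather(x1, y1))


if __name__ == "__main__":
    # Minimal smoke test (illustrative only): with a zero pose vector the warp is the identity
    # mapping, so the reprojected image should match the source and keep its shape.
    b, h, w = 2, 4, 6
    target = tf.random.uniform([b, h, w, 3])
    source = tf.random.uniform([b, h, w, 3])
    depth = tf.ones([b, h, w, 1])
    pose = tf.zeros([b, 6])
    intrinsics = tf.constant([[1.0, 1.0, w / 2.0, h / 2.0]] * b)
    coords = image_coordinate(b, h, w)
    warped = projective_inverse_warp(target, source, depth, pose, intrinsics, coords)
    print(warped.shape)  # (2, 4, 6, 3)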