137 lines
5.3 KiB
Python
137 lines
5.3 KiB
Python
import math
|
||
|
||
import tensorflow as tf
|
||
|
||
|
||
def euler_to_matrix(x, y, z):
    """
    Build a batch of 3x3 rotation matrices from per-axis Euler angles.

    Every angle is clipped to [-pi, pi] before use, so a pose network feeding
    this function is coerced into that range. The rotations are applied in the
    order x, then y, then z; because a rotation acts on the left of a
    coordinate vector, the composed matrix is R = Rz(z) . Ry(y) . Rx(x).

    :param x: Tensor of shape (B, 1) - x axis rotation
    :param y: Tensor of shape (B, 1) - y axis rotation
    :param z: Tensor of shape (B, 1) - z axis rotation
    :return: Tensor of shape (B, 3, 3), the rotation matrix for each set of euler angles
    """
    num_matrices = tf.shape(z)[0]

    # Clamp each angle to [-pi, pi]
    x, y, z = (tf.clip_by_value(angle, -math.pi, math.pi) for angle in (x, y, z))

    cx, sx = tf.cos(x), tf.sin(x)
    cy, sy = tf.cos(y), tf.sin(y)
    cz, sz = tf.cos(z), tf.sin(z)

    # Entries of Rz(z) . Ry(y) . Rx(x), listed in row-major order:
    #   | cy.cz   sx.sy.cz - cx.sz   cx.sy.cz + sx.sz |
    #   | cy.sz   sx.sy.sz + cx.cz   cx.sy.sz - sx.cz |
    #   | -sy     sx.cy              cx.cy            |
    entries = [
        cy * cz, sx * sy * cz - cx * sz, cx * sy * cz + sx * sz,
        cy * sz, sx * sy * sz + cx * cz, cx * sy * sz - sx * cz,
        -sy, sx * cy, cx * cy,
    ]

    # (B, 9) -> (B, 3, 3)
    flat = tf.concat(entries, axis=1)
    return tf.reshape(flat, [num_matrices, 3, 3])
|
||
|
||
|
||
def pose_vec2mat(vec):
    """Converts 6DoF parameters to transformation matrix

    The rotation block comes from euler_to_matrix and the translation is
    placed in the last column, giving one homogeneous transform per batch
    element:

        | R  t |
        | 0  1 |

    Args:
        vec: 6DoF parameters in the order of tx, ty, tz, rx, ry, rz -- [B, 6]
    Returns:
        A transformation matrix -- [B, 4, 4]
    """
    # Use the dynamic shape so an unknown (None) batch dimension still works
    batch_size = tf.shape(vec)[0]

    # Translation as a column vector per batch element -- [B, 3, 1]
    translation = tf.expand_dims(vec[:, 0:3], -1)

    # Slices keep the trailing axis, so each angle is [B, 1] as euler_to_matrix expects
    rx = vec[:, 3:4]
    ry = vec[:, 4:5]
    rz = vec[:, 5:6]

    # euler_to_matrix already returns [B, 3, 3]; there is no singleton axis to squeeze
    rot_mat = euler_to_matrix(rx, ry, rz)

    # Bottom row [0, 0, 0, 1] of the homogeneous transform, repeated per batch element
    filler = tf.constant([0.0, 0.0, 0.0, 1.0], dtype=vec.dtype, shape=[1, 1, 4])
    filler = tf.tile(filler, [batch_size, 1, 1])

    transform_mat = tf.concat([rot_mat, translation], axis=2)  # [B, 3, 4]
    transform_mat = tf.concat([transform_mat, filler], axis=1)  # [B, 4, 4]
    return transform_mat
|
||
|
||
|
||
def image_coordinate(batch, height, width):
    """
    Build the homogeneous pixel coordinate grid for a batch of images.

    :param batch: Number of images in a batch
    :param height: Height of image
    :param width: Width of image
    :return: Tensor of shape (B, height, width, 3), homogeneous coordinates for an image.
             Coordinates are in order [x, y, 1]
    """
    # meshgrid returns (height, width) grids: column index (x) first, row index (y) second
    columns, rows = tf.meshgrid(tf.range(width), tf.range(height))

    # Homogeneous component, created directly as int32 ones
    unit = tf.ones([height, width], dtype=tf.int32)

    # (height, width, 3) with the last axis ordered [x, y, 1]
    pixel_grid = tf.stack([columns, rows, unit], axis=-1)

    # Add a leading batch axis and copy the grid once per image
    return tf.repeat(pixel_grid[tf.newaxis, ...], batch, axis=0)
|
||
|
||
|
||
def intrinsics_vector_to_matrix(intrinsics):
    """
    Convert 4 element intrinsics vectors (fx, fy, px, py) to homogeneous 4x4 intrinsics matrices

        | fx  0  px  0 |
        |  0 fy  py  0 |
        |  0  0   1  0 |
        |  0  0   0  1 |

    :param intrinsics: Tensor of shape (B, 4), intrinsics for each image
    :return: Tensor of shape (B, 4, 4), intrinsics for each batch
    """
    # Four tensors of shape (B,), one per intrinsic parameter
    fx, fy, px, py = tf.unstack(intrinsics, num=4, axis=1)
    zeros = tf.zeros_like(fx)
    ones = tf.ones_like(fx)

    # Each inner stack is one (B, 4) matrix row; the outer stack assembles (B, 4, 4)
    return tf.stack(
        [
            tf.stack([fx, zeros, px, zeros], axis=1),
            tf.stack([zeros, fy, py, zeros], axis=1),
            tf.stack([zeros, zeros, ones, zeros], axis=1),
            tf.stack([zeros, zeros, zeros, ones], axis=1),
        ],
        axis=1,
    )


def _bilinear_sample(image, coords):
    """
    Bilinearly sample an image at fractional pixel positions, treating everything outside the image as zero

    :param image: Tensor (batch, height, width, channels)
    :param coords: Float tensor (batch, out_height, out_width, 2), sample positions in order [x, y]
    :return: Tensor (batch, out_height, out_width, channels) with the dtype of coords
    """
    image = tf.cast(image, coords.dtype)
    image_shape = tf.shape(image)
    max_x = tf.cast(image_shape[2] - 1, coords.dtype)
    max_y = tf.cast(image_shape[1] - 1, coords.dtype)

    # Non-finite positions (e.g. a division by zero during projection) are moved outside
    # the image so they sample as zero instead of producing an invalid gather index
    coords = tf.where(tf.math.is_finite(coords), coords, -2.0 * tf.ones_like(coords))

    x = coords[..., 0]
    y = coords[..., 1]
    x0 = tf.floor(x)
    y0 = tf.floor(y)
    frac_x = x - x0
    frac_y = y - y0

    def corner(corner_x, corner_y, weight):
        # A neighbour lying outside the image contributes nothing (zero padding)
        inside = tf.logical_and(
            tf.logical_and(corner_x >= 0.0, corner_x <= max_x),
            tf.logical_and(corner_y >= 0.0, corner_y <= max_y),
        )
        weight = tf.where(inside, weight, tf.zeros_like(weight))

        # Clip before casting so the gather index is always valid
        col = tf.cast(tf.clip_by_value(corner_x, 0.0, max_x), tf.int32)
        row = tf.cast(tf.clip_by_value(corner_y, 0.0, max_y), tf.int32)
        pixels = tf.gather_nd(image, tf.stack([row, col], axis=-1), batch_dims=1)
        return pixels * tf.expand_dims(weight, -1)

    # Weighted sum of the four neighbouring pixels
    return tf.add_n(
        [
            corner(x0, y0, (1.0 - frac_x) * (1.0 - frac_y)),
            corner(x0 + 1.0, y0, frac_x * (1.0 - frac_y)),
            corner(x0, y0 + 1.0, (1.0 - frac_x) * frac_y),
            corner(x0 + 1.0, y0 + 1.0, frac_x * frac_y),
        ]
    )


def projective_inverse_warp(target_img, source_img, depth, pose, intrinsics, coordinates):
    """
    Calculate the reprojected image from the source to the target, based on the given depth, pose and intrinsics

    SFM Learner inverse warp step
        ps ~ K.T(t->s).Dt(pt).K^-1.pt

    Note that the depth pixel Dt(pt) is multiplied by every coordinate value (element-wise, not a matrix
    multiplication)

    The idea is to map the pixel coordinates of the target image to 3d space (Dt(pt).K^-1.pt), then map these onto
    the source image in pixel coordinates (K.T(t->s).{3d coord}), then use the projected coordinates to sample
    the pixels in the source image (ps) to reconstruct the target image. Projected positions that fall outside
    the source image are sampled as zero.

    :param target_img: Tensor (batch, height, width, 3). Kept for interface compatibility; not used in the computation
    :param source_img: Tensor, same shape as target_img
    :param depth: Float tensor, (batch, height, width, 1)
    :param pose: (batch, 6), in the order tx, ty, tz, rx, ry, rz
    :param intrinsics: (batch, 4) (fx, fy, px, py) TODO: Intrinsics per image (per source/target image)?
    :param coordinates: (batch, height, width, 3) - homogeneous [x, y, 1] coordinates for the image. Pass this in
                        so it doesn't need to be calculated on every warp step
    :return: The source image reprojected to the target, (batch, height, width, channels)
    """
    dtype = depth.dtype
    depth_shape = tf.shape(depth)
    batch, height, width = depth_shape[0], depth_shape[1], depth_shape[2]
    num_pixels = height * width

    # Convert pose vector (output of pose net) to pose matrix (4x4)
    pose_4x4 = tf.cast(pose_vec2mat(pose), dtype)

    # Build the 4x4 intrinsics matrix so it can be multiplied by the pose matrix, and invert it
    intrinsics_4x4 = intrinsics_vector_to_matrix(tf.cast(intrinsics, dtype))
    intrinsics_inv = tf.linalg.inv(intrinsics_4x4)

    # Flatten the image coords to [B, 3, height * width] so each point can be used in calculations
    pixels = tf.reshape(tf.cast(coordinates, dtype), [batch, num_pixels, 3])
    pixels = tf.transpose(pixels, [0, 2, 1])

    # Back-project the target pixels to 3d camera coordinates: Dt(pt).K^-1.pt
    # The upper-left 3x3 of the inverse is the inverse of the 3x3 intrinsics (K is block diagonal)
    depth_flat = tf.reshape(depth, [batch, 1, num_pixels])
    cam_points = tf.matmul(intrinsics_inv[:, :3, :3], pixels) * depth_flat

    # Homogeneous 3d points, [B, 4, height * width]
    cam_points = tf.concat([cam_points, tf.ones([batch, 1, num_pixels], dtype=dtype)], axis=1)

    # Project the 3d points into the source camera: K.T(t->s).{3d coord}
    projected = tf.matmul(tf.matmul(intrinsics_4x4, pose_4x4), cam_points)

    # Perspective divide; the small epsilon guards against division by an exactly-zero depth
    z = projected[:, 2, :] + 1e-10
    source_xy = tf.stack([projected[:, 0, :] / z, projected[:, 1, :] / z], axis=-1)
    source_xy = tf.reshape(source_xy, [batch, height, width, 2])

    # Sample from the source image using the projected coordinates
    return _bilinear_sample(source_img, source_xy)
|