fast-depth-tf/unsupervised/loss.py

import tensorflow as tf


def l1_loss(target_img, reprojected_img):
    """
    Calculates the l1 norm between the target and reprojected image

    :param target_img: Tensor (batch, height, width, 3)
    :param reprojected_img: Tensor, same shape as target_img
    :return: The per-pixel l1 norm -> Tensor (batch, height, width, 1)
    """
    return tf.reduce_mean(tf.abs(target_img - reprojected_img), axis=3)


def l2_loss(target_img, reprojected_img):
    """
    Calculates the l2 norm between the target and reprojected image

    :param target_img: Tensor (batch, height, width, 3)
    :param reprojected_img: Tensor, same shape as target_img
    :return: The per-pixel l2 norm -> Tensor (batch, height, width, 1)
    """
    return tf.reduce_mean((target_img - reprojected_img) ** 2 ** (1 / 2), axis=3)


def make_combined_ssim_l1_loss(ssim_weight: int = 0.85, other_loss_fn=l1_loss):
    """
    Create a loss function that will calculate ssim for the two images, and use the other_loss_fn to calculate the
    per pixel loss
    :param ssim_weight: Weighting that should be applied to SSIM weight vs l1 difference between target and
        reprojected image
    :param other_loss_fn: Function to combine with the ssim
    :return: Function to calculate the per-pixel combined ssim with other loss function
    """

    def combined_ssim_loss(target_img, reprojected_img):
        """
        Calculates the per-pixel photometric reconstruction loss for each source image,
        combined this with the SSIM between the reconstructed image and the actual image.

        Calculates the following:
        ssim_weight * SSIM(target_img, reprojected_img) + (1 - ssim_weight) * other_loss_fn(target_img - reprojected_img)

        :param target_img: Tensor with shape (batch, height, width, 3) - current image we're training on
        :param reprojected_img: Tensor with same shape as target_img, Reprojected from some source image that
            should be as close as possible to the target image
        :return: Per-pixel loss -> Tensor with shape (batch, height, width, 1), where height and width match target_img
            height and width
        """
        ssim = tf.image.ssim(target_img, reprojected_img, axis=3, keepdim=True)
        return ssim_weight * ssim + (1 - ssim_weight) * other_loss_fn(target_img, reprojected_img)

    return combined_ssim_loss


# TODO: Consider other gradient methods for calculating smoothness, e.g. convolution methods such as Sobel
def smooth_loss(depth, colour_image):
    """
    Calculate the edge-aware per-pixel smooth loss on a depth map, with image scaled appropriately to the depth map

    Does this equation (equation 3 in monodepth2 paper):
    |dxd*t|e^(-|dxIt|) + |dyd*t|e^(-|dyIt|)

    :param depth: Tensor with shape (B, h, w, 1) - disparity, such as the depth map
    :param colour_image: Tensor with shape (B, h, w, 3) - colour image, same resolution as disparity map
    :return: smooth loss
    """
    # Mean normalised inverse depth
    normalised_depth = depth / (tf.reduce_mean(depth, [1, 2], keepdims=True) + 1e-7)

    # Nothing fancy here for gradients (follows sfmlearner/monodepth), just shift 1 pixel and
    # compare the change (x/y shift left/up 1 pixel)
    depth_gradient_x = tf.abs(normalised_depth[:, :-1, :, :] - normalised_depth[:, 1:, :, :])
    depth_gradient_y = tf.abs(normalised_depth[:, :, :-1, :] - normalised_depth[:, :, 1:, :])

    # Colour gradients to work better with edges, monodepth 1/2 uses these
    image_gradient_x = tf.abs(colour_image[:, :-1, :, :] - colour_image[:, 1:, :, :])
    image_gradient_y = tf.abs(colour_image[:, :, :-1, :] - colour_image[:, :, 1:, :])

    # Average the 3 colour channels into a single channel, so can be compared with the depth disparities
    smooth_x = depth_gradient_x * tf.exp(-tf.reduce_mean(image_gradient_x, 3, keepdims=True))
    smooth_y = depth_gradient_y * tf.exp(-tf.reduce_mean(image_gradient_y, 3, keepdims=True))

    return tf.reduce_mean(smooth_x) + tf.reduce_mean(smooth_y)