"""FastDepth-style monocular depth estimation model (MobileNet encoder + NNConv5 decoder)."""
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_datasets as tfds


# Ripped from:
# https://forums.developer.nvidia.com/t/could-not-create-cudnn-handle-cudnn-status-alloc-failed/108261/4?u=mpivato4
# Seems to be an issue on windows so explicitly set gpu growth
def fix_windows_gpu():
    """Enable memory growth on every visible GPU and print the device counts.

    Does nothing when no GPU is found. A ``RuntimeError`` raised by
    TensorFlow (memory growth must be configured before the GPUs are
    initialized) is printed rather than propagated.
    """
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if not gpus:
        return
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)


# Functional version of fastdepth model

def FDDepthwiseBlock(inputs, out_channels, block_id=1):
    """Apply one decoder block: 5x5 depthwise conv followed by a 1x1 pointwise conv.

    Each convolution is followed by batch normalization and ReLU6.

    :param inputs: Input Keras tensor.
    :param out_channels: Number of filters of the pointwise convolution.
    :param block_id: Used to name the final activation ``conv_pw_<block_id>_relu``.
    :return: Output Keras tensor of the block.
    """
    x = keras.layers.DepthwiseConv2D(5, padding='same')(inputs)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.ReLU(6.)(x)
    x = keras.layers.Conv2D(out_channels, 1, padding='same')(x)
    x = keras.layers.BatchNormalization()(x)
    return keras.layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x)


def make_mobilenet_nnconv5(weights=None, shape=(224, 224, 3)):
    """Build the ``fast_depth`` model: MobileNet encoder plus a 5-block decoder.

    The decoder adds skip connections from the encoder activations
    ``conv_pw_5_relu``, ``conv_pw_3_relu`` and ``conv_pw_1_relu``.

    :param weights: Passed to ``keras.applications.MobileNet`` (e.g. ``'imagenet'`` or ``None``).
    :param shape: Input shape. NOTE: the decoder resizes to fixed sizes
        (14, 28, 56, 112, 224), so the skip connections only line up for the
        default 224x224 input.
    :return: Uncompiled ``keras.Model`` with a single-channel output.
    """
    inputs = keras.layers.Input(shape=shape)
    mobilenet = keras.applications.MobileNet(input_tensor=inputs, include_top=False, weights=weights)

    # MobileNet is already wired to `inputs`, so read the activations straight
    # from its graph instead of calling every layer (InputLayer included) a
    # second time.
    conv1 = mobilenet.get_layer('conv_pw_1_relu').output
    conv3 = mobilenet.get_layer('conv_pw_3_relu').output
    conv5 = mobilenet.get_layer('conv_pw_5_relu').output
    x = mobilenet.output

    # Fast depth decoder
    x = FDDepthwiseBlock(x, 512, block_id=14)
    # TODO: Bilinear interpolation
    # Nearest neighbour interpolation, used by fast depth paper
    x = keras.layers.experimental.preprocessing.Resizing(14, 14, interpolation='nearest')(x)
    x = FDDepthwiseBlock(x, 256, block_id=15)
    x = keras.layers.experimental.preprocessing.Resizing(28, 28, interpolation='nearest')(x)
    x = keras.layers.Add()([x, conv5])
    x = FDDepthwiseBlock(x, 128, block_id=16)
    x = keras.layers.experimental.preprocessing.Resizing(56, 56, interpolation='nearest')(x)
    x = keras.layers.Add()([x, conv3])
    x = FDDepthwiseBlock(x, 64, block_id=17)
    x = keras.layers.experimental.preprocessing.Resizing(112, 112, interpolation='nearest')(x)
    x = keras.layers.Add()([x, conv1])
    x = FDDepthwiseBlock(x, 32, block_id=18)
    x = keras.layers.experimental.preprocessing.Resizing(224, 224, interpolation='nearest')(x)

    x = keras.layers.Conv2D(1, 1, padding='same')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.ReLU(6.)(x)
    return keras.Model(inputs=inputs, outputs=x, name="fast_depth")


# TODO: Fix these, float doesn't work same as pytorch
def _delta_metric(y_true, y_pred, threshold):
    """Return the fraction of elements where max(pred/true, true/pred) < threshold."""
    max_ratio = tf.maximum(y_pred / y_true, y_true / y_pred)
    return tf.reduce_mean(tf.cast(max_ratio < threshold, tf.float32))


def delta1_metric(y_true, y_pred):
    """Fraction of elements whose prediction/label ratio is below 1.25."""
    return _delta_metric(y_true, y_pred, 1.25)


def delta2_metric(y_true, y_pred):
    """Fraction of elements whose prediction/label ratio is below 1.25 ** 2."""
    return _delta_metric(y_true, y_pred, 1.25 ** 2)


def delta3_metric(y_true, y_pred):
    """Fraction of elements whose prediction/label ratio is below 1.25 ** 3."""
    return _delta_metric(y_true, y_pred, 1.25 ** 3)


def compile(model):
    """Compile ``model`` in place with SGD (momentum 0.9), MSE loss and the depth metrics.

    NOTE: the name shadows the ``compile`` builtin inside this module; it is
    kept so existing callers continue to work.
    """
    # TODO: Learning rate (exponential decay)
    model.compile(optimizer=keras.optimizers.SGD(momentum=0.9),
                  loss=keras.losses.MeanSquaredError(),
                  metrics=[keras.metrics.RootMeanSquaredError(),
                           keras.metrics.MeanSquaredError(),
                           delta1_metric,
                           delta2_metric,
                           delta3_metric])


def train(existing_model=None, pretrained_weights='imagenet', epochs=4, save_file=None, dataset=None):
    """Train a model, creating and compiling a fresh one when none is given.

    :param existing_model: Model to continue training. When ``None`` a new
        model is built with ``make_mobilenet_nnconv5`` and compiled.
    :param pretrained_weights: Encoder weights used only when a new model is built.
    :param epochs: Number of epochs passed to ``fit``.
    :param save_file: If truthy, the trained model is saved to this path.
    :param dataset: Training dataset; defaults to ``load_nyu()``.
    :return: The trained model.
    """
    if existing_model is None:
        existing_model = make_mobilenet_nnconv5(pretrained_weights)
        # NOTE(review): only freshly built models are compiled here; a model
        # passed in by the caller is assumed to be compiled already - confirm.
        compile(existing_model)
    if dataset is None:
        dataset = load_nyu()
    existing_model.fit(dataset, epochs=epochs)
    if save_file:
        existing_model.save(save_file)
    return existing_model


def evaluate(compiled_model, dataset=None):
    """
    Evaluate the model using rmse, delta1/2/3 metrics

    :param compiled_model: Compiled, trained model to evaluate
    :param dataset: Dataset for evaluation. Should be of format {'image': image, 'depth': label},
        where label width/height matches image width/height. Defaults to Tensorflow nyu_v2
        evaluation split dataset (https://www.tensorflow.org/datasets/catalog/nyu_depth_v2)
    :return: Whatever ``compiled_model.evaluate`` returns (loss and metric values)
    """
    if dataset is None:
        dataset = load_nyu_evaluate()
    return compiled_model.evaluate(dataset, verbose=1)


def forward(model, image):
    """
    Propagate a single or batch of images through the model.

    :param model: Model to run.
    :param image: Image batch in NHWC format, a single HWC image, or a
        dataset element ``{'image': ..., 'depth': ...}``.
    :return: Model output for the images resized to 224x224.
    """
    if isinstance(image, dict):
        # Dataset element: reuse the training preprocessing, keep only the image.
        resized, _ = crop_and_resize(image)
    else:
        image = tf.convert_to_tensor(image)
        if image.shape.rank == 3:
            # Single HWC image -> add the batch dimension.
            image = tf.expand_dims(image, axis=0)
        resized = keras.layers.experimental.preprocessing.Resizing(
            224, 224, interpolation='nearest')(image)
    return model(tf.cast(resized, tf.float32))


def load_model(file):
    """Load a saved model, registering the custom delta metrics so it deserializes."""
    return keras.models.load_model(file, custom_objects={'delta1_metric': delta1_metric,
                                                         'delta2_metric': delta2_metric,
                                                         'delta3_metric': delta3_metric})


def crop_and_resize(x):
    """Turn a batched ``{'image', 'depth'}`` element into an ``(image, depth)`` pair at 224x224.

    Both tensors are center-cropped to the depth map's height/width and then
    resized with nearest-neighbour interpolation. The depth map is given a
    trailing channel dimension of size 1.

    :param x: Dict with a 4-D ``'image'`` and a 3-D ``'depth'`` batch.
    :return: Tuple ``(image, depth)``.
    """
    shape = tf.shape(x['depth'])

    def layer():
        return keras.Sequential([
            keras.layers.experimental.preprocessing.CenterCrop(shape[1], shape[2]),
            keras.layers.experimental.preprocessing.Resizing(224, 224, interpolation='nearest')
        ])

    # Reshape label to 4d, can't use array unwrap as it's unsupported by tensorflow
    depth = tf.reshape(x['depth'], [shape[0], shape[1], shape[2], 1])
    return layer()(x['image']), layer()(depth)


def _nyu_builder():
    """Return the ``nyu_depth_v2`` builder after downloading/preparing into ``../nyu``."""
    builder = tfds.builder('nyu_depth_v2')
    builder.download_and_prepare(download_dir='../nyu')
    return builder


def load_nyu():
    """Return the shuffled NYU Depth v2 training split, batched by 8 and preprocessed."""
    return _nyu_builder() \
        .as_dataset(split='train', shuffle_files=True) \
        .shuffle(buffer_size=1024) \
        .batch(8) \
        .map(crop_and_resize)


def load_nyu_evaluate():
    """Return the NYU Depth v2 validation split, batched by 1 and preprocessed."""
    return _nyu_builder().as_dataset(split='validation').batch(1).map(crop_and_resize)