Source code for atlalign.metrics

"""A module implementing some useful metrics.

Notes
-----
The metrics depend on what we apply them to. In general there are 4 types of metrics:
    * DVF (=multioutput regression)
    * Annotation (=per pixel classification)
    * Image
    * Keypoint metrics

These metrics use NumPy and are meant to be applied locally, after the forward pass.

In DVF metrics, the word 'combined' denotes the fact that we are performing multioutput regression. Each of
these metrics always returns a tuple of (metric_average, metric_per_output).

"""

"""
    The package atlalign is a tool for registration of 2D images.

    Copyright (C) 2021 EPFL/Blue Brain Project

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Lesser General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""

from functools import wraps

import ants
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.spatial import distance
from skimage.metrics import peak_signal_noise_ratio, structural_similarity
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from atlalign.base import DisplacementField
from atlalign.data import annotation_volume, segmentation_collapsing_labels
from atlalign.utils import find_labels_dic


# General utils
def _checker_and_convertor_dvf(y_true, y_pred, copy=True):
    """Check if inputs correct and convert to np.ndarray if necessary.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    copy : bool
        If True, then copied arrays are returned.

    Returns
    -------
    y_true : np.ndarray
        Array of shape (N, h, w, 2) representing true samples of displacement fields.

    y_pred : np.ndarray
        Array of shape (N, h, w, 2) representing predicted samples of displacement fields.

    """
    inputs = {"y_true": y_true, "y_pred": y_pred}
    # checks
    for y_name, y in inputs.items():
        if not isinstance(y, (list, np.ndarray)):
            raise TypeError(
                "The {} needs to be either a list or np.ndarray, current type:{}".format(
                    y_name, type(y)
                )
            )

        if isinstance(y, list):
            if not np.all([isinstance(df, DisplacementField) for df in y]):
                raise TypeError(
                    "All elements of the list of {} need to be instances "
                    "of DisplacementField".format(y_name)
                )

            # convert to np.ndarray
            y_array = np.array([np.stack((df.delta_x, df.delta_y), axis=2) for df in y])

            return (
                _checker_and_convertor_dvf(y_true, y_array, copy=copy)
                if y_name == "y_pred"
                else _checker_and_convertor_dvf(y_array, y_pred, copy=copy)
            )

        if y.ndim != 4:
            raise ValueError(
                "The number of dimensions of {} needs to be 4.".format(y_name)
            )

        if y.shape[3] != 2:
            raise ValueError("The displacement field needs to have delta_x and delta_y")

    if y_true.shape != y_pred.shape:
        raise ValueError("The two input arrays have inconsistent shapes")

    if copy:
        return y_true.copy(), y_pred.copy()

    else:
        return y_true, y_pred


def _scikit_metric_wrapper(y_true, y_pred, metric_callable, **kwargs):
    """Wrap scikit regression metrics so that it is possible to work with image shaped regression outputs.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    metric_callable : callable
        A scikit-learn-like metric that inputs y_true, y_pred of shapes (n_samples, n_outputs) and outputs
        (n_outputs, ) metric scores.

    kwargs : dict
        Keyword arguments passed into the corresponding scikit-learn metric.

    Returns
    -------
    metric_average : float
        An average version of the metric over all regression outputs.

    metric_per_output : np.ndarray
        An np.ndarray of shape (h, w, 2) representing individual metric scores for each regression output.

    """
    y_true, y_pred = _checker_and_convertor_dvf(y_true, y_pred)

    kwargs.update({"multioutput": "raw_values"})

    shape = y_true.shape
    N = shape[0]

    res_raw = metric_callable(
        y_true.reshape((N, -1)), y_pred.reshape((N, -1)), **kwargs
    )

    metric_per_output = res_raw.reshape(shape[1:])
    metric_average = metric_per_output.mean()

    return metric_average, metric_per_output


# OPTICAL FLOW METRICS
def angular_error_of(y_true, y_pred, weighted=False):
    """Compute angular error between two displacement fields.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    weighted : bool
        Only applicable in cases where `N=1`. The norm of `y_true` is used to create the weights for the average.

    Returns
    -------
    angular_error_average : float
        An average angular error over all samples and pixels. If `weighted=True` then it is a weighted
        average where the weights are derived from the norm of `y_true`.

    angular_error_per_output : np.ndarray
        An np.ndarray of shape (h, w) representing an average over all samples of angular error.

    """
    y_true, y_pred = _checker_and_convertor_dvf(y_true, y_pred)

    shape = y_true.shape

    if weighted and shape[0] != 1:
        raise ValueError("The weighted average is only allowed for a single sample.")

    angular_error_per_output_per_sample = np.zeros(shape[:-1])

    for i in range(shape[0]):
        delta_x_true = y_true[i, ..., 0]
        delta_y_true = y_true[i, ..., 1]
        delta_x_pred = y_pred[i, ..., 0]
        delta_y_pred = y_pred[i, ..., 1]

        top = delta_x_true * delta_x_pred + delta_y_true * delta_y_pred
        bottom = np.sqrt(
            delta_x_true * delta_x_true + delta_y_true * delta_y_true
        ) * np.sqrt(delta_x_pred * delta_x_pred + delta_y_pred * delta_y_pred)

        angular_error_per_output_per_sample[i] = np.rad2deg(
            np.arccos(np.clip(top / bottom, -1, 1))
        )

    angular_error_per_output = np.nanmean(
        angular_error_per_output_per_sample, axis=0
    )  # (h, w)

    if weighted:
        norm = DisplacementField(y_true[0, ..., 0], y_true[0, ..., 1]).norm
        weights = norm / norm.sum()
        # NaN pixels (zero-norm vectors) may differ between true and pred;
        # treat NaN angular errors as 0 in the weighted sum.
        angular_error_average = np.where(
            np.isnan(angular_error_per_output), 0, angular_error_per_output * weights
        ).sum()
    else:
        angular_error_average = np.nanmean(angular_error_per_output)  # float

    return angular_error_average, angular_error_per_output
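
# A hedged usage sketch (illustrative, not part of the original module): the angular
# error of a prediction against itself is zero. The (1, 4, 4, 2) shape is arbitrary,
# chosen only for the example.
#
# >>> import numpy as np
# >>> rng = np.random.default_rng(0)
# >>> y_true = rng.random((1, 4, 4, 2)) + 0.1  # keep vectors away from zero norm
# >>> avg, per_pixel = angular_error_of(y_true, y_true.copy())
# >>> round(float(avg), 5), per_pixel.shape
# (0.0, (4, 4))
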
# REGRESSION METRICS
def correlation_combined(y_true, y_pred):
    """Compute combined version of correlation.

    Notes
    -----
    Slow.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    correlation_average : float
        Mean correlation.

    correlation_per_output : np.ndarray
        An np.ndarray of shape (h, w, 2) representing individual correlation scores for each regression output.

    """
    y_true, y_pred = _checker_and_convertor_dvf(y_true, y_pred)

    shape = y_true.shape

    df_true = pd.DataFrame(y_true.reshape(len(y_true), -1))
    df_pred = pd.DataFrame(y_pred.reshape(len(y_pred), -1))

    res_s = df_true.apply(lambda x: np.corrcoef(x, df_pred[x.name])[0, 1], axis=0)
    # res_s = df_true.apply(lambda x: x.corr(df_pred[x.name]), axis=0)

    correlation_per_output = res_s.values.reshape(shape[1:])
    correlation_average = correlation_per_output.mean()

    return correlation_average, correlation_per_output
def mae_combined(y_true, y_pred):
    """Compute combined version of mean absolute error.

    Notes
    -----
    A difference between this implementation and scikit-learn is that the inputs here
    `y_true`, `y_pred` are custom made for our registration problem.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    mae_average : float
        A combined mae.

    mae_per_output : np.ndarray
        An np.ndarray of shape (h, w, 2) representing individual mae scores for each regression output.

    """
    return _scikit_metric_wrapper(y_true, y_pred, metric_callable=mean_absolute_error)
def mse_combined(y_true, y_pred):
    """Compute combined version of mean squared error.

    Notes
    -----
    A difference between this implementation and scikit-learn is that the inputs here
    `y_true`, `y_pred` are custom made for our registration problem.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    mse_average : float
        A combined mse.

    mse_per_output : np.ndarray
        An np.ndarray of shape (h, w, 2) representing individual mse scores for each regression output.

    """
    return _scikit_metric_wrapper(y_true, y_pred, metric_callable=mean_squared_error)
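
# A hedged usage sketch (illustrative, not part of the original module): all the
# ``*_combined`` metrics share the same interface; the shapes below are arbitrary
# example values.
#
# >>> import numpy as np
# >>> y_true = np.zeros((2, 3, 3, 2))
# >>> y_pred = np.ones((2, 3, 3, 2))
# >>> avg, per_output = mse_combined(y_true, y_pred)
# >>> float(avg), per_output.shape
# (1.0, (3, 3, 2))
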
def r2_combined(y_true, y_pred):
    """Compute combined version of r2.

    Notes
    -----
    A difference between this implementation and scikit-learn is that the inputs here
    `y_true`, `y_pred` are custom made for our registration problem.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    r2_average : float
        A combined r2.

    r2_per_output : np.ndarray
        An np.ndarray of shape (h, w, 2) representing individual r2 scores for each regression output.

    """
    return _scikit_metric_wrapper(y_true, y_pred, metric_callable=r2_score)
def vector_distance_combined(y_true, y_pred):
    """Compute combined version of vector distance.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    vector_distance_average : float
        An average vector distance over all samples and pixels.

    vector_distance_per_output : np.ndarray
        An np.ndarray of shape (h, w) representing an average over all samples of vector distance.

    """
    y_true, y_pred = _checker_and_convertor_dvf(y_true, y_pred)

    diff = y_pred - y_true
    vector_distance_per_output = np.sqrt(
        np.square(diff[..., 0]) + np.square(diff[..., 1])
    ).mean(axis=0)

    vector_distance_average = vector_distance_per_output.mean()

    return vector_distance_average, vector_distance_per_output
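
# A hedged usage sketch (illustrative, not part of the original module): every
# predicted vector below is off by (1, 1), so each pixel's distance is sqrt(2).
#
# >>> import numpy as np
# >>> y_true = np.zeros((1, 2, 2, 2))
# >>> y_pred = np.ones((1, 2, 2, 2))
# >>> avg, per_pixel = vector_distance_combined(y_true, y_pred)
# >>> round(float(avg), 5), per_pixel.shape
# (1.41421, (2, 2))
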
# ANNOTATION METRICS
def _checker_annotation(y_true, y_pred, k):
    """Check whether the inputs for annotation are correct.

    Parameters
    ----------
    y_true : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The ground truth annotation.

    y_pred : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The predicted annotation.

    k : int or float or None
        A class label. If None, then averaging based on label distribution in each true image is performed.

    Raises
    ------
    TypeError
        If `y_true` or `y_pred` not ``np.ndarray``.

    ValueError
        Various inconsistencies.

    """
    if not (isinstance(y_true, np.ndarray) and isinstance(y_pred, np.ndarray)):
        raise TypeError("Both of the inputs y_true and y_pred need to be a np.ndarray.")

    if not (y_true.ndim == 3 and y_pred.ndim == 3):
        raise ValueError("The input arrays need to be 3D.")

    if y_true.shape != y_pred.shape:
        raise ValueError("The arrays have different shapes.")

    # Make sure k appears at least once in at least one of the arrays, or it is None
    if k is None:
        return

    if not (np.any(y_true == k) or np.any(y_pred == k)):
        raise ValueError("The k={} is not present in either of the arrays.".format(k))
def iou_score(y_true, y_pred, k=0, disable_check=False, excluded_labels=None):
    """Compute intersection over union of a class `k` equally weighted over all samples.

    Notes
    -----
    If the class is not present in either of the images (a '0/0' type of situation), we
    simply skip this sample and do not consider it for the average.

    Parameters
    ----------
    y_true : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The ground truth annotation.

    y_pred : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The predicted annotation.

    k : int or float or None
        A class label. If None, then averaging based on label distribution in each true image is performed.

    disable_check : bool
        If True, then checks are disabled. Used when recursively calling the function.

    excluded_labels : None or list
        If None then no effect. If a list of ints then they won't be used in the averaging
        over labels (in case `k` is None).

    Returns
    -------
    iou_average : float
        An average IOU over all samples.

    iou_per_sample : np.ndarray, shape = (N,)
        IOU score per each sample. Note that if the label does not occur in either of the
        images then it is equal to np.nan.

    """
    # Run checks
    if not disable_check:
        _checker_annotation(y_true, y_pred, k)

    n = len(y_true)

    iou_per_sample = np.zeros(n, dtype=np.float32)

    for i in range(n):
        n_pixels = (
            ~np.isin(y_true[i], np.array(excluded_labels or []))
        ).sum()  # only non excluded pixels

        if k is not None:
            mask_true = y_true[i] == k
            mask_pred = y_pred[i] == k

            intersection = np.logical_and(mask_true, mask_pred)
            union = np.logical_or(mask_true, mask_pred)

            iou_per_sample[i] = (
                (intersection.sum() / union.sum())
                if not np.all(union == 0)
                else np.nan
            )
        else:
            # true image distribution
            w = {
                label: (y_true[i] == label).sum() / n_pixels
                for label in (set(np.unique(y_true[i])) | set(np.unique(y_pred[i])))
                - set(excluded_labels or [])
            }
            weighted_average = sum(
                iou_score(
                    y_true[i][np.newaxis, :, :],
                    y_pred[i][np.newaxis, :, :],
                    label,
                    disable_check=True,
                )[0]
                * p
                for label, p in w.items()
            )
            iou_per_sample[i] = weighted_average

    iou_average = np.nanmean(iou_per_sample)

    return iou_average, iou_per_sample
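
# A hedged usage sketch (illustrative, not part of the original module): IOU of
# label 1 on a single tiny annotation image (intersection 1 pixel, union 2 pixels).
#
# >>> import numpy as np
# >>> y_true = np.array([[[1, 1], [0, 0]]])  # shape (1, 2, 2)
# >>> y_pred = np.array([[[1, 0], [0, 0]]])
# >>> avg, per_sample = iou_score(y_true, y_pred, k=1)
# >>> float(avg)
# 0.5
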
def dice_score(y_true, y_pred, k=0, disable_check=False, excluded_labels=None):
    """Compute dice score of a class `k` equally weighted over all samples.

    Notes
    -----
    If the class is not present in either of the images (a '0/0' type of situation), we
    simply skip this sample and do not consider it for the average.

    Parameters
    ----------
    y_true : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The ground truth annotation.

    y_pred : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The predicted annotation.

    k : int or float or None
        A class label. If None, then averaging based on label distribution in each true image is performed.

    disable_check : bool
        If True, then checks are disabled. Used when recursively calling the function.

    excluded_labels : None or list
        If None then no effect. If a list of ints then they won't be used in the averaging
        over labels (in case `k` is None).

    Returns
    -------
    dice_average : float
        An average dice over all samples.

    dice_per_sample : np.ndarray, shape = (N,)
        Dice score per each sample. Note that if the label does not occur in either of the
        images then it is equal to np.nan.

    """
    # Run checks
    if not disable_check:
        _checker_annotation(y_true, y_pred, k)

    n = len(y_true)

    dice_per_sample = np.zeros(n, dtype=np.float32)

    for i in range(n):
        n_pixels = (
            ~np.isin(y_true[i], np.array(excluded_labels or []))
        ).sum()  # only non excluded pixels

        if k is not None:
            mask_true = y_true[i] == k
            mask_pred = y_pred[i] == k

            intersection = np.logical_and(mask_true, mask_pred)
            union = np.logical_or(mask_true, mask_pred)

            iou = (
                intersection.sum() / union.sum() if not np.all(union == 0) else np.nan
            )
            dice_per_sample[i] = (2 * iou) / (1 + iou)
        else:
            # true image distribution
            w = {
                label: (y_true[i] == label).sum() / n_pixels
                for label in (set(np.unique(y_true[i])) | set(np.unique(y_pred[i])))
                - set(excluded_labels or [])
            }
            weighted_average = sum(
                dice_score(
                    y_true[i][np.newaxis, :, :],
                    y_pred[i][np.newaxis, :, :],
                    label,
                    disable_check=True,
                )[0]
                * p
                for label, p in w.items()
            )
            dice_per_sample[i] = weighted_average

    dice_average = np.nanmean(dice_per_sample)

    return dice_average, dice_per_sample
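
# A hedged usage sketch (illustrative, not part of the original module): on the same
# example as above, dice relates to IOU via dice = 2 * iou / (1 + iou).
#
# >>> import numpy as np
# >>> y_true = np.array([[[1, 1], [0, 0]]])
# >>> y_pred = np.array([[[1, 0], [0, 0]]])
# >>> avg, per_sample = dice_score(y_true, y_pred, k=1)
# >>> round(float(avg), 4)
# 0.6667
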
# IMAGE METRICS
def multiple_images_decorator(fun):
    """Enhance a function with iteration over the samples.

    Parameters
    ----------
    fun : callable
        Callable whose functionality will be enhanced.

    Returns
    -------
    wrapper_fun : callable
        Enhanced version of `fun` callable.

    """

    # make sure metadata kept
    @wraps(fun)
    def wrapper_fun(*args, **kwargs):
        """Define enhanced function."""
        assert len(args) == 2
        y_true, y_pred = args

        # CHECKS
        if not (y_pred.shape[-2:] == y_true.shape[-2:]):
            raise ValueError("The image shape is not consistent.")

        if not (y_pred.dtype == np.float32 and y_true.dtype == np.float32):
            raise TypeError("The only allowed dtype is float32.")

        if kwargs.get("mask") is not None:
            if kwargs["mask"].shape != y_true.shape[-2:]:
                raise ValueError(
                    "The mask has to have the same shape as the input images."
                )
            if kwargs["mask"].dtype != bool:
                raise ValueError("The mask needs to be an array of booleans.")

        # MAIN ALGORITHM
        if y_true.ndim == 2:
            return fun(*args, **kwargs)

        elif y_true.ndim == 3:
            if y_true.shape[0] != y_pred.shape[0]:
                raise ValueError(
                    "The number of y_true images and y_pred images has to be the same"
                )
            metric_values = []
            for i in range(y_true.shape[0]):
                y_true_single = y_true[i]
                y_pred_single = y_pred[i]

                metric_values.append(fun(y_true_single, y_pred_single, **kwargs))

            return np.array(metric_values)

        else:
            raise ValueError("Invalid number of dimensions {}".format(y_true.ndim))

    return wrapper_fun
@multiple_images_decorator
def mse_img(y_true, y_pred, mask=None):
    """Compute the mean squared error between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    mask : np.array, optional
        Can be specified to have the computation carried out on a precise area.

    Returns
    -------
    mse : float
        The mean squared error (MSE) metric. Loss metric, the lower the more similar the images are.

    """
    if mask is None:
        size = y_true.shape
        mask = np.ones(size, dtype=bool)

    return np.sum((y_true[mask] - y_pred[mask]) ** 2) / np.sum(mask)
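
# A hedged usage sketch (illustrative, not part of the original module): the decorator
# above requires float32 inputs; the mask restricts the computation to selected pixels.
# The 8 x 8 shape is arbitrary.
#
# >>> import numpy as np
# >>> img1 = np.zeros((8, 8), dtype=np.float32)
# >>> img2 = np.full((8, 8), 0.5, dtype=np.float32)
# >>> float(mse_img(img1, img2))
# 0.25
# >>> mask = np.zeros((8, 8), dtype=bool)
# >>> mask[:2] = True  # only score the first two rows
# >>> float(mse_img(img1, img2, mask=mask))
# 0.25
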
@multiple_images_decorator
def mae_img(y_true, y_pred, mask=None):
    """Compute the mean absolute error between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    mask : np.array, optional
        Can be specified to have the computation carried out on a precise area.

    Returns
    -------
    mae : float
        The mean absolute error (MAE) metric. Loss metric, the lower the more similar the images are.

    """
    if mask is None:
        size = y_true.shape
        mask = np.ones(size, dtype=bool)

    return np.sum(np.abs(y_true[mask] - y_pred[mask])) / np.sum(mask)
@multiple_images_decorator
def psnr_img(y_true, y_pred, mask=None, data_range=None):
    """Compute the peak signal-to-noise ratio (PSNR) between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    mask : np.array, optional
        Can be specified to have the computation carried out on a precise area.

    data_range : int
        The data range of the input image (distance between minimum and maximum possible values).
        By default, this is estimated from the image data-type.

    Returns
    -------
    psnr : float
        The PSNR metric. Similarity metric, the higher the more similar the images are.

    References
    ----------
    .. https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio

    """
    if mask is None:
        size = y_true.shape
        mask = np.ones(size, dtype=bool)

    return peak_signal_noise_ratio(y_true[mask], y_pred[mask], data_range=data_range)
@multiple_images_decorator
def demons_img(y_true, y_pred, mask=None):
    """Compute the demons metric between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    mask : np.array, optional
        Can be specified to have the computation carried out on a precise area.

    Returns
    -------
    demons : float
        The demons value. Loss metric, the lower the more similar the images are.

    """
    y_true_ants = ants.image_clone(ants.from_numpy(y_true), pixeltype="float")
    y_pred_ants = ants.image_clone(ants.from_numpy(y_pred), pixeltype="float")

    if mask is None:
        demons = ants.image_similarity(y_true_ants, y_pred_ants, metric_type="Demons")
    else:
        mask_ants = ants.image_clone(
            ants.from_numpy(mask.astype(float)), pixeltype="float"
        )
        demons = ants.image_similarity(
            y_true_ants,
            y_pred_ants,
            fixed_mask=mask_ants,
            moving_mask=mask_ants,
            metric_type="Demons",
        )

    return demons
@multiple_images_decorator
def cross_correlation_img(y_true, y_pred, mask=None):
    """Compute the cross-correlation metric between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    mask : np.array, optional
        Can be specified to have the computation carried out on a precise area.

    Returns
    -------
    cc : float
        The cross-correlation value. Similarity metric, the higher the more similar the images are.

    """
    y_true_ants = ants.image_clone(ants.from_numpy(y_true), pixeltype="float")
    y_pred_ants = ants.image_clone(ants.from_numpy(y_pred), pixeltype="float")

    if mask is None:
        cc = ants.image_similarity(y_true_ants, y_pred_ants, metric_type="Correlation")
    else:
        mask_ants = ants.image_clone(
            ants.from_numpy(mask.astype(float)), pixeltype="float"
        )
        cc = ants.image_similarity(
            y_true_ants,
            y_pred_ants,
            fixed_mask=mask_ants,
            moving_mask=mask_ants,
            metric_type="Correlation",
        )

    # ANTs reports the metric as a cost (negated correlation); flip the sign so
    # that higher means more similar.
    return -cc
@multiple_images_decorator
def ssmi_img(y_true, y_pred):
    """Compute the structural similarity between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    Returns
    -------
    ssmi : float
        The structural similarity (SSIM) metric. Similarity metric, the higher the more similar the images are.

    """
    return structural_similarity(y_true, y_pred)
@multiple_images_decorator
def mi_img(y_true, y_pred, mask=None, metric_type="MattesMutualInformation"):
    """Compute the mutual information (MI) between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    mask : np.array, optional
        Can be specified to have the computation carried out on a precise area.

    metric_type : str, {'MattesMutualInformation', 'JointHistogramMutualInformation'}
        Type of mutual information computation.

    Returns
    -------
    mi : float
        The mutual information (MI) metric. Similarity metric, the higher the more similar the images are.

    """
    y_true_ants = ants.image_clone(ants.from_numpy(y_true), pixeltype="float")
    y_pred_ants = ants.image_clone(ants.from_numpy(y_pred), pixeltype="float")

    if mask is None:
        mi = ants.image_similarity(y_true_ants, y_pred_ants, metric_type=metric_type)
    else:
        mask_ants = ants.image_clone(
            ants.from_numpy(mask.astype(float)), pixeltype="float"
        )
        mi = ants.image_similarity(
            y_true_ants,
            y_pred_ants,
            fixed_mask=mask_ants,
            moving_mask=mask_ants,
            metric_type=metric_type,
        )

    # ANTs reports the metric as a cost; flip the sign so that higher means more similar.
    return -mi
@multiple_images_decorator
def perceptual_loss_img(y_true, y_pred, model="net-lin", net="vgg"):
    """Compute the perceptual loss (PL) between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w). If (N, h, w), the decorator
        `multiple_images_decorator` takes care of the sample dimension.

    model : str, {'net', 'net-lin'}
        Type of model (cf lpips_tf package).

    net : str, {'vgg', 'alex'}
        Type of network (cf lpips_tf package).

    Returns
    -------
    pl : float
        The Perceptual Loss (PL) metric. Loss metric, the lower the more similar the images are.

    Notes
    -----
    We use the decorator just to make sure we do not run out of memory during a forward pass.
    Also, the network is fully convolutional, but if the images are too small it might run into issues.

    """
    try:
        import lpips_tf
    except ModuleNotFoundError as err:
        raise ModuleNotFoundError(
            """
        LPIPS-TensorFlow required but not found.

        Please install it by running the following command:

        $ pip install git+http://github.com/alexlee-gk/lpips-tensorflow.git#egg=lpips_tf
        """
        ) from err

    # gray2rgb
    y_true = np.stack((y_true,) * 3, axis=-1)
    y_pred = np.stack((y_pred,) * 3, axis=-1)

    @tf.function
    def lpips(image0, image1):
        return lpips_tf.lpips(image0, image1, model=model, net=net)

    pl = lpips(y_true, y_pred)

    return float(pl)
# KEYPOINTS METRICS
def _euclidean_distance_kp(y_true, y_pred):
    """Euclidean distance between pairs of n-dimensional points.

    Parameters
    ----------
    y_true : np.array
        Array of shape (N, d) where `N` is the number of samples.

    y_pred : np.array
        Array of shape (N, d) where `N` is the number of samples.

    Returns
    -------
    distance : np.array
        Array of shape (N,) representing the Euclidean distance between `y_true` and `y_pred`
        for each sample.

    Raises
    ------
    ValueError
        In case the shapes do not agree.

    Examples
    --------
    >>> import numpy as np
    >>> from atlalign.metrics import _euclidean_distance_kp
    >>> np.random.seed(0)
    >>> y_true = np.array([[1, 2], [3.4, 4], [2, 1]])
    >>> y_pred = np.array([[1.2, 3], [3, 4], [2, 1.8]])
    >>> _euclidean_distance_kp(y_true, y_pred)
    array([1.0198039, 0.4      , 0.8      ])

    """
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "The shapes of true and predicted are different, {} vs {}".format(
                y_true.shape, y_pred.shape
            )
        )

    return np.sqrt(np.sum((y_true - y_pred) ** 2, axis=1))


def _compute_weights_kp(y, distance_kwargs=None):
    """Compute weights of keypoints.

    Parameters
    ----------
    y : np.array
        Array of shape (N, 2) where each row represents a keypoint.

    distance_kwargs : dict or None
        If ``dict`` then represents parameters to be passed into the scipy implementation.

    Returns
    -------
    weights : np.array
        Array of shape (N,) where each element represents a weight of that specific keypoint.

    """
    dist_matrix = distance.cdist(
        y, y, **(distance_kwargs or {})
    )  # uses euclidean distance by default

    mean_dist = np.mean(dist_matrix, axis=0)

    return mean_dist / mean_dist.sum()
def tre_kp(y_true, y_pred, weighted=False):
    """Compute (absolute) target registration error.

    Parameters
    ----------
    y_true : np.array
        Array of shape (N, 2) where `N` is the number of samples.

    y_pred : np.array
        Array of shape (N, 2) where `N` is the number of samples.

    weighted : bool
        If True, then the final TRE is weighted by `y_true`.

    Returns
    -------
    mean_tre : float
        Mean target registration error over all keypoint pairs.

    tre : np.array
        Array of shape (N,) where elements represent the TRE of a given keypoint pair.

    weights : np.array
        Array of shape (N,). If `weighted` is False then simply 1 / N. Otherwise the
        weights are derived from `y_true`.

    References
    ----------
    .. https://anhir.grand-challenge.org/Performance_Metrics/

    """
    # Extract useful parameters
    n = len(y_true)

    tre = _euclidean_distance_kp(y_true, y_pred)
    weights = _compute_weights_kp(y_true) if weighted else 1 / n * np.ones(n)

    mean_tre = np.inner(tre, weights)

    return mean_tre, tre, weights
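
# A hedged usage sketch (illustrative, not part of the original module): three keypoint
# pairs, only the first of which is displaced (by a 3-4-5 triangle).
#
# >>> import numpy as np
# >>> y_true = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 2.0]])
# >>> y_pred = y_true + np.array([[3.0, 4.0], [0.0, 0.0], [0.0, 0.0]])
# >>> mean_tre, tre, weights = tre_kp(y_true, y_pred)
# >>> round(float(mean_tre), 4), tre.tolist()
# (1.6667, [5.0, 0.0, 0.0])
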
def rtre_kp(y_true, y_pred, h, w, weighted=False):
    """Compute relative target registration error.

    Parameters
    ----------
    y_true : np.array
        Array of shape (N, 2) where `N` is the number of samples.

    y_pred : np.array
        Array of shape (N, 2) where `N` is the number of samples.

    h : int
        Height of the image.

    w : int
        Width of the image.

    weighted : bool
        If True, then the final rTRE is weighted by `y_true`.

    Returns
    -------
    mean_rtre : float
        Mean relative target registration error over all keypoint pairs.

    rtre : np.array
        Array of shape (N,) where elements represent the rTRE of a given keypoint pair.

    weights : np.array
        Array of shape (N,). If `weighted` is False then simply 1 / N. Otherwise the
        weights are derived from `y_true`.

    References
    ----------
    .. https://anhir.grand-challenge.org/Performance_Metrics/

    """
    # Extract useful parameters
    n = len(y_true)
    diagonal = (w**2 + h**2) ** (1 / 2)

    rtre = _euclidean_distance_kp(y_true, y_pred) / diagonal
    weights = _compute_weights_kp(y_true) if weighted else 1 / n * np.ones(n)

    mean_rtre = np.inner(rtre, weights)

    return mean_rtre, rtre, weights
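
# A hedged usage sketch (illustrative, not part of the original module): with a
# 30 x 40 image the diagonal is 50, so a TRE of 5 corresponds to an rTRE of 0.1.
#
# >>> import numpy as np
# >>> y_true = np.array([[0.0, 0.0]])
# >>> y_pred = np.array([[3.0, 4.0]])
# >>> mean_rtre, rtre, weights = rtre_kp(y_true, y_pred, h=30, w=40)
# >>> round(float(mean_rtre), 4)
# 0.1
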
def improvement_kp(y_true, y_pred, y_init):
    """Compute improvement ratio with respect to initial keypoints.

    Parameters
    ----------
    y_true : np.array
        Array of shape (N, 2) where `N` is the number of samples. Ground truth positions
        in the reference space.

    y_pred : np.array
        Array of shape (N, 2) where `N` is the number of samples. Predicted positions
        in the reference space.

    y_init : np.array
        Array of shape (N, 2) where `N` is the number of samples. Initial positions
        in the reference space.

    Returns
    -------
    percent_improved : float
        Fraction of predicted keypoints that are better than the initial ones.

    mask : np.array
        Array of booleans representing which predicted keypoints achieved a lower TRE
        than the initial ones.

    """
    pred_vs_true = tre_kp(y_true, y_pred)[1]
    init_vs_true = tre_kp(y_true, y_init)[1]

    mask = pred_vs_true < init_vs_true
    percent_improved = mask.sum() / len(mask)

    return percent_improved, mask
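
# A hedged usage sketch (illustrative, not part of the original module): the first
# keypoint moved closer to the ground truth than its initial position, the second
# moved away.
#
# >>> import numpy as np
# >>> y_true = np.zeros((2, 2))
# >>> y_init = np.ones((2, 2))
# >>> y_pred = np.array([[0.1, 0.1], [2.0, 2.0]])
# >>> pct, mask = improvement_kp(y_true, y_pred, y_init)
# >>> float(pct), mask.tolist()
# (0.5, [True, False])
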
# ALL TOGETHER
def evaluate(y_true, y_pred, imgs_mov, img_ids, ps, dataset_ids, depths=()):
    """Evaluate all relevant metrics in a per-sample fashion.

    Parameters
    ----------
    y_true : np.ndarray
        Array of shape (N, h, w, 2) representing true samples of displacement fields.

    y_pred : np.ndarray
        Array of shape (N, h, w, 2) representing predicted samples of displacement fields.

    imgs_mov : np.ndarray
        Array of shape (N, h, w) representing the moving images. If dtype=float32 then no division.
        If uint8 then values are divided by 255 and cast to float32.

    img_ids : np.array
        Array of shape (N,) representing the image ids.

    ps : np.array
        Array of shape (N,) representing the coronal section coordinates in microns.

    dataset_ids : np.array
        Array of shape (N,) representing the dataset ids.

    depths : tuple
        Tuple of different depths to compute the intersection over union score.

    Returns
    -------
    result : pd.DataFrame
        All results, including array entries.

    results_viewable : pd.DataFrame
        Results without array entries.

    """
    if not (
        len(y_true)
        == len(y_pred)
        == len(imgs_mov)
        == len(img_ids)
        == len(ps)
        == len(dataset_ids)
    ):
        raise ValueError("All inputs need to have the same length.")

    n = len(y_true)

    imgs_mov = [
        imgs_mov[i].astype(np.float32) / (255 if imgs_mov.dtype == np.uint8 else 1)
        for i in range(n)
    ]

    # displacement fields
    df_true = [
        DisplacementField(y_true[i, ..., 0], y_true[i, ..., 1]) for i in range(n)
    ]
    df_pred = [
        DisplacementField(y_pred[i, ..., 0], y_pred[i, ..., 1]) for i in range(n)
    ]

    # inverse dvfs
    inv_df_true = [df.pseudo_inverse(ds_f=64) for df in df_true]
    inv_df_pred = []
    for df in df_pred:
        try:
            inv_df_pred.append(df.pseudo_inverse(ds_f=64))
        except Exception:
            inv_df_pred.append(None)

    # image registration
    imgs_reg_true = [df_true[i].warp(imgs_mov[i]) for i in range(n)]
    imgs_reg_pred = [df_pred[i].warp(imgs_mov[i]) for i in range(n)]

    # result template
    template_dict = {
        # SINGLE DVFS
        "norm_a": pd.Series(0, index=range(n), dtype=float),
        "norm_pp": [np.empty((320, 456)) for _ in range(n)],
        "jacobian_nonpositive_pixels_a": pd.Series(0, index=range(n), dtype=float),
        "jacobian_pp": [np.empty((320, 456)) for _ in range(n)],
        # META
        "p": pd.Series(ps, dtype=int),
        "dataset_id": pd.Series(dataset_ids, dtype=int),
        "imgs_mov": imgs_mov,
        # OPTICAL FLOW
        "vector_distance_a": pd.Series(0, index=range(n), dtype=float),
        "vector_distance_pp": [np.empty((320, 456)) for _ in range(n)],
        "angular_error_a": pd.Series(0, index=range(n), dtype=float),
        "angular_error_pp": [np.empty((320, 456)) for _ in range(n)],
        # REGRESSION METRICS
        # 'correlation_a': pd.Series(0, index=range(n), dtype=float),
        # 'correlation_po': [np.empty((320, 456, 2)) for _ in range(n)],
        # IMAGE METRICS
        "mae_img_a": pd.Series(0, index=range(n), dtype=float),
        "mse_img_a": pd.Series(0, index=range(n), dtype=float),
        "psnr_img_a": pd.Series(0, index=range(n), dtype=float),
        "cross_correlation_img_a": pd.Series(0, index=range(n), dtype=float),
        "ssmi_img_a": pd.Series(0, index=range(n), dtype=float),
        "mi_img_a": pd.Series(0, index=range(n), dtype=float),
        # Segmentation metrics
        # 'iou_depth_0' : None
    }

    # NORM
    for i in range(n):
        template_dict["norm_pp"][i] = df_pred[i].norm
        template_dict["norm_a"][i] = template_dict["norm_pp"][i].mean()

    # JACOBIAN
    for i in range(n):
        template_dict["jacobian_pp"][i] = df_pred[i].jacobian
        template_dict["jacobian_nonpositive_pixels_a"][i] = np.sum(
            template_dict["jacobian_pp"][i] <= 0
        )
    template_dict["jacobian_nonpositive_pixels_perc_a"] = (
        100 * template_dict["jacobian_nonpositive_pixels_a"] / (320 * 456)
    )

    # VECTOR DISTANCE
    for i in range(n):
        vd_a, vd_pp = vector_distance_combined([df_true[i]], [df_pred[i]])
        (
            template_dict["vector_distance_a"][i],
            template_dict["vector_distance_pp"][i],
        ) = (vd_a, vd_pp)

    # ANGULAR ERROR
    for i in range(n):
        ae_a, ae_pp = angular_error_of([df_true[i]], [df_pred[i]], weighted=True)
        template_dict["angular_error_a"][i], template_dict["angular_error_pp"][i] = (
            ae_a,
            ae_pp,
        )

    # IMAGE METRICS
    template_dict["mae_img_a"] = mae_img(
        np.array(imgs_reg_true), np.array(imgs_reg_pred)
    )
    template_dict["mse_img_a"] = mse_img(
        np.array(imgs_reg_true), np.array(imgs_reg_pred)
    )
    template_dict["psnr_img_a"] = psnr_img(
        np.array(imgs_reg_true), np.array(imgs_reg_pred)
    )
    template_dict["ssmi_img_a"] = ssmi_img(
        np.array(imgs_reg_true), np.array(imgs_reg_pred)
    )
    template_dict["mi_img_a"] = mi_img(np.array(imgs_reg_true), np.array(imgs_reg_pred))
    template_dict["cross_correlation_img_a"] = cross_correlation_img(
        np.array(imgs_reg_true), np.array(imgs_reg_pred)
    )

    # Segmentation metrics
    if depths:
        annot_vol = annotation_volume()
        labels_dict = segmentation_collapsing_labels()

        for d in depths:
            print("Depth {}".format(d))
            ious = []
            for i, p in enumerate(ps):
                segm_ref = find_labels_dic(annot_vol[p // 25], labels_dict, d)
                segm_true = inv_df_true[i].warp_annotation(segm_ref)

                if inv_df_pred[i] is not None:
                    segm_pred = inv_df_pred[i].warp_annotation(segm_ref)
                    ious.append(
                        iou_score(
                            np.array([segm_true]),
                            np.array([segm_pred]),
                            k=None,
                            excluded_labels=[0],
                        )[0]
                    )
                else:
                    ious.append(np.nan)

            template_dict["iou_depth_{}".format(d)] = ious

    result = pd.DataFrame(template_dict)
    result = result.set_index(img_ids)

    return result, result[result.dtypes[result.dtypes != object].index]
def evaluate_single(
    deltas_true,
    deltas_pred,
    img_mov,
    p=None,
    avol=None,
    collapsing_labels=None,
    deltas_pred_inv=None,
    deltas_true_inv=None,
    ds_f=4,
    depths=(),
):
    """Evaluate a single sample.

    Parameters
    ----------
    deltas_true : DisplacementField or np.ndarray
        If np.ndarray then of shape (height, width, 2) representing deltas_xy of ground truth.

    deltas_pred : DisplacementField or np.ndarray
        If np.ndarray then of shape (height, width, 2) representing deltas_xy of prediction.

    img_mov : np.ndarray
        Moving image.

    p : int
        Coronal section in microns.

    avol : np.ndarray or None
        Annotation volume of shape (528, 320, 456). If None then loaded via `annotation_volume`.

    collapsing_labels : dict or None
        Dictionary for segmentation collapsing. If None then loaded via `segmentation_collapsing_labels`.

    deltas_pred_inv : None or np.ndarray
        If np.ndarray then of shape (height, width, 2) representing inv_deltas_xy of prediction.
        If not provided, computed from `df_pred`.

    deltas_true_inv : None or np.ndarray
        If np.ndarray then of shape (height, width, 2) representing inv_deltas_xy of truth.
        If not provided, computed from `df_true`.

    ds_f : int
        Downsampling factor for numerical inverses.

    depths : tuple
        Tuple of integers representing all depths to compute IOU for. If empty, no IOU
        computation takes place.

    Returns
    -------
    all_metrics : dict
        Relevant metrics.

    images : dict
        Maps each requested depth to the pair (segm_true, segm_pred) of warped segmentations.
        Empty if `depths` is empty.

    """
    n_pixels = 320 * 456

    if not (deltas_true.shape == (320, 456, 2) and deltas_pred.shape == (320, 456, 2)):
        raise ValueError("Incorrect shape of input")

    df_true = DisplacementField(deltas_true[..., 0], deltas_true[..., 1])
    df_pred = DisplacementField(deltas_pred[..., 0], deltas_pred[..., 1])

    img_reg_true = df_true.warp(img_mov)
    img_reg_pred = df_pred.warp(img_mov)

    all_metrics = {
        "mse_img": mse_img(img_reg_true, img_reg_pred),
        "mae_img": mae_img(img_reg_true, img_reg_pred),
        "psnr_img": psnr_img(img_reg_true, img_reg_pred),
        "ssmi_img": ssmi_img(img_reg_true, img_reg_pred),
        "mi_img": mi_img(img_reg_true, img_reg_pred),
        "cc_img": cross_correlation_img(img_reg_true, img_reg_pred),
        # 'perceptual_img': perceptual_loss_img(img_reg_true, img_reg_pred),
        "norm": df_pred.norm.mean(),
        "corrupted_pixels": np.sum(df_pred.jacobian < 0) / n_pixels,
        "euclidean_distance": vector_distance_combined([df_true], [df_pred])[0],
        "angular_error": angular_error_of([df_true], [df_pred], weighted=True)[0],
    }

    # segmentation metrics
    if not depths:
        return all_metrics, {}

    # checks
    if avol is not None and avol.shape != (528, 320, 456):
        raise ValueError("Incorrectly shaped annotation volume")

    # Prepare inverses
    if deltas_true_inv is not None:
        df_true_inv = DisplacementField(
            deltas_true_inv[..., 0], deltas_true_inv[..., 1]
        )
    else:
        df_true_inv = df_true.pseudo_inverse(ds_f=ds_f)

    if deltas_pred_inv is not None:
        df_pred_inv = DisplacementField(
            deltas_pred_inv[..., 0], deltas_pred_inv[..., 1]
        )
    else:
        df_pred_inv = df_pred.pseudo_inverse(ds_f=ds_f)

    # Extract data
    avol_ = annotation_volume() if avol is None else avol
    collapsing_labels_ = (
        segmentation_collapsing_labels()
        if collapsing_labels is None
        else collapsing_labels
    )

    # Compute
    images = {}
    for depth in depths:
        segm_ref = find_labels_dic(avol_[p // 25], collapsing_labels_, depth)

        segm_true = df_true_inv.warp_annotation(segm_ref)
        segm_pred = df_pred_inv.warp_annotation(segm_ref)

        images[depth] = (segm_true, segm_pred)

        all_metrics["iou_{}".format(depth)] = iou_score(
            np.array([segm_true]),
            np.array([segm_pred]),
            k=None,
            excluded_labels=[0],
        )[0]

        all_metrics["dice_{}".format(depth)] = dice_score(
            np.array([segm_true]),
            np.array([segm_pred]),
            k=None,
            excluded_labels=[0],
        )[0]

    return all_metrics, images