"""A module implementing some useful metrics.
Notes
-----
The metrics depend on what we apply them to. In general there are 4 types of metrics:
* DVF (=multioutput regression)
* Annotation (=per pixel classification)
* Image
* Keypoint metrics
These metrics use NumPy and are supposed to be used locally after a forward pass.
In DVF Metrics, the word 'combined' denotes the fact that we are performing multioutput regression. Each of the metrics
should always return a tuple of (metric_average, metric_per_output).
"""
"""
The package atlalign is a tool for registration of 2D images.
Copyright (C) 2021 EPFL/Blue Brain Project
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
from functools import wraps
import ants
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.spatial import distance
from skimage.metrics import peak_signal_noise_ratio, structural_similarity
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from atlalign.base import DisplacementField
from atlalign.data import annotation_volume, segmentation_collapsing_labels
from atlalign.utils import find_labels_dic
# General utils
def _checker_and_convertor_dvf(y_true, y_pred, copy=True):
"""Check if inputs correct and convert to np.ndarray if necessary.
Parameters
----------
y_true : np.ndarray or list
If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
If list then elements are instances of ``atlalign.base.DisplacementField``.
y_pred : np.ndarray or list
If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
If list then elements are instances of ``atlalign.base.DisplacementField``.
copy : bool
If True, then copied arrays are returned.
Returns
-------
y_true : np.ndarray
If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
y_pred : np.ndarray or list
If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
"""
inputs = {"y_true": y_true, "y_pred": y_pred}
# checks
for y_name, y in inputs.items():
if not isinstance(y, (list, np.ndarray)):
raise TypeError(
"The {} needs to be either a list or np.ndarray, current type:{}".format(
y_name, type(y)
)
)
if isinstance(y, list):
if not np.all([isinstance(df, DisplacementField) for df in y]):
raise TypeError(
"All elements of the list of {} need to be an instance of DisplacementField"
)
# convert to np.ndarray
y_array = np.array([np.stack((df.delta_x, df.delta_y), axis=2) for df in y])
return (
_checker_and_convertor_dvf(y_true, y_array, copy=copy)
if y_name == "y_pred"
else _checker_and_convertor_dvf(y_array, y_pred, copy=copy)
)
if y.ndim != 4:
raise ValueError(
"The number of dimensions of {} needs to be 4.".format(y_name)
)
if y.shape[3] != 2:
raise ValueError("The displacement field needs to have delta_x and delta_y")
if y_true.shape != y_pred.shape:
raise ValueError("The two input arrays have inconsistent shapes")
if copy:
return y_true.copy(), y_pred.copy()
else:
return y_true, y_pred
def _scikit_metric_wrapper(y_true, y_pred, metric_callable, **kwargs):
    """Adapt a scikit-learn style regression metric to image-shaped outputs.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.
    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.
    metric_callable : callable
        A scikit-learn-like metric taking y_true, y_pred of shape (n_samples, n_outputs) and returning
        an array of shape (n_outputs,) with per-output scores.
    kwargs : dict
        Keyword arguments forwarded to `metric_callable`.

    Returns
    -------
    metric_average : float
        The metric averaged over all regression outputs.
    metric_per_output : np.ndarray
        Array of shape (h, w, 2) holding the metric score of each individual regression output.
    """
    y_true, y_pred = _checker_and_convertor_dvf(y_true, y_pred)

    # Force per-output scores so they can be reshaped back to (h, w, 2).
    kwargs.update({"multioutput": "raw_values"})

    full_shape = y_true.shape
    n_samples = full_shape[0]

    flat_true = y_true.reshape((n_samples, -1))
    flat_pred = y_pred.reshape((n_samples, -1))
    raw_scores = metric_callable(flat_true, flat_pred, **kwargs)

    metric_per_output = raw_scores.reshape(full_shape[1:])
    metric_average = metric_per_output.mean()

    return metric_average, metric_per_output
# OPTICAL FLOW METRICS
def angular_error_of(y_true, y_pred, weighted=False):
    """Compute the angular error between two displacement fields.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.
    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.
    weighted : bool
        Only applicable when `N=1`. The norm of `y_true` is used to derive weights for the average.

    Returns
    -------
    angular_error_average : float
        Average angular error over all samples and pixels. If `weighted=True` the average is weighted
        by the per-pixel norm of `y_true`.
    angular_error_per_output : np.ndarray
        Array of shape (h, w) holding the per-pixel angular error averaged over samples.
    """
    y_true, y_pred = _checker_and_convertor_dvf(y_true, y_pred)
    n_samples, height, width = y_true.shape[:3]

    if weighted and n_samples != 1:
        raise ValueError("The weighted average is only allowed for a single sample.")

    per_sample = np.zeros((n_samples, height, width))
    for s in range(n_samples):
        dx_t, dy_t = y_true[s, ..., 0], y_true[s, ..., 1]
        dx_p, dy_p = y_pred[s, ..., 0], y_pred[s, ..., 1]

        dot_product = dx_t * dx_p + dy_t * dy_p
        norm_product = np.sqrt(dx_t * dx_t + dy_t * dy_t) * np.sqrt(
            dx_p * dx_p + dy_p * dy_p
        )

        # 0/0 yields nan which is deliberately propagated and later skipped by nanmean
        per_sample[s] = np.rad2deg(np.arccos(np.clip(dot_product / norm_product, -1, 1)))

    angular_error_per_output = np.nanmean(per_sample, axis=0)  # (h, w)

    if weighted:
        norm = DisplacementField(y_true[0, ..., 0], y_true[0, ..., 1]).norm
        weights = norm / norm.sum()
        # nan pixels contribute 0 to the weighted sum (true and pred may have different nans)
        angular_error_average = np.where(
            np.isnan(angular_error_per_output), 0, angular_error_per_output * weights
        ).sum()
    else:
        angular_error_average = np.nanmean(angular_error_per_output)  # float

    return angular_error_average, angular_error_per_output
# REGRESSION METRICS
def correlation_combined(y_true, y_pred):
    """Compute the combined version of correlation.

    Notes
    -----
    Slow — correlation is computed output by output.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.
    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    correlation_average : float
        Mean correlation over all regression outputs.
    correlation_per_output : np.ndarray
        Array of shape (h, w, 2) holding the correlation of each individual regression output.
    """
    y_true, y_pred = _checker_and_convertor_dvf(y_true, y_pred)
    output_shape = y_true.shape[1:]

    flat_true = y_true.reshape(len(y_true), -1)
    flat_pred = y_pred.reshape(len(y_pred), -1)

    # Pearson correlation of each output column across the samples
    n_outputs = flat_true.shape[1]
    per_output_flat = np.empty(n_outputs)
    for j in range(n_outputs):
        per_output_flat[j] = np.corrcoef(flat_true[:, j], flat_pred[:, j])[0, 1]

    correlation_per_output = per_output_flat.reshape(output_shape)
    correlation_average = correlation_per_output.mean()

    return correlation_average, correlation_per_output
def mae_combined(y_true, y_pred):
    """Compute the combined version of mean absolute error.

    Notes
    -----
    Unlike plain scikit-learn, the inputs `y_true`, `y_pred` are custom made for our
    registration problem.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.
    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    mae_average : float
        A combined mae.
    mae_per_output : np.ndarray
        Array of shape (h, w, 2) holding individual mae scores for each regression output.
    """
    mae_average, mae_per_output = _scikit_metric_wrapper(
        y_true, y_pred, metric_callable=mean_absolute_error
    )
    return mae_average, mae_per_output
def mse_combined(y_true, y_pred):
    """Compute the combined version of mean squared error.

    Notes
    -----
    Unlike plain scikit-learn, the inputs `y_true`, `y_pred` are custom made for our
    registration problem.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.
    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    mse_average : float
        A combined mse.
    mse_per_output : np.ndarray
        Array of shape (h, w, 2) holding individual mse scores for each regression output.
    """
    mse_average, mse_per_output = _scikit_metric_wrapper(
        y_true, y_pred, metric_callable=mean_squared_error
    )
    return mse_average, mse_per_output
def r2_combined(y_true, y_pred):
    """Compute the combined version of r2.

    Notes
    -----
    Unlike plain scikit-learn, the inputs `y_true`, `y_pred` are custom made for our
    registration problem.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.
    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    r2_average : float
        A combined r2.
    r2_per_output : np.ndarray
        Array of shape (h, w, 2) holding individual r2 scores for each regression output.
    """
    r2_average, r2_per_output = _scikit_metric_wrapper(
        y_true, y_pred, metric_callable=r2_score
    )
    return r2_average, r2_per_output
def vector_distance_combined(y_true, y_pred):
    """Compute the combined version of vector distance.

    Parameters
    ----------
    y_true : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents true samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.
    y_pred : np.ndarray or list
        If np.ndarray then expected shape is (N, h, w, 2) and represents predicted samples of displacement fields.
        If list then elements are instances of ``atlalign.base.DisplacementField``.

    Returns
    -------
    vector_distance_average : float
        Average vector distance over all samples and pixels.
    vector_distance_per_output : np.ndarray
        Array of shape (h, w) holding the per-pixel vector distance averaged over samples.
    """
    y_true, y_pred = _checker_and_convertor_dvf(y_true, y_pred)

    residual = y_pred - y_true
    # Euclidean norm over the (delta_x, delta_y) channel, then mean over samples
    vector_distance_per_output = np.linalg.norm(residual, axis=-1).mean(axis=0)
    vector_distance_average = vector_distance_per_output.mean()

    return vector_distance_average, vector_distance_per_output
# ANNOTATION METRICS
def _checker_annotation(y_true, y_pred, k):
"""Check whether the inputs for annotation are correct.
Parameters
----------
y_true : np.ndarray
A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
The ground truth annotation.
y_pred : np.ndarray
A np.ndarray of shape (N, h, w) such that the first dimensions represents the sample.
The predicted annotation.
k : int or float or None
A class label. If None, then averaging based on label distribution in each true image is
performed.
Raises
------
TypeError
If `y_true` or `y_pred` not ``np.ndarray``.
ValueError
Various inconsistencies.
"""
if not (isinstance(y_true, np.ndarray) and isinstance(y_pred, np.ndarray)):
raise TypeError("Both of the inputs y_true and y_pred need to be a np.ndarray.")
if not (y_true.ndim == 3 and y_pred.ndim == 3):
raise ValueError("The input arrays need to be 3D.")
if y_true.shape != y_pred.shape:
raise ValueError("The arrays have different shapes.")
# Make sure k appears at least once in at least one of the arrays or its a None
if k is None:
return
if not (np.any(y_true == k) or np.any(y_pred == k)):
raise ValueError("The k={} is not present in either of the arrays.".format(k))
def iou_score(y_true, y_pred, k=0, disable_check=False, excluded_labels=None):
    """Compute intersection over union of a class `k` equally weighted over all samples.

    Notes
    -----
    If the class is not present in either of the images — a '0/0' type of situation — the
    sample is simply skipped and does not enter the average.

    Parameters
    ----------
    y_true : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The ground truth annotation.
    y_pred : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The predicted annotation.
    k : int or float or None
        A class label. If None, then averaging based on label distribution in each true image is
        performed.
    disable_check : bool
        If True, then input checks are skipped. Used when recursively calling the function.
    excluded_labels : None or list
        If None then no effect. If a list of ints then they won't be used in the averaging over
        labels (in case `k` is None).

    Returns
    -------
    iou_average : float
        An average IOU over all samples.
    iou_per_sample : np.ndarray, shape = (N,)
        IOU score per each sample. Note that if a label does not occur in either of the images then
        it is equal to np.nan.
    """
    # Run checks
    if not disable_check:
        _checker_annotation(y_true, y_pred, k)

    n = len(y_true)
    iou_per_sample = np.zeros(n, dtype=np.float32)

    for i in range(n):
        if k is not None:
            mask_true = y_true[i] == k
            mask_pred = y_pred[i] == k

            intersection = np.logical_and(mask_true, mask_pred)
            union = np.logical_or(mask_true, mask_pred)

            # empty union (label absent in both) -> nan, excluded by the final nanmean
            iou_per_sample[i] = (
                (intersection.sum() / union.sum()) if not np.all(union == 0) else np.nan
            )
        else:
            # count of pixels that are not excluded (only computed when needed)
            n_pixels = (~np.isin(y_true[i], np.array(excluded_labels or []))).sum()

            # weight of each label = its share of the (non-excluded) true image
            weights = {
                label: (y_true[i] == label).sum() / n_pixels
                for label in (set(np.unique(y_true[i])) | set(np.unique(y_pred[i])))
                - set(excluded_labels or [])
            }
            # recursive per-label IOU weighted by the true label distribution
            # (renamed from `k` to avoid shadowing the function parameter)
            iou_per_sample[i] = sum(
                iou_score(
                    y_true[i][np.newaxis, :, :],
                    y_pred[i][np.newaxis, :, :],
                    label,
                    disable_check=True,
                )[0]
                * weight
                for label, weight in weights.items()
            )

    iou_average = np.nanmean(iou_per_sample)

    return iou_average, iou_per_sample
def dice_score(y_true, y_pred, k=0, disable_check=False, excluded_labels=None):
    """Compute dice score of a class `k` equally weighted over all samples.

    Notes
    -----
    If the class is not present in either of the images — a '0/0' type of situation — the
    sample is simply skipped and does not enter the average.

    Parameters
    ----------
    y_true : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The ground truth annotation.
    y_pred : np.ndarray
        A np.ndarray of shape (N, h, w) such that the first dimension represents the sample.
        The predicted annotation.
    k : int or float or None
        A class label. If None, then averaging based on label distribution in each true image is
        performed.
    disable_check : bool
        If True, then input checks are skipped. Used when recursively calling the function.
    excluded_labels : None or list
        If None then no effect. If a list of ints then they won't be used in the averaging over
        labels (in case `k` is None).

    Returns
    -------
    dice_average : float
        An average dice over all samples.
    dice_per_sample : np.ndarray, shape = (N,)
        Dice score per each sample. Note that if a label does not occur in either of the images then
        it is equal to np.nan.
    """
    # Run checks
    if not disable_check:
        _checker_annotation(y_true, y_pred, k)

    n = len(y_true)
    dice_per_sample = np.zeros(n, dtype=np.float32)

    for i in range(n):
        if k is not None:
            mask_true = y_true[i] == k
            mask_pred = y_pred[i] == k

            intersection = np.logical_and(mask_true, mask_pred)
            union = np.logical_or(mask_true, mask_pred)

            # empty union (label absent in both) -> nan, propagated through the dice formula
            iou = (
                intersection.sum() / union.sum() if not np.all(union == 0) else np.nan
            )
            # dice = 2 * IOU / (1 + IOU)
            dice_per_sample[i] = (2 * iou) / (1 + iou)
        else:
            # count of pixels that are not excluded (only computed when needed)
            n_pixels = (~np.isin(y_true[i], np.array(excluded_labels or []))).sum()

            # weight of each label = its share of the (non-excluded) true image
            weights = {
                label: (y_true[i] == label).sum() / n_pixels
                for label in (set(np.unique(y_true[i])) | set(np.unique(y_pred[i])))
                - set(excluded_labels or [])
            }
            # recursive per-label dice weighted by the true label distribution
            # (renamed from `k` to avoid shadowing the function parameter)
            dice_per_sample[i] = sum(
                dice_score(
                    y_true[i][np.newaxis, :, :],
                    y_pred[i][np.newaxis, :, :],
                    label,
                    disable_check=True,
                )[0]
                * weight
                for label, weight in weights.items()
            )

    dice_average = np.nanmean(dice_per_sample)

    return dice_average, dice_per_sample
# IMAGE METRICS
def multiple_images_decorator(fun):
    """Enhance a pairwise image metric with iteration over the samples.

    Parameters
    ----------
    fun : callable
        Callable of signature ``fun(y_true, y_pred, **kwargs)`` operating on single (h, w) images.

    Returns
    -------
    wrapper_fun : callable
        Enhanced version of `fun` that also accepts (N, h, w) batches and returns an array
        of per-sample metric values.
    """
    # make sure metadata kept
    @wraps(fun)
    def wrapper_fun(*args, **kwargs):
        """Define enhanced function."""
        assert len(args) == 2
        y_true, y_pred = args

        # CHECKS
        if not (y_pred.shape[-2:] == y_true.shape[-2:]):
            raise ValueError("The image shape is not consistent.")

        if not (y_pred.dtype == np.float32 and y_true.dtype == np.float32):
            raise TypeError("The only allowed dtype is float32.")

        if kwargs.get("mask") is not None:
            if kwargs["mask"].shape != y_true.shape[-2:]:
                raise ValueError(
                    "The mask has to have the same shape as the input images."
                )
            # BUGFIX: `np.bool` was removed in NumPy 1.24 — `np.bool_` is the dtype scalar type
            if kwargs["mask"].dtype != np.bool_:
                raise ValueError("The mask needs to be an array of booleans.")

        # MAIN ALGORITHM
        if y_true.ndim == 2:
            # single image pair — delegate directly
            return fun(*args, **kwargs)
        elif y_true.ndim == 3:
            if y_true.shape[0] != y_pred.shape[0]:
                raise ValueError(
                    "The number of y_true images and y_pred images has to be the same"
                )
            # apply the metric sample by sample
            metric_values = [
                fun(y_true[i], y_pred[i], **kwargs) for i in range(y_true.shape[0])
            ]
            return np.array(metric_values)
        else:
            raise ValueError("Invalid number of dimensions {}".format(y_true.ndim))

    return wrapper_fun
@multiple_images_decorator
def mse_img(y_true, y_pred, mask=None):
    """Compute the mean-squared error between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    mask : np.array, optional
        Boolean mask restricting the computation to a precise area.

    Returns
    -------
    mse : float
        The mean-squared error (MSE). Loss metric — lower means more similar images.
    """
    if mask is None:
        # no mask given — use every pixel
        mask = np.ones(y_true.shape, dtype=bool)

    squared_residuals = (y_true[mask] - y_pred[mask]) ** 2
    return np.sum(squared_residuals) / np.sum(mask)
@multiple_images_decorator
def mae_img(y_true, y_pred, mask=None):
    """Compute the mean absolute error between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    mask : np.array, optional
        Boolean mask restricting the computation to a precise area.

    Returns
    -------
    mae : float
        The mean absolute error (MAE). Loss metric — lower means more similar images.
    """
    if mask is None:
        # no mask given — use every pixel
        mask = np.ones(y_true.shape, dtype=bool)

    absolute_residuals = np.abs(y_true[mask] - y_pred[mask])
    return np.sum(absolute_residuals) / np.sum(mask)
@multiple_images_decorator
def psnr_img(y_true, y_pred, mask=None, data_range=None):
    """Compute the peak signal to noise ratio (PSNR) between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    mask : np.array, optional
        Boolean mask restricting the computation to a precise area.
    data_range : int
        The data range of the input image (distance between minimum and maximum
        possible values). By default, this is estimated from the image data-type.

    Returns
    -------
    psnr : float
        The PSNR value. Similarity metric — higher means more similar images.

    References
    ----------
    .. https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
    """
    if mask is None:
        # no mask given — use every pixel
        mask = np.ones(y_true.shape, dtype=bool)

    return peak_signal_noise_ratio(y_true[mask], y_pred[mask], data_range=data_range)
@multiple_images_decorator
def demons_img(y_true, y_pred, mask=None):
    """Compute the demons metric between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    mask : np.array, optional
        Boolean mask restricting the computation to a precise area.

    Returns
    -------
    demons : float
        The demons value. Loss metric — lower means more similar images.
    """
    fixed = ants.image_clone(ants.from_numpy(y_true), pixeltype="float")
    moving = ants.image_clone(ants.from_numpy(y_pred), pixeltype="float")

    if mask is None:
        return ants.image_similarity(fixed, moving, metric_type="Demons")

    # ANTs expects the mask as a float image; use the same mask on both sides
    mask_image = ants.image_clone(
        ants.from_numpy(mask.astype(float)), pixeltype="float"
    )
    return ants.image_similarity(
        fixed,
        moving,
        fixed_mask=mask_image,
        moving_mask=mask_image,
        metric_type="Demons",
    )
@multiple_images_decorator
def cross_correlation_img(y_true, y_pred, mask=None):
    """Compute the cross correlation metric between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    mask : np.array, optional
        Boolean mask restricting the computation to a precise area.

    Returns
    -------
    cc : float
        The Cross-Correlation value. Similarity metric — higher means more similar images.
    """
    fixed = ants.image_clone(ants.from_numpy(y_true), pixeltype="float")
    moving = ants.image_clone(ants.from_numpy(y_pred), pixeltype="float")

    if mask is None:
        cc = ants.image_similarity(fixed, moving, metric_type="Correlation")
    else:
        # ANTs expects the mask as a float image; use the same mask on both sides
        mask_image = ants.image_clone(
            ants.from_numpy(mask.astype(float)), pixeltype="float"
        )
        cc = ants.image_similarity(
            fixed,
            moving,
            fixed_mask=mask_image,
            moving_mask=mask_image,
            metric_type="Correlation",
        )

    # ANTs reports a loss-style value — negate so higher means more similar
    return -cc
@multiple_images_decorator
def ssmi_img(y_true, y_pred):
    """Compute the structural similarity between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.

    Returns
    -------
    ssmi : float
        The structural similarity (SSMI). Similarity metric — higher means more similar images.
    """
    ssmi = structural_similarity(y_true, y_pred)
    return ssmi
@multiple_images_decorator
def mi_img(y_true, y_pred, mask=None, metric_type="MattesMutualInformation"):
    """Compute the mutual information (MI) between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    y_pred : np.array
        Image 2. Either (h, w) or (N, h, w); the batch case is handled by
        `multiple_images_decorator`.
    mask : np.array, optional
        Boolean mask restricting the computation to a precise area.
    metric_type : str, {'MattesMutualInformation', 'JointHistogramMutualInformation'}
        Type of mutual information computation.

    Returns
    -------
    mi : float
        The mutual information (MI) value. Similarity metric — higher means more similar images.
    """
    fixed = ants.image_clone(ants.from_numpy(y_true), pixeltype="float")
    moving = ants.image_clone(ants.from_numpy(y_pred), pixeltype="float")

    if mask is None:
        mi = ants.image_similarity(fixed, moving, metric_type=metric_type)
    else:
        # ANTs expects the mask as a float image; use the same mask on both sides
        mask_image = ants.image_clone(
            ants.from_numpy(mask.astype(float)), pixeltype="float"
        )
        mi = ants.image_similarity(
            fixed,
            moving,
            fixed_mask=mask_image,
            moving_mask=mask_image,
            metric_type=metric_type,
        )

    # ANTs reports a loss-style value — negate so higher means more similar
    return -mi
@multiple_images_decorator
def perceptual_loss_img(y_true, y_pred, model="net-lin", net="vgg"):
    """Compute the perceptual loss (PL) between two images.

    Parameters
    ----------
    y_true : np.array
        Image 1. Either (h, w) of (N, h, w). If (N, h, w), the decorator `multiple_images_decorator` takes care of
        the sample dimension.
    y_pred : np.array
        Image 2. Either (h, w) of (N, h, w). If (N, h, w), the decorator `multiple_images_decorator` takes care of
        the sample dimension.
    model: str, {'net', 'net-lin'}
        Type of model (cf lpips_tf package).
    net: str, {'vgg', 'alex'}
        Type of network (cf lpips_tf package).

    Return
    ------
    pl: float
        The Perceptual Loss (PL) metric. Loss metric, the lower the more similar the images are.

    Notes
    -----
    We use the decorator just to make sure we do not run out of memory during a forward pass. Also,
    its fully convolutional but if the images are too small then might run into issues.
    """
    # lpips_tf is an optional dependency imported lazily with install instructions on failure
    try:
        import lpips_tf
    except ModuleNotFoundError as err:
        raise ModuleNotFoundError(
            """
            LPIPS-TensorFlow required but not found.
            Please install it by running the following command:
            $ pip install git+http://github.com/alexlee-gk/lpips-tensorflow.git#egg=lpips_tf
            """
        ) from err

    # gray2rgb: replicate the single channel three times along a new last axis
    y_true = np.stack((y_true,) * 3, axis=-1)
    y_pred = np.stack((y_pred,) * 3, axis=-1)

    # NOTE(review): these look like leftover TF1 placeholder definitions — they are unused
    # (hence the noqa) and shadowed by the inner function's parameters; confirm whether
    # they can be removed without affecting lpips_tf.
    image0_ph = tf.Variable(tf.float32)  # noqa: F841
    image1_ph = tf.Variable(tf.float32)  # noqa: F841

    @tf.function
    def lpips(image0_ph, image1_ph):
        return lpips_tf.lpips(image0_ph, image1_ph, model=model, net=net)

    pl = lpips(y_true, y_pred)

    # NOTE(review): `tf.reset_default_graph` only exists in TF1 / tf.compat.v1 — verify this
    # call against the TensorFlow version pinned by the project.
    tf.reset_default_graph()

    return pl.item()
# KEYPOINTS METRICS
def _euclidean_distance_kp(y_true, y_pred):
"""Euclidean distance between pairs of n-dimensional points.
Parameters
----------
y_true : np.array
Array of shape (N, d) where `N` is the number of samples.
y_pred : np.array
Array of shape (N, d) where `N` is the number of samples.
Returns
-------
distance : np.array
Array of shape (N, ) representing the Euclidean distance between `y_true` and `y_pred` for each sample.
Raises
------
ValueError:
In case the shapes do not agree.
Examples
--------
>>> import numpy as np
>>> from atlalign.metrics import _euclidean_distance_kp
>>> np.random.seed(0)
>>> y_true = np.array([[1, 2], [3.4, 4], [2, 1]])
>>> y_pred = np.array([[1.2, 3], [3, 4], [2, 1.8]])
>>> _euclidean_distance_kp(y_true, y_pred)
array([1.0198039, 0.4 , 0.8 ])
"""
if y_true.shape != y_pred.shape:
raise ValueError(
"The shapes of true and predicted are different, {} vs {}".format(
y_true.shape, y_pred.shape
)
)
return np.sqrt(np.sum((y_true - y_pred) ** 2, axis=1))
def _compute_weights_kp(y, distance_kwargs=None):
"""Compute weights of keypoints.
Parameters
----------
y : np.array
Array of shape (N, 2) where each row represents a keypoint.
distance_kwargs : dict or None
If ``dict`` then represents parameters to be passed into scipy implementation.
Returns
-------
weights : np.array
Array of shape (N, ) where each element represents a weight of that specific keypoint.
"""
dist_matrix = distance.cdist(
y, y, **(distance_kwargs or {})
) # uses euclidean distance by default
mean_dist = np.mean(dist_matrix, axis=0)
return mean_dist / mean_dist.sum()
def tre_kp(y_true, y_pred, weighted=False):
    """Compute (absolute) target registration error.

    Parameters
    ----------
    y_true : np.array
        Array of shape (N, 2) where `N` is the number of samples.
    y_pred : np.array
        Array of shape (N, 2) where `N` is the number of samples.
    weighted : bool
        If True, then the final TRE is weighted by `y_true`.

    Returns
    -------
    mean_tre : float
        Mean target registration error over all keypoint pairs.
    tre : np.array
        Array of shape (N,) with the TRE of each keypoint pair.
    weights : np.array
        Array of shape (N,). Uniform 1 / N when `weighted` is False, otherwise derived from `y_true`.

    References
    ----------
    .. https://anhir.grand-challenge.org/Performance_Metrics/
    """
    n = len(y_true)

    tre = _euclidean_distance_kp(y_true, y_pred)
    if weighted:
        weights = _compute_weights_kp(y_true)
    else:
        weights = np.full(n, 1 / n)

    mean_tre = np.inner(tre, weights)
    return mean_tre, tre, weights
def rtre_kp(y_true, y_pred, h, w, weighted=False):
    """Compute relative target registration error.

    Parameters
    ----------
    y_true : np.array
        Array of shape (N, 2) where `N` is the number of samples.
    y_pred : np.array
        Array of shape (N, 2) where `N` is the number of samples.
    h : np.array
        Height of the image.
    w : np.array
        Width of the image.
    weighted : bool
        If True, then the final TRE is weighted by `y_true`.

    Returns
    -------
    mean_rtre : float
        Mean relative target registration error over all keypoint pairs.
    rtre : np.array
        Array of shape (N,) with the rTRE of each keypoint pair.
    weights : np.array
        Array of shape (N,). Uniform 1 / N when `weighted` is False, otherwise derived from `y_true`.

    References
    ----------
    .. https://anhir.grand-challenge.org/Performance_Metrics/
    """
    n = len(y_true)

    # normalize the absolute distances by the image diagonal
    diagonal = np.sqrt(w ** 2 + h ** 2)
    rtre = _euclidean_distance_kp(y_true, y_pred) / diagonal

    if weighted:
        weights = _compute_weights_kp(y_true)
    else:
        weights = np.full(n, 1 / n)

    mean_rtre = np.inner(rtre, weights)
    return mean_rtre, rtre, weights
def improvement_kp(y_true, y_pred, y_init):
    """Compute improvement ratio with respect to initial keypoints.

    Parameters
    ----------
    y_true : np.array
        Array of shape (N, 2) where `N` is the number of samples. Ground truth positions in the reference space.
    y_pred : np.array
        Array of shape (N, 2) where `N` is the number of samples. Predicted positions in the reference space.
    y_init : np.array
        Array of shape (N, 2) where `N` is the number of samples. Initial positions in the reference space.

    Returns
    -------
    percent_improved : float
        Fraction of predicted keypoints that are better than the initial ones.
    mask : np.array
        Boolean array marking which predicted keypoints have a lower TRE than the initial ones.
    """
    # per-keypoint TRE of the prediction and of the initial guess
    tre_pred = tre_kp(y_true, y_pred)[1]
    tre_init = tre_kp(y_true, y_init)[1]

    mask = tre_pred < tre_init
    percent_improved = mask.sum() / len(mask)

    return percent_improved, mask
# ALL TOGETHER
def evaluate(y_true, y_pred, imgs_mov, img_ids, ps, dataset_ids, depths=()):
    """Evaluate all relevant metrics in a per sample fashion.

    Parameters
    ----------
    y_true : np.ndarray
        Array of shape (N, h, w, 2) representing true samples of displacement fields.
    y_pred : np.ndarray
        Array of shape (N, h, w, 2) representing predicted samples of displacement fields.
    imgs_mov : np.ndarray
        Array of shape (N, h, w) representing the moving images. If dtype=float32 then no division. If uint8
        then values divided by 255 and cast to float32.
    img_ids : np.array
        Array of shape (N,) representing the image ids; used as the index of the result.
    ps : np.array
        Array of shape (N,) representing the coronal sections of the samples; used to slice the
        annotation volume (``p // 25``) when `depths` is non-empty.
    dataset_ids : np.array
        Array of shape (N,) representing the dataset ids.
    depths : tuple
        Tuple of different depths to compute the intersection over union score.

    Returns
    -------
    result : pd.DataFrame
        All results even containing array entries.
    results_viewable : pd.DataFrame
        Results without array entries.

    Raises
    ------
    ValueError
        If the inputs do not all have the same number of samples.
    """
    # NOTE: a chained comparison `len(a) != len(b) != len(c) ...` only compares
    # neighbouring pairs and short-circuits, so a mismatch in a later argument
    # could go undetected whenever the first pair was equal. Collecting the
    # lengths in a set checks all of them at once.
    lengths = {
        len(y_true),
        len(y_pred),
        len(imgs_mov),
        len(img_ids),
        len(ps),
        len(dataset_ids),
    }
    if len(lengths) != 1:
        raise ValueError("All inputs must have the same number of samples.")

    n = len(y_true)

    # Normalize moving images to float32 (divisor hoisted out of the loop).
    scale = 255 if imgs_mov.dtype == np.uint8 else 1
    imgs_mov = [img.astype(np.float32) / scale for img in imgs_mov]

    # Displacement fields.
    df_true = [
        DisplacementField(y_true[i, ..., 0], y_true[i, ..., 1]) for i in range(n)
    ]
    df_pred = [
        DisplacementField(y_pred[i, ..., 0], y_pred[i, ..., 1]) for i in range(n)
    ]

    # Inverse dvfs; a predicted field may not be invertible, store None then so
    # that downstream segmentation metrics can skip it.
    inv_df_true = [df.pseudo_inverse(ds_f=64) for df in df_true]
    inv_df_pred = []
    for df in df_pred:
        try:
            inv_df_pred.append(df.pseudo_inverse(ds_f=64))
        except Exception:
            inv_df_pred.append(None)

    # Image registration.
    imgs_reg_true = [df_true[i].warp(imgs_mov[i]) for i in range(n)]
    imgs_reg_pred = [df_pred[i].warp(imgs_mov[i]) for i in range(n)]

    # Template of all per-sample results; `_a` = scalar average, `_pp` = per pixel.
    # The (320, 456) placeholders assume the atlas image shape — overwritten below.
    template_dict = {
        # SINGLE DVFS
        "norm_a": pd.Series(0, index=range(n), dtype=float),
        "norm_pp": [np.empty((320, 456)) for _ in range(n)],
        "jacobian_nonpositive_pixels_a": pd.Series(0, index=range(n), dtype=float),
        "jacobian_pp": [np.empty((320, 456)) for _ in range(n)],
        # META
        "p": pd.Series(ps, dtype=int),
        "dataset_id": pd.Series(dataset_ids, dtype=int),
        "imgs_mov": imgs_mov,
        # OPTICAL FLOW
        "vector_distance_a": pd.Series(0, index=range(n), dtype=float),
        "vector_distance_pp": [np.empty((320, 456)) for _ in range(n)],
        "angular_error_a": pd.Series(0, index=range(n), dtype=float),
        "angular_error_pp": [np.empty((320, 456)) for _ in range(n)],
        # IMAGE METRICS
        "mae_img_a": pd.Series(0, index=range(n), dtype=float),
        "mse_img_a": pd.Series(0, index=range(n), dtype=float),
        "psnr_img_a": pd.Series(0, index=range(n), dtype=float),
        "cross_correlation_img_a": pd.Series(0, index=range(n), dtype=float),
        "ssmi_img_a": pd.Series(0, index=range(n), dtype=float),
        "mi_img_a": pd.Series(0, index=range(n), dtype=float),
        # Segmentation metrics ('iou_depth_<d>') are added below when requested.
    }

    # NORM
    for i in range(n):
        template_dict["norm_pp"][i] = df_pred[i].norm
        template_dict["norm_a"][i] = template_dict["norm_pp"][i].mean()

    # JACOBIAN
    for i in range(n):
        template_dict["jacobian_pp"][i] = df_pred[i].jacobian
        template_dict["jacobian_nonpositive_pixels_a"][i] = np.sum(
            template_dict["jacobian_pp"][i] <= 0
        )
    template_dict["jacobian_nonpositive_pixels_perc_a"] = (
        100 * template_dict["jacobian_nonpositive_pixels_a"] / (320 * 456)
    )  # noqa

    # VECTOR DISTANCE
    for i in range(n):
        vd_a, vd_pp = vector_distance_combined([df_true[i]], [df_pred[i]])
        (
            template_dict["vector_distance_a"][i],
            template_dict["vector_distance_pp"][i],
        ) = (vd_a, vd_pp)

    # ANGULAR ERROR
    for i in range(n):
        ae_a, ae_pp = angular_error_of([df_true[i]], [df_pred[i]], weighted=True)
        template_dict["angular_error_a"][i], template_dict["angular_error_pp"][i] = (
            ae_a,
            ae_pp,
        )

    # IMAGE METRICS — stack the registered images once and reuse for all metrics.
    imgs_reg_true_arr = np.array(imgs_reg_true)
    imgs_reg_pred_arr = np.array(imgs_reg_pred)
    template_dict["mae_img_a"] = mae_img(imgs_reg_true_arr, imgs_reg_pred_arr)
    template_dict["mse_img_a"] = mse_img(imgs_reg_true_arr, imgs_reg_pred_arr)
    template_dict["psnr_img_a"] = psnr_img(imgs_reg_true_arr, imgs_reg_pred_arr)
    template_dict["ssmi_img_a"] = ssmi_img(imgs_reg_true_arr, imgs_reg_pred_arr)
    template_dict["mi_img_a"] = mi_img(imgs_reg_true_arr, imgs_reg_pred_arr)
    template_dict["cross_correlation_img_a"] = cross_correlation_img(
        imgs_reg_true_arr, imgs_reg_pred_arr
    )

    # Segmentation metrics.
    if depths:
        annot_vol = annotation_volume()
        labels_dict = segmentation_collapsing_labels()
        for d in depths:
            print("Depth {}".format(d))
            ious = []
            for i, p in enumerate(ps):
                segm_ref = find_labels_dic(annot_vol[p // 25], labels_dict, d)
                segm_true = inv_df_true[i].warp_annotation(segm_ref)
                if inv_df_pred[i] is not None:
                    segm_pred = inv_df_pred[i].warp_annotation(segm_ref)
                    ious.append(
                        iou_score(
                            np.array([segm_true]),
                            np.array([segm_pred]),
                            k=None,
                            excluded_labels=[0],
                        )[0]
                    )
                else:
                    # Non-invertible prediction — score is undefined.
                    ious.append(np.nan)
            template_dict["iou_depth_{}".format(d)] = ious

    result = pd.DataFrame(template_dict)
    result = result.set_index(img_ids)

    # Second frame drops array-valued columns so it can be displayed/serialized.
    return result, result[result.dtypes[result.dtypes != object].index]
def evaluate_single(
    deltas_true,
    deltas_pred,
    img_mov,
    p=None,
    avol=None,
    collapsing_labels=None,
    deltas_pred_inv=None,
    deltas_true_inv=None,
    ds_f=4,
    depths=(),
):
    """Evaluate a single sample.

    Parameters
    ----------
    deltas_true : np.ndarray
        Array of shape (height, width, 2) representing deltas_xy of ground truth.
        NOTE(review): the original docs also allowed a `DisplacementField`, but
        the shape check below expects (320, 456, 2) raw deltas — confirm before
        passing anything else.
    deltas_pred : np.ndarray
        Array of shape (height, width, 2) representing deltas_xy of prediction.
    img_mov : np.ndarray
        Moving image.
    p : int or None
        Coronal section in microns. Required when `depths` is non-empty.
    avol : np.ndarray or None
        Annotation volume of shape (528, 320, 456). If None then loaded via `annotation_volume`.
    collapsing_labels : dict or None
        Dictionary for segmentation collapsing. If None then loaded via `segmentation_collapsing_labels`.
    deltas_pred_inv : None or np.ndarray
        If np.ndarray then of shape (height, width, 2) representing inv_deltas_xy of prediction. If not provided
        computed from `deltas_pred`.
    deltas_true_inv : None or np.ndarray
        If np.ndarray then of shape (height, width, 2) representing inv_deltas_xy of truth. If not provided
        computed from `deltas_true`.
    ds_f : int
        Downsampling factor for numerical inverses.
    depths : tuple
        Tuple of integers representing all depths to compute IOU for. If empty no IOU computation takes place.

    Returns
    -------
    all_metrics : dict
        Relevant metrics keyed by name.
    images : dict
        Only returned when `depths` is non-empty: maps each depth to the
        (segm_true, segm_pred) pair used for the IOU/DICE computation.

    Raises
    ------
    ValueError
        If the inputs have an unexpected shape, or `p` is missing while
        segmentation metrics were requested.
    """
    n_pixels = 320 * 456
    if not (deltas_true.shape == (320, 456, 2) and deltas_pred.shape == (320, 456, 2)):
        raise ValueError("Incorrect shape of input")

    df_true = DisplacementField(deltas_true[..., 0], deltas_true[..., 1])
    df_pred = DisplacementField(deltas_pred[..., 0], deltas_pred[..., 1])

    img_reg_true = df_true.warp(img_mov)
    img_reg_pred = df_pred.warp(img_mov)

    all_metrics = {
        "mse_img": mse_img(img_reg_true, img_reg_pred),
        "mae_img": mae_img(img_reg_true, img_reg_pred),
        "psnr_img": psnr_img(img_reg_true, img_reg_pred),
        "ssmi_img": ssmi_img(img_reg_true, img_reg_pred),
        "mi_img": mi_img(img_reg_true, img_reg_pred),
        "cc_img": cross_correlation_img(img_reg_true, img_reg_pred),
        "norm": df_pred.norm.mean(),
        "corrupted_pixels": np.sum(df_pred.jacobian < 0) / n_pixels,
        "euclidean_distance": vector_distance_combined([df_true], [df_pred])[0],
        "angular_error": angular_error_of([df_true], [df_pred], weighted=True)[0],
    }

    # Segmentation metrics only when requested.
    if not depths:
        return all_metrics

    # Checks. Previously `avol is None` raised here, contradicting the documented
    # fallback and making the `annotation_volume()` load below unreachable.
    avol_ = annotation_volume() if avol is None else avol
    if avol_.shape != (528, 320, 456):
        raise ValueError("Incorrectly shaped annotation volume")
    if p is None:
        raise ValueError("Coronal section `p` is required for segmentation metrics")

    # Prepare inverses (use the provided ones when available, otherwise invert numerically).
    if deltas_true_inv is not None:
        df_true_inv = DisplacementField(
            deltas_true_inv[..., 0], deltas_true_inv[..., 1]
        )
    else:
        df_true_inv = df_true.pseudo_inverse(ds_f=ds_f)

    if deltas_pred_inv is not None:
        df_pred_inv = DisplacementField(
            deltas_pred_inv[..., 0], deltas_pred_inv[..., 1]
        )
    else:
        df_pred_inv = df_pred.pseudo_inverse(ds_f=ds_f)

    collapsing_labels_ = (
        segmentation_collapsing_labels()
        if collapsing_labels is None
        else collapsing_labels
    )

    # Compute IOU/DICE per depth; also keep the warped segmentations for inspection.
    images = {}
    for depth in depths:
        segm_ref = find_labels_dic(avol_[p // 25], collapsing_labels_, depth)
        segm_true = df_true_inv.warp_annotation(segm_ref)
        segm_pred = df_pred_inv.warp_annotation(segm_ref)
        images[depth] = (segm_true, segm_pred)
        all_metrics["iou_{}".format(depth)] = iou_score(
            np.array([segm_true]),
            np.array([segm_pred]),
            k=None,
            excluded_labels=[0],
        )[0]
        all_metrics["dice_{}".format(depth)] = dice_score(
            np.array([segm_true]),
            np.array([segm_pred]),
            k=None,
            excluded_labels=[0],
        )[0]

    return all_metrics, images