Source code for bt4vt.metrics

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Created on 01-05-2022
# @author: wiebket, AnnaLesch

import numpy as np
import scipy as sp
import pandas as pd

#########################################
# In this section we compute performance evaluation metrics
# 1. Equal Error Rate
# 2. Minimum of the Detection Cost Function (mincdet)
#########################################


def compute_eer(fprs, fnrs, thresholds):
    """Computation of the Equal Error Rate and its threshold.

    The equal error rate is read off at the index where the absolute
    difference between the false negative rate and the false positive rate is
    smallest. NaN differences are skipped when searching for that index. As
    the two rates rarely coincide exactly, the larger of the two is reported,
    scaled to a percentage.

    :param fprs: Array of False Positive Rates
    :type fprs: ndarray or array-like
    :param fnrs: Array of False Negative Rates
    :type fnrs: ndarray or array-like
    :param thresholds: Array of Threshold values corresponding to fprs and fnrs
    :type thresholds: ndarray or array-like

    :returns: eer (in percent), eer_threshold
    :rtype: float, float
    """
    # Coerce to arrays so that plain Python sequences can be subtracted
    # element-wise, just like ndarrays.
    fprs = np.asarray(fprs)
    fnrs = np.asarray(fnrs)

    # Index where |FNR - FPR| is smallest, ignoring NaN entries
    min_ix = np.nanargmin(np.absolute(fnrs - fprs))

    eer = max(fprs[min_ix], fnrs[min_ix]) * 100
    eer_threshold = thresholds[min_ix]

    return eer, eer_threshold
def compute_min_cdet(fprs, fnrs, thresholds, dcf_p_target, dcf_c_fp, dcf_c_fn):
    """Computation of the minimum of the detection cost function and its threshold.

    Computation is performed as defined in the
    `NIST Speaker Recognition Evaluation Plan 2019 <https://www.nist.gov/itl/iad/mig/nist-2019-speaker-recognition-evaluation>`_

    .. math::

        C_{Det}(\\theta) = C_{FN} \\times P_{Target} \\times P_{FN}(\\theta) + C_{FP} \\times (1 - P_{Target}) \\times P_{FP}(\\theta)

    The cost is evaluated at every entry of fprs / fnrs and the smallest
    non-NaN value is returned together with the threshold at the same index.

    :param fprs: Array of False Positive Rates
    :type fprs: ndarray
    :param fnrs: Array of False Negative Rates
    :type fnrs: ndarray
    :param thresholds: Array of Threshold values corresponding to fprs and fnrs
    :type thresholds: ndarray
    :param dcf_p_target: :math:`P_{Target}` in the formula above
    :type dcf_p_target: float
    :param dcf_c_fp: :math:`C_{FP}` in the formula above, the weight of the false positive term
    :type dcf_c_fp: float
    :param dcf_c_fn: :math:`C_{FN}` in the formula above, the weight of the false negative term
    :type dcf_c_fn: float

    :returns: min_cdet, min_cdet_threshold
    :rtype: float, float
    """
    # Vectorised evaluation of the detection cost at every operating point;
    # np.asarray keeps plain Python sequences working as inputs.
    fn_cost = np.asarray(fnrs) * dcf_c_fn * dcf_p_target
    fp_cost = np.asarray(fprs) * dcf_c_fp * (1 - dcf_p_target)
    cdet = fn_cost + fp_cost

    # Position of the smallest cost, ignoring NaN entries
    min_ix = np.nanargmin(cdet)
    min_cdet = cdet[min_ix]
    min_cdet_threshold = thresholds[min_ix]

    return min_cdet, min_cdet_threshold
def get_fpfn_at_threshold(fprs, fnrs, thresholds, threshold_value, ppf_norm=False):
    """Get the False Positive Rate and False Negative Rate at a given threshold value.

    The rates are taken at the entry of thresholds that is closest to
    threshold_value; on ties the first such entry is used.

    :param fprs: Array of False Positive Rates
    :type fprs: ndarray
    :param fnrs: Array of False Negative Rates
    :type fnrs: ndarray
    :param thresholds: Array of Threshold values corresponding to fprs and fnrs
    :type thresholds: ndarray
    :param threshold_value: Threshold value to get fpr and fnr for i.e. min_cdet_threshold
    :type threshold_value: float
    :param ppf_norm: normalise the fpr and fnr values to the percent point function
        of the standard normal distribution. Default is set to False.
    :type ppf_norm: bool

    :returns: fpr_at_threshold, fnr_at_threshold
    :rtype: float, float
    """
    # Locate, once, the threshold entry closest to the requested value
    threshold_diff = np.abs(np.asarray(thresholds) - threshold_value)
    closest_ix = np.argmin(threshold_diff)

    fpr_at_threshold = fprs[closest_ix]
    fnr_at_threshold = fnrs[closest_ix]

    if ppf_norm:
        # Import the submodule explicitly: a bare ``import scipy`` does not
        # expose ``scipy.stats`` on every SciPy release.
        from scipy.stats import norm

        # Only the selected operating point needs to be transformed
        fpr_at_threshold = norm.ppf(fpr_at_threshold)
        fnr_at_threshold = norm.ppf(fnr_at_threshold)

    return fpr_at_threshold, fnr_at_threshold
def compute_cdet_at_threshold(fprs, fnrs, thresholds, threshold_value, dcf_p_target, dcf_c_fp, dcf_c_fn):
    """Computation of detection cost function at a given threshold.

    Computation is performed as defined in the
    `NIST Speaker Recognition Evaluation Plan 2019 <https://www.nist.gov/itl/iad/mig/nist-2019-speaker-recognition-evaluation>`_

    .. math::

        C_{Det}(\\theta) = C_{FN} \\times P_{Target} \\times P_{FN}(\\theta) + C_{FP} \\times (1 - P_{Target}) \\times P_{FP}(\\theta)

    The false positive and false negative rates are looked up with
    :func:`get_fpfn_at_threshold` (without ppf normalisation) and then combined
    according to the formula above.

    :param fprs: Array of False Positive Rates
    :type fprs: ndarray
    :param fnrs: Array of False Negative Rates
    :type fnrs: ndarray
    :param thresholds: Array of Threshold values corresponding to fprs and fnrs
    :type thresholds: ndarray
    :param threshold_value: Threshold value to compute detection cost function for
    :type threshold_value: float
    :param dcf_p_target: :math:`P_{Target}` in the formula above
    :type dcf_p_target: float
    :param dcf_c_fp: :math:`C_{FP}` in the formula above, the weight of the false positive term
    :type dcf_c_fp: float
    :param dcf_c_fn: :math:`C_{FN}` in the formula above, the weight of the false negative term
    :type dcf_c_fn: float

    :returns: cdet_at_threshold
    :rtype: float
    """
    # Rates at the operating point closest to the requested threshold
    fp_rate, fn_rate = get_fpfn_at_threshold(fprs, fnrs, thresholds, threshold_value)

    # Weighted contribution of each error type
    fp_term = fp_rate * dcf_c_fp * (1 - dcf_p_target)
    fn_term = fn_rate * dcf_c_fn * dcf_p_target

    return fp_term + fn_term
#########################################
# In this section we compute bias metrics
# 1. Ratio of group mincdet / average mincdet
# 2. Ratio of group fp, fn rates / average fp, fn rates
#########################################
def compute_metrics_ratios(metrics):
    """Computation of metric ratios defined as the subgroup metric scores divided by the average metric score.

    Every row except the first, and every column except the first, is divided
    row-wise by the ``average`` column. The ``thresholds`` column of the input
    is then appended, and finally the row labelled ``0`` is copied over from
    the input without being divided.

    :param metrics: DataFrame that contains metric scores for the average evaluation and subgroup
        evaluations. It must provide the columns ``average`` and ``thresholds`` and a row
        labelled ``0``.
        NOTE(review): the row labelled ``0`` is carried over as-is, presumably because it holds
        the speaker group information rather than a score -- confirm against the caller.
    :type metrics: DataFrame

    :returns: metric_ratios
    :rtype: DataFrame
    """
    # Scores to normalise: skip the first row and the first column
    subgroup_scores = metrics.iloc[1:, 1:]
    ratios = subgroup_scores.div(metrics['average'], axis=0)

    # Append the thresholds via concat rather than DataFrame.insert to avoid performance issues
    threshold_column = metrics["thresholds"].rename("thresholds")
    ratios = pd.concat([ratios, threshold_column], axis=1)

    # Carry row 0 of the input over unchanged
    ratios.loc[0] = metrics.loc[0]

    return ratios
def compute_fpfn_ratio(fpfnth, metrics, metrics_baseline, filter_keys: list, threshold_type):
    """Placeholder for a future false positive / false negative ratio computation.

    Not implemented yet: all arguments are ignored and ``None`` is returned.
    The function is intended to help understand the real-life impact of
    threshold settings.

    :returns: None
    """
    return None