Source code for combss.metrics

"""
combss.metrics.py

This private module contains logic for computing performance metrics for variable selection.
Metrics computed:
- Relative prediction error
- Matthews Correlation Coefficient
- Accuracy
- Sensitivity
- Specificity
- F1 Score
- Precision
"""

import numpy as np


[docs]
def binary_confusion_matrix(y_true, y_pred):
    """
    Tabulate the 2x2 confusion matrix of a binary classification.

    Both inputs are flattened before they are compared, so row vectors,
    column vectors and plain sequences are all handled the same way.
    Entries equal to neither 0 nor 1 are not counted in any cell.

    Args:
        y_true (np.ndarray): Ground truth labels (0 or 1).
        y_pred (np.ndarray): Predicted labels (0 or 1).

    Returns:
        np.ndarray: 2x2 matrix of counts laid out as [[TN, FP], [FN, TP]].
    """
    actual = np.asarray(y_true).ravel()
    predicted = np.asarray(y_pred).ravel()

    # One boolean mask per (source, class) pair; each cell is an AND of two.
    actual_pos, actual_neg = actual == 1, actual == 0
    predicted_pos, predicted_neg = predicted == 1, predicted == 0

    # Row index = actual class, column index = predicted class.
    return np.array([
        [np.sum(actual_neg & predicted_neg), np.sum(actual_neg & predicted_pos)],
        [np.sum(actual_pos & predicted_neg), np.sum(actual_pos & predicted_pos)],
    ])




[docs]
def performance_metrics(data_X, beta_true, beta_pred):
    """Compute the evaluation metrics for COMBSS.

    A covariate counts as "selected" when its coefficient is non-zero. The
    selection implied by ``beta_pred`` is compared with the one implied by
    ``beta_true``, and the fitted values of the two coefficient vectors are
    compared on ``data_X``.

    Parameters
    ----------
    data_X : array-like of shape (n_samples, n_covariates)
        The design matrix, where `n_samples` is the number of samples observed
        and `n_covariates` is the number of covariates measured in each sample.

    beta_true : array-like of shape (n_covariates,) or (n_covariates, 1)
        The true value of beta used in the generation of data.

    beta_pred : array-like of shape (n_covariates,) or (n_covariates, 1)
        The predicted value of beta generated by COMBSS.

    Returns
    -------
    dict
        A dictionary of floats with the following keys.

        pe
            Relative prediction error: the mean squared difference between
            ``data_X @ beta_pred`` and ``data_X @ beta_true`` divided by the
            mean square of ``data_X @ beta_true`` (equivalently, the ratio of
            the squared L-2 norms). Taken to be 1 when no covariate is
            selected by ``beta_pred``.
        mcc
            Matthews Correlation Coefficient of the selection. Taken to be 0
            whenever its denominator is zero, which includes the case where
            no covariate is selected by ``beta_pred``.
        acc
            Accuracy: proportion of covariates whose inclusion or exclusion
            is classified correctly, between 0 and 1.
        sens
            Sensitivity: proportion of the truly non-zero covariates that are
            selected, between 0 and 1 (``nan`` if ``beta_true`` has no
            non-zero entry).
        spec
            Specificity: proportion of the truly zero covariates that are
            left out, between 0 and 1 (``nan`` if ``beta_true`` has no zero
            entry).
        f1
            F1 score of the selection. Taken to be 0 when no truly non-zero
            covariate is selected.
        prec
            Precision: proportion of the selected covariates that are truly
            non-zero, between 0 and 1. Taken to be 0 when no covariate is
            selected by ``beta_pred``.

    Raises
    ------
    ValueError
        If ``beta_true`` and ``beta_pred`` do not have the same number of
        entries.
    """
    data_X = np.asarray(data_X)
    # Flatten both coefficient vectors: mixing a (p, 1) column with a (p,)
    # vector would otherwise broadcast the residual below to shape (n, n).
    beta_true = np.asarray(beta_true).ravel()
    beta_pred = np.asarray(beta_pred).ravel()
    if beta_true.shape != beta_pred.shape:
        raise ValueError(
            "beta_true and beta_pred must have the same number of entries, "
            f"got {beta_true.size} and {beta_pred.size}"
        )

    s_true = beta_true != 0
    s_pred = beta_pred != 0
    c_matrix = binary_confusion_matrix(s_true, s_pred)

    # Python ints are arbitrary precision, so the four-factor product in the
    # MCC denominator cannot overflow the way fixed-width NumPy integers can.
    tn, fp = int(c_matrix[0, 0]), int(c_matrix[0, 1])
    fn, tp = int(c_matrix[1, 0]), int(c_matrix[1, 1])

    undefined = float("nan")
    acc = _safe_ratio(tp + tn, tp + tn + fp + fn, undefined)
    sens = _safe_ratio(tp, tp + fn, undefined)
    spec = _safe_ratio(tn, tn + fp, undefined)

    if not s_pred.any():
        # If the model fails to select any covariate, we take
        # Prediction Error = 1, Precision = 0 and MCC = 0.
        pe = 1.0
        prec = 0.0
        mcc = 0.0
    else:
        Xbeta_true = data_X @ beta_true
        residual = Xbeta_true - data_X @ beta_pred
        pe = float(np.square(residual).mean() / np.square(Xbeta_true).mean())
        # tp + fp > 0 here because at least one covariate is selected.
        prec = tp / (tp + fp)
        mcc_denominator = float(
            np.sqrt(float((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)))
        )
        mcc = _safe_ratio(tp * tn - fp * fn, mcc_denominator, 0.0)

    # If the model fails to recover any existing elements of the true model,
    # we take F1 Score = 0.
    f1 = 0.0 if tp == 0 else tp / (tp + (fp + fn) / 2)

    return {
        "pe": pe,
        "mcc": mcc,
        "acc": acc,
        "sens": sens,
        "spec": spec,
        "f1": f1,
        "prec": prec,
    }


def _safe_ratio(numerator, denominator, default):
    """Return ``numerator / denominator``, or ``default`` when the denominator is zero."""
    return numerator / denominator if denominator else default