Source code for ordinal_xai.models.onn

"""
Ordinal Neural Network (ONN) for ordinal regression.

This module implements an Ordinal Neural Network model for ordinal regression,
using a fully connected neural network trained through the skorch library with an
ordinal loss function from the dlordinal library (Berchez et al., 2025). By default
the loss is ``CDWCELoss`` and the output layer is ``StickBreakingLayer``. The model
is designed to handle ordinal data by incorporating the ordinal nature of the
target variable into the loss function.

The model is implemented as a scikit-learn compatible estimator, allowing it to be
used with scikit-learn's pipeline and cross-validation tools.
"""

from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from dlordinal.losses import CDWCELoss
from dlordinal.output_layers import StickBreakingLayer, CLM
from sklearn.base import BaseEstimator
from sklearn.exceptions import NotFittedError
from sklearn.utils.validation import check_X_y, check_is_fitted
from skorch import NeuralNet
from skorch.callbacks import EarlyStopping

from ..utils.data_utils import transform_features
from .base_model import BaseOrdinalModel

class FCNet(nn.Module):
    """
    Feed-forward backbone used by the ordinal neural network.

    The module stacks one ``Linear -> ReLU -> Dropout`` group per entry of
    ``hidden_layers`` and finishes with the caller-supplied ``output_layer``.
    Whatever ``output_layer`` returns is what this module returns.

    Parameters
    ----------
    input_dim : int
        Number of input features (width of the first linear layer's input)
    num_classes : int
        Number of ordinal classes. Accepted for interface symmetry; this
        class does not read it (the output layer is built by the caller).
    hidden_layers : List[int]
        Width of each hidden layer, in order
    dropout : float
        Dropout probability applied after every hidden activation
    output_layer : nn.Module
        Already-constructed final layer appended after the hidden stack

    Attributes
    ----------
    network : nn.Sequential
        The assembled stack of layers
    """

    def __init__(self, input_dim: int, num_classes: int, hidden_layers: List[int], dropout: float, output_layer: nn.Module):
        """
        Assemble the layer stack.

        Parameters
        ----------
        input_dim : int
            Number of input features
        num_classes : int
            Number of ordinal classes (not used here)
        hidden_layers : List[int]
            Width of each hidden layer, in order
        dropout : float
            Dropout probability applied after every hidden activation
        output_layer : nn.Module
            Already-constructed final layer appended after the hidden stack
        """
        super().__init__()
        # Consecutive pairs of this list are the (in, out) sizes of each
        # hidden linear layer.
        widths = [input_dim, *hidden_layers]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.extend((nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)))
        stack.append(output_layer)
        self.network = nn.Sequential(*stack)

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Run the input through the layer stack.

        Parameters
        ----------
        X : torch.Tensor
            Input tensor whose last dimension equals ``input_dim``

        Returns
        -------
        torch.Tensor
            Output of the final ``output_layer``
        """
        return self.network(X)

class ONN(BaseEstimator, BaseOrdinalModel):
    """
    Ordinal Neural Network for ordinal regression.

    A fully connected network (``FCNet``) is trained through skorch's
    ``NeuralNet`` with an ordinal loss. The output layer, the loss and the
    optimizer are all configurable; the defaults are ``StickBreakingLayer``,
    ``CDWCELoss`` and ``torch.optim.Adam``.

    Parameters
    ----------
    hidden_layers : List[int], default=[64, 64]
        List of hidden layer sizes. May be empty, in which case the output
        layer is fed directly with the transformed features.
    output_layer : callable, default=StickBreakingLayer
        Factory called in ``fit`` as
        ``output_layer(num_classes=..., input_shape=...)`` to build the final
        layer. It is never replaced by the built instance.
    loss_function : callable, default=CDWCELoss
        Factory called in ``fit`` as ``loss_function(num_classes=...)`` to
        build the training criterion.
    dropout : float, default=0.2
        Dropout rate for regularization
    max_epochs : int, default=1000
        Maximum number of training epochs
    batch_size : int, default=32
        Batch size for training
    lr : float, default=0.0001
        Learning rate for optimization
    optimizer : callable, default=torch.optim.Adam
        Optimizer class handed to skorch
    patience : int, default=10
        Number of epochs to wait for improvement before early stopping
    min_delta : float, default=0.0001
        Relative change in validation loss counted as an improvement
    verbose : int, default=2
        Verbosity level for training progress

    Attributes
    ----------
    feature_names_ : list
        Names of features used during training
    n_features_in_ : int
        Number of features seen during training
    ranks_ : ndarray
        Sorted unique ordinal class labels; column ``k`` of
        ``predict_proba`` corresponds to ``ranks_[k]``
    _model : NeuralNet
        The fitted skorch neural network
    _encoder : object
        Encoder returned by ``transform_features`` (per the original
        documentation, a one-hot encoder for categorical features)
    _scaler : object
        Scaler returned by ``transform_features`` (per the original
        documentation, a standard scaler for numerical features)
    is_fitted_ : bool
        Whether the model has been fitted

    Notes
    -----
    - Feature preprocessing is delegated to ``transform_features``
    - Uses early stopping on the validation loss
    - Trains on CUDA when ``torch.cuda.is_available()``, otherwise on CPU
    """

    # Constructor parameter names, in signature order. Drives get_params and
    # set_params so that every hyper-parameter survives sklearn.clone().
    _PARAM_NAMES = (
        "hidden_layers",
        "output_layer",
        "loss_function",
        "dropout",
        "max_epochs",
        "batch_size",
        "lr",
        "optimizer",
        "patience",
        "min_delta",
        "verbose",
    )

    def __init__(
        self,
        # NOTE: the list default is kept for interface compatibility; it is
        # stored by reference and never mutated by this class.
        hidden_layers: List[int] = [64, 64],  # noqa: B006
        output_layer: Callable[..., nn.Module] = StickBreakingLayer,
        loss_function: Callable[..., nn.Module] = CDWCELoss,
        dropout: float = 0.2,
        max_epochs: int = 1000,
        batch_size: int = 32,
        lr: float = 0.0001,
        optimizer: Callable[..., torch.optim.Optimizer] = torch.optim.Adam,
        patience: int = 10,
        min_delta: float = 0.0001,
        verbose: int = 2
    ):
        """
        Initialize the Ordinal Neural Network.

        Parameters are stored unchanged (no copying or validation) so that
        ``sklearn.clone`` can round-trip them.

        Parameters
        ----------
        hidden_layers : List[int], default=[64, 64]
            List of hidden layer sizes
        output_layer : callable, default=StickBreakingLayer
            Factory for the final layer, called with ``num_classes`` and
            ``input_shape`` keyword arguments
        loss_function : callable, default=CDWCELoss
            Factory for the criterion, called with a ``num_classes`` keyword
            argument
        dropout : float, default=0.2
            Dropout rate for regularization
        max_epochs : int, default=1000
            Maximum number of training epochs
        batch_size : int, default=32
            Batch size for training
        lr : float, default=0.0001
            Learning rate for optimization
        optimizer : callable, default=torch.optim.Adam
            Optimizer class handed to skorch
        patience : int, default=10
            Number of epochs to wait for improvement before early stopping
        min_delta : float, default=0.0001
            Relative change in validation loss counted as an improvement
        verbose : int, default=2
            Verbosity level for training progress
        """
        super().__init__()
        self.hidden_layers = hidden_layers
        self.output_layer = output_layer
        self.loss_function = loss_function
        self.dropout = dropout
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.lr = lr
        self.optimizer = optimizer
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self._model = None
        self._encoder = None
        self._scaler = None
        self.is_fitted_ = False

    def get_params(self, deep: bool = True) -> Dict[str, Any]:
        """
        Get parameters for this estimator.

        Every constructor argument is returned, including ``output_layer``,
        ``loss_function`` and ``optimizer``; leaving any of them out would
        make ``sklearn.clone`` silently fall back to the defaults.

        Parameters
        ----------
        deep : bool, default=True
            Accepted for scikit-learn compatibility. The estimator has no
            nested estimators, so the value does not change the result.

        Returns
        -------
        dict
            Parameter names mapped to their values
        """
        return {name: getattr(self, name) for name in self._PARAM_NAMES}

    def set_params(self, **params: Any) -> "ONN":
        """
        Set the parameters of this estimator.

        Parameters
        ----------
        **params : dict
            Estimator parameters

        Returns
        -------
        self : ONN
            The estimator instance

        Raises
        ------
        ValueError
            If a key is not a constructor parameter. Nothing is modified in
            that case.
        """
        # Validate everything first so a bad key cannot leave the estimator
        # half-updated.
        unknown = [key for key in params if key not in self._PARAM_NAMES]
        if unknown:
            raise ValueError(
                f"Invalid parameter(s) {unknown!r} for estimator "
                f"{type(self).__name__}. Valid parameters are: "
                f"{list(self._PARAM_NAMES)!r}."
            )
        for key, value in params.items():
            setattr(self, key, value)
        return self

    @staticmethod
    def _to_tensor(X_transformed: pd.DataFrame) -> torch.Tensor:
        """Convert transformed features to a float32 CPU tensor."""
        return torch.as_tensor(np.asarray(X_transformed, dtype=np.float32))

    def _check_fitted(self) -> None:
        """Raise ``NotFittedError`` unless ``fit`` has completed."""
        # ``check_is_fitted(self)`` cannot be used here: ``is_fitted_`` is set
        # in ``__init__`` and any trailing-underscore attribute satisfies it.
        if not getattr(self, "is_fitted_", False) or getattr(self, "_model", None) is None:
            raise NotFittedError(
                f"This {type(self).__name__} instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this estimator."
            )

    def fit(self, X: pd.DataFrame, y: pd.Series) -> "ONN":
        """
        Fit the Ordinal Neural Network model.

        The features are transformed with ``transform(X, fit=True)``, the
        labels are encoded as indices into the sorted unique labels
        (``ranks_``), and a skorch ``NeuralNet`` is trained with early
        stopping on the validation loss. Training runs on CUDA when
        available, otherwise on CPU.

        Parameters
        ----------
        X : pd.DataFrame of shape (n_samples, n_features)
            Training data
        y : pd.Series of shape (n_samples,)
            Target values. Any sortable label set is accepted; labels do not
            have to be ``0..K-1``.

        Returns
        -------
        self : ONN
            The fitted model
        """
        # A failed refit must not leave the estimator flagged as fitted.
        self.is_fitted_ = False

        # Store feature names and ranks
        self.feature_names_ = X.columns.tolist()
        self.n_features_in_ = X.shape[1]
        y_array = np.asarray(y).ravel()
        self.ranks_ = np.unique(y_array)

        # Transform input data
        X_transformed = self.transform(X, fit=True)

        # Convert to torch tensors. Labels become positions in ``ranks_`` so
        # the loss always sees contiguous class indices 0..K-1.
        X_tensor = self._to_tensor(X_transformed)
        y_tensor = torch.as_tensor(
            np.searchsorted(self.ranks_, y_array), dtype=torch.long
        )

        # Initialize model
        input_dim = X_transformed.shape[1]
        num_classes = len(self.ranks_)
        # With no hidden layers the output layer consumes the raw features.
        last_width = self.hidden_layers[-1] if self.hidden_layers else input_dim
        # Build into a local: ``self.output_layer`` must stay the factory so
        # the estimator can be refitted and cloned.
        output_layer = self.output_layer(num_classes=num_classes, input_shape=last_width)
        net = FCNet(input_dim, num_classes, self.hidden_layers, self.dropout, output_layer)

        # Initialize early stopping callback
        early_stopping = EarlyStopping(
            monitor='valid_loss',
            patience=self.patience,
            threshold=self.min_delta,
            threshold_mode='rel',
            lower_is_better=True
        )

        # Initialize skorch neural network
        self._model = NeuralNet(
            module=net,
            criterion=self.loss_function(num_classes=num_classes),
            optimizer=self.optimizer,
            max_epochs=self.max_epochs,
            batch_size=self.batch_size,
            lr=self.lr,
            device='cuda' if torch.cuda.is_available() else 'cpu',
            callbacks=[early_stopping],
            verbose=self.verbose
        )

        # Fit the model
        self._model.fit(X_tensor, y_tensor)

        # Set fitted flag
        self.is_fitted_ = True
        return self

    def predict(self, X: pd.DataFrame) -> np.ndarray:
        """
        Predict ordinal class labels.

        Parameters
        ----------
        X : pd.DataFrame of shape (n_samples, n_features)
            Samples to predict

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, taken from ``ranks_``. For labels ``0..K-1``
            this equals the argmax index.

        Raises
        ------
        NotFittedError
            If the model has not been fitted
        """
        best = self.predict_proba(X).argmax(axis=1)
        # Map class indices back to the original label values.
        return self.ranks_[best]

    def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
        """
        Predict class probabilities.

        Parameters
        ----------
        X : pd.DataFrame of shape (n_samples, n_features)
            Samples to predict probabilities for

        Returns
        -------
        ndarray of shape (n_samples, n_classes)
            Softmax of the network output; column ``k`` corresponds to
            ``ranks_[k]``

        Raises
        ------
        NotFittedError
            If the model has not been fitted
        """
        self._check_fitted()
        X_transformed = self.transform(X, fit=False)
        X_tensor = self._to_tensor(X_transformed)

        # Get logits and convert to probabilities
        with torch.no_grad():
            logits = np.asarray(self._model.predict(X_tensor))
        # Subtracting the row maximum leaves the softmax unchanged but keeps
        # exp() from overflowing on large logits.
        shifted = logits - logits.max(axis=1, keepdims=True)
        weights = np.exp(shifted)
        return np.array(weights / weights.sum(axis=1, keepdims=True))

    def transform(self, X: pd.DataFrame, fit: bool = False, no_scaling: bool = False) -> pd.DataFrame:
        """
        Transform input data into the format expected by the model.

        Delegates to ``transform_features`` with the estimator's current
        encoder and scaler. When ``fit`` is true, the encoder and scaler
        returned by that call are stored on the estimator for later use.
        Per the original documentation, categorical features are one-hot
        encoded and numerical features are standardized unless
        ``no_scaling=True``.

        Parameters
        ----------
        X : pd.DataFrame of shape (n_samples, n_features)
            Input data to transform
        fit : bool, default=False
            Whether to fit new encoder/scaler or use existing ones
        no_scaling : bool, default=False
            Whether to skip scaling of numerical features

        Returns
        -------
        pd.DataFrame
            Transformed data

        Notes
        -----
        Any validation of the input columns happens inside
        ``transform_features``; this method adds none of its own.
        """
        X_transformed, encoder, scaler = transform_features(
            X,
            fit=fit,
            encoder=self._encoder,
            scaler=self._scaler,
            no_scaling=no_scaling
        )
        if fit:
            self._encoder = encoder
            self._scaler = scaler
        return X_transformed