"""
Ordinal Neural Network (ONN) for ordinal regression.
This module implements an Ordinal Neural Network model for ordinal regression,
using a fully connected neural network based on the skorch library with a triangular loss function from the dlordinal library (Berchez et al., 2025). The model
is designed to handle ordinal data by incorporating the ordinal nature of the
target variable into the loss function.
The model is implemented as a scikit-learn compatible estimator, allowing it to be
used with scikit-learn's pipeline and cross-validation tools.
"""
from typing import Optional, List, Dict, Union, Callable, Tuple
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.base import BaseEstimator
from ..utils.data_utils import transform_features
from sklearn.utils.validation import check_X_y, check_is_fitted
from skorch import NeuralNet
from skorch.callbacks import EarlyStopping
from dlordinal.losses import CDWCELoss
from dlordinal.output_layers import StickBreakingLayer, CLM
from .base_model import BaseOrdinalModel
class FCNet(nn.Module):
"""
Fully Connected Neural Network for ordinal regression.
This class implements a feed-forward neural network with configurable
hidden layers, dropout, and ReLU activation functions. The network outputs
logits that can be used with various ordinal loss functions.
Parameters
----------
input_dim : int
Number of input features
num_classes : int
Number of ordinal classes
hidden_layers : List[int]
List of hidden layer sizes
dropout : float
Dropout rate for regularization
Attributes
----------
network : nn.Sequential
The neural network architecture
"""
def __init__(self, input_dim: int, num_classes: int, hidden_layers: List[int], dropout: float, output_layer: nn.Module):
"""
Initialize the neural network architecture.
Parameters
----------
input_dim : int
Number of input features
num_classes : int
Number of ordinal classes
hidden_layers : List[int]
List of hidden layer sizes
dropout : float
Dropout rate for regularization
"""
super().__init__()
layers = []
prev_dim = input_dim
for h in hidden_layers:
layers.append(nn.Linear(prev_dim, h))
layers.append(nn.ReLU())
layers.append(nn.Dropout(dropout))
prev_dim = h
layers.append(output_layer)
self.network = nn.Sequential(*layers)
def forward(self, X: torch.Tensor) -> torch.Tensor:
"""
Forward pass through the network.
Parameters
----------
X : torch.Tensor
Input tensor of shape (batch_size, input_dim)
Returns
-------
torch.Tensor
Output tensor of shape (batch_size, num_classes) containing logits
"""
return self.network(X)
[docs]
class ONN(BaseEstimator, BaseOrdinalModel):
"""
Ordinal Neural Network for ordinal regression.
This class implements an ordinal regression model using a neural network
with a triangular loss function. The model uses a fully connected network
with configurable architecture and training parameters.
Parameters
----------
hidden_layers : List[int], default=[64, 64]
List of hidden layer sizes
dropout : float, default=0.2
Dropout rate for regularization
max_epochs : int, default=10000
Maximum number of training epochs
batch_size : int, default=32
Batch size for training
lr : float, default=0.001
Learning rate for optimization
patience : int, default=10
Number of epochs to wait for improvement before early stopping
min_delta : float, default=0.0001
Minimum change in validation loss to be considered as improvement
verbose : int, default=2
Verbosity level for training progress
Attributes
----------
feature_names_ : list
Names of features used during training
n_features_in_ : int
Number of features seen during training
ranks_ : ndarray
Unique ordinal class labels
_model : NeuralNet
The fitted skorch neural network
_encoder : OneHotEncoder
Encoder for categorical features
_scaler : StandardScaler
Scaler for numerical features
is_fitted_ : bool
Whether the model has been fitted
Notes
-----
- The model handles both categorical and numerical features automatically
- Categorical features are one-hot encoded
- Numerical features are standardized
- Uses early stopping to prevent overfitting
- Automatically uses GPU if available
"""
[docs]
def __init__(
self,
hidden_layers: List[int] = [64, 64],
output_layer: nn.Module = StickBreakingLayer,
loss_function: nn.Module = CDWCELoss,
dropout: float = 0.2,
max_epochs: int = 1000,
batch_size: int = 32,
lr: float = 0.0001,
optimizer: torch.optim.Optimizer = torch.optim.Adam,
patience: int = 10,
min_delta: float = 0.0001,
verbose: int = 2
):
"""
Initialize the Ordinal Neural Network.
Parameters
----------
hidden_layers : List[int], default=[64, 64]
List of hidden layer sizes
dropout : float, default=0.2
Dropout rate for regularization
max_epochs : int, default=1000
Maximum number of training epochs
batch_size : int, default=32
Batch size for training
lr : float, default=0.001
Learning rate for optimization
optimizer: torch.optim.Optimizer, default=torch.optim.Adam
Optimizer for optimization
patience : int, default=10
Number of epochs to wait for improvement before early stopping
min_delta : float, default=0.0001
Minimum change in validation loss to be considered as improvement
verbose : int, default=2
Verbosity level for training progress
"""
super().__init__()
self.hidden_layers = hidden_layers
self.output_layer = output_layer
self.loss_function = loss_function
self.dropout = dropout
self.max_epochs = max_epochs
self.batch_size = batch_size
self.lr = lr
self.optimizer = optimizer
self.patience = patience
self.min_delta = min_delta
self.verbose = verbose
self._model = None
self._encoder = None
self._scaler = None
self.is_fitted_ = False
[docs]
def get_params(self, deep: bool = True) -> Dict[str, any]:
"""
Get parameters for this estimator.
Parameters
----------
deep : bool, default=True
If True, will return the parameters for this estimator and
contained subobjects that are estimators.
Returns
-------
dict
Parameter names mapped to their values
"""
return {
"hidden_layers": self.hidden_layers,
"dropout": self.dropout,
"max_epochs": self.max_epochs,
"batch_size": self.batch_size,
"lr": self.lr,
"patience": self.patience,
"min_delta": self.min_delta,
"verbose": self.verbose
}
[docs]
def set_params(self, **params: any) -> "ONN":
"""
Set the parameters of this estimator.
Parameters
----------
**params : dict
Estimator parameters
Returns
-------
self : ONN
The estimator instance
"""
for key, value in params.items():
setattr(self, key, value)
return self
[docs]
def fit(self, X: pd.DataFrame, y: pd.Series) -> "ONN":
"""
Fit the Ordinal Neural Network model.
This method fits the model to the training data, handling both categorical
and numerical features appropriately. It uses early stopping to prevent
overfitting and automatically selects the best device (CPU/GPU) for training.
Parameters
----------
X : pd.DataFrame of shape (n_samples, n_features)
Training data
y : pd.Series of shape (n_samples,)
Target values
Returns
-------
self : ONN
The fitted model
Raises
------
ValueError
If the input data contains invalid values
RuntimeError
If there are issues with the neural network training
"""
# Store feature names and ranks
self.feature_names_ = X.columns.tolist()
self.n_features_in_ = X.shape[1]
self.ranks_ = np.unique(y)
# Transform input data
X_transformed = self.transform(X, fit=True)
# Convert to torch tensors
X_tensor = torch.FloatTensor(X_transformed.values)
y_tensor = torch.LongTensor(y.values)
# Initialize model
input_dim = X_transformed.shape[1]
num_classes = len(self.ranks_)
self.output_layer = self.output_layer(num_classes=num_classes,input_shape=self.hidden_layers[-1])
net = FCNet(input_dim, num_classes, self.hidden_layers, self.dropout, self.output_layer)
# Initialize early stopping callback
early_stopping = EarlyStopping(
monitor='valid_loss',
patience=self.patience,
threshold=self.min_delta,
threshold_mode='rel',
lower_is_better=True
)
# Initialize skorch neural network
self._model = NeuralNet(
module=net,
criterion=self.loss_function(num_classes=num_classes),
optimizer=self.optimizer,
max_epochs=self.max_epochs,
batch_size=self.batch_size,
lr=self.lr,
device='cuda' if torch.cuda.is_available() else 'cpu',
callbacks=[early_stopping],
verbose=self.verbose
)
# Fit the model
self._model.fit(X_tensor, y_tensor)
# Set fitted flag
self.is_fitted_ = True
return self
[docs]
def predict(self, X: pd.DataFrame) -> np.ndarray:
"""
Predict ordinal class labels.
Parameters
----------
X : pd.DataFrame of shape (n_samples, n_features)
Samples to predict
Returns
-------
ndarray of shape (n_samples,)
Predicted ordinal class labels
Raises
------
NotFittedError
If the model has not been fitted
"""
return self.predict_proba(X).argmax(axis=1)
[docs]
def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
"""
Predict class probabilities.
Parameters
----------
X : pd.DataFrame of shape (n_samples, n_features)
Samples to predict probabilities for
Returns
-------
ndarray of shape (n_samples, n_classes)
Predicted class probabilities
Raises
------
NotFittedError
If the model has not been fitted
"""
check_is_fitted(self)
X_transformed = self.transform(X, fit=False)
X_tensor = torch.FloatTensor(X_transformed.values)
# Get logits and convert to probabilities
with torch.no_grad():
logits = self._model.predict(X_tensor)
probs = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
return np.array(probs)