Source code for torch_uncertainty.post_processing.calibration.matrix_scaler

from typing import Literal

import torch
from torch import Tensor, device, nn
from torch.nn.functional import linear

from .scaler import Scaler
from .utils import _check_classes


class MatrixScaler(Scaler):
    def __init__(
        self,
        num_classes: int,
        model: nn.Module | None = None,
        init_weight_temperature: float | Tensor = 1,
        init_bias_temperature: float | Tensor | None = None,
        lr: float = 0.1,
        max_iter: int = 200,
        eps: float = 1e-8,
        device: Literal["cpu", "cuda"] | device | None = None,
    ) -> None:
        r"""Matrix scaling post-processing for calibrated probabilities.

        Matrix scaling generalises temperature and vector scaling: the logits
        go through a full affine transformation before the softmax,

        .. math::
            \tilde{\mathbf{p}}(\mathbf{x}) =
            \mathrm{softmax}\!\left(\mathbf{W} \mathbf{z}(\mathbf{x}) + \mathbf{b}\right),

        where :math:`\mathbf{W} \in \mathbb{R}^{C \times C}` and
        :math:`\mathbf{b} \in \mathbb{R}^C` are fit by minimising the
        cross-entropy on a held-out calibration set.

        With :math:`C^2 + C` free parameters, matrix scaling can overfit on
        small calibration sets; consider :class:`VectorScaler` or
        :class:`DirichletScaler` when calibration data is scarce.

        Args:
            num_classes: Number of classes :math:`C`.
            model: Model to calibrate. Defaults to ``None``.
            init_weight_temperature: Temperature :math:`T` used to initialise
                the weight matrix as :math:`1/T \cdot \mathbf{I}`. Defaults to
                ``1``.
            init_bias_temperature: Temperature used to initialise the bias;
                every bias entry starts at the inverse of this value. When
                ``None``, the bias starts as the ``0`` vector. Defaults to
                ``None``.
            lr: Learning rate for the optimizer. Defaults to ``0.1``.
            max_iter: Maximum number of iterations for the optimizer. Defaults
                to ``200``.
            eps: Small value for stability. Defaults to ``1e-8``.
            device: Device to use for optimization. Defaults to ``None``.

        References:
            [1] `Guo, C., Pleiss, G., Sun, Y., & Weinberger, K. Q. (2017). On
            calibration of modern neural networks. ICML 2017
            <https://arxiv.org/abs/1706.04599>`_.

        Warning:
            For binary models, a sigmoid is applied before the prediction is
            transposed to the 2-class case.
        """
        # The base class must be initialised first: set_temperature() reads
        # self.device when it allocates the parameters.
        super().__init__(model=model, lr=lr, max_iter=max_iter, eps=eps, device=device)
        _check_classes(num_classes)
        self.num_classes = num_classes
        self.set_temperature(init_weight_temperature, init_bias_temperature)

    def set_temperature(self, val_weight: float | Tensor, val_bias: float | Tensor | None) -> None:
        """Reset the learnable weight matrix and bias from temperature values.

        The parameters store *inverse* temperatures: the weight becomes the
        identity divided by ``val_weight`` and the bias becomes a vector whose
        entries are all ``1 / val_bias``. The scaler is also marked as not
        trained.

        Args:
            val_weight: Weight temperature value.
            val_bias: Bias temperature value. ``None`` yields a zero bias.
        """
        n_cls, target_device = self.num_classes, self.device

        identity = torch.eye(n_cls, device=target_device)
        # nn.Parameter tracks gradients by default.
        self.inv_temperature_weight = nn.Parameter(identity / val_weight)

        initial_bias = (
            torch.zeros(n_cls, device=target_device)
            if val_bias is None
            else torch.ones(n_cls, device=target_device) / val_bias
        )
        self.inv_temperature_bias = nn.Parameter(initial_bias)

        # Fresh parameters invalidate any previous fit.
        self.trained = False

    def _scale(self, logits: Tensor) -> Tensor:
        """Apply the affine map (weight matrix and bias) to ``logits``."""
        weight = self.inv_temperature_weight
        bias = self.inv_temperature_bias
        return linear(logits, weight, bias)

    @property
    def inv_temperature(self) -> list:
        """Inverse-temperature parameters as ``[weight, bias]``."""
        return [self.inv_temperature_weight, self.inv_temperature_bias]

    @property
    def temperature(self) -> list:
        """Temperatures as ``[inverse of the weight matrix, 1 / bias]``.

        The weight temperature is the matrix inverse of the weight parameter;
        the bias temperature is the element-wise reciprocal of the bias, so
        entries where the bias is zero have no finite temperature.
        """
        weight_temperature = torch.inverse(self.inv_temperature_weight)
        bias_temperature = 1 / self.inv_temperature_bias
        return [weight_temperature, bias_temperature]