# Source code for torch_uncertainty.datasets.classification.tabular.credit_g

import pandas as pd
import torch

from .base import TabularClassificationDataset


class GermanCredit(TabularClassificationDataset):
    """The UCI Statlog German Credit dataset.

    Predicts credit risk (good/bad). Reads the ``german.data`` file, whose
    categorical attributes are encoded as string codes (``A11``, ``A34`` ...)
    and are one-hot expanded via :func:`pandas.get_dummies`; numeric
    attributes are kept as-is.

    Reference:
        H. Hofmann, *Statlog (German Credit Data)*, UCI ML Repository, 1994.

    Note:
        The licenses of the datasets may differ from TorchUncertainty's
        license. Check before use.
    """

    url = "https://archive.ics.uci.edu/static/public/144/statlog+german+credit+data.zip"
    dataset_name = "german_credit"
    filename = "german.data"

    def _make_dataset(self) -> None:
        """Parse the raw file and populate the dataset tensors.

        Reads ``self.root / self.dataset_name / self.filename`` as a
        whitespace-separated table without a header row, then sets:

        * ``self.targets``: ``torch.long`` tensor built from the last column,
          shifted by -1.
        * ``self.data``: ``torch.float32`` tensor of all remaining columns
          after :func:`pandas.get_dummies` expansion.
        * ``self.num_features``: number of columns of ``self.data``.
        """
        frame = pd.read_csv(
            self.root / self.dataset_name / self.filename,
            sep=r"\s+",
            header=None,
        )

        # Last column: 1 = good credit → 0, 2 = bad credit → 1
        labels = frame.iloc[:, -1].to_numpy(copy=True) - 1
        self.targets = torch.as_tensor(labels, dtype=torch.long)

        # Every column except the label is a feature; get_dummies expands the
        # string-coded ones and passes the numeric ones through unchanged.
        features = pd.get_dummies(frame.iloc[:, :-1]).astype(float)
        # copy=True hands torch a writable buffer that it owns, rather than a
        # (possibly read-only) view into the DataFrame's storage.
        self.data = torch.as_tensor(features.to_numpy(copy=True), dtype=torch.float32)
        self.num_features = self.data.shape[1]