Source code for torch_uncertainty.datasets.classification.tabular.credit_approval

import pandas as pd
import torch

from .base import TabularClassificationDataset


class CreditApproval(TabularClassificationDataset):
    """UCI Credit Approval dataset for binary classification.

    The task is to predict whether a credit card application is approved.
    Features are anonymised. Entries written as ``?`` in the raw file are
    treated as missing: numeric attributes are filled with the column mean
    and categorical attributes with the column mode.

    Reference:
        J.R. Quinlan, *Simplifying Decision Trees*, IJMMS, 1987.

    Note:
        The licenses of the datasets may differ from TorchUncertainty's
        license. Check before use.
    """

    # Archive location and on-disk names used to locate the raw file.
    url = "https://archive.ics.uci.edu/static/public/27/credit+approval.zip"
    dataset_name = "credit_approval"
    filename = "crx.data"

    def _make_dataset(self) -> None:
        """Read the raw file and populate ``targets``, ``data`` and ``num_features``.

        The file is read from ``self.root / self.dataset_name / self.filename``
        without a header row. The last column is the label; every other
        column is imputed and then passed through ``pd.get_dummies``.
        """
        csv_path = self.root / self.dataset_name / self.filename
        table = pd.read_csv(csv_path, header=None, na_values=["?"])

        # The label lives in the final column: '+' maps to 1, anything else to 0.
        approved = (table.iloc[:, -1] == "+").astype(int)
        self.targets = torch.as_tensor(approved.values.copy(), dtype=torch.long)

        # Drop the label column; only the features remain in ``table`` from here on.
        table = table.iloc[:, :-1]

        # Fill the gaps column by column: mean for numeric, mode otherwise.
        for name in table.columns:
            column = table[name]
            filler = (
                column.mean()
                if pd.api.types.is_numeric_dtype(column)
                else column.mode()[0]
            )
            table[name] = column.fillna(filler)

        encoded = pd.get_dummies(table).astype(float)
        self.data = torch.as_tensor(encoded.values.copy(), dtype=torch.float32)
        self.num_features = self.data.shape[1]