Source code for torch_uncertainty.datasets.classification.tabular.pima_diabetes

import torch

from .base import TabularClassificationDataset, load_arff


class PimaDiabetes(TabularClassificationDataset):
    """UCI Pima Indians Diabetes dataset for binary classification.

    The task is to predict the onset of diabetes from clinical measurements;
    every feature is numeric. The ARFF file is fetched from the OpenML static
    data server (dataset 37).

    Reference:
        J.W. Smith et al., *Using the ADAP Learning Algorithm to Forecast the
        Onset of Diabetes Mellitus*, Proc. SCAMC, 1988.

    Note:
        The licenses of the datasets may differ from TorchUncertainty's
        license. Check before use.
    """

    # OpenML dataset 37 — static CDN, no database dependency
    url = "https://data.openml.org/datasets/0000/0037/dataset_37.arff"
    dataset_name = "pima_diabetes"
    filename = "pima_diabetes.arff"
    is_archive = False

    def _make_dataset(self) -> None:
        """Read the ARFF file and fill ``targets``, ``data`` and ``num_features``.

        The ``class`` column becomes the label tensor (``tested_positive`` is
        mapped to 1, anything else to 0); every remaining column is cast to
        float and stored as the feature matrix.
        """
        arff_path = self.root / self.dataset_name / self.filename
        frame = load_arff(arff_path)

        label_column = "class"
        # Strip surrounding whitespace before comparing against the label name.
        labels = frame[label_column].str.strip()

        # OpenML encodes: tested_negative → 0, tested_positive → 1
        is_positive = (labels == "tested_positive").astype(int)
        self.targets = torch.as_tensor(
            is_positive.values.copy(),
            dtype=torch.long,
        )

        # Everything except the label column is treated as a numeric feature.
        feature_values = frame.drop(columns=[label_column]).values.astype(float)
        self.data = torch.as_tensor(feature_values.copy(), dtype=torch.float32)
        self.num_features = self.data.shape[1]