Source code for torch_uncertainty.datasets.classification.tabular.higgs

import torch

from .base import TabularClassificationDataset, load_arff


class HiggsBoson(TabularClassificationDataset):
    """Higgs Boson tabular dataset, small version (OpenML 23512, 98,050 samples).

    Binary classification task: decide whether a collision event produced a
    Higgs boson or is merely background noise. Every feature is numerical.
    The data is fetched from the OpenML repository as a single ARFF file.

    Reference:
        Baldi et al., *Searching for Exotic Particles in High-Energy Physics
        with Deep Learning*, Nature Communications, 2014.

    Note:
        The licenses of the datasets may differ from TorchUncertainty's
        license. Check before use.
    """

    # OpenML dataset 23512, file_id 2063675
    url = "https://api.openml.org/data/v1/download/2063675"
    dataset_name = "higgs_boson"
    filename = "higgs.arff"
    # The download is a plain ARFF file, not an archive.
    is_archive = False

    def _make_dataset(self) -> None:
        """Read the ARFF file and populate ``targets``, ``data`` and ``num_features``.

        The file is looked up at ``root / dataset_name / filename``. The
        ``"class"`` column becomes the ``torch.long`` target tensor; all
        remaining columns become the ``torch.float32`` feature tensor.
        """
        arff_path = self.root / self.dataset_name / self.filename
        # NOTE(review): load_arff presumably returns a pandas DataFrame --
        # confirm against its definition in ``.base``.
        frame = load_arff(arff_path)

        label_column = "class"
        # Labels go through float before int; presumably the raw values are
        # not directly castable to int -- verify against load_arff's output.
        labels = frame[label_column].astype(float).astype(int).values.copy()
        self.targets = torch.as_tensor(labels, dtype=torch.long)

        # Everything except the label column is treated as a feature.
        features = frame.drop(columns=[label_column]).values.astype(float).copy()
        self.data = torch.as_tensor(features, dtype=torch.float32)
        self.num_features = self.data.shape[1]