Source code for torch_uncertainty.datasets.classification.tabular.dota2_games

import numpy as np
import pandas as pd
import torch
from torch import Tensor

from .base import TabularClassificationDataset


class DOTA2Games(TabularClassificationDataset):
    """The UCI DOTA 2 Games Results dataset.

    Predicts the winning team from hero selection in DOTA 2 matches. The
    dataset is provided pre-split; standardization statistics are computed
    from the training file.

    Note:
        The licenses of the datasets may differ from TorchUncertainty's
        license. Check before use.
    """

    # Download metadata for the UCI archive.
    md5_zip = "896623c082b062f56b9c49c6c1fc0bf7"
    url = "https://archive.ics.uci.edu/static/public/367/dota2+games+results.zip"

    # Sub-directory (relative to ``self.root``) that holds the CSV files.
    dataset_name = "dota2_games"
    filename = "dota2Train.csv"

    # Default feature count; overwritten with the actual column count once
    # the training file has been loaded in ``_make_pre_split_dataset``.
    num_features = 116

    # Flags read by the base class (defined outside this file).
    need_split = False
    pre_split = True

    def _read(self, fname: str) -> pd.DataFrame:
        """Load one header-less CSV file of the dataset.

        Args:
            fname: Name of the CSV file inside the dataset directory.

        Returns:
            The file content with integer column labels (no header row is
            consumed).
        """
        # NOTE(review): assumes ``self.root`` is a ``pathlib.Path`` set by the
        # base class -- confirm against ``TabularClassificationDataset``.
        csv_path = self.root / self.dataset_name / fname
        return pd.read_csv(csv_path, header=None)

    def _split(self, df: pd.DataFrame) -> tuple[Tensor, Tensor]:
        """Separate a raw frame into a feature tensor and a target tensor.

        Args:
            df: Frame whose first column is the label and whose column
                labelled ``0`` is excluded from the features.

        Returns:
            A ``(data, targets)`` pair: ``data`` is a ``float32`` tensor of
            the remaining columns, ``targets`` is a ``long`` tensor holding
            ``1`` where the first column equals ``1`` and ``0`` elsewhere.
        """
        # Binarise the label column: 1 stays 1, every other value becomes 0.
        is_positive = df.iloc[:, 0] == 1
        labels = torch.as_tensor(np.where(is_positive, 1, 0), dtype=torch.long)

        # Everything except the column labelled 0 is a feature.
        feature_array = df.drop(columns=[0]).to_numpy().astype(np.float64)
        inputs = torch.as_tensor(feature_array, dtype=torch.float32)
        return inputs, labels

    def _make_pre_split_dataset(self) -> tuple[Tensor, Tensor, Tensor, Tensor]:
        """Build the train and test tensors from the two provided CSV files.

        Also updates ``self.num_features`` to the number of feature columns
        found in the training file.

        Returns:
            ``(train_data, train_targets, test_data, test_targets)``.
        """
        train_frame = self._read("dota2Train.csv")
        x_train, y_train = self._split(train_frame)

        test_frame = self._read("dota2Test.csv")
        x_test, y_test = self._split(test_frame)

        # Trust the file over the class-level default.
        self.num_features = x_train.shape[1]
        return x_train, y_train, x_test, y_test