Source code for torch_uncertainty.datasets.classification.tabular.dota2_games
import numpy as np
import pandas as pd
import torch
from torch import Tensor
from .base import TabularClassificationDataset
[docs]
class DOTA2Games(TabularClassificationDataset):
    """UCI DOTA 2 Games Results dataset.

    The task is to predict which team wins a DOTA 2 match from the hero
    selection. The data ships already divided into a training file and a
    test file; standardization statistics are computed from the training
    file.

    Note:
        The licenses of the datasets may differ from TorchUncertainty's
        license. Check before use.
    """

    # Dataset descriptors; presumably consumed by the base class for
    # download and verification -- confirm against
    # ``TabularClassificationDataset``.
    md5_zip = "896623c082b062f56b9c49c6c1fc0bf7"
    url = "https://archive.ics.uci.edu/static/public/367/dota2+games+results.zip"
    dataset_name = "dota2_games"
    filename = "dota2Train.csv"
    # Default feature count; overwritten by ``_make_pre_split_dataset`` with
    # the column count actually found in the training file.
    num_features = 116
    need_split = False
    pre_split = True

    def _read(self, fname: str) -> pd.DataFrame:
        """Load one CSV file of the dataset.

        Args:
            fname: Name of the file inside ``root / dataset_name``.

        Returns:
            The file contents parsed without a header row, so columns are
            labelled with consecutive integers starting at 0.
        """
        csv_path = self.root / self.dataset_name / fname
        return pd.read_csv(csv_path, header=None)

    def _split(self, df: pd.DataFrame) -> tuple[Tensor, Tensor]:
        """Separate a raw frame into feature and target tensors.

        Args:
            df: Frame whose first column holds the label and whose remaining
                columns hold the features. The label column must be named
                ``0``, as produced by ``_read``.

        Returns:
            A ``(data, targets)`` pair: ``data`` is a ``float32`` tensor of
            every column except column ``0``; ``targets`` is a ``long``
            tensor equal to 1 where the label equals 1 and 0 otherwise.
        """
        label_column = df.iloc[:, 0]
        # Binarize: label value 1 stays 1, any other value becomes 0.
        binary_labels = np.where(label_column == 1, 1, 0)
        labels = torch.as_tensor(binary_labels.copy(), dtype=torch.long)

        feature_frame = df.drop(columns=[0])
        feature_matrix = feature_frame.values.astype(float)
        inputs = torch.as_tensor(feature_matrix.copy(), dtype=torch.float32)
        return inputs, labels

    def _make_pre_split_dataset(self) -> tuple[Tensor, Tensor, Tensor, Tensor]:
        """Build the train and test tensors from the two provided files.

        Also sets ``self.num_features`` to the number of feature columns
        found in the training file.

        Returns:
            ``(train_data, train_targets, test_data, test_targets)``.
        """
        train_frame = self._read("dota2Train.csv")
        x_train, y_train = self._split(train_frame)

        test_frame = self._read("dota2Test.csv")
        x_test, y_test = self._split(test_frame)

        self.num_features = x_train.shape[1]
        return x_train, y_train, x_test, y_test