Source code for torch_uncertainty.datasets.classification.uci.dota2_games
import logging
from collections.abc import Callable
from pathlib import Path
import numpy as np
import pandas as pd
import torch
from torchvision.datasets.utils import download_and_extract_archive
from .uci_classification import UCIClassificationDataset
[docs]
class DOTA2Games(UCIClassificationDataset):
    # Checksum and location of the zipped archive fetched by ``download``.
    md5_zip = "896623c082b062f56b9c49c6c1fc0bf7"
    url = "https://archive.ics.uci.edu/static/public/367/dota2+games+results.zip"
    # NOTE(review): the three attributes below are not read in this class;
    # presumably the parent class uses them -- confirm against
    # ``UCIClassificationDataset``.
    dataset_name = "dota2+games+results"
    filename = "dota2Train.csv"
    # Default feature count; overwritten by ``_make_dataset`` once data is loaded.
    num_features = 116
    need_split = False

    def __init__(
        self,
        root: Path | str,
        transform: Callable | None = None,
        target_transform: Callable | None = None,
        binary: bool = True,
        download: bool = False,
        train: bool = True,
        test_split: float = 0.2,
        split_seed: int = 21893027,
    ) -> None:
        """The DOTA 2 Games UCI classification dataset.

        Args:
            root (str | Path): Root directory of the datasets.
            train (bool, optional): If ``True``, creates dataset from training set,
                otherwise creates from test set.
            transform (callable, optional): A function/transform that takes in a
                numpy array and returns a transformed version. Defaults to ``None``.
            target_transform (callable, optional): A function/transform that takes
                in the target and transforms it. Defaults to ``None``.
            download (bool, optional): If ``True``, downloads the dataset from the
                internet and puts it in root directory. If dataset is already
                downloaded, it is not downloaded again. Defaults to ``False``.
            binary (bool, optional): Whether to use binary classification.
                Defaults to ``True``.
            test_split (float, optional): The fraction of the dataset to use as
                test set. Defaults to ``0.2``.
            split_seed (int, optional): The random seed for splitting the dataset.
                Defaults to ``21893027``.

        Note - License:
            The licenses of the datasets may differ from TorchUncertainty's
            license. Check before use.
        """
        # Every argument is forwarded positionally, in the parent's order.
        super().__init__(
            root, transform, target_transform, binary, download, train, test_split, split_seed
        )

    def download(self) -> None:
        """Fetch the archive and unpack it, unless the files are already in place."""
        if not self._check_integrity():
            download_and_extract_archive(
                self.url,
                download_root=self.root,
                extract_root=self.root / "dota2_games",
                filename="dota2+games+results.zip",
                md5=self.md5_zip,
            )
            return
        logging.info("Files already downloaded and verified")

    def _make_dataset(self) -> None:
        """Load the train or test CSV and populate targets, data and num_features."""
        csv_name = "dota2Train.csv" if self.train else "dota2Test.csv"
        frame = pd.read_csv(self.root / "dota2_games" / csv_name, sep=",", header=None)

        # Column 0 holds the label: entries equal to 1 stay 1, anything else becomes 0.
        labels = np.where(frame[0] == 1, 1, 0)
        self.targets = torch.as_tensor(labels, dtype=torch.long)

        features = frame.drop(columns=[0])
        # Two-valued string columns are binarised ("yes" -> 1, otherwise 0);
        # any remaining string columns are one-hot encoded by ``get_dummies`` below.
        for column in features.select_dtypes(include="object").columns:
            if features[column].nunique() == 2:
                features[column] = np.where(features[column] == "yes", 1, 0)

        encoded = pd.get_dummies(features).astype(float)
        self.data = torch.as_tensor(encoded.values, dtype=torch.float32)
        # Record the actual width of the encoded feature matrix.
        self.num_features = self.data.shape[1]