Source code for torch_uncertainty.datasets.classification.uci.online_shoppers
from collections.abc import Callable
from pathlib import Path
import pandas as pd
import torch
from .uci_classification import UCIClassificationDataset
[docs]class OnlineShoppers(UCIClassificationDataset):
"""The Online Shoppers Intention UCI classification dataset.
Args:
root (str): Root directory of the datasets.
train (bool, optional): If True, creates dataset from training set,
otherwise creates from test set.
transform (callable, optional): A function/transform that takes in a
numpy array and returns a transformed version.
target_transform (callable, optional): A function/transform that takes
in the target and transforms it.
download (bool, optional): If true, downloads the dataset from the
internet and puts it in root directory. If dataset is already
downloaded, it is not downloaded again.
binary (bool, optional): Whether to use binary classification. Defaults
to ``True``.
Note - License:
The licenses of the datasets may differ from TorchUncertainty's
license. Check before use.
"""
md5_zip = "d835049e5f428f3b8cb8a6e6937f5537"
url = "https://archive.ics.uci.edu/static/public/468/online+shoppers+purchasing+intention+dataset.zip"
dataset_name = "online_shoppers"
filename = "online_shoppers_intention.csv"
num_features = 28
def __init__(
self,
root: Path | str,
transform: Callable | None = None,
target_transform: Callable | None = None,
binary: bool = True,
download: bool = False,
train: bool = True,
test_split: float = 0.2,
split_seed: int = 21893027,
) -> None:
super().__init__(
root,
transform,
target_transform,
binary,
download,
train,
test_split,
split_seed,
)
def _make_dataset(self) -> None:
"""Create dataset from extracted files."""
data = pd.read_csv(
self.root / self.dataset_name / self.filename,
sep=",",
true_values=["TRUE"],
false_values=["FALSE"],
)
self.targets = torch.as_tensor(data["Revenue"].values, dtype=torch.long)
data = pd.get_dummies(data).astype(float)
data = data.drop(columns=["Revenue"])
self.data = torch.as_tensor(data.values, dtype=torch.float32)
self.num_features = self.data.shape[1]