[docs]classUCIRegression(Dataset):"""The UCI regression datasets. Args: root (str): Root directory of the datasets. train (bool, optional): If True, creates dataset from training set, otherwise creates from test set. transform (callable, optional): A function/transform that takes in a numpy array and returns a transformed version. target_transform (callable, optional): A function/transform that takes in the target and transforms it. dataset_name (str, optional): The name of the dataset. One of "boston-housing", "concrete", "energy", "kin8nm", "naval-propulsion-plant", "power-plant", "protein", "wine-quality-red", and "yacht". download (bool, optional): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. Note - Ethics: You may want to avoid using the boston-housing dataset because of ethical concerns. Note - License: The licenses of the datasets may differ from TorchUncertainty's license. Check before use. """root_appendix="uci_regression"uci_subsets=["boston","concrete","energy-efficiency","energy-prediction","kin8nm","naval-propulsion-plant","power-plant","protein","wine-quality-red","yacht",]md5_tgz=["d4accdce7a25600298819f8e28e8d593","eba3e28907d4515244165b6b2c311b7b","2018fb7b50778fdc1304d50a78874579","d0f0f8ceaaf45df2233ce0600097bd84","df08c665b7665809e74e32b107836a3a","54f4febcf51bdba12e1ca63e28b3e973","f5065a616eae05eb4ecae445ecf6e720","37bcb77a8abad274a987439e6a3de632","0ddfa7a9379510fe7ff88b9930e3c332","4e6727f462779e2d396e8f7d2ddb79a3",]urls=["https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data","https://archive.ics.uci.edu/static/public/165/concrete+compressive+strength.zip","https://archive.ics.uci.edu/static/public/242/energy+efficiency.zip","https://archive.ics.uci.edu/static/public/374/appliances+energy+prediction.zip","https://zenodo.org/records/14645866/files/kin8nm.csv","https://raw.githubusercontent.com/luishpinto/cm-naval-propulsion-plant/master/data.csv","https://archive.ics.uci.edu/static/public/294/combined+cycle+power+plant.zip","https://archive.ics.uci.edu/static/public/265/physicochemical+""properties+of+protein+tertiary+structure.zip","https://archive.ics.uci.edu/static/public/186/wine+quality.zip","https://archive.ics.uci.edu/static/public/243/yacht+hydrodynamics.zip",]def__init__(self,root:Path|str,transform:Callable|None=None,target_transform:Callable|None=None,dataset_name:str="energy",download:bool=False,seed:int=42,shuffle:bool=True,)->None:super().__init__()self.root=Path(root)self.transform=transformself.target_transform=target_transformself.seed=seedself.shuffle=shuffleifdataset_namenotinself.uci_subsets:raiseValueError(f"The dataset {dataset_name} is not implemented. ""`dataset_name` should be one of {self.uci_subsets}.")self.dataset_name=dataset_namedataset_id=self.uci_subsets.index(dataset_name)self.url=self.urls[dataset_id]self.start_filename=self.url.split("/")[-1]self.md5=self.md5_tgz[dataset_id]ifdownload:self.download()self._make_dataset()def__len__(self)->int:"""Get the length of the dataset."""returnself.data.shape[0]def_check_integrity(self)->bool:"""Check the integrity of the dataset(s)."""returncheck_integrity(self.root/self.root_appendix/Path(self.start_filename),self.md5,)def_standardize(self)->None:self.data=(self.data-self.data_mean)/self.data_stdself.targets=(self.targets-self.target_mean)/self.target_stddef_compute_statistics(self)->None:self.data_mean=self.data.mean(axis=0)self.data_std=self.data.std(axis=0)self.data_std[self.data_std==0]=1self.target_mean=self.targets.mean(axis=0)self.target_std=self.targets.std(axis=0)
[docs]defdownload(self)->None:"""Download and extract dataset."""ifself._check_integrity():logging.info("Files already downloaded and verified")returnifself.urlisNone:raiseValueError(f"The dataset {self.dataset_name} is not available for download.")download_root=self.root/self.root_appendix/self.dataset_nameifself.dataset_name=="boston":download_url(self.url,root=download_root,filename="housing.data",)elifself.dataset_name=="kin8nm":download_url(self.url,root=download_root,filename="kin8nm.csv",)elifself.dataset_name=="naval-propulsion-plant":download_url(self.url,root=download_root,filename="data.csv",)else:download_and_extract_archive(self.url,download_root=download_root,extract_root=download_root,filename=self.start_filename,md5=self.md5,)
def_make_dataset(self)->None:"""Create dataset from extracted files."""ifnotpandas_installed:# coverage: ignoreraiseImportError("Please install torch_uncertainty with the tabular option:""""pip install -U "torch_uncertainty[tabular]".""")path=self.root/self.root_appendix/self.dataset_nameifself.dataset_name=="boston":array=pd.read_table(path/"housing.data",names=boston_column_names,header=None,delim_whitespace=True,)elifself.dataset_name=="concrete":array=pd.read_excel(path/"Concrete_Data.xls").to_numpy()elifself.dataset_name=="energy-efficiency":array=pd.read_excel(path/"ENB2012_data.xlsx").to_numpy()elifself.dataset_name=="energy-prediction":array=pd.read_csv(path/"energydata_complete.csv")[energy_prediction_column_names].to_numpy()elifself.dataset_name=="kin8nm":array=pd.read_csv(path/"kin8nm.csv").to_numpy()elifself.dataset_name=="naval-propulsion-plant":df=pd.read_csv(path/"data.csv",header=None,sep=";",decimal=",")# convert Ex to 10^x and remove second targetarray=df.apply(pd.to_numeric,errors="coerce").to_numpy()[:,:-1]elifself.dataset_name=="protein":array=pd.read_csv(path/"CASP.csv",).to_numpy()elifself.dataset_name=="wine-quality-red":array=pd.read_csv(path/"winequality-red.csv",sep=";",).to_numpy()elifself.dataset_name=="yacht":array=pd.read_csv(path/"yacht_hydrodynamics.data",delim_whitespace=True,header=None,).to_numpy()else:raiseValueError("Dataset not implemented.")array=torch.as_tensor(array).float()ifself.dataset_name=="energy-efficiency":self.data=array[:,2:-3]self.targets=array[:,-2]else:self.data=array[:,:-1]self.targets=array[:,-1]self._compute_statistics()self._standardize()ifself.dataset_name=="energy-prediction":self.data=F.pad(self.data,(0,0,13,0),value=0)ifself.shuffle:gen=torch.Generator()gen.manual_seed(self.seed)indexes=torch.randperm(array.shape[0],generator=gen)array=array[indexes]def__getitem__(self,index:int)->tuple[torch.Tensor,torch.Tensor]:"""Get sample and target for a given index."""ifself.dataset_name=="energy-prediction":data=self.data[index:index+13,:]target=self.data[index:index+13,:]returndata,targetdata=self.data[index]ifself.transformisnotNone:data=self.transform(data)target=self.targets[index]ifself.target_transformisnotNone:target=self.target_transform(target)returndata,target