clarena.stl_datasets.tinyimagenet
The submodule in stl_datasets for the TinyImageNet dataset.
r"""
The submodule in `stl_datasets` for the TinyImageNet dataset.
"""

__all__ = ["TinyImageNet"]

import logging
from typing import Callable

import torch
from tinyimagenet import TinyImageNet as TinyImageNetRaw
from torch.utils.data import Dataset, random_split
from torchvision.transforms import transforms

from clarena.stl_datasets.base import STLDatasetFromRaw

# always get logger for built-in logging in each module
pylogger = logging.getLogger(__name__)


class TinyImageNet(STLDatasetFromRaw):
    r"""TinyImageNet dataset. The [TinyImageNet dataset](http://vision.stanford.edu/teaching/cs231n/reports/2015/pdfs/yle_project.pdf) is a smaller, more manageable version of the [ImageNet dataset](https://www.image-net.org). It consists of 100,000 training, 10,000 validation and 10,000 test images of 200 classes, each a 64x64 color image."""

    original_dataset_python_class: type[Dataset] = TinyImageNetRaw
    r"""The original dataset class."""

    def __init__(
        self,
        root: str,
        validation_percentage: float,
        batch_size: int = 1,
        num_workers: int = 0,
        custom_transforms: Callable | transforms.Compose | None = None,
        repeat_channels: int | None = None,
        to_tensor: bool = True,
        resize: tuple[int, int] | None = None,
    ) -> None:
        r"""Construct the TinyImageNet dataset object.

        **Args:**
        - **root** (`str`): the root directory where the original TinyImageNet data 'tiny-imagenet-200/' lives.
        - **validation_percentage** (`float`): the fraction (between 0 and 1) of the training data to randomly split off as validation data.
        - **batch_size** (`int`): the batch size in train, val, test dataloader.
        - **num_workers** (`int`): the number of workers for dataloaders.
        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalize and so on are not included.
        - **repeat_channels** (`int` | `None`): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
        - **to_tensor** (`bool`): whether to include `ToTensor()` transform. Default is True.
        - **resize** (`tuple[int, int]` | `None`): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.

        **Raises:**
        - **ValueError**: if `validation_percentage` is not within [0, 1].
        """
        # Fail fast: the value is later used as a fractional length in `random_split`,
        # where an out-of-range value would only surface once the datasets are built.
        if not 0 <= validation_percentage <= 1:
            raise ValueError(
                "validation_percentage must be between 0 and 1 (inclusive), "
                f"but got {validation_percentage}."
            )

        super().__init__(
            root=root,
            batch_size=batch_size,
            num_workers=num_workers,
            custom_transforms=custom_transforms,
            repeat_channels=repeat_channels,
            to_tensor=to_tensor,
            resize=resize,
        )

        self.validation_percentage: float = validation_percentage
        r"""The percentage to randomly split some training data into validation data."""

    def prepare_data(self) -> None:
        r"""Download the original TinyImageNet dataset if it has not been downloaded yet."""

        # Instantiating the raw dataset is what this method relies on to fetch the data
        # (presumably it downloads into `root` when missing -- confirm against the `tinyimagenet` package).
        TinyImageNetRaw(root=self.root)

        pylogger.debug(
            "The original TinyImageNet dataset has been downloaded to %s.",
            self.root,
        )

    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
        r"""Get the training and validation dataset.

        **Returns:**
        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset.
        """
        dataset_train_and_val = TinyImageNetRaw(
            root=self.root,
            split="train",
            transform=self.train_and_val_transforms(),
            target_transform=self.target_transform(),
        )

        return random_split(
            dataset_train_and_val,
            lengths=[1 - self.validation_percentage, self.validation_percentage],
            # The seed must stay fixed so that the split is identical across experiments.
            # Do not tie it to the global seed, which may vary across experiments.
            generator=torch.Generator().manual_seed(42),
        )

    def test_dataset(self) -> Dataset:
        r"""Get the test dataset.

        **Returns:**
        - **test_dataset** (`Dataset`): the test dataset.
        """
        # NOTE(review): the raw "val" split is served as the test set; presumably because
        # the validation data used for training is carved out of "train" above -- confirm.
        dataset_test = TinyImageNetRaw(
            root=self.root,
            split="val",
            transform=self.test_transforms(),
            target_transform=self.target_transform(),
        )

        return dataset_test
class
TinyImageNet(clarena.stl_datasets.base.STLDatasetFromRaw):
class TinyImageNet(STLDatasetFromRaw):
    r"""TinyImageNet dataset. The [TinyImageNet dataset](http://vision.stanford.edu/teaching/cs231n/reports/2015/pdfs/yle_project.pdf) is a smaller, more manageable version of the [ImageNet dataset](https://www.image-net.org). It consists of 100,000 training, 10,000 validation and 10,000 test images of 200 classes, each a 64x64 color image."""

    original_dataset_python_class: type[Dataset] = TinyImageNetRaw
    r"""The original dataset class."""

    def __init__(
        self,
        root: str,
        validation_percentage: float,
        batch_size: int = 1,
        num_workers: int = 0,
        custom_transforms: Callable | transforms.Compose | None = None,
        repeat_channels: int | None = None,
        to_tensor: bool = True,
        resize: tuple[int, int] | None = None,
    ) -> None:
        r"""Construct the TinyImageNet dataset object.

        **Args:**
        - **root** (`str`): the root directory where the original TinyImageNet data 'tiny-imagenet-200/' lives.
        - **validation_percentage** (`float`): the fraction (between 0 and 1) of the training data to randomly split off as validation data.
        - **batch_size** (`int`): the batch size in train, val, test dataloader.
        - **num_workers** (`int`): the number of workers for dataloaders.
        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalize and so on are not included.
        - **repeat_channels** (`int` | `None`): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
        - **to_tensor** (`bool`): whether to include `ToTensor()` transform. Default is True.
        - **resize** (`tuple[int, int]` | `None`): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.

        **Raises:**
        - **ValueError**: if `validation_percentage` is not within [0, 1].
        """
        # Fail fast: the value is later used as a fractional length in `random_split`,
        # where an out-of-range value would only surface once the datasets are built.
        if not 0 <= validation_percentage <= 1:
            raise ValueError(
                "validation_percentage must be between 0 and 1 (inclusive), "
                f"but got {validation_percentage}."
            )

        super().__init__(
            root=root,
            batch_size=batch_size,
            num_workers=num_workers,
            custom_transforms=custom_transforms,
            repeat_channels=repeat_channels,
            to_tensor=to_tensor,
            resize=resize,
        )

        self.validation_percentage: float = validation_percentage
        r"""The percentage to randomly split some training data into validation data."""

    def prepare_data(self) -> None:
        r"""Download the original TinyImageNet dataset if it has not been downloaded yet."""

        # Instantiating the raw dataset is what this method relies on to fetch the data
        # (presumably it downloads into `root` when missing -- confirm against the `tinyimagenet` package).
        TinyImageNetRaw(root=self.root)

        pylogger.debug(
            "The original TinyImageNet dataset has been downloaded to %s.",
            self.root,
        )

    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
        r"""Get the training and validation dataset.

        **Returns:**
        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset.
        """
        dataset_train_and_val = TinyImageNetRaw(
            root=self.root,
            split="train",
            transform=self.train_and_val_transforms(),
            target_transform=self.target_transform(),
        )

        return random_split(
            dataset_train_and_val,
            lengths=[1 - self.validation_percentage, self.validation_percentage],
            # The seed must stay fixed so that the split is identical across experiments.
            # Do not tie it to the global seed, which may vary across experiments.
            generator=torch.Generator().manual_seed(42),
        )

    def test_dataset(self) -> Dataset:
        r"""Get the test dataset.

        **Returns:**
        - **test_dataset** (`Dataset`): the test dataset.
        """
        # NOTE(review): the raw "val" split is served as the test set; presumably because
        # the validation data used for training is carved out of "train" above -- confirm.
        dataset_test = TinyImageNetRaw(
            root=self.root,
            split="val",
            transform=self.test_transforms(),
            target_transform=self.target_transform(),
        )

        return dataset_test
TinyImageNet dataset. The TinyImageNet dataset is a smaller, more manageable version of the ImageNet dataset. It consists of 100,000 training, 10,000 validation and 10,000 test images of 200 classes, each a 64x64 color image.
TinyImageNet( root: str, validation_percentage: float, batch_size: int = 1, num_workers: int = 0, custom_transforms: Union[Callable, torchvision.transforms.transforms.Compose, NoneType] = None, repeat_channels: int | None = None, to_tensor: bool = True, resize: tuple[int, int] | None = None)
def __init__(
    self,
    root: str,
    validation_percentage: float,
    batch_size: int = 1,
    num_workers: int = 0,
    custom_transforms: Callable | transforms.Compose | None = None,
    repeat_channels: int | None = None,
    to_tensor: bool = True,
    resize: tuple[int, int] | None = None,
) -> None:
    r"""Construct the TinyImageNet dataset object.

    **Args:**
    - **root** (`str`): the root directory where the original TinyImageNet data 'tiny-imagenet-200/' lives.
    - **validation_percentage** (`float`): the fraction (between 0 and 1) of the training data to randomly split off as validation data.
    - **batch_size** (`int`): the batch size in train, val, test dataloader.
    - **num_workers** (`int`): the number of workers for dataloaders.
    - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalize and so on are not included.
    - **repeat_channels** (`int` | `None`): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
    - **to_tensor** (`bool`): whether to include `ToTensor()` transform. Default is True.
    - **resize** (`tuple[int, int]` | `None`): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.

    **Raises:**
    - **ValueError**: if `validation_percentage` is not within [0, 1].
    """
    # Fail fast: the value is stored and later used as a fractional split length,
    # so reject out-of-range values here instead of when the datasets are built.
    if not 0 <= validation_percentage <= 1:
        raise ValueError(
            "validation_percentage must be between 0 and 1 (inclusive), "
            f"but got {validation_percentage}."
        )

    super().__init__(
        root=root,
        batch_size=batch_size,
        num_workers=num_workers,
        custom_transforms=custom_transforms,
        repeat_channels=repeat_channels,
        to_tensor=to_tensor,
        resize=resize,
    )

    self.validation_percentage: float = validation_percentage
    r"""The percentage to randomly split some training data into validation data."""
Args:
- root (str): the root directory where the original TinyImageNet data 'tiny-imagenet-200/' lives.
- validation_percentage (float): the percentage of the training data to randomly split off as validation data.
- batch_size (int): the batch size in train, val, test dataloader.
- num_workers (int): the number of workers for dataloaders.
- custom_transforms (transform or transforms.Compose or None): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. ToTensor(), normalize and so on are not included.
- repeat_channels (int | None): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
- to_tensor (bool): whether to include the ToTensor() transform. Default is True.
- resize (tuple[int, int] | None or list of them): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.
original_dataset_python_class: type[torch.utils.data.dataset.Dataset] =
<class 'tinyimagenet.TinyImageNet'>
The original dataset class.
validation_percentage: float
The percentage to randomly split some training data into validation data.
def
prepare_data(self) -> None:
def prepare_data(self) -> None:
    r"""Download the original TinyImageNet dataset if it has not been downloaded yet."""
    data_root = self.root

    # The raw dataset object is built only for its side effect and then discarded
    # (presumably it fetches the data into `data_root` when missing -- confirm).
    TinyImageNetRaw(root=data_root)

    pylogger.debug(
        "The original TinyImageNet dataset has been downloaded to %s.", data_root
    )
Download the original TinyImageNet dataset if it has not been downloaded yet.
def
train_and_val_dataset( self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]:
def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
    r"""Build the training and validation datasets from the raw "train" split.

    **Returns:**
    - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the two subsets produced by `random_split`, training subset first, validation subset second.
    """
    full_train_set = TinyImageNetRaw(
        root=self.root,
        split="train",
        transform=self.train_and_val_transforms(),
        target_transform=self.target_transform(),
    )

    # The seed is hard-coded on purpose: the split has to be identical across
    # experiments, so it must not follow the global seed, which may vary.
    split_generator = torch.Generator()
    split_generator.manual_seed(42)

    val_fraction = self.validation_percentage
    return random_split(
        full_train_set,
        lengths=[1 - val_fraction, val_fraction],
        generator=split_generator,
    )
Get the training and validation dataset.
Returns:
- train_and_val_dataset (
tuple[Dataset, Dataset]): the train and validation dataset.
def
test_dataset(self) -> torch.utils.data.dataset.Dataset:
def test_dataset(self) -> Dataset:
    r"""Build the test dataset.

    **Returns:**
    - **test_dataset** (`Dataset`): the test dataset, read from the raw "val" split with the test transforms applied.
    """
    # NOTE(review): the raw "val" split is what gets served as the test set here -- confirm this is intended.
    return TinyImageNetRaw(
        root=self.root,
        split="val",
        transform=self.test_transforms(),
        target_transform=self.target_transform(),
    )
Get the test dataset.
Returns:
- test_dataset (
Dataset): the test dataset.