clarena.stl_datasets.linnaeus5
The submodule in stl_datasets for Linnaeus 5 dataset.
r"""
The submodule in `stl_datasets` for Linnaeus 5 dataset.
"""

__all__ = ["Linnaeus5"]

import logging
from typing import Callable

import torch
from torch.utils.data import Dataset, random_split
from torchvision.transforms import transforms

from clarena.stl_datasets.base import STLDatasetFromRaw
from clarena.stl_datasets.raw import Linnaeus5 as Linnaeus5Raw
from clarena.stl_datasets.raw import (
    Linnaeus5_32,
    Linnaeus5_64,
    Linnaeus5_128,
    Linnaeus5_256,
)

# always get logger for built-in logging in each module
pylogger = logging.getLogger(__name__)


class Linnaeus5(STLDatasetFromRaw):
    r"""Linnaeus 5 dataset. The [Linnaeus 5 dataset](https://chaladze.com/l5/) is a collection of images in 5 classes (berry, bird, dog, flower, and other). It consists of 8,000 images in total. It provides 256x256, 128x128, 64x64, and 32x32 color images. All four resolutions are supported."""

    def __init__(
        self,
        root: str,
        resolution: str,
        validation_percentage: float,
        batch_size: int = 1,
        num_workers: int = 0,
        custom_transforms: Callable | transforms.Compose | None = None,
        repeat_channels: int | None = None,
        to_tensor: bool = True,
        resize: tuple[int, int] | None = None,
    ) -> None:
        r"""
        **Args:**
        - **root** (`str`): the root directory where the original Linnaeus 5 data 'Linnaeus5/' lives.
        - **resolution** (`str`): image resolution, one of ["256", "128", "64", "32"].
        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
        - **batch_size** (`int`): the batch size in train, val, test dataloader.
        - **num_workers** (`int`): the number of workers for dataloaders.
        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalize and so on are not included.
        - **repeat_channels** (`int` | `None`): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
        - **to_tensor** (`bool`): whether to include `ToTensor()` transform. Default is True.
        - **resize** (`tuple[int, int]` | `None`): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.

        **Raises:**
        - **ValueError**: if `resolution` is not one of "32", "64", "128", "256".
        """

        raw_classes_by_resolution: dict[str, type[Dataset]] = {
            "32": Linnaeus5_32,
            "64": Linnaeus5_64,
            "128": Linnaeus5_128,
            "256": Linnaeus5_256,
        }
        # fail fast on an unsupported resolution instead of leaving the attribute below unset
        try:
            raw_class = raw_classes_by_resolution[resolution]
        except (KeyError, TypeError):
            raise ValueError(
                f"Unsupported Linnaeus 5 resolution {resolution!r}; "
                'expected one of "32", "64", "128", "256".'
            ) from None

        # set before calling the parent constructor, as in the original ordering
        self.original_dataset_python_class: type[Dataset] = raw_class
        r"""The original dataset class."""

        super().__init__(
            root=root,
            batch_size=batch_size,
            num_workers=num_workers,
            custom_transforms=custom_transforms,
            repeat_channels=repeat_channels,
            to_tensor=to_tensor,
            resize=resize,
        )

        self.resolution: str = resolution
        r"""Store the resolution of the original dataset."""

        self.validation_percentage: float = validation_percentage
        r"""The percentage to randomly split some training data into validation data."""

    def prepare_data(self) -> None:
        r"""Download the original Linnaeus 5 dataset if it has not been downloaded yet."""

        # instantiate the raw dataset for the train split, then the test split, with `download=True`; the instances themselves are discarded
        for is_train_split in (True, False):
            Linnaeus5Raw(
                root=self.root,
                resolution=self.resolution,
                train=is_train_split,
                download=True,
            )

        pylogger.debug(
            "The original Linnaeus 5 dataset has been downloaded to %s.",
            self.root,
        )

    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
        r"""Get the training and validation dataset.

        **Returns:**
        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset.
        """
        full_training_set = Linnaeus5Raw(
            root=self.root,
            resolution=self.resolution,
            train=True,
            transform=self.train_and_val_transforms(),
            target_transform=self.target_transform(),
            download=False,
        )

        val_fraction = self.validation_percentage
        # this must be set fixed to make sure the datasets across experiments are the same. Don't hand it to the global seed as it might vary across experiments
        split_generator = torch.Generator().manual_seed(42)

        return random_split(
            full_training_set,
            lengths=[1 - val_fraction, val_fraction],
            generator=split_generator,
        )

    def test_dataset(self) -> Dataset:
        r"""Get the test dataset.

        **Returns:**
        - **test_dataset** (`Dataset`): the test dataset.
        """
        return Linnaeus5Raw(
            root=self.root,
            resolution=self.resolution,
            train=False,
            transform=self.test_transforms(),
            target_transform=self.target_transform(),
            download=False,
        )
class
Linnaeus5(clarena.stl_datasets.base.STLDatasetFromRaw):
class Linnaeus5(STLDatasetFromRaw):
    r"""Linnaeus 5 dataset. The [Linnaeus 5 dataset](https://chaladze.com/l5/) is a collection of images in 5 classes (berry, bird, dog, flower, and other). It consists of 8,000 images in total. It provides 256x256, 128x128, 64x64, and 32x32 color images. All four resolutions are supported."""

    def __init__(
        self,
        root: str,
        resolution: str,
        validation_percentage: float,
        batch_size: int = 1,
        num_workers: int = 0,
        custom_transforms: Callable | transforms.Compose | None = None,
        repeat_channels: int | None = None,
        to_tensor: bool = True,
        resize: tuple[int, int] | None = None,
    ) -> None:
        r"""
        **Args:**
        - **root** (`str`): the root directory where the original Linnaeus 5 data 'Linnaeus5/' lives.
        - **resolution** (`str`): image resolution, one of ["256", "128", "64", "32"].
        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
        - **batch_size** (`int`): the batch size in train, val, test dataloader.
        - **num_workers** (`int`): the number of workers for dataloaders.
        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalize and so on are not included.
        - **repeat_channels** (`int` | `None`): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
        - **to_tensor** (`bool`): whether to include `ToTensor()` transform. Default is True.
        - **resize** (`tuple[int, int]` | `None`): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.

        **Raises:**
        - **ValueError**: if `resolution` is not one of "32", "64", "128", "256".
        """

        raw_classes_by_resolution: dict[str, type[Dataset]] = {
            "32": Linnaeus5_32,
            "64": Linnaeus5_64,
            "128": Linnaeus5_128,
            "256": Linnaeus5_256,
        }
        # fail fast on an unsupported resolution instead of leaving the attribute below unset
        try:
            raw_class = raw_classes_by_resolution[resolution]
        except (KeyError, TypeError):
            raise ValueError(
                f"Unsupported Linnaeus 5 resolution {resolution!r}; "
                'expected one of "32", "64", "128", "256".'
            ) from None

        # set before calling the parent constructor, as in the original ordering
        self.original_dataset_python_class: type[Dataset] = raw_class
        r"""The original dataset class."""

        super().__init__(
            root=root,
            batch_size=batch_size,
            num_workers=num_workers,
            custom_transforms=custom_transforms,
            repeat_channels=repeat_channels,
            to_tensor=to_tensor,
            resize=resize,
        )

        self.resolution: str = resolution
        r"""Store the resolution of the original dataset."""

        self.validation_percentage: float = validation_percentage
        r"""The percentage to randomly split some training data into validation data."""

    def prepare_data(self) -> None:
        r"""Download the original Linnaeus 5 dataset if it has not been downloaded yet."""

        # instantiate the raw dataset for the train split, then the test split, with `download=True`; the instances themselves are discarded
        for is_train_split in (True, False):
            Linnaeus5Raw(
                root=self.root,
                resolution=self.resolution,
                train=is_train_split,
                download=True,
            )

        pylogger.debug(
            "The original Linnaeus 5 dataset has been downloaded to %s.",
            self.root,
        )

    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
        r"""Get the training and validation dataset.

        **Returns:**
        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset.
        """
        full_training_set = Linnaeus5Raw(
            root=self.root,
            resolution=self.resolution,
            train=True,
            transform=self.train_and_val_transforms(),
            target_transform=self.target_transform(),
            download=False,
        )

        val_fraction = self.validation_percentage
        # this must be set fixed to make sure the datasets across experiments are the same. Don't hand it to the global seed as it might vary across experiments
        split_generator = torch.Generator().manual_seed(42)

        return random_split(
            full_training_set,
            lengths=[1 - val_fraction, val_fraction],
            generator=split_generator,
        )

    def test_dataset(self) -> Dataset:
        r"""Get the test dataset.

        **Returns:**
        - **test_dataset** (`Dataset`): the test dataset.
        """
        return Linnaeus5Raw(
            root=self.root,
            resolution=self.resolution,
            train=False,
            transform=self.test_transforms(),
            target_transform=self.target_transform(),
            download=False,
        )
Linnaeus 5 dataset. The Linnaeus 5 dataset is a collection of images in 5 classes (berry, bird, dog, flower, and other). It consists of 8,000 images in total. It provides 256x256, 128x128, 64x64, and 32x32 color images. All four resolutions are supported.
Linnaeus5( root: str, resolution: str, validation_percentage: float, batch_size: int = 1, num_workers: int = 0, custom_transforms: Union[Callable, torchvision.transforms.transforms.Compose, NoneType] = None, repeat_channels: int | None = None, to_tensor: bool = True, resize: tuple[int, int] | None = None)
def __init__(
    self,
    root: str,
    resolution: str,
    validation_percentage: float,
    batch_size: int = 1,
    num_workers: int = 0,
    custom_transforms: Callable | transforms.Compose | None = None,
    repeat_channels: int | None = None,
    to_tensor: bool = True,
    resize: tuple[int, int] | None = None,
) -> None:
    r"""
    **Args:**
    - **root** (`str`): the root directory where the original Linnaeus 5 data 'Linnaeus5/' lives.
    - **resolution** (`str`): image resolution, one of ["256", "128", "64", "32"].
    - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
    - **batch_size** (`int`): the batch size in train, val, test dataloader.
    - **num_workers** (`int`): the number of workers for dataloaders.
    - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalize and so on are not included.
    - **repeat_channels** (`int` | `None`): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
    - **to_tensor** (`bool`): whether to include `ToTensor()` transform. Default is True.
    - **resize** (`tuple[int, int]` | `None`): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.

    **Raises:**
    - **ValueError**: if `resolution` is not one of "32", "64", "128", "256".
    """

    raw_classes_by_resolution: dict[str, type[Dataset]] = {
        "32": Linnaeus5_32,
        "64": Linnaeus5_64,
        "128": Linnaeus5_128,
        "256": Linnaeus5_256,
    }
    # fail fast on an unsupported resolution instead of leaving the attribute below unset
    try:
        raw_class = raw_classes_by_resolution[resolution]
    except (KeyError, TypeError):
        raise ValueError(
            f"Unsupported Linnaeus 5 resolution {resolution!r}; "
            'expected one of "32", "64", "128", "256".'
        ) from None

    # set before calling the parent constructor, as in the original ordering
    self.original_dataset_python_class: type[Dataset] = raw_class
    r"""The original dataset class."""

    super().__init__(
        root=root,
        batch_size=batch_size,
        num_workers=num_workers,
        custom_transforms=custom_transforms,
        repeat_channels=repeat_channels,
        to_tensor=to_tensor,
        resize=resize,
    )

    self.resolution: str = resolution
    r"""Store the resolution of the original dataset."""

    self.validation_percentage: float = validation_percentage
    r"""The percentage to randomly split some training data into validation data."""
Args:
- root (
str): the root directory where the original Linnaeus 5 data 'Linnaeus5/' live. - resolution (
str): Image resolution, one of ["256", "128", "64", "32"]. - validation_percentage (
float): the percentage to randomly split some training data into validation data. - batch_size (
int): The batch size in train, val, test dataloader. - num_workers (
int): the number of workers for dataloaders. - custom_transforms (
transform or transforms.Compose or None): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. ToTensor(), normalize and so on are not included. - repeat_channels (
int | None): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer. - to_tensor (
bool): whether to include the ToTensor() transform. Default is True. - resize (
tuple[int, int] | None): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.
validation_percentage: float
The percentage to randomly split some training data into validation data.
def
prepare_data(self) -> None:
def prepare_data(self) -> None:
    r"""Download the original Linnaeus 5 dataset if it has not been downloaded yet."""

    # instantiate the raw dataset for the train split, then the test split, with `download=True`; the instances themselves are discarded
    for is_train_split in (True, False):
        Linnaeus5Raw(
            root=self.root,
            resolution=self.resolution,
            train=is_train_split,
            download=True,
        )

    pylogger.debug(
        "The original Linnaeus 5 dataset has been downloaded to %s.",
        self.root,
    )
Download the original Linnaeus 5 dataset if it has not been downloaded yet.
def
train_and_val_dataset( self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]:
def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
    r"""Get the training and validation dataset.

    **Returns:**
    - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset.
    """
    full_training_set = Linnaeus5Raw(
        root=self.root,
        resolution=self.resolution,
        train=True,
        transform=self.train_and_val_transforms(),
        target_transform=self.target_transform(),
        download=False,
    )

    val_fraction = self.validation_percentage
    # this must be set fixed to make sure the datasets across experiments are the same. Don't hand it to the global seed as it might vary across experiments
    split_generator = torch.Generator().manual_seed(42)

    return random_split(
        full_training_set,
        lengths=[1 - val_fraction, val_fraction],
        generator=split_generator,
    )
Get the training and validation dataset.
Returns:
- train_and_val_dataset (
tuple[Dataset, Dataset]): the train and validation dataset.
def
test_dataset(self) -> torch.utils.data.dataset.Dataset:
def test_dataset(self) -> Dataset:
    r"""Get the test dataset.

    **Returns:**
    - **test_dataset** (`Dataset`): the test dataset.
    """
    # the held-out split of the raw dataset, with the test-time transforms applied
    return Linnaeus5Raw(
        root=self.root,
        resolution=self.resolution,
        train=False,
        transform=self.test_transforms(),
        target_transform=self.target_transform(),
        download=False,
    )
Get the test dataset.
Returns:
- test_dataset (
Dataset): the test dataset.