clarena.stl_datasets.linnaeus5

The submodule in stl_datasets for Linnaeus 5 dataset.

  1r"""
  2The submodule in `stl_datasets` for Linnaeus 5 dataset.
  3"""
  4
  5__all__ = ["Linnaeus5"]
  6
  7import logging
  8from typing import Callable
  9
 10import torch
 11from torch.utils.data import Dataset, random_split
 12from torchvision.transforms import transforms
 13
 14from clarena.stl_datasets.base import STLDatasetFromRaw
 15from clarena.stl_datasets.raw import Linnaeus5 as Linnaeus5Raw
 16from clarena.stl_datasets.raw import (
 17    Linnaeus5_32,
 18    Linnaeus5_64,
 19    Linnaeus5_128,
 20    Linnaeus5_256,
 21)
 22
 23# always get logger for built-in logging in each module
 24pylogger = logging.getLogger(__name__)
 25
 26
 27class Linnaeus5(STLDatasetFromRaw):
 28    r"""Linnaeus 5 dataset. The [Linnaeus 5 dataset](https://chaladze.com/l5/) is a collection of flower images. It consists of 8,000 images of 5 flower species (classes). It provides 256x256, 128x128, 64x64, and 32x32 color images. We support all of them in Permuted Linnaeus 5."""
 29
 30    def __init__(
 31        self,
 32        root: str,
 33        resolution: str,
 34        validation_percentage: float,
 35        batch_size: int = 1,
 36        num_workers: int = 0,
 37        custom_transforms: Callable | transforms.Compose | None = None,
 38        repeat_channels: int | None = None,
 39        to_tensor: bool = True,
 40        resize: tuple[int, int] | None = None,
 41    ) -> None:
 42        r"""
 43        **Args:**
 44        - **root** (`str`): the root directory where the original Linnaeus 5 data 'Linnaeus5/' live.
 45        - **resolution** (`str`): Image resolution, one of ["256", "128", "64", "32"].
 46        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
 47        - **batch_size** (`int`): The batch size in train, val, test dataloader.
 48        - **num_workers** (`int`): the number of workers for dataloaders.
 49        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalize and so on are not included.
 50        - **repeat_channels** (`int` | `None`): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
 51        - **to_tensor** (`bool`): whether to include `ToTensor()` transform. Default is True.
 52        - **resize** (`tuple[int, int]` | `None` or list of them): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.
 53        """
 54
 55        if resolution == "32":
 56            self.original_dataset_python_class: type[Dataset] = Linnaeus5_32
 57        elif resolution == "64":
 58            self.original_dataset_python_class: type[Dataset] = Linnaeus5_64
 59        elif resolution == "128":
 60            self.original_dataset_python_class: type[Dataset] = Linnaeus5_128
 61        elif resolution == "256":
 62            self.original_dataset_python_class: type[Dataset] = Linnaeus5_256
 63            r"""The original dataset class."""
 64
 65        super().__init__(
 66            root=root,
 67            batch_size=batch_size,
 68            num_workers=num_workers,
 69            custom_transforms=custom_transforms,
 70            repeat_channels=repeat_channels,
 71            to_tensor=to_tensor,
 72            resize=resize,
 73        )
 74
 75        self.resolution: str = resolution
 76        r"""Store the resolution of the original dataset."""
 77
 78        self.validation_percentage: float = validation_percentage
 79        r"""The percentage to randomly split some training data into validation data."""
 80
 81    def prepare_data(self) -> None:
 82        r"""Download the original Linnaeus 5 dataset if haven't."""
 83
 84        Linnaeus5Raw(
 85            root=self.root, resolution=self.resolution, train=True, download=True
 86        )
 87        Linnaeus5Raw(
 88            root=self.root, resolution=self.resolution, train=False, download=True
 89        )
 90
 91        pylogger.debug(
 92            "The original Linnaeus 5 dataset has been downloaded to %s.",
 93            self.root,
 94        )
 95
 96    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
 97        """Get the training and validation dataset.
 98
 99        **Returns:**
100        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset.
101        """
102        dataset_train_and_val = Linnaeus5Raw(
103            root=self.root,
104            resolution=self.resolution,
105            train=True,
106            transform=self.train_and_val_transforms(),
107            target_transform=self.target_transform(),
108            download=False,
109        )
110
111        return random_split(
112            dataset_train_and_val,
113            lengths=[1 - self.validation_percentage, self.validation_percentage],
114            generator=torch.Generator().manual_seed(
115                42
116            ),  # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments
117        )
118
119    def test_dataset(self) -> Dataset:
120        r"""Get the test dataset.
121
122        **Returns:**
123        - **test_dataset** (`Dataset`): the test dataset.
124        """
125        dataset_test = Linnaeus5Raw(
126            root=self.root,
127            resolution=self.resolution,
128            train=False,
129            transform=self.test_transforms(),
130            target_transform=self.target_transform(),
131            download=False,
132        )
133
134        return dataset_test
class Linnaeus5(clarena.stl_datasets.base.STLDatasetFromRaw):
 28class Linnaeus5(STLDatasetFromRaw):
 29    r"""Linnaeus 5 dataset. The [Linnaeus 5 dataset](https://chaladze.com/l5/) is a collection of flower images. It consists of 8,000 images of 5 flower species (classes). It provides 256x256, 128x128, 64x64, and 32x32 color images. We support all of them in Permuted Linnaeus 5."""
 30
 31    def __init__(
 32        self,
 33        root: str,
 34        resolution: str,
 35        validation_percentage: float,
 36        batch_size: int = 1,
 37        num_workers: int = 0,
 38        custom_transforms: Callable | transforms.Compose | None = None,
 39        repeat_channels: int | None = None,
 40        to_tensor: bool = True,
 41        resize: tuple[int, int] | None = None,
 42    ) -> None:
 43        r"""
 44        **Args:**
 45        - **root** (`str`): the root directory where the original Linnaeus 5 data 'Linnaeus5/' live.
 46        - **resolution** (`str`): Image resolution, one of ["256", "128", "64", "32"].
 47        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
 48        - **batch_size** (`int`): The batch size in train, val, test dataloader.
 49        - **num_workers** (`int`): the number of workers for dataloaders.
 50        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalize and so on are not included.
 51        - **repeat_channels** (`int` | `None`): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
 52        - **to_tensor** (`bool`): whether to include `ToTensor()` transform. Default is True.
 53        - **resize** (`tuple[int, int]` | `None` or list of them): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.
 54        """
 55
 56        if resolution == "32":
 57            self.original_dataset_python_class: type[Dataset] = Linnaeus5_32
 58        elif resolution == "64":
 59            self.original_dataset_python_class: type[Dataset] = Linnaeus5_64
 60        elif resolution == "128":
 61            self.original_dataset_python_class: type[Dataset] = Linnaeus5_128
 62        elif resolution == "256":
 63            self.original_dataset_python_class: type[Dataset] = Linnaeus5_256
 64            r"""The original dataset class."""
 65
 66        super().__init__(
 67            root=root,
 68            batch_size=batch_size,
 69            num_workers=num_workers,
 70            custom_transforms=custom_transforms,
 71            repeat_channels=repeat_channels,
 72            to_tensor=to_tensor,
 73            resize=resize,
 74        )
 75
 76        self.resolution: str = resolution
 77        r"""Store the resolution of the original dataset."""
 78
 79        self.validation_percentage: float = validation_percentage
 80        r"""The percentage to randomly split some training data into validation data."""
 81
 82    def prepare_data(self) -> None:
 83        r"""Download the original Linnaeus 5 dataset if haven't."""
 84
 85        Linnaeus5Raw(
 86            root=self.root, resolution=self.resolution, train=True, download=True
 87        )
 88        Linnaeus5Raw(
 89            root=self.root, resolution=self.resolution, train=False, download=True
 90        )
 91
 92        pylogger.debug(
 93            "The original Linnaeus 5 dataset has been downloaded to %s.",
 94            self.root,
 95        )
 96
 97    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
 98        """Get the training and validation dataset.
 99
100        **Returns:**
101        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset.
102        """
103        dataset_train_and_val = Linnaeus5Raw(
104            root=self.root,
105            resolution=self.resolution,
106            train=True,
107            transform=self.train_and_val_transforms(),
108            target_transform=self.target_transform(),
109            download=False,
110        )
111
112        return random_split(
113            dataset_train_and_val,
114            lengths=[1 - self.validation_percentage, self.validation_percentage],
115            generator=torch.Generator().manual_seed(
116                42
117            ),  # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments
118        )
119
120    def test_dataset(self) -> Dataset:
121        r"""Get the test dataset.
122
123        **Returns:**
124        - **test_dataset** (`Dataset`): the test dataset.
125        """
126        dataset_test = Linnaeus5Raw(
127            root=self.root,
128            resolution=self.resolution,
129            train=False,
130            transform=self.test_transforms(),
131            target_transform=self.target_transform(),
132            download=False,
133        )
134
135        return dataset_test

Linnaeus 5 dataset. The Linnaeus 5 dataset is a collection of 8,000 color images in 5 classes (berry, bird, dog, flower and other). It is provided at 256x256, 128x128, 64x64 and 32x32 resolution; all four resolutions are supported here.

Linnaeus5( root: str, resolution: str, validation_percentage: float, batch_size: int = 1, num_workers: int = 0, custom_transforms: Union[Callable, torchvision.transforms.transforms.Compose, NoneType] = None, repeat_channels: int | None = None, to_tensor: bool = True, resize: tuple[int, int] | None = None)
31    def __init__(
32        self,
33        root: str,
34        resolution: str,
35        validation_percentage: float,
36        batch_size: int = 1,
37        num_workers: int = 0,
38        custom_transforms: Callable | transforms.Compose | None = None,
39        repeat_channels: int | None = None,
40        to_tensor: bool = True,
41        resize: tuple[int, int] | None = None,
42    ) -> None:
43        r"""
44        **Args:**
45        - **root** (`str`): the root directory where the original Linnaeus 5 data 'Linnaeus5/' live.
46        - **resolution** (`str`): Image resolution, one of ["256", "128", "64", "32"].
47        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
48        - **batch_size** (`int`): The batch size in train, val, test dataloader.
49        - **num_workers** (`int`): the number of workers for dataloaders.
50        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalize and so on are not included.
51        - **repeat_channels** (`int` | `None`): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
52        - **to_tensor** (`bool`): whether to include `ToTensor()` transform. Default is True.
53        - **resize** (`tuple[int, int]` | `None` or list of them): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.
54        """
55
56        if resolution == "32":
57            self.original_dataset_python_class: type[Dataset] = Linnaeus5_32
58        elif resolution == "64":
59            self.original_dataset_python_class: type[Dataset] = Linnaeus5_64
60        elif resolution == "128":
61            self.original_dataset_python_class: type[Dataset] = Linnaeus5_128
62        elif resolution == "256":
63            self.original_dataset_python_class: type[Dataset] = Linnaeus5_256
64            r"""The original dataset class."""
65
66        super().__init__(
67            root=root,
68            batch_size=batch_size,
69            num_workers=num_workers,
70            custom_transforms=custom_transforms,
71            repeat_channels=repeat_channels,
72            to_tensor=to_tensor,
73            resize=resize,
74        )
75
76        self.resolution: str = resolution
77        r"""Store the resolution of the original dataset."""
78
79        self.validation_percentage: float = validation_percentage
80        r"""The percentage to randomly split some training data into validation data."""

Args:

  • root (str): the root directory where the original Linnaeus 5 data 'Linnaeus5/' lives.
  • resolution (str): Image resolution, one of ["256", "128", "64", "32"].
  • validation_percentage (float): the fraction (between 0 and 1) of the training data that is randomly split off as validation data.
  • batch_size (int): The batch size in train, val, test dataloader.
  • num_workers (int): the number of workers for dataloaders.
  • custom_transforms (transform or transforms.Compose or None): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. ToTensor(), normalize and so on are not included.
  • repeat_channels (int | None): the number of channels to repeat. Default is None, which means no repeat. If not None, it should be an integer.
  • to_tensor (bool): whether to include ToTensor() transform. Default is True.
  • resize (tuple[int, int] | None): the size to resize the images to. Default is None, which means no resize. If not None, it should be a tuple of two integers.
resolution: str

Store the resolution of the original dataset.

validation_percentage: float

The percentage to randomly split some training data into validation data.

def prepare_data(self) -> None:
82    def prepare_data(self) -> None:
83        r"""Download the original Linnaeus 5 dataset if haven't."""
84
85        Linnaeus5Raw(
86            root=self.root, resolution=self.resolution, train=True, download=True
87        )
88        Linnaeus5Raw(
89            root=self.root, resolution=self.resolution, train=False, download=True
90        )
91
92        pylogger.debug(
93            "The original Linnaeus 5 dataset has been downloaded to %s.",
94            self.root,
95        )

Download the original Linnaeus 5 dataset if it has not been downloaded yet.

def train_and_val_dataset( self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]:
 97    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
 98        """Get the training and validation dataset.
 99
100        **Returns:**
101        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset.
102        """
103        dataset_train_and_val = Linnaeus5Raw(
104            root=self.root,
105            resolution=self.resolution,
106            train=True,
107            transform=self.train_and_val_transforms(),
108            target_transform=self.target_transform(),
109            download=False,
110        )
111
112        return random_split(
113            dataset_train_and_val,
114            lengths=[1 - self.validation_percentage, self.validation_percentage],
115            generator=torch.Generator().manual_seed(
116                42
117            ),  # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments
118        )

Get the training and validation dataset.

Returns:

  • train_and_val_dataset (tuple[Dataset, Dataset]): the train and validation dataset.
def test_dataset(self) -> torch.utils.data.dataset.Dataset:
120    def test_dataset(self) -> Dataset:
121        r"""Get the test dataset.
122
123        **Returns:**
124        - **test_dataset** (`Dataset`): the test dataset.
125        """
126        dataset_test = Linnaeus5Raw(
127            root=self.root,
128            resolution=self.resolution,
129            train=False,
130            transform=self.test_transforms(),
131            target_transform=self.target_transform(),
132            download=False,
133        )
134
135        return dataset_test

Get the test dataset.

Returns:

  • test_dataset (Dataset): the test dataset.