clarena.cl_datasets.permuted_fgvc_aircraft

The submodule in cl_datasets for the Permuted FGVC-Aircraft dataset.

View Source

  1r"""
  2The submodule in `cl_datasets` for Permuted FGVC-Aircraft dataset.
  3"""
  4
  5__all__ = ["PermutedFGVCAircraft"]
  6
  7import logging
  8from typing import Callable
  9
 10from torch.utils.data import Dataset
 11from torchvision.datasets import FGVCAircraft
 12from torchvision.transforms import transforms
 13
 14from clarena.cl_datasets import CLPermutedDataset
 15from clarena.stl_datasets.raw import (
 16    FGVCAircraftFamily,
 17    FGVCAircraftManufacturer,
 18    FGVCAircraftVariant,
 19)
 20
 21# always get logger for built-in logging in each module
 22pylogger = logging.getLogger(__name__)
 23
 24
 25class PermutedFGVCAircraft(CLPermutedDataset):
 26    r"""Permuted FGVC-Aircraft dataset. The [FGVC-Aircraft dataset](https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/) is a collection of aircraft images. It consists of 10,200 images, each color image.
 27
 28    FGVC-Aircraft has 3 different class labels by variant, family and manufacturer, which has 102, 70, 41 classes respectively. We support all of them in Permuted FGVC-Aircraft.
 29    """
 30
 31    def __init__(
 32        self,
 33        root: str,
 34        annotation_level: str,
 35        num_tasks: int,
 36        batch_size: int | dict[int, int] = 1,
 37        num_workers: int | dict[int, int] = 0,
 38        custom_transforms: (
 39            Callable
 40            | transforms.Compose
 41            | None
 42            | dict[int, Callable | transforms.Compose | None]
 43        ) = None,
 44        repeat_channels: int | None | dict[int, int | None] = None,
 45        to_tensor: bool | dict[int, bool] = True,
 46        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
 47        permutation_mode: str = "first_channel_only",
 48        permutation_seeds: dict[int, int] | None = None,
 49    ) -> None:
 50        r"""
 51        **Args:**
 52        - **root** (`str`): the root directory where the original FGVCAircraft data 'FGVCAircraft/' live.
 53        - **annotation_level** (`str`): The annotation level, supports 'variant', 'family' and 'manufacturer'.
 54        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
 55        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
 56        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
 57        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
 58        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
 59        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
 60        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
 61        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
 62        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
 63        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
 64        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
 65        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
 66        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
 67        - **permutation_mode** (`str`): the mode of permutation; one of:
 68            1. 'all': permute all pixels.
 69            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 70            3. 'first_channel_only': permute only the first channel.
 71        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
 72        """
 73
 74        if annotation_level == "variant":
 75            self.original_dataset_python_class: type[Dataset] = FGVCAircraftVariant
 76        elif annotation_level == "family":
 77            self.original_dataset_python_class: type[Dataset] = FGVCAircraftFamily
 78        elif annotation_level == "manufacturer":
 79            self.original_dataset_python_class: type[Dataset] = FGVCAircraftManufacturer
 80            r"""The original dataset class."""
 81
 82        super().__init__(
 83            root=root,
 84            num_tasks=num_tasks,
 85            batch_size=batch_size,
 86            num_workers=num_workers,
 87            custom_transforms=custom_transforms,
 88            repeat_channels=repeat_channels,
 89            to_tensor=to_tensor,
 90            resize=resize,
 91            permutation_mode=permutation_mode,
 92            permutation_seeds=permutation_seeds,
 93        )
 94
 95        self.annotation_level: str = annotation_level
 96        r"""The annotation level, supports 'variant', 'family' and 'manufacturer'."""
 97
 98    def prepare_data(self) -> None:
 99        r"""Download the original FGVC-Aircraft dataset if haven't."""
100
101        if self.task_id != 1:
102            return  # download all original datasets only at the beginning of first task
103
104        FGVCAircraft(root=self.root_t, split="train", download=True)
105        FGVCAircraft(root=self.root_t, split="val", download=True)
106        FGVCAircraft(root=self.root_t, split="test", download=True)
107
108        pylogger.debug(
109            "The original FGVC-Aircraft dataset has been downloaded to %s.",
110            self.root_t,
111        )
112
113    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
114        """Get the training and validation dataset of task `self.task_id`.
115
116        **Returns:**
117        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
118        """
119        dataset_train = FGVCAircraft(
120            root=self.root_t,
121            split="train",
122            annotation_level=self.annotation_level,
123            transform=self.train_and_val_transforms(),
124            target_transform=self.target_transform(),
125            download=False,
126        )
127
128        dataset_val = FGVCAircraft(
129            root=self.root_t,
130            split="val",
131            annotation_level=self.annotation_level,
132            transform=self.train_and_val_transforms(),
133            target_transform=self.target_transform(),
134            download=False,
135        )
136
137        return dataset_train, dataset_val
138
139    def test_dataset(self) -> Dataset:
140        r"""Get the test dataset of task `self.task_id`.
141
142        **Returns:**
143        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
144        """
145        dataset_test = FGVCAircraft(
146            root=self.root_t,
147            split="test",
148            annotation_level=self.annotation_level,
149            transform=self.test_transforms(),
150            target_transform=self.target_transform(),
151            download=False,
152        )
153
154        return dataset_test

class PermutedFGVCAircraft(clarena.cl_datasets.base.CLPermutedDataset): View Source

 26class PermutedFGVCAircraft(CLPermutedDataset):
 27    r"""Permuted FGVC-Aircraft dataset. The [FGVC-Aircraft dataset](https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/) is a collection of aircraft images. It consists of 10,200 images, each color image.
 28
 29    FGVC-Aircraft has 3 different class labels by variant, family and manufacturer, which has 102, 70, 41 classes respectively. We support all of them in Permuted FGVC-Aircraft.
 30    """
 31
 32    def __init__(
 33        self,
 34        root: str,
 35        annotation_level: str,
 36        num_tasks: int,
 37        batch_size: int | dict[int, int] = 1,
 38        num_workers: int | dict[int, int] = 0,
 39        custom_transforms: (
 40            Callable
 41            | transforms.Compose
 42            | None
 43            | dict[int, Callable | transforms.Compose | None]
 44        ) = None,
 45        repeat_channels: int | None | dict[int, int | None] = None,
 46        to_tensor: bool | dict[int, bool] = True,
 47        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
 48        permutation_mode: str = "first_channel_only",
 49        permutation_seeds: dict[int, int] | None = None,
 50    ) -> None:
 51        r"""
 52        **Args:**
 53        - **root** (`str`): the root directory where the original FGVCAircraft data 'FGVCAircraft/' live.
 54        - **annotation_level** (`str`): The annotation level, supports 'variant', 'family' and 'manufacturer'.
 55        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
 56        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
 57        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
 58        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
 59        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
 60        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
 61        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
 62        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
 63        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
 64        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
 65        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
 66        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
 67        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
 68        - **permutation_mode** (`str`): the mode of permutation; one of:
 69            1. 'all': permute all pixels.
 70            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 71            3. 'first_channel_only': permute only the first channel.
 72        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
 73        """
 74
 75        if annotation_level == "variant":
 76            self.original_dataset_python_class: type[Dataset] = FGVCAircraftVariant
 77        elif annotation_level == "family":
 78            self.original_dataset_python_class: type[Dataset] = FGVCAircraftFamily
 79        elif annotation_level == "manufacturer":
 80            self.original_dataset_python_class: type[Dataset] = FGVCAircraftManufacturer
 81            r"""The original dataset class."""
 82
 83        super().__init__(
 84            root=root,
 85            num_tasks=num_tasks,
 86            batch_size=batch_size,
 87            num_workers=num_workers,
 88            custom_transforms=custom_transforms,
 89            repeat_channels=repeat_channels,
 90            to_tensor=to_tensor,
 91            resize=resize,
 92            permutation_mode=permutation_mode,
 93            permutation_seeds=permutation_seeds,
 94        )
 95
 96        self.annotation_level: str = annotation_level
 97        r"""The annotation level, supports 'variant', 'family' and 'manufacturer'."""
 98
 99    def prepare_data(self) -> None:
100        r"""Download the original FGVC-Aircraft dataset if haven't."""
101
102        if self.task_id != 1:
103            return  # download all original datasets only at the beginning of first task
104
105        FGVCAircraft(root=self.root_t, split="train", download=True)
106        FGVCAircraft(root=self.root_t, split="val", download=True)
107        FGVCAircraft(root=self.root_t, split="test", download=True)
108
109        pylogger.debug(
110            "The original FGVC-Aircraft dataset has been downloaded to %s.",
111            self.root_t,
112        )
113
114    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
115        """Get the training and validation dataset of task `self.task_id`.
116
117        **Returns:**
118        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
119        """
120        dataset_train = FGVCAircraft(
121            root=self.root_t,
122            split="train",
123            annotation_level=self.annotation_level,
124            transform=self.train_and_val_transforms(),
125            target_transform=self.target_transform(),
126            download=False,
127        )
128
129        dataset_val = FGVCAircraft(
130            root=self.root_t,
131            split="val",
132            annotation_level=self.annotation_level,
133            transform=self.train_and_val_transforms(),
134            target_transform=self.target_transform(),
135            download=False,
136        )
137
138        return dataset_train, dataset_val
139
140    def test_dataset(self) -> Dataset:
141        r"""Get the test dataset of task `self.task_id`.
142
143        **Returns:**
144        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
145        """
146        dataset_test = FGVCAircraft(
147            root=self.root_t,
148            split="test",
149            annotation_level=self.annotation_level,
150            transform=self.test_transforms(),
151            target_transform=self.target_transform(),
152            download=False,
153        )
154
155        return dataset_test

Permuted FGVC-Aircraft dataset. The FGVC-Aircraft dataset is a collection of aircraft images. It consists of 10,200 color images.

FGVC-Aircraft provides 3 different class labels, by variant, family and manufacturer, which have 102, 70 and 41 classes respectively. We support all of them in Permuted FGVC-Aircraft.

PermutedFGVCAircraft( root: str, annotation_level: str, num_tasks: int, batch_size: int | dict[int, int] = 1, num_workers: int | dict[int, int] = 0, custom_transforms: Union[Callable, torchvision.transforms.transforms.Compose, NoneType, dict[int, Union[Callable, torchvision.transforms.transforms.Compose, NoneType]]] = None, repeat_channels: int | None | dict[int, int | None] = None, to_tensor: bool | dict[int, bool] = True, resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None, permutation_mode: str = 'first_channel_only', permutation_seeds: dict[int, int] | None = None) View Source

32    def __init__(
33        self,
34        root: str,
35        annotation_level: str,
36        num_tasks: int,
37        batch_size: int | dict[int, int] = 1,
38        num_workers: int | dict[int, int] = 0,
39        custom_transforms: (
40            Callable
41            | transforms.Compose
42            | None
43            | dict[int, Callable | transforms.Compose | None]
44        ) = None,
45        repeat_channels: int | None | dict[int, int | None] = None,
46        to_tensor: bool | dict[int, bool] = True,
47        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
48        permutation_mode: str = "first_channel_only",
49        permutation_seeds: dict[int, int] | None = None,
50    ) -> None:
51        r"""
52        **Args:**
53        - **root** (`str`): the root directory where the original FGVCAircraft data 'FGVCAircraft/' live.
54        - **annotation_level** (`str`): The annotation level, supports 'variant', 'family' and 'manufacturer'.
55        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
56        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
57        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
58        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
59        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
60        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
61        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
62        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
63        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
64        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
65        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
66        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
67        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
68        - **permutation_mode** (`str`): the mode of permutation; one of:
69            1. 'all': permute all pixels.
70            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
71            3. 'first_channel_only': permute only the first channel.
72        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
73        """
74
75        if annotation_level == "variant":
76            self.original_dataset_python_class: type[Dataset] = FGVCAircraftVariant
77        elif annotation_level == "family":
78            self.original_dataset_python_class: type[Dataset] = FGVCAircraftFamily
79        elif annotation_level == "manufacturer":
80            self.original_dataset_python_class: type[Dataset] = FGVCAircraftManufacturer
81            r"""The original dataset class."""
82
83        super().__init__(
84            root=root,
85            num_tasks=num_tasks,
86            batch_size=batch_size,
87            num_workers=num_workers,
88            custom_transforms=custom_transforms,
89            repeat_channels=repeat_channels,
90            to_tensor=to_tensor,
91            resize=resize,
92            permutation_mode=permutation_mode,
93            permutation_seeds=permutation_seeds,
94        )
95
96        self.annotation_level: str = annotation_level
97        r"""The annotation level, supports 'variant', 'family' and 'manufacturer'."""

Args:

root (str): the root directory where the original FGVCAircraft data 'FGVCAircraft/' lives.
annotation_level (str): The annotation level, supports 'variant', 'family' and 'manufacturer'.
num_tasks (int): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to num_tasks.
batch_size (int | dict[int, int]): the batch size for train, val, and test dataloaders. If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an int, it is the same batch size for all tasks.
num_workers (int | dict[int, int]): the number of workers for dataloaders. If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an int, it is the same number of workers for all tasks.
custom_transforms (transform or transforms.Compose or None or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. ToTensor(), normalization, permute, and so on are not included. If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is None, no custom transforms are applied.
repeat_channels (int | None | dict of them): the number of channels to repeat for each task. Default is None, which means no repeat. If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an int, it is the same number of channels to repeat for all tasks. If it is None, no repeat is applied.
to_tensor (bool | dict[int, bool]): whether to include the ToTensor() transform. Default is True. If it is a dict, the keys are task IDs and the values are whether to include the ToTensor() transform for each task. If it is a single boolean value, it is applied to all tasks.
resize (tuple[int, int] | None or dict of them): the size to resize the images to. Default is None, which means no resize. If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is None, no resize is applied.
permutation_mode (str): the mode of permutation; one of:
1. 'all': permute all pixels.
2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
3. 'first_channel_only': permute only the first channel.
permutation_seeds (dict[int, int] | None): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is None, which creates a dict of seeds from 0 to num_tasks-1.

annotation_level: str

The annotation level, supports 'variant', 'family' and 'manufacturer'.

def prepare_data(self) -> None: View Source

 99    def prepare_data(self) -> None:
100        r"""Download the original FGVC-Aircraft dataset if haven't."""
101
102        if self.task_id != 1:
103            return  # download all original datasets only at the beginning of first task
104
105        FGVCAircraft(root=self.root_t, split="train", download=True)
106        FGVCAircraft(root=self.root_t, split="val", download=True)
107        FGVCAircraft(root=self.root_t, split="test", download=True)
108
109        pylogger.debug(
110            "The original FGVC-Aircraft dataset has been downloaded to %s.",
111            self.root_t,
112        )

Download the original FGVC-Aircraft dataset if it has not been downloaded yet.

def train_and_val_dataset( self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]: View Source

114    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
115        """Get the training and validation dataset of task `self.task_id`.
116
117        **Returns:**
118        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
119        """
120        dataset_train = FGVCAircraft(
121            root=self.root_t,
122            split="train",
123            annotation_level=self.annotation_level,
124            transform=self.train_and_val_transforms(),
125            target_transform=self.target_transform(),
126            download=False,
127        )
128
129        dataset_val = FGVCAircraft(
130            root=self.root_t,
131            split="val",
132            annotation_level=self.annotation_level,
133            transform=self.train_and_val_transforms(),
134            target_transform=self.target_transform(),
135            download=False,
136        )
137
138        return dataset_train, dataset_val

Get the training and validation dataset of task self.task_id.

Returns:

train_and_val_dataset (tuple[Dataset, Dataset]): the train and validation dataset of task self.task_id.

def test_dataset(self) -> torch.utils.data.dataset.Dataset: View Source

140    def test_dataset(self) -> Dataset:
141        r"""Get the test dataset of task `self.task_id`.
142
143        **Returns:**
144        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
145        """
146        dataset_test = FGVCAircraft(
147            root=self.root_t,
148            split="test",
149            annotation_level=self.annotation_level,
150            transform=self.test_transforms(),
151            target_transform=self.target_transform(),
152            download=False,
153        )
154
155        return dataset_test

Get the test dataset of task self.task_id.

Returns:

test_dataset (Dataset): the test dataset of task self.task_id.