clarena.cl_datasets.permuted_fgvc_aircraft
The submodule in `cl_datasets` for the Permuted FGVC-Aircraft dataset.
r"""
The submodule in `cl_datasets` for the Permuted FGVC-Aircraft dataset.
"""

__all__ = ["PermutedFGVCAircraft"]

import logging
from typing import Callable

from torch.utils.data import Dataset
from torchvision.datasets import FGVCAircraft
from torchvision.transforms import transforms

from clarena.cl_datasets import CLPermutedDataset
from clarena.stl_datasets.raw import (
    FGVCAircraftFamily,
    FGVCAircraftManufacturer,
    FGVCAircraftVariant,
)

# always get logger for built-in logging in each module
pylogger = logging.getLogger(__name__)


class PermutedFGVCAircraft(CLPermutedDataset):
    r"""Permuted FGVC-Aircraft dataset. The [FGVC-Aircraft dataset](https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/) is a collection of aircraft images. It consists of 10,200 images, each a color image.

    FGVC-Aircraft has 3 different class labels by variant, family and manufacturer, which have 102, 70 and 41 classes, respectively. We support all of them in Permuted FGVC-Aircraft.
    """

    def __init__(
        self,
        root: str,
        annotation_level: str,
        num_tasks: int,
        batch_size: int | dict[int, int] = 1,
        num_workers: int | dict[int, int] = 0,
        custom_transforms: (
            Callable
            | transforms.Compose
            | None
            | dict[int, Callable | transforms.Compose | None]
        ) = None,
        repeat_channels: int | None | dict[int, int | None] = None,
        to_tensor: bool | dict[int, bool] = True,
        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
        permutation_mode: str = "first_channel_only",
        permutation_seeds: dict[int, int] | None = None,
    ) -> None:
        r"""
        **Args:**
        - **root** (`str`): the root directory where the original FGVCAircraft data 'FGVCAircraft/' lives.
        - **annotation_level** (`str`): the annotation level; supports 'variant', 'family' and 'manufacturer'.
        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
        - **permutation_mode** (`str`): the mode of permutation; one of:
            1. 'all': permute all pixels.
            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
            3. 'first_channel_only': permute only the first channel.
        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.

        **Raises:**
        - **ValueError**: if `annotation_level` is not 'variant', 'family' or 'manufacturer'.
        """

        # one entry per supported annotation level, pointing at the matching class imported from `clarena.stl_datasets.raw`
        dataset_classes: dict[str, type[Dataset]] = {
            "variant": FGVCAircraftVariant,
            "family": FGVCAircraftFamily,
            "manufacturer": FGVCAircraftManufacturer,
        }
        if annotation_level not in dataset_classes:
            # fail fast: without this check an unsupported value would leave `self.original_dataset_python_class` unset
            raise ValueError(
                "annotation_level must be one of 'variant', 'family' or 'manufacturer', "
                f"but got {annotation_level!r}."
            )

        self.original_dataset_python_class: type[Dataset] = dataset_classes[
            annotation_level
        ]
        r"""The original dataset class."""

        super().__init__(
            root=root,
            num_tasks=num_tasks,
            batch_size=batch_size,
            num_workers=num_workers,
            custom_transforms=custom_transforms,
            repeat_channels=repeat_channels,
            to_tensor=to_tensor,
            resize=resize,
            permutation_mode=permutation_mode,
            permutation_seeds=permutation_seeds,
        )

        self.annotation_level: str = annotation_level
        r"""The annotation level, supports 'variant', 'family' and 'manufacturer'."""

    def prepare_data(self) -> None:
        r"""Download the original FGVC-Aircraft dataset if it has not been downloaded yet.

        Nothing is done unless `self.task_id` is 1, so the download is only triggered at the beginning of the first task.
        """
        if self.task_id == 1:
            # instantiate every split with `download=True`, in the order train, val, test
            for split in ("train", "val", "test"):
                FGVCAircraft(root=self.root_t, split=split, download=True)

            pylogger.debug(
                "The original FGVC-Aircraft dataset has been downloaded to %s.",
                self.root_t,
            )

    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
        r"""Get the training and validation dataset of task `self.task_id`.

        **Returns:**
        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
        """

        def build_split(split: str) -> Dataset:
            # both splits share the same transforms; each call requests fresh transform objects
            return FGVCAircraft(
                root=self.root_t,
                split=split,
                annotation_level=self.annotation_level,
                transform=self.train_and_val_transforms(),
                target_transform=self.target_transform(),
                download=False,
            )

        return build_split("train"), build_split("val")

    def test_dataset(self) -> Dataset:
        r"""Get the test dataset of task `self.task_id`.

        **Returns:**
        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
        """
        return FGVCAircraft(
            root=self.root_t,
            split="test",
            annotation_level=self.annotation_level,
            transform=self.test_transforms(),
            target_transform=self.target_transform(),
            download=False,
        )
class
PermutedFGVCAircraft(clarena.cl_datasets.base.CLPermutedDataset):
class PermutedFGVCAircraft(CLPermutedDataset):
    r"""Permuted FGVC-Aircraft dataset. The [FGVC-Aircraft dataset](https://www.robots.ox.ac.uk/~vgg/data/fgvc-aircraft/) is a collection of aircraft images. It consists of 10,200 images, each a color image.

    FGVC-Aircraft has 3 different class labels by variant, family and manufacturer, which have 102, 70 and 41 classes, respectively. We support all of them in Permuted FGVC-Aircraft.
    """

    def __init__(
        self,
        root: str,
        annotation_level: str,
        num_tasks: int,
        batch_size: int | dict[int, int] = 1,
        num_workers: int | dict[int, int] = 0,
        custom_transforms: (
            Callable
            | transforms.Compose
            | None
            | dict[int, Callable | transforms.Compose | None]
        ) = None,
        repeat_channels: int | None | dict[int, int | None] = None,
        to_tensor: bool | dict[int, bool] = True,
        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
        permutation_mode: str = "first_channel_only",
        permutation_seeds: dict[int, int] | None = None,
    ) -> None:
        r"""
        **Args:**
        - **root** (`str`): the root directory where the original FGVCAircraft data 'FGVCAircraft/' lives.
        - **annotation_level** (`str`): the annotation level; supports 'variant', 'family' and 'manufacturer'.
        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
        - **permutation_mode** (`str`): the mode of permutation; one of:
            1. 'all': permute all pixels.
            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
            3. 'first_channel_only': permute only the first channel.
        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.

        **Raises:**
        - **ValueError**: if `annotation_level` is not 'variant', 'family' or 'manufacturer'.
        """

        # one entry per supported annotation level, pointing at the matching class imported from `clarena.stl_datasets.raw`
        dataset_classes: dict[str, type[Dataset]] = {
            "variant": FGVCAircraftVariant,
            "family": FGVCAircraftFamily,
            "manufacturer": FGVCAircraftManufacturer,
        }
        if annotation_level not in dataset_classes:
            # fail fast: without this check an unsupported value would leave `self.original_dataset_python_class` unset
            raise ValueError(
                "annotation_level must be one of 'variant', 'family' or 'manufacturer', "
                f"but got {annotation_level!r}."
            )

        self.original_dataset_python_class: type[Dataset] = dataset_classes[
            annotation_level
        ]
        r"""The original dataset class."""

        super().__init__(
            root=root,
            num_tasks=num_tasks,
            batch_size=batch_size,
            num_workers=num_workers,
            custom_transforms=custom_transforms,
            repeat_channels=repeat_channels,
            to_tensor=to_tensor,
            resize=resize,
            permutation_mode=permutation_mode,
            permutation_seeds=permutation_seeds,
        )

        self.annotation_level: str = annotation_level
        r"""The annotation level, supports 'variant', 'family' and 'manufacturer'."""

    def prepare_data(self) -> None:
        r"""Download the original FGVC-Aircraft dataset if it has not been downloaded yet.

        Nothing is done unless `self.task_id` is 1, so the download is only triggered at the beginning of the first task.
        """
        if self.task_id == 1:
            # instantiate every split with `download=True`, in the order train, val, test
            for split in ("train", "val", "test"):
                FGVCAircraft(root=self.root_t, split=split, download=True)

            pylogger.debug(
                "The original FGVC-Aircraft dataset has been downloaded to %s.",
                self.root_t,
            )

    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
        r"""Get the training and validation dataset of task `self.task_id`.

        **Returns:**
        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
        """

        def build_split(split: str) -> Dataset:
            # both splits share the same transforms; each call requests fresh transform objects
            return FGVCAircraft(
                root=self.root_t,
                split=split,
                annotation_level=self.annotation_level,
                transform=self.train_and_val_transforms(),
                target_transform=self.target_transform(),
                download=False,
            )

        return build_split("train"), build_split("val")

    def test_dataset(self) -> Dataset:
        r"""Get the test dataset of task `self.task_id`.

        **Returns:**
        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
        """
        return FGVCAircraft(
            root=self.root_t,
            split="test",
            annotation_level=self.annotation_level,
            transform=self.test_transforms(),
            target_transform=self.target_transform(),
            download=False,
        )
Permuted FGVC-Aircraft dataset. The FGVC-Aircraft dataset is a collection of aircraft images. It consists of 10,200 images, each a color image.
FGVC-Aircraft has 3 different class labels by variant, family and manufacturer, which have 102, 70 and 41 classes, respectively. We support all of them in Permuted FGVC-Aircraft.
PermutedFGVCAircraft( root: str, annotation_level: str, num_tasks: int, batch_size: int | dict[int, int] = 1, num_workers: int | dict[int, int] = 0, custom_transforms: Union[Callable, torchvision.transforms.transforms.Compose, NoneType, dict[int, Union[Callable, torchvision.transforms.transforms.Compose, NoneType]]] = None, repeat_channels: int | None | dict[int, int | None] = None, to_tensor: bool | dict[int, bool] = True, resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None, permutation_mode: str = 'first_channel_only', permutation_seeds: dict[int, int] | None = None)
def __init__(
    self,
    root: str,
    annotation_level: str,
    num_tasks: int,
    batch_size: int | dict[int, int] = 1,
    num_workers: int | dict[int, int] = 0,
    custom_transforms: (
        Callable
        | transforms.Compose
        | None
        | dict[int, Callable | transforms.Compose | None]
    ) = None,
    repeat_channels: int | None | dict[int, int | None] = None,
    to_tensor: bool | dict[int, bool] = True,
    resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
    permutation_mode: str = "first_channel_only",
    permutation_seeds: dict[int, int] | None = None,
) -> None:
    r"""
    **Args:**
    - **root** (`str`): the root directory where the original FGVCAircraft data 'FGVCAircraft/' lives.
    - **annotation_level** (`str`): the annotation level; supports 'variant', 'family' and 'manufacturer'.
    - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
    - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
    If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
    - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
    If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
    - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
    If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
    - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
    If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
    - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
    If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
    - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
    If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
    - **permutation_mode** (`str`): the mode of permutation; one of:
        1. 'all': permute all pixels.
        2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
        3. 'first_channel_only': permute only the first channel.
    - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.

    **Raises:**
    - **ValueError**: if `annotation_level` is not 'variant', 'family' or 'manufacturer'.
    """

    # one entry per supported annotation level, pointing at the matching class imported from `clarena.stl_datasets.raw`
    dataset_classes: dict[str, type[Dataset]] = {
        "variant": FGVCAircraftVariant,
        "family": FGVCAircraftFamily,
        "manufacturer": FGVCAircraftManufacturer,
    }
    if annotation_level not in dataset_classes:
        # fail fast: without this check an unsupported value would leave `self.original_dataset_python_class` unset
        raise ValueError(
            "annotation_level must be one of 'variant', 'family' or 'manufacturer', "
            f"but got {annotation_level!r}."
        )

    self.original_dataset_python_class: type[Dataset] = dataset_classes[
        annotation_level
    ]
    r"""The original dataset class."""

    super().__init__(
        root=root,
        num_tasks=num_tasks,
        batch_size=batch_size,
        num_workers=num_workers,
        custom_transforms=custom_transforms,
        repeat_channels=repeat_channels,
        to_tensor=to_tensor,
        resize=resize,
        permutation_mode=permutation_mode,
        permutation_seeds=permutation_seeds,
    )

    self.annotation_level: str = annotation_level
    r"""The annotation level, supports 'variant', 'family' and 'manufacturer'."""
Args:
- root (`str`): the root directory where the original FGVCAircraft data 'FGVCAircraft/' lives.
- annotation_level (`str`): the annotation level; supports 'variant', 'family' and 'manufacturer'.
- num_tasks (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
- batch_size (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders. If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
- num_workers (`int` | `dict[int, int]`): the number of workers for dataloaders. If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
- custom_transforms (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included. If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
- repeat_channels (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat. If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
- to_tensor (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`. If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
- resize (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize. If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
- permutation_mode (`str`): the mode of permutation; one of:
  - 'all': permute all pixels.
  - 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
  - 'first_channel_only': permute only the first channel.
- permutation_seeds (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
def
prepare_data(self) -> None:
def prepare_data(self) -> None:
    r"""Download the original FGVC-Aircraft dataset if it has not been downloaded yet.

    Nothing is done unless `self.task_id` is 1, so the download is only triggered at the beginning of the first task.
    """
    if self.task_id == 1:
        # instantiate every split with `download=True`, in the order train, val, test
        for split in ("train", "val", "test"):
            FGVCAircraft(root=self.root_t, split=split, download=True)

        pylogger.debug(
            "The original FGVC-Aircraft dataset has been downloaded to %s.",
            self.root_t,
        )
Download the original FGVC-Aircraft dataset if it has not been downloaded yet.
def
train_and_val_dataset( self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]:
def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
    r"""Get the training and validation dataset of task `self.task_id`.

    **Returns:**
    - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
    """

    def build_split(split: str) -> Dataset:
        # both splits share the same transforms; each call requests fresh transform objects
        return FGVCAircraft(
            root=self.root_t,
            split=split,
            annotation_level=self.annotation_level,
            transform=self.train_and_val_transforms(),
            target_transform=self.target_transform(),
            download=False,
        )

    return build_split("train"), build_split("val")
Get the training and validation dataset of task self.task_id.
Returns:
- train_and_val_dataset (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
def
test_dataset(self) -> torch.utils.data.dataset.Dataset:
def test_dataset(self) -> Dataset:
    r"""Get the test dataset of task `self.task_id`.

    **Returns:**
    - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
    """
    return FGVCAircraft(
        root=self.root_t,
        split="test",
        annotation_level=self.annotation_level,
        transform=self.test_transforms(),
        target_transform=self.target_transform(),
        download=False,
    )
Get the test dataset of task self.task_id.
Returns:
- test_dataset (`Dataset`): the test dataset of task `self.task_id`.