clarena.cl_datasets.permuted_oxford_iiit_pet
The submodule in `cl_datasets` for the Permuted Oxford-IIIT Pet dataset.
1r""" 2The submodule in `cl_datasets` for Permuted Oxford-IIIT Pet dataset. 3""" 4 5__all__ = ["PermutedOxfordIIITPet"] 6 7import logging 8from typing import Callable 9 10import torch 11from torch.utils.data import Dataset, random_split 12from torchvision.datasets import OxfordIIITPet 13from torchvision.transforms import transforms 14 15from clarena.cl_datasets import CLPermutedDataset 16from clarena.stl_datasets.raw import OxfordIIITPet2, OxfordIIITPet37 17 18# always get logger for built-in logging in each module 19pylogger = logging.getLogger(__name__) 20 21 22class PermutedOxfordIIITPet(CLPermutedDataset): 23 r"""Permuted Oxford-IIIT Pet dataset. The [Oxford-IIIT Pet dataset](https://www.robots.ox.ac.uk/~vgg/data/pets/) is a collection of cat and dog pictures. It consists of 7,349 images of 37 breeds (classes), each color image. It also provides a binary classification version with 2 classes (cat or dog). We support both versions in Permuted Oxford-IIIT Pet.""" 24 25 def __init__( 26 self, 27 root: str, 28 target_type: str, 29 num_tasks: int, 30 validation_percentage: float, 31 batch_size: int | dict[int, int] = 1, 32 num_workers: int | dict[int, int] = 0, 33 custom_transforms: ( 34 Callable 35 | transforms.Compose 36 | None 37 | dict[int, Callable | transforms.Compose | None] 38 ) = None, 39 repeat_channels: int | None | dict[int, int | None] = None, 40 to_tensor: bool | dict[int, bool] = True, 41 resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None, 42 permutation_mode: str = "first_channel_only", 43 permutation_seeds: dict[int, int] | None = None, 44 ) -> None: 45 r""" 46 **Args:** 47 - **root** (`str`): the root directory where the original Oxford-IIIT Pet data 'OxfordIIITPet/' live. 48 - **target_type** (`str`): the target type; one of: 49 1. 'category': Label for one of the 37 pet categories. 50 2. 'binary-category': Binary label for cat or dog. 51 - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`. 52 - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data. 53 - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders. 54 If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks. 55 - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders. 56 If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks. 57 - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included. 58 If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied. 59 - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat. 60 If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied. 
61 - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`. 62 If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks. 63 - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize. 64 If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied. 65 - **permutation_mode** (`str`): the mode of permutation; one of: 66 1. 'all': permute all pixels. 67 2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order. 68 3. 'first_channel_only': permute only the first channel. 69 - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1. 70 """ 71 if target_type == "category": 72 self.original_dataset_python_class: type[Dataset] = OxfordIIITPet37 73 elif target_type == "binary-category": 74 self.original_dataset_python_class: type[Dataset] = OxfordIIITPet2 75 r"""The original dataset class.""" 76 77 super().__init__( 78 root=root, 79 num_tasks=num_tasks, 80 batch_size=batch_size, 81 num_workers=num_workers, 82 custom_transforms=custom_transforms, 83 repeat_channels=repeat_channels, 84 to_tensor=to_tensor, 85 resize=resize, 86 permutation_mode=permutation_mode, 87 permutation_seeds=permutation_seeds, 88 ) 89 90 self.target_type: str = target_type 91 r"""The target type. """ 92 93 self.validation_percentage: float = validation_percentage 94 r"""The percentage to randomly split some training data into validation data.""" 95 96 def prepare_data(self) -> None: 97 r"""Download the original Oxford-IIIT Pet dataset if haven't.""" 98 99 if self.task_id != 1: 100 return # download all original datasets only at the beginning of first task 101 102 OxfordIIITPet( 103 root=self.root_t, 104 split="trainval", 105 target_types=self.target_type, 106 download=True, 107 ) 108 OxfordIIITPet( 109 root=self.root_t, 110 split="test", 111 target_types=self.target_type, 112 download=True, 113 ) 114 115 pylogger.debug( 116 "The original Oxford-IIIT Pet dataset has been downloaded to %s.", 117 self.root_t, 118 ) 119 120 def train_and_val_dataset(self) -> tuple[Dataset, Dataset]: 121 """Get the training and validation dataset of task `self.task_id`. 122 123 **Returns:** 124 - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`. 125 """ 126 dataset_train_and_val = OxfordIIITPet( 127 root=self.root_t, 128 split="trainval", 129 target_types=self.target_type, 130 transform=self.train_and_val_transforms(), 131 target_transform=self.target_transform(), 132 download=False, 133 ) 134 135 return random_split( 136 dataset_train_and_val, 137 lengths=[1 - self.validation_percentage, self.validation_percentage], 138 generator=torch.Generator().manual_seed( 139 42 140 ), # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments 141 ) 142 143 def test_dataset(self) -> Dataset: 144 r"""Get the test dataset of task `self.task_id`. 
145 146 **Returns:** 147 - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`. 148 """ 149 dataset_test = OxfordIIITPet( 150 root=self.root_t, 151 split="test", 152 target_types=self.target_type, 153 transform=self.test_transforms(), 154 target_transform=self.target_transform(), 155 download=False, 156 ) 157 158 return dataset_test
class PermutedOxfordIIITPet(clarena.cl_datasets.base.CLPermutedDataset):
Permuted Oxford-IIIT Pet dataset. The Oxford-IIIT Pet dataset (https://www.robots.ox.ac.uk/~vgg/data/pets/) is a collection of cat and dog pictures. It consists of 7,349 color images of 37 breeds (classes). It also provides a binary classification version with 2 classes (cat or dog). We support both versions in Permuted Oxford-IIIT Pet.
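To make "permuted" concrete, here is a minimal standalone sketch of what a fixed pixel permutation does to an image tensor. It only illustrates the idea behind the permutation modes documented below; the actual permutation logic lives in `CLPermutedDataset`, and the tensor shape and seed here are hypothetical.

import torch

# stand-in for one color Pet image (hypothetical 3x32x32 size)
img = torch.rand(3, 32, 32)

# a fixed permutation of pixel positions derived from a per-task seed
perm = torch.randperm(32 * 32, generator=torch.Generator().manual_seed(0))

# 'first_channel_only' mode: shuffle the pixels of the first channel only;
# 'all' would instead flatten and shuffle every pixel across all channels
permuted = img.clone()
permuted[0] = permuted[0].flatten()[perm].reshape(32, 32)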
PermutedOxfordIIITPet(
    root: str,
    target_type: str,
    num_tasks: int,
    validation_percentage: float,
    batch_size: int | dict[int, int] = 1,
    num_workers: int | dict[int, int] = 0,
    custom_transforms: Callable | transforms.Compose | None | dict[int, Callable | transforms.Compose | None] = None,
    repeat_channels: int | None | dict[int, int | None] = None,
    to_tensor: bool | dict[int, bool] = True,
    resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
    permutation_mode: str = 'first_channel_only',
    permutation_seeds: dict[int, int] | None = None,
)
Args:
- root (`str`): the root directory where the original Oxford-IIIT Pet data directory 'OxfordIIITPet/' lives.
- target_type (`str`): the target type; one of:
  1. 'category': label for one of the 37 pet categories.
  2. 'binary-category': binary label for cat or dog.
- num_tasks (`int`): the maximum number of tasks supported by the CL dataset. Valid task IDs run from 1 to `num_tasks`.
- validation_percentage (`float`): the percentage of training data randomly split off as validation data.
- batch_size (`int` | `dict[int, int]`): the batch size for the train, val, and test dataloaders. If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, the same batch size is used for all tasks.
- num_workers (`int` | `dict[int, int]`): the number of workers for the dataloaders. If it is a dict, the keys are task IDs and the values are the numbers of workers for each task. If it is an `int`, the same number of workers is used for all tasks.
- custom_transforms (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permutation, and so on are not included. If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
- repeat_channels (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat. If it is a dict, the keys are task IDs and the values are the numbers of channels to repeat for each task. If it is an `int`, the same number of channels is repeated for all tasks.
- to_tensor (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`. If it is a dict, the keys are task IDs and the values indicate whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
- resize (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize. If it is a dict, the keys are task IDs and the values are the resize sizes for each task. If it is a single tuple of two integers, it is applied to all tasks.
- permutation_mode (`str`): the mode of permutation; one of:
  1. 'all': permute all pixels.
  2. 'by_channel': permute channel by channel separately. The same permutation order is applied to all channels.
  3. 'first_channel_only': permute only the first channel.
- permutation_seeds (`dict[int, int]` | `None`): the dict of seeds for the permutation operations used to construct each task. Keys are task IDs and values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks` - 1. A usage sketch follows this list.
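A minimal usage sketch based on the signature above. The root path and hyperparameter values are hypothetical, and the task lifecycle (advancing `task_id`, building dataloaders) is assumed to be driven by the surrounding clarena framework rather than shown here.

from clarena.cl_datasets.permuted_oxford_iiit_pet import PermutedOxfordIIITPet

cl_dataset = PermutedOxfordIIITPet(
    root="data",                    # hypothetical data root
    target_type="binary-category",  # cat-vs-dog labels
    num_tasks=5,                    # valid task IDs 1..5; default permutation seeds 0..4
    validation_percentage=0.1,      # hold out 10% of 'trainval' for validation
    batch_size=64,
    permutation_mode="first_channel_only",
)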
validation_percentage: float
The percentage of training data randomly split off as validation data.
def prepare_data(self) -> None:
Download the original Oxford-IIIT Pet dataset if it hasn't been downloaded yet.
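This method delegates the download to torchvision. If you want to pre-fetch the data outside the framework, the equivalent standalone calls look like this (the root path is hypothetical; torchvision skips the download when the files are already present):

from torchvision.datasets import OxfordIIITPet

# fetch both splits that the CL dataset later reads from
for split in ("trainval", "test"):
    OxfordIIITPet(root="data", split=split, target_types="category", download=True)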
def train_and_val_dataset(self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]:
Get the training and validation dataset of task `self.task_id`.

Returns:
- train_and_val_dataset (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
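The split is produced by `torch.utils.data.random_split` with a generator fixed to seed 42, which is what keeps the train/val partition identical across runs. A self-contained illustration, with a toy dataset standing in for Oxford-IIIT Pet:

import torch
from torch.utils.data import TensorDataset, random_split

toy = TensorDataset(torch.arange(10))
train, val = random_split(
    toy,
    lengths=[0.8, 0.2],  # fractional lengths resolve to 8 and 2 samples
    generator=torch.Generator().manual_seed(42),  # fixed seed -> same split every run
)
print(train.indices, val.indices)  # identical output on every run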
def test_dataset(self) -> torch.utils.data.dataset.Dataset:
Get the test dataset of task `self.task_id`.

Returns:
- test_dataset (`Dataset`): the test dataset of task `self.task_id`.
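For a quick look at the returned dataset outside the framework's own dataloaders, a plain `DataLoader` works. This is a hypothetical inspection snippet, not part of the clarena API, and it assumes `prepare_data` has already run for the current task:

from torch.utils.data import DataLoader

test_loader = DataLoader(cl_dataset.test_dataset(), batch_size=64, shuffle=False)
images, labels = next(iter(test_loader))  # one batch of permuted test images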