clarena.cl_datasets.split_cifar100
The submodule in `cl_datasets` for the Split CIFAR-100 dataset.
1r""" 2The submodule in `cl_datasets` for Split CIFAR-100 dataset. 3""" 4 5__all__ = ["SplitCIFAR100"] 6 7import logging 8from typing import Callable 9 10from torch.utils.data import Dataset, random_split 11from torchvision.datasets import CIFAR100 12from torchvision.transforms import transforms 13 14from clarena.cl_datasets import CLSplitDataset 15 16# always get logger for built-in logging in each module 17pylogger = logging.getLogger(__name__) 18 19 20class SplitCIFAR100(CLSplitDataset): 21 r"""Split CIFAR-100 dataset. The [original CIFAR100 dataset](https://www.cs.toronto.edu/~kriz/cifar.html) is a subset of the 80 million tiny images dataset. It consists of 60,000 32x32 colour images in 100 classes, with 600 images per class. There are 50,000 training examples and 10,000 test examples.""" 22 23 num_classes: int = 100 24 r"""The number of classes in CIFAR-100 dataset.""" 25 26 mean_original: tuple[float] = (0.5074, 0.4867, 0.4411) 27 r"""The mean values for normalisation.""" 28 29 std_original: tuple[float] = (0.2011, 0.1987, 0.2025) 30 r"""The standard deviation values for normalisation.""" 31 32 def __init__( 33 self, 34 root: str, 35 num_tasks: int, 36 class_split: list[list[int]], 37 validation_percentage: float, 38 batch_size: int = 1, 39 num_workers: int = 8, 40 custom_transforms: Callable | transforms.Compose | None = None, 41 custom_target_transforms: Callable | transforms.Compose | None = None, 42 ) -> None: 43 r"""Initialise the Split CIFAR-100 dataset. 44 45 **Args:** 46 - **root** (`str`): the root directory where the original CIFAR-100 data 'cifar-100-python/' live. 47 - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. 48 - **class_split** (`list[list[int]]`): the class split for each task. Each element in the list is a list of class labels (integers starting from 0) to split for a task. 49 - **validation_percentage** (`float`): the percentage to randomly split some of the training data into validation data. 50 - **batch_size** (`int`): The batch size in train, val, test dataloader. 51 - **num_workers** (`int`): the number of workers for dataloaders. 52 - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. 53 `ToTensor()`, normalise, permute and so on are not included. 54 - **custom_target_transforms** (`transform` or `transforms.Compose` or `None`): the custom target transforms to apply to dataset labels. Can be a single transform, composed transforms or no transform. CL class mapping is not included. 55 - **permutation_mode** (`str`): the mode of permutation, should be one of the following: 56 1. 'all': permute all pixels. 57 2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order. 58 3. 'first_channel_only': permute only the first channel. 59 - **permutation_seeds** (`list[int]` or `None`): the seeds for permutation operations used to construct tasks. Make sure it has the same number of seeds as `num_tasks`. Default is None, which creates a list of seeds from 1 to `num_tasks`. 
60 """ 61 CLSplitDataset.__init__( 62 self, 63 root=root, 64 num_tasks=num_tasks, 65 class_split=class_split, 66 validation_percentage=validation_percentage, 67 batch_size=batch_size, 68 num_workers=num_workers, 69 custom_transforms=custom_transforms, 70 custom_target_transforms=custom_target_transforms, 71 ) 72 73 def prepare_data(self) -> None: 74 r"""Download the original CIFAR-100 dataset if haven't.""" 75 # just download 76 CIFAR100(root=self.root, train=True, download=True) 77 CIFAR100(root=self.root, train=False, download=True) 78 79 pylogger.debug( 80 "The original CIFAR-100 dataset has been downloaded to %s.", self.root 81 ) 82 83 def train_and_val_dataset(self) -> tuple[Dataset, Dataset]: 84 r"""Get the training and validation dataset of task `self.task_id`. 85 86 **Returns:** 87 - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`. 88 """ 89 dataset_train_and_val = self.get_class_subset( 90 CIFAR100( 91 root=self.root, 92 train=True, 93 transform=self.train_and_val_transforms(to_tensor=True), 94 download=False, 95 ) 96 ) 97 dataset_train_and_val.target_transform = self.target_transforms() 98 99 return random_split( 100 dataset_train_and_val, 101 lengths=[1 - self.validation_percentage, self.validation_percentage], 102 ) 103 104 def test_dataset(self) -> Dataset: 105 r"""Get the test dataset of task `self.task_id`. 106 107 **Returns:** 108 - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`. 109 """ 110 dataset_test = self.get_class_subset( 111 CIFAR100( 112 root=self.root, 113 train=False, 114 transform=self.test_transforms(to_tensor=True), 115 download=False, 116 ) 117 ) 118 dataset_test.target_transform = self.target_transforms() 119 120 return dataset_test
class SplitCIFAR100(clarena.cl_datasets.base.CLSplitDataset):
Split CIFAR-100 dataset. The original CIFAR-100 dataset is a subset of the 80 million tiny images dataset. It consists of 60,000 32x32 colour images in 100 classes, with 600 images per class. There are 50,000 training examples and 10,000 test examples.
```python
SplitCIFAR100(
    root: str,
    num_tasks: int,
    class_split: list[list[int]],
    validation_percentage: float,
    batch_size: int = 1,
    num_workers: int = 8,
    custom_transforms: Callable | transforms.Compose | None = None,
    custom_target_transforms: Callable | transforms.Compose | None = None,
)
```
Initialise the Split CIFAR-100 dataset.
Args:
- **root** (`str`): the root directory where the original CIFAR-100 data 'cifar-100-python/' live.
- **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset.
- **class_split** (`list[list[int]]`): the class split for each task. Each element in the list is a list of class labels (integers starting from 0) assigned to a task.
- **validation_percentage** (`float`): the fraction of the training data randomly split off as validation data.
- **batch_size** (`int`): the batch size for the train, validation and test dataloaders.
- **num_workers** (`int`): the number of workers for the dataloaders.
- **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to the TRAIN dataset only. Can be a single transform, composed transforms or no transform. `ToTensor()`, normalisation and so on are not included.
- **custom_target_transforms** (`transform` or `transforms.Compose` or `None`): the custom target transforms to apply to dataset labels. Can be a single transform, composed transforms or no transform. The CL class mapping is not included.
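For orientation, here is a minimal usage sketch that builds an even 10-classes-per-task split and constructs the dataset; the `root` path and all hyperparameter values are illustrative choices, not library defaults.

```python
from clarena.cl_datasets.split_cifar100 import SplitCIFAR100

# Assign classes 0-9 to the first task, 10-19 to the second, and so on
# (an illustrative even split; any partition of the 100 labels works).
class_split = [list(range(10 * t, 10 * (t + 1))) for t in range(10)]

cifar100 = SplitCIFAR100(
    root="data/",               # directory where 'cifar-100-python/' will live
    num_tasks=10,
    class_split=class_split,
    validation_percentage=0.1,  # roughly a 90/10 train/val split per task
    batch_size=128,
    num_workers=4,
)
```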
num_classes: int = 100
The number of classes in the CIFAR-100 dataset.
mean_original: tuple[float] = (0.5074, 0.4867, 0.4411)
The mean values for normalisation.
std_original: tuple[float] = (0.2011, 0.1987, 0.2025)
The standard deviation values for normalisation.
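As a standalone illustration, the channel statistics above correspond to the following torchvision normalisation (a sketch only; the class builds its transform pipeline internally):

```python
from torchvision.transforms import transforms

# Normalise CIFAR-100 images with the class's per-channel statistics.
normalise = transforms.Normalize(
    mean=SplitCIFAR100.mean_original,
    std=SplitCIFAR100.std_original,
)
```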
def prepare_data(self) -> None:
Download the original CIFAR-100 dataset if it hasn't been downloaded already.
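A usage sketch, assuming `cifar100` was constructed as in the example above; a single call fetches both the train and test splits into `root`:

```python
# One-off download of 'cifar-100-python/' into the root directory, if absent.
cifar100.prepare_data()
```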
def train_and_val_dataset(self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]:
Get the training and validation dataset of task `self.task_id`.
Returns:
- train_and_val_dataset (`tuple[Dataset, Dataset]`): the training and validation dataset of task `self.task_id`.
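A sketch of consuming the split, assuming the dataset has been set up so that `self.task_id` points at the current task; note that the fractional `lengths` passed to `random_split` require torch >= 1.13:

```python
# Fetch the current task's train/val subsets and check the split sizes.
train_set, val_set = cifar100.train_and_val_dataset()
# e.g. 4500 / 500 for a 10-class task (500 train images per class) at 10% validation
print(len(train_set), len(val_set))
```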
def test_dataset(self) -> torch.utils.data.dataset.Dataset:
Get the test dataset of task `self.task_id`.
Returns:
- test_dataset (`Dataset`): the test dataset of task `self.task_id`.
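Downstream, the returned dataset can be wrapped in a standard `DataLoader` for evaluation (a sketch; the batch size here mirrors the constructor argument but any value works):

```python
from torch.utils.data import DataLoader

# Evaluate on the current task's test subset.
test_loader = DataLoader(cifar100.test_dataset(), batch_size=128, num_workers=4)
for images, labels in test_loader:
    ...  # run the model on this task's test batch
```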