clarena.cl_datasets.split_cifar100

The submodule in cl_datasets for the Split CIFAR-100 dataset.

  1r"""
  2The submodule in `cl_datasets` for Split CIFAR-100 dataset.
  3"""
  4
  5__all__ = ["SplitCIFAR100"]
  6
  7import logging
  8from typing import Callable
  9
 10from torch.utils.data import Dataset, random_split
 11from torchvision.datasets import CIFAR100
 12from torchvision.transforms import transforms
 13
 14from clarena.cl_datasets import CLSplitDataset
 15
 16# always get logger for built-in logging in each module
 17pylogger = logging.getLogger(__name__)
 18
 19
 20class SplitCIFAR100(CLSplitDataset):
 21    r"""Split CIFAR-100 dataset. The [original CIFAR100 dataset](https://www.cs.toronto.edu/~kriz/cifar.html) is a subset of the 80 million tiny images dataset. It consists of 60,000 32x32 colour images in 100 classes, with 600 images per class. There are 50,000 training examples and 10,000 test examples."""
 22
 23    num_classes: int = 100
 24    r"""The number of classes in CIFAR-100 dataset."""
 25
 26    mean_original: tuple[float] = (0.5074, 0.4867, 0.4411)
 27    r"""The mean values for normalisation."""
 28
 29    std_original: tuple[float] = (0.2011, 0.1987, 0.2025)
 30    r"""The standard deviation values for normalisation."""
 31
 32    def __init__(
 33        self,
 34        root: str,
 35        num_tasks: int,
 36        class_split: list[list[int]],
 37        validation_percentage: float,
 38        batch_size: int = 1,
 39        num_workers: int = 8,
 40        custom_transforms: Callable | transforms.Compose | None = None,
 41        custom_target_transforms: Callable | transforms.Compose | None = None,
 42    ) -> None:
 43        r"""Initialise the Split CIFAR-100 dataset.
 44
 45        **Args:**
 46        - **root** (`str`): the root directory where the original CIFAR-100 data 'cifar-100-python/' live.
 47        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset.
 48        - **class_split** (`list[list[int]]`): the class split for each task. Each element in the list is a list of class labels (integers starting from 0) to split for a task.
 49        - **validation_percentage** (`float`): the percentage to randomly split some of the training data into validation data.
 50        - **batch_size** (`int`): The batch size in train, val, test dataloader.
 51        - **num_workers** (`int`): the number of workers for dataloaders.
 52        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform.
 53        `ToTensor()`, normalise, permute and so on are not included.
 54        - **custom_target_transforms** (`transform` or `transforms.Compose` or `None`): the custom target transforms to apply to dataset labels. Can be a single transform, composed transforms or no transform. CL class mapping is not included.
 55        - **permutation_mode** (`str`): the mode of permutation, should be one of the following:
 56            1. 'all': permute all pixels.
 57            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 58            3. 'first_channel_only': permute only the first channel.
 59        - **permutation_seeds** (`list[int]` or `None`): the seeds for permutation operations used to construct tasks. Make sure it has the same number of seeds as `num_tasks`. Default is None, which creates a list of seeds from 1 to `num_tasks`.
 60        """
 61        CLSplitDataset.__init__(
 62            self,
 63            root=root,
 64            num_tasks=num_tasks,
 65            class_split=class_split,
 66            validation_percentage=validation_percentage,
 67            batch_size=batch_size,
 68            num_workers=num_workers,
 69            custom_transforms=custom_transforms,
 70            custom_target_transforms=custom_target_transforms,
 71        )
 72
 73    def prepare_data(self) -> None:
 74        r"""Download the original CIFAR-100 dataset if haven't."""
 75        # just download
 76        CIFAR100(root=self.root, train=True, download=True)
 77        CIFAR100(root=self.root, train=False, download=True)
 78
 79        pylogger.debug(
 80            "The original CIFAR-100 dataset has been downloaded to %s.", self.root
 81        )
 82
 83    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
 84        r"""Get the training and validation dataset of task `self.task_id`.
 85
 86        **Returns:**
 87        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
 88        """
 89        dataset_train_and_val = self.get_class_subset(
 90            CIFAR100(
 91                root=self.root,
 92                train=True,
 93                transform=self.train_and_val_transforms(to_tensor=True),
 94                download=False,
 95            )
 96        )
 97        dataset_train_and_val.target_transform = self.target_transforms()
 98
 99        return random_split(
100            dataset_train_and_val,
101            lengths=[1 - self.validation_percentage, self.validation_percentage],
102        )
103
104    def test_dataset(self) -> Dataset:
105        r"""Get the test dataset of task `self.task_id`.
106
107        **Returns:**
108        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
109        """
110        dataset_test = self.get_class_subset(
111            CIFAR100(
112                root=self.root,
113                train=False,
114                transform=self.test_transforms(to_tensor=True),
115                download=False,
116            )
117        )
118        dataset_test.target_transform = self.target_transforms()
119
120        return dataset_test
class SplitCIFAR100(clarena.cl_datasets.base.CLSplitDataset):
 21class SplitCIFAR100(CLSplitDataset):
 22    r"""Split CIFAR-100 dataset. The [original CIFAR100 dataset](https://www.cs.toronto.edu/~kriz/cifar.html) is a subset of the 80 million tiny images dataset. It consists of 60,000 32x32 colour images in 100 classes, with 600 images per class. There are 50,000 training examples and 10,000 test examples."""
 23
 24    num_classes: int = 100
 25    r"""The number of classes in CIFAR-100 dataset."""
 26
 27    mean_original: tuple[float] = (0.5074, 0.4867, 0.4411)
 28    r"""The mean values for normalisation."""
 29
 30    std_original: tuple[float] = (0.2011, 0.1987, 0.2025)
 31    r"""The standard deviation values for normalisation."""
 32
 33    def __init__(
 34        self,
 35        root: str,
 36        num_tasks: int,
 37        class_split: list[list[int]],
 38        validation_percentage: float,
 39        batch_size: int = 1,
 40        num_workers: int = 8,
 41        custom_transforms: Callable | transforms.Compose | None = None,
 42        custom_target_transforms: Callable | transforms.Compose | None = None,
 43    ) -> None:
 44        r"""Initialise the Split CIFAR-100 dataset.
 45
 46        **Args:**
 47        - **root** (`str`): the root directory where the original CIFAR-100 data 'cifar-100-python/' live.
 48        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset.
 49        - **class_split** (`list[list[int]]`): the class split for each task. Each element in the list is a list of class labels (integers starting from 0) to split for a task.
 50        - **validation_percentage** (`float`): the percentage to randomly split some of the training data into validation data.
 51        - **batch_size** (`int`): The batch size in train, val, test dataloader.
 52        - **num_workers** (`int`): the number of workers for dataloaders.
 53        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform.
 54        `ToTensor()`, normalise, permute and so on are not included.
 55        - **custom_target_transforms** (`transform` or `transforms.Compose` or `None`): the custom target transforms to apply to dataset labels. Can be a single transform, composed transforms or no transform. CL class mapping is not included.
 56        - **permutation_mode** (`str`): the mode of permutation, should be one of the following:
 57            1. 'all': permute all pixels.
 58            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 59            3. 'first_channel_only': permute only the first channel.
 60        - **permutation_seeds** (`list[int]` or `None`): the seeds for permutation operations used to construct tasks. Make sure it has the same number of seeds as `num_tasks`. Default is None, which creates a list of seeds from 1 to `num_tasks`.
 61        """
 62        CLSplitDataset.__init__(
 63            self,
 64            root=root,
 65            num_tasks=num_tasks,
 66            class_split=class_split,
 67            validation_percentage=validation_percentage,
 68            batch_size=batch_size,
 69            num_workers=num_workers,
 70            custom_transforms=custom_transforms,
 71            custom_target_transforms=custom_target_transforms,
 72        )
 73
 74    def prepare_data(self) -> None:
 75        r"""Download the original CIFAR-100 dataset if haven't."""
 76        # just download
 77        CIFAR100(root=self.root, train=True, download=True)
 78        CIFAR100(root=self.root, train=False, download=True)
 79
 80        pylogger.debug(
 81            "The original CIFAR-100 dataset has been downloaded to %s.", self.root
 82        )
 83
 84    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
 85        r"""Get the training and validation dataset of task `self.task_id`.
 86
 87        **Returns:**
 88        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
 89        """
 90        dataset_train_and_val = self.get_class_subset(
 91            CIFAR100(
 92                root=self.root,
 93                train=True,
 94                transform=self.train_and_val_transforms(to_tensor=True),
 95                download=False,
 96            )
 97        )
 98        dataset_train_and_val.target_transform = self.target_transforms()
 99
100        return random_split(
101            dataset_train_and_val,
102            lengths=[1 - self.validation_percentage, self.validation_percentage],
103        )
104
105    def test_dataset(self) -> Dataset:
106        r"""Get the test dataset of task `self.task_id`.
107
108        **Returns:**
109        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
110        """
111        dataset_test = self.get_class_subset(
112            CIFAR100(
113                root=self.root,
114                train=False,
115                transform=self.test_transforms(to_tensor=True),
116                download=False,
117            )
118        )
119        dataset_test.target_transform = self.target_transforms()
120
121        return dataset_test

Split CIFAR-100 dataset. The original CIFAR100 dataset is a subset of the 80 million tiny images dataset. It consists of 60,000 32x32 colour images in 100 classes, with 600 images per class. There are 50,000 training examples and 10,000 test examples.

SplitCIFAR100( root: str, num_tasks: int, class_split: list[list[int]], validation_percentage: float, batch_size: int = 1, num_workers: int = 8, custom_transforms: Union[Callable, torchvision.transforms.transforms.Compose, NoneType] = None, custom_target_transforms: Union[Callable, torchvision.transforms.transforms.Compose, NoneType] = None)
33    def __init__(
34        self,
35        root: str,
36        num_tasks: int,
37        class_split: list[list[int]],
38        validation_percentage: float,
39        batch_size: int = 1,
40        num_workers: int = 8,
41        custom_transforms: Callable | transforms.Compose | None = None,
42        custom_target_transforms: Callable | transforms.Compose | None = None,
43    ) -> None:
44        r"""Initialise the Split CIFAR-100 dataset.
45
46        **Args:**
47        - **root** (`str`): the root directory where the original CIFAR-100 data 'cifar-100-python/' live.
48        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset.
49        - **class_split** (`list[list[int]]`): the class split for each task. Each element in the list is a list of class labels (integers starting from 0) to split for a task.
50        - **validation_percentage** (`float`): the percentage to randomly split some of the training data into validation data.
51        - **batch_size** (`int`): The batch size in train, val, test dataloader.
52        - **num_workers** (`int`): the number of workers for dataloaders.
53        - **custom_transforms** (`transform` or `transforms.Compose` or `None`): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform.
54        `ToTensor()`, normalise, permute and so on are not included.
55        - **custom_target_transforms** (`transform` or `transforms.Compose` or `None`): the custom target transforms to apply to dataset labels. Can be a single transform, composed transforms or no transform. CL class mapping is not included.
56        - **permutation_mode** (`str`): the mode of permutation, should be one of the following:
57            1. 'all': permute all pixels.
58            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
59            3. 'first_channel_only': permute only the first channel.
60        - **permutation_seeds** (`list[int]` or `None`): the seeds for permutation operations used to construct tasks. Make sure it has the same number of seeds as `num_tasks`. Default is None, which creates a list of seeds from 1 to `num_tasks`.
61        """
62        CLSplitDataset.__init__(
63            self,
64            root=root,
65            num_tasks=num_tasks,
66            class_split=class_split,
67            validation_percentage=validation_percentage,
68            batch_size=batch_size,
69            num_workers=num_workers,
70            custom_transforms=custom_transforms,
71            custom_target_transforms=custom_target_transforms,
72        )

Initialise the Split CIFAR-100 dataset.

Args:

  • root (str): the root directory where the original CIFAR-100 data 'cifar-100-python/' lives.
  • num_tasks (int): the maximum number of tasks supported by the CL dataset.
  • class_split (list[list[int]]): the class split for each task. Each element in the list is a list of class labels (integers starting from 0) to split for a task.
  • validation_percentage (float): the percentage to randomly split some of the training data into validation data.
  • batch_size (int): The batch size in train, val, test dataloader.
  • num_workers (int): the number of workers for dataloaders.
  • custom_transforms (transform or transforms.Compose or None): the custom transforms to apply to ONLY TRAIN dataset. Can be a single transform, composed transforms or no transform. ToTensor(), normalise, permute and so on are not included.
  • custom_target_transforms (transform or transforms.Compose or None): the custom target transforms to apply to dataset labels. Can be a single transform, composed transforms or no transform. CL class mapping is not included.
  • permutation_mode (str): the mode of permutation, should be one of the following:
    1. 'all': permute all pixels.
    2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
    3. 'first_channel_only': permute only the first channel.
  • permutation_seeds (list[int] or None): the seeds for permutation operations used to construct tasks. Make sure it has the same number of seeds as num_tasks. Default is None, which creates a list of seeds from 1 to num_tasks.
num_classes: int = 100

The number of classes in CIFAR-100 dataset.

mean_original: tuple[float] = (0.5074, 0.4867, 0.4411)

The mean values for normalisation.

std_original: tuple[float] = (0.2011, 0.1987, 0.2025)

The standard deviation values for normalisation.

def prepare_data(self) -> None:
74    def prepare_data(self) -> None:
75        r"""Download the original CIFAR-100 dataset if haven't."""
76        # just download
77        CIFAR100(root=self.root, train=True, download=True)
78        CIFAR100(root=self.root, train=False, download=True)
79
80        pylogger.debug(
81            "The original CIFAR-100 dataset has been downloaded to %s.", self.root
82        )

Download the original CIFAR-100 dataset if it has not been downloaded yet.

def train_and_val_dataset( self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]:
 84    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
 85        r"""Get the training and validation dataset of task `self.task_id`.
 86
 87        **Returns:**
 88        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
 89        """
 90        dataset_train_and_val = self.get_class_subset(
 91            CIFAR100(
 92                root=self.root,
 93                train=True,
 94                transform=self.train_and_val_transforms(to_tensor=True),
 95                download=False,
 96            )
 97        )
 98        dataset_train_and_val.target_transform = self.target_transforms()
 99
100        return random_split(
101            dataset_train_and_val,
102            lengths=[1 - self.validation_percentage, self.validation_percentage],
103        )

Get the training and validation dataset of task self.task_id.

Returns:

  • train_and_val_dataset (tuple[Dataset, Dataset]): the train and validation dataset of task self.task_id.
def test_dataset(self) -> torch.utils.data.dataset.Dataset:
105    def test_dataset(self) -> Dataset:
106        r"""Get the test dataset of task `self.task_id`.
107
108        **Returns:**
109        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
110        """
111        dataset_test = self.get_class_subset(
112            CIFAR100(
113                root=self.root,
114                train=False,
115                transform=self.test_transforms(to_tensor=True),
116                download=False,
117            )
118        )
119        dataset_test.target_transform = self.target_transforms()
120
121        return dataset_test

Get the test dataset of task self.task_id.

Returns:

  • test_dataset (Dataset): the test dataset of task self.task_id.