clarena.cl_datasets.permuted_emnist

The submodule in cl_datasets for the Permuted EMNIST dataset.

r"""
The submodule in `cl_datasets` for the Permuted EMNIST dataset.
"""
  4
  5__all__ = ["PermutedEMNIST"]
  6
  7import logging
  8from typing import Callable
  9
 10import torch
 11from torch.utils.data import Dataset, random_split
 12from torchvision.datasets import EMNIST
 13from torchvision.transforms import transforms
 14
 15from clarena.cl_datasets import CLPermutedDataset
 16from clarena.stl_datasets.raw import (
 17    EMNISTBalanced,
 18    EMNISTByClass,
 19    EMNISTByMerge,
 20    EMNISTDigits,
 21    EMNISTLetters,
 22)
 23
 24# always get logger for built-in logging in each module
 25pylogger = logging.getLogger(__name__)
 26
 27
 28class PermutedEMNIST(CLPermutedDataset):
 29    r"""Permuted EMNIST dataset. The [EMNIST dataset](https://www.nist.gov/itl/products-and-services/emnist-dataset/) is a collection of handwritten letters and digits (including A-Z, a-z, 0-9). It consists of 814,255 images in 62 classes, each 28x28 grayscale image.
 30
 31    EMNIST has 6 different splits: `byclass`, `bymerge`, `balanced`, `letters`, `digits` and `mnist`, each containing a different subset of the original collection. We support all of them in Permuted EMNIST.
 32    """
 33
 34    def __init__(
 35        self,
 36        root: str,
 37        split: str,
 38        num_tasks: int,
 39        validation_percentage: float,
 40        batch_size: int | dict[int, int] = 1,
 41        num_workers: int | dict[int, int] = 0,
 42        custom_transforms: (
 43            Callable
 44            | transforms.Compose
 45            | None
 46            | dict[int, Callable | transforms.Compose | None]
 47        ) = None,
 48        repeat_channels: int | None | dict[int, int | None] = None,
 49        to_tensor: bool | dict[int, bool] = True,
 50        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
 51        permutation_mode: str = "first_channel_only",
 52        permutation_seeds: dict[int, int] | None = None,
 53    ) -> None:
 54        r"""
 55        **Args:**
 56        - **root** (`str`): the root directory where the original EMNIST data 'EMNIST/' live.
 57        - **split** (`str`): the original EMNIST dataset has 6 different splits: `byclass`, `bymerge`, `balanced`, `letters`, `digits` and `mnist`. This argument specifies which one to use.
 58        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
 59        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
 60        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
 61        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
 62        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
 63        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
 64        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
 65        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
 66        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
 67        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
 68        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
 69        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
 70        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
 71        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
 72        - **permutation_mode** (`str`): the mode of permutation; one of:
 73            1. 'all': permute all pixels.
 74            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 75            3. 'first_channel_only': permute only the first channel.
 76        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
 77        """
 78
 79        if split == "byclass":
 80            self.original_dataset_python_class: type[Dataset] = EMNISTByClass
 81        elif split == "bymerge":
 82            self.original_dataset_python_class: type[Dataset] = EMNISTByMerge
 83        elif split == "balanced":
 84            self.original_dataset_python_class: type[Dataset] = EMNISTBalanced
 85        elif split == "letters":
 86            self.original_dataset_python_class: type[Dataset] = EMNISTLetters
 87        elif split == "digits":
 88            self.original_dataset_python_class: type[Dataset] = EMNISTDigits
 89            r"""The original dataset class."""
 90
 91        super().__init__(
 92            root=root,
 93            num_tasks=num_tasks,
 94            batch_size=batch_size,
 95            num_workers=num_workers,
 96            custom_transforms=custom_transforms,
 97            repeat_channels=repeat_channels,
 98            to_tensor=to_tensor,
 99            resize=resize,
100            permutation_mode=permutation_mode,
101            permutation_seeds=permutation_seeds,
102        )
103
104        self.split: str = split
105        r"""The split of the original EMNIST dataset. It can be `byclass`, `bymerge`, `balanced`, `letters`, `digits` or `mnist`."""
106
107        self.validation_percentage: float = validation_percentage
108        r"""The percentage to randomly split some training data into validation data."""
109
110    def prepare_data(self) -> None:
111        r"""Download the original EMNIST dataset if haven't."""
112
113        if self.task_id != 1:
114            return  # download all original datasets only at the beginning of first task
115
116        EMNIST(root=self.root_t, split=self.split, train=True, download=True)
117        EMNIST(root=self.root_t, split=self.split, train=False, download=True)
118
119        pylogger.debug(
120            "The original EMNIST dataset has been downloaded to %s.", self.root_t
121        )
122
123    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
124        """Get the training and validation dataset of task `self.task_id`.
125
126        **Returns:**
127        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
128        """
129        dataset_train_and_val = EMNIST(
130            root=self.root_t,
131            split=self.split,
132            train=True,
133            transform=self.train_and_val_transforms(),
134            target_transform=self.target_transform(),
135            download=False,
136        )
137
138        return random_split(
139            dataset_train_and_val,
140            lengths=[1 - self.validation_percentage, self.validation_percentage],
141            generator=torch.Generator().manual_seed(
142                42
143            ),  # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments
144        )
145
146    def test_dataset(self) -> Dataset:
147        r"""Get the test dataset of task `self.task_id`.
148
149        **Returns:**
150        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
151        """
152        dataset_test = EMNIST(
153            root=self.root_t,
154            split=self.split,
155            train=False,
156            transform=self.test_transforms(),
157            target_transform=self.target_transform(),
158            download=False,
159        )
160
161        return dataset_test
class PermutedEMNIST(clarena.cl_datasets.base.CLPermutedDataset):
 29class PermutedEMNIST(CLPermutedDataset):
 30    r"""Permuted EMNIST dataset. The [EMNIST dataset](https://www.nist.gov/itl/products-and-services/emnist-dataset/) is a collection of handwritten letters and digits (including A-Z, a-z, 0-9). It consists of 814,255 images in 62 classes, each 28x28 grayscale image.
 31
 32    EMNIST has 6 different splits: `byclass`, `bymerge`, `balanced`, `letters`, `digits` and `mnist`, each containing a different subset of the original collection. We support all of them in Permuted EMNIST.
 33    """
 34
 35    def __init__(
 36        self,
 37        root: str,
 38        split: str,
 39        num_tasks: int,
 40        validation_percentage: float,
 41        batch_size: int | dict[int, int] = 1,
 42        num_workers: int | dict[int, int] = 0,
 43        custom_transforms: (
 44            Callable
 45            | transforms.Compose
 46            | None
 47            | dict[int, Callable | transforms.Compose | None]
 48        ) = None,
 49        repeat_channels: int | None | dict[int, int | None] = None,
 50        to_tensor: bool | dict[int, bool] = True,
 51        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
 52        permutation_mode: str = "first_channel_only",
 53        permutation_seeds: dict[int, int] | None = None,
 54    ) -> None:
 55        r"""
 56        **Args:**
 57        - **root** (`str`): the root directory where the original EMNIST data 'EMNIST/' live.
 58        - **split** (`str`): the original EMNIST dataset has 6 different splits: `byclass`, `bymerge`, `balanced`, `letters`, `digits` and `mnist`. This argument specifies which one to use.
 59        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
 60        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
 61        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
 62        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
 63        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
 64        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
 65        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
 66        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
 67        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
 68        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
 69        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
 70        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
 71        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
 72        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
 73        - **permutation_mode** (`str`): the mode of permutation; one of:
 74            1. 'all': permute all pixels.
 75            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 76            3. 'first_channel_only': permute only the first channel.
 77        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
 78        """
 79
 80        if split == "byclass":
 81            self.original_dataset_python_class: type[Dataset] = EMNISTByClass
 82        elif split == "bymerge":
 83            self.original_dataset_python_class: type[Dataset] = EMNISTByMerge
 84        elif split == "balanced":
 85            self.original_dataset_python_class: type[Dataset] = EMNISTBalanced
 86        elif split == "letters":
 87            self.original_dataset_python_class: type[Dataset] = EMNISTLetters
 88        elif split == "digits":
 89            self.original_dataset_python_class: type[Dataset] = EMNISTDigits
 90            r"""The original dataset class."""
 91
 92        super().__init__(
 93            root=root,
 94            num_tasks=num_tasks,
 95            batch_size=batch_size,
 96            num_workers=num_workers,
 97            custom_transforms=custom_transforms,
 98            repeat_channels=repeat_channels,
 99            to_tensor=to_tensor,
100            resize=resize,
101            permutation_mode=permutation_mode,
102            permutation_seeds=permutation_seeds,
103        )
104
105        self.split: str = split
106        r"""The split of the original EMNIST dataset. It can be `byclass`, `bymerge`, `balanced`, `letters`, `digits` or `mnist`."""
107
108        self.validation_percentage: float = validation_percentage
109        r"""The percentage to randomly split some training data into validation data."""
110
111    def prepare_data(self) -> None:
112        r"""Download the original EMNIST dataset if haven't."""
113
114        if self.task_id != 1:
115            return  # download all original datasets only at the beginning of first task
116
117        EMNIST(root=self.root_t, split=self.split, train=True, download=True)
118        EMNIST(root=self.root_t, split=self.split, train=False, download=True)
119
120        pylogger.debug(
121            "The original EMNIST dataset has been downloaded to %s.", self.root_t
122        )
123
124    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
125        """Get the training and validation dataset of task `self.task_id`.
126
127        **Returns:**
128        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
129        """
130        dataset_train_and_val = EMNIST(
131            root=self.root_t,
132            split=self.split,
133            train=True,
134            transform=self.train_and_val_transforms(),
135            target_transform=self.target_transform(),
136            download=False,
137        )
138
139        return random_split(
140            dataset_train_and_val,
141            lengths=[1 - self.validation_percentage, self.validation_percentage],
142            generator=torch.Generator().manual_seed(
143                42
144            ),  # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments
145        )
146
147    def test_dataset(self) -> Dataset:
148        r"""Get the test dataset of task `self.task_id`.
149
150        **Returns:**
151        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
152        """
153        dataset_test = EMNIST(
154            root=self.root_t,
155            split=self.split,
156            train=False,
157            transform=self.test_transforms(),
158            target_transform=self.target_transform(),
159            download=False,
160        )
161
162        return dataset_test

Permuted EMNIST dataset. The EMNIST dataset is a collection of handwritten letters and digits (including A-Z, a-z, 0-9). It consists of 814,255 images in 62 classes, each 28x28 grayscale image.

EMNIST has 6 different splits: byclass, bymerge, balanced, letters, digits and mnist, each containing a different subset of the original collection. We support all of them in Permuted EMNIST.

PermutedEMNIST( root: str, split: str, num_tasks: int, validation_percentage: float, batch_size: int | dict[int, int] = 1, num_workers: int | dict[int, int] = 0, custom_transforms: Union[Callable, torchvision.transforms.transforms.Compose, NoneType, dict[int, Union[Callable, torchvision.transforms.transforms.Compose, NoneType]]] = None, repeat_channels: int | None | dict[int, int | None] = None, to_tensor: bool | dict[int, bool] = True, resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None, permutation_mode: str = 'first_channel_only', permutation_seeds: dict[int, int] | None = None)
 35    def __init__(
 36        self,
 37        root: str,
 38        split: str,
 39        num_tasks: int,
 40        validation_percentage: float,
 41        batch_size: int | dict[int, int] = 1,
 42        num_workers: int | dict[int, int] = 0,
 43        custom_transforms: (
 44            Callable
 45            | transforms.Compose
 46            | None
 47            | dict[int, Callable | transforms.Compose | None]
 48        ) = None,
 49        repeat_channels: int | None | dict[int, int | None] = None,
 50        to_tensor: bool | dict[int, bool] = True,
 51        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
 52        permutation_mode: str = "first_channel_only",
 53        permutation_seeds: dict[int, int] | None = None,
 54    ) -> None:
 55        r"""
 56        **Args:**
 57        - **root** (`str`): the root directory where the original EMNIST data 'EMNIST/' live.
 58        - **split** (`str`): the original EMNIST dataset has 6 different splits: `byclass`, `bymerge`, `balanced`, `letters`, `digits` and `mnist`. This argument specifies which one to use.
 59        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
 60        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
 61        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
 62        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
 63        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
 64        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
 65        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
 66        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
 67        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
 68        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
 69        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
 70        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
 71        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
 72        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
 73        - **permutation_mode** (`str`): the mode of permutation; one of:
 74            1. 'all': permute all pixels.
 75            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 76            3. 'first_channel_only': permute only the first channel.
 77        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
 78        """
 79
 80        if split == "byclass":
 81            self.original_dataset_python_class: type[Dataset] = EMNISTByClass
 82        elif split == "bymerge":
 83            self.original_dataset_python_class: type[Dataset] = EMNISTByMerge
 84        elif split == "balanced":
 85            self.original_dataset_python_class: type[Dataset] = EMNISTBalanced
 86        elif split == "letters":
 87            self.original_dataset_python_class: type[Dataset] = EMNISTLetters
 88        elif split == "digits":
 89            self.original_dataset_python_class: type[Dataset] = EMNISTDigits
 90            r"""The original dataset class."""
 91
 92        super().__init__(
 93            root=root,
 94            num_tasks=num_tasks,
 95            batch_size=batch_size,
 96            num_workers=num_workers,
 97            custom_transforms=custom_transforms,
 98            repeat_channels=repeat_channels,
 99            to_tensor=to_tensor,
100            resize=resize,
101            permutation_mode=permutation_mode,
102            permutation_seeds=permutation_seeds,
103        )
104
105        self.split: str = split
106        r"""The split of the original EMNIST dataset. It can be `byclass`, `bymerge`, `balanced`, `letters`, `digits` or `mnist`."""
107
108        self.validation_percentage: float = validation_percentage
109        r"""The percentage to randomly split some training data into validation data."""

Args:

  • root (str): the root directory where the original EMNIST data 'EMNIST/' live.
  • split (str): the original EMNIST dataset has 6 different splits: byclass, bymerge, balanced, letters, digits and mnist. This argument specifies which one to use.
  • num_tasks (int): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to num_tasks.
  • validation_percentage (float): the percentage to randomly split some training data into validation data.
  • batch_size (int | dict[int, int]): the batch size for train, val, and test dataloaders. If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an int, it is the same batch size for all tasks.
  • num_workers (int | dict[int, int]): the number of workers for dataloaders. If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an int, it is the same number of workers for all tasks.
  • custom_transforms (transform or transforms.Compose or None or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. ToTensor(), normalization, permute, and so on are not included. If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is None, no custom transforms are applied.
  • repeat_channels (int | None | dict of them): the number of channels to repeat for each task. Default is None, which means no repeat. If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an int, it is the same number of channels to repeat for all tasks. If it is None, no repeat is applied.
  • to_tensor (bool | dict[int, bool]): whether to include the ToTensor() transform. Default is True. If it is a dict, the keys are task IDs and the values are whether to include the ToTensor() transform for each task. If it is a single boolean value, it is applied to all tasks.
  • resize (tuple[int, int] | None or dict of them): the size to resize the images to. Default is None, which means no resize. If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is None, no resize is applied.
  • permutation_mode (str): the mode of permutation; one of:
    1. 'all': permute all pixels.
    2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
    3. 'first_channel_only': permute only the first channel.
  • permutation_seeds (dict[int, int] | None): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is None, which creates a dict of seeds from 0 to num_tasks-1.
split: str

The split of the original EMNIST dataset. It can be byclass, bymerge, balanced, letters, digits or mnist.

validation_percentage: float

The percentage to randomly split some training data into validation data.

def prepare_data(self) -> None:
111    def prepare_data(self) -> None:
112        r"""Download the original EMNIST dataset if haven't."""
113
114        if self.task_id != 1:
115            return  # download all original datasets only at the beginning of first task
116
117        EMNIST(root=self.root_t, split=self.split, train=True, download=True)
118        EMNIST(root=self.root_t, split=self.split, train=False, download=True)
119
120        pylogger.debug(
121            "The original EMNIST dataset has been downloaded to %s.", self.root_t
122        )

Download the original EMNIST dataset if it hasn't been downloaded yet.

def train_and_val_dataset( self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]:
124    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
125        """Get the training and validation dataset of task `self.task_id`.
126
127        **Returns:**
128        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
129        """
130        dataset_train_and_val = EMNIST(
131            root=self.root_t,
132            split=self.split,
133            train=True,
134            transform=self.train_and_val_transforms(),
135            target_transform=self.target_transform(),
136            download=False,
137        )
138
139        return random_split(
140            dataset_train_and_val,
141            lengths=[1 - self.validation_percentage, self.validation_percentage],
142            generator=torch.Generator().manual_seed(
143                42
144            ),  # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments
145        )

Get the training and validation dataset of task self.task_id.

Returns:

  • train_and_val_dataset (tuple[Dataset, Dataset]): the train and validation dataset of task self.task_id.
def test_dataset(self) -> torch.utils.data.dataset.Dataset:
147    def test_dataset(self) -> Dataset:
148        r"""Get the test dataset of task `self.task_id`.
149
150        **Returns:**
151        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
152        """
153        dataset_test = EMNIST(
154            root=self.root_t,
155            split=self.split,
156            train=False,
157            transform=self.test_transforms(),
158            target_transform=self.target_transform(),
159            download=False,
160        )
161
162        return dataset_test

Get the test dataset of task self.task_id.

Returns:

  • test_dataset (Dataset): the test dataset of task self.task_id.