clarena.cl_datasets.permuted_facescrub

The submodule in cl_datasets for Permuted FaceScrub dataset.

  1r"""
  2The submodule in `cl_datasets` for Permuted FaceScrub dataset.
  3"""
  4
  5__all__ = ["PermutedFaceScrub"]
  6
  7import logging
  8from typing import Callable
  9
 10import torch
 11from torch.utils.data import Dataset, random_split
 12from torchvision.transforms import transforms
 13
 14from clarena.cl_datasets import CLPermutedDataset
 15from clarena.stl_datasets.raw import FaceScrub10, FaceScrub20, FaceScrub50, FaceScrub100
 16
 17# always get logger for built-in logging in each module
 18pylogger = logging.getLogger(__name__)
 19
 20
 21class PermutedFaceScrub(CLPermutedDataset):
 22    r"""Permuted FaceScrub dataset. The [original FaceScrub dataset](https://vintage.winklerbros.net/facescrub.html) is a collection of human face images. It consists 106,863 images of 530 people (classes), each high resolution color image.
 23
 24    To make it simple, [this version](https://github.com/nkundiushuti/facescrub_subset) uses subset of the official [Megaface FaceScrub challenge](http://megaface.cs.washington.edu/participate/challenge.html), cropped and resized to 32x32. We have [FaceScrub-10](https://github.com/nkundiushuti/facescrub_subset/blob/master/data/facescrub_10.zip), [FaceScrub-20](https://github.com/nkundiushuti/facescrub_subset/blob/master/data/facescrub_20.zip), [FaceScrub-50](https://github.com/nkundiushuti/facescrub_subset/blob/master/data/facescrub_50.zip), [FaceScrub-100](https://github.com/nkundiushuti/facescrub_subset/blob/master/data/facescrub_100.zip) datasets where the number of classes are 10, 20, 50 and 100 respectively.
 25    """
 26
 27    def __init__(
 28        self,
 29        root: str,
 30        size: str,
 31        num_tasks: int,
 32        validation_percentage: float,
 33        batch_size: int | dict[int, int] = 1,
 34        num_workers: int | dict[int, int] = 0,
 35        custom_transforms: (
 36            Callable
 37            | transforms.Compose
 38            | None
 39            | dict[int, Callable | transforms.Compose | None]
 40        ) = None,
 41        repeat_channels: int | None | dict[int, int | None] = None,
 42        to_tensor: bool | dict[int, bool] = True,
 43        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
 44        permutation_mode: str = "first_channel_only",
 45        permutation_seeds: dict[int, int] | None = None,
 46    ) -> None:
 47        r"""
 48        **Args:**
 49        - **root** (`str`): the root directory where the original FaceScrub data 'FaceScrub/' live.
 50        - **size** (`str`): the size of the dataset; one of:
 51            1. '10': 10 classes (10 people).
 52            2. '20': 20 classes (20 people).
 53            3. '50': 50 classes (50 people).
 54            4. '100': 100 classes (100 people).
 55        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
 56        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
 57        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
 58        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
 59        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
 60        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
 61        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
 62        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
 63        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
 64        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
 65        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
 66        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
 67        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
 68        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
 69        - **permutation_mode** (`str`): the mode of permutation; one of:
 70            1. 'all': permute all pixels.
 71            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 72            3. 'first_channel_only': permute only the first channel.
 73        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
 74        """
 75
 76        if size == "10":
 77            self.original_dataset_python_class: type[Dataset] = FaceScrub10
 78        elif size == "20":
 79            self.original_dataset_python_class: type[Dataset] = FaceScrub20
 80        elif size == "50":
 81            self.original_dataset_python_class: type[Dataset] = FaceScrub50
 82        elif size == "100":
 83            self.original_dataset_python_class: type[Dataset] = FaceScrub100
 84            r"""The original dataset class."""
 85
 86        super().__init__(
 87            root=root,
 88            num_tasks=num_tasks,
 89            batch_size=batch_size,
 90            num_workers=num_workers,
 91            custom_transforms=custom_transforms,
 92            repeat_channels=repeat_channels,
 93            to_tensor=to_tensor,
 94            resize=resize,
 95            permutation_mode=permutation_mode,
 96            permutation_seeds=permutation_seeds,
 97        )
 98
 99        self.validation_percentage: float = validation_percentage
100        r"""The percentage to randomly split some training data into validation data."""
101
102    def prepare_data(self) -> None:
103        r"""Download the original FaceScrub dataset if haven't."""
104
105        if self.task_id != 1:
106            return  # download all original datasets only at the beginning of first task
107
108        self.original_dataset_python_class(root=self.root_t, train=True, download=True)
109        self.original_dataset_python_class(root=self.root_t, train=False, download=True)
110
111        pylogger.debug(
112            "The original FaceScrub dataset has been downloaded to %s.", self.root_t
113        )
114
115    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
116        """Get the training and validation dataset of task `self.task_id`.
117
118        **Returns:**
119        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
120        """
121        dataset_train_and_val = self.original_dataset_python_class(
122            root=self.root_t,
123            train=True,
124            transform=self.train_and_val_transforms(),
125            target_transform=self.target_transform(),
126            download=False,
127        )
128
129        return random_split(
130            dataset_train_and_val,
131            lengths=[1 - self.validation_percentage, self.validation_percentage],
132            generator=torch.Generator().manual_seed(
133                42
134            ),  # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments
135        )
136
137    def test_dataset(self) -> Dataset:
138        r"""Get the test dataset of task `self.task_id`.
139
140        **Returns:**
141        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
142        """
143        dataset_test = self.original_dataset_python_class(
144            root=self.root_t,
145            train=False,
146            transform=self.test_transforms(),
147            target_transform=self.target_transform(),
148            download=False,
149        )
150
151        return dataset_test
class PermutedFaceScrub(clarena.cl_datasets.base.CLPermutedDataset):
 22class PermutedFaceScrub(CLPermutedDataset):
 23    r"""Permuted FaceScrub dataset. The [original FaceScrub dataset](https://vintage.winklerbros.net/facescrub.html) is a collection of human face images. It consists 106,863 images of 530 people (classes), each high resolution color image.
 24
 25    To make it simple, [this version](https://github.com/nkundiushuti/facescrub_subset) uses subset of the official [Megaface FaceScrub challenge](http://megaface.cs.washington.edu/participate/challenge.html), cropped and resized to 32x32. We have [FaceScrub-10](https://github.com/nkundiushuti/facescrub_subset/blob/master/data/facescrub_10.zip), [FaceScrub-20](https://github.com/nkundiushuti/facescrub_subset/blob/master/data/facescrub_20.zip), [FaceScrub-50](https://github.com/nkundiushuti/facescrub_subset/blob/master/data/facescrub_50.zip), [FaceScrub-100](https://github.com/nkundiushuti/facescrub_subset/blob/master/data/facescrub_100.zip) datasets where the number of classes are 10, 20, 50 and 100 respectively.
 26    """
 27
 28    def __init__(
 29        self,
 30        root: str,
 31        size: str,
 32        num_tasks: int,
 33        validation_percentage: float,
 34        batch_size: int | dict[int, int] = 1,
 35        num_workers: int | dict[int, int] = 0,
 36        custom_transforms: (
 37            Callable
 38            | transforms.Compose
 39            | None
 40            | dict[int, Callable | transforms.Compose | None]
 41        ) = None,
 42        repeat_channels: int | None | dict[int, int | None] = None,
 43        to_tensor: bool | dict[int, bool] = True,
 44        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
 45        permutation_mode: str = "first_channel_only",
 46        permutation_seeds: dict[int, int] | None = None,
 47    ) -> None:
 48        r"""
 49        **Args:**
 50        - **root** (`str`): the root directory where the original FaceScrub data 'FaceScrub/' live.
 51        - **size** (`str`): the size of the dataset; one of:
 52            1. '10': 10 classes (10 people).
 53            2. '20': 20 classes (20 people).
 54            3. '50': 50 classes (50 people).
 55            4. '100': 100 classes (100 people).
 56        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
 57        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
 58        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
 59        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
 60        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
 61        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
 62        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
 63        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
 64        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
 65        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
 66        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
 67        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
 68        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
 69        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
 70        - **permutation_mode** (`str`): the mode of permutation; one of:
 71            1. 'all': permute all pixels.
 72            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 73            3. 'first_channel_only': permute only the first channel.
 74        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
 75        """
 76
 77        if size == "10":
 78            self.original_dataset_python_class: type[Dataset] = FaceScrub10
 79        elif size == "20":
 80            self.original_dataset_python_class: type[Dataset] = FaceScrub20
 81        elif size == "50":
 82            self.original_dataset_python_class: type[Dataset] = FaceScrub50
 83        elif size == "100":
 84            self.original_dataset_python_class: type[Dataset] = FaceScrub100
 85            r"""The original dataset class."""
 86
 87        super().__init__(
 88            root=root,
 89            num_tasks=num_tasks,
 90            batch_size=batch_size,
 91            num_workers=num_workers,
 92            custom_transforms=custom_transforms,
 93            repeat_channels=repeat_channels,
 94            to_tensor=to_tensor,
 95            resize=resize,
 96            permutation_mode=permutation_mode,
 97            permutation_seeds=permutation_seeds,
 98        )
 99
100        self.validation_percentage: float = validation_percentage
101        r"""The percentage to randomly split some training data into validation data."""
102
103    def prepare_data(self) -> None:
104        r"""Download the original FaceScrub dataset if haven't."""
105
106        if self.task_id != 1:
107            return  # download all original datasets only at the beginning of first task
108
109        self.original_dataset_python_class(root=self.root_t, train=True, download=True)
110        self.original_dataset_python_class(root=self.root_t, train=False, download=True)
111
112        pylogger.debug(
113            "The original FaceScrub dataset has been downloaded to %s.", self.root_t
114        )
115
116    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
117        """Get the training and validation dataset of task `self.task_id`.
118
119        **Returns:**
120        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
121        """
122        dataset_train_and_val = self.original_dataset_python_class(
123            root=self.root_t,
124            train=True,
125            transform=self.train_and_val_transforms(),
126            target_transform=self.target_transform(),
127            download=False,
128        )
129
130        return random_split(
131            dataset_train_and_val,
132            lengths=[1 - self.validation_percentage, self.validation_percentage],
133            generator=torch.Generator().manual_seed(
134                42
135            ),  # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments
136        )
137
138    def test_dataset(self) -> Dataset:
139        r"""Get the test dataset of task `self.task_id`.
140
141        **Returns:**
142        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
143        """
144        dataset_test = self.original_dataset_python_class(
145            root=self.root_t,
146            train=False,
147            transform=self.test_transforms(),
148            target_transform=self.target_transform(),
149            download=False,
150        )
151
152        return dataset_test

Permuted FaceScrub dataset. The original FaceScrub dataset is a collection of human face images. It consists of 106,863 images of 530 people (classes), each a high-resolution color image.

To make it simple, this version uses a subset of the official Megaface FaceScrub challenge, cropped and resized to 32x32. We have FaceScrub-10, FaceScrub-20, FaceScrub-50, FaceScrub-100 datasets where the number of classes are 10, 20, 50 and 100 respectively.

PermutedFaceScrub( root: str, size: str, num_tasks: int, validation_percentage: float, batch_size: int | dict[int, int] = 1, num_workers: int | dict[int, int] = 0, custom_transforms: Union[Callable, torchvision.transforms.transforms.Compose, NoneType, dict[int, Union[Callable, torchvision.transforms.transforms.Compose, NoneType]]] = None, repeat_channels: int | None | dict[int, int | None] = None, to_tensor: bool | dict[int, bool] = True, resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None, permutation_mode: str = 'first_channel_only', permutation_seeds: dict[int, int] | None = None)
 28    def __init__(
 29        self,
 30        root: str,
 31        size: str,
 32        num_tasks: int,
 33        validation_percentage: float,
 34        batch_size: int | dict[int, int] = 1,
 35        num_workers: int | dict[int, int] = 0,
 36        custom_transforms: (
 37            Callable
 38            | transforms.Compose
 39            | None
 40            | dict[int, Callable | transforms.Compose | None]
 41        ) = None,
 42        repeat_channels: int | None | dict[int, int | None] = None,
 43        to_tensor: bool | dict[int, bool] = True,
 44        resize: tuple[int, int] | None | dict[int, tuple[int, int] | None] = None,
 45        permutation_mode: str = "first_channel_only",
 46        permutation_seeds: dict[int, int] | None = None,
 47    ) -> None:
 48        r"""
 49        **Args:**
 50        - **root** (`str`): the root directory where the original FaceScrub data 'FaceScrub/' live.
 51        - **size** (`str`): the size of the dataset; one of:
 52            1. '10': 10 classes (10 people).
 53            2. '20': 20 classes (20 people).
 54            3. '50': 50 classes (50 people).
 55            4. '100': 100 classes (100 people).
 56        - **num_tasks** (`int`): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to `num_tasks`.
 57        - **validation_percentage** (`float`): the percentage to randomly split some training data into validation data.
 58        - **batch_size** (`int` | `dict[int, int]`): the batch size for train, val, and test dataloaders.
 59        If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an `int`, it is the same batch size for all tasks.
 60        - **num_workers** (`int` | `dict[int, int]`): the number of workers for dataloaders.
 61        If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an `int`, it is the same number of workers for all tasks.
 62        - **custom_transforms** (`transform` or `transforms.Compose` or `None` or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. `ToTensor()`, normalization, permute, and so on are not included.
 63        If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is `None`, no custom transforms are applied.
 64        - **repeat_channels** (`int` | `None` | dict of them): the number of channels to repeat for each task. Default is `None`, which means no repeat.
 65        If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an `int`, it is the same number of channels to repeat for all tasks. If it is `None`, no repeat is applied.
 66        - **to_tensor** (`bool` | `dict[int, bool]`): whether to include the `ToTensor()` transform. Default is `True`.
 67        If it is a dict, the keys are task IDs and the values are whether to include the `ToTensor()` transform for each task. If it is a single boolean value, it is applied to all tasks.
 68        - **resize** (`tuple[int, int]` | `None` or dict of them): the size to resize the images to. Default is `None`, which means no resize.
 69        If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is `None`, no resize is applied.
 70        - **permutation_mode** (`str`): the mode of permutation; one of:
 71            1. 'all': permute all pixels.
 72            2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
 73            3. 'first_channel_only': permute only the first channel.
 74        - **permutation_seeds** (`dict[int, int]` | `None`): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is `None`, which creates a dict of seeds from 0 to `num_tasks`-1.
 75        """
 76
 77        if size == "10":
 78            self.original_dataset_python_class: type[Dataset] = FaceScrub10
 79        elif size == "20":
 80            self.original_dataset_python_class: type[Dataset] = FaceScrub20
 81        elif size == "50":
 82            self.original_dataset_python_class: type[Dataset] = FaceScrub50
 83        elif size == "100":
 84            self.original_dataset_python_class: type[Dataset] = FaceScrub100
 85            r"""The original dataset class."""
 86
 87        super().__init__(
 88            root=root,
 89            num_tasks=num_tasks,
 90            batch_size=batch_size,
 91            num_workers=num_workers,
 92            custom_transforms=custom_transforms,
 93            repeat_channels=repeat_channels,
 94            to_tensor=to_tensor,
 95            resize=resize,
 96            permutation_mode=permutation_mode,
 97            permutation_seeds=permutation_seeds,
 98        )
 99
100        self.validation_percentage: float = validation_percentage
101        r"""The percentage to randomly split some training data into validation data."""

Args:

  • root (str): the root directory where the original FaceScrub data 'FaceScrub/' live.
  • size (str): the size of the dataset; one of:
    1. '10': 10 classes (10 people).
    2. '20': 20 classes (20 people).
    3. '50': 50 classes (50 people).
    4. '100': 100 classes (100 people).
  • num_tasks (int): the maximum number of tasks supported by the CL dataset. This decides the valid task IDs from 1 to num_tasks.
  • validation_percentage (float): the percentage to randomly split some training data into validation data.
  • batch_size (int | dict[int, int]): the batch size for train, val, and test dataloaders. If it is a dict, the keys are task IDs and the values are the batch sizes for each task. If it is an int, it is the same batch size for all tasks.
  • num_workers (int | dict[int, int]): the number of workers for dataloaders. If it is a dict, the keys are task IDs and the values are the number of workers for each task. If it is an int, it is the same number of workers for all tasks.
  • custom_transforms (transform or transforms.Compose or None or dict of them): the custom transforms to apply ONLY to the TRAIN dataset. Can be a single transform, composed transforms, or no transform. ToTensor(), normalization, permute, and so on are not included. If it is a dict, the keys are task IDs and the values are the custom transforms for each task. If it is a single transform or composed transforms, it is applied to all tasks. If it is None, no custom transforms are applied.
  • repeat_channels (int | None | dict of them): the number of channels to repeat for each task. Default is None, which means no repeat. If it is a dict, the keys are task IDs and the values are the number of channels to repeat for each task. If it is an int, it is the same number of channels to repeat for all tasks. If it is None, no repeat is applied.
  • to_tensor (bool | dict[int, bool]): whether to include the ToTensor() transform. Default is True. If it is a dict, the keys are task IDs and the values are whether to include the ToTensor() transform for each task. If it is a single boolean value, it is applied to all tasks.
  • resize (tuple[int, int] | None or dict of them): the size to resize the images to. Default is None, which means no resize. If it is a dict, the keys are task IDs and the values are the sizes to resize for each task. If it is a single tuple of two integers, it is applied to all tasks. If it is None, no resize is applied.
  • permutation_mode (str): the mode of permutation; one of:
    1. 'all': permute all pixels.
    2. 'by_channel': permute channel by channel separately. All channels are applied the same permutation order.
    3. 'first_channel_only': permute only the first channel.
  • permutation_seeds (dict[int, int] | None): the dict of seeds for permutation operations used to construct each task. Keys are task IDs and the values are permutation seeds for each task. Default is None, which creates a dict of seeds from 0 to num_tasks-1.
validation_percentage: float

The percentage to randomly split some training data into validation data.

def prepare_data(self) -> None:
103    def prepare_data(self) -> None:
104        r"""Download the original FaceScrub dataset if haven't."""
105
106        if self.task_id != 1:
107            return  # download all original datasets only at the beginning of first task
108
109        self.original_dataset_python_class(root=self.root_t, train=True, download=True)
110        self.original_dataset_python_class(root=self.root_t, train=False, download=True)
111
112        pylogger.debug(
113            "The original FaceScrub dataset has been downloaded to %s.", self.root_t
114        )

Download the original FaceScrub dataset if it hasn't been downloaded yet.

def train_and_val_dataset( self) -> tuple[torch.utils.data.dataset.Dataset, torch.utils.data.dataset.Dataset]:
116    def train_and_val_dataset(self) -> tuple[Dataset, Dataset]:
117        """Get the training and validation dataset of task `self.task_id`.
118
119        **Returns:**
120        - **train_and_val_dataset** (`tuple[Dataset, Dataset]`): the train and validation dataset of task `self.task_id`.
121        """
122        dataset_train_and_val = self.original_dataset_python_class(
123            root=self.root_t,
124            train=True,
125            transform=self.train_and_val_transforms(),
126            target_transform=self.target_transform(),
127            download=False,
128        )
129
130        return random_split(
131            dataset_train_and_val,
132            lengths=[1 - self.validation_percentage, self.validation_percentage],
133            generator=torch.Generator().manual_seed(
134                42
135            ),  # this must be set fixed to make sure the datasets across experiments are the same. Don't handle it to global seed as it might vary across experiments
136        )

Get the training and validation dataset of task self.task_id.

Returns:

  • train_and_val_dataset (tuple[Dataset, Dataset]): the train and validation dataset of task self.task_id.
def test_dataset(self) -> torch.utils.data.dataset.Dataset:
138    def test_dataset(self) -> Dataset:
139        r"""Get the test dataset of task `self.task_id`.
140
141        **Returns:**
142        - **test_dataset** (`Dataset`): the test dataset of task `self.task_id`.
143        """
144        dataset_test = self.original_dataset_python_class(
145            root=self.root_t,
146            train=False,
147            transform=self.test_transforms(),
148            target_transform=self.target_transform(),
149            download=False,
150        )
151
152        return dataset_test

Get the test dataset of task self.task_id.

Returns:

  • test_dataset (Dataset): the test dataset of task self.task_id.