clarena.cl_algorithms

Continual Learning Algorithms

This submodule provides the continual learning algorithms in CLArena.

Here are the base classes for CL algorithms, which inherit from PyTorch Lightning LightningModule:

  • CLAlgorithm: the base class for all continual learning algorithms.
    • UnlearnableCLAlgorithm: the base class for unlearnable continual learning algorithms.
      • AmnesiacCLAlgorithm: the base class for Amnesiac continual learning algorithms.

Please note that this is API documentation. Please refer to the main documentation pages for more information about how to configure and implement CL algorithms:

 1r"""
 2
 3# Continual Learning Algorithms
 4
 5This submodule provides the **continual learning algorithms** in CLArena.
 6
 7Here are the base classes for CL algorithms, which inherit from PyTorch Lightning `LightningModule`:
 8
 9- `CLAlgorithm`: the base class for all continual learning algorithms.
10    - `UnlearnableCLAlgorithm`: the base class for unlearnable continual learning algorithms.
11        - `AmnesiacCLAlgorithm`: the base class for Amnesiac continual learning algorithms.
12
13Please note that this is API documentation. Please refer to the main documentation pages for more information about how to configure and implement CL algorithms:
14
15- [**Configure CL Algorithm**](https://pengxiang-wang.com/projects/continual-learning-arena/docs/components/cl-algorithm)
16- [**Implement Custom CL Algorithm**](https://pengxiang-wang.com/projects/continual-learning-arena/docs/custom-implementation/cl-algorithm)
17- [**A Beginners' Guide to Continual Learning (Methodology Overview)**](https://pengxiang-wang.com/posts/continual-learning-beginners-guide#sec-methodology)
18
19
20"""
21
22from .base import CLAlgorithm, UnlearnableCLAlgorithm, AmnesiacCLAlgorithm
23
24# finetuning first
25from .finetuning import Finetuning, AmnesiacFinetuning
26from .independent import Independent, UnlearnableIndependent
27from .fix import Fix
28from .random import Random
29
30from .lwf import LwF, AmnesiacLwF
31from .ewc import EWC, AmnesiacEWC
32from .der import DER, DERpp, AmnesiacDER, AmnesiacDERpp
33from .clpu_derpp import CLPUDERpp
34from .cbp import CBP
35
36from .hat import HAT
37from .adahat import AdaHAT
38from .fgadahat import FGAdaHAT
39from .amnesiac_hat import AmnesiacHAT
40from .wsn import WSN
41
42# from .nispa import NISPA
43
44
45__all__ = [
46    "CLAlgorithm",
47    "UnlearnableCLAlgorithm",
48    "AmnesiacCLAlgorithm",
49    "regularizers",
50    "finetuning",
51    "independent",
52    "fix",
53    "random",
54    "lwf",
55    "ewc",
56    "der",
57    "cbp",
58    "hat",
59    "adahat",
60    "fgadahat",
61    "amnesiac_hat",
62    "wsn",
63    # "nispa",
64]
class CLAlgorithm(lightning.pytorch.core.module.LightningModule):
 25class CLAlgorithm(LightningModule):
 26    r"""The base class of continual learning algorithms."""
 27
 28    def __init__(
 29        self,
 30        backbone: CLBackbone,
 31        heads: HeadsTIL | HeadsCIL | HeadDIL,
 32        non_algorithmic_hparams: dict[str, Any] = {},
 33        **kwargs,
 34    ) -> None:
 35        r"""
 36        **Args:**
 37        - **backbone** (`CLBackbone`): backbone network.
 38        - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
 39        - **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility.
 40        - **kwargs**: Reserved for multiple inheritance.
 41        """
 42        super().__init__()
 43        self.save_hyperparameters(non_algorithmic_hparams)
 44
 45        # components
 46        self.backbone: CLBackbone = backbone
 47        r"""The backbone network."""
 48        self.heads: HeadsTIL | HeadsCIL | HeadDIL = heads
 49        r"""The output heads."""
 50        self.optimizer_t: Optimizer
 51        r"""Optimizer (partially initialized) for the current task `self.task_id`. Will be equipped with parameters in `configure_optimizers()`."""
 52        self.lr_scheduler_t: LRScheduler | None
 53        r"""Learning rate scheduler for the optimizer of the current task `self.task_id`. If `None`, no scheduler is used."""
 54        self.criterion = nn.CrossEntropyLoss()
 55        r"""Loss function between the output logits and the target labels. Default is cross-entropy loss."""
 56
 57        self.if_forward_func_return_logits_only: bool = False
 58        r"""Whether the `forward()` method returns logits only. If `False`, it returns a dictionary containing logits and other information. Default is `False`."""
 59
 60        # task ID control
 61        self.task_id: int
 62        r"""Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to `cl_dataset.num_tasks`."""
 63        self.processed_task_ids: list[int] = []
 64        r"""Task IDs that have been processed."""
 65
 66        CLAlgorithm.sanity_check(self)
 67
 68    def sanity_check(self) -> None:
 69        r"""Sanity check."""
 70
 71        # check backbone and heads compatibility
 72        if self.backbone.output_dim != self.heads.input_dim:
 73            raise ValueError(
 74                "The output_dim of the backbone must equal the input_dim of the CL heads."
 75            )
 76
 77    def setup_task_id(
 78        self,
 79        task_id: int,
 80        num_classes: int,
 81        optimizer: Optimizer,
 82        lr_scheduler: LRScheduler | None,
 83    ) -> None:
 84        r"""Set up which task the CL experiment is on. This must be done before `forward()` method is called.
 85
 86        **Args:**
 87        - **task_id** (`int`): the target task ID.
 88        - **num_classes** (`int`): the number of classes in the task.
 89        - **optimizer** (`Optimizer`): the optimizer object (partially initialized) for the task.
 90        - **lr_scheduler** (`LRScheduler` | `None`): the learning rate scheduler for the optimizer. If `None`, no scheduler is used.
 91        """
 92        self.task_id = task_id
 93        self.processed_task_ids.append(task_id)
 94        self.backbone.setup_task_id(task_id=task_id)
 95        if isinstance(self.heads, HeadsTIL) or isinstance(self.heads, HeadsCIL):
 96            self.heads.setup_task_id(task_id, num_classes)
 97        elif isinstance(self.heads, HeadDIL) and not self.heads.if_head_setup():
 98            self.heads.setup_task(num_classes)
 99        self.optimizer_t = optimizer
100        self.lr_scheduler_t = lr_scheduler
101
102    def get_test_task_id_from_dataloader_idx(self, dataloader_idx: int) -> int:
103        r"""Get the test task ID from the dataloader index.
104
105        **Args:**
106        - **dataloader_idx** (`int`): the dataloader index.
107
108        **Returns:**
109        - **test_task_id** (`int`): the test task ID.
110        """
111        dataset_test = self.trainer.datamodule.dataset_test
112        test_task_id = list(dataset_test.keys())[dataloader_idx]
113        return test_task_id
114
115    def set_forward_func_return_logits_only(
116        self, forward_func_return_logits_only: bool
117    ) -> None:
118        r"""Set whether the `forward()` method returns logits only. This is useful for some CL algorithms that require the forward function to return logits only, such as FG-AdaHAT.
119
120        **Args:**
121        - **forward_func_return_logits_only** (`bool`): whether the `forward()` method returns logits only. If `False`, it returns a dictionary containing logits and other information.
122        """
123        self.if_forward_func_return_logits_only = forward_func_return_logits_only
124
125    def preceding_layer(self, layer_name: str) -> nn.Module | None:
126        r"""Get the preceding layer of the given layer (including backbone and output heads). If the given layer is the first layer, return `None`.
127
128        **Args:**
129        - **layer_name** (`str`): the name of the layer.
130
131        **Returns:**
132        - **preceding_layer** (`nn.Module` | `None`): the preceding layer.
133        """
134
135        if layer_name == "heads":
136            backbone_last_layer_name = self.backbone.weighted_layer_names[-1]
137            backbone_last_layer = self.backbone.get_layer_by_name(
138                backbone_last_layer_name
139            )
140            return backbone_last_layer
141        else:
142            preceding_layer_name = self.backbone.preceding_layer_name(layer_name)
143            preceding_layer = self.backbone.get_layer_by_name(preceding_layer_name)
144
145        return preceding_layer
146
147    def next_layer(self, layer_name: str) -> nn.Module | None:
148        r"""Get the next layer of the given layer (including backbone and output heads). If the given layer is the last layer, return `None`.
149
150        **Args:**
151        - **layer_name** (`str`): the name of the layer.
152
153        **Returns:**
154        - **next_layer** (`nn.Module` | `None`): the next layer.
155        """
156
157        if layer_name == "heads":
158            return None
159        else:
160            next_layer_name = self.backbone.next_layer_name(layer_name)
161            if next_layer_name is not None:
162                next_layer = self.backbone.get_layer_by_name(next_layer_name)
163            else:
164                next_layer = self.heads.get_head(self.task_id)
165
166        return next_layer
167
168    def forward(self, input: Tensor, stage: str, task_id: int | None = None) -> Tensor:
169        r"""The forward pass for data from task `task_id`. Note that it has nothing to do with the `forward()` method in `nn.Module`. This definition provides a template that many CL algorithms, including the vanilla Finetuning algorithm, use. It works both for TIL and CIL.
170
171        **Args:**
172        - **input** (`Tensor`): the input tensor from data.
173        - **stage** (`str`): the stage of the forward pass; one of:
174            1. 'train': training stage.
175            2. 'validation': validation stage.
176            3. 'test': testing stage.
177        - **task_id** (`int`): the task ID where the data are from. If stage is 'train' or 'validation', it is usually from the current task `self.task_id`. If stage is 'test', it could be from any seen task. In TIL, the task IDs of test data are provided, thus this argument can be used. In CIL, they are not provided, so it is just a placeholder for API consistency but never used, and best practice is not to provide this argument and leave it as the default value.
178
179        **Returns:**
180        - **logits** (`Tensor`): the output logits tensor.
181        - **activations** (`dict[str, Tensor]`): the hidden features (after activation) in each weighted layer. Key (`str`) is the weighted layer name, value (`Tensor`) is the hidden feature tensor. This is used for the continual learning algorithms that need to use the hidden features for various purposes.
182        """
183        feature, activations = self.backbone(input, stage=stage, task_id=task_id)
184        logits = self.heads(feature, task_id)
185        return (
186            logits if self.if_forward_func_return_logits_only else (logits, activations)
187        )
188
189    def configure_optimizers(self) -> Optimizer:
190        r"""Configure optimizer hooks by Lightning. See [Lightning docs](https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers) for more details."""
191        # finish partially initialized optimizer by specifying model parameters. The `parameters()` method of this `CLAlgorithm` (inherited from `LightningModule`) returns both backbone and heads parameters
192        fully_initialized_optimizer = self.optimizer_t(params=self.parameters())
193
194        if self.lr_scheduler_t:
195            fully_initialized_lr_scheduler = self.lr_scheduler_t(
196                optimizer=fully_initialized_optimizer
197            )
198
199            return {
200                "optimizer": fully_initialized_optimizer,
201                "lr_scheduler": {
202                    "scheduler": fully_initialized_lr_scheduler,
203                    "monitor": f"task_{self.task_id}/learning_curve/val/loss_cls",
204                    "interval": "epoch",
205                    "frequency": 1,
206                },
207            }
208
209        return {"optimizer": fully_initialized_optimizer}

The base class of continual learning algorithms.

CLAlgorithm( backbone: clarena.backbones.CLBackbone, heads: clarena.heads.HeadsTIL | clarena.heads.HeadsCIL | clarena.heads.HeadDIL, non_algorithmic_hparams: dict[str, typing.Any] = {}, **kwargs)
28    def __init__(
29        self,
30        backbone: CLBackbone,
31        heads: HeadsTIL | HeadsCIL | HeadDIL,
32        non_algorithmic_hparams: dict[str, Any] = {},
33        **kwargs,
34    ) -> None:
35        r"""
36        **Args:**
37        - **backbone** (`CLBackbone`): backbone network.
38        - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
39        - **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility.
40        - **kwargs**: Reserved for multiple inheritance.
41        """
42        super().__init__()
43        self.save_hyperparameters(non_algorithmic_hparams)
44
45        # components
46        self.backbone: CLBackbone = backbone
47        r"""The backbone network."""
48        self.heads: HeadsTIL | HeadsCIL | HeadDIL = heads
49        r"""The output heads."""
50        self.optimizer_t: Optimizer
51        r"""Optimizer (partially initialized) for the current task `self.task_id`. Will be equipped with parameters in `configure_optimizers()`."""
52        self.lr_scheduler_t: LRScheduler | None
53        r"""Learning rate scheduler for the optimizer of the current task `self.task_id`. If `None`, no scheduler is used."""
54        self.criterion = nn.CrossEntropyLoss()
55        r"""Loss function between the output logits and the target labels. Default is cross-entropy loss."""
56
57        self.if_forward_func_return_logits_only: bool = False
58        r"""Whether the `forward()` method returns logits only. If `False`, it returns a dictionary containing logits and other information. Default is `False`."""
59
60        # task ID control
61        self.task_id: int
62        r"""Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to `cl_dataset.num_tasks`."""
63        self.processed_task_ids: list[int] = []
64        r"""Task IDs that have been processed."""
65
66        CLAlgorithm.sanity_check(self)

Args:

  • backbone (CLBackbone): backbone network.
  • heads (HeadsTIL | HeadsCIL | HeadDIL): output heads.
  • non_algorithmic_hparams (dict[str, Any]): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this LightningModule object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from save_hyperparameters() method. This is useful for the experiment configuration and reproducibility.
  • kwargs: Reserved for multiple inheritance.

The backbone network.

optimizer_t: torch.optim.optimizer.Optimizer

Optimizer (partially initialized) for the current task self.task_id. Will be equipped with parameters in configure_optimizers().

lr_scheduler_t: torch.optim.lr_scheduler.LRScheduler | None

Learning rate scheduler for the optimizer of the current task self.task_id. If None, no scheduler is used.

criterion

Loss function between the output logits and the target labels. Default is cross-entropy loss.

if_forward_func_return_logits_only: bool

Whether the forward() method returns logits only. If False, it returns a dictionary containing logits and other information. Default is False.

task_id: int

Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to cl_dataset.num_tasks.

processed_task_ids: list[int]

Task IDs that have been processed.

def sanity_check(self) -> None:
68    def sanity_check(self) -> None:
69        r"""Sanity check."""
70
71        # check backbone and heads compatibility
72        if self.backbone.output_dim != self.heads.input_dim:
73            raise ValueError(
74                "The output_dim of the backbone must equal the input_dim of the CL heads."
75            )

Sanity check.

def setup_task_id( self, task_id: int, num_classes: int, optimizer: torch.optim.optimizer.Optimizer, lr_scheduler: torch.optim.lr_scheduler.LRScheduler | None) -> None:
 77    def setup_task_id(
 78        self,
 79        task_id: int,
 80        num_classes: int,
 81        optimizer: Optimizer,
 82        lr_scheduler: LRScheduler | None,
 83    ) -> None:
 84        r"""Set up which task the CL experiment is on. This must be done before `forward()` method is called.
 85
 86        **Args:**
 87        - **task_id** (`int`): the target task ID.
 88        - **num_classes** (`int`): the number of classes in the task.
 89        - **optimizer** (`Optimizer`): the optimizer object (partially initialized) for the task.
 90        - **lr_scheduler** (`LRScheduler` | `None`): the learning rate scheduler for the optimizer. If `None`, no scheduler is used.
 91        """
 92        self.task_id = task_id
 93        self.processed_task_ids.append(task_id)
 94        self.backbone.setup_task_id(task_id=task_id)
 95        if isinstance(self.heads, HeadsTIL) or isinstance(self.heads, HeadsCIL):
 96            self.heads.setup_task_id(task_id, num_classes)
 97        elif isinstance(self.heads, HeadDIL) and not self.heads.if_head_setup():
 98            self.heads.setup_task(num_classes)
 99        self.optimizer_t = optimizer
100        self.lr_scheduler_t = lr_scheduler

Set up which task the CL experiment is on. This must be done before forward() method is called.

Args:

  • task_id (int): the target task ID.
  • num_classes (int): the number of classes in the task.
  • optimizer (Optimizer): the optimizer object (partially initialized) for the task.
  • lr_scheduler (LRScheduler | None): the learning rate scheduler for the optimizer. If None, no scheduler is used.
def get_test_task_id_from_dataloader_idx(self, dataloader_idx: int) -> int:
102    def get_test_task_id_from_dataloader_idx(self, dataloader_idx: int) -> int:
103        r"""Get the test task ID from the dataloader index.
104
105        **Args:**
106        - **dataloader_idx** (`int`): the dataloader index.
107
108        **Returns:**
109        - **test_task_id** (`int`): the test task ID.
110        """
111        dataset_test = self.trainer.datamodule.dataset_test
112        test_task_id = list(dataset_test.keys())[dataloader_idx]
113        return test_task_id

Get the test task ID from the dataloader index.

Args:

  • dataloader_idx (int): the dataloader index.

Returns:

  • test_task_id (int): the test task ID.
def set_forward_func_return_logits_only(self, forward_func_return_logits_only: bool) -> None:
115    def set_forward_func_return_logits_only(
116        self, forward_func_return_logits_only: bool
117    ) -> None:
118        r"""Set whether the `forward()` method returns logits only. This is useful for some CL algorithms that require the forward function to return logits only, such as FG-AdaHAT.
119
120        **Args:**
121        - **forward_func_return_logits_only** (`bool`): whether the `forward()` method returns logits only. If `False`, it returns a dictionary containing logits and other information.
122        """
123        self.if_forward_func_return_logits_only = forward_func_return_logits_only

Set whether the forward() method returns logits only. This is useful for some CL algorithms that require the forward function to return logits only, such as FG-AdaHAT.

Args:

  • forward_func_return_logits_only (bool): whether the forward() method returns logits only. If False, it returns a dictionary containing logits and other information.
def preceding_layer(self, layer_name: str) -> torch.nn.modules.module.Module | None:
125    def preceding_layer(self, layer_name: str) -> nn.Module | None:
126        r"""Get the preceding layer of the given layer (including backbone and output heads). If the given layer is the first layer, return `None`.
127
128        **Args:**
129        - **layer_name** (`str`): the name of the layer.
130
131        **Returns:**
132        - **preceding_layer** (`nn.Module` | `None`): the preceding layer.
133        """
134
135        if layer_name == "heads":
136            backbone_last_layer_name = self.backbone.weighted_layer_names[-1]
137            backbone_last_layer = self.backbone.get_layer_by_name(
138                backbone_last_layer_name
139            )
140            return backbone_last_layer
141        else:
142            preceding_layer_name = self.backbone.preceding_layer_name(layer_name)
143            preceding_layer = self.backbone.get_layer_by_name(preceding_layer_name)
144
145        return preceding_layer

Get the preceding layer of the given layer (including backbone and output heads). If the given layer is the first layer, return None.

Args:

  • layer_name (str): the name of the layer.

Returns:

  • preceding_layer (nn.Module | None): the preceding layer.
def next_layer(self, layer_name: str) -> torch.nn.modules.module.Module | None:
147    def next_layer(self, layer_name: str) -> nn.Module | None:
148        r"""Get the next layer of the given layer (including backbone and output heads). If the given layer is the last layer, return `None`.
149
150        **Args:**
151        - **layer_name** (`str`): the name of the layer.
152
153        **Returns:**
154        - **next_layer** (`nn.Module` | `None`): the next layer.
155        """
156
157        if layer_name == "heads":
158            return None
159        else:
160            next_layer_name = self.backbone.next_layer_name(layer_name)
161            if next_layer_name is not None:
162                next_layer = self.backbone.get_layer_by_name(next_layer_name)
163            else:
164                next_layer = self.heads.get_head(self.task_id)
165
166        return next_layer

Get the next layer of the given layer (including backbone and output heads). If the given layer is the last layer, return None.

Args:

  • layer_name (str): the name of the layer.

Returns:

  • next_layer (nn.Module | None): the next layer.
def forward( self, input: torch.Tensor, stage: str, task_id: int | None = None) -> torch.Tensor:
168    def forward(self, input: Tensor, stage: str, task_id: int | None = None) -> Tensor:
169        r"""The forward pass for data from task `task_id`. Note that it has nothing to do with the `forward()` method in `nn.Module`. This definition provides a template that many CL algorithms, including the vanilla Finetuning algorithm, use. It works both for TIL and CIL.
170
171        **Args:**
172        - **input** (`Tensor`): the input tensor from data.
173        - **stage** (`str`): the stage of the forward pass; one of:
174            1. 'train': training stage.
175            2. 'validation': validation stage.
176            3. 'test': testing stage.
177        - **task_id** (`int`): the task ID where the data are from. If stage is 'train' or 'validation', it is usually from the current task `self.task_id`. If stage is 'test', it could be from any seen task. In TIL, the task IDs of test data are provided, thus this argument can be used. In CIL, they are not provided, so it is just a placeholder for API consistency but never used, and best practice is not to provide this argument and leave it as the default value.
178
179        **Returns:**
180        - **logits** (`Tensor`): the output logits tensor.
181        - **activations** (`dict[str, Tensor]`): the hidden features (after activation) in each weighted layer. Key (`str`) is the weighted layer name, value (`Tensor`) is the hidden feature tensor. This is used for the continual learning algorithms that need to use the hidden features for various purposes.
182        """
183        feature, activations = self.backbone(input, stage=stage, task_id=task_id)
184        logits = self.heads(feature, task_id)
185        return (
186            logits if self.if_forward_func_return_logits_only else (logits, activations)
187        )

The forward pass for data from task task_id. Note that it has nothing to do with the forward() method in nn.Module. This definition provides a template that many CL algorithms, including the vanilla Finetuning algorithm, use. It works both for TIL and CIL.

Args:

  • input (Tensor): the input tensor from data.
  • stage (str): the stage of the forward pass; one of:
    1. 'train': training stage.
    2. 'validation': validation stage.
    3. 'test': testing stage.
  • task_id (int): the task ID where the data are from. If stage is 'train' or 'validation', it is usually from the current task self.task_id. If stage is 'test', it could be from any seen task. In TIL, the task IDs of test data are provided, thus this argument can be used. In CIL, they are not provided, so it is just a placeholder for API consistency but never used, and best practice is not to provide this argument and leave it as the default value.

Returns:

  • logits (Tensor): the output logits tensor.
  • activations (dict[str, Tensor]): the hidden features (after activation) in each weighted layer. Key (str) is the weighted layer name, value (Tensor) is the hidden feature tensor. This is used for the continual learning algorithms that need to use the hidden features for various purposes.
def configure_optimizers(self) -> torch.optim.optimizer.Optimizer:
189    def configure_optimizers(self) -> Optimizer:
190        r"""Configure optimizer hooks by Lightning. See [Lightning docs](https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers) for more details."""
191        # finish partially initialized optimizer by specifying model parameters. The `parameters()` method of this `CLAlgorithm` (inherited from `LightningModule`) returns both backbone and heads parameters
192        fully_initialized_optimizer = self.optimizer_t(params=self.parameters())
193
194        if self.lr_scheduler_t:
195            fully_initialized_lr_scheduler = self.lr_scheduler_t(
196                optimizer=fully_initialized_optimizer
197            )
198
199            return {
200                "optimizer": fully_initialized_optimizer,
201                "lr_scheduler": {
202                    "scheduler": fully_initialized_lr_scheduler,
203                    "monitor": f"task_{self.task_id}/learning_curve/val/loss_cls",
204                    "interval": "epoch",
205                    "frequency": 1,
206                },
207            }
208
209        return {"optimizer": fully_initialized_optimizer}

Configure optimizer hooks by Lightning. See Lightning docs for more details.

class UnlearnableCLAlgorithm(clarena.cl_algorithms.CLAlgorithm):
212class UnlearnableCLAlgorithm(CLAlgorithm):
213    r"""The base class of unlearnable continual learning algorithms."""
214
215    def __init__(
216        self,
217        backbone: CLBackbone,
218        heads: HeadsTIL | HeadsCIL | HeadDIL,
219        non_algorithmic_hparams: dict[str, Any] = {},
220        disable_unlearning: bool = False,
221        **kwargs,
222    ) -> None:
223        r"""
224        **Args:**
225        - **backbone** (`CLBackbone`): backbone network.
226        - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
227        - **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility.
228        - **disable_unlearning** (`bool`): whether to disable unlearning. This is used in reference experiments following continual learning pipeline. Default is `False`.
229        - **kwargs**: Reserved for multiple inheritance.
230        """
231        super().__init__(
232            backbone=backbone,
233            heads=heads,
234            non_algorithmic_hparams=non_algorithmic_hparams,
235            **kwargs,
236        )
237
238        self.disable_unlearning: bool = disable_unlearning
239        r"""Whether to disable unlearning. This is used in reference experiments following continual learning pipeline."""
240
241        if not self.disable_unlearning:
242            self.unlearning_task_ids: list[int]
243            r"""The list of task IDs that are requested to be unlearned after training `self.task_id`."""
244
245            self.unlearned_task_ids: set[int] = set()
246            r"""The set of task IDs that have been unlearned in the experiment."""
247
248            self.unlearnable_task_ids: list[int]
249            r"""The list of task IDs that are unlearnable at the current `self.task_id`."""
250
251            self.task_ids_just_no_longer_unlearnable: list[int]
252            r"""The list of task IDs that are just no longer unlearnable at the current `self.task_id`."""
253
254            UnlearnableCLAlgorithm.sanity_check(self)
255
256    def sanity_check(self) -> None:
257        r"""Sanity check."""
258
259    def aggregated_backbone_output(self, input: Tensor) -> Tensor:
260        r"""Get the aggregated backbone output for the input data. All parts of backbones should be aggregated together.
261
262        This output feature is used for measuring unlearning metrics, such as Distribution Distance (DD). An aggregated output involving every part of the backbone is needed to ensure the fairness of the metric.
263
264        **Args:**
265        - **input** (`Tensor`): the input tensor from data.
266
267        **Returns:**
268        - **output** (`Tensor`): the aggregated backbone output tensor.
269        """
270        feature = 0
271        forward_params = inspect.signature(self.backbone.forward).parameters
272        supports_test_task_id = (
273            "test_task_id" in forward_params
274            or any(
275                p.kind == inspect.Parameter.VAR_KEYWORD
276                for p in forward_params.values()
277            )
278        )
279
280        for i in self.processed_task_ids:
281            if supports_test_task_id:
282                feature_i = self.backbone(
283                    input, stage="unlearning_test", test_task_id=i
284                )[0]
285            else:
286                feature_i = self.backbone(input, stage="unlearning_test")[0]
287            feature += feature_i
288        feature = feature / len(self.processed_task_ids)
289
290        return feature

The base class of unlearnable continual learning algorithms.

UnlearnableCLAlgorithm( backbone: clarena.backbones.CLBackbone, heads: clarena.heads.HeadsTIL | clarena.heads.HeadsCIL | clarena.heads.HeadDIL, non_algorithmic_hparams: dict[str, typing.Any] = {}, disable_unlearning: bool = False, **kwargs)
215    def __init__(
216        self,
217        backbone: CLBackbone,
218        heads: HeadsTIL | HeadsCIL | HeadDIL,
219        non_algorithmic_hparams: dict[str, Any] = {},
220        disable_unlearning: bool = False,
221        **kwargs,
222    ) -> None:
223        r"""
224        **Args:**
225        - **backbone** (`CLBackbone`): backbone network.
226        - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
227        - **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility.
228        - **disable_unlearning** (`bool`): whether to disable unlearning. This is used in reference experiments following continual learning pipeline. Default is `False`.
229        - **kwargs**: Reserved for multiple inheritance.
230        """
231        super().__init__(
232            backbone=backbone,
233            heads=heads,
234            non_algorithmic_hparams=non_algorithmic_hparams,
235            **kwargs,
236        )
237
238        self.disable_unlearning: bool = disable_unlearning
239        r"""Whether to disable unlearning. This is used in reference experiments following continual learning pipeline."""
240
241        if not self.disable_unlearning:
242            self.unlearning_task_ids: list[int]
243            r"""The list of task IDs that are requested to be unlearned after training `self.task_id`."""
244
245            self.unlearned_task_ids: set[int] = set()
246            r"""The set of task IDs that have been unlearned in the experiment."""
247
248            self.unlearnable_task_ids: list[int]
249            r"""The list of task IDs that are unlearnable at the current `self.task_id`."""
250
251            self.task_ids_just_no_longer_unlearnable: list[int]
252            r"""The list of task IDs that are just no longer unlearnable at the current `self.task_id`."""
253
254            UnlearnableCLAlgorithm.sanity_check(self)

Args:

  • backbone (CLBackbone): backbone network.
  • heads (HeadsTIL | HeadsCIL | HeadDIL): output heads.
  • non_algorithmic_hparams (dict[str, Any]): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this LightningModule object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from save_hyperparameters() method. This is useful for the experiment configuration and reproducibility.
  • disable_unlearning (bool): whether to disable unlearning. This is used in reference experiments following continual learning pipeline. Default is False.
  • kwargs: Reserved for multiple inheritance.
disable_unlearning: bool

Whether to disable unlearning. This is used in reference experiments following continual learning pipeline.

def sanity_check(self) -> None:
256    def sanity_check(self) -> None:
257        r"""Sanity check."""

Sanity check.

def aggregated_backbone_output(self, input: torch.Tensor) -> torch.Tensor:
259    def aggregated_backbone_output(self, input: Tensor) -> Tensor:
260        r"""Get the aggregated backbone output for the input data. All parts of backbones should be aggregated together.
261
262        This output feature is used for measuring unlearning metrics, such as Distribution Distance (DD). An aggregated output involving every part of the backbone is needed to ensure the fairness of the metric.
263
264        **Args:**
265        - **input** (`Tensor`): the input tensor from data.
266
267        **Returns:**
268        - **output** (`Tensor`): the aggregated backbone output tensor.
269        """
270        feature = 0
271        forward_params = inspect.signature(self.backbone.forward).parameters
272        supports_test_task_id = (
273            "test_task_id" in forward_params
274            or any(
275                p.kind == inspect.Parameter.VAR_KEYWORD
276                for p in forward_params.values()
277            )
278        )
279
280        for i in self.processed_task_ids:
281            if supports_test_task_id:
282                feature_i = self.backbone(
283                    input, stage="unlearning_test", test_task_id=i
284                )[0]
285            else:
286                feature_i = self.backbone(input, stage="unlearning_test")[0]
287            feature += feature_i
288        feature = feature / len(self.processed_task_ids)
289
290        return feature

Get the aggregated backbone output for the input data. All parts of backbones should be aggregated together.

This output feature is used for measuring unlearning metrics, such as Distribution Distance (DD). An aggregated output involving every part of the backbone is needed to ensure the fairness of the metric.

Args:

  • input (Tensor): the input tensor from data.

Returns:

  • output (Tensor): the aggregated backbone output tensor.
class AmnesiacCLAlgorithm(clarena.cl_algorithms.UnlearnableCLAlgorithm):
293class AmnesiacCLAlgorithm(UnlearnableCLAlgorithm):
294    r"""The base class of Amnesiac continual learning algorithms.
295
296    The Amnesiac continual learning algorithm refers to the corresponding continual learning model that the Amnesiac continual unlearning algorithm requires. The Amnesiac continual unlearning algorithm refers to the update deletion operation that directly deletes the parameter updates during a task's training. This is inspired by [AmnesiacML](https://arxiv.org/abs/2010.10981) in machine unlearning. In detail, the task-wise parameter updates are stored:
297
298    $$\theta_{l,ij}^{(t)} = \theta_{l,ij}^{(0)} + \sum_{\tau=1}^{t} \Delta \theta_{l,ij}^{(\tau)}$$
299
300    To unlearn $u(t)$, delete these updates:
301
302    $$\theta_{l,ij}^{(t-u(t))} = \theta_{l,ij}^{(t)} - \sum_{\tau\in u(t)}\Delta \theta_{l,ij}^{(\tau)}$$
303
304    It is mainly used in AmnesiacHAT, but can also be used in constructing other vanilla baseline continual unlearning algorithms based on different continual learning algorithms.
305    """
306
307    def __init__(
308        self,
309        backbone: CLBackbone,
310        heads: HeadsTIL | HeadsCIL | HeadDIL,
311        non_algorithmic_hparams: dict[str, Any] = {},
312        disable_unlearning: bool = False,
313        **kwargs,
314    ) -> None:
315        r"""
316        **Args:**
317        - **backbone** (`CLBackbone`): backbone network.
318        - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
319        - **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility.
320        - **disable_unlearning** (`bool`): whether to disable unlearning. This is used in reference experiments following continual learning pipeline. Default is `False`.
321        - **kwargs**: Reserved for multiple inheritance.
322        """
323        super().__init__(
324            backbone=backbone,
325            heads=heads,
326            non_algorithmic_hparams=non_algorithmic_hparams,
327            disable_unlearning=disable_unlearning,
328            **kwargs,
329        )
330
331        self.original_backbone_state_dict: dict[str, Tensor] = deepcopy(
332            backbone.state_dict()
333        )
334        r"""Store the original backbone network state dict. It is a dict where keys are parameter names and values are the corresponding parameter tensor for the layer. """
335
336        self.original_heads_state_dict: dict[str, Tensor] = deepcopy(heads.state_dict())
337        r"""Store the original heads state dict. It is a dict where keys are parameter names and values are the corresponding parameter update tensor for the head. """
338
339        self.parameters_task_update: dict[int, dict[str, Tensor]] = {}
340        r"""Store the parameter updates in each task. Keys are task IDs and values are the corresponding parameter updates. Each value is a dict where keys are parameter names and values are the corresponding parameter update tensor for the layer. """
341
342        self.parameters_task_update_heads: dict[int, dict[str, Tensor]] = {}
343        r"""Store the heads parameter updates in each task. Keys are task IDs and values are the corresponding parameter updates. Each value is a dict where keys are parameter names and values are the corresponding parameter update tensor for the head. """
344
345        self.state_dict_task_start: dict[str, Tensor]
346        r"""Store the backbone state dict at the start of training each task. """
347
348        self.heads_state_dict_task_start: dict[str, Tensor]
349        r"""Store the heads state dict at the start of training each task. """
350
351    def _record_new_head_parameters(self) -> None:
352        r"""Record the initial parameters for any newly created heads. This applies to TIL and CIL settings where new heads are created for new tasks."""
353        current_heads_state_dict = self.heads.state_dict()
354        for param_name, param_tensor in current_heads_state_dict.items():
355            if param_name not in self.original_heads_state_dict:
356                self.original_heads_state_dict[param_name] = deepcopy(param_tensor)
357
358    def setup_task_id(
359        self,
360        task_id: int,
361        num_classes: int,
362        optimizer: Optimizer,
363        lr_scheduler: LRScheduler | None,
364    ) -> None:
365        r"""Set up which task the CL experiment is on. This must be done before `forward()` method is called."""
366        super().setup_task_id(
367            task_id=task_id,
368            num_classes=num_classes,
369            optimizer=optimizer,
370            lr_scheduler=lr_scheduler,
371        )
372
373        if not self.disable_unlearning:
374            # record initial parameters of any newly created heads for later reconstruction
375            self._record_new_head_parameters()
376
377    def on_train_start(self):
378        r"""Store the current state dict at the start of training."""
379        super().on_train_start()
380
381        if not self.disable_unlearning:
382            self.state_dict_task_start = deepcopy(self.backbone.state_dict())
383            self.heads_state_dict_task_start = deepcopy(self.heads.state_dict())
384
385    def on_train_end(self):
386        r"""Store the parameters update of a task at the end of its training."""
387        super().on_train_end()
388
389        if not self.disable_unlearning:
390            current_state_dict = self.backbone.state_dict()
391            parameters_task_t_update = {}
392
393            # compute the parameters update for the current task
394            for layer_name, current_param_tensor in current_state_dict.items():
395                if layer_name.startswith("backup_backbones."):
396                    continue
397                parameters_task_t_update[layer_name] = (
398                    current_param_tensor - self.state_dict_task_start[layer_name]
399                )
400
401            # store the parameters update for the current task
402            self.parameters_task_update[self.task_id] = parameters_task_t_update
403
404            # compute the heads parameters update for the current task
405            current_heads_state_dict = self.heads.state_dict()
406            parameters_task_t_update_heads = {}
407            for param_name, current_param_tensor in current_heads_state_dict.items():
408                if param_name not in self.heads_state_dict_task_start:
409                    pylogger.warning(
410                        "Head parameter %s was not found in task start state dict.",
411                        param_name,
412                    )
413                    continue
414                parameters_task_t_update_heads[param_name] = (
415                    current_param_tensor - self.heads_state_dict_task_start[param_name]
416                )
417
418            # store the heads parameters update for the current task
419            self.parameters_task_update_heads[self.task_id] = (
420                parameters_task_t_update_heads
421            )

The base class of Amnesiac continual learning algorithms.

The Amnesiac continual learning algorithm refers to the corresponding continual learning model that the Amnesiac continual unlearning algorithm requires. The Amnesiac continual unlearning algorithm refers to the update deletion operation that directly deletes the parameter updates during a task's training. This is inspired by AmnesiacML in machine unlearning. In detail, the task-wise parameter updates are stored:

$$\theta_{l,ij}^{(t)} = \theta_{l,ij}^{(0)} + \sum_{\tau=1}^{t} \Delta \theta_{l,ij}^{(\tau)}$$

To unlearn $u(t)$, delete these updates:

$$\theta_{l,ij}^{(t-u(t))} = \theta_{l,ij}^{(t)} - \sum_{\tau\in u(t)}\Delta \theta_{l,ij}^{(\tau)}$$

It is mainly used in AmnesiacHAT, but can also be used in constructing other vanilla baseline continual unlearning algorithms based on different continual learning algorithms.

AmnesiacCLAlgorithm( backbone: clarena.backbones.CLBackbone, heads: clarena.heads.HeadsTIL | clarena.heads.HeadsCIL | clarena.heads.HeadDIL, non_algorithmic_hparams: dict[str, typing.Any] = {}, disable_unlearning: bool = False, **kwargs)
307    def __init__(
308        self,
309        backbone: CLBackbone,
310        heads: HeadsTIL | HeadsCIL | HeadDIL,
311        non_algorithmic_hparams: dict[str, Any] = {},
312        disable_unlearning: bool = False,
313        **kwargs,
314    ) -> None:
315        r"""
316        **Args:**
317        - **backbone** (`CLBackbone`): backbone network.
318        - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
319        - **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility.
320        - **disable_unlearning** (`bool`): whether to disable unlearning. This is used in reference experiments following continual learning pipeline. Default is `False`.
321        - **kwargs**: Reserved for multiple inheritance.
322        """
323        super().__init__(
324            backbone=backbone,
325            heads=heads,
326            non_algorithmic_hparams=non_algorithmic_hparams,
327            disable_unlearning=disable_unlearning,
328            **kwargs,
329        )
330
331        self.original_backbone_state_dict: dict[str, Tensor] = deepcopy(
332            backbone.state_dict()
333        )
334        r"""Store the original backbone network state dict. It is a dict where keys are parameter names and values are the corresponding parameter tensor for the layer. """
335
336        self.original_heads_state_dict: dict[str, Tensor] = deepcopy(heads.state_dict())
337        r"""Store the original heads state dict. It is a dict where keys are parameter names and values are the corresponding parameter update tensor for the head. """
338
339        self.parameters_task_update: dict[int, dict[str, Tensor]] = {}
340        r"""Store the parameter updates in each task. Keys are task IDs and values are the corresponding parameter updates. Each value is a dict where keys are parameter names and values are the corresponding parameter update tensor for the layer. """
341
342        self.parameters_task_update_heads: dict[int, dict[str, Tensor]] = {}
343        r"""Store the heads parameter updates in each task. Keys are task IDs and values are the corresponding parameter updates. Each value is a dict where keys are parameter names and values are the corresponding parameter update tensor for the head. """
344
345        self.state_dict_task_start: dict[str, Tensor]
346        r"""Store the backbone state dict at the start of training each task. """
347
348        self.heads_state_dict_task_start: dict[str, Tensor]
349        r"""Store the heads state dict at the start of training each task. """

Args:

  • backbone (CLBackbone): backbone network.
  • heads (HeadsTIL | HeadsCIL | HeadDIL): output heads.
  • non_algorithmic_hparams (dict[str, Any]): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this LightningModule object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from save_hyperparameters() method. This is useful for the experiment configuration and reproducibility.
  • disable_unlearning (bool): whether to disable unlearning. This is used in reference experiments following continual learning pipeline. Default is False.
  • kwargs: Reserved for multiple inheritance.
original_backbone_state_dict: dict[str, torch.Tensor]

Store the original backbone network state dict. It is a dict where keys are parameter names and values are the corresponding parameter tensor for the layer.

original_heads_state_dict: dict[str, torch.Tensor]

Store the original heads state dict. It is a dict where keys are parameter names and values are the corresponding parameter tensor for the head.

parameters_task_update: dict[int, dict[str, torch.Tensor]]

Store the parameter updates in each task. Keys are task IDs and values are the corresponding parameter updates. Each value is a dict where keys are parameter names and values are the corresponding parameter update tensor for the layer.

parameters_task_update_heads: dict[int, dict[str, torch.Tensor]]

Store the heads parameter updates in each task. Keys are task IDs and values are the corresponding parameter updates. Each value is a dict where keys are parameter names and values are the corresponding parameter update tensor for the head.

state_dict_task_start: dict[str, torch.Tensor]

Store the backbone state dict at the start of training each task.

heads_state_dict_task_start: dict[str, torch.Tensor]

Store the heads state dict at the start of training each task.

def setup_task_id( self, task_id: int, num_classes: int, optimizer: torch.optim.optimizer.Optimizer, lr_scheduler: torch.optim.lr_scheduler.LRScheduler | None) -> None:
358    def setup_task_id(
359        self,
360        task_id: int,
361        num_classes: int,
362        optimizer: Optimizer,
363        lr_scheduler: LRScheduler | None,
364    ) -> None:
365        r"""Set up which task the CL experiment is on. This must be done before `forward()` method is called."""
366        super().setup_task_id(
367            task_id=task_id,
368            num_classes=num_classes,
369            optimizer=optimizer,
370            lr_scheduler=lr_scheduler,
371        )
372
373        if not self.disable_unlearning:
374            # record initial parameters of any newly created heads for later reconstruction
375            self._record_new_head_parameters()

Set up which task the CL experiment is on. This must be done before forward() method is called.

def on_train_start(self):
377    def on_train_start(self):
378        r"""Store the current state dict at the start of training."""
379        super().on_train_start()
380
381        if not self.disable_unlearning:
382            self.state_dict_task_start = deepcopy(self.backbone.state_dict())
383            self.heads_state_dict_task_start = deepcopy(self.heads.state_dict())

Store the current state dict at the start of training.

def on_train_end(self):
385    def on_train_end(self):
386        r"""Store the parameters update of a task at the end of its training."""
387        super().on_train_end()
388
389        if not self.disable_unlearning:
390            current_state_dict = self.backbone.state_dict()
391            parameters_task_t_update = {}
392
393            # compute the parameters update for the current task
394            for layer_name, current_param_tensor in current_state_dict.items():
395                if layer_name.startswith("backup_backbones."):
396                    continue
397                parameters_task_t_update[layer_name] = (
398                    current_param_tensor - self.state_dict_task_start[layer_name]
399                )
400
401            # store the parameters update for the current task
402            self.parameters_task_update[self.task_id] = parameters_task_t_update
403
404            # compute the heads parameters update for the current task
405            current_heads_state_dict = self.heads.state_dict()
406            parameters_task_t_update_heads = {}
407            for param_name, current_param_tensor in current_heads_state_dict.items():
408                if param_name not in self.heads_state_dict_task_start:
409                    pylogger.warning(
410                        "Head parameter %s was not found in task start state dict.",
411                        param_name,
412                    )
413                    continue
414                parameters_task_t_update_heads[param_name] = (
415                    current_param_tensor - self.heads_state_dict_task_start[param_name]
416                )
417
418            # store the heads parameters update for the current task
419            self.parameters_task_update_heads[self.task_id] = (
420                parameters_task_t_update_heads
421            )

Store the parameters update of a task at the end of its training.