clarena.cl_algorithms
Continual Learning Algorithms
This submodule provides the continual learning algorithms in CLArena.
Here are the base classes for CL algorithms, which inherit from PyTorch Lightning LightningModule:
- CLAlgorithm: the base class for all continual learning algorithms.
- UnlearnableCLAlgorithm: the base class for unlearnable continual learning algorithms.
- AmnesiacCLAlgorithm: the base class for Amnesiac continual learning algorithms.
Please note that this is an API documentation. Please refer to the main documentation pages for more information about and how to configure and implement CL algorithms:
1r""" 2 3# Continual Learning Algorithms 4 5This submodule provides the **continual learning algorithms** in CLArena. 6 7Here are the base classes for CL algorithms, which inherit from PyTorch Lightning `LightningModule`: 8 9- `CLAlgorithm`: the base class for all continual learning algorithms. 10 - `UnlearnableCLAlgorithm`: the base class for unlearnable continual learning algorithms. 11 - `AmnesiacCULAlgorithm`: the base class for Amnesiac continual learning algorithms. 12 13Please note that this is an API documentation. Please refer to the main documentation pages for more information about and how to configure and implement CL algorithms: 14 15- [**Configure CL Algorithm**](https://pengxiang-wang.com/projects/continual-learning-arena/docs/components/cl-algorithm) 16- [**Implement Custom CL Algorithm**](https://pengxiang-wang.com/projects/continual-learning-arena/docs/custom-implementation/cl-algorithm) 17- [**A Beginners' Guide to Continual Learning (Methodology Overview)**](https://pengxiang-wang.com/posts/continual-learning-beginners-guide#sec-methodology) 18 19 20""" 21 22from .base import CLAlgorithm, UnlearnableCLAlgorithm, AmnesiacCLAlgorithm 23 24# finetuning first 25from .finetuning import Finetuning, AmnesiacFinetuning 26from .independent import Independent, UnlearnableIndependent 27from .fix import Fix 28from .random import Random 29 30from .lwf import LwF, AmnesiacLwF 31from .ewc import EWC, AmnesiacEWC 32from .der import DER, DERpp, AmnesiacDER, AmnesiacDERpp 33from .clpu_derpp import CLPUDERpp 34from .cbp import CBP 35 36from .hat import HAT 37from .adahat import AdaHAT 38from .fgadahat import FGAdaHAT 39from .amnesiac_hat import AmnesiacHAT 40from .wsn import WSN 41 42# from .nispa import NISPA 43 44 45__all__ = [ 46 "CLAlgorithm", 47 "UnlearnableCLAlgorithm", 48 "AmnesiacCLAlgorithm", 49 "regularizers", 50 "finetuning", 51 "independent", 52 "fix", 53 "random", 54 "lwf", 55 "ewc", 56 "der", 57 "cbp", 58 "hat", 59 "adahat", 60 "fgadahat", 61 
"amnesiac_hat", 62 "wsn", 63 # "nispa", 64]
class CLAlgorithm(LightningModule):
    r"""The base class of continual learning algorithms."""

    def __init__(
        self,
        backbone: CLBackbone,
        heads: HeadsTIL | HeadsCIL | HeadDIL,
        non_algorithmic_hparams: dict[str, Any] | None = None,
        **kwargs,
    ) -> None:
        r"""
        **Args:**
        - **backbone** (`CLBackbone`): backbone network.
        - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
        - **non_algorithmic_hparams** (`dict[str, Any]` | `None`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs via the `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility. If `None`, an empty dict is used.
        - **kwargs**: Reserved for multiple inheritance.
        """
        super().__init__()
        # `None` default instead of a mutable `{}` default, so a single dict
        # object is never shared across instances
        self.save_hyperparameters(
            non_algorithmic_hparams if non_algorithmic_hparams is not None else {}
        )

        # components
        self.backbone: CLBackbone = backbone
        r"""The backbone network."""
        self.heads: HeadsTIL | HeadsCIL | HeadDIL = heads
        r"""The output heads."""
        self.optimizer_t: Optimizer
        r"""Optimizer (partially initialized) for the current task `self.task_id`. Will be equipped with parameters in `configure_optimizers()`."""
        self.lr_scheduler_t: LRScheduler | None
        r"""Learning rate scheduler for the optimizer of the current task `self.task_id`. If `None`, no scheduler is used."""
        self.criterion = nn.CrossEntropyLoss()
        r"""Loss function between the output logits and the target labels. Default is cross-entropy loss."""

        self.if_forward_func_return_logits_only: bool = False
        r"""Whether the `forward()` method returns logits only. If `False`, it returns logits together with the layer activations. Default is `False`."""

        # task ID control
        self.task_id: int
        r"""Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to `cl_dataset.num_tasks`."""
        self.processed_task_ids: list[int] = []
        r"""Task IDs that have been processed."""

        # call this class's own check explicitly so subclass overrides of
        # `sanity_check()` cannot suppress the base compatibility check
        CLAlgorithm.sanity_check(self)

    def sanity_check(self) -> None:
        r"""Sanity check: verify backbone and heads are dimensionally compatible."""

        # check backbone and heads compatibility
        if self.backbone.output_dim != self.heads.input_dim:
            raise ValueError(
                "The output_dim of the backbone must equal the input_dim of the CL heads."
            )

    def setup_task_id(
        self,
        task_id: int,
        num_classes: int,
        optimizer: Optimizer,
        lr_scheduler: LRScheduler | None,
    ) -> None:
        r"""Set up which task the CL experiment is on. This must be done before `forward()` method is called.

        **Args:**
        - **task_id** (`int`): the target task ID.
        - **num_classes** (`int`): the number of classes in the task.
        - **optimizer** (`Optimizer`): the optimizer object (partially initialized) for the task.
        - **lr_scheduler** (`LRScheduler` | `None`): the learning rate scheduler for the optimizer. If `None`, no scheduler is used.
        """
        self.task_id = task_id
        self.processed_task_ids.append(task_id)
        self.backbone.setup_task_id(task_id=task_id)
        if isinstance(self.heads, (HeadsTIL, HeadsCIL)):
            self.heads.setup_task_id(task_id, num_classes)
        elif isinstance(self.heads, HeadDIL) and not self.heads.if_head_setup():
            # DIL uses a single shared head, so it is only set up once
            self.heads.setup_task(num_classes)
        self.optimizer_t = optimizer
        self.lr_scheduler_t = lr_scheduler

    def get_test_task_id_from_dataloader_idx(self, dataloader_idx: int) -> int:
        r"""Get the test task ID from the dataloader index.

        **Args:**
        - **dataloader_idx** (`int`): the dataloader index.

        **Returns:**
        - **test_task_id** (`int`): the test task ID.
        """
        # dataloader order follows the insertion order of the test dataset dict
        dataset_test = self.trainer.datamodule.dataset_test
        test_task_id = list(dataset_test.keys())[dataloader_idx]
        return test_task_id

    def set_forward_func_return_logits_only(
        self, forward_func_return_logits_only: bool
    ) -> None:
        r"""Set whether the `forward()` method returns logits only. This is useful for some CL algorithms that require the forward function to return logits only, such as FG-AdaHAT.

        **Args:**
        - **forward_func_return_logits_only** (`bool`): whether the `forward()` method returns logits only. If `False`, it returns logits together with the layer activations.
        """
        self.if_forward_func_return_logits_only = forward_func_return_logits_only

    def preceding_layer(self, layer_name: str) -> nn.Module | None:
        r"""Get the preceding layer of the given layer (including backbone and output heads). If the given layer is the first layer, return `None`.

        **Args:**
        - **layer_name** (`str`): the name of the layer.

        **Returns:**
        - **preceding_layer** (`nn.Module` | `None`): the preceding layer.
        """

        if layer_name == "heads":
            # the layer right before the heads is the last weighted backbone layer
            backbone_last_layer_name = self.backbone.weighted_layer_names[-1]
            backbone_last_layer = self.backbone.get_layer_by_name(
                backbone_last_layer_name
            )
            return backbone_last_layer
        else:
            preceding_layer_name = self.backbone.preceding_layer_name(layer_name)
            preceding_layer = self.backbone.get_layer_by_name(preceding_layer_name)

        return preceding_layer

    def next_layer(self, layer_name: str) -> nn.Module | None:
        r"""Get the next layer of the given layer (including backbone and output heads). If the given layer is the last layer, return `None`.

        **Args:**
        - **layer_name** (`str`): the name of the layer.

        **Returns:**
        - **next_layer** (`nn.Module` | `None`): the next layer.
        """

        if layer_name == "heads":
            # heads are the final layer; nothing follows them
            return None
        else:
            next_layer_name = self.backbone.next_layer_name(layer_name)
            if next_layer_name is not None:
                next_layer = self.backbone.get_layer_by_name(next_layer_name)
            else:
                # past the last backbone layer, the next layer is the task head
                next_layer = self.heads.get_head(self.task_id)

        return next_layer

    def forward(
        self, input: Tensor, stage: str, task_id: int | None = None
    ) -> Tensor | tuple[Tensor, dict[str, Tensor]]:
        r"""The forward pass for data from task `task_id`. Note that it has nothing to do with `forward()` method in `nn.Module`. This definition provides a template that many CL algorithms including the vanilla Finetuning algorithm use. It works both for TIL and CIL.

        **Args:**
        - **input** (`Tensor`): the input tensor from data.
        - **stage** (`str`): the stage of the forward pass; one of:
            1. 'train': training stage.
            2. 'validation': validation stage.
            3. 'test': testing stage.
        - **task_id** (`int` | `None`): the task ID where the data are from. If stage is 'train' or 'validation', it is usually from the current task `self.task_id`. If stage is 'test', it could be from any seen task. In TIL, the task IDs of test data are provided thus this argument can be used. In CIL, they are not provided, so it is just a placeholder for API consistency but never used, and best practice is not to provide this argument and leave it as the default value.

        **Returns:**
        - **logits** (`Tensor`): the output logits tensor (returned alone when `self.if_forward_func_return_logits_only` is `True`).
        - **activations** (`dict[str, Tensor]`): the hidden features (after activation) in each weighted layer. Key (`str`) is the weighted layer name, value (`Tensor`) is the hidden feature tensor. This is used for the continual learning algorithms that need to use the hidden features for various purposes.
        """
        feature, activations = self.backbone(input, stage=stage, task_id=task_id)
        logits = self.heads(feature, task_id)
        return (
            logits if self.if_forward_func_return_logits_only else (logits, activations)
        )

    def configure_optimizers(self) -> dict[str, Any]:
        r"""Configure optimizer hooks by Lightning. See [Lightning docs](https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers) for more details.

        **Returns:**
        - a Lightning optimizer configuration dict containing the fully initialized optimizer, plus the learning rate scheduler configuration when `self.lr_scheduler_t` is set.
        """
        # finish partially initialized optimizer by specifying model parameters. The `parameters()` method of this `CLAlgorithm` (inherited from `LightningModule`) returns both backbone and heads parameters
        fully_initialized_optimizer = self.optimizer_t(params=self.parameters())

        if self.lr_scheduler_t:
            fully_initialized_lr_scheduler = self.lr_scheduler_t(
                optimizer=fully_initialized_optimizer
            )
            return {
                "optimizer": fully_initialized_optimizer,
                "lr_scheduler": {
                    "scheduler": fully_initialized_lr_scheduler,
                    "monitor": f"task_{self.task_id}/learning_curve/val/loss_cls",
                    "interval": "epoch",
                    "frequency": 1,
                },
            }

        return {"optimizer": fully_initialized_optimizer}
The base class of continual learning algorithms.
28 def __init__( 29 self, 30 backbone: CLBackbone, 31 heads: HeadsTIL | HeadsCIL | HeadDIL, 32 non_algorithmic_hparams: dict[str, Any] = {}, 33 **kwargs, 34 ) -> None: 35 r""" 36 **Args:** 37 - **backbone** (`CLBackbone`): backbone network. 38 - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads. 39 - **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility. 40 - **kwargs**: Reserved for multiple inheritance. 41 """ 42 super().__init__() 43 self.save_hyperparameters(non_algorithmic_hparams) 44 45 # components 46 self.backbone: CLBackbone = backbone 47 r"""The backbone network.""" 48 self.heads: HeadsTIL | HeadsCIL | HeadDIL = heads 49 r"""The output heads.""" 50 self.optimizer_t: Optimizer 51 r"""Optimizer (partially initialized) for the current task `self.task_id`. Will be equipped with parameters in `configure_optimizers()`.""" 52 self.lr_scheduler_t: LRScheduler | None 53 r"""Learning rate scheduler for the optimizer of the current task `self.task_id`. If `None`, no scheduler is used.""" 54 self.criterion = nn.CrossEntropyLoss() 55 r"""Loss function between the output logits and the target labels. Default is cross-entropy loss.""" 56 57 self.if_forward_func_return_logits_only: bool = False 58 r"""Whether the `forward()` method returns logits only. If `False`, it returns a dictionary containing logits and other information. Default is `False`.""" 59 60 # task ID control 61 self.task_id: int 62 r"""Task ID counter indicating which task is being processed. Self updated during the task loop. 
Valid from 1 to `cl_dataset.num_tasks`.""" 63 self.processed_task_ids: list[int] = [] 64 r"""Task IDs that have been processed.""" 65 66 CLAlgorithm.sanity_check(self)
Args:
- backbone (
CLBackbone): backbone network. - heads (
HeadsTIL|HeadsCIL|HeadDIL): output heads. - non_algorithmic_hparams (
dict[str, Any]): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs via the `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility. - kwargs: Reserved for multiple inheritance.
Optimizer (partially initialized) for the current task self.task_id. Will be equipped with parameters in configure_optimizers().
Learning rate scheduler for the optimizer of the current task self.task_id. If None, no scheduler is used.
Loss function between the output logits and the target labels. Default is cross-entropy loss.
Whether the forward() method returns logits only. If False, it returns a dictionary containing logits and other information. Default is False.
Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to cl_dataset.num_tasks.
68 def sanity_check(self) -> None: 69 r"""Sanity check.""" 70 71 # check backbone and heads compatibility 72 if self.backbone.output_dim != self.heads.input_dim: 73 raise ValueError( 74 "The output_dim of the backbone must equal the input_dim of the CL heads." 75 )
Sanity check.
77 def setup_task_id( 78 self, 79 task_id: int, 80 num_classes: int, 81 optimizer: Optimizer, 82 lr_scheduler: LRScheduler | None, 83 ) -> None: 84 r"""Set up which task the CL experiment is on. This must be done before `forward()` method is called. 85 86 **Args:** 87 - **task_id** (`int`): the target task ID. 88 - **num_classes** (`int`): the number of classes in the task. 89 - **optimizer** (`Optimizer`): the optimizer object (partially initialized) for the task. 90 - **lr_scheduler** (`LRScheduler` | `None`): the learning rate scheduler for the optimizer. If `None`, no scheduler is used. 91 """ 92 self.task_id = task_id 93 self.processed_task_ids.append(task_id) 94 self.backbone.setup_task_id(task_id=task_id) 95 if isinstance(self.heads, HeadsTIL) or isinstance(self.heads, HeadsCIL): 96 self.heads.setup_task_id(task_id, num_classes) 97 elif isinstance(self.heads, HeadDIL) and not self.heads.if_head_setup(): 98 self.heads.setup_task(num_classes) 99 self.optimizer_t = optimizer 100 self.lr_scheduler_t = lr_scheduler
Set up which task the CL experiment is on. This must be done before forward() method is called.
Args:
- task_id (
int): the target task ID. - num_classes (
int): the number of classes in the task. - optimizer (
Optimizer): the optimizer object (partially initialized) for the task. - lr_scheduler (
LRScheduler|None): the learning rate scheduler for the optimizer. IfNone, no scheduler is used.
102 def get_test_task_id_from_dataloader_idx(self, dataloader_idx: int) -> int: 103 r"""Get the test task ID from the dataloader index. 104 105 **Args:** 106 - **dataloader_idx** (`int`): the dataloader index. 107 108 **Returns:** 109 - **test_task_id** (`int`): the test task ID. 110 """ 111 dataset_test = self.trainer.datamodule.dataset_test 112 test_task_id = list(dataset_test.keys())[dataloader_idx] 113 return test_task_id
Get the test task ID from the dataloader index.
Args:
- dataloader_idx (
int): the dataloader index.
Returns:
- test_task_id (
int): the test task ID.
115 def set_forward_func_return_logits_only( 116 self, forward_func_return_logits_only: bool 117 ) -> None: 118 r"""Set whether the `forward()` method returns logits only. This is useful for some CL algorithms that require the forward function to return logits only, such as FG-AdaHAT. 119 120 **Args:** 121 - **forward_func_return_logits_only** (`bool`): whether the `forward()` method returns logits only. If `False`, it returns a dictionary containing logits and other information. 122 """ 123 self.if_forward_func_return_logits_only = forward_func_return_logits_only
Set whether the forward() method returns logits only. This is useful for some CL algorithms that require the forward function to return logits only, such as FG-AdaHAT.
Args:
- forward_func_return_logits_only (
bool): whether the `forward()` method returns logits only. If `False`, it returns a dictionary containing logits and other information.
125 def preceding_layer(self, layer_name: str) -> nn.Module | None: 126 r"""Get the preceding layer of the given layer (including backbone and output heads). If the given layer is the first layer, return `None`. 127 128 **Args:** 129 - **layer_name** (`str`): the name of the layer. 130 131 **Returns:** 132 - **preceding_layer** (`nn.Module` | `None`): the preceding layer. 133 """ 134 135 if layer_name == "heads": 136 backbone_last_layer_name = self.backbone.weighted_layer_names[-1] 137 backbone_last_layer = self.backbone.get_layer_by_name( 138 backbone_last_layer_name 139 ) 140 return backbone_last_layer 141 else: 142 preceding_layer_name = self.backbone.preceding_layer_name(layer_name) 143 preceding_layer = self.backbone.get_layer_by_name(preceding_layer_name) 144 145 return preceding_layer
Get the preceding layer of the given layer (including backbone and output heads). If the given layer is the first layer, return None.
Args:
- layer_name (
str): the name of the layer.
Returns:
- preceding_layer (
nn.Module|None): the preceding layer.
147 def next_layer(self, layer_name: str) -> nn.Module | None: 148 r"""Get the next layer of the given layer (including backbone and output heads). If the given layer is the last layer, return `None`. 149 150 **Args:** 151 - **layer_name** (`str`): the name of the layer. 152 153 **Returns:** 154 - **preceding_layer** (`nn.Module` | `None`): the next layer. 155 """ 156 157 if layer_name == "heads": 158 return None 159 else: 160 next_layer_name = self.backbone.next_layer_name(layer_name) 161 if next_layer_name is not None: 162 next_layer = self.backbone.get_layer_by_name(next_layer_name) 163 else: 164 next_layer = self.heads.get_head(self.task_id) 165 166 return next_layer
Get the next layer of the given layer (including backbone and output heads). If the given layer is the last layer, return None.
Args:
- layer_name (
str): the name of the layer.
Returns:
- next_layer (
nn.Module | None): the next layer.
168 def forward(self, input: Tensor, stage: str, task_id: int | None = None) -> Tensor: 169 r"""The forward pass for data from task `task_id`. Note that it is nothing to do with `forward()` method in `nn.Module`. This definition provides a template that many CL algorithm including the vanilla Finetuning algorithm use. It works both for TIL and CIL. 170 171 **Args:** 172 - **input** (`Tensor`): the input tensor from data. 173 - **stage** (`str`): the stage of the forward pass; one of: 174 1. 'train': training stage. 175 2. 'validation': validation stage. 176 3. 'test': testing stage. 177 - **task_id** (`int`): the task ID where the data are from. If stage is 'train' or `validation`, it is usually from the current task `self.task_id`. If stage is 'test', it could be from any seen task. In TIL, the task IDs of test data are provided thus this argument can be used. In CIL, they are not provided, so it is just a placeholder for API consistence but never used, and best practices are not to provide this argument and leave it as the default value. 178 179 **Returns:** 180 - **logits** (`Tensor`): the output logits tensor. 181 - **activations** (`dict[str, Tensor]`): the hidden features (after activation) in each weighted layer. Key (`str`) is the weighted layer name, value (`Tensor`) is the hidden feature tensor. This is used for the continual learning algorithms that need to use the hidden features for various purposes. 182 """ 183 feature, activations = self.backbone(input, stage=stage, task_id=task_id) 184 logits = self.heads(feature, task_id) 185 return ( 186 logits if self.if_forward_func_return_logits_only else (logits, activations) 187 )
The forward pass for data from task task_id. Note that it is nothing to do with forward() method in nn.Module. This definition provides a template that many CL algorithm including the vanilla Finetuning algorithm use. It works both for TIL and CIL.
Args:
- input (
Tensor): the input tensor from data. - stage (
str): the stage of the forward pass; one of:- 'train': training stage.
- 'validation': validation stage.
- 'test': testing stage.
- task_id (
int): the task ID where the data are from. If stage is 'train' or 'validation', it is usually from the current task `self.task_id`. If stage is 'test', it could be from any seen task. In TIL, the task IDs of test data are provided thus this argument can be used. In CIL, they are not provided, so it is just a placeholder for API consistency but never used, and best practice is not to provide this argument and leave it as the default value.
Returns:
- logits (
Tensor): the output logits tensor. - activations (
dict[str, Tensor]): the hidden features (after activation) in each weighted layer. Key (str) is the weighted layer name, value (Tensor) is the hidden feature tensor. This is used for the continual learning algorithms that need to use the hidden features for various purposes.
189 def configure_optimizers(self) -> Optimizer: 190 r"""Configure optimizer hooks by Lightning. See [Lightning docs](https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers) for more details.""" 191 # finish partially initialized optimizer by specifying model parameters. The `parameters()` method of this `CLAlgorithm` (inherited from `LightningModule`) returns both backbone and heads parameters 192 fully_initialized_optimizer = self.optimizer_t(params=self.parameters()) 193 194 if self.lr_scheduler_t: 195 fully_initialized_lr_scheduler = self.lr_scheduler_t( 196 optimizer=fully_initialized_optimizer 197 ) 198 199 return { 200 "optimizer": fully_initialized_optimizer, 201 "lr_scheduler": { 202 "scheduler": fully_initialized_lr_scheduler, 203 "monitor": f"task_{self.task_id}/learning_curve/val/loss_cls", 204 "interval": "epoch", 205 "frequency": 1, 206 }, 207 } 208 209 return {"optimizer": fully_initialized_optimizer}
Configure optimizer hooks by Lightning. See Lightning docs for more details.
class UnlearnableCLAlgorithm(CLAlgorithm):
    r"""The base class of unlearnable continual learning algorithms."""

    def __init__(
        self,
        backbone: CLBackbone,
        heads: HeadsTIL | HeadsCIL | HeadDIL,
        non_algorithmic_hparams: dict[str, Any] | None = None,
        disable_unlearning: bool = False,
        **kwargs,
    ) -> None:
        r"""
        **Args:**
        - **backbone** (`CLBackbone`): backbone network.
        - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
        - **non_algorithmic_hparams** (`dict[str, Any]` | `None`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs via the `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility. If `None`, an empty dict is used.
        - **disable_unlearning** (`bool`): whether to disable unlearning. This is used in reference experiments following continual learning pipeline. Default is `False`.
        - **kwargs**: Reserved for multiple inheritance.
        """
        # normalize the `None` default here (instead of a mutable `{}` default)
        # so the parent always receives a real dict
        super().__init__(
            backbone=backbone,
            heads=heads,
            non_algorithmic_hparams=(
                non_algorithmic_hparams if non_algorithmic_hparams is not None else {}
            ),
            **kwargs,
        )

        self.disable_unlearning: bool = disable_unlearning
        r"""Whether to disable unlearning. This is used in reference experiments following continual learning pipeline."""

        if not self.disable_unlearning:
            self.unlearning_task_ids: list[int]
            r"""The list of task IDs that are requested to be unlearned after training `self.task_id`."""

            self.unlearned_task_ids: set[int] = set()
            r"""The set of task IDs that have been unlearned in the experiment."""

            self.unlearnable_task_ids: list[int]
            r"""The list of task IDs that are unlearnable at the current `self.task_id`."""

            self.task_ids_just_no_longer_unlearnable: list[int]
            r"""The list of task IDs that are just no longer unlearnable at the current `self.task_id`."""

        # call this class's own check explicitly so subclass overrides of
        # `sanity_check()` cannot suppress it
        UnlearnableCLAlgorithm.sanity_check(self)

    def sanity_check(self) -> None:
        r"""Sanity check. Intentionally empty; subclasses may override with their own checks."""

    def aggregated_backbone_output(self, input: Tensor) -> Tensor:
        r"""Get the aggregated backbone output for the input data. All parts of backbones should be aggregated together.

        This output feature is used for measuring unlearning metrics, such as Distribution Distance (DD). An aggregated output involving every part of the backbone is needed to ensure the fairness of the metric.

        **Args:**
        - **input** (`Tensor`): the input tensor from data.

        **Returns:**
        - **output** (`Tensor`): the aggregated backbone output tensor, the mean of the per-task features over `self.processed_task_ids`.
        """
        feature = 0
        # detect whether the backbone's forward() accepts a `test_task_id`
        # argument, either explicitly or via **kwargs
        forward_params = inspect.signature(self.backbone.forward).parameters
        supports_test_task_id = "test_task_id" in forward_params or any(
            p.kind == inspect.Parameter.VAR_KEYWORD for p in forward_params.values()
        )

        for i in self.processed_task_ids:
            if supports_test_task_id:
                feature_i = self.backbone(
                    input, stage="unlearning_test", test_task_id=i
                )[0]
            else:
                feature_i = self.backbone(input, stage="unlearning_test")[0]
            feature += feature_i
        # NOTE(review): raises ZeroDivisionError if no task has been processed
        # yet; presumably this is only called after training begins — confirm
        feature = feature / len(self.processed_task_ids)

        return feature
The base class of unlearnable continual learning algorithms.
215 def __init__( 216 self, 217 backbone: CLBackbone, 218 heads: HeadsTIL | HeadsCIL | HeadDIL, 219 non_algorithmic_hparams: dict[str, Any] = {}, 220 disable_unlearning: bool = False, 221 **kwargs, 222 ) -> None: 223 r""" 224 **Args:** 225 - **backbone** (`CLBackbone`): backbone network. 226 - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads. 227 - **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility. 228 - **disable_unlearning** (`bool`): whether to disable unlearning. This is used in reference experiments following continual learning pipeline. Default is `False`. 229 - **kwargs**: Reserved for multiple inheritance. 230 """ 231 super().__init__( 232 backbone=backbone, 233 heads=heads, 234 non_algorithmic_hparams=non_algorithmic_hparams, 235 **kwargs, 236 ) 237 238 self.disable_unlearning: bool = disable_unlearning 239 r"""Whether to disable unlearning. This is used in reference experiments following continual learning pipeline.""" 240 241 if not self.disable_unlearning: 242 self.unlearning_task_ids: list[int] 243 r"""The list of task IDs that are requested to be unlearned after training `self.task_id`.""" 244 245 self.unlearned_task_ids: set[int] = set() 246 r"""The list of task IDs that have been unlearned in the experiment.""" 247 248 self.unlearnable_task_ids: list[int] 249 r"""The list of task IDs that are unlearnable at the current `self.task_id`.""" 250 251 self.task_ids_just_no_longer_unlearnable: list[int] 252 r"""The list of task IDs that are just no longer unlearnable at the current `self.task_id`.""" 253 254 UnlearnableCLAlgorithm.sanity_check(self)
Args:
- backbone (
CLBackbone): backbone network. - heads (
HeadsTIL|HeadsCIL|HeadDIL): output heads. - non_algorithmic_hparams (
dict[str, Any]): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs via the `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility. - disable_unlearning (
bool): whether to disable unlearning. This is used in reference experiments following the continual learning pipeline. Default is `False`. - kwargs: Reserved for multiple inheritance.
Whether to disable unlearning. This is used in reference experiments following continual learning pipeline.
259 def aggregated_backbone_output(self, input: Tensor) -> Tensor: 260 r"""Get the aggregated backbone output for the input data. All parts of backbones should be aggregated together. 261 262 This output feature is used for measuring unlearning metrics, such as Distribution Distance (DD). An aggregated output involving every part of the backbone is needed to ensure the fairness of the metric. 263 264 **Args:** 265 - **input** (`Tensor`): the input tensor from data. 266 267 **Returns:** 268 - **output** (`Tensor`): the aggregated backbone output tensor. 269 """ 270 feature = 0 271 forward_params = inspect.signature(self.backbone.forward).parameters 272 supports_test_task_id = ( 273 "test_task_id" in forward_params 274 or any( 275 p.kind == inspect.Parameter.VAR_KEYWORD 276 for p in forward_params.values() 277 ) 278 ) 279 280 for i in self.processed_task_ids: 281 if supports_test_task_id: 282 feature_i = self.backbone( 283 input, stage="unlearning_test", test_task_id=i 284 )[0] 285 else: 286 feature_i = self.backbone(input, stage="unlearning_test")[0] 287 feature += feature_i 288 feature = feature / len(self.processed_task_ids) 289 290 return feature
Get the aggregated backbone output for the input data. All parts of backbones should be aggregated together.
This output feature is used for measuring unlearning metrics, such as Distribution Distance (DD). An aggregated output involving every part of the backbone is needed to ensure the fairness of the metric.
Args:
- input (
Tensor): the input tensor from data.
Returns:
- output (
Tensor): the aggregated backbone output tensor.
class AmnesiacCLAlgorithm(UnlearnableCLAlgorithm):
    r"""The base class of Amnesiac continual learning algorithms.

    The Amnesiac continual learning algorithm refers to the corresponding continual learning model that the Amnesiac continual unlearning algorithm requires. The Amnesiac continual unlearning algorithm refers to an update-deletion operation that directly deletes the parameter updates made during a task's training. This is inspired by [AmnesiacML](https://arxiv.org/abs/2010.10981) in machine unlearning. In detail, the task-wise parameter updates are stored:

    $$\theta_{l,ij}^{(t)} = \theta_{l,ij}^{(0)} + \sum_{\tau=1}^{t} \Delta \theta_{l,ij}^{(\tau)}$$

    To unlearn $u(t)$, delete these updates:

    $$\theta_{l,ij}^{(t-u(t))} = \theta_{l,ij}^{(t)} - \sum_{\tau\in u(t)}\Delta \theta_{l,ij}^{(\tau)}$$

    It is mainly used in AmnesiacHAT, but can also be used in constructing other vanilla baseline continual unlearning algorithms based on different continual learning algorithms.
    """

    def __init__(
        self,
        backbone: CLBackbone,
        heads: HeadsTIL | HeadsCIL | HeadDIL,
        non_algorithmic_hparams: dict[str, Any] | None = None,
        disable_unlearning: bool = False,
        **kwargs,
    ) -> None:
        r"""
        **Args:**
        - **backbone** (`CLBackbone`): backbone network.
        - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
        - **non_algorithmic_hparams** (`dict[str, Any]` | `None`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from the `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility. Defaults to `None`, which is treated as an empty dict.
        - **disable_unlearning** (`bool`): whether to disable unlearning. This is used in reference experiments following the continual learning pipeline. Default is `False`.
        - **kwargs**: reserved for multiple inheritance.
        """
        # `None` sentinel instead of a `{}` default: a mutable default would be
        # shared across every instance of the class.
        if non_algorithmic_hparams is None:
            non_algorithmic_hparams = {}

        super().__init__(
            backbone=backbone,
            heads=heads,
            non_algorithmic_hparams=non_algorithmic_hparams,
            disable_unlearning=disable_unlearning,
            **kwargs,
        )

        # deepcopy: snapshots must not alias the live module parameters.
        self.original_backbone_state_dict: dict[str, Tensor] = deepcopy(
            backbone.state_dict()
        )
        r"""Store the original backbone network state dict. It is a dict where keys are parameter names and values are the corresponding parameter tensor for the layer. """

        self.original_heads_state_dict: dict[str, Tensor] = deepcopy(heads.state_dict())
        r"""Store the original heads state dict. It is a dict where keys are parameter names and values are the corresponding parameter tensor for the head. """

        self.parameters_task_update: dict[int, dict[str, Tensor]] = {}
        r"""Store the backbone parameters update in each task. Keys are task IDs and values are dicts where keys are parameter names and values are the corresponding parameter update tensor for the layer. """

        self.parameters_task_update_heads: dict[int, dict[str, Tensor]] = {}
        r"""Store the heads parameters update in each task. Keys are task IDs and values are dicts where keys are parameter names and values are the corresponding parameter update tensor for the head. """

        self.state_dict_task_start: dict[str, Tensor]
        r"""Store the backbone state dict at the start of training each task. """

        self.heads_state_dict_task_start: dict[str, Tensor]
        r"""Store the heads state dict at the start of training each task. """

    def _record_new_head_parameters(self) -> None:
        r"""Record the initial parameters for any newly created heads.

        This applies to TIL and CIL settings where new heads are created for new tasks.
        """
        # Only parameter names not seen before are recorded, so existing
        # entries in `original_heads_state_dict` are never overwritten.
        current_heads_state_dict = self.heads.state_dict()
        for param_name, param_tensor in current_heads_state_dict.items():
            if param_name not in self.original_heads_state_dict:
                self.original_heads_state_dict[param_name] = deepcopy(param_tensor)

    def setup_task_id(
        self,
        task_id: int,
        num_classes: int,
        optimizer: Optimizer,
        lr_scheduler: LRScheduler | None,
    ) -> None:
        r"""Set up which task the CL experiment is on. This must be done before `forward()` method is called."""
        super().setup_task_id(
            task_id=task_id,
            num_classes=num_classes,
            optimizer=optimizer,
            lr_scheduler=lr_scheduler,
        )

        if not self.disable_unlearning:
            # record initial parameters of any newly created heads for later reconstruction
            self._record_new_head_parameters()

    def on_train_start(self):
        r"""Store the current state dict at the start of training."""
        super().on_train_start()

        if not self.disable_unlearning:
            # deepcopy: these snapshots are the subtraction baseline in
            # `on_train_end()` and must not track in-place parameter updates.
            self.state_dict_task_start = deepcopy(self.backbone.state_dict())
            self.heads_state_dict_task_start = deepcopy(self.heads.state_dict())

    def on_train_end(self):
        r"""Store the parameters update of a task at the end of its training."""
        super().on_train_end()

        if not self.disable_unlearning:
            current_state_dict = self.backbone.state_dict()
            parameters_task_t_update = {}

            # compute the parameters update for the current task
            for layer_name, current_param_tensor in current_state_dict.items():
                # skip backup copies of backbones; presumably kept by the
                # unlearning machinery and not part of the trainable delta
                # — TODO confirm against `backup_backbones` owner.
                if layer_name.startswith("backup_backbones."):
                    continue
                parameters_task_t_update[layer_name] = (
                    current_param_tensor - self.state_dict_task_start[layer_name]
                )

            # store the parameters update for the current task
            self.parameters_task_update[self.task_id] = parameters_task_t_update

            # compute the heads parameters update for the current task
            current_heads_state_dict = self.heads.state_dict()
            parameters_task_t_update_heads = {}
            for param_name, current_param_tensor in current_heads_state_dict.items():
                # heads created after `on_train_start()` have no baseline to
                # diff against; warn and skip rather than raise
                if param_name not in self.heads_state_dict_task_start:
                    pylogger.warning(
                        "Head parameter %s was not found in task start state dict.",
                        param_name,
                    )
                    continue
                parameters_task_t_update_heads[param_name] = (
                    current_param_tensor - self.heads_state_dict_task_start[param_name]
                )

            # store the heads parameters update for the current task
            self.parameters_task_update_heads[self.task_id] = (
                parameters_task_t_update_heads
            )
The base class of Amnesiac continual learning algorithms.
The Amnesiac continual learning algorithm refers to the corresponding continual learning model that the Amnesiac continual unlearning algorithm requires. The Amnesiac continual unlearning algorithm refers to an update-deletion operation that directly deletes the parameter updates made during a task's training. This is inspired by AmnesiacML in machine unlearning. In detail, the task-wise parameter updates are stored:
$$\theta_{l,ij}^{(t)} = \theta_{l,ij}^{(0)} + \sum_{\tau=1}^{t} \Delta \theta_{l,ij}^{(\tau)}$$
To unlearn $u(t)$, delete these updates:
$$\theta_{l,ij}^{(t-u(t))} = \theta_{l,ij}^{(t)} - \sum_{\tau\in u(t)}\Delta \theta_{l,ij}^{(\tau)}$$
It is mainly used in AmnesiacHAT, but can also be used in constructing other vanilla baseline continual unlearning algorithms based on different continual learning algorithms.
307 def __init__( 308 self, 309 backbone: CLBackbone, 310 heads: HeadsTIL | HeadsCIL | HeadDIL, 311 non_algorithmic_hparams: dict[str, Any] = {}, 312 disable_unlearning: bool = False, 313 **kwargs, 314 ) -> None: 315 r""" 316 **Args:** 317 - **backbone** (`CLBackbone`): backbone network. 318 - **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads. 319 - **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility. 320 - **disable_unlearning** (`bool`): whether to disable unlearning. This is used in reference experiments following continual learning pipeline. Default is `False`. 321 - **kwargs**: Reserved for multiple inheritance. 322 """ 323 super().__init__( 324 backbone=backbone, 325 heads=heads, 326 non_algorithmic_hparams=non_algorithmic_hparams, 327 disable_unlearning=disable_unlearning, 328 **kwargs, 329 ) 330 331 self.original_backbone_state_dict: dict[str, Tensor] = deepcopy( 332 backbone.state_dict() 333 ) 334 r"""Store the original backbone network state dict. It is a dict where keys are parameter names and values are the corresponding parameter update tensor for the layer. """ 335 336 self.original_heads_state_dict: dict[str, Tensor] = deepcopy(heads.state_dict()) 337 r"""Store the original heads state dict. It is a dict where keys are parameter names and values are the corresponding parameter update tensor for the head. """ 338 339 self.parameters_task_update: dict[int, dict[str, Tensor]] = {} 340 r"""Store the parameters update in each task. Keys are task IDs and values are the corresponding parameters update tensor. 
Each tensor is a dict where keys are parameter names and values are the corresponding parameter update tensor for the layer. """ 341 342 self.parameters_task_update_heads: dict[int, dict[str, Tensor]] = {} 343 r"""Store the heads parameters update in each task. Keys are task IDs and values are the corresponding parameters update tensor. Each tensor is a dict where keys are parameter names and values are the corresponding parameter update tensor for the head. """ 344 345 self.state_dict_task_start: dict[str, Tensor] 346 r"""Store the backbone state dict at the start of training each task. """ 347 348 self.heads_state_dict_task_start: dict[str, Tensor] 349 r"""Store the heads state dict at the start of training each task. """
**Args:**
- **backbone** (`CLBackbone`): backbone network.
- **heads** (`HeadsTIL` | `HeadsCIL` | `HeadDIL`): output heads.
- **non_algorithmic_hparams** (`dict[str, Any]`): non-algorithmic hyperparameters that are not related to the algorithm itself are passed to this `LightningModule` object from the config, such as optimizer and learning rate scheduler configurations. They are saved for Lightning APIs from the `save_hyperparameters()` method. This is useful for the experiment configuration and reproducibility.
- **disable_unlearning** (`bool`): whether to disable unlearning. This is used in reference experiments following the continual learning pipeline. Default is `False`.
- **kwargs**: reserved for multiple inheritance.
Store the original backbone network state dict. It is a dict where keys are parameter names and values are the corresponding parameter update tensor for the layer.
Store the original heads state dict. It is a dict where keys are parameter names and values are the corresponding parameter update tensor for the head.
Store the parameters update in each task. Keys are task IDs and values are the corresponding parameters update tensor. Each tensor is a dict where keys are parameter names and values are the corresponding parameter update tensor for the layer.
Store the heads parameters update in each task. Keys are task IDs and values are the corresponding parameters update tensor. Each tensor is a dict where keys are parameter names and values are the corresponding parameter update tensor for the head.
Store the backbone state dict at the start of training each task.
Store the heads state dict at the start of training each task.
358 def setup_task_id( 359 self, 360 task_id: int, 361 num_classes: int, 362 optimizer: Optimizer, 363 lr_scheduler: LRScheduler | None, 364 ) -> None: 365 r"""Set up which task the CL experiment is on. This must be done before `forward()` method is called.""" 366 super().setup_task_id( 367 task_id=task_id, 368 num_classes=num_classes, 369 optimizer=optimizer, 370 lr_scheduler=lr_scheduler, 371 ) 372 373 if not self.disable_unlearning: 374 # record initial parameters of any newly created heads for later reconstruction 375 self._record_new_head_parameters()
Set up which task the CL experiment is on. This must be done before forward() method is called.
377 def on_train_start(self): 378 r"""Store the current state dict at the start of training.""" 379 super().on_train_start() 380 381 if not self.disable_unlearning: 382 self.state_dict_task_start = deepcopy(self.backbone.state_dict()) 383 self.heads_state_dict_task_start = deepcopy(self.heads.state_dict())
Store the current state dict at the start of training.
385 def on_train_end(self): 386 r"""Store the parameters update of a task at the end of its training.""" 387 super().on_train_end() 388 389 if not self.disable_unlearning: 390 current_state_dict = self.backbone.state_dict() 391 parameters_task_t_update = {} 392 393 # compute the parameters update for the current task 394 for layer_name, current_param_tensor in current_state_dict.items(): 395 if layer_name.startswith("backup_backbones."): 396 continue 397 parameters_task_t_update[layer_name] = ( 398 current_param_tensor - self.state_dict_task_start[layer_name] 399 ) 400 401 # store the parameters update for the current task 402 self.parameters_task_update[self.task_id] = parameters_task_t_update 403 404 # compute the heads parameters update for the current task 405 current_heads_state_dict = self.heads.state_dict() 406 parameters_task_t_update_heads = {} 407 for param_name, current_param_tensor in current_heads_state_dict.items(): 408 if param_name not in self.heads_state_dict_task_start: 409 pylogger.warning( 410 "Head parameter %s was not found in task start state dict.", 411 param_name, 412 ) 413 continue 414 parameters_task_t_update_heads[param_name] = ( 415 current_param_tensor - self.heads_state_dict_task_start[param_name] 416 ) 417 418 # store the heads parameters update for the current task 419 self.parameters_task_update_heads[self.task_id] = ( 420 parameters_task_t_update_heads 421 )
Store the parameters update of a task at the end of its training.