clarena.backbones.mlp

The submodule in backbones for the basic MLP backbone network. It includes the basic MLP and the continual learning MLP.

  1r"""
  2The submodule in `backbones` for the basic MLP backbone network. It includes the basic MLP and the continual learning MLP.
  3"""
  4
  5__all__ = ["MLP", "CLMLP"]
  6
  7import logging
  8
  9from torch import Tensor, nn
 10
 11from clarena.backbones import Backbone, CLBackbone
 12
 13# always get logger for built-in logging in each module
 14pylogger = logging.getLogger(__name__)
 15
 16
 17class MLP(Backbone):
 18    """Multi-layer perceptron (MLP), a.k.a. fully connected network.
 19
 20    MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the output heads.
 21    """
 22
 23    def __init__(
 24        self,
 25        input_dim: int,
 26        hidden_dims: list[int],
 27        output_dim: int,
 28        activation_layer: nn.Module | None = nn.ReLU,
 29        batch_normalization: bool = False,
 30        bias: bool = True,
 31        dropout: float | None = None,
 32        **kwargs,
 33    ) -> None:
 34        r"""Construct and initialize the MLP backbone network.
 35
 36        **Args:**
 37        - **input_dim** (`int`): The input dimension. Any data need to be flattened before entering the MLP. Note that it is not required in convolutional networks.
 38        - **hidden_dims** (`list[int]`): List of hidden layer dimensions. It can be an empty list, which means a single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension, which we take as the output dimension.
 39        - **output_dim** (`int`): The output dimension that connects to output heads.
 40        - **activation_layer** (`nn.Module` | `None`): Activation function of each layer (if not `None`). If `None`, this layer won't be used. Default `nn.ReLU`.
 41        - **batch_normalization** (`bool`): Whether to use batch normalization after the fully connected layers. Default `False`.
 42        - **bias** (`bool`): Whether to use bias in the linear layer. Default `True`.
 43        - **dropout** (`float` | `None`): The probability for the dropout layer. If `None`, this layer won't be used. Default `None`.
 44        - **kwargs**: Reserved for multiple inheritance.
 45        """
 46        super().__init__(output_dim=output_dim, **kwargs)
 47
 48        self.input_dim: int = input_dim
 49        r"""The input dimension of the MLP backbone network."""
 50        self.hidden_dims: list[int] = hidden_dims
 51        r"""The hidden dimensions of the MLP backbone network."""
 52        self.output_dim: int = output_dim
 53        r"""The output dimension of the MLP backbone network."""
 54
 55        self.num_fc_layers: int = len(hidden_dims) + 1
 56        r"""The number of fully-connected layers in the MLP backbone network, which helps form the loops in constructing layers and forward pass."""
 57        self.batch_normalization: bool = batch_normalization
 58        r"""Whether to use batch normalization after the fully-connected layers."""
 59        self.activation: bool = activation_layer is not None
 60        r"""Whether to use activation function after the fully-connected layers."""
 61        self.dropout: bool = dropout is not None
 62        r"""Whether to use dropout after the fully-connected layers."""
 63
 64        self.fc: nn.ModuleList = nn.ModuleList()
 65        r"""The list of fully connected (`nn.Linear`) layers."""
 66        if self.batch_normalization:
 67            self.fc_bn: nn.ModuleList = nn.ModuleList()
 68            r"""The list of batch normalization (`nn.BatchNorm1d`) layers after the fully connected layers."""
 69        if self.activation:
 70            self.fc_activation: nn.ModuleList = nn.ModuleList()
 71            r"""The list of activation layers after the fully connected layers."""
 72        if self.dropout:
 73            self.fc_dropout: nn.ModuleList = nn.ModuleList()
 74            r"""The list of dropout layers after the fully connected layers."""
 75
 76        # construct the weighted fully connected layers and attached layers (batch norm, activation, dropout, etc.) in a loop
 77        for layer_idx in range(self.num_fc_layers):
 78
 79            # the input and output dim of the current weighted layer
 80            layer_input_dim = (
 81                self.input_dim if layer_idx == 0 else self.hidden_dims[layer_idx - 1]
 82            )
 83            layer_output_dim = (
 84                self.hidden_dims[layer_idx]
 85                if layer_idx != len(self.hidden_dims)
 86                else self.output_dim
 87            )
 88
 89            # construct the fully connected layer
 90            self.fc.append(
 91                nn.Linear(
 92                    in_features=layer_input_dim,
 93                    out_features=layer_output_dim,
 94                    bias=bias,
 95                )
 96            )
 97
 98            # update the weighted layer names
 99            full_layer_name = f"fc/{layer_idx}"
100            self.weighted_layer_names.append(full_layer_name)
101
102            # construct the batch normalization layer
103            if self.batch_normalization:
104                self.fc_bn.append(nn.BatchNorm1d(num_features=(layer_output_dim)))
105
106            # construct the activation layer
107            if self.activation:
108                self.fc_activation.append(activation_layer())
109
110            # construct the dropout layer
111            if self.dropout:
112                self.fc_dropout.append(nn.Dropout(dropout))
113
114    def forward(
115        self, input: Tensor, stage: str = None
116    ) -> tuple[Tensor, dict[str, Tensor]]:
117        r"""The forward pass for data. It is the same for all tasks.
118
119        **Args:**
120        - **input** (`Tensor`): The input tensor from data.
121
122        **Returns:**
123        - **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation.
124        - **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Keys (`str`) are the weighted layer names and values (`Tensor`) are the hidden feature tensors. This is used for certain algorithms that need to use hidden features for various purposes.
125        """
126        batch_size = input.size(0)
127        activations = {}
128
129        x = input.view(batch_size, -1)  # flatten before going through MLP
130
131        for layer_idx, layer_name in enumerate(self.weighted_layer_names):
132            x = self.fc[layer_idx](x)  # fully-connected layer first
133            if self.batch_normalization:
134                x = self.fc_bn[layer_idx](
135                    x
136                )  # batch normalization can be before or after activation. We put it before activation here
137            if self.activation:
138                x = self.fc_activation[layer_idx](x)  # activation function third
139            activations[layer_name] = x  # store the hidden feature
140            if self.dropout:
141                x = self.fc_dropout[layer_idx](x)  # dropout last
142
143        output_feature = x
144
145        return output_feature, activations
146
147
class CLMLP(CLBackbone, MLP):
    """Multi-layer perceptron (MLP), a.k.a. fully connected network. Used as a continual learning backbone.

    MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the CL output heads.
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dims: list[int],
        output_dim: int,
        activation_layer: type[nn.Module] | None = nn.ReLU,
        batch_normalization: bool = False,
        bias: bool = True,
        dropout: float | None = None,
        **kwargs,
    ) -> None:
        r"""Construct and initialize the CLMLP backbone network.

        **Args:**
        - **input_dim** (`int`): The input dimension. Any data need to be flattened before entering the MLP. Note that it is not required in convolutional networks.
        - **hidden_dims** (`list[int]`): List of hidden layer dimensions. It can be an empty list, which means a single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension, which we take as the output dimension.
        - **output_dim** (`int`): The output dimension that connects to CL output heads.
        - **activation_layer** (`type[nn.Module]` | `None`): Activation layer class (not instance) applied after each layer (if not `None`). If `None`, this layer won't be used. Default `nn.ReLU`.
        - **batch_normalization** (`bool`): Whether to use batch normalization after the fully-connected layers. Default `False`.
        - **bias** (`bool`): Whether to use bias in the linear layer. Default `True`.
        - **dropout** (`float` | `None`): The probability for the dropout layer. If `None`, this layer won't be used. Default `None`.
        - **kwargs**: Reserved for multiple inheritance.
        """
        # cooperative multiple inheritance: forwards all MLP arguments up the MRO
        # (CLBackbone first, then MLP)
        super().__init__(
            input_dim=input_dim,
            hidden_dims=hidden_dims,
            output_dim=output_dim,
            activation_layer=activation_layer,
            batch_normalization=batch_normalization,
            bias=bias,
            dropout=dropout,
            **kwargs,
        )

    def forward(
        self, input: Tensor, stage: str | None = None, task_id: int | None = None
    ) -> tuple[Tensor, dict[str, Tensor]]:
        r"""The forward pass for data. It is the same for all tasks.

        **Args:**
        - **input** (`Tensor`): The input tensor from data.
        - **stage** (`str` | `None`): Unused. Kept for API compatibility with other backbones.
        - **task_id** (`int` | `None`): Unused. Kept for API compatibility with other continual learning backbones.

        **Returns:**
        - **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation.
        - **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Key (`str`) is the weighted layer name; value (`Tensor`) is the hidden feature tensor. This is used for continual learning algorithms that need hidden features for various purposes.
        """
        return MLP.forward(self, input, stage)  # call the MLP forward method
class MLP(clarena.backbones.base.Backbone):
 18class MLP(Backbone):
 19    """Multi-layer perceptron (MLP), a.k.a. fully connected network.
 20
 21    MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the output heads.
 22    """
 23
 24    def __init__(
 25        self,
 26        input_dim: int,
 27        hidden_dims: list[int],
 28        output_dim: int,
 29        activation_layer: nn.Module | None = nn.ReLU,
 30        batch_normalization: bool = False,
 31        bias: bool = True,
 32        dropout: float | None = None,
 33        **kwargs,
 34    ) -> None:
 35        r"""Construct and initialize the MLP backbone network.
 36
 37        **Args:**
 38        - **input_dim** (`int`): The input dimension. Any data need to be flattened before entering the MLP. Note that it is not required in convolutional networks.
 39        - **hidden_dims** (`list[int]`): List of hidden layer dimensions. It can be an empty list, which means a single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension, which we take as the output dimension.
 40        - **output_dim** (`int`): The output dimension that connects to output heads.
 41        - **activation_layer** (`nn.Module` | `None`): Activation function of each layer (if not `None`). If `None`, this layer won't be used. Default `nn.ReLU`.
 42        - **batch_normalization** (`bool`): Whether to use batch normalization after the fully connected layers. Default `False`.
 43        - **bias** (`bool`): Whether to use bias in the linear layer. Default `True`.
 44        - **dropout** (`float` | `None`): The probability for the dropout layer. If `None`, this layer won't be used. Default `None`.
 45        - **kwargs**: Reserved for multiple inheritance.
 46        """
 47        super().__init__(output_dim=output_dim, **kwargs)
 48
 49        self.input_dim: int = input_dim
 50        r"""The input dimension of the MLP backbone network."""
 51        self.hidden_dims: list[int] = hidden_dims
 52        r"""The hidden dimensions of the MLP backbone network."""
 53        self.output_dim: int = output_dim
 54        r"""The output dimension of the MLP backbone network."""
 55
 56        self.num_fc_layers: int = len(hidden_dims) + 1
 57        r"""The number of fully-connected layers in the MLP backbone network, which helps form the loops in constructing layers and forward pass."""
 58        self.batch_normalization: bool = batch_normalization
 59        r"""Whether to use batch normalization after the fully-connected layers."""
 60        self.activation: bool = activation_layer is not None
 61        r"""Whether to use activation function after the fully-connected layers."""
 62        self.dropout: bool = dropout is not None
 63        r"""Whether to use dropout after the fully-connected layers."""
 64
 65        self.fc: nn.ModuleList = nn.ModuleList()
 66        r"""The list of fully connected (`nn.Linear`) layers."""
 67        if self.batch_normalization:
 68            self.fc_bn: nn.ModuleList = nn.ModuleList()
 69            r"""The list of batch normalization (`nn.BatchNorm1d`) layers after the fully connected layers."""
 70        if self.activation:
 71            self.fc_activation: nn.ModuleList = nn.ModuleList()
 72            r"""The list of activation layers after the fully connected layers."""
 73        if self.dropout:
 74            self.fc_dropout: nn.ModuleList = nn.ModuleList()
 75            r"""The list of dropout layers after the fully connected layers."""
 76
 77        # construct the weighted fully connected layers and attached layers (batch norm, activation, dropout, etc.) in a loop
 78        for layer_idx in range(self.num_fc_layers):
 79
 80            # the input and output dim of the current weighted layer
 81            layer_input_dim = (
 82                self.input_dim if layer_idx == 0 else self.hidden_dims[layer_idx - 1]
 83            )
 84            layer_output_dim = (
 85                self.hidden_dims[layer_idx]
 86                if layer_idx != len(self.hidden_dims)
 87                else self.output_dim
 88            )
 89
 90            # construct the fully connected layer
 91            self.fc.append(
 92                nn.Linear(
 93                    in_features=layer_input_dim,
 94                    out_features=layer_output_dim,
 95                    bias=bias,
 96                )
 97            )
 98
 99            # update the weighted layer names
100            full_layer_name = f"fc/{layer_idx}"
101            self.weighted_layer_names.append(full_layer_name)
102
103            # construct the batch normalization layer
104            if self.batch_normalization:
105                self.fc_bn.append(nn.BatchNorm1d(num_features=(layer_output_dim)))
106
107            # construct the activation layer
108            if self.activation:
109                self.fc_activation.append(activation_layer())
110
111            # construct the dropout layer
112            if self.dropout:
113                self.fc_dropout.append(nn.Dropout(dropout))
114
115    def forward(
116        self, input: Tensor, stage: str = None
117    ) -> tuple[Tensor, dict[str, Tensor]]:
118        r"""The forward pass for data. It is the same for all tasks.
119
120        **Args:**
121        - **input** (`Tensor`): The input tensor from data.
122
123        **Returns:**
124        - **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation.
125        - **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Keys (`str`) are the weighted layer names and values (`Tensor`) are the hidden feature tensors. This is used for certain algorithms that need to use hidden features for various purposes.
126        """
127        batch_size = input.size(0)
128        activations = {}
129
130        x = input.view(batch_size, -1)  # flatten before going through MLP
131
132        for layer_idx, layer_name in enumerate(self.weighted_layer_names):
133            x = self.fc[layer_idx](x)  # fully-connected layer first
134            if self.batch_normalization:
135                x = self.fc_bn[layer_idx](
136                    x
137                )  # batch normalization can be before or after activation. We put it before activation here
138            if self.activation:
139                x = self.fc_activation[layer_idx](x)  # activation function third
140            activations[layer_name] = x  # store the hidden feature
141            if self.dropout:
142                x = self.fc_dropout[layer_idx](x)  # dropout last
143
144        output_feature = x
145
146        return output_feature, activations

Multi-layer perceptron (MLP), a.k.a. fully connected network.

MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the output heads.

MLP( input_dim: int, hidden_dims: list[int], output_dim: int, activation_layer: torch.nn.modules.module.Module | None = <class 'torch.nn.modules.activation.ReLU'>, batch_normalization: bool = False, bias: bool = True, dropout: float | None = None, **kwargs)
 24    def __init__(
 25        self,
 26        input_dim: int,
 27        hidden_dims: list[int],
 28        output_dim: int,
 29        activation_layer: nn.Module | None = nn.ReLU,
 30        batch_normalization: bool = False,
 31        bias: bool = True,
 32        dropout: float | None = None,
 33        **kwargs,
 34    ) -> None:
 35        r"""Construct and initialize the MLP backbone network.
 36
 37        **Args:**
 38        - **input_dim** (`int`): The input dimension. Any data need to be flattened before entering the MLP. Note that it is not required in convolutional networks.
 39        - **hidden_dims** (`list[int]`): List of hidden layer dimensions. It can be an empty list, which means a single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension, which we take as the output dimension.
 40        - **output_dim** (`int`): The output dimension that connects to output heads.
 41        - **activation_layer** (`nn.Module` | `None`): Activation function of each layer (if not `None`). If `None`, this layer won't be used. Default `nn.ReLU`.
 42        - **batch_normalization** (`bool`): Whether to use batch normalization after the fully connected layers. Default `False`.
 43        - **bias** (`bool`): Whether to use bias in the linear layer. Default `True`.
 44        - **dropout** (`float` | `None`): The probability for the dropout layer. If `None`, this layer won't be used. Default `None`.
 45        - **kwargs**: Reserved for multiple inheritance.
 46        """
 47        super().__init__(output_dim=output_dim, **kwargs)
 48
 49        self.input_dim: int = input_dim
 50        r"""The input dimension of the MLP backbone network."""
 51        self.hidden_dims: list[int] = hidden_dims
 52        r"""The hidden dimensions of the MLP backbone network."""
 53        self.output_dim: int = output_dim
 54        r"""The output dimension of the MLP backbone network."""
 55
 56        self.num_fc_layers: int = len(hidden_dims) + 1
 57        r"""The number of fully-connected layers in the MLP backbone network, which helps form the loops in constructing layers and forward pass."""
 58        self.batch_normalization: bool = batch_normalization
 59        r"""Whether to use batch normalization after the fully-connected layers."""
 60        self.activation: bool = activation_layer is not None
 61        r"""Whether to use activation function after the fully-connected layers."""
 62        self.dropout: bool = dropout is not None
 63        r"""Whether to use dropout after the fully-connected layers."""
 64
 65        self.fc: nn.ModuleList = nn.ModuleList()
 66        r"""The list of fully connected (`nn.Linear`) layers."""
 67        if self.batch_normalization:
 68            self.fc_bn: nn.ModuleList = nn.ModuleList()
 69            r"""The list of batch normalization (`nn.BatchNorm1d`) layers after the fully connected layers."""
 70        if self.activation:
 71            self.fc_activation: nn.ModuleList = nn.ModuleList()
 72            r"""The list of activation layers after the fully connected layers."""
 73        if self.dropout:
 74            self.fc_dropout: nn.ModuleList = nn.ModuleList()
 75            r"""The list of dropout layers after the fully connected layers."""
 76
 77        # construct the weighted fully connected layers and attached layers (batch norm, activation, dropout, etc.) in a loop
 78        for layer_idx in range(self.num_fc_layers):
 79
 80            # the input and output dim of the current weighted layer
 81            layer_input_dim = (
 82                self.input_dim if layer_idx == 0 else self.hidden_dims[layer_idx - 1]
 83            )
 84            layer_output_dim = (
 85                self.hidden_dims[layer_idx]
 86                if layer_idx != len(self.hidden_dims)
 87                else self.output_dim
 88            )
 89
 90            # construct the fully connected layer
 91            self.fc.append(
 92                nn.Linear(
 93                    in_features=layer_input_dim,
 94                    out_features=layer_output_dim,
 95                    bias=bias,
 96                )
 97            )
 98
 99            # update the weighted layer names
100            full_layer_name = f"fc/{layer_idx}"
101            self.weighted_layer_names.append(full_layer_name)
102
103            # construct the batch normalization layer
104            if self.batch_normalization:
105                self.fc_bn.append(nn.BatchNorm1d(num_features=(layer_output_dim)))
106
107            # construct the activation layer
108            if self.activation:
109                self.fc_activation.append(activation_layer())
110
111            # construct the dropout layer
112            if self.dropout:
113                self.fc_dropout.append(nn.Dropout(dropout))

Construct and initialize the MLP backbone network.

Args:

  • input_dim (int): The input dimension. Any data need to be flattened before entering the MLP. Note that it is not required in convolutional networks.
  • hidden_dims (list[int]): List of hidden layer dimensions. It can be an empty list, which means a single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension, which we take as the output dimension.
  • output_dim (int): The output dimension that connects to output heads.
  • activation_layer (nn.Module | None): Activation function of each layer (if not None). If None, this layer won't be used. Default nn.ReLU.
  • batch_normalization (bool): Whether to use batch normalization after the fully connected layers. Default False.
  • bias (bool): Whether to use bias in the linear layer. Default True.
  • dropout (float | None): The probability for the dropout layer. If None, this layer won't be used. Default None.
  • kwargs: Reserved for multiple inheritance.
input_dim: int

The input dimension of the MLP backbone network.

hidden_dims: list[int]

The hidden dimensions of the MLP backbone network.

output_dim: int

The output dimension of the MLP backbone network.

num_fc_layers: int

The number of fully-connected layers in the MLP backbone network, which helps form the loops in constructing layers and forward pass.

batch_normalization: bool

Whether to use batch normalization after the fully-connected layers.

activation: bool

Whether to use activation function after the fully-connected layers.

dropout: bool

Whether to use dropout after the fully-connected layers.

fc: torch.nn.modules.container.ModuleList

The list of fully connected (nn.Linear) layers.

def forward( self, input: torch.Tensor, stage: str = None) -> tuple[torch.Tensor, dict[str, torch.Tensor]]:
115    def forward(
116        self, input: Tensor, stage: str = None
117    ) -> tuple[Tensor, dict[str, Tensor]]:
118        r"""The forward pass for data. It is the same for all tasks.
119
120        **Args:**
121        - **input** (`Tensor`): The input tensor from data.
122
123        **Returns:**
124        - **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation.
125        - **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Keys (`str`) are the weighted layer names and values (`Tensor`) are the hidden feature tensors. This is used for certain algorithms that need to use hidden features for various purposes.
126        """
127        batch_size = input.size(0)
128        activations = {}
129
130        x = input.view(batch_size, -1)  # flatten before going through MLP
131
132        for layer_idx, layer_name in enumerate(self.weighted_layer_names):
133            x = self.fc[layer_idx](x)  # fully-connected layer first
134            if self.batch_normalization:
135                x = self.fc_bn[layer_idx](
136                    x
137                )  # batch normalization can be before or after activation. We put it before activation here
138            if self.activation:
139                x = self.fc_activation[layer_idx](x)  # activation function third
140            activations[layer_name] = x  # store the hidden feature
141            if self.dropout:
142                x = self.fc_dropout[layer_idx](x)  # dropout last
143
144        output_feature = x
145
146        return output_feature, activations

The forward pass for data. It is the same for all tasks.

Args:

  • input (Tensor): The input tensor from data.
  • stage (str | None): Unused. Kept for API compatibility with other backbones.

Returns:

  • output_feature (Tensor): The output feature tensor to be passed into heads. This is the main target of backpropagation.
  • activations (dict[str, Tensor]): The hidden features (after activation) in each weighted layer. Keys (str) are the weighted layer names and values (Tensor) are the hidden feature tensors. This is used for certain algorithms that need to use hidden features for various purposes.
class CLMLP(clarena.backbones.base.CLBackbone, MLP):
149class CLMLP(CLBackbone, MLP):
150    """Multi-layer perceptron (MLP), a.k.a. fully connected network. Used as a continual learning backbone.
151
152    MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the CL output heads.
153    """
154
155    def __init__(
156        self,
157        input_dim: int,
158        hidden_dims: list[int],
159        output_dim: int,
160        activation_layer: nn.Module | None = nn.ReLU,
161        batch_normalization: bool = False,
162        bias: bool = True,
163        dropout: float | None = None,
164        **kwargs,
165    ) -> None:
166        r"""Construct and initialize the CLMLP backbone network.
167
168        **Args:**
169        - **input_dim** (`int`): the input dimension. Any data need to be flattened before going in MLP. Note that it is not required in convolutional networks.
170        - **hidden_dims** (`list[int]`): list of hidden layer dimensions. It can be empty list which means single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension which we take as output dimension.
171        - **output_dim** (`int`): the output dimension which connects to CL output heads.
172        - **activation_layer** (`nn.Module` | `None`): activation function of each layer (if not `None`), if `None` this layer won't be used. Default `nn.ReLU`.
173        - **batch_normalization** (`bool`): whether to use batch normalization after the fully-connected layers. Default `False`.
174        - **bias** (`bool`): whether to use bias in the linear layer. Default `True`.
175        - **dropout** (`float` | `None`): the probability for the dropout layer, if `None` this layer won't be used. Default `None`.
176        - **kwargs**: Reserved for multiple inheritance.
177        """
178        super().__init__(
179            input_dim=input_dim,
180            hidden_dims=hidden_dims,
181            output_dim=output_dim,
182            activation_layer=activation_layer,
183            batch_normalization=batch_normalization,
184            bias=bias,
185            dropout=dropout,
186            **kwargs,
187        )
188
189    def forward(
190        self, input: Tensor, stage: str = None, task_id: int | None = None
191    ) -> tuple[Tensor, dict[str, Tensor]]:
192        r"""The forward pass for data. It is the same for all tasks.
193
194        **Args:**
195        - **input** (`Tensor`): The input tensor from data.
196        - **stage** (`str` | `None`): Unused. Kept for API compatibility with other backbones.
197        - **task_id** (`int` | `None`): Unused. Kept for API compatibility with other continual learning backbones.
198
199        **Returns:**
200        - **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation.
201        - **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Key (`str`) is the weighted layer name; value (`Tensor`) is the hidden feature tensor. This is used for continual learning algorithms that need hidden features for various purposes.
202        """
203        return MLP.forward(self, input, stage)  # call the MLP forward method

Multi-layer perceptron (MLP), a.k.a. fully connected network. Used as a continual learning backbone.

MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the CL output heads.

CLMLP( input_dim: int, hidden_dims: list[int], output_dim: int, activation_layer: torch.nn.modules.module.Module | None = <class 'torch.nn.modules.activation.ReLU'>, batch_normalization: bool = False, bias: bool = True, dropout: float | None = None, **kwargs)
155    def __init__(
156        self,
157        input_dim: int,
158        hidden_dims: list[int],
159        output_dim: int,
160        activation_layer: nn.Module | None = nn.ReLU,
161        batch_normalization: bool = False,
162        bias: bool = True,
163        dropout: float | None = None,
164        **kwargs,
165    ) -> None:
166        r"""Construct and initialize the CLMLP backbone network.
167
168        **Args:**
169        - **input_dim** (`int`): the input dimension. Any data need to be flattened before going in MLP. Note that it is not required in convolutional networks.
170        - **hidden_dims** (`list[int]`): list of hidden layer dimensions. It can be empty list which means single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension which we take as output dimension.
171        - **output_dim** (`int`): the output dimension which connects to CL output heads.
172        - **activation_layer** (`nn.Module` | `None`): activation function of each layer (if not `None`), if `None` this layer won't be used. Default `nn.ReLU`.
173        - **batch_normalization** (`bool`): whether to use batch normalization after the fully-connected layers. Default `False`.
174        - **bias** (`bool`): whether to use bias in the linear layer. Default `True`.
175        - **dropout** (`float` | `None`): the probability for the dropout layer, if `None` this layer won't be used. Default `None`.
176        - **kwargs**: Reserved for multiple inheritance.
177        """
178        super().__init__(
179            input_dim=input_dim,
180            hidden_dims=hidden_dims,
181            output_dim=output_dim,
182            activation_layer=activation_layer,
183            batch_normalization=batch_normalization,
184            bias=bias,
185            dropout=dropout,
186            **kwargs,
187        )

Construct and initialize the CLMLP backbone network.

Args:

  • input_dim (int): the input dimension. Any data need to be flattened before going in MLP. Note that it is not required in convolutional networks.
  • hidden_dims (list[int]): list of hidden layer dimensions. It can be empty list which means single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension which we take as output dimension.
  • output_dim (int): the output dimension which connects to CL output heads.
  • activation_layer (nn.Module | None): activation function of each layer (if not None), if None this layer won't be used. Default nn.ReLU.
  • batch_normalization (bool): whether to use batch normalization after the fully-connected layers. Default False.
  • bias (bool): whether to use bias in the linear layer. Default True.
  • dropout (float | None): the probability for the dropout layer, if None this layer won't be used. Default None.
  • kwargs: Reserved for multiple inheritance.
def forward( self, input: torch.Tensor, stage: str = None, task_id: int | None = None) -> tuple[torch.Tensor, dict[str, torch.Tensor]]:
189    def forward(
190        self, input: Tensor, stage: str = None, task_id: int | None = None
191    ) -> tuple[Tensor, dict[str, Tensor]]:
192        r"""The forward pass for data. It is the same for all tasks.
193
194        **Args:**
195        - **input** (`Tensor`): The input tensor from data.
196        - **stage** (`str` | `None`): Unused. Kept for API compatibility with other backbones.
197        - **task_id** (`int` | `None`): Unused. Kept for API compatibility with other continual learning backbones.
198
199        **Returns:**
200        - **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation.
201        - **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Key (`str`) is the weighted layer name; value (`Tensor`) is the hidden feature tensor. This is used for continual learning algorithms that need hidden features for various purposes.
202        """
203        return MLP.forward(self, input, stage)  # call the MLP forward method

The forward pass for data. It is the same for all tasks.

Args:

  • input (Tensor): The input tensor from data.
  • stage (str | None): Unused. Kept for API compatibility with other backbones.
  • task_id (int | None): Unused. Kept for API compatibility with other continual learning backbones.

Returns:

  • output_feature (Tensor): The output feature tensor to be passed into heads. This is the main target of backpropagation.
  • activations (dict[str, Tensor]): The hidden features (after activation) in each weighted layer. Key (str) is the weighted layer name; value (Tensor) is the hidden feature tensor. This is used for continual learning algorithms that need hidden features for various purposes.