clarena.backbones.mlp
The submodule in backbones for the basic MLP backbone network. It includes the basic MLP and the continual learning MLP.
1r""" 2The submodule in `backbones` for the basic MLP backbone network. It includes the basic MLP and the continual learning MLP. 3""" 4 5__all__ = ["MLP", "CLMLP"] 6 7import logging 8 9from torch import Tensor, nn 10 11from clarena.backbones import Backbone, CLBackbone 12 13# always get logger for built-in logging in each module 14pylogger = logging.getLogger(__name__) 15 16 17class MLP(Backbone): 18 """Multi-layer perceptron (MLP), a.k.a. fully connected network. 19 20 MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the output heads. 21 """ 22 23 def __init__( 24 self, 25 input_dim: int, 26 hidden_dims: list[int], 27 output_dim: int, 28 activation_layer: nn.Module | None = nn.ReLU, 29 batch_normalization: bool = False, 30 bias: bool = True, 31 dropout: float | None = None, 32 **kwargs, 33 ) -> None: 34 r"""Construct and initialize the MLP backbone network. 35 36 **Args:** 37 - **input_dim** (`int`): The input dimension. Any data need to be flattened before entering the MLP. Note that it is not required in convolutional networks. 38 - **hidden_dims** (`list[int]`): List of hidden layer dimensions. It can be an empty list, which means a single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension, which we take as the output dimension. 39 - **output_dim** (`int`): The output dimension that connects to output heads. 40 - **activation_layer** (`nn.Module` | `None`): Activation function of each layer (if not `None`). If `None`, this layer won't be used. Default `nn.ReLU`. 41 - **batch_normalization** (`bool`): Whether to use batch normalization after the fully connected layers. Default `False`. 42 - **bias** (`bool`): Whether to use bias in the linear layer. Default `True`. 43 - **dropout** (`float` | `None`): The probability for the dropout layer. If `None`, this layer won't be used. Default `None`. 
44 - **kwargs**: Reserved for multiple inheritance. 45 """ 46 super().__init__(output_dim=output_dim, **kwargs) 47 48 self.input_dim: int = input_dim 49 r"""The input dimension of the MLP backbone network.""" 50 self.hidden_dims: list[int] = hidden_dims 51 r"""The hidden dimensions of the MLP backbone network.""" 52 self.output_dim: int = output_dim 53 r"""The output dimension of the MLP backbone network.""" 54 55 self.num_fc_layers: int = len(hidden_dims) + 1 56 r"""The number of fully-connected layers in the MLP backbone network, which helps form the loops in constructing layers and forward pass.""" 57 self.batch_normalization: bool = batch_normalization 58 r"""Whether to use batch normalization after the fully-connected layers.""" 59 self.activation: bool = activation_layer is not None 60 r"""Whether to use activation function after the fully-connected layers.""" 61 self.dropout: bool = dropout is not None 62 r"""Whether to use dropout after the fully-connected layers.""" 63 64 self.fc: nn.ModuleList = nn.ModuleList() 65 r"""The list of fully connected (`nn.Linear`) layers.""" 66 if self.batch_normalization: 67 self.fc_bn: nn.ModuleList = nn.ModuleList() 68 r"""The list of batch normalization (`nn.BatchNorm1d`) layers after the fully connected layers.""" 69 if self.activation: 70 self.fc_activation: nn.ModuleList = nn.ModuleList() 71 r"""The list of activation layers after the fully connected layers.""" 72 if self.dropout: 73 self.fc_dropout: nn.ModuleList = nn.ModuleList() 74 r"""The list of dropout layers after the fully connected layers.""" 75 76 # construct the weighted fully connected layers and attached layers (batch norm, activation, dropout, etc.) 
in a loop 77 for layer_idx in range(self.num_fc_layers): 78 79 # the input and output dim of the current weighted layer 80 layer_input_dim = ( 81 self.input_dim if layer_idx == 0 else self.hidden_dims[layer_idx - 1] 82 ) 83 layer_output_dim = ( 84 self.hidden_dims[layer_idx] 85 if layer_idx != len(self.hidden_dims) 86 else self.output_dim 87 ) 88 89 # construct the fully connected layer 90 self.fc.append( 91 nn.Linear( 92 in_features=layer_input_dim, 93 out_features=layer_output_dim, 94 bias=bias, 95 ) 96 ) 97 98 # update the weighted layer names 99 full_layer_name = f"fc/{layer_idx}" 100 self.weighted_layer_names.append(full_layer_name) 101 102 # construct the batch normalization layer 103 if self.batch_normalization: 104 self.fc_bn.append(nn.BatchNorm1d(num_features=(layer_output_dim))) 105 106 # construct the activation layer 107 if self.activation: 108 self.fc_activation.append(activation_layer()) 109 110 # construct the dropout layer 111 if self.dropout: 112 self.fc_dropout.append(nn.Dropout(dropout)) 113 114 def forward( 115 self, input: Tensor, stage: str = None 116 ) -> tuple[Tensor, dict[str, Tensor]]: 117 r"""The forward pass for data. It is the same for all tasks. 118 119 **Args:** 120 - **input** (`Tensor`): The input tensor from data. 121 122 **Returns:** 123 - **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation. 124 - **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Keys (`str`) are the weighted layer names and values (`Tensor`) are the hidden feature tensors. This is used for certain algorithms that need to use hidden features for various purposes. 
125 """ 126 batch_size = input.size(0) 127 activations = {} 128 129 x = input.view(batch_size, -1) # flatten before going through MLP 130 131 for layer_idx, layer_name in enumerate(self.weighted_layer_names): 132 x = self.fc[layer_idx](x) # fully-connected layer first 133 if self.batch_normalization: 134 x = self.fc_bn[layer_idx]( 135 x 136 ) # batch normalization can be before or after activation. We put it before activation here 137 if self.activation: 138 x = self.fc_activation[layer_idx](x) # activation function third 139 activations[layer_name] = x # store the hidden feature 140 if self.dropout: 141 x = self.fc_dropout[layer_idx](x) # dropout last 142 143 output_feature = x 144 145 return output_feature, activations 146 147 148class CLMLP(CLBackbone, MLP): 149 """Multi-layer perceptron (MLP), a.k.a. fully connected network. Used as a continual learning backbone. 150 151 MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the CL output heads. 152 """ 153 154 def __init__( 155 self, 156 input_dim: int, 157 hidden_dims: list[int], 158 output_dim: int, 159 activation_layer: nn.Module | None = nn.ReLU, 160 batch_normalization: bool = False, 161 bias: bool = True, 162 dropout: float | None = None, 163 **kwargs, 164 ) -> None: 165 r"""Construct and initialize the CLMLP backbone network. 166 167 **Args:** 168 - **input_dim** (`int`): the input dimension. Any data need to be flattened before going in MLP. Note that it is not required in convolutional networks. 169 - **hidden_dims** (`list[int]`): list of hidden layer dimensions. It can be empty list which means single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension which we take as output dimension. 170 - **output_dim** (`int`): the output dimension which connects to CL output heads. 
171 - **activation_layer** (`nn.Module` | `None`): activation function of each layer (if not `None`), if `None` this layer won't be used. Default `nn.ReLU`. 172 - **batch_normalization** (`bool`): whether to use batch normalization after the fully-connected layers. Default `False`. 173 - **bias** (`bool`): whether to use bias in the linear layer. Default `True`. 174 - **dropout** (`float` | `None`): the probability for the dropout layer, if `None` this layer won't be used. Default `None`. 175 - **kwargs**: Reserved for multiple inheritance. 176 """ 177 super().__init__( 178 input_dim=input_dim, 179 hidden_dims=hidden_dims, 180 output_dim=output_dim, 181 activation_layer=activation_layer, 182 batch_normalization=batch_normalization, 183 bias=bias, 184 dropout=dropout, 185 **kwargs, 186 ) 187 188 def forward( 189 self, input: Tensor, stage: str = None, task_id: int | None = None 190 ) -> tuple[Tensor, dict[str, Tensor]]: 191 r"""The forward pass for data. It is the same for all tasks. 192 193 **Args:** 194 - **input** (`Tensor`): The input tensor from data. 195 - **stage** (`str` | `None`): Unused. Kept for API compatibility with other backbones. 196 - **task_id** (`int` | `None`): Unused. Kept for API compatibility with other continual learning backbones. 197 198 **Returns:** 199 - **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation. 200 - **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Key (`str`) is the weighted layer name; value (`Tensor`) is the hidden feature tensor. This is used for continual learning algorithms that need hidden features for various purposes. 201 """ 202 return MLP.forward(self, input, stage) # call the MLP forward method
class MLP(Backbone):
    r"""A fully connected (dense) backbone network, also known as a multi-layer perceptron.

    The network stacks several `nn.Linear` layers, each optionally followed by batch normalization, an activation function, and dropout. The final layer feeds the output heads.
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dims: list[int],
        output_dim: int,
        activation_layer: nn.Module | None = nn.ReLU,
        batch_normalization: bool = False,
        bias: bool = True,
        dropout: float | None = None,
        **kwargs,
    ) -> None:
        r"""Build the MLP backbone.

        **Args:**
        - **input_dim** (`int`): Dimension of the (flattened) input.
        - **hidden_dims** (`list[int]`): Sizes of the hidden layers; an empty list yields a single-layer MLP. The output dimension is not included here.
        - **output_dim** (`int`): Dimension of the feature that connects to output heads.
        - **activation_layer** (`nn.Module` | `None`): Activation applied after each layer; skipped entirely when `None`. Default `nn.ReLU`.
        - **batch_normalization** (`bool`): Whether to insert `nn.BatchNorm1d` after each linear layer. Default `False`.
        - **bias** (`bool`): Whether the linear layers carry bias terms. Default `True`.
        - **dropout** (`float` | `None`): Dropout probability; skipped entirely when `None`. Default `None`.
        - **kwargs**: Reserved for multiple inheritance.
        """
        super().__init__(output_dim=output_dim, **kwargs)

        self.input_dim: int = input_dim
        r"""The input dimension of the MLP backbone network."""
        self.hidden_dims: list[int] = hidden_dims
        r"""The hidden dimensions of the MLP backbone network."""
        self.output_dim: int = output_dim
        r"""The output dimension of the MLP backbone network."""

        self.num_fc_layers: int = len(hidden_dims) + 1
        r"""Number of fully-connected layers; drives the construction and forward loops."""
        self.batch_normalization: bool = batch_normalization
        r"""Whether batch normalization follows each fully-connected layer."""
        self.activation: bool = activation_layer is not None
        r"""Whether an activation function follows each fully-connected layer."""
        self.dropout: bool = dropout is not None
        r"""Whether dropout follows each fully-connected layer."""

        self.fc: nn.ModuleList = nn.ModuleList()
        r"""The list of fully connected (`nn.Linear`) layers."""
        if self.batch_normalization:
            self.fc_bn: nn.ModuleList = nn.ModuleList()
            r"""The list of batch normalization (`nn.BatchNorm1d`) layers after the fully connected layers."""
        if self.activation:
            self.fc_activation: nn.ModuleList = nn.ModuleList()
            r"""The list of activation layers after the fully connected layers."""
        if self.dropout:
            self.fc_dropout: nn.ModuleList = nn.ModuleList()
            r"""The list of dropout layers after the fully connected layers."""

        # walk consecutive (fan_in, fan_out) pairs over input -> hidden ... -> output
        dims = [self.input_dim] + self.hidden_dims + [self.output_dim]
        for idx, (fan_in, fan_out) in enumerate(zip(dims[:-1], dims[1:])):
            # the weighted (linear) layer itself
            self.fc.append(
                nn.Linear(in_features=fan_in, out_features=fan_out, bias=bias)
            )

            # register the layer name for activation tracking
            self.weighted_layer_names.append(f"fc/{idx}")

            # optional attached layers, in the order they are applied in forward()
            if self.batch_normalization:
                self.fc_bn.append(nn.BatchNorm1d(num_features=fan_out))
            if self.activation:
                self.fc_activation.append(activation_layer())
            if self.dropout:
                self.fc_dropout.append(nn.Dropout(dropout))

    def forward(
        self, input: Tensor, stage: str = None
    ) -> tuple[Tensor, dict[str, Tensor]]:
        r"""Run data through the MLP; identical for all tasks.

        **Args:**
        - **input** (`Tensor`): The input tensor from data.
        - **stage** (`str`): Unused here; kept for API compatibility with other backbones.

        **Returns:**
        - **output_feature** (`Tensor`): Feature tensor to be passed into heads; the main target of backpropagation.
        - **activations** (`dict[str, Tensor]`): Hidden features (after activation) keyed by weighted layer name, for algorithms that consume intermediate features.
        """
        hidden_features: dict[str, Tensor] = {}
        feature = input.view(input.size(0), -1)  # flatten everything but the batch dim

        for idx, name in enumerate(self.weighted_layer_names):
            feature = self.fc[idx](feature)
            # batch norm (when enabled) is applied before the activation
            if self.batch_normalization:
                feature = self.fc_bn[idx](feature)
            if self.activation:
                feature = self.fc_activation[idx](feature)
            hidden_features[name] = feature  # record the post-activation feature
            if self.dropout:
                feature = self.fc_dropout[idx](feature)

        return feature, hidden_features
Multi-layer perceptron (MLP), a.k.a. fully connected network.
MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the output heads.
def __init__(
    self,
    input_dim: int,
    hidden_dims: list[int],
    output_dim: int,
    activation_layer: nn.Module | None = nn.ReLU,
    batch_normalization: bool = False,
    bias: bool = True,
    dropout: float | None = None,
    **kwargs,
) -> None:
    r"""Assemble the MLP backbone: stacked linear layers with optional batch norm, activation, and dropout.

    **Args:**
    - **input_dim** (`int`): Dimension of the flattened input.
    - **hidden_dims** (`list[int]`): Hidden layer sizes; may be empty for a single-layer MLP. The output dimension is not included.
    - **output_dim** (`int`): Dimension of the output feature that connects to output heads.
    - **activation_layer** (`nn.Module` | `None`): Activation class applied after each layer; omitted when `None`. Default `nn.ReLU`.
    - **batch_normalization** (`bool`): Whether to add `nn.BatchNorm1d` after each linear layer. Default `False`.
    - **bias** (`bool`): Whether linear layers have bias terms. Default `True`.
    - **dropout** (`float` | `None`): Dropout probability; omitted when `None`. Default `None`.
    - **kwargs**: Reserved for multiple inheritance.
    """
    super().__init__(output_dim=output_dim, **kwargs)

    self.input_dim: int = input_dim
    r"""The input dimension of the MLP backbone network."""
    self.hidden_dims: list[int] = hidden_dims
    r"""The hidden dimensions of the MLP backbone network."""
    self.output_dim: int = output_dim
    r"""The output dimension of the MLP backbone network."""

    self.num_fc_layers: int = len(hidden_dims) + 1
    r"""Number of fully-connected layers; used by the construction and forward loops."""
    self.batch_normalization: bool = batch_normalization
    r"""Whether batch normalization follows each fully-connected layer."""
    self.activation: bool = activation_layer is not None
    r"""Whether an activation function follows each fully-connected layer."""
    self.dropout: bool = dropout is not None
    r"""Whether dropout follows each fully-connected layer."""

    self.fc: nn.ModuleList = nn.ModuleList()
    r"""The list of fully connected (`nn.Linear`) layers."""
    if self.batch_normalization:
        self.fc_bn: nn.ModuleList = nn.ModuleList()
        r"""The list of batch normalization (`nn.BatchNorm1d`) layers after the fully connected layers."""
    if self.activation:
        self.fc_activation: nn.ModuleList = nn.ModuleList()
        r"""The list of activation layers after the fully connected layers."""
    if self.dropout:
        self.fc_dropout: nn.ModuleList = nn.ModuleList()
        r"""The list of dropout layers after the fully connected layers."""

    # iterate over consecutive layer sizes: input -> hidden ... -> output
    sizes = [self.input_dim] + self.hidden_dims + [self.output_dim]
    for layer_no in range(self.num_fc_layers):
        in_size, out_size = sizes[layer_no], sizes[layer_no + 1]

        # the weighted linear layer, plus its registered name
        self.fc.append(
            nn.Linear(in_features=in_size, out_features=out_size, bias=bias)
        )
        self.weighted_layer_names.append(f"fc/{layer_no}")

        # optional attached layers in forward() application order
        if self.batch_normalization:
            self.fc_bn.append(nn.BatchNorm1d(num_features=out_size))
        if self.activation:
            self.fc_activation.append(activation_layer())
        if self.dropout:
            self.fc_dropout.append(nn.Dropout(dropout))
Construct and initialize the MLP backbone network.
Args:
- **input_dim** (`int`): The input dimension. Any data need to be flattened before entering the MLP. Note that it is not required in convolutional networks.
- **hidden_dims** (`list[int]`): List of hidden layer dimensions. It can be an empty list, which means a single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension, which we take as the output dimension.
- **output_dim** (`int`): The output dimension that connects to output heads.
- **activation_layer** (`nn.Module` | `None`): Activation function of each layer (if not `None`). If `None`, this layer won't be used. Default `nn.ReLU`.
- **batch_normalization** (`bool`): Whether to use batch normalization after the fully connected layers. Default `False`.
- **bias** (`bool`): Whether to use bias in the linear layer. Default `True`.
- **dropout** (`float` | `None`): The probability for the dropout layer. If `None`, this layer won't be used. Default `None`.
- **kwargs**: Reserved for multiple inheritance.
The number of fully-connected layers in the MLP backbone network, which helps form the loops in constructing layers and forward pass.
def forward(
    self, input: Tensor, stage: str = None
) -> tuple[Tensor, dict[str, Tensor]]:
    r"""Forward pass through the MLP; the same computation for every task.

    **Args:**
    - **input** (`Tensor`): The input tensor from data.
    - **stage** (`str`): Unused here; kept for API compatibility with other backbones.

    **Returns:**
    - **output_feature** (`Tensor`): The feature tensor passed on to the heads; the main target of backpropagation.
    - **activations** (`dict[str, Tensor]`): Post-activation hidden features keyed by weighted layer name, for algorithms that consume intermediate features.
    """
    activations: dict[str, Tensor] = {}
    x = input.view(input.size(0), -1)  # flatten all non-batch dimensions

    for i, layer_name in enumerate(self.weighted_layer_names):
        x = self.fc[i](x)  # the weighted linear layer
        if self.batch_normalization:
            x = self.fc_bn[i](x)  # batch norm sits before the activation here
        if self.activation:
            x = self.fc_activation[i](x)
        activations[layer_name] = x  # record the hidden feature
        if self.dropout:
            x = self.fc_dropout[i](x)

    return x, activations
The forward pass for data. It is the same for all tasks.
Args:
- **input** (`Tensor`): The input tensor from data.

**Returns:**
- **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation.
- **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Keys (`str`) are the weighted layer names and values (`Tensor`) are the hidden feature tensors. This is used for certain algorithms that need to use hidden features for various purposes.
class CLMLP(CLBackbone, MLP):
    r"""Continual-learning variant of the fully connected MLP backbone.

    The dense architecture is identical to `MLP`: several fully connected layers, each followed by an activation function, with the last layer connecting to the CL output heads.
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dims: list[int],
        output_dim: int,
        activation_layer: nn.Module | None = nn.ReLU,
        batch_normalization: bool = False,
        bias: bool = True,
        dropout: float | None = None,
        **kwargs,
    ) -> None:
        r"""Build the CLMLP backbone.

        **Args:**
        - **input_dim** (`int`): Dimension of the (flattened) input.
        - **hidden_dims** (`list[int]`): Sizes of the hidden layers; an empty list yields a single-layer MLP. The output dimension is not included here.
        - **output_dim** (`int`): Dimension of the feature that connects to the CL output heads.
        - **activation_layer** (`nn.Module` | `None`): Activation applied after each layer; skipped entirely when `None`. Default `nn.ReLU`.
        - **batch_normalization** (`bool`): Whether to insert batch normalization after each linear layer. Default `False`.
        - **bias** (`bool`): Whether the linear layers carry bias terms. Default `True`.
        - **dropout** (`float` | `None`): Dropout probability; skipped entirely when `None`. Default `None`.
        - **kwargs**: Reserved for multiple inheritance.
        """
        # all construction is handled by MLP.__init__ via cooperative inheritance
        super().__init__(
            input_dim=input_dim,
            hidden_dims=hidden_dims,
            output_dim=output_dim,
            activation_layer=activation_layer,
            batch_normalization=batch_normalization,
            bias=bias,
            dropout=dropout,
            **kwargs,
        )

    def forward(
        self, input: Tensor, stage: str = None, task_id: int | None = None
    ) -> tuple[Tensor, dict[str, Tensor]]:
        r"""Forward pass for data; the computation is shared across all tasks.

        **Args:**
        - **input** (`Tensor`): The input tensor from data.
        - **stage** (`str` | `None`): Unused. Kept for API compatibility with other backbones.
        - **task_id** (`int` | `None`): Unused. Kept for API compatibility with other continual learning backbones.

        **Returns:**
        - **output_feature** (`Tensor`): The feature tensor passed on to the heads; the main target of backpropagation.
        - **activations** (`dict[str, Tensor]`): Post-activation hidden features keyed by weighted layer name, for continual learning algorithms that consume intermediate features.
        """
        # the shared MLP does all the work; task_id is accepted for interface compatibility only
        output_feature, activations = MLP.forward(self, input, stage)
        return output_feature, activations
Multi-layer perceptron (MLP), a.k.a. fully connected network. Used as a continual learning backbone.
MLP is a dense network architecture with several fully connected layers, each followed by an activation function. The last layer connects to the CL output heads.
def __init__(
    self,
    input_dim: int,
    hidden_dims: list[int],
    output_dim: int,
    activation_layer: nn.Module | None = nn.ReLU,
    batch_normalization: bool = False,
    bias: bool = True,
    dropout: float | None = None,
    **kwargs,
) -> None:
    r"""Set up the CLMLP backbone by delegating to the joint (CLBackbone, MLP) initializer.

    **Args:**
    - **input_dim** (`int`): Dimension of the flattened input.
    - **hidden_dims** (`list[int]`): Hidden layer sizes; may be empty for a single-layer MLP. The output dimension is not included.
    - **output_dim** (`int`): Dimension of the feature that connects to the CL output heads.
    - **activation_layer** (`nn.Module` | `None`): Activation class applied after each layer; omitted when `None`. Default `nn.ReLU`.
    - **batch_normalization** (`bool`): Whether to add batch normalization after each linear layer. Default `False`.
    - **bias** (`bool`): Whether linear layers have bias terms. Default `True`.
    - **dropout** (`float` | `None`): Dropout probability; omitted when `None`. Default `None`.
    - **kwargs**: Reserved for multiple inheritance.
    """
    # everything is handled by MLP.__init__ via cooperative multiple inheritance
    super().__init__(
        input_dim=input_dim,
        hidden_dims=hidden_dims,
        output_dim=output_dim,
        activation_layer=activation_layer,
        batch_normalization=batch_normalization,
        bias=bias,
        dropout=dropout,
        **kwargs,
    )
Construct and initialize the CLMLP backbone network.
Args:
- **input_dim** (`int`): the input dimension. Any data need to be flattened before going in MLP. Note that it is not required in convolutional networks.
- **hidden_dims** (`list[int]`): list of hidden layer dimensions. It can be empty list which means single-layer MLP, and it can be as many layers as you want. Note that it doesn't include the last dimension which we take as output dimension.
- **output_dim** (`int`): the output dimension which connects to CL output heads.
- **activation_layer** (`nn.Module` | `None`): activation function of each layer (if not `None`), if `None` this layer won't be used. Default `nn.ReLU`.
- **batch_normalization** (`bool`): whether to use batch normalization after the fully-connected layers. Default `False`.
- **bias** (`bool`): whether to use bias in the linear layer. Default `True`.
- **dropout** (`float` | `None`): the probability for the dropout layer, if `None` this layer won't be used. Default `None`.
- **kwargs**: Reserved for multiple inheritance.
def forward(
    self, input: Tensor, stage: str = None, task_id: int | None = None
) -> tuple[Tensor, dict[str, Tensor]]:
    r"""Forward pass for data; the computation is shared across all tasks.

    **Args:**
    - **input** (`Tensor`): The input tensor from data.
    - **stage** (`str` | `None`): Unused. Kept for API compatibility with other backbones.
    - **task_id** (`int` | `None`): Unused. Kept for API compatibility with other continual learning backbones.

    **Returns:**
    - **output_feature** (`Tensor`): The feature tensor passed on to the heads; the main target of backpropagation.
    - **activations** (`dict[str, Tensor]`): Post-activation hidden features keyed by weighted layer name, for continual learning algorithms that consume intermediate features.
    """
    # task_id is accepted for interface compatibility only; the MLP is shared across tasks
    output_feature, activations = MLP.forward(self, input, stage)
    return output_feature, activations
The forward pass for data. It is the same for all tasks.
Args:
- **input** (`Tensor`): The input tensor from data.
- **stage** (`str` | `None`): Unused. Kept for API compatibility with other backbones.
- **task_id** (`int` | `None`): Unused. Kept for API compatibility with other continual learning backbones.

**Returns:**
- **output_feature** (`Tensor`): The output feature tensor to be passed into heads. This is the main target of backpropagation.
- **activations** (`dict[str, Tensor]`): The hidden features (after activation) in each weighted layer. Key (`str`) is the weighted layer name; value (`Tensor`) is the hidden feature tensor. This is used for continual learning algorithms that need hidden features for various purposes.