clarena.metrics.cl_acc

The submodule in `metrics` for `CLAccuracy`.

  1r"""
  2The submodule in `metrics` for `CLAccuracy`.
  3"""
  4
  5__all__ = ["CLAccuracy"]
  6
  7import csv
  8import logging
  9import os
 10from typing import Any
 11
 12import pandas as pd
 13from lightning import Trainer
 14from lightning.pytorch.utilities import rank_zero_only
 15from matplotlib import pyplot as plt
 16from torchmetrics import MeanMetric
 17
 18from clarena.cl_algorithms import CLAlgorithm
 19from clarena.metrics import MetricCallback
 20from clarena.utils.metrics import MeanMetricBatch
 21
 22# always get logger for built-in logging in each module
 23pylogger = logging.getLogger(__name__)
 24
 25
 26class CLAccuracy(MetricCallback):
 27    r"""Provides all actions that are related to CL accuracy metric, which include:
 28
 29    - Defining, initializing and recording accuracy metric.
 30    - Logging training and validation accuracy metric to Lightning loggers in real time.
 31    - Saving test accuracy metric to files.
 32    - Visualizing test accuracy metric as plots.
 33
 34    The callback is able to produce the following outputs:
 35
 36    - CSV files for test accuracy (lower triangular) matrix and average accuracy. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-test-performance-of-previous-tasks) for details.
 37    - Coloured plot for test accuracy (lower triangular) matrix. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-test-performance-of-previous-tasks) for details.
 38    - Curve plots for test average accuracy over different training tasks. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-average-test-performance-over-tasks) for details.
 39
 40    Please refer to the [A Summary of Continual Learning Metrics](https://pengxiang-wang.com/posts/continual-learning-metrics) to learn about this metric.
 41    """
 42
 43    def __init__(
 44        self,
 45        save_dir: str,
 46        test_acc_csv_name: str = "acc.csv",
 47        test_acc_matrix_plot_name: str | None = None,
 48        test_ave_acc_plot_name: str | None = None,
 49    ) -> None:
 50        r"""
 51        **Args:**
 52        - **save_dir** (`str`): The directory where data and figures of metrics will be saved. Better inside the output folder.
 53        - **test_acc_csv_name** (`str`): file name to save test accuracy matrix and average accuracy as CSV file.
 54        - **test_acc_matrix_plot_name** (`str` | `None`): file name to save accuracy matrix plot. If `None`, no file will be saved.
 55        - **test_ave_acc_plot_name** (`str` | `None`): file name to save average accuracy as curve plot over different training tasks. If `None`, no file will be saved.
 56        """
 57        super().__init__(save_dir=save_dir)
 58
 59        # paths
 60        self.test_acc_csv_path: str = os.path.join(save_dir, test_acc_csv_name)
 61        r"""The path to save test accuracy matrix and average accuracy CSV file."""
 62        if test_acc_matrix_plot_name:
 63            self.test_acc_matrix_plot_path: str = os.path.join(
 64                save_dir, test_acc_matrix_plot_name
 65            )
 66            r"""The path to save test accuracy matrix plot."""
 67        if test_ave_acc_plot_name:
 68            self.test_ave_acc_plot_path: str = os.path.join(
 69                save_dir, test_ave_acc_plot_name
 70            )
 71            r"""The path to save test average accuracy curve plot."""
 72
 73        # training accumulated metrics
 74        self.acc_training_epoch: MeanMetricBatch
 75        r"""Classification accuracy of training epoch. Accumulated and calculated from the training batches. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-performance-of-training-epoch) for details. """
 76
 77        # validation accumulated metrics
 78        self.acc_val: MeanMetricBatch
 79        r"""Validation classification accuracy of the model after training epoch. Accumulated and calculated from the validation batches. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-validation-performace) for details. """
 80
 81        # test accumulated metrics
 82        self.acc_test: dict[int, MeanMetricBatch]
 83        r"""Test classification accuracy of the current model (`self.task_id`) on current and previous tasks. Accumulated and calculated from the test batches. Keys are task IDs and values are the corresponding metrics. It is the last row of the lower triangular matrix. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-test-performance-of-previous-tasks) for details. """
 84
 85        # task ID control
 86        self.task_id: int
 87        r"""Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to `cl_dataset.num_tasks`."""
 88
 89    @rank_zero_only
 90    def on_fit_start(self, trainer: Trainer, pl_module: CLAlgorithm) -> None:
 91        r"""Initialize training and validation metrics."""
 92
 93        # set the current task_id from the `CLAlgorithm` object
 94        self.task_id = pl_module.task_id
 95
 96        # get the device to put the metrics on the same device
 97        device = pl_module.device
 98
 99        # initialize training metrics
100        self.acc_training_epoch = MeanMetricBatch().to(device)
101
102        # initialize validation metrics
103        self.acc_val = MeanMetricBatch().to(device)
104
105    @rank_zero_only
106    def on_train_batch_end(
107        self,
108        trainer: Trainer,
109        pl_module: CLAlgorithm,
110        outputs: dict[str, Any],
111        batch: Any,
112        batch_idx: int,
113    ) -> None:
114        r"""Record training metrics from training batch, log metrics of training batch and accumulated metrics of the epoch to Lightning loggers.
115
116        **Args:**
117        - **outputs** (`dict[str, Any]`): the outputs of the training step, the returns of the `training_step()` method in the `CLAlgorithm`.
118        - **batch** (`Any`): the training data batch.
119        """
120        # get the batch size
121        batch_size = len(batch)
122
123        # get training metrics values of current training batch from the outputs of the `training_step()`
124        acc_batch = outputs["acc"]
125
126        # update accumulated training metrics to calculate training metrics of the epoch
127        self.acc_training_epoch.update(acc_batch, batch_size)
128
129        # log training metrics of current training batch to Lightning loggers
130        pl_module.log(f"task_{self.task_id}/train/acc_batch", acc_batch, prog_bar=True)
131
132        # log accumulated training metrics till this training batch to Lightning loggers
133        pl_module.log(
134            f"task_{self.task_id}/train/acc",
135            self.acc_training_epoch.compute(),
136            prog_bar=True,
137        )
138
139    @rank_zero_only
140    def on_train_epoch_end(
141        self,
142        trainer: Trainer,
143        pl_module: CLAlgorithm,
144    ) -> None:
145        r"""Log metrics of training epoch to plot learning curves and reset the metrics accumulation at the end of training epoch."""
146
147        # log the accumulated and computed metrics of the epoch to Lightning loggers, specially for plotting learning curves
148        pl_module.log(
149            f"task_{self.task_id}/learning_curve/train/acc",
150            self.acc_training_epoch.compute(),
151            on_epoch=True,
152            prog_bar=True,
153        )
154
155        # reset the metrics of training epoch as there are more epochs to go and not only one epoch like in the validation and test
156        self.acc_training_epoch.reset()
157
158    @rank_zero_only
159    def on_validation_batch_end(
160        self,
161        trainer: Trainer,
162        pl_module: CLAlgorithm,
163        outputs: dict[str, Any],
164        batch: Any,
165        batch_idx: int,
166    ) -> None:
167        r"""Accumulating metrics from validation batch. We don't need to log and monitor the metrics of validation batches.
168
169        **Args:**
170        - **outputs** (`dict[str, Any]`): the outputs of the validation step, which is the returns of the `validation_step()` method in the `CLAlgorithm`.
171        - **batch** (`Any`): the validation data batch.
172        """
173
174        # get the batch size
175        batch_size = len(batch)
176
177        # get the metrics values of the batch from the outputs
178        acc_batch = outputs["acc"]
179
180        # update the accumulated metrics in order to calculate the validation metrics
181        self.acc_val.update(acc_batch, batch_size)
182
183    @rank_zero_only
184    def on_validation_epoch_end(
185        self,
186        trainer: Trainer,
187        pl_module: CLAlgorithm,
188    ) -> None:
189        r"""Log validation metrics to plot learning curves."""
190
191        # log the accumulated and computed metrics of the epoch to Lightning loggers, specially for plotting learning curves
192        pl_module.log(
193            f"task_{self.task_id}/learning_curve/val/acc",
194            self.acc_val.compute(),
195            on_epoch=True,
196            prog_bar=True,
197        )
198
199    @rank_zero_only
200    def on_test_start(
201        self,
202        trainer: Trainer,
203        pl_module: CLAlgorithm,
204    ) -> None:
205        r"""Initialize the metrics for testing each seen task in the beginning of a task's testing."""
206
207        # set the current task_id again (double checking) from the `CLAlgorithm` object
208        self.task_id = pl_module.task_id
209
210        # get the device to put the metrics on the same device
211        device = pl_module.device
212
213        # initialize test metrics for current and previous tasks
214        self.acc_test = {
215            task_id: MeanMetricBatch().to(device)
216            for task_id in pl_module.processed_task_ids
217        }
218
219    @rank_zero_only
220    def on_test_batch_end(
221        self,
222        trainer: Trainer,
223        pl_module: CLAlgorithm,
224        outputs: dict[str, Any],
225        batch: Any,
226        batch_idx: int,
227        dataloader_idx: int = 0,
228    ) -> None:
229        r"""Accumulating metrics from test batch. We don't need to log and monitor the metrics of test batches.
230
231        **Args:**
232        - **outputs** (`dict[str, Any]`): the outputs of the test step, which is the returns of the `test_step()` method in the `CLAlgorithm`.
233        - **batch** (`Any`): the test data batch.
234        - **dataloader_idx** (`int`): the task ID of seen tasks to be tested. A default value of 0 is given otherwise the LightningModule will raise a `RuntimeError`.
235        """
236
237        # get the batch size
238        batch_size = len(batch)
239
240        test_task_id = pl_module.get_test_task_id_from_dataloader_idx(dataloader_idx)
241
242        # get the metrics values of the batch from the outputs
243        acc_batch = outputs["acc"]
244
245        # update the accumulated metrics in order to calculate the metrics of the epoch
246        self.acc_test[test_task_id].update(acc_batch, batch_size)
247
248    @rank_zero_only
249    def on_test_epoch_end(
250        self,
251        trainer: Trainer,
252        pl_module: CLAlgorithm,
253    ) -> None:
254        r"""Save and plot test metrics at the end of test."""
255
256        # save (update) the test metrics to CSV files
257        self.update_test_acc_to_csv(
258            after_training_task_id=self.task_id,
259            csv_path=self.test_acc_csv_path,
260        )
261
262        # plot the test metrics
263        if hasattr(self, "test_acc_matrix_plot_path"):
264            self.plot_test_acc_matrix_from_csv(
265                csv_path=self.test_acc_csv_path,
266                plot_path=self.test_acc_matrix_plot_path,
267            )
268        if hasattr(self, "test_ave_acc_plot_path"):
269            self.plot_test_ave_acc_curve_from_csv(
270                csv_path=self.test_acc_csv_path,
271                plot_path=self.test_ave_acc_plot_path,
272            )
273
274    def update_test_acc_to_csv(
275        self,
276        after_training_task_id: int,
277        csv_path: str,
278    ) -> None:
279        r"""Update the test accuracy metrics of seen tasks at the last line to an existing CSV file. A new file will be created if not existing.
280
281        **Args:**
282        - **after_training_task_id** (`int`): the task ID after training.
283        - **csv_path** (`str`): save the test metric to path. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/acc.csv'.
284        """
285        processed_task_ids = list(self.acc_test.keys())
286        fieldnames = ["after_training_task", "average_accuracy"] + [
287            f"test_on_task_{task_id}" for task_id in processed_task_ids
288        ]
289
290        new_line = {
291            "after_training_task": after_training_task_id
292        }  # construct the first column
293
294        # construct the columns and calculate the average accuracy over tasks at the same time
295        average_accuracy_over_tasks = MeanMetric().to(
296            device=next(iter(self.acc_test.values())).device
297        )
298        for task_id in processed_task_ids:
299            acc = self.acc_test[task_id].compute().item()
300            new_line[f"test_on_task_{task_id}"] = acc
301            average_accuracy_over_tasks(acc)
302        new_line["average_accuracy"] = average_accuracy_over_tasks.compute().item()
303
304        # write to the csv file
305        is_first = not os.path.exists(csv_path)
306        if not is_first:
307            with open(csv_path, "r", encoding="utf-8") as file:
308                lines = file.readlines()
309                del lines[0]
310        # write header
311        with open(csv_path, "w", encoding="utf-8") as file:
312            writer = csv.DictWriter(file, fieldnames=fieldnames)
313            writer.writeheader()
314        # write metrics
315        with open(csv_path, "a", encoding="utf-8") as file:
316            if not is_first:
317                file.writelines(lines)  # write the previous lines
318            writer = csv.DictWriter(file, fieldnames=fieldnames)
319            writer.writerow(new_line)
320
321    def plot_test_acc_matrix_from_csv(self, csv_path: str, plot_path: str) -> None:
322        """Plot the test accuracy matrix from saved CSV file and save the plot to the designated directory.
323
324        **Args:**
325        - **csv_path** (`str`): the path to the CSV file where the `utils.update_test_acc_to_csv()` saved the test accuracy metric.
326        - **plot_path** (`str`): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/acc_matrix.png'.
327        """
328        data = pd.read_csv(csv_path)
329        processed_task_ids = [
330            int(col.replace("test_on_task_", ""))
331            for col in data.columns
332            if col.startswith("test_on_task_")
333        ]
334
335        # Get all columns that start with "test_on_task_"
336        test_task_cols = [
337            col for col in data.columns if col.startswith("test_on_task_")
338        ]
339        num_tasks = len(processed_task_ids)
340        num_rows = len(data)
341
342        # Build the accuracy matrix
343        acc_matrix = data[test_task_cols].values
344
345        fig, ax = plt.subplots(
346            figsize=(2 * num_tasks, 2 * num_rows)
347        )  # adaptive figure size
348
349        cax = ax.imshow(
350            acc_matrix,
351            interpolation="nearest",
352            cmap="Greens",
353            vmin=0,
354            vmax=1,
355            aspect="auto",
356        )
357
358        colorbar = fig.colorbar(cax)
359        yticks = colorbar.ax.get_yticks()
360        colorbar.ax.set_yticks(yticks)
361        colorbar.ax.set_yticklabels(
362            [f"{tick:.2f}" for tick in yticks], fontsize=10 + num_tasks
363        )
364
365        # Annotate each cell
366        for r in range(num_rows):
367            for c in range(r + 1):
368                ax.text(
369                    c,
370                    r,
371                    f"{acc_matrix[r, c]:.3f}",
372                    ha="center",
373                    va="center",
374                    color="black",
375                    fontsize=10 + num_tasks,
376                )
377
378        ax.set_xticks(range(num_tasks))
379        ax.set_yticks(range(num_rows))
380        ax.set_xticklabels(processed_task_ids, fontsize=10 + num_tasks)
381        ax.set_yticklabels(
382            data["after_training_task"].astype(int).tolist(), fontsize=10 + num_tasks
383        )
384
385        ax.set_xlabel("Testing on task τ", fontsize=10 + num_tasks)
386        ax.set_ylabel("After training task t", fontsize=10 + num_tasks)
387        fig.tight_layout()
388        fig.savefig(plot_path)
389        plt.close(fig)
390
391    def plot_test_ave_acc_curve_from_csv(self, csv_path: str, plot_path: str) -> None:
392        """Plot the test average accuracy curve over different training tasks from saved CSV file and save the plot to the designated directory.
393
394        **Args:**
395        - **csv_path** (`str`): the path to the CSV file where the `utils.update_test_acc_to_csv()` saved the test accuracy metric.
396        - **plot_path** (`str`): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/ave_acc.png'.
397        """
398        data = pd.read_csv(csv_path)
399        after_training_tasks = data["after_training_task"].astype(int).tolist()
400
401        # plot the average accuracy curve over different training tasks
402        fig, ax = plt.subplots(figsize=(16, 9))
403        ax.plot(
404            after_training_tasks,
405            data["average_accuracy"],
406            marker="o",
407            linewidth=2,
408        )
409        ax.set_xlabel("After training task $t$", fontsize=16)
410        ax.set_ylabel("Average Accuracy (AA)", fontsize=16)
411        ax.grid(True)
412        xticks = after_training_tasks
413        yticks = [i * 0.05 for i in range(21)]
414        ax.set_xticks(xticks)
415        ax.set_yticks(yticks)
416        ax.set_xticklabels(xticks, fontsize=16)
417        ax.set_yticklabels([f"{tick:.2f}" for tick in yticks], fontsize=16)
418        fig.savefig(plot_path)
419        plt.close(fig)
class CLAccuracy(clarena.metrics.base.MetricCallback):
 27class CLAccuracy(MetricCallback):
 28    r"""Provides all actions that are related to CL accuracy metric, which include:
 29
 30    - Defining, initializing and recording accuracy metric.
 31    - Logging training and validation accuracy metric to Lightning loggers in real time.
 32    - Saving test accuracy metric to files.
 33    - Visualizing test accuracy metric as plots.
 34
 35    The callback is able to produce the following outputs:
 36
 37    - CSV files for test accuracy (lower triangular) matrix and average accuracy. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-test-performance-of-previous-tasks) for details.
 38    - Coloured plot for test accuracy (lower triangular) matrix. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-test-performance-of-previous-tasks) for details.
 39    - Curve plots for test average accuracy over different training tasks. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-average-test-performance-over-tasks) for details.
 40
 41    Please refer to the [A Summary of Continual Learning Metrics](https://pengxiang-wang.com/posts/continual-learning-metrics) to learn about this metric.
 42    """
 43
 44    def __init__(
 45        self,
 46        save_dir: str,
 47        test_acc_csv_name: str = "acc.csv",
 48        test_acc_matrix_plot_name: str | None = None,
 49        test_ave_acc_plot_name: str | None = None,
 50    ) -> None:
 51        r"""
 52        **Args:**
 53        - **save_dir** (`str`): The directory where data and figures of metrics will be saved. Better inside the output folder.
 54        - **test_acc_csv_name** (`str`): file name to save test accuracy matrix and average accuracy as CSV file.
 55        - **test_acc_matrix_plot_name** (`str` | `None`): file name to save accuracy matrix plot. If `None`, no file will be saved.
 56        - **test_ave_acc_plot_name** (`str` | `None`): file name to save average accuracy as curve plot over different training tasks. If `None`, no file will be saved.
 57        """
 58        super().__init__(save_dir=save_dir)
 59
 60        # paths
 61        self.test_acc_csv_path: str = os.path.join(save_dir, test_acc_csv_name)
 62        r"""The path to save test accuracy matrix and average accuracy CSV file."""
 63        if test_acc_matrix_plot_name:
 64            self.test_acc_matrix_plot_path: str = os.path.join(
 65                save_dir, test_acc_matrix_plot_name
 66            )
 67            r"""The path to save test accuracy matrix plot."""
 68        if test_ave_acc_plot_name:
 69            self.test_ave_acc_plot_path: str = os.path.join(
 70                save_dir, test_ave_acc_plot_name
 71            )
 72            r"""The path to save test average accuracy curve plot."""
 73
 74        # training accumulated metrics
 75        self.acc_training_epoch: MeanMetricBatch
 76        r"""Classification accuracy of training epoch. Accumulated and calculated from the training batches. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-performance-of-training-epoch) for details. """
 77
 78        # validation accumulated metrics
 79        self.acc_val: MeanMetricBatch
 80        r"""Validation classification accuracy of the model after training epoch. Accumulated and calculated from the validation batches. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-validation-performace) for details. """
 81
 82        # test accumulated metrics
 83        self.acc_test: dict[int, MeanMetricBatch]
 84        r"""Test classification accuracy of the current model (`self.task_id`) on current and previous tasks. Accumulated and calculated from the test batches. Keys are task IDs and values are the corresponding metrics. It is the last row of the lower triangular matrix. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-test-performance-of-previous-tasks) for details. """
 85
 86        # task ID control
 87        self.task_id: int
 88        r"""Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to `cl_dataset.num_tasks`."""
 89
 90    @rank_zero_only
 91    def on_fit_start(self, trainer: Trainer, pl_module: CLAlgorithm) -> None:
 92        r"""Initialize training and validation metrics."""
 93
 94        # set the current task_id from the `CLAlgorithm` object
 95        self.task_id = pl_module.task_id
 96
 97        # get the device to put the metrics on the same device
 98        device = pl_module.device
 99
100        # initialize training metrics
101        self.acc_training_epoch = MeanMetricBatch().to(device)
102
103        # initialize validation metrics
104        self.acc_val = MeanMetricBatch().to(device)
105
106    @rank_zero_only
107    def on_train_batch_end(
108        self,
109        trainer: Trainer,
110        pl_module: CLAlgorithm,
111        outputs: dict[str, Any],
112        batch: Any,
113        batch_idx: int,
114    ) -> None:
115        r"""Record training metrics from training batch, log metrics of training batch and accumulated metrics of the epoch to Lightning loggers.
116
117        **Args:**
118        - **outputs** (`dict[str, Any]`): the outputs of the training step, the returns of the `training_step()` method in the `CLAlgorithm`.
119        - **batch** (`Any`): the training data batch.
120        """
121        # get the batch size
122        batch_size = len(batch)
123
124        # get training metrics values of current training batch from the outputs of the `training_step()`
125        acc_batch = outputs["acc"]
126
127        # update accumulated training metrics to calculate training metrics of the epoch
128        self.acc_training_epoch.update(acc_batch, batch_size)
129
130        # log training metrics of current training batch to Lightning loggers
131        pl_module.log(f"task_{self.task_id}/train/acc_batch", acc_batch, prog_bar=True)
132
133        # log accumulated training metrics till this training batch to Lightning loggers
134        pl_module.log(
135            f"task_{self.task_id}/train/acc",
136            self.acc_training_epoch.compute(),
137            prog_bar=True,
138        )
139
140    @rank_zero_only
141    def on_train_epoch_end(
142        self,
143        trainer: Trainer,
144        pl_module: CLAlgorithm,
145    ) -> None:
146        r"""Log metrics of training epoch to plot learning curves and reset the metrics accumulation at the end of training epoch."""
147
148        # log the accumulated and computed metrics of the epoch to Lightning loggers, specially for plotting learning curves
149        pl_module.log(
150            f"task_{self.task_id}/learning_curve/train/acc",
151            self.acc_training_epoch.compute(),
152            on_epoch=True,
153            prog_bar=True,
154        )
155
156        # reset the metrics of training epoch as there are more epochs to go and not only one epoch like in the validation and test
157        self.acc_training_epoch.reset()
158
159    @rank_zero_only
160    def on_validation_batch_end(
161        self,
162        trainer: Trainer,
163        pl_module: CLAlgorithm,
164        outputs: dict[str, Any],
165        batch: Any,
166        batch_idx: int,
167    ) -> None:
168        r"""Accumulating metrics from validation batch. We don't need to log and monitor the metrics of validation batches.
169
170        **Args:**
171        - **outputs** (`dict[str, Any]`): the outputs of the validation step, which is the returns of the `validation_step()` method in the `CLAlgorithm`.
172        - **batch** (`Any`): the validation data batch.
173        """
174
175        # get the batch size
176        batch_size = len(batch)
177
178        # get the metrics values of the batch from the outputs
179        acc_batch = outputs["acc"]
180
181        # update the accumulated metrics in order to calculate the validation metrics
182        self.acc_val.update(acc_batch, batch_size)
183
184    @rank_zero_only
185    def on_validation_epoch_end(
186        self,
187        trainer: Trainer,
188        pl_module: CLAlgorithm,
189    ) -> None:
190        r"""Log validation metrics to plot learning curves."""
191
192        # log the accumulated and computed metrics of the epoch to Lightning loggers, specially for plotting learning curves
193        pl_module.log(
194            f"task_{self.task_id}/learning_curve/val/acc",
195            self.acc_val.compute(),
196            on_epoch=True,
197            prog_bar=True,
198        )
199
200    @rank_zero_only
201    def on_test_start(
202        self,
203        trainer: Trainer,
204        pl_module: CLAlgorithm,
205    ) -> None:
206        r"""Initialize the metrics for testing each seen task in the beginning of a task's testing."""
207
208        # set the current task_id again (double checking) from the `CLAlgorithm` object
209        self.task_id = pl_module.task_id
210
211        # get the device to put the metrics on the same device
212        device = pl_module.device
213
214        # initialize test metrics for current and previous tasks
215        self.acc_test = {
216            task_id: MeanMetricBatch().to(device)
217            for task_id in pl_module.processed_task_ids
218        }
219
220    @rank_zero_only
221    def on_test_batch_end(
222        self,
223        trainer: Trainer,
224        pl_module: CLAlgorithm,
225        outputs: dict[str, Any],
226        batch: Any,
227        batch_idx: int,
228        dataloader_idx: int = 0,
229    ) -> None:
230        r"""Accumulating metrics from test batch. We don't need to log and monitor the metrics of test batches.
231
232        **Args:**
233        - **outputs** (`dict[str, Any]`): the outputs of the test step, which is the returns of the `test_step()` method in the `CLAlgorithm`.
234        - **batch** (`Any`): the test data batch.
235        - **dataloader_idx** (`int`): the task ID of seen tasks to be tested. A default value of 0 is given otherwise the LightningModule will raise a `RuntimeError`.
236        """
237
238        # get the batch size
239        batch_size = len(batch)
240
241        test_task_id = pl_module.get_test_task_id_from_dataloader_idx(dataloader_idx)
242
243        # get the metrics values of the batch from the outputs
244        acc_batch = outputs["acc"]
245
246        # update the accumulated metrics in order to calculate the metrics of the epoch
247        self.acc_test[test_task_id].update(acc_batch, batch_size)
248
249    @rank_zero_only
250    def on_test_epoch_end(
251        self,
252        trainer: Trainer,
253        pl_module: CLAlgorithm,
254    ) -> None:
255        r"""Save and plot test metrics at the end of test."""
256
257        # save (update) the test metrics to CSV files
258        self.update_test_acc_to_csv(
259            after_training_task_id=self.task_id,
260            csv_path=self.test_acc_csv_path,
261        )
262
263        # plot the test metrics
264        if hasattr(self, "test_acc_matrix_plot_path"):
265            self.plot_test_acc_matrix_from_csv(
266                csv_path=self.test_acc_csv_path,
267                plot_path=self.test_acc_matrix_plot_path,
268            )
269        if hasattr(self, "test_ave_acc_plot_path"):
270            self.plot_test_ave_acc_curve_from_csv(
271                csv_path=self.test_acc_csv_path,
272                plot_path=self.test_ave_acc_plot_path,
273            )
274
275    def update_test_acc_to_csv(
276        self,
277        after_training_task_id: int,
278        csv_path: str,
279    ) -> None:
280        r"""Update the test accuracy metrics of seen tasks at the last line to an existing CSV file. A new file will be created if not existing.
281
282        **Args:**
283        - **after_training_task_id** (`int`): the task ID after training.
284        - **csv_path** (`str`): save the test metric to path. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/acc.csv'.
285        """
286        processed_task_ids = list(self.acc_test.keys())
287        fieldnames = ["after_training_task", "average_accuracy"] + [
288            f"test_on_task_{task_id}" for task_id in processed_task_ids
289        ]
290
291        new_line = {
292            "after_training_task": after_training_task_id
293        }  # construct the first column
294
295        # construct the columns and calculate the average accuracy over tasks at the same time
296        average_accuracy_over_tasks = MeanMetric().to(
297            device=next(iter(self.acc_test.values())).device
298        )
299        for task_id in processed_task_ids:
300            acc = self.acc_test[task_id].compute().item()
301            new_line[f"test_on_task_{task_id}"] = acc
302            average_accuracy_over_tasks(acc)
303        new_line["average_accuracy"] = average_accuracy_over_tasks.compute().item()
304
305        # write to the csv file
306        is_first = not os.path.exists(csv_path)
307        if not is_first:
308            with open(csv_path, "r", encoding="utf-8") as file:
309                lines = file.readlines()
310                del lines[0]
311        # write header
312        with open(csv_path, "w", encoding="utf-8") as file:
313            writer = csv.DictWriter(file, fieldnames=fieldnames)
314            writer.writeheader()
315        # write metrics
316        with open(csv_path, "a", encoding="utf-8") as file:
317            if not is_first:
318                file.writelines(lines)  # write the previous lines
319            writer = csv.DictWriter(file, fieldnames=fieldnames)
320            writer.writerow(new_line)
321
322    def plot_test_acc_matrix_from_csv(self, csv_path: str, plot_path: str) -> None:
323        """Plot the test accuracy matrix from saved CSV file and save the plot to the designated directory.
324
325        **Args:**
326        - **csv_path** (`str`): the path to the CSV file where the `utils.update_test_acc_to_csv()` saved the test accuracy metric.
327        - **plot_path** (`str`): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/acc_matrix.png'.
328        """
329        data = pd.read_csv(csv_path)
330        processed_task_ids = [
331            int(col.replace("test_on_task_", ""))
332            for col in data.columns
333            if col.startswith("test_on_task_")
334        ]
335
336        # Get all columns that start with "test_on_task_"
337        test_task_cols = [
338            col for col in data.columns if col.startswith("test_on_task_")
339        ]
340        num_tasks = len(processed_task_ids)
341        num_rows = len(data)
342
343        # Build the accuracy matrix
344        acc_matrix = data[test_task_cols].values
345
346        fig, ax = plt.subplots(
347            figsize=(2 * num_tasks, 2 * num_rows)
348        )  # adaptive figure size
349
350        cax = ax.imshow(
351            acc_matrix,
352            interpolation="nearest",
353            cmap="Greens",
354            vmin=0,
355            vmax=1,
356            aspect="auto",
357        )
358
359        colorbar = fig.colorbar(cax)
360        yticks = colorbar.ax.get_yticks()
361        colorbar.ax.set_yticks(yticks)
362        colorbar.ax.set_yticklabels(
363            [f"{tick:.2f}" for tick in yticks], fontsize=10 + num_tasks
364        )
365
366        # Annotate each cell
367        for r in range(num_rows):
368            for c in range(r + 1):
369                ax.text(
370                    c,
371                    r,
372                    f"{acc_matrix[r, c]:.3f}",
373                    ha="center",
374                    va="center",
375                    color="black",
376                    fontsize=10 + num_tasks,
377                )
378
379        ax.set_xticks(range(num_tasks))
380        ax.set_yticks(range(num_rows))
381        ax.set_xticklabels(processed_task_ids, fontsize=10 + num_tasks)
382        ax.set_yticklabels(
383            data["after_training_task"].astype(int).tolist(), fontsize=10 + num_tasks
384        )
385
386        ax.set_xlabel("Testing on task τ", fontsize=10 + num_tasks)
387        ax.set_ylabel("After training task t", fontsize=10 + num_tasks)
388        fig.tight_layout()
389        fig.savefig(plot_path)
390        plt.close(fig)
391
392    def plot_test_ave_acc_curve_from_csv(self, csv_path: str, plot_path: str) -> None:
393        """Plot the test average accuracy curve over different training tasks from saved CSV file and save the plot to the designated directory.
394
395        **Args:**
396        - **csv_path** (`str`): the path to the CSV file where the `utils.update_test_acc_to_csv()` saved the test accuracy metric.
397        - **plot_path** (`str`): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/ave_acc.png'.
398        """
399        data = pd.read_csv(csv_path)
400        after_training_tasks = data["after_training_task"].astype(int).tolist()
401
402        # plot the average accuracy curve over different training tasks
403        fig, ax = plt.subplots(figsize=(16, 9))
404        ax.plot(
405            after_training_tasks,
406            data["average_accuracy"],
407            marker="o",
408            linewidth=2,
409        )
410        ax.set_xlabel("After training task $t$", fontsize=16)
411        ax.set_ylabel("Average Accuracy (AA)", fontsize=16)
412        ax.grid(True)
413        xticks = after_training_tasks
414        yticks = [i * 0.05 for i in range(21)]
415        ax.set_xticks(xticks)
416        ax.set_yticks(yticks)
417        ax.set_xticklabels(xticks, fontsize=16)
418        ax.set_yticklabels([f"{tick:.2f}" for tick in yticks], fontsize=16)
419        fig.savefig(plot_path)
420        plt.close(fig)

Provides all actions that are related to CL accuracy metric, which include:

  • Defining, initializing and recording accuracy metric.
  • Logging training and validation accuracy metric to Lightning loggers in real time.
  • Saving test accuracy metric to files.
  • Visualizing test accuracy metric as plots.

The callback is able to produce the following outputs:

  • CSV files for test accuracy (lower triangular) matrix and average accuracy. See here for details.
  • Coloured plot for test accuracy (lower triangular) matrix. See here for details.
  • Curve plots for test average accuracy over different training tasks. See here for details.

Please refer to A Summary of Continual Learning Metrics to learn about this metric.

CLAccuracy( save_dir: str, test_acc_csv_name: str = 'acc.csv', test_acc_matrix_plot_name: str | None = None, test_ave_acc_plot_name: str | None = None)
44    def __init__(
45        self,
46        save_dir: str,
47        test_acc_csv_name: str = "acc.csv",
48        test_acc_matrix_plot_name: str | None = None,
49        test_ave_acc_plot_name: str | None = None,
50    ) -> None:
51        r"""
52        **Args:**
53        - **save_dir** (`str`): The directory where data and figures of metrics will be saved. Better inside the output folder.
54        - **test_acc_csv_name** (`str`): file name to save test accuracy matrix and average accuracy as CSV file.
55        - **test_acc_matrix_plot_name** (`str` | `None`): file name to save accuracy matrix plot. If `None`, no file will be saved.
56        - **test_ave_acc_plot_name** (`str` | `None`): file name to save average accuracy as curve plot over different training tasks. If `None`, no file will be saved.
57        """
58        super().__init__(save_dir=save_dir)
59
60        # paths
61        self.test_acc_csv_path: str = os.path.join(save_dir, test_acc_csv_name)
62        r"""The path to save test accuracy matrix and average accuracy CSV file."""
63        if test_acc_matrix_plot_name:
64            self.test_acc_matrix_plot_path: str = os.path.join(
65                save_dir, test_acc_matrix_plot_name
66            )
67            r"""The path to save test accuracy matrix plot."""
68        if test_ave_acc_plot_name:
69            self.test_ave_acc_plot_path: str = os.path.join(
70                save_dir, test_ave_acc_plot_name
71            )
72            r"""The path to save test average accuracy curve plot."""
73
74        # training accumulated metrics
75        self.acc_training_epoch: MeanMetricBatch
76        r"""Classification accuracy of training epoch. Accumulated and calculated from the training batches. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-performance-of-training-epoch) for details. """
77
78        # validation accumulated metrics
79        self.acc_val: MeanMetricBatch
80        r"""Validation classification accuracy of the model after training epoch. Accumulated and calculated from the validation batches. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-validation-performace) for details. """
81
82        # test accumulated metrics
83        self.acc_test: dict[int, MeanMetricBatch]
84        r"""Test classification accuracy of the current model (`self.task_id`) on current and previous tasks. Accumulated and calculated from the test batches. Keys are task IDs and values are the corresponding metrics. It is the last row of the lower triangular matrix. See [here](https://pengxiang-wang.com/posts/continual-learning-metrics.html#sec-test-performance-of-previous-tasks) for details. """
85
86        # task ID control
87        self.task_id: int
88        r"""Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to `cl_dataset.num_tasks`."""

Args:

  • save_dir (str): The directory where data and figures of metrics will be saved. Better inside the output folder.
  • test_acc_csv_name (str): file name to save test accuracy matrix and average accuracy as CSV file.
  • test_acc_matrix_plot_name (str | None): file name to save accuracy matrix plot. If None, no file will be saved.
  • test_ave_acc_plot_name (str | None): file name to save average accuracy as curve plot over different training tasks. If None, no file will be saved.
test_acc_csv_path: str

The path to save test accuracy matrix and average accuracy CSV file.

Classification accuracy of training epoch. Accumulated and calculated from the training batches. See here for details.

Validation classification accuracy of the model after training epoch. Accumulated and calculated from the validation batches. See here for details.

Test classification accuracy of the current model (self.task_id) on current and previous tasks. Accumulated and calculated from the test batches. Keys are task IDs and values are the corresponding metrics. It is the last row of the lower triangular matrix. See here for details.

task_id: int

Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to cl_dataset.num_tasks.

@rank_zero_only
def on_fit_start( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.cl_algorithms.CLAlgorithm) -> None:
 90    @rank_zero_only
 91    def on_fit_start(self, trainer: Trainer, pl_module: CLAlgorithm) -> None:
 92        r"""Initialize training and validation metrics."""
 93
 94        # set the current task_id from the `CLAlgorithm` object
 95        self.task_id = pl_module.task_id
 96
 97        # get the device to put the metrics on the same device
 98        device = pl_module.device
 99
100        # initialize training metrics
101        self.acc_training_epoch = MeanMetricBatch().to(device)
102
103        # initialize validation metrics
104        self.acc_val = MeanMetricBatch().to(device)

Initialize training and validation metrics.

@rank_zero_only
def on_train_batch_end( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.cl_algorithms.CLAlgorithm, outputs: dict[str, typing.Any], batch: Any, batch_idx: int) -> None:
106    @rank_zero_only
107    def on_train_batch_end(
108        self,
109        trainer: Trainer,
110        pl_module: CLAlgorithm,
111        outputs: dict[str, Any],
112        batch: Any,
113        batch_idx: int,
114    ) -> None:
115        r"""Record training metrics from training batch, log metrics of training batch and accumulated metrics of the epoch to Lightning loggers.
116
117        **Args:**
118        - **outputs** (`dict[str, Any]`): the outputs of the training step, the returns of the `training_step()` method in the `CLAlgorithm`.
119        - **batch** (`Any`): the training data batch.
120        """
121        # get the batch size
122        batch_size = len(batch)
123
124        # get training metrics values of current training batch from the outputs of the `training_step()`
125        acc_batch = outputs["acc"]
126
127        # update accumulated training metrics to calculate training metrics of the epoch
128        self.acc_training_epoch.update(acc_batch, batch_size)
129
130        # log training metrics of current training batch to Lightning loggers
131        pl_module.log(f"task_{self.task_id}/train/acc_batch", acc_batch, prog_bar=True)
132
133        # log accumulated training metrics till this training batch to Lightning loggers
134        pl_module.log(
135            f"task_{self.task_id}/train/acc",
136            self.acc_training_epoch.compute(),
137            prog_bar=True,
138        )

Record training metrics from training batch, log metrics of training batch and accumulated metrics of the epoch to Lightning loggers.

Args:

  • outputs (dict[str, Any]): the outputs of the training step, the returns of the training_step() method in the CLAlgorithm.
  • batch (Any): the training data batch.
@rank_zero_only
def on_train_epoch_end( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.cl_algorithms.CLAlgorithm) -> None:
140    @rank_zero_only
141    def on_train_epoch_end(
142        self,
143        trainer: Trainer,
144        pl_module: CLAlgorithm,
145    ) -> None:
146        r"""Log metrics of training epoch to plot learning curves and reset the metrics accumulation at the end of training epoch."""
147
148        # log the accumulated and computed metrics of the epoch to Lightning loggers, specially for plotting learning curves
149        pl_module.log(
150            f"task_{self.task_id}/learning_curve/train/acc",
151            self.acc_training_epoch.compute(),
152            on_epoch=True,
153            prog_bar=True,
154        )
155
156        # reset the metrics of training epoch as there are more epochs to go and not only one epoch like in the validation and test
157        self.acc_training_epoch.reset()

Log metrics of training epoch to plot learning curves and reset the metrics accumulation at the end of training epoch.

@rank_zero_only
def on_validation_batch_end( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.cl_algorithms.CLAlgorithm, outputs: dict[str, typing.Any], batch: Any, batch_idx: int) -> None:
159    @rank_zero_only
160    def on_validation_batch_end(
161        self,
162        trainer: Trainer,
163        pl_module: CLAlgorithm,
164        outputs: dict[str, Any],
165        batch: Any,
166        batch_idx: int,
167    ) -> None:
168        r"""Accumulating metrics from validation batch. We don't need to log and monitor the metrics of validation batches.
169
170        **Args:**
171        - **outputs** (`dict[str, Any]`): the outputs of the validation step, which is the returns of the `validation_step()` method in the `CLAlgorithm`.
172        - **batch** (`Any`): the validation data batch.
173        """
174
175        # get the batch size
176        batch_size = len(batch)
177
178        # get the metrics values of the batch from the outputs
179        acc_batch = outputs["acc"]
180
181        # update the accumulated metrics in order to calculate the validation metrics
182        self.acc_val.update(acc_batch, batch_size)

Accumulating metrics from validation batch. We don't need to log and monitor the metrics of validation batches.

Args:

  • outputs (dict[str, Any]): the outputs of the validation step, which is the returns of the validation_step() method in the CLAlgorithm.
  • batch (Any): the validation data batch.
@rank_zero_only
def on_validation_epoch_end( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.cl_algorithms.CLAlgorithm) -> None:
184    @rank_zero_only
185    def on_validation_epoch_end(
186        self,
187        trainer: Trainer,
188        pl_module: CLAlgorithm,
189    ) -> None:
190        r"""Log validation metrics to plot learning curves."""
191
192        # log the accumulated and computed metrics of the epoch to Lightning loggers, specially for plotting learning curves
193        pl_module.log(
194            f"task_{self.task_id}/learning_curve/val/acc",
195            self.acc_val.compute(),
196            on_epoch=True,
197            prog_bar=True,
198        )

Log validation metrics to plot learning curves.

@rank_zero_only
def on_test_start( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.cl_algorithms.CLAlgorithm) -> None:
200    @rank_zero_only
201    def on_test_start(
202        self,
203        trainer: Trainer,
204        pl_module: CLAlgorithm,
205    ) -> None:
206        r"""Initialize the metrics for testing each seen task in the beginning of a task's testing."""
207
208        # set the current task_id again (double checking) from the `CLAlgorithm` object
209        self.task_id = pl_module.task_id
210
211        # get the device to put the metrics on the same device
212        device = pl_module.device
213
214        # initialize test metrics for current and previous tasks
215        self.acc_test = {
216            task_id: MeanMetricBatch().to(device)
217            for task_id in pl_module.processed_task_ids
218        }

Initialize the metrics for testing each seen task in the beginning of a task's testing.

@rank_zero_only
def on_test_batch_end( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.cl_algorithms.CLAlgorithm, outputs: dict[str, typing.Any], batch: Any, batch_idx: int, dataloader_idx: int = 0) -> None:
220    @rank_zero_only
221    def on_test_batch_end(
222        self,
223        trainer: Trainer,
224        pl_module: CLAlgorithm,
225        outputs: dict[str, Any],
226        batch: Any,
227        batch_idx: int,
228        dataloader_idx: int = 0,
229    ) -> None:
230        r"""Accumulating metrics from test batch. We don't need to log and monitor the metrics of test batches.
231
232        **Args:**
233        - **outputs** (`dict[str, Any]`): the outputs of the test step, which is the returns of the `test_step()` method in the `CLAlgorithm`.
234        - **batch** (`Any`): the test data batch.
235        - **dataloader_idx** (`int`): the task ID of seen tasks to be tested. A default value of 0 is given otherwise the LightningModule will raise a `RuntimeError`.
236        """
237
238        # get the batch size
239        batch_size = len(batch)
240
241        test_task_id = pl_module.get_test_task_id_from_dataloader_idx(dataloader_idx)
242
243        # get the metrics values of the batch from the outputs
244        acc_batch = outputs["acc"]
245
246        # update the accumulated metrics in order to calculate the metrics of the epoch
247        self.acc_test[test_task_id].update(acc_batch, batch_size)

Accumulating metrics from test batch. We don't need to log and monitor the metrics of test batches.

Args:

  • outputs (dict[str, Any]): the outputs of the test step, which is the returns of the test_step() method in the CLAlgorithm.
  • batch (Any): the test data batch.
  • dataloader_idx (int): the task ID of seen tasks to be tested. A default value of 0 is given otherwise the LightningModule will raise a RuntimeError.
@rank_zero_only
def on_test_epoch_end( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.cl_algorithms.CLAlgorithm) -> None:
249    @rank_zero_only
250    def on_test_epoch_end(
251        self,
252        trainer: Trainer,
253        pl_module: CLAlgorithm,
254    ) -> None:
255        r"""Save and plot test metrics at the end of test."""
256
257        # save (update) the test metrics to CSV files
258        self.update_test_acc_to_csv(
259            after_training_task_id=self.task_id,
260            csv_path=self.test_acc_csv_path,
261        )
262
263        # plot the test metrics
264        if hasattr(self, "test_acc_matrix_plot_path"):
265            self.plot_test_acc_matrix_from_csv(
266                csv_path=self.test_acc_csv_path,
267                plot_path=self.test_acc_matrix_plot_path,
268            )
269        if hasattr(self, "test_ave_acc_plot_path"):
270            self.plot_test_ave_acc_curve_from_csv(
271                csv_path=self.test_acc_csv_path,
272                plot_path=self.test_ave_acc_plot_path,
273            )

Save and plot test metrics at the end of test.

def update_test_acc_to_csv(self, after_training_task_id: int, csv_path: str) -> None:
275    def update_test_acc_to_csv(
276        self,
277        after_training_task_id: int,
278        csv_path: str,
279    ) -> None:
280        r"""Update the test accuracy metrics of seen tasks at the last line to an existing CSV file. A new file will be created if not existing.
281
282        **Args:**
283        - **after_training_task_id** (`int`): the task ID after training.
284        - **csv_path** (`str`): save the test metric to path. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/acc.csv'.
285        """
286        processed_task_ids = list(self.acc_test.keys())
287        fieldnames = ["after_training_task", "average_accuracy"] + [
288            f"test_on_task_{task_id}" for task_id in processed_task_ids
289        ]
290
291        new_line = {
292            "after_training_task": after_training_task_id
293        }  # construct the first column
294
295        # construct the columns and calculate the average accuracy over tasks at the same time
296        average_accuracy_over_tasks = MeanMetric().to(
297            device=next(iter(self.acc_test.values())).device
298        )
299        for task_id in processed_task_ids:
300            acc = self.acc_test[task_id].compute().item()
301            new_line[f"test_on_task_{task_id}"] = acc
302            average_accuracy_over_tasks(acc)
303        new_line["average_accuracy"] = average_accuracy_over_tasks.compute().item()
304
305        # write to the csv file
306        is_first = not os.path.exists(csv_path)
307        if not is_first:
308            with open(csv_path, "r", encoding="utf-8") as file:
309                lines = file.readlines()
310                del lines[0]
311        # write header
312        with open(csv_path, "w", encoding="utf-8") as file:
313            writer = csv.DictWriter(file, fieldnames=fieldnames)
314            writer.writeheader()
315        # write metrics
316        with open(csv_path, "a", encoding="utf-8") as file:
317            if not is_first:
318                file.writelines(lines)  # write the previous lines
319            writer = csv.DictWriter(file, fieldnames=fieldnames)
320            writer.writerow(new_line)

Update the test accuracy metrics of seen tasks at the last line to an existing CSV file. A new file will be created if not existing.

Args:

  • after_training_task_id (int): the task ID after training.
  • csv_path (str): save the test metric to path. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/acc.csv'.
def plot_test_acc_matrix_from_csv(self, csv_path: str, plot_path: str) -> None:
322    def plot_test_acc_matrix_from_csv(self, csv_path: str, plot_path: str) -> None:
323        """Plot the test accuracy matrix from saved CSV file and save the plot to the designated directory.
324
325        **Args:**
326        - **csv_path** (`str`): the path to the CSV file where the `utils.update_test_acc_to_csv()` saved the test accuracy metric.
327        - **plot_path** (`str`): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/acc_matrix.png'.
328        """
329        data = pd.read_csv(csv_path)
330        processed_task_ids = [
331            int(col.replace("test_on_task_", ""))
332            for col in data.columns
333            if col.startswith("test_on_task_")
334        ]
335
336        # Get all columns that start with "test_on_task_"
337        test_task_cols = [
338            col for col in data.columns if col.startswith("test_on_task_")
339        ]
340        num_tasks = len(processed_task_ids)
341        num_rows = len(data)
342
343        # Build the accuracy matrix
344        acc_matrix = data[test_task_cols].values
345
346        fig, ax = plt.subplots(
347            figsize=(2 * num_tasks, 2 * num_rows)
348        )  # adaptive figure size
349
350        cax = ax.imshow(
351            acc_matrix,
352            interpolation="nearest",
353            cmap="Greens",
354            vmin=0,
355            vmax=1,
356            aspect="auto",
357        )
358
359        colorbar = fig.colorbar(cax)
360        yticks = colorbar.ax.get_yticks()
361        colorbar.ax.set_yticks(yticks)
362        colorbar.ax.set_yticklabels(
363            [f"{tick:.2f}" for tick in yticks], fontsize=10 + num_tasks
364        )
365
366        # Annotate each cell
367        for r in range(num_rows):
368            for c in range(r + 1):
369                ax.text(
370                    c,
371                    r,
372                    f"{acc_matrix[r, c]:.3f}",
373                    ha="center",
374                    va="center",
375                    color="black",
376                    fontsize=10 + num_tasks,
377                )
378
379        ax.set_xticks(range(num_tasks))
380        ax.set_yticks(range(num_rows))
381        ax.set_xticklabels(processed_task_ids, fontsize=10 + num_tasks)
382        ax.set_yticklabels(
383            data["after_training_task"].astype(int).tolist(), fontsize=10 + num_tasks
384        )
385
386        ax.set_xlabel("Testing on task τ", fontsize=10 + num_tasks)
387        ax.set_ylabel("After training task t", fontsize=10 + num_tasks)
388        fig.tight_layout()
389        fig.savefig(plot_path)
390        plt.close(fig)

Plot the test accuracy matrix from saved CSV file and save the plot to the designated directory.

Args:

  • csv_path (str): the path to the CSV file where the utils.update_test_acc_to_csv() saved the test accuracy metric.
  • plot_path (str): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/acc_matrix.png'.
def plot_test_ave_acc_curve_from_csv(self, csv_path: str, plot_path: str) -> None:
392    def plot_test_ave_acc_curve_from_csv(self, csv_path: str, plot_path: str) -> None:
393        """Plot the test average accuracy curve over different training tasks from saved CSV file and save the plot to the designated directory.
394
395        **Args:**
396        - **csv_path** (`str`): the path to the CSV file where the `utils.update_test_acc_to_csv()` saved the test accuracy metric.
397        - **plot_path** (`str`): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/ave_acc.png'.
398        """
399        data = pd.read_csv(csv_path)
400        after_training_tasks = data["after_training_task"].astype(int).tolist()
401
402        # plot the average accuracy curve over different training tasks
403        fig, ax = plt.subplots(figsize=(16, 9))
404        ax.plot(
405            after_training_tasks,
406            data["average_accuracy"],
407            marker="o",
408            linewidth=2,
409        )
410        ax.set_xlabel("After training task $t$", fontsize=16)
411        ax.set_ylabel("Average Accuracy (AA)", fontsize=16)
412        ax.grid(True)
413        xticks = after_training_tasks
414        yticks = [i * 0.05 for i in range(21)]
415        ax.set_xticks(xticks)
416        ax.set_yticks(yticks)
417        ax.set_xticklabels(xticks, fontsize=16)
418        ax.set_yticklabels([f"{tick:.2f}" for tick in yticks], fontsize=16)
419        fig.savefig(plot_path)
420        plt.close(fig)

Plot the test average accuracy curve over different training tasks from saved CSV file and save the plot to the designated directory.

Args:

  • csv_path (str): the path to the CSV file where the utils.update_test_acc_to_csv() saved the test accuracy metric.
  • plot_path (str): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/ave_acc.png'.