clarena.metrics.cul_ad

The submodule in metrics for CULAccuracyDifference.

  1r"""
  2The submodule in `metrics` for `CULAccuracyDifference`.
  3"""
  4
  5__all__ = ["CULAccuracyDifference"]
  6
  7import csv
  8import logging
  9import os
 10from typing import Any
 11
 12import pandas as pd
 13from lightning import Trainer
 14from lightning.pytorch.utilities import rank_zero_only
 15from matplotlib import pyplot as plt
 16from torchmetrics import MeanMetric
 17
 18from clarena.metrics import MetricCallback
 19from clarena.utils.eval import CULEvaluation
 20from clarena.utils.metrics import MeanMetricBatch
 21
 22# always get logger for built-in logging in each module
 23pylogger = logging.getLogger(__name__)
 24
 25
 26class CULAccuracyDifference(MetricCallback):
 27    r"""Provides all actions that are related to CUL accuracy difference (AD) metric, which include:
 28
 29    - Defining, initializing and recording AD metric.
 30    - Saving AD metric to files.
 31    - Visualizing AD metric as plots.
 32
 33    The callback is able to produce the following outputs:
 34
 35    - CSV files for AD in each task.
 36    - Coloured plot for AD in each task.
 37
 38    Note that this callback is designed to be used with the `CULEvaluation` module, which is a special evaluation module for continual unlearning. It is not a typical test step in the algorithm, but rather a test protocol that evaluates the performance of the model on unlearned tasks.
 39
 40    """
 41
 42    def __init__(
 43        self,
 44        save_dir: str,
 45        accuracy_difference_csv_name: str = "ad.csv",
 46        accuracy_difference_plot_name: str | None = None,
 47    ) -> None:
 48        r"""
 49        **Args:**
 50        - **save_dir** (`str`): The directory where data and figures of metrics will be saved. Better inside the output folder.
 51        - **accuracy_difference_csv_name** (`str`): file name to save test accuracy difference metrics as CSV file.
 52        - **accuracy_difference_plot_name** (`str` | `None`): file name to save test accuracy difference metrics as plot. If `None`, no plot will be saved.
 53
 54        """
 55        super().__init__(save_dir=save_dir)
 56
 57        # paths
 58        self.accuracy_difference_csv_path: str = os.path.join(
 59            self.save_dir, accuracy_difference_csv_name
 60        )
 61        r"""The path to save the test accuracy difference metrics CSV file."""
 62        if accuracy_difference_plot_name:
 63            self.accuracy_difference_plot_path: str = os.path.join(
 64                self.save_dir, accuracy_difference_plot_name
 65            )
 66            r"""The path to save the test accuracy difference metrics plot file."""
 67
 68        # test accumulated metrics
 69        self.accuracy_difference: dict[int, MeanMetricBatch] = {}
 70        r"""Accuracy difference (between main and full model) metrics for each seen task. Accumulated and calculated from the test batches. Keys are task IDs and values are the corresponding metrics."""
 71
 72        # task ID control
 73        self.task_id: int
 74        r"""Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to `cl_dataset.num_tasks`."""
 75
 76    @rank_zero_only
 77    def on_test_start(
 78        self,
 79        trainer: Trainer,
 80        pl_module: CULEvaluation,
 81    ) -> None:
 82        r"""Initialize the metrics for testing each seen task in the beginning of a task's testing."""
 83
 84        # get the device to put the metrics on the same device
 85        device = pl_module.device
 86
 87        # initialize test metrics for evaluation tasks
 88        self.accuracy_difference = {
 89            task_id: MeanMetricBatch().to(device)
 90            for task_id in pl_module.ad_eval_task_ids
 91        }
 92
 93    @rank_zero_only
 94    def on_test_batch_end(
 95        self,
 96        trainer: Trainer,
 97        pl_module: CULEvaluation,
 98        outputs: dict[str, Any],
 99        batch: Any,
100        batch_idx: int,
101        dataloader_idx: int = 0,
102    ) -> None:
103        r"""Accumulating metrics from test batch. We don't need to log and monitor the metrics of test batches.
104
105        **Args:**
106        - **outputs** (`dict[str, Any]`): the outputs of the test step, which is the returns of the `test_step()` method in the `CULEvaluation`.
107        - **batch** (`Any`): the validation data batch.
108        - **dataloader_idx** (`int`): the task ID of seen tasks to be tested. A default value of 0 is given otherwise the LightningModule will raise a `RuntimeError`.
109        """
110
111        # get the batch size
112        batch_size = len(batch)
113
114        test_task_id = pl_module.get_test_task_id_from_dataloader_idx(dataloader_idx)
115
116        # get the metrics values of the batch from the outputs
117        acc_diff = outputs["acc_diff"]  # accuracy difference
118
119        # update the accumulated metrics in order to calculate the metrics of the epoch
120        self.accuracy_difference[test_task_id].update(acc_diff, batch_size)
121
122    @rank_zero_only
123    def on_test_epoch_end(
124        self,
125        trainer: Trainer,
126        pl_module: CULEvaluation,
127    ) -> None:
128        r"""Save and plot test metrics at the end of test."""
129
130        self.update_unlearning_accuracy_difference_to_csv(
131            accuracy_difference_metric=self.accuracy_difference,
132            csv_path=self.accuracy_difference_csv_path,
133        )
134
135        if hasattr(self, "accuracy_difference_plot_path"):
136            self.plot_unlearning_accuracy_difference_from_csv(
137                csv_path=self.accuracy_difference_csv_path,
138                plot_path=self.accuracy_difference_plot_path,
139            )
140
141    def update_unlearning_accuracy_difference_to_csv(
142        self,
143        accuracy_difference_metric: dict[int, MeanMetricBatch],
144        csv_path: str,
145    ) -> None:
146        r"""Update the unlearning accuracy difference metrics of unlearning tasks to CSV file.
147
148        **Args:**
149        - **accuracy_difference_metric** (`dict[int, MeanMetricBatch]`): the accuracy difference metric. Accumulated and calculated from the unlearning test batches.
150        - **csv_path** (`str`): save the test metric to path. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/distance.csv'.
151        """
152
153        eval_task_ids = list(accuracy_difference_metric.keys())
154        fieldnames = ["average_accuracy_difference"] + [
155            f"unlearning_test_on_task_{task_id}" for task_id in eval_task_ids
156        ]
157
158        new_line = {}
159
160        # write to the columns and calculate the average accuracy difference over tasks at the same time
161        average_accuracy_difference_over_unlearned_tasks = MeanMetric().to(
162            next(iter(accuracy_difference_metric.values())).device
163        )
164        for task_id in eval_task_ids:
165            loss_cls = accuracy_difference_metric[task_id].compute().item()
166            new_line[f"unlearning_test_on_task_{task_id}"] = loss_cls
167            average_accuracy_difference_over_unlearned_tasks(loss_cls)
168        new_line["average_accuracy_difference"] = (
169            average_accuracy_difference_over_unlearned_tasks.compute().item()
170        )
171
172        # write to the csv file
173        is_first = not os.path.exists(csv_path)
174        if not is_first:
175            with open(csv_path, "r", encoding="utf-8") as file:
176                lines = file.readlines()
177                del lines[0]
178        # write header
179        with open(csv_path, "w", encoding="utf-8") as file:
180            writer = csv.DictWriter(file, fieldnames=fieldnames)
181            writer.writeheader()
182        # write metrics
183        with open(csv_path, "a", encoding="utf-8") as file:
184            if not is_first:
185                file.writelines(lines)  # write the previous lines
186            writer = csv.DictWriter(file, fieldnames=fieldnames)
187            writer.writerow(new_line)
188
189    def plot_unlearning_accuracy_difference_from_csv(
190        self, csv_path: str, plot_path: str
191    ) -> None:
192        """Plot the unlearning accuracy difference matrix over different unlearned tasks from saved CSV file and save the plot to the designated directory.
193
194        **Args:**
195        - **csv_path** (`str`): the path to the CSV file where the `utils.save_accuracy_difference_to_csv()` saved the accuracy difference metric.
196        - **plot_path** (`str`): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/distance.png'.
197        """
198        data = pd.read_csv(csv_path)
199
200        unlearned_task_ids = [
201            int(col.replace("unlearning_test_on_task_", ""))
202            for col in data.columns
203            if col.startswith("unlearning_test_on_task_")
204        ]
205        num_tasks = len(unlearned_task_ids)
206        num_tests = len(data)
207
208        # plot the accuracy matrix
209        fig, ax = plt.subplots(
210            figsize=(2 * (num_tasks + 1), 2 * (num_tests + 1))
211        )  # adaptive figure size
212        cax = ax.imshow(
213            data.drop(["average_accuracy_difference"], axis=1),
214            interpolation="nearest",
215            cmap="Greens",
216            vmin=0,
217            vmax=1,
218        )
219
220        colorbar = fig.colorbar(cax)
221        yticks = colorbar.ax.get_yticks()
222        colorbar.ax.set_yticks(yticks)
223        colorbar.ax.set_yticklabels(
224            [f"{tick:.2f}" for tick in yticks], fontsize=10 + num_tasks
225        )  # adaptive font size
226
227        r = 0
228        for r in range(num_tests):
229            for c in range(num_tasks):
230                j = unlearned_task_ids[c]
231                s = (
232                    f"{data.loc[r, f'unlearning_test_on_task_{j}']:.3f}"
233                    if f"unlearning_test_on_task_{j}" in data.columns
234                    else ""
235                )
236                ax.text(
237                    c,
238                    r,
239                    s,
240                    ha="center",
241                    va="center",
242                    color="black",
243                    fontsize=10 + num_tasks,  # adaptive font size
244                )
245
246        ax.set_xticks(range(num_tasks))
247        ax.set_yticks(range(num_tests))
248        ax.set_xticklabels(
249            unlearned_task_ids, fontsize=10 + num_tasks
250        )  # adaptive font size
251        ax.set_yticklabels(
252            range(1, num_tests + 1), fontsize=10 + num_tests
253        )  # adaptive font size
254
255        # Labeling the axes
256        ax.set_xlabel(
257            "Testing unlearning on task τ", fontsize=10 + num_tasks
258        )  # adaptive font size
259        ax.set_ylabel(
260            "Unlearning test after training task t", fontsize=10 + num_tasks
261        )  # adaptive font size
262        fig.savefig(plot_path)
263        plt.close(fig)
class CULAccuracyDifference(clarena.metrics.base.MetricCallback):

Provides all actions related to the CUL accuracy difference (AD) metric, which include:

  • Defining, initializing and recording the AD metric.
  • Saving the AD metric to files.
  • Visualizing the AD metric as plots.

The callback is able to produce the following outputs:

  • CSV files for AD in each task.
  • Coloured plot for AD in each task.

Note that this callback is designed to be used with the CULEvaluation module, a special evaluation module for continual unlearning. It is not a typical test step of the algorithm but a test protocol that evaluates the model's performance on unlearned tasks.
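
A minimal wiring sketch of how this callback might be attached to a Lightning Trainer for an unlearning evaluation run. The eval_module (a CULEvaluation instance) and the datamodule are placeholders assumed to be built elsewhere in the experiment setup; only the CULAccuracyDifference arguments are taken from this module.

    # hypothetical wiring sketch; eval_module (a CULEvaluation instance) and
    # datamodule are placeholders built elsewhere in the experiment setup
    from lightning import Trainer

    from clarena.metrics.cul_ad import CULAccuracyDifference

    ad_callback = CULAccuracyDifference(
        save_dir="./outputs/expr_name/results/unlearning_test_after_task_X",
        accuracy_difference_csv_name="ad.csv",
        accuracy_difference_plot_name="ad.png",  # leave as None to skip the plot
    )
    trainer = Trainer(callbacks=[ad_callback], logger=False)
    # trainer.test(model=eval_module, datamodule=datamodule)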

CULAccuracyDifference( save_dir: str, accuracy_difference_csv_name: str = 'ad.csv', accuracy_difference_plot_name: str | None = None)

Args:

  • save_dir (str): the directory where data and figures of metrics will be saved. Preferably inside the experiment output folder.
  • accuracy_difference_csv_name (str): file name of the CSV file to save test accuracy difference metrics to.
  • accuracy_difference_plot_name (str | None): file name of the plot to save test accuracy difference metrics to. If None, no plot will be saved.
accuracy_difference_csv_path: str

The path to save the test accuracy difference metrics CSV file.

accuracy_difference: dict[int, clarena.utils.metrics.MeanMetricBatch]

Accuracy difference (between main and full model) metrics for each seen task. Accumulated and calculated from the test batches. Keys are task IDs and values are the corresponding metrics.

task_id: int

Task ID counter indicating which task is being processed. Self updated during the task loop. Valid from 1 to cl_dataset.num_tasks.
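
The MeanMetricBatch values stored in the accuracy_difference attribute above are assumed to maintain a batch-size-weighted running mean of per-batch values. A minimal sketch of that assumed behaviour (not the real clarena.utils.metrics implementation) for readers unfamiliar with the class:

    # illustrative stand-in for MeanMetricBatch: a batch-size-weighted running mean
    class RunningMeanSketch:
        def __init__(self) -> None:
            self.total = 0.0  # sum of value * batch_size over all updates
            self.count = 0  # total number of samples seen

        def update(self, value: float, batch_size: int) -> None:
            self.total += float(value) * batch_size
            self.count += batch_size

        def compute(self) -> float:
            return self.total / self.count if self.count else 0.0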

@rank_zero_only
def on_test_start( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.utils.eval.CULEvaluation) -> None:

Initialize the metrics for each evaluated task at the beginning of testing.

@rank_zero_only
def on_test_batch_end( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.utils.eval.CULEvaluation, outputs: dict[str, typing.Any], batch: Any, batch_idx: int, dataloader_idx: int = 0) -> None:

Accumulate metrics from a test batch. Batch-level metrics are not logged or monitored; a worked accumulation example follows the argument list.

Args:

  • outputs (dict[str, Any]): the outputs of the test step, i.e. the dictionary returned by the test_step() method of CULEvaluation.
  • batch (Any): the test data batch.
  • dataloader_idx (int): the index of the test dataloader, which maps to the task ID of the seen task being tested. A default value of 0 is required, otherwise the LightningModule will raise a RuntimeError.
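
For illustration only (the numbers are made up), and assuming MeanMetricBatch behaves like the running-mean sketch above, the per-batch accumulation for one task reduces to a weighted mean over its test batches:

    # two hypothetical test batches of one task: acc_diff 0.10 over 32 samples,
    # then 0.04 over 16 samples; the accumulated task metric is their weighted mean
    weighted_mean = (0.10 * 32 + 0.04 * 16) / (32 + 16)
    print(round(weighted_mean, 3))  # 0.08
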
@rank_zero_only
def on_test_epoch_end( self, trainer: lightning.pytorch.trainer.trainer.Trainer, pl_module: clarena.utils.eval.CULEvaluation) -> None:

Save and plot test metrics at the end of the test.

def update_unlearning_accuracy_difference_to_csv( self, accuracy_difference_metric: dict[int, clarena.utils.metrics.MeanMetricBatch], csv_path: str) -> None:

Append the unlearning accuracy difference metrics of the unlearned tasks to a CSV file; an illustrative file layout follows the argument list.

Args:

  • accuracy_difference_metric (dict[int, MeanMetricBatch]): the accuracy difference metric, accumulated and calculated from the unlearning test batches.
  • csv_path (str): the path to save the test metric to. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/ad.csv'.
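
As a purely illustrative layout (task IDs and values are made up): each call rewrites the header and appends one new row at the bottom, so after two unlearning tests evaluated on tasks 1 and 3 the CSV would contain:

    average_accuracy_difference,unlearning_test_on_task_1,unlearning_test_on_task_3
    0.031,0.027,0.035
    0.024,0.020,0.028
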
def plot_unlearning_accuracy_difference_from_csv(self, csv_path: str, plot_path: str) -> None:

Plot the unlearning accuracy difference matrix over different unlearned tasks from the saved CSV file and save the plot to the designated directory; a standalone usage sketch follows the argument list.

Args:

  • csv_path (str): the path to the CSV file where update_unlearning_accuracy_difference_to_csv() saved the accuracy difference metric.
  • plot_path (str): the path to save the plot to. Preferably the same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/ad.png'.
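
The plot can also be regenerated standalone from a previously saved CSV. A hedged sketch, assuming the MetricCallback base class needs nothing beyond save_dir and that the paths below (placeholders) point at an existing results folder:

    # regenerate the AD plot from an existing CSV (paths are placeholders)
    cb = CULAccuracyDifference(
        save_dir="./outputs/expr_name/results/unlearning_test_after_task_X"
    )
    cb.plot_unlearning_accuracy_difference_from_csv(
        csv_path=cb.accuracy_difference_csv_path,  # defaults to <save_dir>/ad.csv
        plot_path="./outputs/expr_name/results/unlearning_test_after_task_X/ad.png",
    )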