clarena.metrics.cul_ad
The submodule in `metrics` for `CULAccuracyDifference`.
1r""" 2The submodule in `metrics` for `CULAccuracyDifference`. 3""" 4 5__all__ = ["CULAccuracyDifference"] 6 7import csv 8import logging 9import os 10from typing import Any 11 12import pandas as pd 13from lightning import Trainer 14from lightning.pytorch.utilities import rank_zero_only 15from matplotlib import pyplot as plt 16from torchmetrics import MeanMetric 17 18from clarena.metrics import MetricCallback 19from clarena.utils.eval import CULEvaluation 20from clarena.utils.metrics import MeanMetricBatch 21 22# always get logger for built-in logging in each module 23pylogger = logging.getLogger(__name__) 24 25 26class CULAccuracyDifference(MetricCallback): 27 r"""Provides all actions that are related to CUL accuracy difference (AD) metric, which include: 28 29 - Defining, initializing and recording AD metric. 30 - Saving AD metric to files. 31 - Visualizing AD metric as plots. 32 33 The callback is able to produce the following outputs: 34 35 - CSV files for AD in each task. 36 - Coloured plot for AD in each task. 37 38 Note that this callback is designed to be used with the `CULEvaluation` module, which is a special evaluation module for continual unlearning. It is not a typical test step in the algorithm, but rather a test protocol that evaluates the performance of the model on unlearned tasks. 39 40 """ 41 42 def __init__( 43 self, 44 save_dir: str, 45 accuracy_difference_csv_name: str = "ad.csv", 46 accuracy_difference_plot_name: str | None = None, 47 ) -> None: 48 r""" 49 **Args:** 50 - **save_dir** (`str`): The directory where data and figures of metrics will be saved. Better inside the output folder. 51 - **accuracy_difference_csv_name** (`str`): file name to save test accuracy difference metrics as CSV file. 52 - **accuracy_difference_plot_name** (`str` | `None`): file name to save test accuracy difference metrics as plot. If `None`, no plot will be saved. 53 54 """ 55 super().__init__(save_dir=save_dir) 56 57 # paths 58 self.accuracy_difference_csv_path: str = os.path.join( 59 self.save_dir, accuracy_difference_csv_name 60 ) 61 r"""The path to save the test accuracy difference metrics CSV file.""" 62 if accuracy_difference_plot_name: 63 self.accuracy_difference_plot_path: str = os.path.join( 64 self.save_dir, accuracy_difference_plot_name 65 ) 66 r"""The path to save the test accuracy difference metrics plot file.""" 67 68 # test accumulated metrics 69 self.accuracy_difference: dict[int, MeanMetricBatch] = {} 70 r"""Accuracy difference (between main and full model) metrics for each seen task. Accumulated and calculated from the test batches. Keys are task IDs and values are the corresponding metrics.""" 71 72 # task ID control 73 self.task_id: int 74 r"""Task ID counter indicating which task is being processed. Self updated during the task loop. 
Valid from 1 to `cl_dataset.num_tasks`.""" 75 76 @rank_zero_only 77 def on_test_start( 78 self, 79 trainer: Trainer, 80 pl_module: CULEvaluation, 81 ) -> None: 82 r"""Initialize the metrics for testing each seen task in the beginning of a task's testing.""" 83 84 # get the device to put the metrics on the same device 85 device = pl_module.device 86 87 # initialize test metrics for evaluation tasks 88 self.accuracy_difference = { 89 task_id: MeanMetricBatch().to(device) 90 for task_id in pl_module.ad_eval_task_ids 91 } 92 93 @rank_zero_only 94 def on_test_batch_end( 95 self, 96 trainer: Trainer, 97 pl_module: CULEvaluation, 98 outputs: dict[str, Any], 99 batch: Any, 100 batch_idx: int, 101 dataloader_idx: int = 0, 102 ) -> None: 103 r"""Accumulating metrics from test batch. We don't need to log and monitor the metrics of test batches. 104 105 **Args:** 106 - **outputs** (`dict[str, Any]`): the outputs of the test step, which is the returns of the `test_step()` method in the `CULEvaluation`. 107 - **batch** (`Any`): the validation data batch. 108 - **dataloader_idx** (`int`): the task ID of seen tasks to be tested. A default value of 0 is given otherwise the LightningModule will raise a `RuntimeError`. 109 """ 110 111 # get the batch size 112 batch_size = len(batch) 113 114 test_task_id = pl_module.get_test_task_id_from_dataloader_idx(dataloader_idx) 115 116 # get the metrics values of the batch from the outputs 117 acc_diff = outputs["acc_diff"] # accuracy difference 118 119 # update the accumulated metrics in order to calculate the metrics of the epoch 120 self.accuracy_difference[test_task_id].update(acc_diff, batch_size) 121 122 @rank_zero_only 123 def on_test_epoch_end( 124 self, 125 trainer: Trainer, 126 pl_module: CULEvaluation, 127 ) -> None: 128 r"""Save and plot test metrics at the end of test.""" 129 130 self.update_unlearning_accuracy_difference_to_csv( 131 accuracy_difference_metric=self.accuracy_difference, 132 csv_path=self.accuracy_difference_csv_path, 133 ) 134 135 if hasattr(self, "accuracy_difference_plot_path"): 136 self.plot_unlearning_accuracy_difference_from_csv( 137 csv_path=self.accuracy_difference_csv_path, 138 plot_path=self.accuracy_difference_plot_path, 139 ) 140 141 def update_unlearning_accuracy_difference_to_csv( 142 self, 143 accuracy_difference_metric: dict[int, MeanMetricBatch], 144 csv_path: str, 145 ) -> None: 146 r"""Update the unlearning accuracy difference metrics of unlearning tasks to CSV file. 147 148 **Args:** 149 - **accuracy_difference_metric** (`dict[int, MeanMetricBatch]`): the accuracy difference metric. Accumulated and calculated from the unlearning test batches. 150 - **csv_path** (`str`): save the test metric to path. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/distance.csv'. 
151 """ 152 153 eval_task_ids = list(accuracy_difference_metric.keys()) 154 fieldnames = ["average_accuracy_difference"] + [ 155 f"unlearning_test_on_task_{task_id}" for task_id in eval_task_ids 156 ] 157 158 new_line = {} 159 160 # write to the columns and calculate the average accuracy difference over tasks at the same time 161 average_accuracy_difference_over_unlearned_tasks = MeanMetric().to( 162 next(iter(accuracy_difference_metric.values())).device 163 ) 164 for task_id in eval_task_ids: 165 loss_cls = accuracy_difference_metric[task_id].compute().item() 166 new_line[f"unlearning_test_on_task_{task_id}"] = loss_cls 167 average_accuracy_difference_over_unlearned_tasks(loss_cls) 168 new_line["average_accuracy_difference"] = ( 169 average_accuracy_difference_over_unlearned_tasks.compute().item() 170 ) 171 172 # write to the csv file 173 is_first = not os.path.exists(csv_path) 174 if not is_first: 175 with open(csv_path, "r", encoding="utf-8") as file: 176 lines = file.readlines() 177 del lines[0] 178 # write header 179 with open(csv_path, "w", encoding="utf-8") as file: 180 writer = csv.DictWriter(file, fieldnames=fieldnames) 181 writer.writeheader() 182 # write metrics 183 with open(csv_path, "a", encoding="utf-8") as file: 184 if not is_first: 185 file.writelines(lines) # write the previous lines 186 writer = csv.DictWriter(file, fieldnames=fieldnames) 187 writer.writerow(new_line) 188 189 def plot_unlearning_accuracy_difference_from_csv( 190 self, csv_path: str, plot_path: str 191 ) -> None: 192 """Plot the unlearning accuracy difference matrix over different unlearned tasks from saved CSV file and save the plot to the designated directory. 193 194 **Args:** 195 - **csv_path** (`str`): the path to the CSV file where the `utils.save_accuracy_difference_to_csv()` saved the accuracy difference metric. 196 - **plot_path** (`str`): the path to save plot. Better same as the output directory of the experiment. E.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/distance.png'. 
197 """ 198 data = pd.read_csv(csv_path) 199 200 unlearned_task_ids = [ 201 int(col.replace("unlearning_test_on_task_", "")) 202 for col in data.columns 203 if col.startswith("unlearning_test_on_task_") 204 ] 205 num_tasks = len(unlearned_task_ids) 206 num_tests = len(data) 207 208 # plot the accuracy matrix 209 fig, ax = plt.subplots( 210 figsize=(2 * (num_tasks + 1), 2 * (num_tests + 1)) 211 ) # adaptive figure size 212 cax = ax.imshow( 213 data.drop(["average_accuracy_difference"], axis=1), 214 interpolation="nearest", 215 cmap="Greens", 216 vmin=0, 217 vmax=1, 218 ) 219 220 colorbar = fig.colorbar(cax) 221 yticks = colorbar.ax.get_yticks() 222 colorbar.ax.set_yticks(yticks) 223 colorbar.ax.set_yticklabels( 224 [f"{tick:.2f}" for tick in yticks], fontsize=10 + num_tasks 225 ) # adaptive font size 226 227 r = 0 228 for r in range(num_tests): 229 for c in range(num_tasks): 230 j = unlearned_task_ids[c] 231 s = ( 232 f"{data.loc[r, f'unlearning_test_on_task_{j}']:.3f}" 233 if f"unlearning_test_on_task_{j}" in data.columns 234 else "" 235 ) 236 ax.text( 237 c, 238 r, 239 s, 240 ha="center", 241 va="center", 242 color="black", 243 fontsize=10 + num_tasks, # adaptive font size 244 ) 245 246 ax.set_xticks(range(num_tasks)) 247 ax.set_yticks(range(num_tests)) 248 ax.set_xticklabels( 249 unlearned_task_ids, fontsize=10 + num_tasks 250 ) # adaptive font size 251 ax.set_yticklabels( 252 range(1, num_tests + 1), fontsize=10 + num_tests 253 ) # adaptive font size 254 255 # Labeling the axes 256 ax.set_xlabel( 257 "Testing unlearning on task τ", fontsize=10 + num_tasks 258 ) # adaptive font size 259 ax.set_ylabel( 260 "Unlearning test after training task t", fontsize=10 + num_tasks 261 ) # adaptive font size 262 fig.savefig(plot_path) 263 plt.close(fig)
class CULAccuracyDifference(MetricCallback)
Provides all actions related to the CUL accuracy difference (AD) metric, which include:

- Defining, initializing and recording the AD metric.
- Saving the AD metric to files.
- Visualizing the AD metric as plots.

The callback can produce the following outputs:

- CSV files for the AD of each task.
- A coloured plot of the AD for each task.

Note that this callback is designed to be used with the `CULEvaluation` module, which is a special evaluation module for continual unlearning. It is not a typical test step in the algorithm, but rather a test protocol that evaluates the performance of the model on unlearned tasks.
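For orientation, here is a minimal sketch of how the callback would be wired into a Lightning run. In practice CLArena assembles this from the experiment configuration; `eval_module` and `test_loaders` below are hypothetical placeholders for a configured `CULEvaluation` module and its test dataloaders.

```python
from lightning import Trainer

from clarena.metrics import CULAccuracyDifference

# a minimal sketch, not the CLArena pipeline itself
ad_callback = CULAccuracyDifference(
    save_dir="./outputs/expr_name/results/unlearning_test_after_task_1",
    accuracy_difference_csv_name="ad.csv",
    accuracy_difference_plot_name="ad.png",  # pass None to skip the plot
)

trainer = Trainer(callbacks=[ad_callback], logger=False)
# trainer.test(eval_module, dataloaders=test_loaders)
```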
```python
def __init__(
    self,
    save_dir: str,
    accuracy_difference_csv_name: str = "ad.csv",
    accuracy_difference_plot_name: str | None = None,
) -> None:
    r"""
    **Args:**
    - **save_dir** (`str`): The directory where data and figures of metrics will be saved. Preferably inside the output folder.
    - **accuracy_difference_csv_name** (`str`): File name to save the test accuracy difference metrics as a CSV file.
    - **accuracy_difference_plot_name** (`str` | `None`): File name to save the test accuracy difference metrics as a plot. If `None`, no plot will be saved.
    """
    super().__init__(save_dir=save_dir)

    # paths
    self.accuracy_difference_csv_path: str = os.path.join(
        self.save_dir, accuracy_difference_csv_name
    )
    r"""The path to save the test accuracy difference metrics CSV file."""
    if accuracy_difference_plot_name:
        self.accuracy_difference_plot_path: str = os.path.join(
            self.save_dir, accuracy_difference_plot_name
        )
        r"""The path to save the test accuracy difference metrics plot file."""

    # test accumulated metrics
    self.accuracy_difference: dict[int, MeanMetricBatch] = {}
    r"""Accuracy difference (between the main and full model) metrics for each seen task. Accumulated and calculated from the test batches. Keys are task IDs and values are the corresponding metrics."""

    # task ID control
    self.task_id: int
    r"""Task ID counter indicating which task is being processed. Self-updated during the task loop. Valid from 1 to `cl_dataset.num_tasks`."""
```
**Args:**

- **save_dir** (`str`): The directory where data and figures of metrics will be saved. Preferably inside the output folder.
- **accuracy_difference_csv_name** (`str`): File name to save the test accuracy difference metrics as a CSV file.
- **accuracy_difference_plot_name** (`str` | `None`): File name to save the test accuracy difference metrics as a plot. If `None`, no plot will be saved.

**accuracy_difference**: Accuracy difference (between the main and full model) metrics for each seen task. Accumulated and calculated from the test batches. Keys are task IDs and values are the corresponding metrics.

**task_id**: Task ID counter indicating which task is being processed. Self-updated during the task loop. Valid from 1 to `cl_dataset.num_tasks`.
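A hypothetical sketch of how the constructor arguments turn into the attributes above, assuming the `MetricCallback` base class stores `save_dir` unchanged and does no filesystem work at construction time:

```python
import os

from clarena.metrics import CULAccuracyDifference

# hypothetical example paths; nothing is written at construction time
cb = CULAccuracyDifference(
    save_dir="./outputs/expr_name/results",
    accuracy_difference_plot_name=None,  # default CSV name "ad.csv" is kept
)

assert cb.accuracy_difference_csv_path == os.path.join(
    "./outputs/expr_name/results", "ad.csv"
)
# with accuracy_difference_plot_name=None the plot path attribute is never
# set, which is what the hasattr() check in on_test_epoch_end relies on
assert not hasattr(cb, "accuracy_difference_plot_path")
```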
```python
@rank_zero_only
def on_test_start(
    self,
    trainer: Trainer,
    pl_module: CULEvaluation,
) -> None:
    r"""Initialize the metrics for testing each seen task at the beginning of a task's testing."""

    # get the device so the metrics are put on the same device as the module
    device = pl_module.device

    # initialize test metrics for the evaluation tasks
    self.accuracy_difference = {
        task_id: MeanMetricBatch().to(device)
        for task_id in pl_module.ad_eval_task_ids
    }
```
Initialize the metrics for testing each seen task at the beginning of a task's testing.
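`MeanMetricBatch` is a project-internal metric in `clarena.utils.metrics` and is not documented here. A reasonable mental model, written as an assumed minimal re-implementation rather than the actual code, is a running mean in which each batch's value is weighted by its batch size:

```python
import torch
from torchmetrics import Metric

# an assumed sketch of MeanMetricBatch, NOT the clarena implementation:
# a running mean where each batch value is weighted by its batch size
class MeanMetricBatchSketch(Metric):
    def __init__(self) -> None:
        super().__init__()
        self.add_state("weighted_sum", default=torch.tensor(0.0), dist_reduce_fx="sum")
        self.add_state("count", default=torch.tensor(0.0), dist_reduce_fx="sum")

    def update(self, value: torch.Tensor, batch_size: int) -> None:
        self.weighted_sum += torch.as_tensor(value, dtype=torch.float) * batch_size
        self.count += batch_size

    def compute(self) -> torch.Tensor:
        return self.weighted_sum / self.count
```

This matches how `update(acc_diff, batch_size)` is called from `on_test_batch_end` below.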
```python
@rank_zero_only
def on_test_batch_end(
    self,
    trainer: Trainer,
    pl_module: CULEvaluation,
    outputs: dict[str, Any],
    batch: Any,
    batch_idx: int,
    dataloader_idx: int = 0,
) -> None:
    r"""Accumulate metrics from a test batch. We don't need to log or monitor the metrics of test batches.

    **Args:**
    - **outputs** (`dict[str, Any]`): the outputs of the test step, i.e. the return value of the `test_step()` method in `CULEvaluation`.
    - **batch** (`Any`): the test data batch.
    - **dataloader_idx** (`int`): the index of the test dataloader, which is mapped to the task ID of the seen task being tested. A default value of 0 is given, otherwise the `LightningModule` will raise a `RuntimeError`.
    """

    # get the batch size
    batch_size = len(batch)

    test_task_id = pl_module.get_test_task_id_from_dataloader_idx(dataloader_idx)

    # get the metric values of the batch from the outputs
    acc_diff = outputs["acc_diff"]  # accuracy difference

    # update the accumulated metrics in order to calculate the metrics of the epoch
    self.accuracy_difference[test_task_id].update(acc_diff, batch_size)
```
Accumulate metrics from a test batch. We don't need to log or monitor the metrics of test batches.

**Args:**

- **outputs** (`dict[str, Any]`): the outputs of the test step, i.e. the return value of the `test_step()` method in `CULEvaluation`.
- **batch** (`Any`): the test data batch.
- **dataloader_idx** (`int`): the index of the test dataloader, which is mapped to the task ID of the seen task being tested. A default value of 0 is given, otherwise the `LightningModule` will raise a `RuntimeError`.
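For context, Lightning supplies `dataloader_idx` to batch-level hooks whenever `trainer.test()` runs over a list of dataloaders, here one per evaluated task. A minimal sketch of the index-to-task-ID mapping; the helper below is hypothetical, standing in for `CULEvaluation.get_test_task_id_from_dataloader_idx()`:

```python
# hypothetical sketch: one test dataloader per evaluated task, in a fixed order
ad_eval_task_ids = [1, 3]  # e.g. unlearning is evaluated on tasks 1 and 3

def get_test_task_id_from_dataloader_idx(dataloader_idx: int) -> int:
    # dataloader_idx indexes the list of test dataloaders passed to
    # trainer.test(); we assume it lines up with ad_eval_task_ids
    return ad_eval_task_ids[dataloader_idx]

assert get_test_task_id_from_dataloader_idx(0) == 1
assert get_test_task_id_from_dataloader_idx(1) == 3
```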
```python
@rank_zero_only
def on_test_epoch_end(
    self,
    trainer: Trainer,
    pl_module: CULEvaluation,
) -> None:
    r"""Save and plot test metrics at the end of testing."""

    self.update_unlearning_accuracy_difference_to_csv(
        accuracy_difference_metric=self.accuracy_difference,
        csv_path=self.accuracy_difference_csv_path,
    )

    if hasattr(self, "accuracy_difference_plot_path"):
        self.plot_unlearning_accuracy_difference_from_csv(
            csv_path=self.accuracy_difference_csv_path,
            plot_path=self.accuracy_difference_plot_path,
        )
```
Save and plot test metrics at the end of testing.
```python
def update_unlearning_accuracy_difference_to_csv(
    self,
    accuracy_difference_metric: dict[int, MeanMetricBatch],
    csv_path: str,
) -> None:
    r"""Update the unlearning accuracy difference metrics of the unlearned tasks in the CSV file.

    **Args:**
    - **accuracy_difference_metric** (`dict[int, MeanMetricBatch]`): the accuracy difference metric, accumulated and calculated from the unlearning test batches.
    - **csv_path** (`str`): the path to save the test metric to, e.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/distance.csv'.
    """

    eval_task_ids = list(accuracy_difference_metric.keys())
    fieldnames = ["average_accuracy_difference"] + [
        f"unlearning_test_on_task_{task_id}" for task_id in eval_task_ids
    ]

    new_line = {}

    # fill the columns and calculate the average accuracy difference over the tasks at the same time
    average_accuracy_difference_over_unlearned_tasks = MeanMetric().to(
        next(iter(accuracy_difference_metric.values())).device
    )
    for task_id in eval_task_ids:
        acc_diff = accuracy_difference_metric[task_id].compute().item()
        new_line[f"unlearning_test_on_task_{task_id}"] = acc_diff
        average_accuracy_difference_over_unlearned_tasks(acc_diff)
    new_line["average_accuracy_difference"] = (
        average_accuracy_difference_over_unlearned_tasks.compute().item()
    )

    # write to the CSV file: rewrite the header, then restore previous rows and append the new row
    is_first = not os.path.exists(csv_path)
    if not is_first:
        with open(csv_path, "r", encoding="utf-8") as file:
            lines = file.readlines()
            del lines[0]  # drop the old header
    # write header
    with open(csv_path, "w", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
    # write metrics
    with open(csv_path, "a", encoding="utf-8") as file:
        if not is_first:
            file.writelines(lines)  # write the previous lines back
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writerow(new_line)
```
Update the unlearning accuracy difference metrics of the unlearned tasks in the CSV file.

**Args:**

- **accuracy_difference_metric** (`dict[int, MeanMetricBatch]`): the accuracy difference metric, accumulated and calculated from the unlearning test batches.
- **csv_path** (`str`): the path to save the test metric to, e.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/distance.csv'.
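The header is rewritten on every call, presumably so the column set can grow as more tasks are evaluated, while earlier rows are preserved verbatim. A quick way to inspect the result; the path and values are hypothetical, but the column names follow from `fieldnames` above:

```python
import pandas as pd

# hypothetical path; one row is appended per unlearning test
df = pd.read_csv("./outputs/expr_name/results/unlearning_test_after_task_2/ad.csv")

# expected columns when tasks 1 and 2 have been evaluated:
#   average_accuracy_difference, unlearning_test_on_task_1, unlearning_test_on_task_2
print(df.columns.tolist())
print(df.tail(1))
```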
```python
def plot_unlearning_accuracy_difference_from_csv(
    self, csv_path: str, plot_path: str
) -> None:
    """Plot the unlearning accuracy difference matrix over different unlearned tasks from the saved CSV file and save the plot to the designated directory.

    **Args:**
    - **csv_path** (`str`): the path to the CSV file where `update_unlearning_accuracy_difference_to_csv()` saved the accuracy difference metric.
    - **plot_path** (`str`): the path to save the plot to. Preferably the same as the output directory of the experiment, e.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/distance.png'.
    """
    data = pd.read_csv(csv_path)

    unlearned_task_ids = [
        int(col.replace("unlearning_test_on_task_", ""))
        for col in data.columns
        if col.startswith("unlearning_test_on_task_")
    ]
    num_tasks = len(unlearned_task_ids)
    num_tests = len(data)

    # plot the accuracy difference matrix
    fig, ax = plt.subplots(
        figsize=(2 * (num_tasks + 1), 2 * (num_tests + 1))
    )  # adaptive figure size
    cax = ax.imshow(
        data.drop(["average_accuracy_difference"], axis=1),
        interpolation="nearest",
        cmap="Greens",
        vmin=0,
        vmax=1,
    )

    colorbar = fig.colorbar(cax)
    yticks = colorbar.ax.get_yticks()
    colorbar.ax.set_yticks(yticks)
    colorbar.ax.set_yticklabels(
        [f"{tick:.2f}" for tick in yticks], fontsize=10 + num_tasks
    )  # adaptive font size

    # annotate each cell with its value
    for r in range(num_tests):
        for c in range(num_tasks):
            j = unlearned_task_ids[c]
            s = (
                f"{data.loc[r, f'unlearning_test_on_task_{j}']:.3f}"
                if f"unlearning_test_on_task_{j}" in data.columns
                else ""
            )
            ax.text(
                c,
                r,
                s,
                ha="center",
                va="center",
                color="black",
                fontsize=10 + num_tasks,  # adaptive font size
            )

    ax.set_xticks(range(num_tasks))
    ax.set_yticks(range(num_tests))
    ax.set_xticklabels(
        unlearned_task_ids, fontsize=10 + num_tasks
    )  # adaptive font size
    ax.set_yticklabels(
        range(1, num_tests + 1), fontsize=10 + num_tests
    )  # adaptive font size

    # label the axes
    ax.set_xlabel(
        "Testing unlearning on task τ", fontsize=10 + num_tasks
    )  # adaptive font size
    ax.set_ylabel(
        "Unlearning test after training task t", fontsize=10 + num_tasks
    )  # adaptive font size
    fig.savefig(plot_path)
    plt.close(fig)
```
Plot the unlearning accuracy difference matrix over different unlearned tasks from the saved CSV file and save the plot to the designated directory.

**Args:**

- **csv_path** (`str`): the path to the CSV file where `update_unlearning_accuracy_difference_to_csv()` saved the accuracy difference metric.
- **plot_path** (`str`): the path to save the plot to. Preferably the same as the output directory of the experiment, e.g. './outputs/expr_name/1970-01-01_00-00-00/results/unlearning_test_after_task_X/distance.png'.
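Since this is an ordinary method that reads from disk, the plot can also be regenerated offline from a saved CSV without re-running the test; a sketch with hypothetical paths:

```python
from clarena.metrics import CULAccuracyDifference

# hypothetical paths; regenerate the plot from an existing CSV
cb = CULAccuracyDifference(
    save_dir="./outputs/expr_name/results/unlearning_test_after_task_2"
)
cb.plot_unlearning_accuracy_difference_from_csv(
    csv_path="./outputs/expr_name/results/unlearning_test_after_task_2/ad.csv",
    plot_path="./outputs/expr_name/results/unlearning_test_after_task_2/ad.png",
)
```

Note that the colour scale is fixed to `vmin=0, vmax=1`, so negative accuracy differences would be clipped in colour, although the printed cell values remain exact.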