From 10594aa350a6e6fa0755b6003ced569a6ecdd1ef Mon Sep 17 00:00:00 2001
From: Jack Luar
Date: Sun, 11 Jan 2026 16:01:33 +0000
Subject: [PATCH 1/7] feat(tensorboard): add TensorBoard logging for AutoTuner
 sweeps

* Introduced `TensorBoardLogger` class for logging metrics during sweeps.
* Updated `sweep` function to integrate TensorBoard logging.
* Enhanced `consumer` function to log metrics after each parameter run.

Signed-off-by: Jack Luar
---
 tools/AutoTuner/src/autotuner/distributed.py  | 26 +++++++-
 .../src/autotuner/tensorboard_logger.py       | 65 +++++++++++++++++++
 tools/AutoTuner/src/autotuner/utils.py        | 37 +++++++++-
 3 files changed, 120 insertions(+), 8 deletions(-)
 create mode 100644 tools/AutoTuner/src/autotuner/tensorboard_logger.py

diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py
index c5b07dc74c..201c9b6799 100644
--- a/tools/AutoTuner/src/autotuner/distributed.py
+++ b/tools/AutoTuner/src/autotuner/distributed.py
@@ -95,6 +95,7 @@
     CONSTRAINTS_SDC,
     FASTROUTE_TCL,
 )
+from autotuner.tensorboard_logger import TensorBoardLogger
 
 # Name of the final metric
 METRIC = "metric"
@@ -566,6 +567,14 @@ def sweep():
     else:
         repo_dir = os.path.abspath(os.path.join(ORFS_FLOW_DIR, ".."))
     print(f"[INFO TUN-0012] Log folder {LOCAL_DIR}.")
+
+    tb_log_dir = os.path.join(LOCAL_DIR, args.experiment)
+    print(
+        f"[INFO TUN-0034] TensorBoard logging enabled. Run: tensorboard --logdir={tb_log_dir}"
+    )
+
+    tb_logger = TensorBoardLogger.remote(log_dir=tb_log_dir)
+
     queue = Queue()
     parameter_list = list()
     for name, content in config_dict.items():
@@ -577,14 +586,27 @@ def sweep():
             sys.exit(1)
         parameter_list.append([{name: i} for i in np.arange(*content)])
     parameter_list = list(product(*parameter_list))
-    for parameter in parameter_list:
+    for idx, parameter in enumerate(parameter_list):
         temp = dict()
         for value in parameter:
             temp.update(value)
-        queue.put([args, repo_dir, temp, SDC_ORIGINAL, FR_ORIGINAL, INSTALL_PATH])
+        queue.put(
+            [
+                args,
+                repo_dir,
+                temp,
+                SDC_ORIGINAL,
+                FR_ORIGINAL,
+                INSTALL_PATH,
+                idx,
+                tb_logger,
+            ]
+        )
     workers = [consumer.remote(queue) for _ in range(args.jobs)]
     print("[INFO TUN-0009] Waiting for results.")
     ray.get(workers)
+    ray.get(tb_logger.close.remote())
+    print(f"[INFO TUN-0035] TensorBoard events written to {tb_log_dir}")
     print("[INFO TUN-0010] Sweep complete.")
 
 
diff --git a/tools/AutoTuner/src/autotuner/tensorboard_logger.py b/tools/AutoTuner/src/autotuner/tensorboard_logger.py
new file mode 100644
index 0000000000..15230a633e
--- /dev/null
+++ b/tools/AutoTuner/src/autotuner/tensorboard_logger.py
@@ -0,0 +1,65 @@
+import logging
+import os
+from typing import Any, Union
+
+import ray
+from tensorboardX import SummaryWriter
+
+logger = logging.getLogger(__name__)
+
+
+@ray.remote
+class TensorBoardLogger:
+    """TensorBoard logger for AutoTuner experiments"""
+
+    def __init__(self, log_dir: str):
+        os.makedirs(log_dir, exist_ok=True)
+        self.writer = SummaryWriter(log_dir=log_dir)
+        self.log_dir = log_dir
+        self.step = 0
+        logger.info(f"TensorBoard logs will be written to {log_dir}")
+
+    def log_sweep_metrics(
+        self,
+        params: dict[str, Any],
+        metrics: dict[str, Any],
+        score: float,
+        effective_clk_period: Union[float, str],
+        num_drc: Union[int, str],
+        die_area: Union[float, str],
+    ) -> None:
+        """Log metrics from a single sweep run"""
+        self.writer.add_scalar("sweep/score", score, self.step)
+
+        if effective_clk_period != "-":
+            self.writer.add_scalar(
+                "sweep/effective_clk_period", effective_clk_period, self.step
+            )
+
+        if num_drc != "-":
+            self.writer.add_scalar("sweep/num_drc", num_drc, self.step)
+
+        if die_area != "-":
+            self.writer.add_scalar("sweep/die_area", die_area, self.step)
+
+        for key, value in metrics.items():
+            if isinstance(value, (int, float)):
+                self.writer.add_scalar(f"metrics/{key}", value, self.step)
+
+        self.writer.add_hparams(
+            {
+                k: v if isinstance(v, (int, float, str, bool)) else str(v)
+                for k, v in params.items()
+            },
+            {"hparam/score": score if score != 999999.0 else 0.0},
+        )
+
+        self.step += 1
+
+    def close(self) -> None:
+        """Close the TensorBoard writer and log completion message"""
+        self.writer.close()
+        logger.info(
+            f"Sweep complete. View results with: tensorboard --logdir={self.log_dir}"
+        )
+        logger.info(f"Total runs logged: {self.step}")
diff --git a/tools/AutoTuner/src/autotuner/utils.py b/tools/AutoTuner/src/autotuner/utils.py
index fadab40325..18da2bd936 100644
--- a/tools/AutoTuner/src/autotuner/utils.py
+++ b/tools/AutoTuner/src/autotuner/utils.py
@@ -669,9 +669,34 @@ def openroad_distributed(
 @ray.remote
 def consumer(queue):
     """consumer"""
-    while not queue.empty():
-        next_item = queue.get()
-        name = next_item[1]
-        print(f"[INFO TUN-0007] Scheduling run for parameter {name}.")
-        ray.get(openroad_distributed.remote(*next_item))
-        print(f"[INFO TUN-0008] Finished run for parameter {name}.")
+    item = queue.get()
+    tb_logger = item[7]
+
+    while item:
+        args, repo_dir, config, sdc, fr, install, idx, _ = item
+        print(f"[INFO TUN-0007] Scheduling run for parameter {config}.")
+        metric_file, _ = ray.get(
+            openroad_distributed.remote(args, repo_dir, config, sdc, fr, install)
+        )
+        print(f"[INFO TUN-0008] Finished run for parameter {config}.")
+
+        metrics = read_metrics(metric_file, args.stop_stage)
+        effective_clk_period = (
+            metrics["clk_period"] - metrics["worst_slack"]
+            if metrics["worst_slack"] not in ("ERR", "N/A")
+            else "-"
+        )
+        score = effective_clk_period if effective_clk_period != "-" else 999999.0
+
+        ray.get(
+            tb_logger.log_sweep_metrics.remote(
+                params=config,
+                metrics=metrics,
+                score=score,
+                effective_clk_period=effective_clk_period,
+                num_drc=metrics.get("num_drc", "-"),
+                die_area=metrics.get("die_area", "-"),
+            )
+        )
+
+        item = queue.get() if not queue.empty() else None

From b38911eeb68967019df76fdd1c7faa666e5cfdac Mon Sep 17 00:00:00 2001
From: Jack Luar
Date: Sun, 11 Jan 2026 16:11:43 +0000
Subject: [PATCH 2/7] remove unused `idx`

Signed-off-by: Jack Luar
---
 tools/AutoTuner/src/autotuner/distributed.py | 3 +--
 tools/AutoTuner/src/autotuner/utils.py       | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py
index 201c9b6799..7a3f752553 100644
--- a/tools/AutoTuner/src/autotuner/distributed.py
+++ b/tools/AutoTuner/src/autotuner/distributed.py
@@ -586,7 +586,7 @@ def sweep():
             sys.exit(1)
         parameter_list.append([{name: i} for i in np.arange(*content)])
     parameter_list = list(product(*parameter_list))
-    for idx, parameter in enumerate(parameter_list):
+    for parameter in parameter_list:
         temp = dict()
         for value in parameter:
             temp.update(value)
@@ -598,7 +598,6 @@ def sweep():
                 SDC_ORIGINAL,
                 FR_ORIGINAL,
                 INSTALL_PATH,
-                idx,
                 tb_logger,
             ]
         )
diff --git a/tools/AutoTuner/src/autotuner/utils.py b/tools/AutoTuner/src/autotuner/utils.py
index 18da2bd936..cf9e2a0631 100644
--- a/tools/AutoTuner/src/autotuner/utils.py
+++ b/tools/AutoTuner/src/autotuner/utils.py
@@ -670,10 +670,10 @@ def consumer(queue):
"""consumer""" item = queue.get() - tb_logger = item[7] + tb_logger = item[6] while item: - args, repo_dir, config, sdc, fr, install, idx, _ = item + args, repo_dir, config, sdc, fr, install, tb_logger = item print(f"[INFO TUN-0007] Scheduling run for parameter {config}.") metric_file, _ = ray.get( openroad_distributed.remote(args, repo_dir, config, sdc, fr, install) From f1f1920980b896e65fb56a6ff1a88e63b8fa2539 Mon Sep 17 00:00:00 2001 From: Jack Luar Date: Wed, 14 Jan 2026 17:33:53 +0000 Subject: [PATCH 3/7] * add `calculate_trial_path` function for log path handling, redirecting sdc files to correct dir Signed-off-by: Jack Luar --- tools/AutoTuner/src/autotuner/utils.py | 41 +++++++++++++++++++++----- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/tools/AutoTuner/src/autotuner/utils.py b/tools/AutoTuner/src/autotuner/utils.py index cf9e2a0631..64ee0f684d 100644 --- a/tools/AutoTuner/src/autotuner/utils.py +++ b/tools/AutoTuner/src/autotuner/utils.py @@ -287,6 +287,21 @@ def run_command( raise RuntimeError +def calculate_trial_path(args, base_dir, flow_variant): + """ + Calculate the log path and flow variant + """ + flow_variant_with_experiment = f"{args.experiment}/{flow_variant}" + log_path = os.path.abspath( + os.path.join( + base_dir, + f"flow/logs/{args.platform}/{args.design}", + flow_variant_with_experiment, + ) + ) + return log_path, flow_variant_with_experiment + + def openroad( args, base_dir, @@ -297,10 +312,8 @@ def openroad( """ Run OpenROAD-flow-scripts with a given set of parameters. """ - # Make sure path ends in a slash, i.e., is a folder - flow_variant = f"{args.experiment}/{flow_variant}" - log_path = os.path.abspath( - os.path.join(base_dir, f"flow/logs/{args.platform}/{args.design}", flow_variant) + log_path, flow_variant = calculate_trial_path( + args=args, base_dir=base_dir, flow_variant=flow_variant ) report_path = os.path.abspath( os.path.join( @@ -643,6 +656,20 @@ def openroad_distributed( variant=None, ): """Simple wrapper to run openroad distributed with Ray.""" + if variant is None: + variant_parts = [] + for key, value in config.items(): + if key not in ["_SDC_FILE_PATH", "_FR_FILE_PATH"]: + variant_parts.append(f"{key}_{value}") + variant = "_".join(variant_parts) if variant_parts else "" + flow_variant = f"{uuid.uuid4()}-{variant}" if variant else f"{uuid.uuid4()}" + + trial_path, _ = calculate_trial_path( + args=args, base_dir=repo_dir, flow_variant=flow_variant + ) + + os.makedirs(trial_path, exist_ok=True) + config = parse_config( config=config, base_dir=repo_dir, @@ -651,15 +678,15 @@ def openroad_distributed( constraints_sdc=CONSTRAINTS_SDC, fr_original=fr_original, fastroute_tcl=FASTROUTE_TCL, + path=trial_path, ) - if variant is None: - variant = config.replace(" ", "_").replace("=", "_") + t = time.time() metric_file = openroad( args=args, base_dir=repo_dir, parameters=config, - flow_variant=f"{uuid.uuid4()}-{variant}" if variant else f"{uuid.uuid4()}", + flow_variant=flow_variant, install_path=install_path, ) duration = time.time() - t From 0d6e7a3d25b54ca87296e95ce9b8923271e13f73 Mon Sep 17 00:00:00 2001 From: Jack Luar Date: Thu, 15 Jan 2026 16:44:16 +0000 Subject: [PATCH 4/7] fix tb logger Signed-off-by: Jack Luar --- tools/AutoTuner/src/autotuner/tensorboard_logger.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/AutoTuner/src/autotuner/tensorboard_logger.py b/tools/AutoTuner/src/autotuner/tensorboard_logger.py index 15230a633e..aaf9aed48e 100644 --- 
--- a/tools/AutoTuner/src/autotuner/tensorboard_logger.py
+++ b/tools/AutoTuner/src/autotuner/tensorboard_logger.py
@@ -31,15 +31,15 @@ def log_sweep_metrics(
         """Log metrics from a single sweep run"""
         self.writer.add_scalar("sweep/score", score, self.step)
 
-        if effective_clk_period != "-":
+        if isinstance(effective_clk_period, (int, float)):
             self.writer.add_scalar(
                 "sweep/effective_clk_period", effective_clk_period, self.step
             )
 
-        if num_drc != "-":
+        if isinstance(num_drc, (int, float)):
            self.writer.add_scalar("sweep/num_drc", num_drc, self.step)
 
-        if die_area != "-":
+        if isinstance(die_area, (int, float)):
             self.writer.add_scalar("sweep/die_area", die_area, self.step)
 
         for key, value in metrics.items():

From b7c958c20869f736b531c49116c8c885a7570824 Mon Sep 17 00:00:00 2001
From: Jack Luar
Date: Fri, 16 Jan 2026 15:13:48 +0000
Subject: [PATCH 5/7] implement score calculation module (used for tune/sweep)

Signed-off-by: Jack Luar
---
 tools/AutoTuner/src/autotuner/distributed.py | 15 ++--------
 tools/AutoTuner/src/autotuner/utils.py       | 29 ++++++++++++++------
 2 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/tools/AutoTuner/src/autotuner/distributed.py b/tools/AutoTuner/src/autotuner/distributed.py
index 7a3f752553..2a264a4c36 100644
--- a/tools/AutoTuner/src/autotuner/distributed.py
+++ b/tools/AutoTuner/src/autotuner/distributed.py
@@ -92,6 +92,8 @@
     read_config,
     read_metrics,
     prepare_ray_server,
+    calculate_score,
+    ERROR_METRIC,
     CONSTRAINTS_SDC,
     FASTROUTE_TCL,
 )
@@ -99,8 +101,6 @@
 
 # Name of the final metric
 METRIC = "metric"
-# The worst of optimized metric
-ERROR_METRIC = 9e99
 # Path to the FLOW_HOME directory
 ORFS_FLOW_DIR = os.path.abspath(
     os.path.join(os.path.dirname(__file__), "../../../../flow")
 )
@@ -173,16 +173,7 @@
         It can change in any form to minimize the score (return value).
         Default evaluation function optimizes effective clock period.
""" - error = "ERR" in metrics.values() - not_found = "N/A" in metrics.values() - if error or not_found: - return (ERROR_METRIC, "-", "-", "-") - effective_clk_period = metrics["clk_period"] - metrics["worst_slack"] - num_drc = metrics["num_drc"] - gamma = effective_clk_period / 10 - score = effective_clk_period - score = score * (100 / self.step_) + gamma * num_drc - return (score, effective_clk_period, num_drc, metrics["die_area"]) + return calculate_score(metrics, step=self.step_) def _is_valid_config(self, config): """ diff --git a/tools/AutoTuner/src/autotuner/utils.py b/tools/AutoTuner/src/autotuner/utils.py index 64ee0f684d..7b43e25c83 100644 --- a/tools/AutoTuner/src/autotuner/utils.py +++ b/tools/AutoTuner/src/autotuner/utils.py @@ -69,6 +69,24 @@ # Name of the TCL script run before routing FASTROUTE_TCL = "fastroute.tcl" DATE = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") +# The worst of optimized metric +ERROR_METRIC = 9e99 + + +def calculate_score(metrics, step=1): + """Calculate optimization score from metrics.""" + error = "ERR" in metrics.values() + not_found = "N/A" in metrics.values() + + if error or not_found: + return (ERROR_METRIC, "-", "-", "-") + + effective_clk_period = metrics["clk_period"] - metrics["worst_slack"] + num_drc = metrics["num_drc"] + gamma = effective_clk_period / 10 + score = effective_clk_period * (100 / step) + gamma * num_drc + + return (score, effective_clk_period, num_drc, metrics["die_area"]) def write_sdc(variables, path, sdc_original, constraints_sdc): @@ -708,12 +726,7 @@ def consumer(queue): print(f"[INFO TUN-0008] Finished run for parameter {config}.") metrics = read_metrics(metric_file, args.stop_stage) - effective_clk_period = ( - metrics["clk_period"] - metrics["worst_slack"] - if metrics["worst_slack"] not in ("ERR", "N/A") - else "-" - ) - score = effective_clk_period if effective_clk_period != "-" else 999999.0 + score, effective_clk_period, num_drc, die_area = calculate_score(metrics) ray.get( tb_logger.log_sweep_metrics.remote( @@ -721,8 +734,8 @@ def consumer(queue): metrics=metrics, score=score, effective_clk_period=effective_clk_period, - num_drc=metrics.get("num_drc", "-"), - die_area=metrics.get("die_area", "-"), + num_drc=num_drc, + die_area=die_area, ) ) From 10e92e9e71f17a9ad72e8ffbfe8027a675341965 Mon Sep 17 00:00:00 2001 From: Jack Luar Date: Fri, 16 Jan 2026 15:56:11 +0000 Subject: [PATCH 6/7] bugfix: use correct hparam/score value for invalids Signed-off-by: Jack Luar --- tools/AutoTuner/src/autotuner/tensorboard_logger.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/AutoTuner/src/autotuner/tensorboard_logger.py b/tools/AutoTuner/src/autotuner/tensorboard_logger.py index aaf9aed48e..59bdc582a6 100644 --- a/tools/AutoTuner/src/autotuner/tensorboard_logger.py +++ b/tools/AutoTuner/src/autotuner/tensorboard_logger.py @@ -5,6 +5,8 @@ import ray from tensorboardX import SummaryWriter +from autotuner.utils import ERROR_METRIC + logger = logging.getLogger(__name__) @@ -51,7 +53,7 @@ def log_sweep_metrics( k: v if isinstance(v, (int, float, str, bool)) else str(v) for k, v in params.items() }, - {"hparam/score": score if score != 999999.0 else 0.0}, + {"hparam/score": score if score < ERROR_METRIC else 0.0}, ) self.step += 1 From 717073334d50bda35dff4cf63b97366d1361c5d4 Mon Sep 17 00:00:00 2001 From: Jack Luar Date: Fri, 16 Jan 2026 17:28:56 +0000 Subject: [PATCH 7/7] * hparam/score -> hparam/metric * metrics: show error scores as 9e99 Signed-off-by: Jack Luar --- 
 tools/AutoTuner/src/autotuner/tensorboard_logger.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/AutoTuner/src/autotuner/tensorboard_logger.py b/tools/AutoTuner/src/autotuner/tensorboard_logger.py
index 59bdc582a6..6e0dbf9200 100644
--- a/tools/AutoTuner/src/autotuner/tensorboard_logger.py
+++ b/tools/AutoTuner/src/autotuner/tensorboard_logger.py
@@ -53,7 +53,7 @@ def log_sweep_metrics(
                 k: v if isinstance(v, (int, float, str, bool)) else str(v)
                 for k, v in params.items()
             },
-            {"hparam/score": score if score < ERROR_METRIC else 0.0},
+            {"hparam/metric": score},
         )
 
         self.step += 1
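
Usage note (editor's addendum, not part of the patches above): with the series applied, the actor and the score helper can be driven by hand as in this minimal sketch. It assumes `ray` and `tensorboardX` are installed and the patched `autotuner` package is importable; the `demo_metrics` values, the `CTS_CLUSTER_SIZE` parameter, and the `./tb-demo` directory are illustrative only.

import ray

from autotuner.tensorboard_logger import TensorBoardLogger
from autotuner.utils import calculate_score

ray.init()

# Hypothetical metrics for one sweep trial, shaped like read_metrics() output.
demo_metrics = {
    "clk_period": 5.0,    # clock constraint from the SDC file
    "worst_slack": -0.3,  # negative slack lengthens the effective period
    "num_drc": 2,
    "die_area": 1200.0,
}

# With the default step=1: effective_clk_period = 5.0 - (-0.3) = 5.3,
# gamma = 5.3 / 10, so score = 5.3 * 100 + 0.53 * 2 = 531.06.
# Any "ERR"/"N/A" value would instead yield (ERROR_METRIC, "-", "-", "-").
score, eff_clk, num_drc, die_area = calculate_score(demo_metrics)

tb_logger = TensorBoardLogger.remote(log_dir="./tb-demo")
ray.get(
    tb_logger.log_sweep_metrics.remote(
        params={"CTS_CLUSTER_SIZE": 30},  # illustrative sweep parameter
        metrics=demo_metrics,
        score=score,
        effective_clk_period=eff_clk,
        num_drc=num_drc,
        die_area=die_area,
    )
)
ray.get(tb_logger.close.remote())
# Inspect with: tensorboard --logdir=./tb-demo

After PATCH 7, the hparams table records the score under "hparam/metric", and failed runs keep their 9e99 sentinel rather than being flattened to 0.0, so they sort to the bottom instead of masquerading as best results.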