Skip to content

Commit 427f846

Browse files
committed
Add --binary and replay command to sampling profiler CLI
Adds a --binary output format and a --compression option to the run and attach commands, and adds a replay command that converts binary profiles to other formats:

    python -m profiling.sampling replay profile.bin
    python -m profiling.sampling replay --flamegraph -o out.html profile.bin

This enables a record-and-replay workflow: capture in binary format during profiling (faster, smaller files), then convert to visualization formats later without re-profiling.
1 parent 2965eff commit 427f846

File tree

1 file changed

+147
-10
lines changed

1 file changed

+147
-10
lines changed

Lib/profiling/sampling/cli.py

Lines changed: 147 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from .stack_collector import CollapsedStackCollector, FlamegraphCollector
1515
from .heatmap_collector import HeatmapCollector
1616
from .gecko_collector import GeckoCollector
17+
from .binary_collector import BinaryCollector
18+
from .binary_reader import BinaryReader, convert_binary_to_format
1719
from .constants import (
1820
PROFILING_MODE_ALL,
1921
PROFILING_MODE_WALL,
@@ -73,6 +75,7 @@ class CustomFormatter(
7375
"flamegraph": "html",
7476
"gecko": "json",
7577
"heatmap": "html",
78+
"binary": "bin",
7679
}
7780

7881
COLLECTOR_MAP = {
@@ -81,6 +84,7 @@ class CustomFormatter(
8184
"flamegraph": FlamegraphCollector,
8285
"gecko": GeckoCollector,
8386
"heatmap": HeatmapCollector,
87+
"binary": BinaryCollector,
8488
}
8589

8690

@@ -278,7 +282,7 @@ def _add_mode_options(parser):
278282
)
279283

280284

281-
def _add_format_options(parser):
285+
def _add_format_options(parser, include_compression=True):
282286
"""Add output format options to a parser."""
283287
output_group = parser.add_argument_group("Output options")
284288
format_group = output_group.add_mutually_exclusive_group()
@@ -317,8 +321,23 @@ def _add_format_options(parser):
317321
dest="format",
318322
help="Generate interactive HTML heatmap visualization with line-level sample counts",
319323
)
324+
format_group.add_argument(
325+
"--binary",
326+
action="store_const",
327+
const="binary",
328+
dest="format",
329+
help="Generate high-performance binary format (use 'replay' command to convert)",
330+
)
320331
parser.set_defaults(format="pstats")
321332

333+
if include_compression:
334+
output_group.add_argument(
335+
"--compression",
336+
choices=["auto", "zstd", "none"],
337+
default="auto",
338+
help="Compression for binary format: auto (use zstd if available), zstd, none",
339+
)
340+
322341
output_group.add_argument(
323342
"-o",
324343
"--output",
@@ -373,15 +392,18 @@ def _sort_to_mode(sort_choice):
373392
return sort_map.get(sort_choice, SORT_MODE_NSAMPLES)
374393

375394

376-
def _create_collector(format_type, interval, skip_idle, opcodes=False):
395+
def _create_collector(format_type, interval, skip_idle, opcodes=False,
396+
output_file=None, compression='auto'):
377397
"""Create the appropriate collector based on format type.
378398
379399
Args:
380-
format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap')
400+
format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary')
381401
interval: Sampling interval in microseconds
382402
skip_idle: Whether to skip idle samples
383403
opcodes: Whether to collect opcode information (only used by gecko format
384404
for creating interval markers in Firefox Profiler)
405+
output_file: Output file path (required for binary format)
406+
compression: Compression type for binary format ('auto', 'zstd', 'none')
385407
386408
Returns:
387409
A collector instance of the appropriate type
@@ -390,6 +412,13 @@ def _create_collector(format_type, interval, skip_idle, opcodes=False):
390412
if collector_class is None:
391413
raise ValueError(f"Unknown format: {format_type}")
392414

415+
# Binary format requires output file and compression
416+
if format_type == "binary":
417+
if output_file is None:
418+
raise ValueError("Binary format requires an output file")
419+
return collector_class(output_file, interval, skip_idle=skip_idle,
420+
compression=compression)
421+
393422
# Gecko format never skips idle (it needs both GIL and CPU data)
394423
# and is the only format that uses opcodes for interval markers
395424
if format_type == "gecko":
@@ -425,7 +454,12 @@ def _handle_output(collector, args, pid, mode):
425454
pid: Process ID (for generating filenames)
426455
mode: Profiling mode used
427456
"""
428-
if args.format == "pstats":
457+
if args.format == "binary":
458+
# Binary format already wrote to file incrementally, just finalize
459+
collector.export(None)
460+
filename = collector.filename
461+
print(f"Binary profile written to {filename} ({collector.total_samples} samples)")
462+
elif args.format == "pstats":
429463
if args.outfile:
430464
collector.export(args.outfile)
431465
else:
@@ -449,14 +483,21 @@ def _validate_args(args, parser):
449483
args: Parsed command-line arguments
450484
parser: ArgumentParser instance for error reporting
451485
"""
486+
# Replay command has minimal validation
487+
if args.command == "replay":
488+
# Can't replay to binary format
489+
if args.format == "binary":
490+
parser.error("Cannot replay to binary format. Use a different output format.")
491+
return
492+
452493
# Check if live mode is available
453494
if hasattr(args, 'live') and args.live and LiveStatsCollector is None:
454495
parser.error(
455496
"Live mode requires the curses module, which is not available."
456497
)
457498

458499
# Async-aware mode is incompatible with --native, --no-gc, --mode, and --all-threads
459-
if args.async_aware:
500+
if getattr(args, 'async_aware', False):
460501
issues = []
461502
if args.native:
462503
issues.append("--native")
@@ -473,7 +514,7 @@ def _validate_args(args, parser):
473514
)
474515

475516
# --async-mode requires --async-aware
476-
if hasattr(args, 'async_mode') and args.async_mode != "running" and not args.async_aware:
517+
if hasattr(args, 'async_mode') and args.async_mode != "running" and not getattr(args, 'async_aware', False):
477518
parser.error("--async-mode requires --async-aware to be enabled.")
478519

479520
# Live mode is incompatible with format options
@@ -501,15 +542,15 @@ def _validate_args(args, parser):
501542
return
502543

503544
# Validate gecko mode doesn't use non-wall mode
504-
if args.format == "gecko" and args.mode != "wall":
545+
if args.format == "gecko" and getattr(args, 'mode', 'wall') != "wall":
505546
parser.error(
506547
"--mode option is incompatible with --gecko. "
507548
"Gecko format automatically includes both GIL-holding and CPU status analysis."
508549
)
509550

510551
# Validate --opcodes is only used with compatible formats
511552
opcodes_compatible_formats = ("live", "gecko", "flamegraph", "heatmap")
512-
if args.opcodes and args.format not in opcodes_compatible_formats:
553+
if getattr(args, 'opcodes', False) and args.format not in opcodes_compatible_formats:
513554
parser.error(
514555
f"--opcodes is only compatible with {', '.join('--' + f for f in opcodes_compatible_formats)}."
515556
)
@@ -621,6 +662,30 @@ def main():
621662
_add_format_options(attach_parser)
622663
_add_pstats_options(attach_parser)
623664

665+
# === REPLAY COMMAND ===
666+
replay_parser = subparsers.add_parser(
667+
"replay",
668+
help="Replay a binary profile and convert to another format",
669+
formatter_class=CustomFormatter,
670+
description="""Replay a binary profile file and convert to another format
671+
672+
Examples:
673+
# Convert binary to flamegraph
674+
`python -m profiling.sampling replay --flamegraph -o output.html profile.bin`
675+
676+
# Convert binary to pstats and print to stdout
677+
`python -m profiling.sampling replay profile.bin`
678+
679+
# Convert binary to gecko format
680+
`python -m profiling.sampling replay --gecko -o profile.json profile.bin`""",
681+
)
682+
replay_parser.add_argument(
683+
"input_file",
684+
help="Binary profile file to replay",
685+
)
686+
_add_format_options(replay_parser, include_compression=False)
687+
_add_pstats_options(replay_parser)
688+
624689
# Parse arguments
625690
args = parser.parse_args()
626691

@@ -631,6 +696,7 @@ def main():
631696
command_handlers = {
632697
"run": _handle_run,
633698
"attach": _handle_attach,
699+
"replay": _handle_replay,
634700
}
635701

636702
# Execute the appropriate command
@@ -660,8 +726,17 @@ def _handle_attach(args):
660726
mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False
661727
)
662728

729+
# For binary format, determine output file before creating collector
730+
output_file = None
731+
if args.format == "binary":
732+
output_file = args.outfile or _generate_output_filename(args.format, args.pid)
733+
663734
# Create the appropriate collector
664-
collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
735+
collector = _create_collector(
736+
args.format, args.interval, skip_idle, args.opcodes,
737+
output_file=output_file,
738+
compression=getattr(args, 'compression', 'auto')
739+
)
665740

666741
# Sample the process
667742
collector = sample(
@@ -731,8 +806,17 @@ def _handle_run(args):
731806
mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False
732807
)
733808

809+
# For binary format, determine output file before creating collector
810+
output_file = None
811+
if args.format == "binary":
812+
output_file = args.outfile or _generate_output_filename(args.format, process.pid)
813+
734814
# Create the appropriate collector
735-
collector = _create_collector(args.format, args.interval, skip_idle, args.opcodes)
815+
collector = _create_collector(
816+
args.format, args.interval, skip_idle, args.opcodes,
817+
output_file=output_file,
818+
compression=getattr(args, 'compression', 'auto')
819+
)
736820

737821
# Profile the subprocess
738822
try:
@@ -852,5 +936,58 @@ def _handle_live_run(args):
852936
process.wait()
853937

854938

939+
def _handle_replay(args):
940+
"""Handle the 'replay' command - convert binary profile to another format."""
941+
import os
942+
943+
# Check input file exists
944+
if not os.path.exists(args.input_file):
945+
sys.exit(f"Error: Input file not found: {args.input_file}")
946+
947+
# Can't replay to binary format
948+
if args.format == "binary":
949+
sys.exit("Error: Cannot replay to binary format. Use a different output format.")
950+
951+
with BinaryReader(args.input_file) as reader:
952+
info = reader.get_info()
953+
interval = info['sample_interval_us']
954+
955+
print(f"Replaying {info['sample_count']} samples from {args.input_file}")
956+
print(f" Sample interval: {interval} us")
957+
print(f" Compression: {'zstd' if info.get('compression_type', 0) == 1 else 'none'}")
958+
959+
# Create appropriate collector
960+
collector = _create_collector(args.format, interval, skip_idle=False)
961+
962+
# Replay with progress bar
963+
def progress_callback(current, total):
964+
if total > 0:
965+
pct = current / total
966+
bar_width = 40
967+
filled = int(bar_width * pct)
968+
bar = '█' * filled + '░' * (bar_width - filled)
969+
print(f"\r [{bar}] {pct*100:5.1f}% ({current:,}/{total:,})", end="", flush=True)
970+
971+
count = reader.replay_samples(collector, progress_callback)
972+
print() # Newline after progress bar
973+
974+
# Handle output similar to other formats
975+
if args.format == "pstats":
976+
if args.outfile:
977+
collector.export(args.outfile)
978+
else:
979+
# Print to stdout with defaults applied
980+
sort_choice = args.sort if args.sort is not None else "nsamples"
981+
limit = args.limit if args.limit is not None else 15
982+
sort_mode = _sort_to_mode(sort_choice)
983+
collector.print_stats(sort_mode, limit, not args.no_summary, PROFILING_MODE_WALL)
984+
else:
985+
# Export to file
986+
filename = args.outfile or _generate_output_filename(args.format, os.getpid())
987+
collector.export(filename)
988+
989+
print(f"Replayed {count} samples")
990+
991+
855992
if __name__ == "__main__":
856993
main()

0 commit comments

Comments
 (0)