1414from .stack_collector import CollapsedStackCollector , FlamegraphCollector
1515from .heatmap_collector import HeatmapCollector
1616from .gecko_collector import GeckoCollector
17+ from .binary_collector import BinaryCollector
18+ from .binary_reader import BinaryReader , convert_binary_to_format
1719from .constants import (
1820 PROFILING_MODE_ALL ,
1921 PROFILING_MODE_WALL ,
@@ -73,6 +75,7 @@ class CustomFormatter(
7375 "flamegraph" : "html" ,
7476 "gecko" : "json" ,
7577 "heatmap" : "html" ,
78+ "binary" : "bin" ,
7679}
7780
7881COLLECTOR_MAP = {
@@ -81,6 +84,7 @@ class CustomFormatter(
8184 "flamegraph" : FlamegraphCollector ,
8285 "gecko" : GeckoCollector ,
8386 "heatmap" : HeatmapCollector ,
87+ "binary" : BinaryCollector ,
8488}
8589
8690
@@ -278,7 +282,7 @@ def _add_mode_options(parser):
278282 )
279283
280284
281- def _add_format_options (parser ):
285+ def _add_format_options (parser , include_compression = True ):
282286 """Add output format options to a parser."""
283287 output_group = parser .add_argument_group ("Output options" )
284288 format_group = output_group .add_mutually_exclusive_group ()
@@ -317,8 +321,23 @@ def _add_format_options(parser):
317321 dest = "format" ,
318322 help = "Generate interactive HTML heatmap visualization with line-level sample counts" ,
319323 )
324+ format_group .add_argument (
325+ "--binary" ,
326+ action = "store_const" ,
327+ const = "binary" ,
328+ dest = "format" ,
329+ help = "Generate high-performance binary format (use 'replay' command to convert)" ,
330+ )
320331 parser .set_defaults (format = "pstats" )
321332
333+ if include_compression :
334+ output_group .add_argument (
335+ "--compression" ,
336+ choices = ["auto" , "zstd" , "none" ],
337+ default = "auto" ,
338+ help = "Compression for binary format: auto (use zstd if available), zstd, none" ,
339+ )
340+
322341 output_group .add_argument (
323342 "-o" ,
324343 "--output" ,
@@ -373,15 +392,18 @@ def _sort_to_mode(sort_choice):
373392 return sort_map .get (sort_choice , SORT_MODE_NSAMPLES )
374393
375394
376- def _create_collector (format_type , interval , skip_idle , opcodes = False ):
395+ def _create_collector (format_type , interval , skip_idle , opcodes = False ,
396+ output_file = None , compression = 'auto' ):
377397 """Create the appropriate collector based on format type.
378398
379399 Args:
380- format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap')
400+ format_type: The output format ('pstats', 'collapsed', 'flamegraph', 'gecko', 'heatmap', 'binary' )
381401 interval: Sampling interval in microseconds
382402 skip_idle: Whether to skip idle samples
383403 opcodes: Whether to collect opcode information (only used by gecko format
384404 for creating interval markers in Firefox Profiler)
405+ output_file: Output file path (required for binary format)
406+ compression: Compression type for binary format ('auto', 'zstd', 'none')
385407
386408 Returns:
387409 A collector instance of the appropriate type
@@ -390,6 +412,13 @@ def _create_collector(format_type, interval, skip_idle, opcodes=False):
390412 if collector_class is None :
391413 raise ValueError (f"Unknown format: { format_type } " )
392414
415+ # Binary format requires output file and compression
416+ if format_type == "binary" :
417+ if output_file is None :
418+ raise ValueError ("Binary format requires an output file" )
419+ return collector_class (output_file , interval , skip_idle = skip_idle ,
420+ compression = compression )
421+
393422 # Gecko format never skips idle (it needs both GIL and CPU data)
394423 # and is the only format that uses opcodes for interval markers
395424 if format_type == "gecko" :
@@ -425,7 +454,12 @@ def _handle_output(collector, args, pid, mode):
425454 pid: Process ID (for generating filenames)
426455 mode: Profiling mode used
427456 """
428- if args .format == "pstats" :
457+ if args .format == "binary" :
458+ # Binary format already wrote to file incrementally, just finalize
459+ collector .export (None )
460+ filename = collector .filename
461+ print (f"Binary profile written to { filename } ({ collector .total_samples } samples)" )
462+ elif args .format == "pstats" :
429463 if args .outfile :
430464 collector .export (args .outfile )
431465 else :
@@ -449,14 +483,21 @@ def _validate_args(args, parser):
449483 args: Parsed command-line arguments
450484 parser: ArgumentParser instance for error reporting
451485 """
486+ # Replay command has minimal validation
487+ if args .command == "replay" :
488+ # Can't replay to binary format
489+ if args .format == "binary" :
490+ parser .error ("Cannot replay to binary format. Use a different output format." )
491+ return
492+
452493 # Check if live mode is available
453494 if hasattr (args , 'live' ) and args .live and LiveStatsCollector is None :
454495 parser .error (
455496 "Live mode requires the curses module, which is not available."
456497 )
457498
458499 # Async-aware mode is incompatible with --native, --no-gc, --mode, and --all-threads
459- if args . async_aware :
500+ if getattr ( args , ' async_aware' , False ) :
460501 issues = []
461502 if args .native :
462503 issues .append ("--native" )
@@ -473,7 +514,7 @@ def _validate_args(args, parser):
473514 )
474515
475516 # --async-mode requires --async-aware
476- if hasattr (args , 'async_mode' ) and args .async_mode != "running" and not args . async_aware :
517+ if hasattr (args , 'async_mode' ) and args .async_mode != "running" and not getattr ( args , ' async_aware' , False ) :
477518 parser .error ("--async-mode requires --async-aware to be enabled." )
478519
479520 # Live mode is incompatible with format options
@@ -501,15 +542,15 @@ def _validate_args(args, parser):
501542 return
502543
503544 # Validate gecko mode doesn't use non-wall mode
504- if args .format == "gecko" and args . mode != "wall" :
545+ if args .format == "gecko" and getattr ( args , ' mode' , 'wall' ) != "wall" :
505546 parser .error (
506547 "--mode option is incompatible with --gecko. "
507548 "Gecko format automatically includes both GIL-holding and CPU status analysis."
508549 )
509550
510551 # Validate --opcodes is only used with compatible formats
511552 opcodes_compatible_formats = ("live" , "gecko" , "flamegraph" , "heatmap" )
512- if args . opcodes and args .format not in opcodes_compatible_formats :
553+ if getattr ( args , ' opcodes' , False ) and args .format not in opcodes_compatible_formats :
513554 parser .error (
514555 f"--opcodes is only compatible with { ', ' .join ('--' + f for f in opcodes_compatible_formats )} ."
515556 )
@@ -621,6 +662,30 @@ def main():
621662 _add_format_options (attach_parser )
622663 _add_pstats_options (attach_parser )
623664
665+ # === REPLAY COMMAND ===
666+ replay_parser = subparsers .add_parser (
667+ "replay" ,
668+ help = "Replay a binary profile and convert to another format" ,
669+ formatter_class = CustomFormatter ,
670+ description = """Replay a binary profile file and convert to another format
671+
672+ Examples:
673+ # Convert binary to flamegraph
674+ `python -m profiling.sampling replay --flamegraph -o output.html profile.bin`
675+
676+ # Convert binary to pstats and print to stdout
677+ `python -m profiling.sampling replay profile.bin`
678+
679+ # Convert binary to gecko format
680+ `python -m profiling.sampling replay --gecko -o profile.json profile.bin`""" ,
681+ )
682+ replay_parser .add_argument (
683+ "input_file" ,
684+ help = "Binary profile file to replay" ,
685+ )
686+ _add_format_options (replay_parser , include_compression = False )
687+ _add_pstats_options (replay_parser )
688+
624689 # Parse arguments
625690 args = parser .parse_args ()
626691
@@ -631,6 +696,7 @@ def main():
631696 command_handlers = {
632697 "run" : _handle_run ,
633698 "attach" : _handle_attach ,
699+ "replay" : _handle_replay ,
634700 }
635701
636702 # Execute the appropriate command
@@ -660,8 +726,17 @@ def _handle_attach(args):
660726 mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False
661727 )
662728
729+ # For binary format, determine output file before creating collector
730+ output_file = None
731+ if args .format == "binary" :
732+ output_file = args .outfile or _generate_output_filename (args .format , args .pid )
733+
663734 # Create the appropriate collector
664- collector = _create_collector (args .format , args .interval , skip_idle , args .opcodes )
735+ collector = _create_collector (
736+ args .format , args .interval , skip_idle , args .opcodes ,
737+ output_file = output_file ,
738+ compression = getattr (args , 'compression' , 'auto' )
739+ )
665740
666741 # Sample the process
667742 collector = sample (
@@ -731,8 +806,17 @@ def _handle_run(args):
731806 mode != PROFILING_MODE_WALL if mode != PROFILING_MODE_ALL else False
732807 )
733808
809+ # For binary format, determine output file before creating collector
810+ output_file = None
811+ if args .format == "binary" :
812+ output_file = args .outfile or _generate_output_filename (args .format , process .pid )
813+
734814 # Create the appropriate collector
735- collector = _create_collector (args .format , args .interval , skip_idle , args .opcodes )
815+ collector = _create_collector (
816+ args .format , args .interval , skip_idle , args .opcodes ,
817+ output_file = output_file ,
818+ compression = getattr (args , 'compression' , 'auto' )
819+ )
736820
737821 # Profile the subprocess
738822 try :
@@ -852,5 +936,58 @@ def _handle_live_run(args):
852936 process .wait ()
853937
854938
def _format_replay_progress(current, total, bar_width=40):
    """Render one carriage-return-prefixed progress line for a replay.

    Args:
        current: Number of samples replayed so far.
        total: Total number of samples expected.
        bar_width: Width of the bar in characters.

    Returns:
        The text to print (without a trailing newline), or None when
        ``total`` is not positive and no meaningful bar can be drawn.
    """
    if total <= 0:
        return None
    # Clamp so an out-of-range callback can never draw a bar that is longer
    # than bar_width or report more than 100%.
    fraction = min(max(current / total, 0.0), 1.0)
    filled = int(bar_width * fraction)
    bar = '█' * filled + '░' * (bar_width - filled)
    return f"\r [{bar}] {fraction * 100:5.1f}% ({current:,}/{total:,})"


def _handle_replay(args):
    """Handle the 'replay' command - convert binary profile to another format.

    Reads the binary profile named by ``args.input_file``, feeds every
    recorded sample into a collector for ``args.format`` and then either
    prints pstats output or exports the result to a file.

    Args:
        args: Parsed command-line arguments. Reads ``input_file``,
            ``format`` and ``outfile``; for pstats output printed to stdout
            it also reads ``sort``, ``limit`` and ``no_summary``.

    Raises:
        SystemExit: If the input file does not exist, is a directory, or
            the requested output format is "binary".
    """
    import os

    input_path = args.input_file

    # Check input file exists
    if not os.path.exists(input_path):
        sys.exit(f"Error: Input file not found: {input_path}")

    # A directory passes the existence check but can never be a profile;
    # fail with a clear message instead of an error from the reader.
    if os.path.isdir(input_path):
        sys.exit(f"Error: Input path is a directory, not a profile file: {input_path}")

    # Can't replay to binary format
    if args.format == "binary":
        sys.exit("Error: Cannot replay to binary format. Use a different output format.")

    with BinaryReader(input_path) as reader:
        info = reader.get_info()
        interval = info['sample_interval_us']

        print(f"Replaying {info['sample_count']} samples from {input_path}")
        print(f" Sample interval: {interval} us")
        print(f" Compression: {'zstd' if info.get('compression_type', 0) == 1 else 'none'}")

        # Create appropriate collector; idle samples are kept so the
        # converted output reflects everything that was recorded.
        collector = _create_collector(args.format, interval, skip_idle=False)

        # Replay with progress bar
        def progress_callback(current, total):
            line = _format_replay_progress(current, total)
            if line is not None:
                print(line, end="", flush=True)

        count = reader.replay_samples(collector, progress_callback)
        print()  # Newline after progress bar

        # Handle output similar to other formats
        if args.format == "pstats":
            if args.outfile:
                collector.export(args.outfile)
            else:
                # Print to stdout with defaults applied
                sort_choice = args.sort if args.sort is not None else "nsamples"
                limit = args.limit if args.limit is not None else 15
                sort_mode = _sort_to_mode(sort_choice)
                collector.print_stats(sort_mode, limit, not args.no_summary, PROFILING_MODE_WALL)
        else:
            # Export to file; without -o the name is derived from the
            # format and the PID of this replay process.
            filename = args.outfile or _generate_output_filename(args.format, os.getpid())
            collector.export(filename)

        print(f"Replayed {count} samples")
991+
855992if __name__ == "__main__" :
856993 main ()
0 commit comments