Skip to content

Commit 2965eff

Browse files
committed
Add BinaryReader for sampling profiler replay
Wrapper around the C BinaryReader providing file info access and replay functionality. The replay_samples() method (a thin wrapper over the C reader's replay()) reconstructs samples from the binary file and feeds them to any collector, enabling format conversion without re-profiling. Includes get_info() for metadata access (sample count, thread count, compression type) and get_stats() for decoding statistics.
1 parent 1f7737e commit 2965eff

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
"""Thin Python wrapper around C binary reader for profiling data."""
2+
3+
4+
class BinaryReader:
    """High-performance binary reader using C implementation.

    This reader uses memory-mapped I/O (on Unix) for fast replay of
    profiling data from binary files.

    The underlying ``_remote_debugging.BinaryReader`` is created when the
    context is entered and closed when it is exited. Outside of that
    window every accessor raises ``RuntimeError``.

    Use as a context manager:
        with BinaryReader('profile.bin') as reader:
            info = reader.get_info()
            reader.replay_samples(collector, progress_callback)
    """

    def __init__(self, filename):
        """Create a new binary reader.

        The file is not opened here; that happens in ``__enter__``.

        Args:
            filename: Path to input binary file
        """
        self.filename = filename
        self._reader = None

    def __enter__(self):
        """Open the binary file for reading and return this wrapper."""
        # Imported lazily so that merely importing this module does not
        # require the C extension.
        import _remote_debugging

        # If the context is re-entered while still open, release the old
        # handle instead of overwriting it without closing it.
        self._close()
        self._reader = _remote_debugging.BinaryReader(self.filename)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the binary file.

        Returns:
            bool: Always False, so an exception raised inside the ``with``
            body is never suppressed.
        """
        self._close()
        return False

    def _close(self):
        """Close the underlying reader, if any, and forget it."""
        # Detach the handle *before* closing it: if close() raises, the
        # wrapper must still report itself as not open rather than keep a
        # reference to a reader in an unknown state.
        reader, self._reader = self._reader, None
        if reader is not None:
            reader.close()

    def _require_open(self):
        """Return the underlying reader.

        Raises:
            RuntimeError: If the reader is not currently open.
        """
        if self._reader is None:
            raise RuntimeError("Reader not open. Use as context manager.")
        return self._reader

    def get_info(self):
        """Get metadata about the binary file.

        Returns:
            dict: File metadata including:
                - sample_count: Number of samples in the file
                - sample_interval_us: Sampling interval in microseconds
                - start_time_us: Start timestamp in microseconds
                - string_count: Number of unique strings
                - frame_count: Number of unique frames
                - compression: Compression type used

        Raises:
            RuntimeError: If the reader is not currently open.
        """
        return self._require_open().get_info()

    def replay_samples(self, collector, progress_callback=None):
        """Replay samples from binary file through a collector.

        This allows converting binary profiling data to other formats
        (e.g., flamegraph, pstats) by replaying through the appropriate
        collector.

        Args:
            collector: A Collector instance with a collect() method
            progress_callback: Optional callable(current, total) for progress

        Returns:
            int: Number of samples replayed

        Raises:
            RuntimeError: If the reader is not currently open.
        """
        return self._require_open().replay(collector, progress_callback)

    @property
    def sample_count(self):
        """Number of samples in the file.

        Read from ``get_info()['sample_count']`` on every access.

        Raises:
            RuntimeError: If the reader is not currently open.
        """
        return self._require_open().get_info()['sample_count']

    def get_stats(self):
        """Get reconstruction statistics from replay.

        Returns:
            dict: Statistics about record types decoded and samples
                reconstructed during replay.

        Raises:
            RuntimeError: If the reader is not currently open.
        """
        return self._require_open().get_stats()
89+
90+
91+
def convert_binary_to_format(input_file, output_file, output_format,
                             sample_interval_usec=None, progress_callback=None):
    """Re-export a binary profiling file through one of the collectors.

    The file is opened with BinaryReader, its samples are replayed into a
    collector chosen by ``output_format``, and the collector then writes
    ``output_file``.

    Args:
        input_file: Path to input binary file
        output_file: Path handed to the collector's export() method
        output_format: One of 'flamegraph', 'collapsed', 'pstats', 'gecko'
        sample_interval_usec: Interval passed to the collector. When None
            (or any other falsy value) the interval recorded in the file
            is used instead.
        progress_callback: Optional callable(current, total) for progress

    Returns:
        int: Number of samples converted

    Raises:
        ValueError: If ``output_format`` is not a known format. The input
            file has already been opened when this is raised.
    """
    from .gecko_collector import GeckoCollector
    from .stack_collector import FlamegraphCollector, CollapsedStackCollector
    from .pstats_collector import PStatsCollector

    # Ordered (name, collector class) pairs. Scanned with ``==`` rather
    # than used as a dict, so any value of output_format, hashable or
    # not, is compared exactly as a chain of equality tests would.
    known_formats = (
        ('flamegraph', FlamegraphCollector),
        ('collapsed', CollapsedStackCollector),
        ('pstats', PStatsCollector),
        ('gecko', GeckoCollector),
    )

    with BinaryReader(input_file) as reader:
        file_info = reader.get_info()
        interval = sample_interval_usec or file_info['sample_interval_us']

        collector_cls = next(
            (cls for name, cls in known_formats if output_format == name),
            None,
        )
        if collector_cls is None:
            raise ValueError(f"Unknown output format: {output_format}")
        collector = collector_cls(interval)

        # Feed every stored sample to the collector, then let it write
        # the result while the reader is still open.
        replayed = reader.replay_samples(collector, progress_callback)
        collector.export(output_file)

    return replayed

0 commit comments

Comments
 (0)