Skip to content

Commit 1f7737e

Browse files
committed
Add BinaryCollector for sampling profiler
Thin wrapper around the C BinaryWriter. Implements the Collector interface so it can be used interchangeably with other collectors like FlamegraphCollector or GeckoCollector. Compression is configurable: 'auto' uses zstd when available, 'zstd' requires it, 'none' disables compression. The collector passes samples directly to C for encoding without building Python data structures.
1 parent 18287f4 commit 1f7737e

File tree

1 file changed

+123
-0
lines changed

1 file changed

+123
-0
lines changed
Lines changed: 123 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,123 @@
1+
"""Thin Python wrapper around C binary writer for profiling data."""
2+
3+
import time
4+
5+
from .collector import Collector
6+
7+
# Compression type constants (must match binary_io.h)
COMPRESSION_NONE = 0
COMPRESSION_ZSTD = 1


def _resolve_compression(compression):
    """Resolve a user-facing compression setting to a compression constant.

    Args:
        compression: One of the strings 'auto', 'zstd' or 'none'
            (case-insensitive), or one of the integer constants
            COMPRESSION_NONE (0) / COMPRESSION_ZSTD (1).

    Returns:
        int: COMPRESSION_NONE or COMPRESSION_ZSTD.

    Raises:
        TypeError: If compression is neither a string nor an int.
        ValueError: If the string or integer does not name a known
            compression type.
    """
    if isinstance(compression, int):
        # Reject unknown integers here, with the same error used for unknown
        # names, instead of handing an unchecked value to the writer.
        if compression not in (COMPRESSION_NONE, COMPRESSION_ZSTD):
            raise ValueError(f"Unknown compression type: {compression}")
        return compression

    if not isinstance(compression, str):
        # Without this check the failure would be an incidental
        # AttributeError from the .lower() call below.
        raise TypeError(
            "compression must be a str or int, "
            f"not {type(compression).__name__}"
        )

    name = compression.lower()
    if name == 'none':
        return COMPRESSION_NONE
    if name == 'zstd':
        return COMPRESSION_ZSTD
    if name == 'auto':
        # Auto: use zstd if available, otherwise none.  The import is kept
        # local so the extension module is only needed on this path.
        import _remote_debugging
        if _remote_debugging.zstd_available():
            return COMPRESSION_ZSTD
        return COMPRESSION_NONE
    raise ValueError(f"Unknown compression type: {name}")
37+
38+
39+
class BinaryCollector(Collector):
    """Collector that streams profiling samples to a binary file via C.

    Every sample is handed to ``_remote_debugging.BinaryWriter``, which
    performs the encoding and file output; zstd compression is optional.

    The binary format is described as using string/frame deduplication
    and varint encoding for compact storage.
    """

    def __init__(self, filename, sample_interval_usec, *, skip_idle=False,
                 compression='auto'):
        """Open a binary writer for a new profile.

        Args:
            filename: Path of the binary output file.
            sample_interval_usec: Sampling interval, in microseconds.
            skip_idle: Stored on the instance; the binary format does not
                use it.
            compression: 'auto', 'zstd', 'none', or an int (0=none, 1=zstd).
        """
        import _remote_debugging

        self.filename = filename
        self.sample_interval_usec = sample_interval_usec
        self.skip_idle = skip_idle

        codec = _resolve_compression(compression)
        # Monotonic clock reading converted from seconds to microseconds.
        opened_at_us = int(time.monotonic() * 1_000_000)
        writer_cls = _remote_debugging.BinaryWriter
        self._writer = writer_cls(
            filename,
            sample_interval_usec,
            opened_at_us,
            compression=codec,
        )

    def collect(self, stack_frames, timestamp_us=None):
        """Forward one sample to the C writer.

        The stack frames are passed through untouched; encoding and
        buffering are left to the writer.

        Args:
            stack_frames: List of InterpreterInfo objects from
                _remote_debugging.
            timestamp_us: Sample timestamp in microseconds.  When None, the
                current time.monotonic() reading is used instead.
        """
        when_us = (
            int(time.monotonic() * 1_000_000)
            if timestamp_us is None
            else timestamp_us
        )
        self._writer.write_sample(stack_frames, when_us)

    def collect_failed_sample(self):
        """Note a failed sampling attempt; the binary format records nothing."""
        return None

    def export(self, filename=None):
        """Finalize the underlying writer.

        Args:
            filename: Accepted but ignored; the output path was fixed when
                the collector was created and data is written incrementally.
        """
        self._writer.finalize()

    @property
    def total_samples(self):
        """Number of samples written, as reported by the writer."""
        return self._writer.total_samples

    def get_stats(self):
        """Return the writer's encoding statistics.

        Returns:
            Dict with encoding statistics including repeat/full/suffix/pop-push
            record counts, frames written/saved, and compression ratio.
        """
        return self._writer.get_stats()

    def __enter__(self):
        """Enter the context manager and return this collector."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Leave the context manager.

        The writer is closed if the block raised and finalized otherwise.
        Returns False so that any exception keeps propagating.
        """
        if exc_type is not None:
            self._writer.close()
        else:
            self._writer.finalize()
        return False

0 commit comments

Comments
 (0)