Skip to content

Commit 1437629

Browse files
committed
Update collector interface for binary replay support
Adds an optional timestamp_us parameter to the Collector.collect() method. During live profiling this is None and collectors use their own timing. During binary replay the stored timestamp is passed through, allowing collectors to reconstruct the original timing. Also fixes gecko_collector to use time.monotonic() instead of time.time(), for consistency with the other collectors.
1 parent 427f846 commit 1437629

File tree

6 files changed

+84
-10
lines changed

6 files changed

+84
-10
lines changed

Lib/profiling/sampling/collector.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,15 @@ def extract_lineno(location):
4444

4545
class Collector(ABC):
4646
@abstractmethod
47-
def collect(self, stack_frames):
48-
"""Collect profiling data from stack frames."""
47+
def collect(self, stack_frames, timestamp_us=None):
48+
"""Collect profiling data from stack frames.
49+
50+
Args:
51+
stack_frames: List of InterpreterInfo objects
52+
timestamp_us: Optional timestamp in microseconds. If provided (from
53+
binary replay), use this instead of current time. If None,
54+
collectors should use time.monotonic() or similar.
55+
"""
4956

5057
def collect_failed_sample(self):
5158
"""Collect data about a failed sample attempt."""

Lib/profiling/sampling/gecko_collector.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False):
6666
self.sample_interval_usec = sample_interval_usec
6767
self.skip_idle = skip_idle
6868
self.opcodes_enabled = opcodes
69-
self.start_time = time.time() * 1000 # milliseconds since epoch
69+
self.start_time = time.monotonic() * 1000 # milliseconds since start
7070

7171
# Global string table (shared across all threads)
7272
self.global_strings = ["(root)"] # Start with root
@@ -103,6 +103,9 @@ def __init__(self, sample_interval_usec, *, skip_idle=False, opcodes=False):
103103
# Opcode state tracking per thread: tid -> (opcode, lineno, col_offset, funcname, filename, start_time)
104104
self.opcode_state = {}
105105

106+
# For binary replay: track base timestamp (first sample's timestamp)
107+
self._replay_base_timestamp_us = None
108+
106109
def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
107110
active_name, inactive_name, category, current_time):
108111
"""Track binary state transitions and emit markers.
@@ -138,9 +141,18 @@ def _track_state_transition(self, tid, condition, active_dict, inactive_dict,
138141
self._add_marker(tid, active_name, active_dict.pop(tid),
139142
current_time, category)
140143

141-
def collect(self, stack_frames):
144+
def collect(self, stack_frames, timestamp_us=None):
142145
"""Collect a sample from stack frames."""
143-
current_time = (time.time() * 1000) - self.start_time
146+
if timestamp_us is not None:
147+
# Use provided timestamp (from binary replay)
148+
# Track first timestamp as base for relative time calculation
149+
if self._replay_base_timestamp_us is None:
150+
self._replay_base_timestamp_us = timestamp_us
151+
# Convert to milliseconds relative to first sample
152+
current_time = (timestamp_us - self._replay_base_timestamp_us) / 1000
153+
else:
154+
# Live sampling - use monotonic clock
155+
current_time = (time.monotonic() * 1000) - self.start_time
144156

145157
# Update interval calculation
146158
if self.sample_count > 0 and self.last_sample_time > 0:

Lib/profiling/sampling/live_collector/collector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ def collect_failed_sample(self):
348348
self.failed_samples += 1
349349
self.total_samples += 1
350350

351-
def collect(self, stack_frames):
351+
def collect(self, stack_frames, timestamp_us=None):
352352
"""Collect and display profiling data."""
353353
if self.start_time is None:
354354
self.start_time = time.perf_counter()

Lib/profiling/sampling/pstats_collector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ def _process_frames(self, frames):
5151

5252
self.callers[callee][caller] += 1
5353

54-
def collect(self, stack_frames):
54+
def collect(self, stack_frames, timestamp_us=None):
5555
if stack_frames and hasattr(stack_frames[0], "awaited_by"):
5656
# Async frame processing
5757
for frames, thread_id, task_id in self._iter_async_frames(stack_frames):

Lib/profiling/sampling/sample.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from .stack_collector import CollapsedStackCollector, FlamegraphCollector
1313
from .heatmap_collector import HeatmapCollector
1414
from .gecko_collector import GeckoCollector
15+
from .binary_collector import BinaryCollector
1516
from .constants import (
1617
PROFILING_MODE_WALL,
1718
PROFILING_MODE_CPU,
@@ -137,6 +138,10 @@ def sample(self, collector, duration_sec=10, *, async_aware=False):
137138
if self.collect_stats:
138139
self._print_unwinder_stats()
139140

141+
# Print binary I/O stats if using binary collector
142+
if isinstance(collector, BinaryCollector):
143+
self._print_binary_stats(collector)
144+
140145
# Pass stats to flamegraph collector if it's the right type
141146
if hasattr(collector, 'set_stats'):
142147
collector.set_stats(self.sample_interval_usec, running_time, sample_rate, error_rate, missed_samples, mode=self.mode)
@@ -278,6 +283,56 @@ def _print_unwinder_stats(self):
278283
if stale_invalidations > 0:
279284
print(f" {ANSIColors.YELLOW}Stale cache invalidations: {stale_invalidations}{ANSIColors.RESET}")
280285

286+
def _print_binary_stats(self, collector):
287+
"""Print binary I/O encoding statistics."""
288+
try:
289+
stats = collector.get_stats()
290+
except (ValueError, RuntimeError):
291+
return # Collector closed or stats unavailable
292+
293+
print(f" {ANSIColors.CYAN}Binary Encoding:{ANSIColors.RESET}")
294+
295+
# Record type counts
296+
repeat_records = stats.get('repeat_records', 0)
297+
repeat_samples = stats.get('repeat_samples', 0)
298+
full_records = stats.get('full_records', 0)
299+
suffix_records = stats.get('suffix_records', 0)
300+
pop_push_records = stats.get('pop_push_records', 0)
301+
total_records = stats.get('total_records', 0)
302+
303+
if total_records > 0:
304+
repeat_pct = repeat_records / total_records * 100
305+
full_pct = full_records / total_records * 100
306+
suffix_pct = suffix_records / total_records * 100
307+
pop_push_pct = pop_push_records / total_records * 100
308+
else:
309+
repeat_pct = full_pct = suffix_pct = pop_push_pct = 0
310+
311+
print(f" Records: {total_records:,}")
312+
print(f" RLE repeat: {repeat_records:,} ({ANSIColors.GREEN}{repeat_pct:.1f}%{ANSIColors.RESET}) [{repeat_samples:,} samples]")
313+
print(f" Full stack: {full_records:,} ({full_pct:.1f}%)")
314+
print(f" Suffix match: {suffix_records:,} ({suffix_pct:.1f}%)")
315+
print(f" Pop-push: {pop_push_records:,} ({pop_push_pct:.1f}%)")
316+
317+
# Frame efficiency
318+
frames_written = stats.get('total_frames_written', 0)
319+
frames_saved = stats.get('frames_saved', 0)
320+
compression_pct = stats.get('frame_compression_pct', 0)
321+
322+
print(f" {ANSIColors.CYAN}Frame Efficiency:{ANSIColors.RESET}")
323+
print(f" Frames written: {frames_written:,}")
324+
print(f" Frames saved: {frames_saved:,} ({ANSIColors.GREEN}{compression_pct:.1f}%{ANSIColors.RESET})")
325+
326+
# Bytes written
327+
bytes_written = stats.get('bytes_written', 0)
328+
if bytes_written >= 1024 * 1024:
329+
bytes_str = f"{bytes_written / (1024 * 1024):.1f} MB"
330+
elif bytes_written >= 1024:
331+
bytes_str = f"{bytes_written / 1024:.1f} KB"
332+
else:
333+
bytes_str = f"{bytes_written} B"
334+
print(f" Bytes (pre-zstd): {bytes_str}")
335+
281336

282337
def sample(
283338
pid,

Lib/profiling/sampling/stack_collector.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def __init__(self, sample_interval_usec, *, skip_idle=False):
1818
self.sample_interval_usec = sample_interval_usec
1919
self.skip_idle = skip_idle
2020

21-
def collect(self, stack_frames, skip_idle=False):
21+
def collect(self, stack_frames, timestamp_us=None, skip_idle=False):
2222
if stack_frames and hasattr(stack_frames[0], "awaited_by"):
2323
# Async-aware mode: process async task frames
2424
for frames, thread_id, task_id in self._iter_async_frames(stack_frames):
@@ -96,7 +96,7 @@ def __init__(self, *args, **kwargs):
9696
# Per-thread statistics
9797
self.per_thread_stats = {} # {thread_id: {has_gil, on_cpu, gil_requested, unknown, has_exception, total, gc_samples}}
9898

99-
def collect(self, stack_frames, skip_idle=False):
99+
def collect(self, stack_frames, timestamp_us=None, skip_idle=False):
100100
"""Override to track thread status statistics before processing frames."""
101101
# Increment sample count once per sample
102102
self._sample_count += 1
@@ -128,7 +128,7 @@ def collect(self, stack_frames, skip_idle=False):
128128
self.per_thread_stats[thread_id][key] += value
129129

130130
# Call parent collect to process frames
131-
super().collect(stack_frames, skip_idle=skip_idle)
131+
super().collect(stack_frames, timestamp_us=timestamp_us, skip_idle=skip_idle)
132132

133133
def set_stats(self, sample_interval_usec, duration_sec, sample_rate,
134134
error_rate=None, missed_samples=None, mode=None):

0 commit comments

Comments
 (0)