From d51e4c21348b6d6a949256e57b6c482593fad561 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 11:29:14 +0000
Subject: [PATCH 01/41] refactor(screentracker): internalize initial prompt
 handling in PTYConversation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Changes:
- Add InitialPrompt []MessagePart and OnSnapshot callback to PTYConversationConfig
- Remove initialPrompt string parameter from NewPTY function (now reads from config)
- Add initialPromptReady chan struct{} field for signaling readiness
- Add sendLocked() helper (same as Send but without lock)
- Add messagesLocked() helper that returns a copy of messages
- Update statusLocked() to return ConversationStatusChanging while initial prompt
  is pending, fixing the status flip-flop issue (changing → stable → changing)
- Update Start() to use select with:
  - Ticker for snapshots (calling OnSnapshot callback if set)
  - initialPromptReady channel to send initial prompt when ready

This consolidates initial prompt logic inside PTYConversation.Start() instead
of requiring the server to manipulate internal fields directly. The server.go
changes to use this new API will be done in a separate commit.
---
 lib/screentracker/pty_conversation.go | 71 ++++++++++++++++++++++++---
 1 file changed, 63 insertions(+), 8 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 38d8e40..cd49436 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -66,7 +66,11 @@ type PTYConversationConfig struct {
 	ReadyForInitialPrompt func(message string) bool
 	// FormatToolCall removes the coder report_task tool call from the agent message and also returns the array of removed tool calls
 	FormatToolCall func(message string) (string, []string)
-	Logger         *slog.Logger
+	// InitialPrompt is the initial prompt to send to the agent once ready
+	InitialPrompt []MessagePart
+	// OnSnapshot is called after each snapshot with current status, messages, and screen content
+	OnSnapshot func(status ConversationStatus, messages []ConversationMessage, screen string)
+	Logger     *slog.Logger
 }
 
 func (cfg PTYConversationConfig) getStableSnapshotsThreshold() int {
@@ -96,13 +100,15 @@ type PTYConversation struct {
 	InitialPromptSent bool
 	// ReadyForInitialPrompt keeps track if the agent is ready to accept the initial prompt
 	ReadyForInitialPrompt bool
+	// initialPromptReady is closed when the agent is ready to receive the initial prompt
+	initialPromptReady chan struct{}
 	// toolCallMessageSet keeps track of the tool calls that have been detected & logged in the current agent message
 	toolCallMessageSet map[string]bool
 }
 
 var _ Conversation = &PTYConversation{}
 
-func NewPTY(ctx context.Context, cfg PTYConversationConfig, initialPrompt string) *PTYConversation {
+func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 	if cfg.Clock == nil {
 		cfg.Clock = quartz.NewReal()
 	}
@@ -118,10 +124,13 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig, initialPrompt string
 				Time:    cfg.Clock.Now(),
 			},
 		},
-		InitialPrompt:      initialPrompt,
-		InitialPromptSent:  len(initialPrompt) == 0,
+		InitialPromptSent:  len(cfg.InitialPrompt) == 0,
 		toolCallMessageSet: make(map[string]bool),
 	}
+	// Initialize the channel only if we have an initial prompt to send
+	if len(cfg.InitialPrompt) > 0 {
+		c.initialPromptReady = make(chan struct{})
+	}
 	return c
 }
 
@@ -129,6 +138,14 @@ func (c *PTYConversation) Start(ctx context.Context) {
 	go func() {
 		ticker := c.cfg.Clock.NewTicker(c.cfg.SnapshotInterval)
 		defer ticker.Stop()
+
+		// Create a nil channel if no initial prompt - select will never receive from it
+		initialPromptReady := c.initialPromptReady
+		if initialPromptReady == nil {
+			initialPromptReady = make(chan struct{})
+			// Don't close it - we want it to block forever
+		}
+
 		for {
 			select {
 			case <-ctx.Done():
@@ -144,7 +161,27 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				c.lock.Lock()
 				screen := c.cfg.AgentIO.ReadScreen()
 				c.snapshotLocked(screen)
+				status := c.statusLocked()
+				messages := c.messagesLocked()
+				c.lock.Unlock()
+
+				// Call snapshot callback if configured
+				if c.cfg.OnSnapshot != nil {
+					c.cfg.OnSnapshot(status, messages, screen)
+				}
+			case <-initialPromptReady:
+				// Agent is ready for initial prompt - send it
+				c.lock.Lock()
+				if !c.InitialPromptSent && len(c.cfg.InitialPrompt) > 0 {
+					if err := c.sendLocked(c.cfg.InitialPrompt...); err != nil {
+						c.cfg.Logger.Error("failed to send initial prompt", "error", err)
+					} else {
+						c.InitialPromptSent = true
+					}
+				}
 				c.lock.Unlock()
+				// Nil out to prevent this case from triggering again
+				initialPromptReady = nil
 			}
 		}
 	}()
@@ -222,6 +259,11 @@ func (c *PTYConversation) Send(messageParts ...MessagePart) error {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
+	return c.sendLocked(messageParts...)
+}
+
+// sendLocked sends a message to the agent. Caller MUST hold c.lock.
+func (c *PTYConversation) sendLocked(messageParts ...MessagePart) error {
 	if !c.cfg.SkipSendMessageStatusCheck && c.statusLocked() != ConversationStatusStable {
 		return ErrMessageValidationChanging
 	}
@@ -356,11 +398,19 @@ func (c *PTYConversation) statusLocked() ConversationStatus {
 		}
 	}
 
-	if !c.InitialPromptSent && !c.ReadyForInitialPrompt {
-		if len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
-			c.ReadyForInitialPrompt = true
-			return ConversationStatusStable
+	// Handle initial prompt readiness: report "changing" until the prompt is sent
+	// to avoid the status flipping "changing" → "stable" → "changing"
+	if !c.InitialPromptSent {
+		if !c.ReadyForInitialPrompt {
+			if len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt != nil && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
+				c.ReadyForInitialPrompt = true
+				// Signal that we're ready - close the channel if it exists
+				if c.initialPromptReady != nil {
+					close(c.initialPromptReady)
+				}
+			}
 		}
+		// Keep returning "changing" until initial prompt is actually sent
 		return ConversationStatusChanging
 	}
 
@@ -371,6 +421,11 @@ func (c *PTYConversation) Messages() []ConversationMessage {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
+	return c.messagesLocked()
+}
+
+// messagesLocked returns a copy of messages. Caller MUST hold c.lock.
+func (c *PTYConversation) messagesLocked() []ConversationMessage {
 	result := make([]ConversationMessage, len(c.messages))
 	copy(result, c.messages)
 	return result

From 241671a0c0f2cf9636d6ad7dfbf57b81c5cd272f Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 11:31:47 +0000
Subject: [PATCH 02/41] refactor(server): use PTYConversation's internal
 snapshot loop and InitialPrompt handling

- Update NewServer to format InitialPrompt into []MessagePart via FormatMessage
- Pass InitialPrompt and OnSnapshot callback in PTYConversationConfig
- OnSnapshot callback emits status/messages/screen changes to EventEmitter
- Remove initialPrompt string parameter from NewPTY call (now in config)
- Simplify StartSnapshotLoop to just call s.conversation.Start(ctx)
- Remove redundant goroutine, ticker, and initial prompt send logic

The snapshot loop and initial prompt handling are now internalized in
PTYConversation.Start(), which calls the OnSnapshot callback after each
snapshot.
---
 lib/httpapi/server.go | 48 +++++++++++++++----------------------------
 1 file changed, 17 insertions(+), 31 deletions(-)

diff --git a/lib/httpapi/server.go b/lib/httpapi/server.go
index cb13a29..def8d11 100644
--- a/lib/httpapi/server.go
+++ b/lib/httpapi/server.go
@@ -244,6 +244,15 @@ func NewServer(ctx context.Context, config ServerConfig) (*Server, error) {
 		return mf.FormatToolCall(config.AgentType, message)
 	}
 
+	emitter := NewEventEmitter(1024)
+	agentType := config.AgentType
+
+	// Format initial prompt into message parts if provided
+	var initialPrompt []st.MessagePart
+	if config.InitialPrompt != "" {
+		initialPrompt = FormatMessage(config.AgentType, config.InitialPrompt)
+	}
+
 	conversation := st.NewPTY(ctx, st.PTYConversationConfig{
 		AgentType:             config.AgentType,
 		AgentIO:               config.Process,
@@ -253,9 +262,14 @@ func NewServer(ctx context.Context, config ServerConfig) (*Server, error) {
 		FormatMessage:         formatMessage,
 		ReadyForInitialPrompt: isAgentReadyForInitialPrompt,
 		FormatToolCall:        formatToolCall,
-		Logger:                logger,
-	}, config.InitialPrompt)
-	emitter := NewEventEmitter(1024)
+		InitialPrompt:         initialPrompt,
+		OnSnapshot: func(status st.ConversationStatus, messages []st.ConversationMessage, screen string) {
+			emitter.UpdateStatusAndEmitChanges(status, agentType)
+			emitter.UpdateMessagesAndEmitChanges(messages)
+			emitter.UpdateScreenAndEmitChanges(screen)
+		},
+		Logger: logger,
+	})
 
 	// Create temporary directory for uploads
 	tempDir, err := os.MkdirTemp("", "agentapi-uploads-")
@@ -338,34 +352,6 @@ func sseMiddleware(ctx huma.Context, next func(huma.Context)) {
 
 func (s *Server) StartSnapshotLoop(ctx context.Context) {
 	s.conversation.Start(ctx)
-	go func() {
-		ticker := s.clock.NewTicker(snapshotInterval)
-		defer ticker.Stop()
-		for {
-			currentStatus := s.conversation.Status()
-
-			// Send initial prompt when agent becomes stable for the first time
-			if !s.conversation.InitialPromptSent && convertStatus(currentStatus) == AgentStatusStable {
-				if err := s.conversation.Send(FormatMessage(s.agentType, s.conversation.InitialPrompt)...); err != nil {
-					s.logger.Error("Failed to send initial prompt", "error", err)
-				} else {
-					s.conversation.InitialPromptSent = true
-					s.conversation.ReadyForInitialPrompt = false
-					currentStatus = st.ConversationStatusChanging
-					s.logger.Info("Initial prompt sent successfully")
-				}
-			}
-			s.emitter.UpdateStatusAndEmitChanges(currentStatus, s.agentType)
-			s.emitter.UpdateMessagesAndEmitChanges(s.conversation.Messages())
-			s.emitter.UpdateScreenAndEmitChanges(s.conversation.Text())
-
-			select {
-			case <-ctx.Done():
-				return
-			case <-ticker.C:
-			}
-		}
-	}()
 }
 
 // registerRoutes sets up all API endpoints

From c80b288db33ae256830116158f9baf726082ab55 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 11:38:46 +0000
Subject: [PATCH 03/41] Update pty_conversation tests for new NewPTY API and
 status behavior

- Update all NewPTY calls to use new signature (config only, no initialPrompt param)
- For tests needing initial prompt, use InitialPrompt config field with []MessagePart
- Update tests to expect status 'changing' until InitialPromptSent is true
  (new behavior prevents status flip 'changing' -> 'stable' -> 'changing')
- Remove direct manipulation of internal fields where possible, use Status() API
- Keep minimal internal field access (InitialPromptSent) where needed for testing
  post-send behavior without running the Start() goroutine
---
 lib/screentracker/pty_conversation_test.go | 92 +++++++++-------------
 1 file changed, 38 insertions(+), 54 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index e290322..4c78e4b 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -40,7 +40,7 @@ func statusTest(t *testing.T, params statusTestParams) {
 		if params.cfg.Clock == nil {
 			params.cfg.Clock = quartz.NewReal()
 		}
-		c := st.NewPTY(ctx, params.cfg, "")
+		c := st.NewPTY(ctx, params.cfg)
 		assert.Equal(t, st.ConversationStatusInitializing, c.Status())
 
 		for i, step := range params.steps {
@@ -147,7 +147,7 @@ func TestMessages(t *testing.T) {
 		for _, opt := range opts {
 			opt(&cfg)
 		}
-		return st.NewPTY(context.Background(), cfg, "")
+		return st.NewPTY(context.Background(), cfg)
 	}
 
 	t.Run("messages are copied", func(t *testing.T) {
@@ -361,19 +361,18 @@ func TestInitialPromptReadiness(t *testing.T) {
 			ReadyForInitialPrompt: func(message string) bool {
 				return message == "ready"
 			},
+			InitialPrompt: []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
 		}
-		c := st.NewPTY(context.Background(), cfg, "initial prompt here")
+		c := st.NewPTY(context.Background(), cfg)
 
 		// Fill buffer with stable snapshots, but agent is not ready
 		c.Snapshot("loading...")
 
 		// Even though screen is stable, status should be changing because agent is not ready
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
-		assert.False(t, c.ReadyForInitialPrompt)
-		assert.False(t, c.InitialPromptSent)
 	})
 
-	t.Run("agent becomes ready - status changes to stable", func(t *testing.T) {
+	t.Run("agent becomes ready - status stays changing until initial prompt sent", func(t *testing.T) {
 		mClock := quartz.NewMock(t)
 		mClock.Set(now)
 		cfg := st.PTYConversationConfig{
@@ -384,21 +383,21 @@ func TestInitialPromptReadiness(t *testing.T) {
 			ReadyForInitialPrompt: func(message string) bool {
 				return message == "ready"
 			},
+			InitialPrompt: []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
 		}
-		c := st.NewPTY(context.Background(), cfg, "initial prompt here")
+		c := st.NewPTY(context.Background(), cfg)
 
 		// Agent not ready initially
 		c.Snapshot("loading...")
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
-		// Agent becomes ready
+		// Agent becomes ready, but status stays "changing" until initial prompt is sent
+		// This is the new behavior - we don't flip to "stable" then back to "changing"
 		c.Snapshot("ready")
-		assert.Equal(t, st.ConversationStatusStable, c.Status())
-		assert.True(t, c.ReadyForInitialPrompt)
-		assert.False(t, c.InitialPromptSent)
+		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 	})
 
-	t.Run("ready for initial prompt lifecycle: false -> true -> false", func(t *testing.T) {
+	t.Run("initial prompt lifecycle - status stays changing until sent", func(t *testing.T) {
 		mClock := quartz.NewMock(t)
 		mClock.Set(now)
 		agent := &testAgent{screen: "loading..."}
@@ -410,35 +409,21 @@ func TestInitialPromptReadiness(t *testing.T) {
 			ReadyForInitialPrompt: func(message string) bool {
 				return message == "ready"
 			},
+			InitialPrompt:              []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
 			SkipWritingMessage:         true,
 			SkipSendMessageStatusCheck: true,
 		}
-		c := st.NewPTY(context.Background(), cfg, "initial prompt here")
+		c := st.NewPTY(context.Background(), cfg)
 
-		// Initial state: ReadyForInitialPrompt should be false
+		// Initial state: status should be changing while waiting for readiness
 		c.Snapshot("loading...")
-		assert.False(t, c.ReadyForInitialPrompt, "should start as false")
-		assert.False(t, c.InitialPromptSent)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
-		// Agent becomes ready: ReadyForInitialPrompt should become true
+		// Agent becomes ready: status still "changing" until initial prompt is actually sent
+		// This prevents the status from flipping "changing" → "stable" → "changing"
 		agent.screen = "ready"
 		c.Snapshot("ready")
-		assert.Equal(t, st.ConversationStatusStable, c.Status())
-		assert.True(t, c.ReadyForInitialPrompt, "should become true when ready")
-		assert.False(t, c.InitialPromptSent)
-
-		// Send the initial prompt
-		assert.NoError(t, c.Send(st.MessagePartText{Content: "initial prompt here"}))
-
-		// After sending initial prompt: ReadyForInitialPrompt should be set back to false
-		// (simulating what happens in the actual server code)
-		c.InitialPromptSent = true
-		c.ReadyForInitialPrompt = false
-
-		// Verify final state
-		assert.False(t, c.ReadyForInitialPrompt, "should be false after initial prompt sent")
-		assert.True(t, c.InitialPromptSent)
+		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 	})
 
 	t.Run("no initial prompt - normal status logic applies", func(t *testing.T) {
@@ -452,56 +437,55 @@ func TestInitialPromptReadiness(t *testing.T) {
 			ReadyForInitialPrompt: func(message string) bool {
 				return false // Agent never ready
 			},
+			// No InitialPrompt set - means no need to wait for readiness
 		}
-		// Empty initial prompt means no need to wait for readiness
-		c := st.NewPTY(context.Background(), cfg, "")
+		c := st.NewPTY(context.Background(), cfg)
 
 		c.Snapshot("loading...")
 
 		// Status should be stable because no initial prompt to wait for
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
-		assert.False(t, c.ReadyForInitialPrompt)
-		assert.True(t, c.InitialPromptSent) // Set to true when initial prompt is empty
 	})
 
-	t.Run("initial prompt sent - normal status logic applies", func(t *testing.T) {
+	t.Run("status after initial prompt sent - normal status logic applies", func(t *testing.T) {
 		mClock := quartz.NewMock(t)
 		mClock.Set(now)
 		agent := &testAgent{screen: "ready"}
 		cfg := st.PTYConversationConfig{
 			Clock:                 mClock,
 			SnapshotInterval:      1 * time.Second,
-			ScreenStabilityLength: 0,
+			ScreenStabilityLength: 2 * time.Second, // threshold = 3
 			AgentIO:               agent,
 			ReadyForInitialPrompt: func(message string) bool {
 				return message == "ready"
 			},
+			InitialPrompt:              []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
 			SkipWritingMessage:         true,
 			SkipSendMessageStatusCheck: true,
 		}
-		c := st.NewPTY(context.Background(), cfg, "initial prompt here")
+		c := st.NewPTY(context.Background(), cfg)
 
-		// First, agent becomes ready
+		// Fill buffer to reach stability with "ready" screen
+		// But since initial prompt not sent, status stays "changing"
 		c.Snapshot("ready")
-		assert.Equal(t, st.ConversationStatusStable, c.Status())
-		assert.True(t, c.ReadyForInitialPrompt)
-		assert.False(t, c.InitialPromptSent)
-
-		// Send the initial prompt
-		agent.screen = "processing..."
-		assert.NoError(t, c.Send(st.MessagePartText{Content: "initial prompt here"}))
+		c.Snapshot("ready")
+		c.Snapshot("ready")
+		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
-		// Mark initial prompt as sent (simulating what the server does)
+		// Simulate initial prompt being sent (this is normally done by Start() goroutine)
 		c.InitialPromptSent = true
-		c.ReadyForInitialPrompt = false
 
-		// Now test that status logic works normally after initial prompt is sent
+		// Now that initial prompt is marked as sent, and screen is stable, status becomes stable
+		assert.Equal(t, st.ConversationStatusStable, c.Status())
+
+		// After screen changes, status becomes changing
+		agent.screen = "processing..."
 		c.Snapshot("processing...")
+		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
-		// Status should be stable because initial prompt was already sent
-		// and the readiness check is bypassed
+		// After screen is stable again (3 identical snapshots), status becomes stable
+		c.Snapshot("processing...")
+		c.Snapshot("processing...")
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
-		assert.False(t, c.ReadyForInitialPrompt)
-		assert.True(t, c.InitialPromptSent)
 	})
 }

From 35fed243bd77f5c2e17f8e92201ea0697e1c2f9b Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 11:43:05 +0000
Subject: [PATCH 04/41] fix: race condition in initial prompt sending

sendLocked() was failing with ErrMessageValidationChanging because
statusLocked() returns ConversationStatusChanging when InitialPromptSent
is false. This is a chicken-and-egg problem: we need to send the initial
prompt before we can set InitialPromptSent=true.

Solution: Add skipStatusCheck parameter to sendLocked() to bypass the
status check for the initial prompt case. The Start() goroutine passes
true to skip the check, while external Send() calls pass false to
preserve the existing validation behavior.
---
 lib/screentracker/pty_conversation.go | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index cd49436..a1c9df9 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -173,7 +173,7 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				// Agent is ready for initial prompt - send it
 				c.lock.Lock()
 				if !c.InitialPromptSent && len(c.cfg.InitialPrompt) > 0 {
-					if err := c.sendLocked(c.cfg.InitialPrompt...); err != nil {
+					if err := c.sendLocked(true, c.cfg.InitialPrompt...); err != nil {
 						c.cfg.Logger.Error("failed to send initial prompt", "error", err)
 					} else {
 						c.InitialPromptSent = true
@@ -259,12 +259,14 @@ func (c *PTYConversation) Send(messageParts ...MessagePart) error {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
-	return c.sendLocked(messageParts...)
+	return c.sendLocked(false, messageParts...)
 }
 
 // sendLocked sends a message to the agent. Caller MUST hold c.lock.
-func (c *PTYConversation) sendLocked(messageParts ...MessagePart) error {
-	if !c.cfg.SkipSendMessageStatusCheck && c.statusLocked() != ConversationStatusStable {
+// skipStatusCheck bypasses the status check - used for initial prompt sending
+// where status is "changing" until the prompt is sent.
+func (c *PTYConversation) sendLocked(skipStatusCheck bool, messageParts ...MessagePart) error {
+	if !skipStatusCheck && !c.cfg.SkipSendMessageStatusCheck && c.statusLocked() != ConversationStatusStable {
 		return ErrMessageValidationChanging
 	}
 

From 2c907a53fcf3fb898abd5199aad6c05d3e11c65b Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 12:26:56 +0000
Subject: [PATCH 05/41] httpapi: replace StartSnapshotLoop with Conversation
 accessor

Remove the StartSnapshotLoop method which only delegated to
s.conversation.Start(ctx), and add a Conversation() accessor
method instead. This allows callers to invoke Start() directly
on the conversation.

Part of refactoring to move snapshot loop logic inside PTYConversation.
---
 lib/httpapi/server.go | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/httpapi/server.go b/lib/httpapi/server.go
index def8d11..0fc63e0 100644
--- a/lib/httpapi/server.go
+++ b/lib/httpapi/server.go
@@ -350,8 +350,9 @@ func sseMiddleware(ctx huma.Context, next func(huma.Context)) {
 	next(ctx)
 }
 
-func (s *Server) StartSnapshotLoop(ctx context.Context) {
-	s.conversation.Start(ctx)
+// Conversation returns the underlying PTYConversation for direct access.
+func (s *Server) Conversation() *st.PTYConversation {
+	return s.conversation
 }
 
 // registerRoutes sets up all API endpoints

From ccec2330da9e3fa4d186db45ccfef5a0fec43063 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 12:27:58 +0000
Subject: [PATCH 06/41] refactor(screentracker): remove redundant exported
 fields from PTYConversation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove exported InitialPromptSent and ReadyForInitialPrompt boolean fields
from PTYConversation struct:

- InitialPromptSent → initialPromptSent (unexported)
- ReadyForInitialPrompt boolean removed entirely

The initialPromptReady channel now handles readiness signaling entirely.
When the agent is ready (detected via cfg.ReadyForInitialPrompt callback),
the channel is closed and set to nil to prevent double-close.

This simplifies statusLocked() by removing the intermediate boolean state
and using the channel's nil state to track whether readiness was already
signaled.

Note: Tests will need updates to verify behavior through Status() API
rather than setting internal fields directly.
---
 lib/screentracker/pty_conversation.go | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index a1c9df9..9e1b1e4 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -96,10 +96,8 @@ type PTYConversation struct {
 
 	// InitialPrompt is the initial prompt passed to the agent
 	InitialPrompt string
-	// InitialPromptSent keeps track if the InitialPrompt has been successfully sent to the agents
-	InitialPromptSent bool
-	// ReadyForInitialPrompt keeps track if the agent is ready to accept the initial prompt
-	ReadyForInitialPrompt bool
+	// initialPromptSent keeps track if the InitialPrompt has been successfully sent to the agent
+	initialPromptSent bool
 	// initialPromptReady is closed when the agent is ready to receive the initial prompt
 	initialPromptReady chan struct{}
 	// toolCallMessageSet keeps track of the tool calls that have been detected & logged in the current agent message
@@ -124,7 +122,7 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 				Time:    cfg.Clock.Now(),
 			},
 		},
-		InitialPromptSent:  len(cfg.InitialPrompt) == 0,
+		initialPromptSent:  len(cfg.InitialPrompt) == 0,
 		toolCallMessageSet: make(map[string]bool),
 	}
 	// Initialize the channel only if we have an initial prompt to send
@@ -172,11 +170,11 @@ func (c *PTYConversation) Start(ctx context.Context) {
 			case <-initialPromptReady:
 				// Agent is ready for initial prompt - send it
 				c.lock.Lock()
-				if !c.InitialPromptSent && len(c.cfg.InitialPrompt) > 0 {
+				if !c.initialPromptSent && len(c.cfg.InitialPrompt) > 0 {
 					if err := c.sendLocked(true, c.cfg.InitialPrompt...); err != nil {
 						c.cfg.Logger.Error("failed to send initial prompt", "error", err)
 					} else {
-						c.InitialPromptSent = true
+						c.initialPromptSent = true
 					}
 				}
 				c.lock.Unlock()
@@ -402,15 +400,11 @@ func (c *PTYConversation) statusLocked() ConversationStatus {
 
 	// Handle initial prompt readiness: report "changing" until the prompt is sent
 	// to avoid the status flipping "changing" → "stable" → "changing"
-	if !c.InitialPromptSent {
-		if !c.ReadyForInitialPrompt {
-			if len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt != nil && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
-				c.ReadyForInitialPrompt = true
-				// Signal that we're ready - close the channel if it exists
-				if c.initialPromptReady != nil {
-					close(c.initialPromptReady)
-				}
-			}
+	if !c.initialPromptSent {
+		// Check if agent is ready for initial prompt and signal if so
+		if c.initialPromptReady != nil && len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt != nil && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
+			close(c.initialPromptReady)
+			c.initialPromptReady = nil // Prevent double-close
 		}
 		// Keep returning "changing" until initial prompt is actually sent
 		return ConversationStatusChanging

From d16c552ee5558ae5052ab7bb23ef25af1e33c155 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 12:29:06 +0000
Subject: [PATCH 07/41] refactor: use srv.Conversation().Start(ctx) instead of
 removed StartSnapshotLoop

The StartSnapshotLoop method was removed from Server in favor of
exposing a Conversation() accessor that returns the PTYConversation,
which has its own Start(ctx) method.
---
 cmd/server/server.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmd/server/server.go b/cmd/server/server.go
index 6a7fa7f..2211cbb 100644
--- a/cmd/server/server.go
+++ b/cmd/server/server.go
@@ -136,7 +136,7 @@ func runServer(ctx context.Context, logger *slog.Logger, argsToPass []string) er
 		fmt.Println(srv.GetOpenAPI())
 		return nil
 	}
-	srv.StartSnapshotLoop(ctx)
+	srv.Conversation().Start(ctx)
 	logger.Info("Starting server on port", "port", port)
 	processExitCh := make(chan error, 1)
 	go func() {

From 1fa447e964578c5080a278bed982626ceaac9a24 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 12:30:10 +0000
Subject: [PATCH 08/41] refactor(tests): remove direct field access to
 InitialPromptSent

The InitialPromptSent field was unexported as initialPromptSent.
Rework the test to verify the same behavior (normal status logic
applies after initial prompt handling) by configuring no InitialPrompt
instead of manually setting the field. When no InitialPrompt is
configured, initialPromptSent defaults to true, which achieves the
same testing outcome through the public API.
---
 lib/screentracker/pty_conversation_test.go | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index 4c78e4b..7fe061e 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -447,7 +447,9 @@ func TestInitialPromptReadiness(t *testing.T) {
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 	})
 
-	t.Run("status after initial prompt sent - normal status logic applies", func(t *testing.T) {
+	t.Run("no initial prompt configured - normal status logic applies", func(t *testing.T) {
+		// When no InitialPrompt is configured, the conversation behaves as if
+		// the initial prompt has already been sent, so normal status logic applies.
 		mClock := quartz.NewMock(t)
 		mClock.Set(now)
 		agent := &testAgent{screen: "ready"}
@@ -456,26 +458,17 @@ func TestInitialPromptReadiness(t *testing.T) {
 			SnapshotInterval:      1 * time.Second,
 			ScreenStabilityLength: 2 * time.Second, // threshold = 3
 			AgentIO:               agent,
-			ReadyForInitialPrompt: func(message string) bool {
-				return message == "ready"
-			},
-			InitialPrompt:              []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
+			// No InitialPrompt configured - normal status logic applies immediately
 			SkipWritingMessage:         true,
 			SkipSendMessageStatusCheck: true,
 		}
 		c := st.NewPTY(context.Background(), cfg)
 
 		// Fill buffer to reach stability with "ready" screen
-		// But since initial prompt not sent, status stays "changing"
 		c.Snapshot("ready")
 		c.Snapshot("ready")
 		c.Snapshot("ready")
-		assert.Equal(t, st.ConversationStatusChanging, c.Status())
-
-		// Simulate initial prompt being sent (this is normally done by Start() goroutine)
-		c.InitialPromptSent = true
-
-		// Now that initial prompt is marked as sent, and screen is stable, status becomes stable
+		// Since no initial prompt is configured, screen stability determines status
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
 		// After screen changes, status becomes changing

From 24ff1061cfa95185c3a095199851adf0c616e154 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 12:45:31 +0000
Subject: [PATCH 09/41] refactor: move conversation.Start() into NewServer()
 and remove Conversation() accessor

- Move the s.conversation.Start(ctx) call into NewServer(), just before
  the return statement, so the conversation starts immediately when the
  server is created.

- Add nil check for config.Process to handle test scenarios where no
  process is configured.

- Remove the Conversation() accessor method from Server since it is no
  longer needed externally.

- Remove the external srv.Conversation().Start(ctx) call from
  cmd/server/server.go.
---
 cmd/server/server.go  |  1 -
 lib/httpapi/server.go | 10 +++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/cmd/server/server.go b/cmd/server/server.go
index 2211cbb..6d5cdec 100644
--- a/cmd/server/server.go
+++ b/cmd/server/server.go
@@ -136,7 +136,6 @@ func runServer(ctx context.Context, logger *slog.Logger, argsToPass []string) er
 		fmt.Println(srv.GetOpenAPI())
 		return nil
 	}
-	srv.Conversation().Start(ctx)
 	logger.Info("Starting server on port", "port", port)
 	processExitCh := make(chan error, 1)
 	go func() {
diff --git a/lib/httpapi/server.go b/lib/httpapi/server.go
index 0fc63e0..0ec1fd4 100644
--- a/lib/httpapi/server.go
+++ b/lib/httpapi/server.go
@@ -295,6 +295,11 @@ func NewServer(ctx context.Context, config ServerConfig) (*Server, error) {
 	// Register API routes
 	s.registerRoutes()
 
+	// Start the conversation polling loop if we have a process
+	if config.Process != nil {
+		s.conversation.Start(ctx)
+	}
+
 	return s, nil
 }
 
@@ -350,11 +355,6 @@ func sseMiddleware(ctx huma.Context, next func(huma.Context)) {
 	next(ctx)
 }
 
-// Conversation returns the underlying PTYConversation for direct access.
-func (s *Server) Conversation() *st.PTYConversation {
-	return s.conversation
-}
-
 // registerRoutes sets up all API endpoints
 func (s *Server) registerRoutes() {
 	// GET /status endpoint

From 9a197fc3a92496666cc8c850a94e5373c4c0d210 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 12:44:42 +0000
Subject: [PATCH 10/41] refactor(screentracker): move status check from
 sendLocked to Send()

Remove the skipStatusCheck parameter from sendLocked and move the
status check into Send() where it belongs. This simplifies the code
since:

- Start() always skipped the check (for initial prompt)
- Send() always respected cfg.SkipSendMessageStatusCheck

Now the check happens in Send() before calling sendLocked, and the
initial prompt in Start() naturally bypasses it by calling sendLocked
directly.
---
 lib/screentracker/pty_conversation.go | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 9e1b1e4..50854ed 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -171,7 +171,7 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				// Agent is ready for initial prompt - send it
 				c.lock.Lock()
 				if !c.initialPromptSent && len(c.cfg.InitialPrompt) > 0 {
-					if err := c.sendLocked(true, c.cfg.InitialPrompt...); err != nil {
+					if err := c.sendLocked(c.cfg.InitialPrompt...); err != nil {
 						c.cfg.Logger.Error("failed to send initial prompt", "error", err)
 					} else {
 						c.initialPromptSent = true
@@ -257,17 +257,15 @@ func (c *PTYConversation) Send(messageParts ...MessagePart) error {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
-	return c.sendLocked(false, messageParts...)
-}
-
-// sendLocked sends a message to the agent. Caller MUST hold c.lock.
-// skipStatusCheck bypasses the status check - used for initial prompt sending
-// where status is "changing" until the prompt is sent.
-func (c *PTYConversation) sendLocked(skipStatusCheck bool, messageParts ...MessagePart) error {
-	if !skipStatusCheck && !c.cfg.SkipSendMessageStatusCheck && c.statusLocked() != ConversationStatusStable {
+	if !c.cfg.SkipSendMessageStatusCheck && c.statusLocked() != ConversationStatusStable {
 		return ErrMessageValidationChanging
 	}
 
+	return c.sendLocked(messageParts...)
+}
+
+// sendLocked sends a message to the agent. Caller MUST hold c.lock.
+func (c *PTYConversation) sendLocked(messageParts ...MessagePart) error {
 	var sb strings.Builder
 	for _, part := range messageParts {
 		sb.WriteString(part.String())

From 3dd8c56ec7a9d9535d8230d21627fd9255e0a5af Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 13:03:39 +0000
Subject: [PATCH 11/41] docs: add clarifying comments addressing review
 feedback

- Expand comment for process nil check to explain:
  - Process is nil only for --print-openapi mode
  - Process is already running (termexec.StartProcess blocks)
  - Agent readiness is handled asynchronously via ReadyForInitialPrompt

- Add comment for OnSnapshot callback explaining:
  - Callback pattern keeps screentracker decoupled from httpapi
  - Preserves clean package boundaries and avoids import cycles
---
 lib/httpapi/server.go | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/lib/httpapi/server.go b/lib/httpapi/server.go
index 0ec1fd4..24e5e55 100644
--- a/lib/httpapi/server.go
+++ b/lib/httpapi/server.go
@@ -263,6 +263,9 @@ func NewServer(ctx context.Context, config ServerConfig) (*Server, error) {
 		ReadyForInitialPrompt: isAgentReadyForInitialPrompt,
 		FormatToolCall:        formatToolCall,
 		InitialPrompt:         initialPrompt,
+		// OnSnapshot uses a callback rather than passing the emitter directly
+		// to keep the screentracker package decoupled from httpapi concerns.
+		// This preserves clean package boundaries and avoids import cycles.
 		OnSnapshot: func(status st.ConversationStatus, messages []st.ConversationMessage, screen string) {
 			emitter.UpdateStatusAndEmitChanges(status, agentType)
 			emitter.UpdateMessagesAndEmitChanges(messages)
@@ -295,7 +298,12 @@ func NewServer(ctx context.Context, config ServerConfig) (*Server, error) {
 	// Register API routes
 	s.registerRoutes()
 
-	// Start the conversation polling loop if we have a process
+	// Start the conversation polling loop if we have a process.
+	// Process is nil only when --print-openapi is used (no agent runs).
+	// The process is already running at this point - termexec.StartProcess()
+	// blocks until the PTY is created and the process is active. Agent
+	// readiness (waiting for the prompt) is handled asynchronously inside
+	// conversation.Start() via ReadyForInitialPrompt.
 	if config.Process != nil {
 		s.conversation.Start(ctx)
 	}

From f5bd08d091ff89b57b4e6bed9f2ddad1c04dbd89 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 15:48:36 +0000
Subject: [PATCH 12/41] refactor: use Conversation interface in Server struct

Change Server.conversation from *st.PTYConversation to st.Conversation
to program against the interface abstraction rather than the concrete type.
This ensures the Conversation interface is a complete abstraction.
---
 lib/httpapi/server.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/httpapi/server.go b/lib/httpapi/server.go
index 24e5e55..b14510f 100644
--- a/lib/httpapi/server.go
+++ b/lib/httpapi/server.go
@@ -41,7 +41,7 @@ type Server struct {
 	srv          *http.Server
 	mu           sync.RWMutex
 	logger       *slog.Logger
-	conversation *st.PTYConversation
+	conversation st.Conversation
 	agentio      *termexec.Process
 	agentType    mf.AgentType
 	emitter      *EventEmitter

From 86c9d91fd40ec4ff64ce2b594391cc070fc96f8c Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 15:51:21 +0000
Subject: [PATCH 13/41] refactor: remove unnecessary agentType local variable

Use config.AgentType directly in the OnSnapshot closure instead of
creating a redundant local variable.
---
 lib/httpapi/server.go | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/httpapi/server.go b/lib/httpapi/server.go
index b14510f..e43315b 100644
--- a/lib/httpapi/server.go
+++ b/lib/httpapi/server.go
@@ -245,7 +245,6 @@ func NewServer(ctx context.Context, config ServerConfig) (*Server, error) {
 	}
 
 	emitter := NewEventEmitter(1024)
-	agentType := config.AgentType
 
 	// Format initial prompt into message parts if provided
 	var initialPrompt []st.MessagePart
@@ -267,7 +266,7 @@ func NewServer(ctx context.Context, config ServerConfig) (*Server, error) {
 		// to keep the screentracker package decoupled from httpapi concerns.
 		// This preserves clean package boundaries and avoids import cycles.
 		OnSnapshot: func(status st.ConversationStatus, messages []st.ConversationMessage, screen string) {
-			emitter.UpdateStatusAndEmitChanges(status, agentType)
+			emitter.UpdateStatusAndEmitChanges(status, config.AgentType)
 			emitter.UpdateMessagesAndEmitChanges(messages)
 			emitter.UpdateScreenAndEmitChanges(screen)
 		},

From b5437f748623084e595a169f061ff8970a51e9fb Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 16:02:12 +0000
Subject: [PATCH 14/41] refactor: simplify nil channel handling in Start()

Remove unnecessary channel creation for nil initialPromptReady.
In Go's select statement, nil channel cases are simply skipped
(never selected), so we don't need to create a new channel that
blocks forever - the nil channel already has the desired behavior.

Addresses PR review feedback.
---
 lib/screentracker/pty_conversation.go | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 50854ed..2295be3 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -137,12 +137,8 @@ func (c *PTYConversation) Start(ctx context.Context) {
 		ticker := c.cfg.Clock.NewTicker(c.cfg.SnapshotInterval)
 		defer ticker.Stop()
 
-		// Create a nil channel if no initial prompt - select will never receive from it
+		// If nil, the select case below is simply never chosen (nil channels are skipped in select)
 		initialPromptReady := c.initialPromptReady
-		if initialPromptReady == nil {
-			initialPromptReady = make(chan struct{})
-			// Don't close it - we want it to block forever
-		}
 
 		for {
 			select {

From 593b65f91202dcb4de798bfb84b6099ba8df8e88 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Wed, 4 Feb 2026 16:24:25 +0000
Subject: [PATCH 15/41] refactor: remove unused InitialPrompt field from
 PTYConversation

The public InitialPrompt string field is no longer used after refactoring.
The initial prompt is now stored in cfg.InitialPrompt (as []MessagePart)
and managed internally. Removing this field avoids confusion and maintains
clean encapsulation.

Addresses PR review feedback.
---
 lib/screentracker/pty_conversation.go | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 2295be3..b386214 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -94,9 +94,7 @@ type PTYConversation struct {
 	screenBeforeLastUserMessage string
 	lock                        sync.Mutex
 
-	// InitialPrompt is the initial prompt passed to the agent
-	InitialPrompt string
-	// initialPromptSent keeps track if the InitialPrompt has been successfully sent to the agent
+	// initialPromptSent keeps track if the initial prompt has been successfully sent to the agent
 	initialPromptSent bool
 	// initialPromptReady is closed when the agent is ready to receive the initial prompt
 	initialPromptReady chan struct{}

From 6079777c4860a4eed4433caf825e054e6149e2df Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Thu, 5 Feb 2026 11:39:35 +0000
Subject: [PATCH 16/41] refactor: replace initialPromptReady/Sent with outbound
 message queue

Replace initialPromptSent bool and initialPromptReady chan with:
- outboundQueue chan []MessagePart (buffered, size 1)
- agentReady chan struct{} (nil if no initial prompt)

The initial prompt is now enqueued in NewPTY() and sent via the
queue in Start(). This makes the code more extensible for future
queued message handling.

Start() now uses a two-phase loop:
- Phase 1: Wait for agentReady while still processing ticker snapshots
- Phase 2: Normal loop with ticker + outboundQueue select cases

No external API behavior changes.
---
 lib/screentracker/pty_conversation.go | 80 +++++++++++++++------------
 1 file changed, 46 insertions(+), 34 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index b386214..d4391ec 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -92,12 +92,13 @@ type PTYConversation struct {
 	snapshotBuffer              *RingBuffer[screenSnapshot]
 	messages                    []ConversationMessage
 	screenBeforeLastUserMessage string
-	lock                        sync.Mutex
+	lock sync.Mutex
 
-	// initialPromptSent keeps track if the initial prompt has been successfully sent to the agent
-	initialPromptSent bool
-	// initialPromptReady is closed when the agent is ready to receive the initial prompt
-	initialPromptReady chan struct{}
+	// outboundQueue holds messages waiting to be sent to the agent
+	outboundQueue chan []MessagePart
+	// agentReady is closed when the agent is ready to receive the initial prompt.
+	// It is nil if no initial prompt was configured.
+	agentReady chan struct{}
 	// toolCallMessageSet keeps track of the tool calls that have been detected & logged in the current agent message
 	toolCallMessageSet map[string]bool
 }
@@ -120,12 +121,13 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 				Time:    cfg.Clock.Now(),
 			},
 		},
-		initialPromptSent:  len(cfg.InitialPrompt) == 0,
+		outboundQueue:      make(chan []MessagePart, 1),
 		toolCallMessageSet: make(map[string]bool),
 	}
-	// Initialize the channel only if we have an initial prompt to send
+	// If we have an initial prompt, enqueue it and create the ready signal
 	if len(cfg.InitialPrompt) > 0 {
-		c.initialPromptReady = make(chan struct{})
+		c.agentReady = make(chan struct{})
+		c.outboundQueue <- cfg.InitialPrompt
 	}
 	return c
 }
@@ -135,21 +137,39 @@ func (c *PTYConversation) Start(ctx context.Context) {
 		ticker := c.cfg.Clock.NewTicker(c.cfg.SnapshotInterval)
 		defer ticker.Stop()
 
-		// If nil, the select case below is simply never chosen (nil channels are skipped in select)
-		initialPromptReady := c.initialPromptReady
+		// Phase 1: Wait for agent to be ready (only if we have an initial prompt)
+		agentReady := c.agentReady
+		if agentReady != nil {
+			for {
+				select {
+				case <-ctx.Done():
+					return
+				case <-ticker.C:
+					c.lock.Lock()
+					screen := c.cfg.AgentIO.ReadScreen()
+					c.snapshotLocked(screen)
+					status := c.statusLocked()
+					messages := c.messagesLocked()
+					c.lock.Unlock()
+
+					if c.cfg.OnSnapshot != nil {
+						c.cfg.OnSnapshot(status, messages, screen)
+					}
+				case <-agentReady:
+					// Agent is ready, proceed to phase 2
+					agentReady = nil
+					goto phase2
+				}
+			}
+		}
 
+	phase2:
+		// Phase 2: Normal loop with ticker snapshots and outbound queue processing
 		for {
 			select {
 			case <-ctx.Done():
 				return
 			case <-ticker.C:
-				// It's important that we hold the lock while reading the screen.
-				// There's a race condition that occurs without it:
-				// 1. The screen is read
-				// 2. Independently, Send is called and takes the lock.
-				// 3. snapshotLocked is called and waits on the lock.
-				// 4. Send modifies the terminal state, releases the lock
-				// 5. snapshotLocked adds a snapshot from a stale screen
 				c.lock.Lock()
 				screen := c.cfg.AgentIO.ReadScreen()
 				c.snapshotLocked(screen)
@@ -157,23 +177,15 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				messages := c.messagesLocked()
 				c.lock.Unlock()
 
-				// Call snapshot callback if configured
 				if c.cfg.OnSnapshot != nil {
 					c.cfg.OnSnapshot(status, messages, screen)
 				}
-			case <-initialPromptReady:
-				// Agent is ready for initial prompt - send it
+			case parts := <-c.outboundQueue:
 				c.lock.Lock()
-				if !c.initialPromptSent && len(c.cfg.InitialPrompt) > 0 {
-					if err := c.sendLocked(c.cfg.InitialPrompt...); err != nil {
-						c.cfg.Logger.Error("failed to send initial prompt", "error", err)
-					} else {
-						c.initialPromptSent = true
-					}
+				if err := c.sendLocked(parts...); err != nil {
+					c.cfg.Logger.Error("failed to send queued message", "error", err)
 				}
 				c.lock.Unlock()
-				// Nil out to prevent this case from triggering again
-				initialPromptReady = nil
 			}
 		}
 	}()
@@ -390,15 +402,15 @@ func (c *PTYConversation) statusLocked() ConversationStatus {
 		}
 	}
 
-	// Handle initial prompt readiness: report "changing" until the prompt is sent
+	// Handle initial prompt readiness: report "changing" until the queue is drained
 	// to avoid the status flipping "changing" → "stable" → "changing"
-	if !c.initialPromptSent {
+	if c.agentReady != nil || len(c.outboundQueue) > 0 {
 		// Check if agent is ready for initial prompt and signal if so
-		if c.initialPromptReady != nil && len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt != nil && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
-			close(c.initialPromptReady)
-			c.initialPromptReady = nil // Prevent double-close
+		if c.agentReady != nil && len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt != nil && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
+			close(c.agentReady)
+			c.agentReady = nil // Prevent double-close
 		}
-		// Keep returning "changing" until initial prompt is actually sent
+		// Keep returning "changing" until outbound queue is drained
 		return ConversationStatusChanging
 	}
 

From 002fd67f650396d43334e981b83541571be398ca Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Thu, 5 Feb 2026 12:06:35 +0000
Subject: [PATCH 17/41] screentracker: use no-op defaults for optional
 callbacks

Add no-op default functions for OnSnapshot and ReadyForInitialPrompt
in NewPTY() constructor instead of checking for nil throughout the code.

This removes:
- Two nil checks for OnSnapshot in Start() phase 1 and phase 2 loops
- One nil check for ReadyForInitialPrompt in statusLocked()
---
 lib/screentracker/pty_conversation.go | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index d4391ec..f680319 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -129,6 +129,13 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 		c.agentReady = make(chan struct{})
 		c.outboundQueue <- cfg.InitialPrompt
 	}
+	// Set no-op defaults for optional callbacks
+	if c.cfg.OnSnapshot == nil {
+		c.cfg.OnSnapshot = func(ConversationStatus, []ConversationMessage, string) {}
+	}
+	if c.cfg.ReadyForInitialPrompt == nil {
+		c.cfg.ReadyForInitialPrompt = func(string) bool { return true }
+	}
 	return c
 }
 
@@ -152,9 +159,7 @@ func (c *PTYConversation) Start(ctx context.Context) {
 					messages := c.messagesLocked()
 					c.lock.Unlock()
 
-					if c.cfg.OnSnapshot != nil {
-						c.cfg.OnSnapshot(status, messages, screen)
-					}
+					c.cfg.OnSnapshot(status, messages, screen)
 				case <-agentReady:
 					// Agent is ready, proceed to phase 2
 					agentReady = nil
@@ -177,9 +182,7 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				messages := c.messagesLocked()
 				c.lock.Unlock()
 
-				if c.cfg.OnSnapshot != nil {
-					c.cfg.OnSnapshot(status, messages, screen)
-				}
+				c.cfg.OnSnapshot(status, messages, screen)
 			case parts := <-c.outboundQueue:
 				c.lock.Lock()
 				if err := c.sendLocked(parts...); err != nil {
@@ -406,7 +409,7 @@ func (c *PTYConversation) statusLocked() ConversationStatus {
 	// to avoid the status flipping "changing" → "stable" → "changing"
 	if c.agentReady != nil || len(c.outboundQueue) > 0 {
 		// Check if agent is ready for initial prompt and signal if so
-		if c.agentReady != nil && len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt != nil && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
+		if c.agentReady != nil && len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
 			close(c.agentReady)
 			c.agentReady = nil // Prevent double-close
 		}

From a6b54a4c3afada68f4150851b564fa52828f96f2 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Thu, 5 Feb 2026 12:08:47 +0000
Subject: [PATCH 18/41] screentracker: remove duplicate agentReady nil check

---
 lib/screentracker/pty_conversation.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index f680319..844b51b 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -407,13 +407,15 @@ func (c *PTYConversation) statusLocked() ConversationStatus {
 
 	// Handle initial prompt readiness: report "changing" until the queue is drained
 	// to avoid the status flipping "changing" → "stable" → "changing"
-	if c.agentReady != nil || len(c.outboundQueue) > 0 {
+	if c.agentReady != nil {
 		// Check if agent is ready for initial prompt and signal if so
-		if c.agentReady != nil && len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
+		if len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
 			close(c.agentReady)
 			c.agentReady = nil // Prevent double-close
 		}
-		// Keep returning "changing" until outbound queue is drained
+		return ConversationStatusChanging
+	}
+	if len(c.outboundQueue) > 0 {
 		return ConversationStatusChanging
 	}
 

From 3259c1f78ddd657d46a77bd52131dcae67af4b1c Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Thu, 5 Feb 2026 12:24:38 +0000
Subject: [PATCH 19/41] screentracker: remove unnecessary comment and replace
 goto with labeled break

---
 lib/screentracker/pty_conversation.go | 37 ++++++++++++---------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 844b51b..fde50d0 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -129,7 +129,6 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 		c.agentReady = make(chan struct{})
 		c.outboundQueue <- cfg.InitialPrompt
 	}
-	// Set no-op defaults for optional callbacks
 	if c.cfg.OnSnapshot == nil {
 		c.cfg.OnSnapshot = func(ConversationStatus, []ConversationMessage, string) {}
 	}
@@ -146,29 +145,25 @@ func (c *PTYConversation) Start(ctx context.Context) {
 
 		// Phase 1: Wait for agent to be ready (only if we have an initial prompt)
 		agentReady := c.agentReady
-		if agentReady != nil {
-			for {
-				select {
-				case <-ctx.Done():
-					return
-				case <-ticker.C:
-					c.lock.Lock()
-					screen := c.cfg.AgentIO.ReadScreen()
-					c.snapshotLocked(screen)
-					status := c.statusLocked()
-					messages := c.messagesLocked()
-					c.lock.Unlock()
-
-					c.cfg.OnSnapshot(status, messages, screen)
-				case <-agentReady:
-					// Agent is ready, proceed to phase 2
-					agentReady = nil
-					goto phase2
-				}
+	phase1:
+		for agentReady != nil {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				c.lock.Lock()
+				screen := c.cfg.AgentIO.ReadScreen()
+				c.snapshotLocked(screen)
+				status := c.statusLocked()
+				messages := c.messagesLocked()
+				c.lock.Unlock()
+
+				c.cfg.OnSnapshot(status, messages, screen)
+			case <-agentReady:
+				break phase1
 			}
 		}
 
-	phase2:
 		// Phase 2: Normal loop with ticker snapshots and outbound queue processing
 		for {
 			select {

From 54b8ef4fec18c4e21f3a73c6aa6b19b7145b0a5f Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Thu, 5 Feb 2026 14:44:01 +0000
Subject: [PATCH 20/41] refactor: use separate snapshot/send goroutines with
 stableSignal channel

Replace the two-phase loop design in PTYConversation.Start() with two
independent goroutines coordinated by an unbuffered stableSignal channel:

- Snapshot loop: takes periodic snapshots and signals when stable with
  queued items
- Send loop: waits for stable signal, then processes the outbound queue

This replaces the agentReady channel mechanism with a simpler coordination
pattern that eliminates duplicated snapshot logic.
---
 lib/screentracker/pty_conversation.go | 59 ++++++++++++---------------
 1 file changed, 25 insertions(+), 34 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index fde50d0..d734f2d 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -96,9 +96,9 @@ type PTYConversation struct {
 
 	// outboundQueue holds messages waiting to be sent to the agent
 	outboundQueue chan []MessagePart
-	// agentReady is closed when the agent is ready to receive the initial prompt.
-	// It is nil if no initial prompt was configured.
-	agentReady chan struct{}
+	// stableSignal is used by the snapshot loop to signal the send loop
+	// when the agent is stable and there are items in the outbound queue.
+	stableSignal chan struct{}
 	// toolCallMessageSet keeps track of the tool calls that have been detected & logged in the current agent message
 	toolCallMessageSet map[string]bool
 }
@@ -122,11 +122,11 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 			},
 		},
 		outboundQueue:      make(chan []MessagePart, 1),
+		stableSignal:       make(chan struct{}),
 		toolCallMessageSet: make(map[string]bool),
 	}
-	// If we have an initial prompt, enqueue it and create the ready signal
+	// If we have an initial prompt, enqueue it
 	if len(cfg.InitialPrompt) > 0 {
-		c.agentReady = make(chan struct{})
 		c.outboundQueue <- cfg.InitialPrompt
 	}
 	if c.cfg.OnSnapshot == nil {
@@ -139,14 +139,12 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 }
 
 func (c *PTYConversation) Start(ctx context.Context) {
+	// Snapshot loop
 	go func() {
 		ticker := c.cfg.Clock.NewTicker(c.cfg.SnapshotInterval)
 		defer ticker.Stop()
 
-		// Phase 1: Wait for agent to be ready (only if we have an initial prompt)
-		agentReady := c.agentReady
-	phase1:
-		for agentReady != nil {
+		for {
 			select {
 			case <-ctx.Done():
 				return
@@ -159,31 +157,32 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				c.lock.Unlock()
 
 				c.cfg.OnSnapshot(status, messages, screen)
-			case <-agentReady:
-				break phase1
+
+				// Signal send loop if stable and queue has items
+				if status == ConversationStatusStable && len(c.outboundQueue) > 0 {
+					c.stableSignal <- struct{}{}
+				}
 			}
 		}
+	}()
 
-		// Phase 2: Normal loop with ticker snapshots and outbound queue processing
+	// Send loop
+	go func() {
 		for {
 			select {
 			case <-ctx.Done():
 				return
-			case <-ticker.C:
-				c.lock.Lock()
-				screen := c.cfg.AgentIO.ReadScreen()
-				c.snapshotLocked(screen)
-				status := c.statusLocked()
-				messages := c.messagesLocked()
-				c.lock.Unlock()
-
-				c.cfg.OnSnapshot(status, messages, screen)
-			case parts := <-c.outboundQueue:
-				c.lock.Lock()
-				if err := c.sendLocked(parts...); err != nil {
-					c.cfg.Logger.Error("failed to send queued message", "error", err)
+			case <-c.stableSignal:
+				select {
+				case parts := <-c.outboundQueue:
+					c.lock.Lock()
+					if err := c.sendLocked(parts...); err != nil {
+						c.cfg.Logger.Error("failed to send queued message", "error", err)
+					}
+					c.lock.Unlock()
+				default:
+					c.cfg.Logger.Error("received stable signal but outbound queue is empty")
 				}
-				c.lock.Unlock()
 			}
 		}
 	}()
@@ -402,14 +401,6 @@ func (c *PTYConversation) statusLocked() ConversationStatus {
 
 	// Handle initial prompt readiness: report "changing" until the queue is drained
 	// to avoid the status flipping "changing" → "stable" → "changing"
-	if c.agentReady != nil {
-		// Check if agent is ready for initial prompt and signal if so
-		if len(snapshots) > 0 && c.cfg.ReadyForInitialPrompt(snapshots[len(snapshots)-1].screen) {
-			close(c.agentReady)
-			c.agentReady = nil // Prevent double-close
-		}
-		return ConversationStatusChanging
-	}
 	if len(c.outboundQueue) > 0 {
 		return ConversationStatusChanging
 	}

From b157ff795a3b7445b8e591ba1978930b3837db74 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Thu, 5 Feb 2026 15:52:27 +0000
Subject: [PATCH 21/41] e2e: always rebuild binary when AGENTAPI_BINARY_PATH is
 not set

Previously the test only built the binary if it didn't exist, causing
stale binary issues when testing changes. Now the binary is always
rebuilt unless AGENTAPI_BINARY_PATH is set to use a custom binary.
---
 e2e/echo_test.go | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/e2e/echo_test.go b/e2e/echo_test.go
index fbc1efc..861fb42 100644
--- a/e2e/echo_test.go
+++ b/e2e/echo_test.go
@@ -133,14 +133,11 @@ func setup(ctx context.Context, t testing.TB, p *params) ([]ScriptEntry, *agenta
 		cwd, err := os.Getwd()
 		require.NoError(t, err, "Failed to get current working directory")
 		binaryPath = filepath.Join(cwd, "..", "out", "agentapi")
-		_, err = os.Stat(binaryPath)
-		if err != nil {
-			t.Logf("Building binary at %s", binaryPath)
-			buildCmd := exec.CommandContext(ctx, "go", "build", "-o", binaryPath, ".")
-			buildCmd.Dir = filepath.Join(cwd, "..")
-			t.Logf("run: %s", buildCmd.String())
-			require.NoError(t, buildCmd.Run(), "Failed to build binary")
-		}
+		t.Logf("Building binary at %s", binaryPath)
+		buildCmd := exec.CommandContext(ctx, "go", "build", "-o", binaryPath, ".")
+		buildCmd.Dir = filepath.Join(cwd, "..")
+		t.Logf("run: %s", buildCmd.String())
+		require.NoError(t, buildCmd.Run(), "Failed to build binary")
 	}
 
 	serverPort, err := getFreePort()

From 7d488f39b612a0b50b29834d7f8415b05dd7dd98 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Thu, 5 Feb 2026 15:53:00 +0000
Subject: [PATCH 22/41] Fix stableSignal deadlock in pty_conversation.go

The previous implementation had a deadlock condition:
- Snapshot loop signals stableSignal when status == Stable && queue > 0
- But statusLocked() returns 'changing' when queue > 0
- These conditions were mutually exclusive

Fixed by:
1. Adding isScreenStableLocked() helper to check screen stability independently
2. Using the helper in statusLocked() (refactor, same behavior)
3. Making stableSignal a buffered channel (size 1) to allow non-blocking sends
4. Updating snapshot loop to check screen stability and ReadyForInitialPrompt
   independently of statusLocked(), with non-blocking send to avoid deadlock
---
 lib/screentracker/pty_conversation.go | 41 ++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index d734f2d..1f810c8 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -122,7 +122,7 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 			},
 		},
 		outboundQueue:      make(chan []MessagePart, 1),
-		stableSignal:       make(chan struct{}),
+		stableSignal:       make(chan struct{}, 1),
 		toolCallMessageSet: make(map[string]bool),
 	}
 	// If we have an initial prompt, enqueue it
@@ -154,14 +154,20 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				c.snapshotLocked(screen)
 				status := c.statusLocked()
 				messages := c.messagesLocked()
+
+				// Signal send loop if agent is ready and queue has items.
+				// We check readiness independently of statusLocked() because
+				// statusLocked() returns "changing" when queue has items.
+				if len(c.outboundQueue) > 0 && c.isScreenStableLocked() && c.cfg.ReadyForInitialPrompt(screen) {
+					select {
+					case c.stableSignal <- struct{}{}:
+					default:
+						// Signal already pending
+					}
+				}
 				c.lock.Unlock()
 
 				c.cfg.OnSnapshot(status, messages, screen)
-
-				// Signal send loop if stable and queue has items
-				if status == ConversationStatusStable && len(c.outboundQueue) > 0 {
-					c.stableSignal <- struct{}{}
-				}
 			}
 		}
 	}()
@@ -371,6 +377,21 @@ func (c *PTYConversation) Status() ConversationStatus {
 	return c.statusLocked()
 }
 
+// isScreenStableLocked returns true if the screen content has been stable
+// for the required number of snapshots. Caller MUST hold c.lock.
+func (c *PTYConversation) isScreenStableLocked() bool {
+	snapshots := c.snapshotBuffer.GetAll()
+	if len(snapshots) != c.stableSnapshotsThreshold {
+		return false
+	}
+	for i := 1; i < len(snapshots); i++ {
+		if snapshots[0].screen != snapshots[i].screen {
+			return false
+		}
+	}
+	return true
+}
+
 // caller MUST hold c.lock
 func (c *PTYConversation) statusLocked() ConversationStatus {
 	// sanity checks
@@ -393,14 +414,12 @@ func (c *PTYConversation) statusLocked() ConversationStatus {
 		return ConversationStatusInitializing
 	}
 
-	for i := 1; i < len(snapshots); i++ {
-		if snapshots[0].screen != snapshots[i].screen {
-			return ConversationStatusChanging
-		}
+	if !c.isScreenStableLocked() {
+		return ConversationStatusChanging
 	}
 
 	// Handle initial prompt readiness: report "changing" until the queue is drained
-	// to avoid the status flipping "changing" → "stable" → "changing"
+	// to avoid the status flipping "changing" -> "stable" -> "changing"
 	if len(c.outboundQueue) > 0 {
 		return ConversationStatusChanging
 	}

From f725683f4f419027a9845fd676f50a43da77c349 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Thu, 5 Feb 2026 16:13:38 +0000
Subject: [PATCH 23/41] Optimize ReadyForInitialPrompt check and fix stability
 comparison

- Use separate goroutine to poll ReadyForInitialPrompt instead of
  checking on every snapshot tick (reduces overhead)
- Use atomic.Bool to track when initial prompt readiness is achieved
- Change isScreenStableLocked() to use < instead of != for robustness
---
 lib/screentracker/pty_conversation.go | 29 +++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 1f810c8..e5fd809 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -6,6 +6,7 @@ import (
 	"log/slog"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"time"
 
 	"github.com/coder/agentapi/lib/msgfmt"
@@ -101,6 +102,9 @@ type PTYConversation struct {
 	stableSignal chan struct{}
 	// toolCallMessageSet keeps track of the tool calls that have been detected & logged in the current agent message
 	toolCallMessageSet map[string]bool
+	// initialPromptReady is set to true when ReadyForInitialPrompt returns true.
+	// This is checked by a separate goroutine to avoid calling ReadyForInitialPrompt on every tick.
+	initialPromptReady atomic.Bool
 }
 
 var _ Conversation = &PTYConversation{}
@@ -139,6 +143,27 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 }
 
 func (c *PTYConversation) Start(ctx context.Context) {
+	// Initial prompt readiness loop - polls ReadyForInitialPrompt until it returns true,
+	// then sets initialPromptReady and exits. This avoids calling ReadyForInitialPrompt
+	// on every snapshot tick.
+	go func() {
+		ticker := c.cfg.Clock.NewTicker(100 * time.Millisecond)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-ticker.C:
+				screen := c.cfg.AgentIO.ReadScreen()
+				if c.cfg.ReadyForInitialPrompt(screen) {
+					c.initialPromptReady.Store(true)
+					return
+				}
+			}
+		}
+	}()
+
 	// Snapshot loop
 	go func() {
 		ticker := c.cfg.Clock.NewTicker(c.cfg.SnapshotInterval)
@@ -158,7 +183,7 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				// Signal send loop if agent is ready and queue has items.
 				// We check readiness independently of statusLocked() because
 				// statusLocked() returns "changing" when queue has items.
-				if len(c.outboundQueue) > 0 && c.isScreenStableLocked() && c.cfg.ReadyForInitialPrompt(screen) {
+				if len(c.outboundQueue) > 0 && c.isScreenStableLocked() && c.initialPromptReady.Load() {
 					select {
 					case c.stableSignal <- struct{}{}:
 					default:
@@ -381,7 +406,7 @@ func (c *PTYConversation) Status() ConversationStatus {
 // for the required number of snapshots. Caller MUST hold c.lock.
 func (c *PTYConversation) isScreenStableLocked() bool {
 	snapshots := c.snapshotBuffer.GetAll()
-	if len(snapshots) != c.stableSnapshotsThreshold {
+	if len(snapshots) < c.stableSnapshotsThreshold {
 		return false
 	}
 	for i := 1; i < len(snapshots); i++ {

From 0b9e68729308a4c0596558c64fc969a2867eb55a Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Thu, 5 Feb 2026 18:23:43 +0000
Subject: [PATCH 24/41] refactor: use channel for initialPromptReady one-time
 signaling

Replace atomic.Bool with a channel that gets closed for one-time
signaling, which is more idiomatic Go. Remove sync/atomic import
since it's no longer needed.
---
 lib/screentracker/pty_conversation.go | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index e5fd809..5ae29b2 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -6,7 +6,6 @@ import (
 	"log/slog"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"time"
 
 	"github.com/coder/agentapi/lib/msgfmt"
@@ -102,9 +101,9 @@ type PTYConversation struct {
 	stableSignal chan struct{}
 	// toolCallMessageSet keeps track of the tool calls that have been detected & logged in the current agent message
 	toolCallMessageSet map[string]bool
-	// initialPromptReady is set to true when ReadyForInitialPrompt returns true.
+	// initialPromptReady is closed when ReadyForInitialPrompt returns true.
 	// This is checked by a separate goroutine to avoid calling ReadyForInitialPrompt on every tick.
-	initialPromptReady atomic.Bool
+	initialPromptReady chan struct{}
 }
 
 var _ Conversation = &PTYConversation{}
@@ -128,6 +127,7 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 		outboundQueue:      make(chan []MessagePart, 1),
 		stableSignal:       make(chan struct{}, 1),
 		toolCallMessageSet: make(map[string]bool),
+		initialPromptReady: make(chan struct{}),
 	}
 	// If we have an initial prompt, enqueue it
 	if len(cfg.InitialPrompt) > 0 {
@@ -157,7 +157,7 @@ func (c *PTYConversation) Start(ctx context.Context) {
 			case <-ticker.C:
 				screen := c.cfg.AgentIO.ReadScreen()
 				if c.cfg.ReadyForInitialPrompt(screen) {
-					c.initialPromptReady.Store(true)
+					close(c.initialPromptReady)
 					return
 				}
 			}
@@ -183,7 +183,13 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				// Signal send loop if agent is ready and queue has items.
 				// We check readiness independently of statusLocked() because
 				// statusLocked() returns "changing" when queue has items.
-				if len(c.outboundQueue) > 0 && c.isScreenStableLocked() && c.initialPromptReady.Load() {
+				isReady := false
+				select {
+				case <-c.initialPromptReady:
+					isReady = true
+				default:
+				}
+				if len(c.outboundQueue) > 0 && c.isScreenStableLocked() && isReady {
 					select {
 					case c.stableSignal <- struct{}{}:
 					default:

From 18e2773ceb2cf00224a1355742bff8b7689701ae Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Mon, 9 Feb 2026 13:17:49 +0000
Subject: [PATCH 25/41] fix: handle context cancellation in inner select

---
 lib/screentracker/pty_conversation.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 5ae29b2..30541a6 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -92,7 +92,7 @@ type PTYConversation struct {
 	snapshotBuffer              *RingBuffer[screenSnapshot]
 	messages                    []ConversationMessage
 	screenBeforeLastUserMessage string
-	lock sync.Mutex
+	lock                        sync.Mutex
 
 	// outboundQueue holds messages waiting to be sent to the agent
 	outboundQueue chan []MessagePart
@@ -211,6 +211,8 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				return
 			case <-c.stableSignal:
 				select {
+				case <-ctx.Done():
+					return
 				case parts := <-c.outboundQueue:
 					c.lock.Lock()
 					if err := c.sendLocked(parts...); err != nil {

From 1ed23705cc9d588504b0a7679b841ab2e67adf2b Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Mon, 9 Feb 2026 13:53:58 +0000
Subject: [PATCH 26/41] unify send path: route all sends through outbound queue

- Add outboundMessage type with parts and errCh for async response
- Update outboundQueue to use outboundMessage type
- Rewrite Send() to validate, then enqueue with error channel
- Simplify send loop to read from queue and return errors via errCh
- Remove duplicate validation from sendLocked() (now done in Send())
- Add started flag to track if Start() was called (for test compat)
---
 lib/screentracker/pty_conversation.go | 65 +++++++++++++++++++--------
 1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 30541a6..826e30c 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -43,6 +43,12 @@ func (p MessagePartText) String() string {
 	return p.Content
 }
 
+// outboundMessage wraps a message to be sent with its error channel
+type outboundMessage struct {
+	parts []MessagePart
+	errCh chan error
+}
+
 // PTYConversationConfig is the configuration for a PTYConversation.
 type PTYConversationConfig struct {
 	AgentType msgfmt.AgentType
@@ -95,7 +101,7 @@ type PTYConversation struct {
 	lock                        sync.Mutex
 
 	// outboundQueue holds messages waiting to be sent to the agent
-	outboundQueue chan []MessagePart
+	outboundQueue chan outboundMessage
 	// stableSignal is used by the snapshot loop to signal the send loop
 	// when the agent is stable and there are items in the outbound queue.
 	stableSignal chan struct{}
@@ -104,6 +110,8 @@ type PTYConversation struct {
 	// initialPromptReady is closed when ReadyForInitialPrompt returns true.
 	// This is checked by a separate goroutine to avoid calling ReadyForInitialPrompt on every tick.
 	initialPromptReady chan struct{}
+	// started is set when Start() is called, enabling the send loop
+	started bool
 }
 
 var _ Conversation = &PTYConversation{}
@@ -124,14 +132,14 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 				Time:    cfg.Clock.Now(),
 			},
 		},
-		outboundQueue:      make(chan []MessagePart, 1),
+		outboundQueue:      make(chan outboundMessage, 1),
 		stableSignal:       make(chan struct{}, 1),
 		toolCallMessageSet: make(map[string]bool),
 		initialPromptReady: make(chan struct{}),
 	}
 	// If we have an initial prompt, enqueue it
 	if len(cfg.InitialPrompt) > 0 {
-		c.outboundQueue <- cfg.InitialPrompt
+		c.outboundQueue <- outboundMessage{parts: cfg.InitialPrompt, errCh: nil}
 	}
 	if c.cfg.OnSnapshot == nil {
 		c.cfg.OnSnapshot = func(ConversationStatus, []ConversationMessage, string) {}
@@ -143,6 +151,10 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 }
 
 func (c *PTYConversation) Start(ctx context.Context) {
+	c.lock.Lock()
+	c.started = true
+	c.lock.Unlock()
+
 	// Initial prompt readiness loop - polls ReadyForInitialPrompt until it returns true,
 	// then sets initialPromptReady and exits. This avoids calling ReadyForInitialPrompt
 	// on every snapshot tick.
@@ -203,7 +215,7 @@ func (c *PTYConversation) Start(ctx context.Context) {
 		}
 	}()
 
-	// Send loop
+	// Send loop - primary call site for sendLocked() in production
 	go func() {
 		for {
 			select {
@@ -213,12 +225,13 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				select {
 				case <-ctx.Done():
 					return
-				case parts := <-c.outboundQueue:
+				case msg := <-c.outboundQueue:
 					c.lock.Lock()
-					if err := c.sendLocked(parts...); err != nil {
-						c.cfg.Logger.Error("failed to send queued message", "error", err)
-					}
+					err := c.sendLocked(msg.parts...)
 					c.lock.Unlock()
+					if msg.errCh != nil {
+						msg.errCh <- err
+					}
 				default:
 					c.cfg.Logger.Error("received stable signal but outbound queue is empty")
 				}
@@ -296,31 +309,45 @@ func (c *PTYConversation) snapshotLocked(screen string) {
 }
 
 func (c *PTYConversation) Send(messageParts ...MessagePart) error {
-	c.lock.Lock()
-	defer c.lock.Unlock()
+	// Validate message content before enqueueing
+	var sb strings.Builder
+	for _, part := range messageParts {
+		sb.WriteString(part.String())
+	}
+	message := sb.String()
+	if message != msgfmt.TrimWhitespace(message) {
+		return ErrMessageValidationWhitespace
+	}
+	if message == "" {
+		return ErrMessageValidationEmpty
+	}
 
+	c.lock.Lock()
 	if !c.cfg.SkipSendMessageStatusCheck && c.statusLocked() != ConversationStatusStable {
+		c.lock.Unlock()
 		return ErrMessageValidationChanging
 	}
+	// If Start() hasn't been called, send directly (for tests)
+	if !c.started {
+		err := c.sendLocked(messageParts...)
+		c.lock.Unlock()
+		return err
+	}
+	c.lock.Unlock()
 
-	return c.sendLocked(messageParts...)
+	errCh := make(chan error, 1)
+	c.outboundQueue <- outboundMessage{parts: messageParts, errCh: errCh}
+	return <-errCh
 }
 
 // sendLocked sends a message to the agent. Caller MUST hold c.lock.
+// Validation is done by the caller (Send() validates, initial prompt is trusted).
 func (c *PTYConversation) sendLocked(messageParts ...MessagePart) error {
 	var sb strings.Builder
 	for _, part := range messageParts {
 		sb.WriteString(part.String())
 	}
 	message := sb.String()
-	if message != msgfmt.TrimWhitespace(message) {
-		// msgfmt formatting functions assume this
-		return ErrMessageValidationWhitespace
-	}
-	if message == "" {
-		// writeMessageWithConfirmation requires a non-empty message
-		return ErrMessageValidationEmpty
-	}
 
 	screenBeforeMessage := c.cfg.AgentIO.ReadScreen()
 	now := c.cfg.Clock.Now()

From 777a103d14d0bd4c3ed3f8f5f5123b90d27fdfb4 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 11:05:49 +0000
Subject: [PATCH 27/41] refactor: remove test escape hatches, convert to
 TickerFunc

- Remove 'started' field and synchronous bypass in Send()
- Remove SkipWritingMessage config field
- Remove public Snapshot() test-only method
- Convert readiness/snapshot loops from NewTicker to TickerFunc
- Pass Clock to WaitFor calls in writeStabilize
---
 lib/screentracker/pty_conversation.go | 118 +++++++++-----------------
 1 file changed, 39 insertions(+), 79 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 826e30c..c96e581 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -62,9 +62,6 @@ type PTYConversationConfig struct {
 	// Function to format the messages received from the agent
 	// userInput is the last user message
 	FormatMessage func(message string, userInput string) string
-	// SkipWritingMessage skips the writing of a message to the agent.
-	// This is used in tests
-	SkipWritingMessage bool
 	// SkipSendMessageStatusCheck skips the check for whether the message can be sent.
 	// This is used in tests
 	SkipSendMessageStatusCheck bool
@@ -110,12 +107,14 @@ type PTYConversation struct {
 	// initialPromptReady is closed when ReadyForInitialPrompt returns true.
 	// This is checked by a separate goroutine to avoid calling ReadyForInitialPrompt on every tick.
 	initialPromptReady chan struct{}
-	// started is set when Start() is called, enabling the send loop
-	started bool
 }
 
 var _ Conversation = &PTYConversation{}
 
+// errInitialPromptReady is a sentinel used to stop the readiness TickerFunc
+// after ReadyForInitialPrompt returns true.
+var errInitialPromptReady = xerrors.New("initial prompt ready")
+
 func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 	if cfg.Clock == nil {
 		cfg.Clock = quartz.NewReal()
@@ -151,69 +150,47 @@ func NewPTY(ctx context.Context, cfg PTYConversationConfig) *PTYConversation {
 }
 
 func (c *PTYConversation) Start(ctx context.Context) {
-	c.lock.Lock()
-	c.started = true
-	c.lock.Unlock()
-
 	// Initial prompt readiness loop - polls ReadyForInitialPrompt until it returns true,
-	// then sets initialPromptReady and exits. This avoids calling ReadyForInitialPrompt
+	// then closes initialPromptReady and exits. This avoids calling ReadyForInitialPrompt
 	// on every snapshot tick.
-	go func() {
-		ticker := c.cfg.Clock.NewTicker(100 * time.Millisecond)
-		defer ticker.Stop()
-
-		for {
-			select {
-			case <-ctx.Done():
-				return
-			case <-ticker.C:
-				screen := c.cfg.AgentIO.ReadScreen()
-				if c.cfg.ReadyForInitialPrompt(screen) {
-					close(c.initialPromptReady)
-					return
-				}
-			}
+	c.cfg.Clock.TickerFunc(ctx, 100*time.Millisecond, func() error {
+		screen := c.cfg.AgentIO.ReadScreen()
+		if c.cfg.ReadyForInitialPrompt(screen) {
+			close(c.initialPromptReady)
+			return errInitialPromptReady
 		}
-	}()
+		return nil
+	}, "readiness")
 
 	// Snapshot loop
-	go func() {
-		ticker := c.cfg.Clock.NewTicker(c.cfg.SnapshotInterval)
-		defer ticker.Stop()
-
-		for {
+	c.cfg.Clock.TickerFunc(ctx, c.cfg.SnapshotInterval, func() error {
+		c.lock.Lock()
+		screen := c.cfg.AgentIO.ReadScreen()
+		c.snapshotLocked(screen)
+		status := c.statusLocked()
+		messages := c.messagesLocked()
+
+		// Signal send loop if agent is ready and queue has items.
+		// We check readiness independently of statusLocked() because
+		// statusLocked() returns "changing" when queue has items.
+		isReady := false
+		select {
+		case <-c.initialPromptReady:
+			isReady = true
+		default:
+		}
+		if len(c.outboundQueue) > 0 && c.isScreenStableLocked() && isReady {
 			select {
-			case <-ctx.Done():
-				return
-			case <-ticker.C:
-				c.lock.Lock()
-				screen := c.cfg.AgentIO.ReadScreen()
-				c.snapshotLocked(screen)
-				status := c.statusLocked()
-				messages := c.messagesLocked()
-
-				// Signal send loop if agent is ready and queue has items.
-				// We check readiness independently of statusLocked() because
-				// statusLocked() returns "changing" when queue has items.
-				isReady := false
-				select {
-				case <-c.initialPromptReady:
-					isReady = true
-				default:
-				}
-				if len(c.outboundQueue) > 0 && c.isScreenStableLocked() && isReady {
-					select {
-					case c.stableSignal <- struct{}{}:
-					default:
-						// Signal already pending
-					}
-				}
-				c.lock.Unlock()
-
-				c.cfg.OnSnapshot(status, messages, screen)
+			case c.stableSignal <- struct{}{}:
+			default:
+				// Signal already pending
 			}
 		}
-	}()
+		c.lock.Unlock()
+
+		c.cfg.OnSnapshot(status, messages, screen)
+		return nil
+	}, "snapshot")
 
 	// Send loop - primary call site for sendLocked() in production
 	go func() {
@@ -288,16 +265,6 @@ func (c *PTYConversation) updateLastAgentMessageLocked(screen string, timestamp
 	c.messages[len(c.messages)-1].Id = len(c.messages) - 1
 }
 
-// Snapshot writes the current screen snapshot to the snapshot buffer.
-// ONLY TO BE USED FOR TESTING PURPOSES.
-// TODO(Cian): This method can be removed by mocking AgentIO.
-func (c *PTYConversation) Snapshot(screen string) {
-	c.lock.Lock()
-	defer c.lock.Unlock()
-
-	c.snapshotLocked(screen)
-}
-
 // caller MUST hold c.lock
 func (c *PTYConversation) snapshotLocked(screen string) {
 	snapshot := screenSnapshot{
@@ -327,12 +294,6 @@ func (c *PTYConversation) Send(messageParts ...MessagePart) error {
 		c.lock.Unlock()
 		return ErrMessageValidationChanging
 	}
-	// If Start() hasn't been called, send directly (for tests)
-	if !c.started {
-		err := c.sendLocked(messageParts...)
-		c.lock.Unlock()
-		return err
-	}
 	c.lock.Unlock()
 
 	errCh := make(chan error, 1)
@@ -369,9 +330,6 @@ func (c *PTYConversation) sendLocked(messageParts ...MessagePart) error {
 
 // writeStabilize writes messageParts to the screen and waits for the screen to stabilize after the message is written.
 func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...MessagePart) error {
-	if c.cfg.SkipWritingMessage {
-		return nil
-	}
 	screenBeforeMessage := c.cfg.AgentIO.ReadScreen()
 	for _, part := range messageParts {
 		if err := part.Do(c.cfg.AgentIO); err != nil {
@@ -383,6 +341,7 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me
 		Timeout:     15 * time.Second,
 		MinInterval: 50 * time.Millisecond,
 		InitialWait: true,
+		Clock:       c.cfg.Clock,
 	}, func() (bool, error) {
 		screen := c.cfg.AgentIO.ReadScreen()
 		if screen != screenBeforeMessage {
@@ -405,6 +364,7 @@ func (c *PTYConversation) writeStabilize(ctx context.Context, messageParts ...Me
 	if err := util.WaitFor(ctx, util.WaitTimeout{
 		Timeout:     15 * time.Second,
 		MinInterval: 25 * time.Millisecond,
+		Clock:       c.cfg.Clock,
 	}, func() (bool, error) {
 		// we don't want to spam additional carriage returns because the agent may process them
 		// (aider does this), but we do want to retry sending one if nothing's

From 75d89c31b90e7bf792465eaf003a81bc699863aa Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 11:37:06 +0000
Subject: [PATCH 28/41] fix: split lock in sendMessage and fix test clock races

- Rename sendLocked to sendMessage; release lock during writeStabilize
  to avoid deadlocking with the snapshot TickerFunc, then re-acquire
  and re-apply the pre-send agent message to correct for intermediate
  snapshot loop updates.
- Fix advancePast: retry briefly when no events are pending instead of
  calling Advance(remaining) which races with goroutine timer creation.
- Fix sendWithClockDrive: use AdvanceNext instead of Advance(d) to
  avoid race between Peek and Advance.
- Fix initial prompt lifecycle test: advance 2 ticks to account for
  snapshot timer alignment.
---
 lib/screentracker/pty_conversation.go      |  21 +-
 lib/screentracker/pty_conversation_test.go | 638 +++++++++++++--------
 2 files changed, 411 insertions(+), 248 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index c96e581..22f709a 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -203,9 +203,7 @@ func (c *PTYConversation) Start(ctx context.Context) {
 				case <-ctx.Done():
 					return
 				case msg := <-c.outboundQueue:
-					c.lock.Lock()
-					err := c.sendLocked(msg.parts...)
-					c.lock.Unlock()
+					err := c.sendMessage(ctx, msg.parts...)
 					if msg.errCh != nil {
 						msg.errCh <- err
 					}
@@ -301,23 +299,31 @@ func (c *PTYConversation) Send(messageParts ...MessagePart) error {
 	return <-errCh
 }
 
-// sendLocked sends a message to the agent. Caller MUST hold c.lock.
-// Validation is done by the caller (Send() validates, initial prompt is trusted).
-func (c *PTYConversation) sendLocked(messageParts ...MessagePart) error {
+// sendMessage sends a message to the agent. It acquires and releases c.lock
+// around the parts that access shared state, but releases it during
+// writeStabilize to avoid blocking the snapshot loop.
+func (c *PTYConversation) sendMessage(ctx context.Context, messageParts ...MessagePart) error {
 	var sb strings.Builder
 	for _, part := range messageParts {
 		sb.WriteString(part.String())
 	}
 	message := sb.String()
 
+	c.lock.Lock()
 	screenBeforeMessage := c.cfg.AgentIO.ReadScreen()
 	now := c.cfg.Clock.Now()
 	c.updateLastAgentMessageLocked(screenBeforeMessage, now)
+	c.lock.Unlock()
 
-	if err := c.writeStabilize(context.Background(), messageParts...); err != nil {
+	if err := c.writeStabilize(ctx, messageParts...); err != nil {
 		return xerrors.Errorf("failed to send message: %w", err)
 	}
 
+	c.lock.Lock()
+	// Re-apply the pre-send screen to the agent message. While the lock
+	// was released during writeStabilize, the snapshot loop may have
+	// overwritten the agent message with intermediate screen content.
+	c.updateLastAgentMessageLocked(screenBeforeMessage, now)
 	c.screenBeforeLastUserMessage = screenBeforeMessage
 	c.messages = append(c.messages, ConversationMessage{
 		Id:      len(c.messages),
@@ -325,6 +331,7 @@ func (c *PTYConversation) sendLocked(messageParts ...MessagePart) error {
 		Role:    ConversationRoleUser,
 		Time:    now,
 	})
+	c.lock.Unlock()
 	return nil
 }
 
diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index 7fe061e..4fceef4 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -3,48 +3,177 @@ package screentracker_test
 import (
 	"context"
 	"fmt"
+	"io"
+	"log/slog"
+	"sync"
 	"testing"
 	"time"
 
 	"github.com/coder/quartz"
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 
 	st "github.com/coder/agentapi/lib/screentracker"
 )
 
-type statusTestStep struct {
-	snapshot string
-	status   st.ConversationStatus
-}
-type statusTestParams struct {
-	cfg   st.PTYConversationConfig
-	steps []statusTestStep
-}
-
+// testAgent is a goroutine-safe mock implementation of AgentIO.
 type testAgent struct {
-	st.AgentIO
-	screen string
+	mu      sync.Mutex
+	screen  string
+	onWrite func(data []byte)
 }
 
 func (a *testAgent) ReadScreen() string {
+	a.mu.Lock()
+	defer a.mu.Unlock()
 	return a.screen
 }
 
 func (a *testAgent) Write(data []byte) (int, error) {
-	return 0, nil
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	if a.onWrite != nil {
+		a.onWrite(data)
+	}
+	return len(data), nil
+}
+
+func (a *testAgent) setScreen(s string) {
+	a.mu.Lock()
+	defer a.mu.Unlock()
+	a.screen = s
+}
+
+// advancePast advances the mock clock by total, stepping through each
+// intermediate event so that TickerFunc callbacks run to completion.
+func advancePast(ctx context.Context, t *testing.T, mClock *quartz.Mock, total time.Duration) {
+	t.Helper()
+	target := mClock.Now().Add(total)
+	noEventRetries := 0
+	for mClock.Now().Before(target) {
+		remaining := target.Sub(mClock.Now())
+		d, ok := mClock.Peek()
+		if !ok {
+			// No events pending. A background goroutine may be about to
+			// register a timer, so retry a few times before jumping.
+			noEventRetries++
+			if noEventRetries > 50 {
+				mClock.Advance(remaining).MustWait(ctx)
+				return
+			}
+			time.Sleep(1 * time.Millisecond)
+			continue
+		}
+		noEventRetries = 0
+		if d > remaining {
+			// Next event is past our target; safe to jump.
+			mClock.Advance(remaining).MustWait(ctx)
+			return
+		}
+		// Step to the next event.
+		_, w := mClock.AdvanceNext()
+		w.MustWait(ctx)
+	}
+}
+
+// fillToStable sets the screen and advances the clock enough times to fill the
+// snapshot buffer, making status reach "stable".
+func fillToStable(ctx context.Context, t *testing.T, agent *testAgent, mClock *quartz.Mock, screen string, interval time.Duration, threshold int) {
+	t.Helper()
+	agent.setScreen(screen)
+	for i := 0; i < threshold; i++ {
+		advancePast(ctx, t, mClock, interval)
+	}
+}
+
+// sendWithClockDrive calls Send() in a goroutine and advances the mock clock
+// until Send completes. This drives the snapshot loop (which signals
+// stableSignal) and writeStabilize (which creates mock timers).
+func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation, mClock *quartz.Mock, parts ...st.MessagePart) error {
+	t.Helper()
+	errCh := make(chan error, 1)
+	go func() {
+		errCh <- c.Send(parts...)
+	}()
+	// Give the goroutine a moment to start and enqueue.
+	time.Sleep(1 * time.Millisecond)
+	deadline := time.Now().Add(10 * time.Second)
+	for time.Now().Before(deadline) {
+		select {
+		case err := <-errCh:
+			return err
+		default:
+		}
+		_, ok := mClock.Peek()
+		if !ok {
+			// No mock events yet; the send loop goroutine may still be
+			// setting up timers. Wait briefly for it to schedule.
+			time.Sleep(5 * time.Millisecond)
+			continue
+		}
+		// Use AdvanceNext instead of Advance(d) to avoid a race: between
+		// Peek() and Advance(d), a goroutine may create a closer timer,
+		// making Advance(d) illegal. AdvanceNext always advances to the
+		// nearest event regardless.
+		_, w := mClock.AdvanceNext()
+		w.MustWait(ctx)
+	}
+	t.Fatal("sendWithClockDrive timed out")
+	return nil
+}
+
+// msgNoTime is a ConversationMessage without the Time field for easy comparison.
+type msgNoTime struct {
+	Id      int
+	Message string
+	Role    st.ConversationRole
+}
+
+func stripTimes(msgs []st.ConversationMessage) []msgNoTime {
+	result := make([]msgNoTime, len(msgs))
+	for i, m := range msgs {
+		result[i] = msgNoTime{Id: m.Id, Message: m.Message, Role: m.Role}
+	}
+	return result
+}
+
+func assertMessages(t *testing.T, c *st.PTYConversation, expected []msgNoTime) {
+	t.Helper()
+	assert.Equal(t, expected, stripTimes(c.Messages()))
+}
+
+type statusTestStep struct {
+	snapshot string
+	status   st.ConversationStatus
+}
+type statusTestParams struct {
+	cfg   st.PTYConversationConfig
+	steps []statusTestStep
 }
 
 func statusTest(t *testing.T, params statusTestParams) {
-	ctx := context.Background()
+	ctx, cancel := context.WithCancel(context.Background())
+	t.Cleanup(cancel)
 	t.Run(fmt.Sprintf("interval-%s,stability_length-%s", params.cfg.SnapshotInterval, params.cfg.ScreenStabilityLength), func(t *testing.T) {
-		if params.cfg.Clock == nil {
-			params.cfg.Clock = quartz.NewReal()
+		mClock := quartz.NewMock(t)
+		params.cfg.Clock = mClock
+		agent := &testAgent{}
+		if params.cfg.AgentIO != nil {
+			if a, ok := params.cfg.AgentIO.(*testAgent); ok {
+				agent = a
+			}
 		}
+		params.cfg.AgentIO = agent
+		params.cfg.Logger = slog.New(slog.NewTextHandler(io.Discard, nil))
+
 		c := st.NewPTY(ctx, params.cfg)
+		c.Start(ctx)
+
 		assert.Equal(t, st.ConversationStatusInitializing, c.Status())
 
 		for i, step := range params.steps {
-			c.Snapshot(step.snapshot)
+			agent.setScreen(step.snapshot)
+			advancePast(ctx, t, mClock, params.cfg.SnapshotInterval)
 			assert.Equal(t, step.status, c.Status(), "step %d", i)
 		}
 	})
@@ -115,57 +244,61 @@ func TestConversation(t *testing.T) {
 
 func TestMessages(t *testing.T) {
 	now := time.Now()
-	agentMsg := func(id int, msg string) st.ConversationMessage {
-		return st.ConversationMessage{
-			Id:      id,
-			Message: msg,
-			Role:    st.ConversationRoleAgent,
-			Time:    now,
-		}
-	}
-	userMsg := func(id int, msg string) st.ConversationMessage {
-		return st.ConversationMessage{
-			Id:      id,
-			Message: msg,
-			Role:    st.ConversationRoleUser,
-			Time:    now,
+
+	// newConversation creates a started conversation with a mock clock and
+	// testAgent. Tests that Send() messages must use sendWithClockDrive.
+	newConversation := func(t *testing.T, opts ...func(*st.PTYConversationConfig)) (*st.PTYConversation, *testAgent, *quartz.Mock, context.Context) {
+		t.Helper()
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+
+		writeCounter := 0
+		agent := &testAgent{}
+		// Default onWrite: each write produces a unique screen so that
+		// writeStabilize can detect screen changes.
+		agent.onWrite = func(data []byte) {
+			writeCounter++
+			agent.screen = fmt.Sprintf("__write_%d", writeCounter)
 		}
-	}
-	sendMsg := func(c *st.PTYConversation, msg string) error {
-		return c.Send(st.MessagePartText{Content: msg})
-	}
-	newConversation := func(opts ...func(*st.PTYConversationConfig)) *st.PTYConversation {
 		mClock := quartz.NewMock(t)
 		mClock.Set(now)
 		cfg := st.PTYConversationConfig{
 			Clock:                      mClock,
-			SnapshotInterval:           1 * time.Second,
-			ScreenStabilityLength:      2 * time.Second,
-			SkipWritingMessage:         true,
+			AgentIO:                    agent,
+			SnapshotInterval:           100 * time.Millisecond,
+			ScreenStabilityLength:      200 * time.Millisecond,
 			SkipSendMessageStatusCheck: true,
+			Logger:                     slog.New(slog.NewTextHandler(io.Discard, nil)),
 		}
 		for _, opt := range opts {
 			opt(&cfg)
 		}
-		return st.NewPTY(context.Background(), cfg)
+		if a, ok := cfg.AgentIO.(*testAgent); ok {
+			agent = a
+		}
+
+		c := st.NewPTY(ctx, cfg)
+		c.Start(ctx)
+
+		return c, agent, mClock, ctx
 	}
 
+	// threshold = 3 (200ms / 100ms = 2, + 1 = 3)
+	const threshold = 3
+	const interval = 100 * time.Millisecond
+
 	t.Run("messages are copied", func(t *testing.T) {
-		c := newConversation()
+		c, _, _, _ := newConversation(t)
 		messages := c.Messages()
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, ""),
-		}, messages)
+		assertMessages(t, c, []msgNoTime{{0, "", st.ConversationRoleAgent}})
 
 		messages[0].Message = "modification"
 
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, ""),
-		}, c.Messages())
+		assertMessages(t, c, []msgNoTime{{0, "", st.ConversationRoleAgent}})
 	})
 
 	t.Run("whitespace-padding", func(t *testing.T) {
-		c := newConversation()
+		c, _, _, _ := newConversation(t)
 		for _, msg := range []string{"123 ", " 123", "123\t\t", "\n123", "123\n\t", " \t123\n\t"} {
 			err := c.Send(st.MessagePartText{Content: msg})
 			assert.ErrorIs(t, err, st.ErrMessageValidationWhitespace)
@@ -173,312 +306,335 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("no-change-no-message-update", func(t *testing.T) {
-		mClock := quartz.NewMock(t)
-		mClock.Set(now)
-		c := newConversation(func(cfg *st.PTYConversationConfig) {
-			cfg.Clock = mClock
-		})
+		c, agent, mClock, ctx := newConversation(t)
 
-		c.Snapshot("1")
-		msgs := c.Messages()
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "1"),
-		}, msgs)
-		mClock.Set(now.Add(1 * time.Second))
-		c.Snapshot("1")
-		assert.Equal(t, msgs, c.Messages())
+		agent.setScreen("1")
+		advancePast(ctx, t, mClock, interval)
+		msgs := stripTimes(c.Messages())
+		assert.Equal(t, []msgNoTime{{0, "1", st.ConversationRoleAgent}}, msgs)
+
+		advancePast(ctx, t, mClock, interval)
+		assert.Equal(t, msgs, stripTimes(c.Messages()))
 	})
 
 	t.Run("tracking messages", func(t *testing.T) {
-		agent := &testAgent{}
-		c := newConversation(func(cfg *st.PTYConversationConfig) {
-			cfg.AgentIO = agent
+		c, agent, mClock, ctx := newConversation(t)
+
+		// Agent message is recorded when the first snapshot is taken.
+		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
+		assertMessages(t, c, []msgNoTime{
+			{0, "1", st.ConversationRoleAgent},
+		})
+
+		// Agent message is updated when the screen changes.
+		agent.setScreen("2")
+		advancePast(ctx, t, mClock, interval)
+		assertMessages(t, c, []msgNoTime{
+			{0, "2", st.ConversationRoleAgent},
+		})
+
+		// Fill to stable so Send can proceed (screen is "2").
+		fillToStable(ctx, t, agent, mClock, "2", interval, threshold)
+
+		// User message is recorded.
+		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "3"}))
+
+		// After send, screen is dirty from writeStabilize. Set to "4" and stabilize.
+		fillToStable(ctx, t, agent, mClock, "4", interval, threshold)
+		assertMessages(t, c, []msgNoTime{
+			{0, "2", st.ConversationRoleAgent},
+			{1, "3", st.ConversationRoleUser},
+			{2, "4", st.ConversationRoleAgent},
+		})
+
+		// Agent message is updated when the screen changes before a user message.
+		fillToStable(ctx, t, agent, mClock, "5", interval, threshold)
+		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "6"}))
+
+		fillToStable(ctx, t, agent, mClock, "7", interval, threshold)
+		assertMessages(t, c, []msgNoTime{
+			{0, "2", st.ConversationRoleAgent},
+			{1, "3", st.ConversationRoleUser},
+			{2, "5", st.ConversationRoleAgent},
+			{3, "6", st.ConversationRoleUser},
+			{4, "7", st.ConversationRoleAgent},
 		})
-		// agent message is recorded when the first snapshot is added
-		c.Snapshot("1")
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "1"),
-		}, c.Messages())
-
-		// agent message is updated when the screen changes
-		c.Snapshot("2")
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "2"),
-		}, c.Messages())
-
-		// user message is recorded
-		agent.screen = "2"
-		assert.NoError(t, sendMsg(c, "3"))
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "2"),
-			userMsg(1, "3"),
-		}, c.Messages())
-
-		// agent message is added after a user message
-		c.Snapshot("4")
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "2"),
-			userMsg(1, "3"),
-			agentMsg(2, "4"),
-		}, c.Messages())
-
-		// agent message is updated when the screen changes before a user message
-		agent.screen = "5"
-		assert.NoError(t, sendMsg(c, "6"))
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "2"),
-			userMsg(1, "3"),
-			agentMsg(2, "5"),
-			userMsg(3, "6"),
-		}, c.Messages())
-
-		// conversation status is changing right after a user message
-		c.Snapshot("7")
-		c.Snapshot("7")
-		c.Snapshot("7")
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
-		agent.screen = "7"
-		assert.NoError(t, sendMsg(c, "8"))
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "2"),
-			userMsg(1, "3"),
-			agentMsg(2, "5"),
-			userMsg(3, "6"),
-			agentMsg(4, "7"),
-			userMsg(5, "8"),
-		}, c.Messages())
-		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
-		// conversation status is back to stable after a snapshot that
-		// doesn't change the screen
-		c.Snapshot("7")
+		// Send another message.
+		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "8"}))
+
+		// After filling to stable, messages and status are correct.
+		fillToStable(ctx, t, agent, mClock, "7", interval, threshold)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 	})
 
 	t.Run("tracking messages overlap", func(t *testing.T) {
-		agent := &testAgent{}
-		c := newConversation(func(cfg *st.PTYConversationConfig) {
-			cfg.AgentIO = agent
+		c, agent, mClock, ctx := newConversation(t)
+
+		// Common overlap between screens is removed after a user message.
+		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
+		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "2"}))
+		fillToStable(ctx, t, agent, mClock, "1\n3", interval, threshold)
+		assertMessages(t, c, []msgNoTime{
+			{0, "1", st.ConversationRoleAgent},
+			{1, "2", st.ConversationRoleUser},
+			{2, "3", st.ConversationRoleAgent},
 		})
 
-		// common overlap between screens is removed after a user message
-		c.Snapshot("1")
-		agent.screen = "1"
-		assert.NoError(t, sendMsg(c, "2"))
-		c.Snapshot("1\n3")
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "1"),
-			userMsg(1, "2"),
-			agentMsg(2, "3"),
-		}, c.Messages())
-
-		agent.screen = "1\n3x"
-		assert.NoError(t, sendMsg(c, "4"))
-		c.Snapshot("1\n3x\n5")
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "1"),
-			userMsg(1, "2"),
-			agentMsg(2, "3x"),
-			userMsg(3, "4"),
-			agentMsg(4, "5"),
-		}, c.Messages())
+		fillToStable(ctx, t, agent, mClock, "1\n3x", interval, threshold)
+		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "4"}))
+		fillToStable(ctx, t, agent, mClock, "1\n3x\n5", interval, threshold)
+		assertMessages(t, c, []msgNoTime{
+			{0, "1", st.ConversationRoleAgent},
+			{1, "2", st.ConversationRoleUser},
+			{2, "3x", st.ConversationRoleAgent},
+			{3, "4", st.ConversationRoleUser},
+			{4, "5", st.ConversationRoleAgent},
+		})
 	})
 
 	t.Run("format-message", func(t *testing.T) {
-		agent := &testAgent{}
-		c := newConversation(func(cfg *st.PTYConversationConfig) {
-			cfg.AgentIO = agent
+		c, agent, mClock, ctx := newConversation(t, func(cfg *st.PTYConversationConfig) {
 			cfg.FormatMessage = func(message string, userInput string) string {
 				return message + " " + userInput
 			}
 		})
-		agent.screen = "1"
-		assert.NoError(t, sendMsg(c, "2"))
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "1 "),
-			userMsg(1, "2"),
-		}, c.Messages())
-		agent.screen = "x"
-		c.Snapshot("x")
-		assert.Equal(t, []st.ConversationMessage{
-			agentMsg(0, "1 "),
-			userMsg(1, "2"),
-			agentMsg(2, "x 2"),
-		}, c.Messages())
+
+		// Fill to stable with screen "1", then send.
+		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
+		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "2"}))
+
+		// After send, set screen to "x" and take snapshots for new agent message.
+		fillToStable(ctx, t, agent, mClock, "x", interval, threshold)
+		assertMessages(t, c, []msgNoTime{
+			{0, "1 ", st.ConversationRoleAgent},
+			{1, "2", st.ConversationRoleUser},
+			{2, "x 2", st.ConversationRoleAgent},
+		})
 	})
 
-	t.Run("format-message", func(t *testing.T) {
-		agent := &testAgent{}
-		c := newConversation(func(cfg *st.PTYConversationConfig) {
-			cfg.AgentIO = agent
+	t.Run("format-message-initial", func(t *testing.T) {
+		c, _, _, _ := newConversation(t, func(cfg *st.PTYConversationConfig) {
 			cfg.FormatMessage = func(message string, userInput string) string {
 				return "formatted"
 			}
 		})
-		assert.Equal(t, []st.ConversationMessage{
-			{
-				Id:      0,
-				Message: "",
-				Role:    st.ConversationRoleAgent,
-				Time:    now,
-			},
-		}, c.Messages())
+		assertMessages(t, c, []msgNoTime{
+			{0, "", st.ConversationRoleAgent},
+		})
 	})
 
 	t.Run("send-message-status-check", func(t *testing.T) {
-		c := newConversation(func(cfg *st.PTYConversationConfig) {
+		c, agent, mClock, ctx := newConversation(t, func(cfg *st.PTYConversationConfig) {
 			cfg.SkipSendMessageStatusCheck = false
-			cfg.SnapshotInterval = 1 * time.Second
-			cfg.ScreenStabilityLength = 2 * time.Second
-			cfg.AgentIO = &testAgent{}
 		})
-		assert.ErrorIs(t, sendMsg(c, "1"), st.ErrMessageValidationChanging)
-		for range 3 {
-			c.Snapshot("1")
+
+		sendMsg := func(msg string) error {
+			return c.Send(st.MessagePartText{Content: msg})
 		}
-		assert.NoError(t, sendMsg(c, "4"))
-		c.Snapshot("2")
-		assert.ErrorIs(t, sendMsg(c, "5"), st.ErrMessageValidationChanging)
+
+		// Status is initializing, send should fail.
+		assert.ErrorIs(t, sendMsg("1"), st.ErrMessageValidationChanging)
+
+		// Fill to stable.
+		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
+		assert.Equal(t, st.ConversationStatusStable, c.Status())
+
+		// Now send should succeed.
+		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "4"}))
+
+		// After send, screen is dirty. Set to "2" (different from "1") so status is changing.
+		agent.setScreen("2")
+		advancePast(ctx, t, mClock, interval)
+		assert.Equal(t, st.ConversationStatusChanging, c.Status())
+		assert.ErrorIs(t, sendMsg("5"), st.ErrMessageValidationChanging)
 	})
 
 	t.Run("send-message-empty-message", func(t *testing.T) {
-		c := newConversation()
-		assert.ErrorIs(t, sendMsg(c, ""), st.ErrMessageValidationEmpty)
+		c, _, _, _ := newConversation(t)
+		assert.ErrorIs(t, c.Send(st.MessagePartText{Content: ""}), st.ErrMessageValidationEmpty)
 	})
 }
 
 func TestInitialPromptReadiness(t *testing.T) {
-	now := time.Now()
+	discardLogger := slog.New(slog.NewTextHandler(io.Discard, nil))
 
 	t.Run("agent not ready - status remains changing", func(t *testing.T) {
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
-		mClock.Set(now)
+		agent := &testAgent{screen: "loading..."}
 		cfg := st.PTYConversationConfig{
 			Clock:                 mClock,
 			SnapshotInterval:      1 * time.Second,
 			ScreenStabilityLength: 0,
-			AgentIO:               &testAgent{screen: "loading..."},
+			AgentIO:               agent,
 			ReadyForInitialPrompt: func(message string) bool {
 				return message == "ready"
 			},
 			InitialPrompt: []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
+			Logger:        discardLogger,
 		}
-		c := st.NewPTY(context.Background(), cfg)
 
-		// Fill buffer with stable snapshots, but agent is not ready
-		c.Snapshot("loading...")
+		c := st.NewPTY(ctx, cfg)
+		c.Start(ctx)
 
-		// Even though screen is stable, status should be changing because agent is not ready
+		// Take a snapshot with "loading...". Threshold is 1 (stability 0 / interval 1s = 0 + 1 = 1).
+		advancePast(ctx, t, mClock, 1*time.Second)
+
+		// Even though screen is stable, status should be changing because
+		// the initial prompt is still in the outbound queue.
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 	})
 
 	t.Run("agent becomes ready - status stays changing until initial prompt sent", func(t *testing.T) {
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
-		mClock.Set(now)
+		agent := &testAgent{screen: "loading..."}
 		cfg := st.PTYConversationConfig{
 			Clock:                 mClock,
 			SnapshotInterval:      1 * time.Second,
 			ScreenStabilityLength: 0,
-			AgentIO:               &testAgent{screen: "loading..."},
+			AgentIO:               agent,
 			ReadyForInitialPrompt: func(message string) bool {
 				return message == "ready"
 			},
 			InitialPrompt: []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
+			Logger:        discardLogger,
 		}
-		c := st.NewPTY(context.Background(), cfg)
 
-		// Agent not ready initially
-		c.Snapshot("loading...")
+		c := st.NewPTY(ctx, cfg)
+		c.Start(ctx)
+
+		// Agent not ready initially.
+		advancePast(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
-		// Agent becomes ready, but status stays "changing" until initial prompt is sent
-		// This is the new behavior - we don't flip to "stable" then back to "changing"
-		c.Snapshot("ready")
+		// Agent becomes ready, but status stays "changing" because the
+		// initial prompt is still in the outbound queue.
+		agent.setScreen("ready")
+		advancePast(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 	})
 
 	t.Run("initial prompt lifecycle - status stays changing until sent", func(t *testing.T) {
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
-		mClock.Set(now)
 		agent := &testAgent{screen: "loading..."}
+		writeCounter := 0
+		agent.onWrite = func(data []byte) {
+			writeCounter++
+			agent.screen = fmt.Sprintf("__write_%d", writeCounter)
+		}
 		cfg := st.PTYConversationConfig{
-			Clock:                 mClock,
-			SnapshotInterval:      1 * time.Second,
-			ScreenStabilityLength: 0,
-			AgentIO:               agent,
+			Clock:                      mClock,
+			SnapshotInterval:           1 * time.Second,
+			ScreenStabilityLength:      0,
+			AgentIO:                    agent,
 			ReadyForInitialPrompt: func(message string) bool {
 				return message == "ready"
 			},
-			InitialPrompt:              []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
-			SkipWritingMessage:         true,
+			InitialPrompt:             []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
 			SkipSendMessageStatusCheck: true,
+			Logger:                     discardLogger,
 		}
-		c := st.NewPTY(context.Background(), cfg)
 
-		// Initial state: status should be changing while waiting for readiness
-		c.Snapshot("loading...")
-		assert.Equal(t, st.ConversationStatusChanging, c.Status())
+		c := st.NewPTY(ctx, cfg)
+		c.Start(ctx)
 
-		// Agent becomes ready: status still "changing" until initial prompt is actually sent
-		// This prevents the status from flipping "changing" → "stable" → "changing"
-		agent.screen = "ready"
-		c.Snapshot("ready")
+		// Status is "changing" while waiting for readiness.
+		advancePast(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
+
+		// Agent becomes ready. The readiness loop detects this, the snapshot
+		// loop sees queue + stable + ready and signals the send loop.
+		// writeStabilize runs with onWrite changing the screen, so it completes.
+		agent.setScreen("ready")
+		// Drive clock until the initial prompt is sent (queue drains).
+		for i := 0; i < 500; i++ {
+			_, ok := mClock.Peek()
+			if !ok {
+				time.Sleep(1 * time.Millisecond)
+				continue
+			}
+			_, w := mClock.AdvanceNext()
+			w.MustWait(ctx)
+			// Check if the queue has been drained by checking status.
+			// After the initial prompt is sent, last message is user, so status
+			// is "changing". Then after more snapshots, it becomes stable.
+			// We just need to advance until the send loop has processed the message.
+			// A simple heuristic: check if Messages() shows a user message.
+			msgs := c.Messages()
+			if len(msgs) >= 2 {
+				break
+			}
+		}
+
+		// The initial prompt should have been sent. Set a clean screen and
+		// advance enough ticks for the snapshot loop to record it as an
+		// agent message and fill the stability buffer (threshold=1).
+		agent.setScreen("response")
+		advancePast(ctx, t, mClock, 2*time.Second)
+		assert.Equal(t, st.ConversationStatusStable, c.Status())
 	})
 
 	t.Run("no initial prompt - normal status logic applies", func(t *testing.T) {
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
-		mClock.Set(now)
+		agent := &testAgent{screen: "loading..."}
 		cfg := st.PTYConversationConfig{
 			Clock:                 mClock,
 			SnapshotInterval:      1 * time.Second,
 			ScreenStabilityLength: 0,
-			AgentIO:               &testAgent{screen: "loading..."},
+			AgentIO:               agent,
 			ReadyForInitialPrompt: func(message string) bool {
-				return false // Agent never ready
+				return false
 			},
-			// No InitialPrompt set - means no need to wait for readiness
+			Logger: discardLogger,
 		}
-		c := st.NewPTY(context.Background(), cfg)
 
-		c.Snapshot("loading...")
+		c := st.NewPTY(ctx, cfg)
+		c.Start(ctx)
+
+		advancePast(ctx, t, mClock, 1*time.Second)
 
-		// Status should be stable because no initial prompt to wait for
+		// Status should be stable because no initial prompt to wait for.
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 	})
 
 	t.Run("no initial prompt configured - normal status logic applies", func(t *testing.T) {
-		// When no InitialPrompt is configured, the conversation behaves as if
-		// the initial prompt has already been sent, so normal status logic applies.
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
-		mClock.Set(now)
 		agent := &testAgent{screen: "ready"}
 		cfg := st.PTYConversationConfig{
-			Clock:                 mClock,
-			SnapshotInterval:      1 * time.Second,
-			ScreenStabilityLength: 2 * time.Second, // threshold = 3
-			AgentIO:               agent,
-			// No InitialPrompt configured - normal status logic applies immediately
-			SkipWritingMessage:         true,
+			Clock:                      mClock,
+			SnapshotInterval:           1 * time.Second,
+			ScreenStabilityLength:      2 * time.Second, // threshold = 3
+			AgentIO:                    agent,
 			SkipSendMessageStatusCheck: true,
+			Logger:                     discardLogger,
 		}
-		c := st.NewPTY(context.Background(), cfg)
 
-		// Fill buffer to reach stability with "ready" screen
-		c.Snapshot("ready")
-		c.Snapshot("ready")
-		c.Snapshot("ready")
-		// Since no initial prompt is configured, screen stability determines status
+		c := st.NewPTY(ctx, cfg)
+		c.Start(ctx)
+
+		// Fill buffer to reach stability with "ready" screen.
+		fillToStable(ctx, t, agent, mClock, "ready", 1*time.Second, 3)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
-		// After screen changes, status becomes changing
-		agent.screen = "processing..."
-		c.Snapshot("processing...")
+		// After screen changes, status becomes changing.
+		agent.setScreen("processing...")
+		advancePast(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
-		// After screen is stable again (3 identical snapshots), status becomes stable
-		c.Snapshot("processing...")
-		c.Snapshot("processing...")
+		// After screen is stable again (3 identical snapshots), status becomes stable.
+		advancePast(ctx, t, mClock, 1*time.Second)
+		advancePast(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 	})
 }

From c17e6d7d06e62b026ea803f239b1a836d9b14bcf Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 11:52:03 +0000
Subject: [PATCH 29/41] Remove SkipSendMessageStatusCheck from
 PTYConversationConfig

---
 lib/screentracker/pty_conversation.go      | 5 +----
 lib/screentracker/pty_conversation_test.go | 7 +------
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index 22f709a..e5f5c55 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -62,9 +62,6 @@ type PTYConversationConfig struct {
 	// Function to format the messages received from the agent
 	// userInput is the last user message
 	FormatMessage func(message string, userInput string) string
-	// SkipSendMessageStatusCheck skips the check for whether the message can be sent.
-	// This is used in tests
-	SkipSendMessageStatusCheck bool
 	// ReadyForInitialPrompt detects whether the agent has initialized and is ready to accept the initial prompt
 	ReadyForInitialPrompt func(message string) bool
 	// FormatToolCall removes the coder report_task tool call from the agent message and also returns the array of removed tool calls
@@ -288,7 +285,7 @@ func (c *PTYConversation) Send(messageParts ...MessagePart) error {
 	}
 
 	c.lock.Lock()
-	if !c.cfg.SkipSendMessageStatusCheck && c.statusLocked() != ConversationStatusStable {
+	if c.statusLocked() != ConversationStatusStable {
 		c.lock.Unlock()
 		return ErrMessageValidationChanging
 	}
diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index 4fceef4..6e829e8 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -267,7 +267,6 @@ func TestMessages(t *testing.T) {
 			AgentIO:                    agent,
 			SnapshotInterval:           100 * time.Millisecond,
 			ScreenStabilityLength:      200 * time.Millisecond,
-			SkipSendMessageStatusCheck: true,
 			Logger:                     slog.New(slog.NewTextHandler(io.Discard, nil)),
 		}
 		for _, opt := range opts {
@@ -426,9 +425,7 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("send-message-status-check", func(t *testing.T) {
-		c, agent, mClock, ctx := newConversation(t, func(cfg *st.PTYConversationConfig) {
-			cfg.SkipSendMessageStatusCheck = false
-		})
+		c, agent, mClock, ctx := newConversation(t)
 
 		sendMsg := func(msg string) error {
 			return c.Send(st.MessagePartText{Content: msg})
@@ -538,7 +535,6 @@ func TestInitialPromptReadiness(t *testing.T) {
 				return message == "ready"
 			},
 			InitialPrompt:             []st.MessagePart{st.MessagePartText{Content: "initial prompt here"}},
-			SkipSendMessageStatusCheck: true,
 			Logger:                     discardLogger,
 		}
 
@@ -616,7 +612,6 @@ func TestInitialPromptReadiness(t *testing.T) {
 			SnapshotInterval:           1 * time.Second,
 			ScreenStabilityLength:      2 * time.Second, // threshold = 3
 			AgentIO:                    agent,
-			SkipSendMessageStatusCheck: true,
 			Logger:                     discardLogger,
 		}
 

From c073f0428671ef7951ee5676f1d46ad9772aaf79 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 12:43:40 +0000
Subject: [PATCH 30/41] Reorder readiness check for short-circuit and expand
 re-apply comment

---
 lib/screentracker/pty_conversation.go | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/lib/screentracker/pty_conversation.go b/lib/screentracker/pty_conversation.go
index e5f5c55..551ea50 100644
--- a/lib/screentracker/pty_conversation.go
+++ b/lib/screentracker/pty_conversation.go
@@ -176,7 +176,7 @@ func (c *PTYConversation) Start(ctx context.Context) {
 			isReady = true
 		default:
 		}
-		if len(c.outboundQueue) > 0 && c.isScreenStableLocked() && isReady {
+		if isReady && len(c.outboundQueue) > 0 && c.isScreenStableLocked() {
 			select {
 			case c.stableSignal <- struct{}{}:
 			default:
@@ -317,9 +317,15 @@ func (c *PTYConversation) sendMessage(ctx context.Context, messageParts ...Messa
 	}
 
 	c.lock.Lock()
-	// Re-apply the pre-send screen to the agent message. While the lock
-	// was released during writeStabilize, the snapshot loop may have
-	// overwritten the agent message with intermediate screen content.
+	// Re-apply the pre-send agent message from the screen captured before
+	// the write. While the lock was released during writeStabilize, the
+	// snapshot loop continued taking snapshots and calling
+	// updateLastAgentMessageLocked with whatever was on screen at each
+	// tick (typically echoed user input or intermediate terminal state).
+	// Those updates corrupt the agent message for this turn. Restoring it
+	// here ensures the conversation history is correct. The next line sets
+	// screenBeforeLastUserMessage so the *next* agent message will be
+	// diffed relative to the pre-send screen.
 	c.updateLastAgentMessageLocked(screenBeforeMessage, now)
 	c.screenBeforeLastUserMessage = screenBeforeMessage
 	c.messages = append(c.messages, ConversationMessage{

From 429f35d337d46094c85476e1943d5a5a006c4a67 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 12:46:00 +0000
Subject: [PATCH 31/41] refactor: simplify test helpers in
 pty_conversation_test.go

- Pass context.Context into newConversation, return 3 values
- Simplify advancePast: remove retry loop and time.Sleep
- Simplify sendWithClockDrive: remove Peek polling and time.Sleep
---
 lib/screentracker/pty_conversation_test.go | 67 ++++++++--------------
 1 file changed, 23 insertions(+), 44 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index 6e829e8..df8ed6c 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -49,30 +49,14 @@ func (a *testAgent) setScreen(s string) {
 func advancePast(ctx context.Context, t *testing.T, mClock *quartz.Mock, total time.Duration) {
 	t.Helper()
 	target := mClock.Now().Add(total)
-	noEventRetries := 0
 	for mClock.Now().Before(target) {
 		remaining := target.Sub(mClock.Now())
 		d, ok := mClock.Peek()
-		if !ok {
-			// No events pending. A background goroutine may be about to
-			// register a timer, so retry a few times before jumping.
-			noEventRetries++
-			if noEventRetries > 50 {
-				mClock.Advance(remaining).MustWait(ctx)
-				return
-			}
-			time.Sleep(1 * time.Millisecond)
-			continue
-		}
-		noEventRetries = 0
-		if d > remaining {
-			// Next event is past our target; safe to jump.
+		if !ok || d > remaining {
 			mClock.Advance(remaining).MustWait(ctx)
 			return
 		}
-		// Step to the next event.
-		_, w := mClock.AdvanceNext()
-		w.MustWait(ctx)
+		mClock.Advance(d).MustWait(ctx)
 	}
 }
 
@@ -95,8 +79,6 @@ func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation
 	go func() {
 		errCh <- c.Send(parts...)
 	}()
-	// Give the goroutine a moment to start and enqueue.
-	time.Sleep(1 * time.Millisecond)
 	deadline := time.Now().Add(10 * time.Second)
 	for time.Now().Before(deadline) {
 		select {
@@ -104,17 +86,6 @@ func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation
 			return err
 		default:
 		}
-		_, ok := mClock.Peek()
-		if !ok {
-			// No mock events yet; the send loop goroutine may still be
-			// setting up timers. Wait briefly for it to schedule.
-			time.Sleep(5 * time.Millisecond)
-			continue
-		}
-		// Use AdvanceNext instead of Advance(d) to avoid a race: between
-		// Peek() and Advance(d), a goroutine may create a closer timer,
-		// making Advance(d) illegal. AdvanceNext always advances to the
-		// nearest event regardless.
 		_, w := mClock.AdvanceNext()
 		w.MustWait(ctx)
 	}
@@ -247,10 +218,8 @@ func TestMessages(t *testing.T) {
 
 	// newConversation creates a started conversation with a mock clock and
 	// testAgent. Tests that Send() messages must use sendWithClockDrive.
-	newConversation := func(t *testing.T, opts ...func(*st.PTYConversationConfig)) (*st.PTYConversation, *testAgent, *quartz.Mock, context.Context) {
+	newConversation := func(ctx context.Context, t *testing.T, opts ...func(*st.PTYConversationConfig)) (*st.PTYConversation, *testAgent, *quartz.Mock) {
 		t.Helper()
-		ctx, cancel := context.WithCancel(context.Background())
-		t.Cleanup(cancel)
 
 		writeCounter := 0
 		agent := &testAgent{}
@@ -279,7 +248,7 @@ func TestMessages(t *testing.T) {
 		c := st.NewPTY(ctx, cfg)
 		c.Start(ctx)
 
-		return c, agent, mClock, ctx
+		return c, agent, mClock
 	}
 
 	// threshold = 3 (200ms / 100ms = 2, + 1 = 3)
@@ -287,7 +256,7 @@ func TestMessages(t *testing.T) {
 	const interval = 100 * time.Millisecond
 
 	t.Run("messages are copied", func(t *testing.T) {
-		c, _, _, _ := newConversation(t)
+		c, _, _ := newConversation(context.Background(), t)
 		messages := c.Messages()
 		assertMessages(t, c, []msgNoTime{{0, "", st.ConversationRoleAgent}})
 
@@ -297,7 +266,7 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("whitespace-padding", func(t *testing.T) {
-		c, _, _, _ := newConversation(t)
+		c, _, _ := newConversation(context.Background(), t)
 		for _, msg := range []string{"123 ", " 123", "123\t\t", "\n123", "123\n\t", " \t123\n\t"} {
 			err := c.Send(st.MessagePartText{Content: msg})
 			assert.ErrorIs(t, err, st.ErrMessageValidationWhitespace)
@@ -305,7 +274,9 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("no-change-no-message-update", func(t *testing.T) {
-		c, agent, mClock, ctx := newConversation(t)
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+		c, agent, mClock := newConversation(ctx, t)
 
 		agent.setScreen("1")
 		advancePast(ctx, t, mClock, interval)
@@ -317,7 +288,9 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("tracking messages", func(t *testing.T) {
-		c, agent, mClock, ctx := newConversation(t)
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+		c, agent, mClock := newConversation(ctx, t)
 
 		// Agent message is recorded when the first snapshot is taken.
 		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
@@ -369,7 +342,9 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("tracking messages overlap", func(t *testing.T) {
-		c, agent, mClock, ctx := newConversation(t)
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+		c, agent, mClock := newConversation(ctx, t)
 
 		// Common overlap between screens is removed after a user message.
 		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
@@ -394,7 +369,9 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("format-message", func(t *testing.T) {
-		c, agent, mClock, ctx := newConversation(t, func(cfg *st.PTYConversationConfig) {
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+		c, agent, mClock := newConversation(ctx, t, func(cfg *st.PTYConversationConfig) {
 			cfg.FormatMessage = func(message string, userInput string) string {
 				return message + " " + userInput
 			}
@@ -414,7 +391,7 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("format-message-initial", func(t *testing.T) {
-		c, _, _, _ := newConversation(t, func(cfg *st.PTYConversationConfig) {
+		c, _, _ := newConversation(context.Background(), t, func(cfg *st.PTYConversationConfig) {
 			cfg.FormatMessage = func(message string, userInput string) string {
 				return "formatted"
 			}
@@ -425,7 +402,9 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("send-message-status-check", func(t *testing.T) {
-		c, agent, mClock, ctx := newConversation(t)
+		ctx, cancel := context.WithCancel(context.Background())
+		t.Cleanup(cancel)
+		c, agent, mClock := newConversation(ctx, t)
 
 		sendMsg := func(msg string) error {
 			return c.Send(st.MessagePartText{Content: msg})
@@ -449,7 +428,7 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("send-message-empty-message", func(t *testing.T) {
-		c, _, _, _ := newConversation(t)
+		c, _, _ := newConversation(context.Background(), t)
 		assert.ErrorIs(t, c.Send(st.MessagePartText{Content: ""}), st.ErrMessageValidationEmpty)
 	})
 }

From 85d39653787b6a7509a4cb89d4d28d2893484b04 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 13:08:34 +0000
Subject: [PATCH 32/41] Replace wall-clock deadline with context timeout in
 sendWithClockDrive tests

---
 lib/screentracker/pty_conversation_test.go | 29 +++++++++++-----------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index df8ed6c..8cdc1fe 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -16,6 +16,8 @@ import (
 	st "github.com/coder/agentapi/lib/screentracker"
 )
 
+const testTimeout = 10 * time.Second
+
 // testAgent is a goroutine-safe mock implementation of AgentIO.
 type testAgent struct {
 	mu      sync.Mutex
@@ -79,8 +81,7 @@ func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation
 	go func() {
 		errCh <- c.Send(parts...)
 	}()
-	deadline := time.Now().Add(10 * time.Second)
-	for time.Now().Before(deadline) {
+	for {
 		select {
 		case err := <-errCh:
 			return err
@@ -89,8 +90,6 @@ func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation
 		_, w := mClock.AdvanceNext()
 		w.MustWait(ctx)
 	}
-	t.Fatal("sendWithClockDrive timed out")
-	return nil
 }
 
 // msgNoTime is a ConversationMessage without the Time field for easy comparison.
@@ -123,7 +122,7 @@ type statusTestParams struct {
 }
 
 func statusTest(t *testing.T, params statusTestParams) {
-	ctx, cancel := context.WithCancel(context.Background())
+	ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 	t.Cleanup(cancel)
 	t.Run(fmt.Sprintf("interval-%s,stability_length-%s", params.cfg.SnapshotInterval, params.cfg.ScreenStabilityLength), func(t *testing.T) {
 		mClock := quartz.NewMock(t)
@@ -274,7 +273,7 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("no-change-no-message-update", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		c, agent, mClock := newConversation(ctx, t)
 
@@ -288,7 +287,7 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("tracking messages", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		c, agent, mClock := newConversation(ctx, t)
 
@@ -342,7 +341,7 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("tracking messages overlap", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		c, agent, mClock := newConversation(ctx, t)
 
@@ -369,7 +368,7 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("format-message", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		c, agent, mClock := newConversation(ctx, t, func(cfg *st.PTYConversationConfig) {
 			cfg.FormatMessage = func(message string, userInput string) string {
@@ -402,7 +401,7 @@ func TestMessages(t *testing.T) {
 	})
 
 	t.Run("send-message-status-check", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		c, agent, mClock := newConversation(ctx, t)
 
@@ -437,7 +436,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 	discardLogger := slog.New(slog.NewTextHandler(io.Discard, nil))
 
 	t.Run("agent not ready - status remains changing", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
 		agent := &testAgent{screen: "loading..."}
@@ -465,7 +464,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 	})
 
 	t.Run("agent becomes ready - status stays changing until initial prompt sent", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
 		agent := &testAgent{screen: "loading..."}
@@ -496,7 +495,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 	})
 
 	t.Run("initial prompt lifecycle - status stays changing until sent", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
 		agent := &testAgent{screen: "loading..."}
@@ -557,7 +556,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 	})
 
 	t.Run("no initial prompt - normal status logic applies", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
 		agent := &testAgent{screen: "loading..."}
@@ -582,7 +581,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 	})
 
 	t.Run("no initial prompt configured - normal status logic applies", func(t *testing.T) {
-		ctx, cancel := context.WithCancel(context.Background())
+		ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
 		t.Cleanup(cancel)
 		mClock := quartz.NewMock(t)
 		agent := &testAgent{screen: "ready"}

From a5edc13bf437dd7baa8cb93351535a5396cdcdad Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 13:21:11 +0000
Subject: [PATCH 33/41] Remove msgNoTime/stripTimes, assert full
 ConversationMessage values

- Replace time.Now() with fixed time.Date(2025, 1, 1, ...)
- Delete msgNoTime type and stripTimes function
- Rewrite assertMessages to compare full ConversationMessage fields
  with flexible time checking (zero = assert non-zero)
- Update all call sites to use []st.ConversationMessage with named fields
---
 lib/screentracker/pty_conversation_test.go | 108 +++++++++++----------
 1 file changed, 55 insertions(+), 53 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index 8cdc1fe..1dffbde 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -92,24 +92,20 @@ func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation
 	}
 }
 
-// msgNoTime is a ConversationMessage without the Time field for easy comparison.
-type msgNoTime struct {
-	Id      int
-	Message string
-	Role    st.ConversationRole
-}
-
-func stripTimes(msgs []st.ConversationMessage) []msgNoTime {
-	result := make([]msgNoTime, len(msgs))
-	for i, m := range msgs {
-		result[i] = msgNoTime{Id: m.Id, Message: m.Message, Role: m.Role}
-	}
-	return result
-}
-
-func assertMessages(t *testing.T, c *st.PTYConversation, expected []msgNoTime) {
+func assertMessages(t *testing.T, c *st.PTYConversation, expected []st.ConversationMessage) {
 	t.Helper()
-	assert.Equal(t, expected, stripTimes(c.Messages()))
+	actual := c.Messages()
+	require.Len(t, actual, len(expected))
+	for i := range expected {
+		assert.Equal(t, expected[i].Id, actual[i].Id, "message %d Id", i)
+		assert.Equal(t, expected[i].Message, actual[i].Message, "message %d Message", i)
+		assert.Equal(t, expected[i].Role, actual[i].Role, "message %d Role", i)
+		if expected[i].Time.IsZero() {
+			assert.False(t, actual[i].Time.IsZero(), "message %d Time should be non-zero", i)
+		} else {
+			assert.Equal(t, expected[i].Time, actual[i].Time, "message %d Time", i)
+		}
+	}
 }
 
 type statusTestStep struct {
@@ -213,7 +209,7 @@ func TestConversation(t *testing.T) {
 }
 
 func TestMessages(t *testing.T) {
-	now := time.Now()
+	now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
 
 	// newConversation creates a started conversation with a mock clock and
 	// testAgent. Tests that Send() messages must use sendWithClockDrive.
@@ -257,11 +253,15 @@ func TestMessages(t *testing.T) {
 	t.Run("messages are copied", func(t *testing.T) {
 		c, _, _ := newConversation(context.Background(), t)
 		messages := c.Messages()
-		assertMessages(t, c, []msgNoTime{{0, "", st.ConversationRoleAgent}})
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "", Role: st.ConversationRoleAgent},
+		})
 
 		messages[0].Message = "modification"
 
-		assertMessages(t, c, []msgNoTime{{0, "", st.ConversationRoleAgent}})
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "", Role: st.ConversationRoleAgent},
+		})
 	})
 
 	t.Run("whitespace-padding", func(t *testing.T) {
@@ -279,11 +279,13 @@ func TestMessages(t *testing.T) {
 
 		agent.setScreen("1")
 		advancePast(ctx, t, mClock, interval)
-		msgs := stripTimes(c.Messages())
-		assert.Equal(t, []msgNoTime{{0, "1", st.ConversationRoleAgent}}, msgs)
+		msgs := c.Messages()
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
+		})
 
 		advancePast(ctx, t, mClock, interval)
-		assert.Equal(t, msgs, stripTimes(c.Messages()))
+		assert.Equal(t, msgs, c.Messages())
 	})
 
 	t.Run("tracking messages", func(t *testing.T) {
@@ -293,15 +295,15 @@ func TestMessages(t *testing.T) {
 
 		// Agent message is recorded when the first snapshot is taken.
 		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
-		assertMessages(t, c, []msgNoTime{
-			{0, "1", st.ConversationRoleAgent},
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
 		})
 
 		// Agent message is updated when the screen changes.
 		agent.setScreen("2")
 		advancePast(ctx, t, mClock, interval)
-		assertMessages(t, c, []msgNoTime{
-			{0, "2", st.ConversationRoleAgent},
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "2", Role: st.ConversationRoleAgent},
 		})
 
 		// Fill to stable so Send can proceed (screen is "2").
@@ -312,10 +314,10 @@ func TestMessages(t *testing.T) {
 
 		// After send, screen is dirty from writeStabilize. Set to "4" and stabilize.
 		fillToStable(ctx, t, agent, mClock, "4", interval, threshold)
-		assertMessages(t, c, []msgNoTime{
-			{0, "2", st.ConversationRoleAgent},
-			{1, "3", st.ConversationRoleUser},
-			{2, "4", st.ConversationRoleAgent},
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "2", Role: st.ConversationRoleAgent},
+			{Id: 1, Message: "3", Role: st.ConversationRoleUser},
+			{Id: 2, Message: "4", Role: st.ConversationRoleAgent},
 		})
 
 		// Agent message is updated when the screen changes before a user message.
@@ -323,12 +325,12 @@ func TestMessages(t *testing.T) {
 		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "6"}))
 
 		fillToStable(ctx, t, agent, mClock, "7", interval, threshold)
-		assertMessages(t, c, []msgNoTime{
-			{0, "2", st.ConversationRoleAgent},
-			{1, "3", st.ConversationRoleUser},
-			{2, "5", st.ConversationRoleAgent},
-			{3, "6", st.ConversationRoleUser},
-			{4, "7", st.ConversationRoleAgent},
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "2", Role: st.ConversationRoleAgent},
+			{Id: 1, Message: "3", Role: st.ConversationRoleUser},
+			{Id: 2, Message: "5", Role: st.ConversationRoleAgent},
+			{Id: 3, Message: "6", Role: st.ConversationRoleUser},
+			{Id: 4, Message: "7", Role: st.ConversationRoleAgent},
 		})
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
@@ -349,21 +351,21 @@ func TestMessages(t *testing.T) {
 		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
 		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "2"}))
 		fillToStable(ctx, t, agent, mClock, "1\n3", interval, threshold)
-		assertMessages(t, c, []msgNoTime{
-			{0, "1", st.ConversationRoleAgent},
-			{1, "2", st.ConversationRoleUser},
-			{2, "3", st.ConversationRoleAgent},
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
+			{Id: 1, Message: "2", Role: st.ConversationRoleUser},
+			{Id: 2, Message: "3", Role: st.ConversationRoleAgent},
 		})
 
 		fillToStable(ctx, t, agent, mClock, "1\n3x", interval, threshold)
 		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "4"}))
 		fillToStable(ctx, t, agent, mClock, "1\n3x\n5", interval, threshold)
-		assertMessages(t, c, []msgNoTime{
-			{0, "1", st.ConversationRoleAgent},
-			{1, "2", st.ConversationRoleUser},
-			{2, "3x", st.ConversationRoleAgent},
-			{3, "4", st.ConversationRoleUser},
-			{4, "5", st.ConversationRoleAgent},
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
+			{Id: 1, Message: "2", Role: st.ConversationRoleUser},
+			{Id: 2, Message: "3x", Role: st.ConversationRoleAgent},
+			{Id: 3, Message: "4", Role: st.ConversationRoleUser},
+			{Id: 4, Message: "5", Role: st.ConversationRoleAgent},
 		})
 	})
 
@@ -382,10 +384,10 @@ func TestMessages(t *testing.T) {
 
 		// After send, set screen to "x" and take snapshots for new agent message.
 		fillToStable(ctx, t, agent, mClock, "x", interval, threshold)
-		assertMessages(t, c, []msgNoTime{
-			{0, "1 ", st.ConversationRoleAgent},
-			{1, "2", st.ConversationRoleUser},
-			{2, "x 2", st.ConversationRoleAgent},
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "1 ", Role: st.ConversationRoleAgent},
+			{Id: 1, Message: "2", Role: st.ConversationRoleUser},
+			{Id: 2, Message: "x 2", Role: st.ConversationRoleAgent},
 		})
 	})
 
@@ -395,8 +397,8 @@ func TestMessages(t *testing.T) {
 				return "formatted"
 			}
 		})
-		assertMessages(t, c, []msgNoTime{
-			{0, "", st.ConversationRoleAgent},
+		assertMessages(t, c, []st.ConversationMessage{
+			{Id: 0, Message: "", Role: st.ConversationRoleAgent},
 		})
 	})
 

From 38f119df50a6b295d857f2b897de82e9cb2aaadc Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 14:04:51 +0000
Subject: [PATCH 34/41] Simplify assertMessages to use require.Equal instead of
 per-field loop

---
 lib/screentracker/pty_conversation_test.go | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index 1dffbde..b54fc55 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -95,17 +95,11 @@ func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation
 func assertMessages(t *testing.T, c *st.PTYConversation, expected []st.ConversationMessage) {
 	t.Helper()
 	actual := c.Messages()
-	require.Len(t, actual, len(expected))
-	for i := range expected {
-		assert.Equal(t, expected[i].Id, actual[i].Id, "message %d Id", i)
-		assert.Equal(t, expected[i].Message, actual[i].Message, "message %d Message", i)
-		assert.Equal(t, expected[i].Role, actual[i].Role, "message %d Role", i)
-		if expected[i].Time.IsZero() {
-			assert.False(t, actual[i].Time.IsZero(), "message %d Time should be non-zero", i)
-		} else {
-			assert.Equal(t, expected[i].Time, actual[i].Time, "message %d Time", i)
-		}
+	for i := range actual {
+		require.False(t, actual[i].Time.IsZero(), "message %d Time should be non-zero", i)
+		actual[i].Time = time.Time{}
 	}
+	require.Equal(t, expected, actual)
 }
 
 type statusTestStep struct {

From 9a1b2387173c42918a7caaad6677de41a9ba6621 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 14:14:34 +0000
Subject: [PATCH 35/41] screentracker test: add ctx.Done select and onWrite
 comment

---
 lib/screentracker/pty_conversation_test.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index b54fc55..ed67f8d 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -22,6 +22,9 @@ const testTimeout = 10 * time.Second
 type testAgent struct {
 	mu      sync.Mutex
 	screen  string
+	// onWrite is called during Write to simulate the agent reacting to
+	// terminal input (e.g., changing the screen), which unblocks
+	// writeStabilize's polling loops.
 	onWrite func(data []byte)
 }
 
@@ -85,6 +88,8 @@ func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation
 		select {
 		case err := <-errCh:
 			return err
+		case <-ctx.Done():
+			return ctx.Err()
 		default:
 		}
 		_, w := mClock.AdvanceNext()

From 8ca7856b4bea7a28cf7bcafaeb32075e56270bc0 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 14:39:17 +0000
Subject: [PATCH 36/41] screentracker test: extract driveClockUntil helper

Extract a shared driveClockUntil helper that advances the mock clock
one event at a time until a condition is met. Use it in both
sendWithClockDrive and the initial prompt lifecycle test, replacing
the ad-hoc 500-iteration loop.

Also change sendWithClockDrive to return nothing (instead of error)
since all call sites were wrapping it with require.NoError. The
error check now happens inside via require.NoError.
---
 lib/screentracker/pty_conversation_test.go | 77 ++++++++++++----------
 1 file changed, 44 insertions(+), 33 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index ed67f8d..6fb289a 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -75,26 +75,52 @@ func fillToStable(ctx context.Context, t *testing.T, agent *testAgent, mClock *q
 	}
 }
 
+// driveClockUntil advances the mock clock one event at a time until done
+// returns true or the context is cancelled.
+func driveClockUntil(ctx context.Context, t *testing.T, mClock *quartz.Mock, done func() bool) {
+	t.Helper()
+	for i := 0; ; i++ {
+		if done() {
+			return
+		}
+		select {
+		case <-ctx.Done():
+			t.Fatal("context cancelled waiting for condition")
+		default:
+		}
+		_, ok := mClock.Peek()
+		if !ok {
+			time.Sleep(time.Millisecond)
+			continue
+		}
+		_, w := mClock.AdvanceNext()
+		w.MustWait(ctx)
+		// Periodically yield so conversation goroutines can process
+		// clock events between advances.
+		if i%10 == 0 {
+			time.Sleep(time.Millisecond)
+		}
+	}
+}
+
 // sendWithClockDrive calls Send() in a goroutine and advances the mock clock
 // until Send completes. This drives the snapshot loop (which signals
 // stableSignal) and writeStabilize (which creates mock timers).
-func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation, mClock *quartz.Mock, parts ...st.MessagePart) error {
+func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation, mClock *quartz.Mock, parts ...st.MessagePart) {
 	t.Helper()
 	errCh := make(chan error, 1)
 	go func() {
 		errCh <- c.Send(parts...)
 	}()
-	for {
+	driveClockUntil(ctx, t, mClock, func() bool {
 		select {
 		case err := <-errCh:
-			return err
-		case <-ctx.Done():
-			return ctx.Err()
+			require.NoError(t, err)
+			return true
 		default:
+			return false
 		}
-		_, w := mClock.AdvanceNext()
-		w.MustWait(ctx)
-	}
+	})
 }
 
 func assertMessages(t *testing.T, c *st.PTYConversation, expected []st.ConversationMessage) {
@@ -309,7 +335,7 @@ func TestMessages(t *testing.T) {
 		fillToStable(ctx, t, agent, mClock, "2", interval, threshold)
 
 		// User message is recorded.
-		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "3"}))
+		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "3"})
 
 		// After send, screen is dirty from writeStabilize. Set to "4" and stabilize.
 		fillToStable(ctx, t, agent, mClock, "4", interval, threshold)
@@ -321,7 +347,7 @@ func TestMessages(t *testing.T) {
 
 		// Agent message is updated when the screen changes before a user message.
 		fillToStable(ctx, t, agent, mClock, "5", interval, threshold)
-		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "6"}))
+		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "6"})
 
 		fillToStable(ctx, t, agent, mClock, "7", interval, threshold)
 		assertMessages(t, c, []st.ConversationMessage{
@@ -334,7 +360,7 @@ func TestMessages(t *testing.T) {
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
 		// Send another message.
-		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "8"}))
+		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "8"})
 
 		// After filling to stable, messages and status are correct.
 		fillToStable(ctx, t, agent, mClock, "7", interval, threshold)
@@ -348,7 +374,7 @@ func TestMessages(t *testing.T) {
 
 		// Common overlap between screens is removed after a user message.
 		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
-		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "2"}))
+		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
 		fillToStable(ctx, t, agent, mClock, "1\n3", interval, threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
@@ -357,7 +383,7 @@ func TestMessages(t *testing.T) {
 		})
 
 		fillToStable(ctx, t, agent, mClock, "1\n3x", interval, threshold)
-		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "4"}))
+		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "4"})
 		fillToStable(ctx, t, agent, mClock, "1\n3x\n5", interval, threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
@@ -379,7 +405,7 @@ func TestMessages(t *testing.T) {
 
 		// Fill to stable with screen "1", then send.
 		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
-		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "2"}))
+		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
 
 		// After send, set screen to "x" and take snapshots for new agent message.
 		fillToStable(ctx, t, agent, mClock, "x", interval, threshold)
@@ -418,7 +444,7 @@ func TestMessages(t *testing.T) {
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
 		// Now send should succeed.
-		require.NoError(t, sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "4"}))
+		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "4"})
 
 		// After send, screen is dirty. Set to "2" (different from "1") so status is changing.
 		agent.setScreen("2")
@@ -529,24 +555,9 @@ func TestInitialPromptReadiness(t *testing.T) {
 		// writeStabilize runs with onWrite changing the screen, so it completes.
 		agent.setScreen("ready")
 		// Drive clock until the initial prompt is sent (queue drains).
-		for i := 0; i < 500; i++ {
-			_, ok := mClock.Peek()
-			if !ok {
-				time.Sleep(1 * time.Millisecond)
-				continue
-			}
-			_, w := mClock.AdvanceNext()
-			w.MustWait(ctx)
-			// Check if the queue has been drained by checking status.
-			// After the initial prompt is sent, last message is user, so status
-			// is "changing". Then after more snapshots, it becomes stable.
-			// We just need to advance until the send loop has processed the message.
-			// A simple heuristic: check if Messages() shows a user message.
-			msgs := c.Messages()
-			if len(msgs) >= 2 {
-				break
-			}
-		}
+		driveClockUntil(ctx, t, mClock, func() bool {
+			return len(c.Messages()) >= 2
+		})
 
 		// The initial prompt should have been sent. Set a clean screen and
 		// advance enough ticks for the snapshot loop to record it as an

From a134116a4d44bd69110f545dd7955c205a087b74 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 17:26:16 +0000
Subject: [PATCH 37/41] fix: rewrite util.WaitFor to use a single timer

---
 lib/screentracker/pty_conversation_test.go | 49 +++++++------------
 lib/util/util.go                           | 55 +++++++++-------------
 2 files changed, 40 insertions(+), 64 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index 6fb289a..da288d7 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -75,44 +75,31 @@ func fillToStable(ctx context.Context, t *testing.T, agent *testAgent, mClock *q
 	}
 }
 
-// driveClockUntil advances the mock clock one event at a time until done
-// returns true or the context is cancelled.
-func driveClockUntil(ctx context.Context, t *testing.T, mClock *quartz.Mock, done func() bool) {
+// advanceUntil advances the mock clock one event at a time until done returns
+// true. Because the snapshot TickerFunc is always pending and WaitFor reuses a
+// single timer via Reset, there is always at least one event to advance.
+func advanceUntil(ctx context.Context, t *testing.T, mClock *quartz.Mock, done func() bool) {
 	t.Helper()
-	for i := 0; ; i++ {
-		if done() {
-			return
-		}
+	for !done() {
 		select {
 		case <-ctx.Done():
 			t.Fatal("context cancelled waiting for condition")
 		default:
 		}
-		_, ok := mClock.Peek()
-		if !ok {
-			time.Sleep(time.Millisecond)
-			continue
-		}
 		_, w := mClock.AdvanceNext()
 		w.MustWait(ctx)
-		// Periodically yield so conversation goroutines can process
-		// clock events between advances.
-		if i%10 == 0 {
-			time.Sleep(time.Millisecond)
-		}
 	}
 }
 
-// sendWithClockDrive calls Send() in a goroutine and advances the mock clock
-// until Send completes. This drives the snapshot loop (which signals
-// stableSignal) and writeStabilize (which creates mock timers).
-func sendWithClockDrive(ctx context.Context, t *testing.T, c *st.PTYConversation, mClock *quartz.Mock, parts ...st.MessagePart) {
+// sendAndAdvance calls Send() in a goroutine and advances the mock clock until
+// Send completes.
+func sendAndAdvance(ctx context.Context, t *testing.T, c *st.PTYConversation, mClock *quartz.Mock, parts ...st.MessagePart) {
 	t.Helper()
 	errCh := make(chan error, 1)
 	go func() {
 		errCh <- c.Send(parts...)
 	}()
-	driveClockUntil(ctx, t, mClock, func() bool {
+	advanceUntil(ctx, t, mClock, func() bool {
 		select {
 		case err := <-errCh:
 			require.NoError(t, err)
@@ -237,7 +224,7 @@ func TestMessages(t *testing.T) {
 	now := time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
 
 	// newConversation creates a started conversation with a mock clock and
-	// testAgent. Tests that Send() messages must use sendWithClockDrive.
+	// testAgent. Tests that Send() messages must use sendAndAdvance.
 	newConversation := func(ctx context.Context, t *testing.T, opts ...func(*st.PTYConversationConfig)) (*st.PTYConversation, *testAgent, *quartz.Mock) {
 		t.Helper()
 
@@ -335,7 +322,7 @@ func TestMessages(t *testing.T) {
 		fillToStable(ctx, t, agent, mClock, "2", interval, threshold)
 
 		// User message is recorded.
-		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "3"})
+		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "3"})
 
 		// After send, screen is dirty from writeStabilize. Set to "4" and stabilize.
 		fillToStable(ctx, t, agent, mClock, "4", interval, threshold)
@@ -347,7 +334,7 @@ func TestMessages(t *testing.T) {
 
 		// Agent message is updated when the screen changes before a user message.
 		fillToStable(ctx, t, agent, mClock, "5", interval, threshold)
-		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "6"})
+		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "6"})
 
 		fillToStable(ctx, t, agent, mClock, "7", interval, threshold)
 		assertMessages(t, c, []st.ConversationMessage{
@@ -360,7 +347,7 @@ func TestMessages(t *testing.T) {
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
 		// Send another message.
-		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "8"})
+		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "8"})
 
 		// After filling to stable, messages and status are correct.
 		fillToStable(ctx, t, agent, mClock, "7", interval, threshold)
@@ -374,7 +361,7 @@ func TestMessages(t *testing.T) {
 
 		// Common overlap between screens is removed after a user message.
 		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
-		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
+		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
 		fillToStable(ctx, t, agent, mClock, "1\n3", interval, threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
@@ -383,7 +370,7 @@ func TestMessages(t *testing.T) {
 		})
 
 		fillToStable(ctx, t, agent, mClock, "1\n3x", interval, threshold)
-		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "4"})
+		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "4"})
 		fillToStable(ctx, t, agent, mClock, "1\n3x\n5", interval, threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
@@ -405,7 +392,7 @@ func TestMessages(t *testing.T) {
 
 		// Fill to stable with screen "1", then send.
 		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
-		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
+		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
 
 		// After send, set screen to "x" and take snapshots for new agent message.
 		fillToStable(ctx, t, agent, mClock, "x", interval, threshold)
@@ -444,7 +431,7 @@ func TestMessages(t *testing.T) {
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
 		// Now send should succeed.
-		sendWithClockDrive(ctx, t, c, mClock, st.MessagePartText{Content: "4"})
+		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "4"})
 
 		// After send, screen is dirty. Set to "2" (different from "1") so status is changing.
 		agent.setScreen("2")
@@ -555,7 +542,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 		// writeStabilize runs with onWrite changing the screen, so it completes.
 		agent.setScreen("ready")
 		// Drive clock until the initial prompt is sent (queue drains).
-		driveClockUntil(ctx, t, mClock, func() bool {
+		advanceUntil(ctx, t, mClock, func() bool {
 			return len(c.Messages()) >= 2
 		})
 
diff --git a/lib/util/util.go b/lib/util/util.go
index bbd70d5..4c8be4b 100644
--- a/lib/util/util.go
+++ b/lib/util/util.go
@@ -23,6 +23,8 @@ var WaitTimedOut = xerrors.New("timeout waiting for condition")
 
 // WaitFor waits for a condition to be true or the timeout to expire.
 // It will wait for the condition to be true with exponential backoff.
+// A single sleep timer is reused across iterations via Reset so that
+// mock-clock tests always have a pending timer to advance.
 func WaitFor(ctx context.Context, timeout WaitTimeout, condition func() (bool, error)) error {
 	clock := timeout.Clock
 	if clock == nil {
@@ -41,53 +43,40 @@ func WaitFor(ctx context.Context, timeout WaitTimeout, condition func() (bool, e
 	if timeoutDuration == 0 {
 		timeoutDuration = 10 * time.Second
 	}
-	timeoutTimer := clock.NewTimer(timeoutDuration)
-	defer timeoutTimer.Stop()
-
 	if minInterval > maxInterval {
 		return xerrors.Errorf("minInterval is greater than maxInterval")
 	}
 
+	timeoutTimer := clock.NewTimer(timeoutDuration)
+	defer timeoutTimer.Stop()
+
 	interval := minInterval
-	if timeout.InitialWait {
-		initialTimer := clock.NewTimer(interval)
-		defer initialTimer.Stop()
-		select {
-		case <-initialTimer.C:
-		case <-ctx.Done():
-			initialTimer.Stop()
-			return ctx.Err()
-		case <-timeoutTimer.C:
-			initialTimer.Stop()
-			return WaitTimedOut
-		}
-	}
+	sleepTimer := clock.NewTimer(interval)
+	defer sleepTimer.Stop()
+
+	waitForTimer := timeout.InitialWait
 	for {
-		select {
-		case <-ctx.Done():
-			return ctx.Err()
-		case <-timeoutTimer.C:
-			return WaitTimedOut
-		default:
-			ok, err := condition()
-			if err != nil {
-				return err
-			}
-			if ok {
-				return nil
-			}
-			sleepTimer := clock.NewTimer(interval)
+		if waitForTimer {
 			select {
 			case <-sleepTimer.C:
 			case <-ctx.Done():
-				sleepTimer.Stop()
 				return ctx.Err()
 			case <-timeoutTimer.C:
-				sleepTimer.Stop()
 				return WaitTimedOut
 			}
-			interval = min(interval*2, maxInterval)
 		}
+		waitForTimer = true
+
+		ok, err := condition()
+		if err != nil {
+			return err
+		}
+		if ok {
+			return nil
+		}
+
+		interval = min(interval*2, maxInterval)
+		sleepTimer.Reset(interval)
 	}
 }
 

From 131e4a5dc719013ee9d41908ce1d1540903b4a2a Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 17:26:32 +0000
Subject: [PATCH 38/41] e2e: improve debug output

---
 e2e/echo_test.go | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/e2e/echo_test.go b/e2e/echo_test.go
index 861fb42..765521c 100644
--- a/e2e/echo_test.go
+++ b/e2e/echo_test.go
@@ -251,7 +251,11 @@ func waitAgentAPIStable(ctx context.Context, t testing.TB, apiClient *agentapisd
 					return nil
 				}
 			} else {
-				t.Logf("Got %T event", evt)
+				var sb strings.Builder
+				if err := json.NewEncoder(&sb).Encode(evt); err != nil {
+					t.Logf("Failed to encode event: %v", err)
+				}
+				t.Logf("Got event: %s", sb.String())
 			}
 		case err := <-errs:
 			return fmt.Errorf("read events: %w", err)

From 8debc4ed42d53ba2d8b03593dd57fe016914be87 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 17:37:16 +0000
Subject: [PATCH 39/41] refactor: inline fillToStable

---
 lib/screentracker/pty_conversation_test.go | 52 ++++++++++++----------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index da288d7..558b01d 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -65,16 +65,6 @@ func advancePast(ctx context.Context, t *testing.T, mClock *quartz.Mock, total t
 	}
 }
 
-// fillToStable sets the screen and advances the clock enough times to fill the
-// snapshot buffer, making status reach "stable".
-func fillToStable(ctx context.Context, t *testing.T, agent *testAgent, mClock *quartz.Mock, screen string, interval time.Duration, threshold int) {
-	t.Helper()
-	agent.setScreen(screen)
-	for i := 0; i < threshold; i++ {
-		advancePast(ctx, t, mClock, interval)
-	}
-}
-
 // advanceUntil advances the mock clock one event at a time until done returns
 // true. Because the snapshot TickerFunc is always pending and WaitFor reuses a
 // single timer via Reset, there is always at least one event to advance.
@@ -306,7 +296,8 @@ func TestMessages(t *testing.T) {
 		c, agent, mClock := newConversation(ctx, t)
 
 		// Agent message is recorded when the first snapshot is taken.
-		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
+		agent.setScreen("1")
+		advancePast(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
 		})
@@ -319,13 +310,15 @@ func TestMessages(t *testing.T) {
 		})
 
 		// Fill to stable so Send can proceed (screen is "2").
-		fillToStable(ctx, t, agent, mClock, "2", interval, threshold)
+		agent.setScreen("2")
+		advancePast(ctx, t, mClock, interval*threshold)
 
 		// User message is recorded.
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "3"})
 
 		// After send, screen is dirty from writeStabilize. Set to "4" and stabilize.
-		fillToStable(ctx, t, agent, mClock, "4", interval, threshold)
+		agent.setScreen("4")
+		advancePast(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "2", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "3", Role: st.ConversationRoleUser},
@@ -333,10 +326,12 @@ func TestMessages(t *testing.T) {
 		})
 
 		// Agent message is updated when the screen changes before a user message.
-		fillToStable(ctx, t, agent, mClock, "5", interval, threshold)
+		agent.setScreen("5")
+		advancePast(ctx, t, mClock, interval*threshold)
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "6"})
 
-		fillToStable(ctx, t, agent, mClock, "7", interval, threshold)
+		agent.setScreen("7")
+		advancePast(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "2", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "3", Role: st.ConversationRoleUser},
@@ -350,7 +345,8 @@ func TestMessages(t *testing.T) {
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "8"})
 
 		// After filling to stable, messages and status are correct.
-		fillToStable(ctx, t, agent, mClock, "7", interval, threshold)
+		agent.setScreen("7")
+		advancePast(ctx, t, mClock, interval*threshold)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 	})
 
@@ -360,18 +356,22 @@ func TestMessages(t *testing.T) {
 		c, agent, mClock := newConversation(ctx, t)
 
 		// Common overlap between screens is removed after a user message.
-		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
+		agent.setScreen("1")
+		advancePast(ctx, t, mClock, interval*threshold)
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
-		fillToStable(ctx, t, agent, mClock, "1\n3", interval, threshold)
+		agent.setScreen("1\n3")
+		advancePast(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "2", Role: st.ConversationRoleUser},
 			{Id: 2, Message: "3", Role: st.ConversationRoleAgent},
 		})
 
-		fillToStable(ctx, t, agent, mClock, "1\n3x", interval, threshold)
+		agent.setScreen("1\n3x")
+		advancePast(ctx, t, mClock, interval*threshold)
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "4"})
-		fillToStable(ctx, t, agent, mClock, "1\n3x\n5", interval, threshold)
+		agent.setScreen("1\n3x\n5")
+		advancePast(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "2", Role: st.ConversationRoleUser},
@@ -391,11 +391,13 @@ func TestMessages(t *testing.T) {
 		})
 
 		// Fill to stable with screen "1", then send.
-		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
+		agent.setScreen("1")
+		advancePast(ctx, t, mClock, interval*threshold)
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
 
 		// After send, set screen to "x" and take snapshots for new agent message.
-		fillToStable(ctx, t, agent, mClock, "x", interval, threshold)
+		agent.setScreen("x")
+		advancePast(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1 ", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "2", Role: st.ConversationRoleUser},
@@ -427,7 +429,8 @@ func TestMessages(t *testing.T) {
 		assert.ErrorIs(t, sendMsg("1"), st.ErrMessageValidationChanging)
 
 		// Fill to stable.
-		fillToStable(ctx, t, agent, mClock, "1", interval, threshold)
+		agent.setScreen("1")
+		advancePast(ctx, t, mClock, interval*threshold)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
 		// Now send should succeed.
@@ -596,7 +599,8 @@ func TestInitialPromptReadiness(t *testing.T) {
 		c.Start(ctx)
 
 		// Fill buffer to reach stability with "ready" screen.
-		fillToStable(ctx, t, agent, mClock, "ready", 1*time.Second, 3)
+		agent.setScreen("ready")
+		advancePast(ctx, t, mClock, 3*time.Second)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
 		// After screen changes, status becomes changing.

From 87d36abe1564279752340ee1b77a0591ce341770 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 17:44:20 +0000
Subject: [PATCH 40/41] refactor: simplify advance test helpers

---
 lib/screentracker/pty_conversation_test.go | 71 ++++++++++------------
 1 file changed, 31 insertions(+), 40 deletions(-)

diff --git a/lib/screentracker/pty_conversation_test.go b/lib/screentracker/pty_conversation_test.go
index 558b01d..eaa4a69 100644
--- a/lib/screentracker/pty_conversation_test.go
+++ b/lib/screentracker/pty_conversation_test.go
@@ -49,20 +49,11 @@ func (a *testAgent) setScreen(s string) {
 	a.screen = s
 }
 
-// advancePast advances the mock clock by total, stepping through each
-// intermediate event so that TickerFunc callbacks run to completion.
-func advancePast(ctx context.Context, t *testing.T, mClock *quartz.Mock, total time.Duration) {
+// advanceFor is a shorthand for advanceUntil with a time-based condition.
+func advanceFor(ctx context.Context, t *testing.T, mClock *quartz.Mock, total time.Duration) {
 	t.Helper()
 	target := mClock.Now().Add(total)
-	for mClock.Now().Before(target) {
-		remaining := target.Sub(mClock.Now())
-		d, ok := mClock.Peek()
-		if !ok || d > remaining {
-			mClock.Advance(remaining).MustWait(ctx)
-			return
-		}
-		mClock.Advance(d).MustWait(ctx)
-	}
+	advanceUntil(ctx, t, mClock, func() bool { return !mClock.Now().Before(target) })
 }
 
 // advanceUntil advances the mock clock one event at a time until done returns
@@ -141,7 +132,7 @@ func statusTest(t *testing.T, params statusTestParams) {
 
 		for i, step := range params.steps {
 			agent.setScreen(step.snapshot)
-			advancePast(ctx, t, mClock, params.cfg.SnapshotInterval)
+			advanceFor(ctx, t, mClock, params.cfg.SnapshotInterval)
 			assert.Equal(t, step.status, c.Status(), "step %d", i)
 		}
 	})
@@ -280,13 +271,13 @@ func TestMessages(t *testing.T) {
 		c, agent, mClock := newConversation(ctx, t)
 
 		agent.setScreen("1")
-		advancePast(ctx, t, mClock, interval)
+		advanceFor(ctx, t, mClock, interval)
 		msgs := c.Messages()
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
 		})
 
-		advancePast(ctx, t, mClock, interval)
+		advanceFor(ctx, t, mClock, interval)
 		assert.Equal(t, msgs, c.Messages())
 	})
 
@@ -297,28 +288,28 @@ func TestMessages(t *testing.T) {
 
 		// Agent message is recorded when the first snapshot is taken.
 		agent.setScreen("1")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
 		})
 
 		// Agent message is updated when the screen changes.
 		agent.setScreen("2")
-		advancePast(ctx, t, mClock, interval)
+		advanceFor(ctx, t, mClock, interval)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "2", Role: st.ConversationRoleAgent},
 		})
 
 		// Fill to stable so Send can proceed (screen is "2").
 		agent.setScreen("2")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 
 		// User message is recorded.
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "3"})
 
 		// After send, screen is dirty from writeStabilize. Set to "4" and stabilize.
 		agent.setScreen("4")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "2", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "3", Role: st.ConversationRoleUser},
@@ -327,11 +318,11 @@ func TestMessages(t *testing.T) {
 
 		// Agent message is updated when the screen changes before a user message.
 		agent.setScreen("5")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "6"})
 
 		agent.setScreen("7")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "2", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "3", Role: st.ConversationRoleUser},
@@ -346,7 +337,7 @@ func TestMessages(t *testing.T) {
 
 		// After filling to stable, messages and status are correct.
 		agent.setScreen("7")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 	})
 
@@ -357,10 +348,10 @@ func TestMessages(t *testing.T) {
 
 		// Common overlap between screens is removed after a user message.
 		agent.setScreen("1")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
 		agent.setScreen("1\n3")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "2", Role: st.ConversationRoleUser},
@@ -368,10 +359,10 @@ func TestMessages(t *testing.T) {
 		})
 
 		agent.setScreen("1\n3x")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "4"})
 		agent.setScreen("1\n3x\n5")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "2", Role: st.ConversationRoleUser},
@@ -392,12 +383,12 @@ func TestMessages(t *testing.T) {
 
 		// Fill to stable with screen "1", then send.
 		agent.setScreen("1")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		sendAndAdvance(ctx, t, c, mClock, st.MessagePartText{Content: "2"})
 
 		// After send, set screen to "x" and take snapshots for new agent message.
 		agent.setScreen("x")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		assertMessages(t, c, []st.ConversationMessage{
 			{Id: 0, Message: "1 ", Role: st.ConversationRoleAgent},
 			{Id: 1, Message: "2", Role: st.ConversationRoleUser},
@@ -430,7 +421,7 @@ func TestMessages(t *testing.T) {
 
 		// Fill to stable.
 		agent.setScreen("1")
-		advancePast(ctx, t, mClock, interval*threshold)
+		advanceFor(ctx, t, mClock, interval*threshold)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
 		// Now send should succeed.
@@ -438,7 +429,7 @@ func TestMessages(t *testing.T) {
 
 		// After send, screen is dirty. Set to "2" (different from "1") so status is changing.
 		agent.setScreen("2")
-		advancePast(ctx, t, mClock, interval)
+		advanceFor(ctx, t, mClock, interval)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 		assert.ErrorIs(t, sendMsg("5"), st.ErrMessageValidationChanging)
 	})
@@ -473,7 +464,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 		c.Start(ctx)
 
 		// Take a snapshot with "loading...". Threshold is 1 (stability 0 / interval 1s = 0 + 1 = 1).
-		advancePast(ctx, t, mClock, 1*time.Second)
+		advanceFor(ctx, t, mClock, 1*time.Second)
 
 		// Even though screen is stable, status should be changing because
 		// the initial prompt is still in the outbound queue.
@@ -501,13 +492,13 @@ func TestInitialPromptReadiness(t *testing.T) {
 		c.Start(ctx)
 
 		// Agent not ready initially.
-		advancePast(ctx, t, mClock, 1*time.Second)
+		advanceFor(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
 		// Agent becomes ready, but status stays "changing" because the
 		// initial prompt is still in the outbound queue.
 		agent.setScreen("ready")
-		advancePast(ctx, t, mClock, 1*time.Second)
+		advanceFor(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 	})
 
@@ -537,7 +528,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 		c.Start(ctx)
 
 		// Status is "changing" while waiting for readiness.
-		advancePast(ctx, t, mClock, 1*time.Second)
+		advanceFor(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
 		// Agent becomes ready. The readiness loop detects this, the snapshot
@@ -553,7 +544,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 		// advance enough ticks for the snapshot loop to record it as an
 		// agent message and fill the stability buffer (threshold=1).
 		agent.setScreen("response")
-		advancePast(ctx, t, mClock, 2*time.Second)
+		advanceFor(ctx, t, mClock, 2*time.Second)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 	})
 
@@ -576,7 +567,7 @@ func TestInitialPromptReadiness(t *testing.T) {
 		c := st.NewPTY(ctx, cfg)
 		c.Start(ctx)
 
-		advancePast(ctx, t, mClock, 1*time.Second)
+		advanceFor(ctx, t, mClock, 1*time.Second)
 
 		// Status should be stable because no initial prompt to wait for.
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
@@ -600,17 +591,17 @@ func TestInitialPromptReadiness(t *testing.T) {
 
 		// Fill buffer to reach stability with "ready" screen.
 		agent.setScreen("ready")
-		advancePast(ctx, t, mClock, 3*time.Second)
+		advanceFor(ctx, t, mClock, 3*time.Second)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 
 		// After screen changes, status becomes changing.
 		agent.setScreen("processing...")
-		advancePast(ctx, t, mClock, 1*time.Second)
+		advanceFor(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusChanging, c.Status())
 
 		// After screen is stable again (3 identical snapshots), status becomes stable.
-		advancePast(ctx, t, mClock, 1*time.Second)
-		advancePast(ctx, t, mClock, 1*time.Second)
+		advanceFor(ctx, t, mClock, 1*time.Second)
+		advanceFor(ctx, t, mClock, 1*time.Second)
 		assert.Equal(t, st.ConversationStatusStable, c.Status())
 	})
 }

From 0e37003ecb6ad52763472192c87d9b2aa5ebd395 Mon Sep 17 00:00:00 2001
From: Cian Johnston <cian@coder.com>
Date: Tue, 10 Feb 2026 18:20:47 +0000
Subject: [PATCH 41/41] chore: fix e2e race condition

---
 e2e/echo.go | 47 ++++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/e2e/echo.go b/e2e/echo.go
index 7388d5e..0923539 100644
--- a/e2e/echo.go
+++ b/e2e/echo.go
@@ -89,11 +89,10 @@ func runEchoAgent(scriptPath string) {
 			if entry.ThinkDurationMS > 0 {
 				redrawTerminal(messages, true)
 				spinnerCtx, spinnerCancel := context.WithCancel(ctx)
-				go runSpinner(spinnerCtx)
+				spinnerDone := runSpinner(spinnerCtx)
 				time.Sleep(time.Duration(entry.ThinkDurationMS) * time.Millisecond)
-				if spinnerCancel != nil {
-					spinnerCancel()
-				}
+				spinnerCancel()
+				<-spinnerDone
 			}
 
 			messages = append(messages, st.ConversationMessage{
@@ -133,9 +132,10 @@ func runEchoAgent(scriptPath string) {
 		if entry.ThinkDurationMS > 0 {
 			redrawTerminal(messages, true)
 			spinnerCtx, spinnerCancel := context.WithCancel(ctx)
-			go runSpinner(spinnerCtx)
+			spinnerDone := runSpinner(spinnerCtx)
 			time.Sleep(time.Duration(entry.ThinkDurationMS) * time.Millisecond)
 			spinnerCancel()
+			<-spinnerDone
 		}
 
 		messages = append(messages, st.ConversationMessage{
@@ -190,21 +190,26 @@ func cleanTerminalInput(input string) string {
 	return strings.TrimSpace(input)
 }
 
-func runSpinner(ctx context.Context) {
-	spinnerChars := []string{"|", "/", "-", "\\"}
-	ticker := time.NewTicker(200 * time.Millisecond)
-	defer ticker.Stop()
-	i := 0
-
-	for {
-		select {
-		case <-ticker.C:
-			fmt.Printf("\rThinking %s", spinnerChars[i%len(spinnerChars)])
-			i++
-		case <-ctx.Done():
-			// Clear spinner on cancellation
-			fmt.Print("\r" + strings.Repeat(" ", 20) + "\r")
-			return
+func runSpinner(ctx context.Context) <-chan struct{} {
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		spinnerChars := []string{"|", "/", "-", "\\"}
+		ticker := time.NewTicker(200 * time.Millisecond)
+		defer ticker.Stop()
+		i := 0
+
+		for {
+			select {
+			case <-ticker.C:
+				fmt.Printf("\rThinking %s", spinnerChars[i%len(spinnerChars)])
+				i++
+			case <-ctx.Done():
+				// Clear spinner on cancellation
+				fmt.Print("\r" + strings.Repeat(" ", 20) + "\r")
+				return
+			}
 		}
-	}
+	}()
+	return done
 }