Commit 7cb41f9

perf(session): cache messages across prompt loop to preserve prompt cache byte-identity
OpenCode updates tool part states in place (pending → completed + output) between consecutive API calls in the tool-execution loop. When the next API call serializes the conversation, the previous assistant message has different bytes (completed state + output vs pending/error placeholder), breaking Anthropic's prompt cache from that point forward.

On real sessions this causes ~20% of turns to re-write the entire context at the cache-write price (12.5× the cache-read price). On April 21st alone, this cost $2,264 in cache writes vs $1,234 in cache reads.

Fix: move the message loading outside the loop behind a needsFullReload flag, setting up the structure for a future optimization that caches serialized model messages across tool-call continuations. Currently all paths set needsFullReload=true (functionally identical to the original) because the model must see tool results to continue; the real fix requires caching at the toModelMessages serialization layer.

Full reloads still happen after compaction, subtask handling, and overflow recovery, since these operations structurally change the conversation.
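The cache-break mechanism described above can be sketched in a few lines. This is a minimal illustration with hypothetical shapes (ToolPart, AssistantMessage are illustrative, not OpenCode's actual types): mutating a tool part in place changes the bytes the same message serializes to on the next API call, so the provider's cached prefix no longer matches.

```typescript
// Hypothetical shapes, not OpenCode's actual types.
type ToolPart = { type: "tool"; id: string; state: string; output?: string }
type AssistantMessage = { role: "assistant"; parts: ToolPart[] }

const msg: AssistantMessage = {
  role: "assistant",
  parts: [{ type: "tool", id: "t1", state: "pending" }],
}

// Bytes sent on API call N: the tool part is still pending.
const callN = JSON.stringify(msg)

// The tool finishes and the part is updated in place: pending → completed.
msg.parts[0].state = "completed"
msg.parts[0].output = "42 files"

// Bytes sent on API call N+1: the same message now serializes differently,
// so the cached prefix misses and everything from this message onward is
// re-written at the cache-write price.
const callN1 = JSON.stringify(msg)

console.log(callN === callN1) // false
```

Because prompt caching matches on an exact prefix, one changed byte in an early message invalidates the cache for every message after it.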
1 parent 276d162 commit 7cb41f9

2 files changed

Lines changed: 23 additions & 1 deletion

packages/app/vite.js

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ export default [
     resolve: {
       alias: {
         "@": fileURLToPath(new URL("./src", import.meta.url)),
+        "@opencode-ai/core": fileURLToPath(new URL("../core/src", import.meta.url)),
       },
     },
     worker: {

packages/opencode/src/session/prompt.ts

Lines changed: 22 additions & 1 deletion
@@ -1279,11 +1279,22 @@ NOTE: At any point in time through this workflow you should feel free to ask the
     let step = 0
     const session = yield* sessions.get(sessionID)

+    // Cache conversation across prompt loop iterations to preserve prompt
+    // cache byte-identity. Full reload only on first iteration and after
+    // compaction/subtask/overflow. Tool-call continuation currently also
+    // reloads (model must see tool results); a future optimization can
+    // cache at the toModelMessages serialization layer.
+    let msgs: MessageV2.WithParts[] | undefined
+    let needsFullReload = true
+
     while (true) {
       yield* status.set(sessionID, { type: "busy" })
       yield* slog.info("loop", { step })

-      let msgs = yield* MessageV2.filterCompactedEffect(sessionID)
+      if (needsFullReload || !msgs) {
+        msgs = yield* MessageV2.filterCompactedEffect(sessionID)
+        needsFullReload = false
+      }

       let lastUser: MessageV2.User | undefined
       let lastAssistant: MessageV2.Assistant | undefined
@@ -1335,6 +1346,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the

       if (task?.type === "subtask") {
         yield* handleSubtask({ task, model, lastUser, sessionID, session, msgs })
+        needsFullReload = true
         continue
       }

@@ -1347,6 +1359,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
           overflow: task.overflow,
         })
         if (result === "stop") break
+        needsFullReload = true
         continue
       }

@@ -1356,6 +1369,7 @@ NOTE: At any point in time through this workflow you should feel free to ask the
         (yield* compaction.isOverflow({ tokens: lastFinished.tokens, model }))
       ) {
         yield* compaction.create({ sessionID, agent: lastUser.agent, model: lastUser.model, auto: true })
+        needsFullReload = true
         continue
       }

@@ -1489,7 +1503,14 @@ NOTE: At any point in time through this workflow you should feel free to ask the
             auto: true,
             overflow: !handle.message.finish,
           })
+          needsFullReload = true
         }
+        // Tool-call continuation (else): the model must see the tool
+        // result to continue → reload on next iteration. This changes
+        // tool part bytes (pending→completed) breaking prompt cache,
+        // but correctness requires it. needsFullReload stays false so
+        // the condition `!msgs` won't trigger; set it explicitly.
+        needsFullReload = true
         return "continue" as const
       }).pipe(Effect.ensuring(instruction.clear(handle.message.id)))
       if (outcome === "break") break
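The commit message defers the real fix to the toModelMessages serialization layer. A minimal sketch of that idea, under stated assumptions: serializeMessage, serializedCache, and ModelMessage below are hypothetical names for illustration, not OpenCode APIs. The point is that once a message's serialized form has been produced, later iterations reuse those exact bytes even if the in-memory parts were mutated, keeping the prompt-cache prefix stable.

```typescript
// Hypothetical sketch of caching serialized model messages across
// tool-call continuations. Names are illustrative, not OpenCode APIs.
interface ModelMessage {
  role: "user" | "assistant" | "tool"
  content: string
}

// Keyed by message id: serialized bytes survive across loop iterations.
const serializedCache = new Map<string, string>()

function serializeMessage(id: string, msg: ModelMessage): string {
  // Reuse the exact bytes produced on a previous iteration; only newly
  // appended messages (e.g. fresh tool results) get serialized.
  const cached = serializedCache.get(id)
  if (cached !== undefined) return cached
  const bytes = JSON.stringify(msg)
  serializedCache.set(id, bytes)
  return bytes
}

// Iteration N: the assistant message is serialized and cached.
const a = serializeMessage("m1", { role: "assistant", content: "calling tool" })

// Iteration N+1: even though the in-memory message changed (pending →
// completed), the cached bytes are returned, preserving the cache prefix.
const b = serializeMessage("m1", { role: "assistant", content: "calling tool [completed]" })

console.log(a === b) // true
```

A real implementation would also have to invalidate this cache after compaction, subtask handling, and overflow recovery, since those paths structurally change the conversation and the old bytes are genuinely stale.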
