docker · aheritier · Jun 12, 2026
@@ -143,16 +143,23 @@ models:
 
 ### Anthropic
 
-Uses an integer token budget (1024–32768):
+Uses token budgets on Claude Sonnet / older Opus models, and adaptive thinking on Claude Opus 4.7+:
 
 ```yaml
 models:
   claude:
     provider: anthropic
     model: claude-sonnet-4-5
-    thinking_budget: 16384 # must be < max_tokens
+    thinking_budget: 16384 # token budget; must be < max_tokens
+
+  opus:
+    provider: anthropic
+    model: claude-opus-4-8
+    thinking_budget: adaptive/high # adaptive | adaptive/low | adaptive/medium | adaptive/high | adaptive/xhigh | adaptive/max
 ```
 
+Claude Opus 4.7+ rejects token-based thinking requests; prefer `adaptive` or `adaptive/<level>` for those models.
+
 ### Google Gemini 2.5
 
 Uses an integer token budget. `0` disables, `-1` lets the model decide:
@@ -256,13 +263,13 @@ models:
 
 ## Thinking Display (Anthropic)
 
-For Anthropic Claude models, `thinking_display` controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); set this provider option to receive summarized thinking:
+For Anthropic Claude models, `thinking_display` controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7+ hides thinking content by default (`omitted`); set this provider option to receive summarized thinking:
 
 ```yaml
 models:
-  opus-4-7:
+  opus-4-8:
     provider: anthropic
-    model: claude-opus-4-7
+    model: claude-opus-4-8
     thinking_budget: adaptive
     provider_opts:
       thinking_display: summarized # "summarized", "display", or "omitted"

@@ -25,10 +25,10 @@ docker-agent exposes this through a single `thinking_budget` field on any named
 | Provider       | Format     | Values                                                       | Default      |
 | -------------- | ---------- | ------------------------------------------------------------ | ------------ |
 | OpenAI         | string     | `minimal`, `low`, `medium`, `high`, `xhigh`, `none`, `adaptive/<level>` (`max` only via `adaptive/max`) | `medium`     |
-| Anthropic      | int or str | 1024–32768 tokens, or `adaptive`, `low`–`max`, `none`        | off          |
+| Anthropic      | int or str | 1024–32768 tokens (Sonnet / older Opus), or `adaptive`, `adaptive/<level>`, `low`–`max`, `none` | off          |
 | Gemini 2.5     | int        | `0` (off), `-1` (dynamic), or token count (max 24576 / 32768) | `-1` (dynamic)|
 | Gemini 3       | string     | `minimal`, `low`, `medium`, `high`                           | model-dependent |
-| AWS Bedrock    | int or str | 1024–32768 tokens (`minimal`–`max` mapped to tokens)         | off          |
+| AWS Bedrock    | int or str | 1024–32768 tokens for Claude token-budget models; `adaptive` / `adaptive/<level>` for Opus 4.7+ | off          |
 | xAI / Mistral  | string     | `minimal`, `low`, `medium`, `high`, `xhigh`, `none`          | off          |
 
 ## OpenAI
@@ -78,7 +78,7 @@ models:
 
 Anthropic Claude supports two thinking modes: a **token budget** (older models) and **adaptive / effort-based** thinking (newer models).
 
-### Token budget (Claude 4 and earlier)
+### Token budget (Claude Sonnet and older Opus models)
 
 Set an explicit number of thinking tokens (1024–32768). This must be less than `max_tokens`:
 
@@ -92,6 +92,11 @@ models:
 
 docker-agent auto-adjusts `max_tokens` when you set a thinking budget but leave `max_tokens` at its default. If you set `max_tokens` explicitly, it must be greater than `thinking_budget`.
 
+<div class="callout callout-warning" markdown="1">
+<div class="callout-title">Opus 4.7+ uses adaptive thinking</div>
+  <p>Claude Opus 4.7 and newer, including Opus 4.8, reject token-based thinking requests. Prefer <code>thinking_budget: adaptive</code> or <code>thinking_budget: adaptive/high</code>. docker-agent converts numeric budgets on these models to adaptive thinking for compatibility.</p>
+</div>
+
 ### Adaptive thinking (Claude Opus 4.6+)
 
 Newer Claude models support adaptive thinking, where the model decides how much to think. Use `adaptive` or pair it with an effort level:
@@ -100,13 +105,13 @@ Newer Claude models support adaptive thinking, where the model decides how much
 models:
   claude-adaptive:
     provider: anthropic
-    model: claude-opus-4-6
+    model: claude-opus-4-8
     thinking_budget: adaptive          # model decides effort
 
   claude-adaptive-low:
     provider: anthropic
-    model: claude-opus-4-6
-    thinking_budget: low               # adaptive with low effort: low | medium | high | max
+    model: claude-opus-4-8
+    thinking_budget: adaptive/low      # adaptive with low effort: low | medium | high | xhigh | max
 ```
 
 **Adaptive effort levels:**
@@ -116,6 +121,7 @@ models:
 | `low`     | Minimal thinking; fastest adaptive mode.          |
 | `medium`  | Moderate effort.                                  |
 | `high`    | Thorough reasoning; default for `adaptive`.       |
+| `xhigh`   | Extra-high reasoning effort.                      |
 | `max`     | Maximum effort.                                   |
 
 ### Disabling thinking
@@ -147,13 +153,13 @@ models:
 
 ### Thinking display
 
-Claude Opus 4.7 hides thinking content by default. Use `thinking_display` in `provider_opts` to control what you receive:
+Claude Opus 4.7+ hides thinking content by default. Use `thinking_display` in `provider_opts` to control what you receive:
 
 ```yaml
 models:
-  opus-47:
+  opus-48:
     provider: anthropic
-    model: claude-opus-4-7
+    model: claude-opus-4-8
     thinking_budget: adaptive
     provider_opts:
       thinking_display: summarized   # summarized | display | omitted
@@ -163,7 +169,7 @@ models:
 | ------------ | ------------------------------------------------------------------------------------- |
 | `summarized` | Thinking blocks returned with a text summary (default for Claude 4 models pre-4.7).  |
 | `display`    | Full thinking blocks returned for display.                                            |
-| `omitted`    | Thinking blocks hidden — only the signature is returned (default for Opus 4.7).       |
+| `omitted`    | Thinking blocks hidden — only the signature is returned (default for Opus 4.7+).      |
 
 Full thinking tokens are billed regardless of `thinking_display`.
 
@@ -223,7 +229,19 @@ models:
 
 ## AWS Bedrock (Claude)
 
-Bedrock Claude uses a token budget like Anthropic, but only supports integer token values. String effort levels (`minimal`–`max`) are mapped automatically:
+Bedrock Claude supports extended thinking — an internal reasoning phase before the model produces its response. Most Claude models use a token budget; Claude Opus 4.7+ uses adaptive thinking instead:
+
+```yaml
+models:
+  bedrock-claude-adaptive:
+    provider: amazon-bedrock
+    model: global.anthropic.claude-opus-4-8-20260601-v1:0
+    thinking_budget: adaptive/high   # adaptive | adaptive/low | adaptive/medium | adaptive/high | adaptive/xhigh | adaptive/max
+    provider_opts:
+      region: us-east-1
+```
+
+For models that still accept token budgets, use an integer token count (1024–32768) or an effort level string that maps automatically:
 
 | Effort level | Token budget |
 | ------------ | ------------ |

@@ -95,16 +95,19 @@ models:
 
 ## Available Models
 
-| Model ID            | Description                                         |
-| ------------------- | --------------------------------------------------- |
-| `claude-opus-4-7`   | Highest-capability Opus model; supports task budget |
-| `claude-sonnet-4-5` | Most capable Sonnet; supports extended thinking     |
-| `claude-sonnet-4-0` | Previous Sonnet generation, still supported         |
-| `claude-haiku-4-5`  | Fast and inexpensive, good for tight loops          |
+| Model ID            | Description                                                 |
+| ------------------- | ----------------------------------------------------------- |
+| `claude-opus-4-8`   | Highest-capability Opus model; uses adaptive thinking       |
+| `claude-opus-4-7`   | Opus model with adaptive thinking and task budget support   |
+| `claude-sonnet-4-5` | Most capable Sonnet; supports extended thinking             |
+| `claude-sonnet-4-0` | Previous Sonnet generation, still supported                 |
+| `claude-haiku-4-5`  | Fast and inexpensive, good for tight loops                  |
 
 ## Thinking Budget
 
-Anthropic uses integer token budgets (1024–32768). Thinking is off unless you set `thinking_budget`; when set, interleaved thinking is auto-enabled:
+Anthropic supports both token budgets and adaptive thinking. Thinking is off unless you set `thinking_budget`; when set, interleaved thinking is auto-enabled.
+
+Use numeric token budgets with Claude Sonnet and older Opus models:
 
 ```yaml
 models:
@@ -114,6 +117,18 @@ models:
     thinking_budget: 16384 # must be < max_tokens
 ```
 
+Use adaptive thinking with Claude Opus 4.7+ (including Opus 4.8):
+
+```yaml
+models:
+  opus:
+    provider: anthropic
+    model: claude-opus-4-8
+    thinking_budget: adaptive/high # adaptive | adaptive/low | adaptive/medium | adaptive/high | adaptive/xhigh | adaptive/max
+```
+
+Claude Opus 4.7+ rejects token-based thinking requests (`thinking.type=enabled`). docker-agent converts numeric budgets on these models to adaptive thinking, but new configs should prefer `adaptive` or `adaptive/<level>` directly.
+
 ## Interleaved Thinking
 
 Auto-enabled whenever a thinking budget is configured on a Claude model. Allows tool calls during model reasoning for more integrated problem-solving:
@@ -196,13 +211,13 @@ AI, or the Message Batches API.
 
 ## Thinking Display
 
-Controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); earlier Claude 4 models default to `summarized`. Set `thinking_display` in `provider_opts` to override:
+Controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7+ hides thinking content by default (`omitted`); earlier Claude 4 models default to `summarized`. Set `thinking_display` in `provider_opts` to override:
 
 ```yaml
 models:
-  claude-opus-4-7:
+  claude-opus-4-8:
     provider: anthropic
-    model: claude-opus-4-7
+    model: claude-opus-4-8
     thinking_budget: adaptive
     provider_opts:
       thinking_display: summarized # "summarized", "display", or "omitted"
@@ -212,7 +227,7 @@ Valid values:
 
 - `summarized`: thinking blocks are returned with summarized thinking text (default for Claude 4 models prior to Opus 4.7).
 - `display`: thinking blocks are returned for display (use this to re-enable thinking output on Opus 4.7+).
-- `omitted`: thinking blocks are returned with an empty thinking field; the signature is still returned for multi-turn continuity (default for Opus 4.7). Useful to reduce time-to-first-text-token when streaming.
+- `omitted`: thinking blocks are returned with an empty thinking field; the signature is still returned for multi-turn continuity (default for Opus 4.7+). Useful to reduce time-to-first-text-token when streaming.
 
 Note: `thinking_display` applies to both `thinking_budget` with token counts and adaptive/effort-based budgets. Full thinking tokens are billed regardless of the `thinking_display` value.
 

@@ -103,7 +103,21 @@ Use inference profile prefixes for optimal routing:
 
 ## Thinking Budget (Claude on Bedrock)
 
-Bedrock Claude models support extended thinking — an internal reasoning phase before the model produces its response. Set `thinking_budget` to a token count (1024–32768) or an effort level string that maps automatically:
+Bedrock Claude models support extended thinking — an internal reasoning phase before the model produces its response. Use adaptive thinking for Claude Opus 4.7+ (including Opus 4.8):
+
+```yaml
+models:
+  bedrock-opus-thinking:
+    provider: amazon-bedrock
+    model: global.anthropic.claude-opus-4-8-20260601-v1:0
+    thinking_budget: adaptive/high # adaptive | adaptive/low | adaptive/medium | adaptive/high | adaptive/xhigh | adaptive/max
+    provider_opts:
+      region: us-east-1
+```
+
+Claude Opus 4.7+ rejects token-based thinking requests (`thinking.type=enabled`). docker-agent converts numeric budgets on these models to adaptive thinking, but new configs should prefer `adaptive` or `adaptive/<level>` directly.
+
+For Claude models that still accept token budgets, set `thinking_budget` to a token count (1024–32768) or an effort level string that maps automatically:
 
 | Effort level | Token budget |
 | ------------ | ------------ |
@@ -124,7 +138,7 @@ models:
       region: us-east-1
 ```
 
-`thinking_budget` must be ≥ 1024 and less than `max_tokens`. Values outside this range are logged as a warning and ignored.
+For token-budget models, `thinking_budget` must be ≥ 1024 and less than `max_tokens`. Values outside this range are logged as a warning and ignored.
 
 <div class="callout callout-info" markdown="1">
 <div class="callout-title">Temperature and top_p

@@ -4,7 +4,7 @@
 agents:
   root:
     model: gpt-5-mini-min # <- try with gpt-5-mini-high
-    # model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high or claude-opus-4-6-adaptive
+    # model: claude-opus-4-8-adaptive # <- try with Opus adaptive thinking
     # model: gemini-2-5-flash-dynamic-thinking # <- try with -no-thinking, -low or -high variants
     description: a helpful assistant that thinks
     instruction: you are a helpful assistant who can also use tools, but only if you need to
@@ -36,22 +36,22 @@ models:
     provider_opts:
       interleaved_thinking: true # <- enables interleaved thinking, aka tool calling during model reasoning
 
-  claude-opus-4-6-adaptive:
+  claude-opus-4-8-adaptive:
     provider: anthropic
-    model: claude-opus-4-6
-    thinking_budget: adaptive # <- lets the model decide when and how much to think (recommended for 4.6)
+    model: claude-opus-4-8
+    thinking_budget: adaptive # <- required style for Opus 4.7+; token budgets are converted for compatibility
 
-  claude-opus-4-6-low:
+  claude-opus-4-8-low:
     provider: anthropic
-    model: claude-opus-4-6
-    thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max"
+    model: claude-opus-4-8
+    thinking_budget: adaptive/low # <- adaptive thinking with low effort: "low", "medium", "high", "xhigh", "max"
 
-  claude-opus-4-7-summarized:
+  claude-opus-4-8-summarized:
     provider: anthropic
-    model: claude-opus-4-6 # <- Opus 4.7 hides thinking by default; use the same flag with any recent Claude model
+    model: claude-opus-4-8 # <- Opus 4.7+ hides thinking by default; use thinking_display to receive summaries
     thinking_budget: adaptive
     provider_opts:
-      thinking_display: summarized # <- "summarized", "display", or "omitted" (Opus 4.7 defaults to omitted)
+      thinking_display: summarized # <- "summarized", "display", or "omitted" (Opus 4.7+ defaults to omitted)
 
   gemini-2-5-flash-dynamic-thinking:
     provider: google

@@ -208,8 +208,8 @@ func TestApplyThinkingConfig(t *testing.T) {
 			wantTokens:  2048,
 		},
 		{
-			name:         "opus-4-6 token budget auto-switches to adaptive",
-			model:        "claude-opus-4-6",
+			name:         "opus-4-8 token budget auto-switches to adaptive",
+			model:        "claude-opus-4-8",
 			budget:       &latest.ThinkingBudget{Tokens: 4096},
 			maxTokens:    8192,
 			wantEnabled:  true,
@@ -226,8 +226,8 @@ func TestApplyThinkingConfig(t *testing.T) {
 			wantEffort:   "high",
 		},
 		{
-			name:            "opus-4-6 dated variant token budget auto-switches to adaptive",
-			model:           "claude-opus-4-6-20251101",
+			name:            "opus-4-8 dated variant token budget auto-switches to adaptive",
+			model:           "claude-opus-4-8-20260601",
 			budget:          &latest.ThinkingBudget{Tokens: 8000},
 			opts:            map[string]any{"thinking_display": "summarized"},
 			maxTokens:       16384,
@@ -326,8 +326,8 @@ func TestApplyBetaThinkingConfig(t *testing.T) {
 			maxTokens: 8192,
 		},
 		{
-			name:         "opus-4-6 token budget auto-switches to adaptive",
-			model:        "claude-opus-4-6",
+			name:         "opus-4-8 token budget auto-switches to adaptive",
+			model:        "claude-opus-4-8",
 			budget:       &latest.ThinkingBudget{Tokens: 4096},
 			maxTokens:    8192,
 			wantAdaptive: true,
@@ -407,11 +407,11 @@ func TestAdjustMaxTokensForThinking(t *testing.T) {
 		assert.Contains(t, err.Error(), "max_tokens")
 	})
 
-	t.Run("opus-4-6 with token budget skips adjustment (will be coerced to adaptive)", func(t *testing.T) {
+	t.Run("opus-4-6 with token budget is adjusted", func(t *testing.T) {
 		c := clientWithModel("claude-opus-4-6", &latest.ThinkingBudget{Tokens: 16384}, nil)
 		got, err := c.adjustMaxTokensForThinking(8192)
 		require.NoError(t, err)
-		assert.Equal(t, int64(8192), got)
+		assert.Equal(t, int64(16384+8192), got)
 	})
 
 	t.Run("opus-4-7 with token budget skips adjustment (will be coerced to adaptive)", func(t *testing.T) {
@@ -422,6 +422,15 @@ func TestAdjustMaxTokensForThinking(t *testing.T) {
 		require.NoError(t, err)
 		assert.Equal(t, int64(8192), got)
 	})
+
+	t.Run("opus-4-8 with token budget skips adjustment (will be coerced to adaptive)", func(t *testing.T) {
+		c := clientWithModel("claude-opus-4-8", &latest.ThinkingBudget{Tokens: 32768}, nil)
+		userMax := int64(8192)
+		c.ModelConfig.MaxTokens = &userMax
+		got, err := c.adjustMaxTokensForThinking(8192)
+		require.NoError(t, err)
+		assert.Equal(t, int64(8192), got)
+	})
 }
 
 func TestCoerceAdaptiveThinking(t *testing.T) {
@@ -436,16 +445,10 @@ func TestCoerceAdaptiveThinking(t *testing.T) {
 		assert.Same(t, in, c.coerceAdaptiveThinking(), "budget pointer must not be replaced")
 	})
 
-	t.Run("opus-4-6 token budget is coerced to adaptive", func(t *testing.T) {
+	t.Run("opus-4-6 token budget is preserved", func(t *testing.T) {
 		in := &latest.ThinkingBudget{Tokens: 4096}
 		c := clientWithModel("claude-opus-4-6", in, nil)
-		got := c.coerceAdaptiveThinking()
-		require.NotNil(t, got)
-		assert.Equal(t, "adaptive", got.Effort)
-		assert.Equal(t, 0, got.Tokens)
-		// Original must not be mutated.
-		assert.Equal(t, 4096, in.Tokens)
-		assert.Empty(t, in.Effort)
+		assert.Same(t, in, c.coerceAdaptiveThinking())
 	})
 
 	t.Run("opus-4-7 adaptive budget is preserved as-is", func(t *testing.T) {
@@ -454,6 +457,15 @@ func TestCoerceAdaptiveThinking(t *testing.T) {
 		assert.Same(t, in, c.coerceAdaptiveThinking())
 	})
 
+	t.Run("opus-4-8 token budget is coerced to adaptive", func(t *testing.T) {
+		in := &latest.ThinkingBudget{Tokens: 4096}
+		c := clientWithModel("claude-opus-4-8", in, nil)
+		got := c.coerceAdaptiveThinking()
+		require.NotNil(t, got)
+		assert.Equal(t, "adaptive", got.Effort)
+		assert.Equal(t, 0, got.Tokens)
+	})
+
 	// Disabled or non-positive token budgets must NOT be silently coerced to
 	// adaptive thinking on Opus 4.7+ — the user has either explicitly
 	// disabled thinking or supplied an invalid value.