diff --git a/docs/configuration/models/index.md b/docs/configuration/models/index.md index d3b6093f8..6e7d97e49 100644 --- a/docs/configuration/models/index.md +++ b/docs/configuration/models/index.md @@ -143,16 +143,23 @@ models: ### Anthropic -Uses an integer token budget (1024–32768): +Uses token budgets on Claude Sonnet / older Opus models, and adaptive thinking on Claude Opus 4.7+: ```yaml models: claude: provider: anthropic model: claude-sonnet-4-5 - thinking_budget: 16384 # must be < max_tokens + thinking_budget: 16384 # token budget; must be < max_tokens + + opus: + provider: anthropic + model: claude-opus-4-8 + thinking_budget: adaptive/high # adaptive | adaptive/low | adaptive/medium | adaptive/high | adaptive/xhigh | adaptive/max ``` +Claude Opus 4.7+ rejects token-based thinking requests; prefer `adaptive` or `adaptive/<effort>` for those models. + ### Google Gemini 2.5 Uses an integer token budget. `0` disables, `-1` lets the model decide: @@ -256,13 +263,13 @@ models: ## Thinking Display (Anthropic) -For Anthropic Claude models, `thinking_display` controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); set this provider option to receive summarized thinking: +For Anthropic Claude models, `thinking_display` controls whether thinking blocks are returned in responses when thinking is enabled. 
Claude Opus 4.7+ hides thinking content by default (`omitted`); set this provider option to receive summarized thinking: ```yaml models: - opus-4-7: + opus-4-8: provider: anthropic - model: claude-opus-4-7 + model: claude-opus-4-8 thinking_budget: adaptive provider_opts: thinking_display: summarized # "summarized", "display", or "omitted" diff --git a/docs/guides/thinking/index.md b/docs/guides/thinking/index.md index 19239fbb4..083b9f45a 100644 --- a/docs/guides/thinking/index.md +++ b/docs/guides/thinking/index.md @@ -25,10 +25,10 @@ docker-agent exposes this through a single `thinking_budget` field on any named | Provider | Format | Values | Default | | -------------- | ---------- | ------------------------------------------------------------ | ------------ | | OpenAI | string | `minimal`, `low`, `medium`, `high`, `xhigh`, `none`, `adaptive/<effort>` (`max` only via `adaptive/max`) | `medium` | -| Anthropic | int or str | 1024–32768 tokens, or `adaptive`, `low`–`max`, `none` | off | +| Anthropic | int or str | 1024–32768 tokens (Sonnet / older Opus), or `adaptive`, `adaptive/<effort>`, `low`–`max`, `none` | off | | Gemini 2.5 | int | `0` (off), `-1` (dynamic), or token count (max 24576 / 32768) | `-1` (dynamic)| | Gemini 3 | string | `minimal`, `low`, `medium`, `high` | model-dependent | -| AWS Bedrock | int or str | 1024–32768 tokens (`minimal`–`max` mapped to tokens) | off | +| AWS Bedrock | int or str | 1024–32768 tokens for Claude token-budget models; `adaptive` / `adaptive/<effort>` for Opus 4.7+ | off | | xAI / Mistral | string | `minimal`, `low`, `medium`, `high`, `xhigh`, `none` | off | ## OpenAI @@ -78,7 +78,7 @@ models: Anthropic Claude supports two thinking modes: a **token budget** (older models) and **adaptive / effort-based** thinking (newer models). -### Token budget (Claude 4 and earlier) +### Token budget (Claude Sonnet and older Opus models) Set an explicit number of thinking tokens (1024–32768). 
This must be less than `max_tokens`: @@ -92,6 +92,11 @@ models: docker-agent auto-adjusts `max_tokens` when you set a thinking budget but leave `max_tokens` at its default. If you set `max_tokens` explicitly, it must be greater than `thinking_budget`. +
+
Opus 4.7+ uses adaptive thinking
+

Claude Opus 4.7 and newer, including Opus 4.8, reject token-based thinking requests. Prefer `thinking_budget: adaptive` or `thinking_budget: adaptive/high`. docker-agent converts numeric budgets on these models to adaptive thinking for compatibility.

+
+ ### Adaptive thinking (Claude Opus 4.6+) Newer Claude models support adaptive thinking, where the model decides how much to think. Use `adaptive` or pair it with an effort level: @@ -100,13 +105,13 @@ Newer Claude models support adaptive thinking, where the model decides how much models: claude-adaptive: provider: anthropic - model: claude-opus-4-6 + model: claude-opus-4-8 thinking_budget: adaptive # model decides effort claude-adaptive-low: provider: anthropic - model: claude-opus-4-6 - thinking_budget: low # adaptive with low effort: low | medium | high | max + model: claude-opus-4-8 + thinking_budget: adaptive/low # adaptive with low effort: low | medium | high | xhigh | max ``` **Adaptive effort levels:** @@ -116,6 +121,7 @@ models: | `low` | Minimal thinking; fastest adaptive mode. | | `medium` | Moderate effort. | | `high` | Thorough reasoning; default for `adaptive`. | +| `xhigh` | Extra-high reasoning effort. | | `max` | Maximum effort. | ### Disabling thinking @@ -147,13 +153,13 @@ models: ### Thinking display -Claude Opus 4.7 hides thinking content by default. Use `thinking_display` in `provider_opts` to control what you receive: +Claude Opus 4.7+ hides thinking content by default. Use `thinking_display` in `provider_opts` to control what you receive: ```yaml models: - opus-47: + opus-48: provider: anthropic - model: claude-opus-4-7 + model: claude-opus-4-8 thinking_budget: adaptive provider_opts: thinking_display: summarized # summarized | display | omitted @@ -163,7 +169,7 @@ models: | ------------ | ------------------------------------------------------------------------------------- | | `summarized` | Thinking blocks returned with a text summary (default for Claude 4 models pre-4.7). | | `display` | Full thinking blocks returned for display. | -| `omitted` | Thinking blocks hidden — only the signature is returned (default for Opus 4.7). | +| `omitted` | Thinking blocks hidden — only the signature is returned (default for Opus 4.7+). 
| Full thinking tokens are billed regardless of `thinking_display`. @@ -223,7 +229,19 @@ models: ## AWS Bedrock (Claude) -Bedrock Claude uses a token budget like Anthropic, but only supports integer token values. String effort levels (`minimal`–`max`) are mapped automatically: +Bedrock Claude supports extended thinking — an internal reasoning phase before the model produces its response. Most Claude models use a token budget; Claude Opus 4.7+ uses adaptive thinking instead: + +```yaml +models: + bedrock-claude-adaptive: + provider: amazon-bedrock + model: global.anthropic.claude-opus-4-8-20260601-v1:0 + thinking_budget: adaptive/high # adaptive | adaptive/low | adaptive/medium | adaptive/high | adaptive/xhigh | adaptive/max + provider_opts: + region: us-east-1 +``` + +For models that still accept token budgets, use an integer token count (1024–32768) or an effort level string that maps automatically: | Effort level | Token budget | | ------------ | ------------ | diff --git a/docs/providers/anthropic/index.md b/docs/providers/anthropic/index.md index 3945275c9..a24759789 100644 --- a/docs/providers/anthropic/index.md +++ b/docs/providers/anthropic/index.md @@ -95,16 +95,19 @@ models: ## Available Models -| Model ID | Description | -| ------------------- | --------------------------------------------------- | -| `claude-opus-4-7` | Highest-capability Opus model; supports task budget | -| `claude-sonnet-4-5` | Most capable Sonnet; supports extended thinking | -| `claude-sonnet-4-0` | Previous Sonnet generation, still supported | -| `claude-haiku-4-5` | Fast and inexpensive, good for tight loops | +| Model ID | Description | +| ------------------- | ----------------------------------------------------------- | +| `claude-opus-4-8` | Highest-capability Opus model; uses adaptive thinking | +| `claude-opus-4-7` | Opus model with adaptive thinking and task budget support | +| `claude-sonnet-4-5` | Most capable Sonnet; supports extended thinking | +| `claude-sonnet-4-0` | 
Previous Sonnet generation, still supported | +| `claude-haiku-4-5` | Fast and inexpensive, good for tight loops | ## Thinking Budget -Anthropic uses integer token budgets (1024–32768). Thinking is off unless you set `thinking_budget`; when set, interleaved thinking is auto-enabled: +Anthropic supports both token budgets and adaptive thinking. Thinking is off unless you set `thinking_budget`; when set, interleaved thinking is auto-enabled. + +Use numeric token budgets with Claude Sonnet and older Opus models: ```yaml models: @@ -114,6 +117,18 @@ models: thinking_budget: 16384 # must be < max_tokens ``` +Use adaptive thinking with Claude Opus 4.7+ (including Opus 4.8): + +```yaml +models: + opus: + provider: anthropic + model: claude-opus-4-8 + thinking_budget: adaptive/high # adaptive | adaptive/low | adaptive/medium | adaptive/high | adaptive/xhigh | adaptive/max +``` + +Claude Opus 4.7+ rejects token-based thinking requests (`thinking.type=enabled`). docker-agent converts numeric budgets on these models to adaptive thinking, but new configs should prefer `adaptive` or `adaptive/<effort>` directly. + ## Interleaved Thinking Auto-enabled whenever a thinking budget is configured on a Claude model. Allows tool calls during model reasoning for more integrated problem-solving: @@ -196,13 +211,13 @@ AI, or the Message Batches API. ## Thinking Display -Controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); earlier Claude 4 models default to `summarized`. Set `thinking_display` in `provider_opts` to override: +Controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7+ hides thinking content by default (`omitted`); earlier Claude 4 models default to `summarized`. 
Set `thinking_display` in `provider_opts` to override: ```yaml models: - claude-opus-4-7: + claude-opus-4-8: provider: anthropic - model: claude-opus-4-7 + model: claude-opus-4-8 thinking_budget: adaptive provider_opts: thinking_display: summarized # "summarized", "display", or "omitted" @@ -212,7 +227,7 @@ Valid values: - `summarized`: thinking blocks are returned with summarized thinking text (default for Claude 4 models prior to Opus 4.7). - `display`: thinking blocks are returned for display (use this to re-enable thinking output on Opus 4.7). -- `omitted`: thinking blocks are returned with an empty thinking field; the signature is still returned for multi-turn continuity (default for Opus 4.7). Useful to reduce time-to-first-text-token when streaming. +- `omitted`: thinking blocks are returned with an empty thinking field; the signature is still returned for multi-turn continuity (default for Opus 4.7+). Useful to reduce time-to-first-text-token when streaming. Note: `thinking_display` applies to both `thinking_budget` with token counts and adaptive/effort-based budgets. Full thinking tokens are billed regardless of the `thinking_display` value. diff --git a/docs/providers/bedrock/index.md b/docs/providers/bedrock/index.md index 95f093d3a..25d95fd9c 100644 --- a/docs/providers/bedrock/index.md +++ b/docs/providers/bedrock/index.md @@ -103,7 +103,21 @@ Use inference profile prefixes for optimal routing: ## Thinking Budget (Claude on Bedrock) -Bedrock Claude models support extended thinking — an internal reasoning phase before the model produces its response. Set `thinking_budget` to a token count (1024–32768) or an effort level string that maps automatically: +Bedrock Claude models support extended thinking — an internal reasoning phase before the model produces its response. 
Use adaptive thinking for Claude Opus 4.7+ (including Opus 4.8): + +```yaml +models: + bedrock-opus-thinking: + provider: amazon-bedrock + model: global.anthropic.claude-opus-4-8-20260601-v1:0 + thinking_budget: adaptive/high # adaptive | adaptive/low | adaptive/medium | adaptive/high | adaptive/xhigh | adaptive/max + provider_opts: + region: us-east-1 +``` + +Claude Opus 4.7+ rejects token-based thinking requests (`thinking.type=enabled`). docker-agent converts numeric budgets on these models to adaptive thinking, but new configs should prefer `adaptive` or `adaptive/<effort>` directly. + +For Claude models that still accept token budgets, set `thinking_budget` to a token count (1024–32768) or an effort level string that maps automatically: | Effort level | Token budget | | ------------ | ------------ | @@ -124,7 +138,7 @@ models: region: us-east-1 ``` -`thinking_budget` must be ≥ 1024 and less than `max_tokens`. Values outside this range are logged as a warning and ignored. +For token-budget models, `thinking_budget` must be ≥ 1024 and less than `max_tokens`. Values outside this range are logged as a warning and ignored.
Temperature and top_p diff --git a/examples/thinking_budget.yaml b/examples/thinking_budget.yaml index e0a96da1b..735c18cf9 100644 --- a/examples/thinking_budget.yaml +++ b/examples/thinking_budget.yaml @@ -4,7 +4,7 @@ agents: root: model: gpt-5-mini-min # <- try with gpt-5-mini-high - # model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high or claude-opus-4-6-adaptive + # model: claude-opus-4-8-adaptive # <- try with Opus adaptive thinking # model: gemini-2-5-flash-dynamic-thinking # <- try with -no-thinking, -low or -high variants description: a helpful assistant that thinks instruction: you are a helpful assistant who can also use tools, but only if you need to @@ -36,22 +36,22 @@ models: provider_opts: interleaved_thinking: true # <- enables interleaved thinking, aka tool calling during model reasoning - claude-opus-4-6-adaptive: + claude-opus-4-8-adaptive: provider: anthropic - model: claude-opus-4-6 - thinking_budget: adaptive # <- lets the model decide when and how much to think (recommended for 4.6) + model: claude-opus-4-8 + thinking_budget: adaptive # <- required style for Opus 4.7+; token budgets are converted for compatibility - claude-opus-4-6-low: + claude-opus-4-8-low: provider: anthropic - model: claude-opus-4-6 - thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max" + model: claude-opus-4-8 + thinking_budget: adaptive/low # <- adaptive thinking with low effort: "low", "medium", "high", "xhigh", "max" - claude-opus-4-7-summarized: + claude-opus-4-8-summarized: provider: anthropic - model: claude-opus-4-6 # <- Opus 4.7 hides thinking by default; use the same flag with any recent Claude model + model: claude-opus-4-8 # <- Opus 4.7+ hides thinking by default; use thinking_display to receive summaries thinking_budget: adaptive provider_opts: - thinking_display: summarized # <- "summarized", "display", or "omitted" (Opus 4.7 defaults to omitted) + thinking_display: summarized # <- "summarized", "display", 
or "omitted" (Opus 4.7+ defaults to omitted) gemini-2-5-flash-dynamic-thinking: provider: google diff --git a/pkg/model/provider/anthropic/thinking_test.go b/pkg/model/provider/anthropic/thinking_test.go index da59d606d..3b6a4c5b4 100644 --- a/pkg/model/provider/anthropic/thinking_test.go +++ b/pkg/model/provider/anthropic/thinking_test.go @@ -208,8 +208,8 @@ func TestApplyThinkingConfig(t *testing.T) { wantTokens: 2048, }, { - name: "opus-4-6 token budget auto-switches to adaptive", - model: "claude-opus-4-6", + name: "opus-4-8 token budget auto-switches to adaptive", + model: "claude-opus-4-8", budget: &latest.ThinkingBudget{Tokens: 4096}, maxTokens: 8192, wantEnabled: true, @@ -226,8 +226,8 @@ func TestApplyThinkingConfig(t *testing.T) { wantEffort: "high", }, { - name: "opus-4-6 dated variant token budget auto-switches to adaptive", - model: "claude-opus-4-6-20251101", + name: "opus-4-8 dated variant token budget auto-switches to adaptive", + model: "claude-opus-4-8-20260601", budget: &latest.ThinkingBudget{Tokens: 8000}, opts: map[string]any{"thinking_display": "summarized"}, maxTokens: 16384, @@ -326,8 +326,8 @@ func TestApplyBetaThinkingConfig(t *testing.T) { maxTokens: 8192, }, { - name: "opus-4-6 token budget auto-switches to adaptive", - model: "claude-opus-4-6", + name: "opus-4-8 token budget auto-switches to adaptive", + model: "claude-opus-4-8", budget: &latest.ThinkingBudget{Tokens: 4096}, maxTokens: 8192, wantAdaptive: true, @@ -407,11 +407,11 @@ func TestAdjustMaxTokensForThinking(t *testing.T) { assert.Contains(t, err.Error(), "max_tokens") }) - t.Run("opus-4-6 with token budget skips adjustment (will be coerced to adaptive)", func(t *testing.T) { + t.Run("opus-4-6 with token budget is adjusted", func(t *testing.T) { c := clientWithModel("claude-opus-4-6", &latest.ThinkingBudget{Tokens: 16384}, nil) got, err := c.adjustMaxTokensForThinking(8192) require.NoError(t, err) - assert.Equal(t, int64(8192), got) + assert.Equal(t, int64(16384+8192), got) }) 
t.Run("opus-4-7 with token budget skips adjustment (will be coerced to adaptive)", func(t *testing.T) { @@ -422,6 +422,15 @@ func TestAdjustMaxTokensForThinking(t *testing.T) { require.NoError(t, err) assert.Equal(t, int64(8192), got) }) + + t.Run("opus-4-8 with token budget skips adjustment (will be coerced to adaptive)", func(t *testing.T) { + c := clientWithModel("claude-opus-4-8", &latest.ThinkingBudget{Tokens: 32768}, nil) + userMax := int64(8192) + c.ModelConfig.MaxTokens = &userMax + got, err := c.adjustMaxTokensForThinking(8192) + require.NoError(t, err) + assert.Equal(t, int64(8192), got) + }) } func TestCoerceAdaptiveThinking(t *testing.T) { @@ -436,16 +445,10 @@ func TestCoerceAdaptiveThinking(t *testing.T) { assert.Same(t, in, c.coerceAdaptiveThinking(), "budget pointer must not be replaced") }) - t.Run("opus-4-6 token budget is coerced to adaptive", func(t *testing.T) { + t.Run("opus-4-6 token budget is preserved", func(t *testing.T) { in := &latest.ThinkingBudget{Tokens: 4096} c := clientWithModel("claude-opus-4-6", in, nil) - got := c.coerceAdaptiveThinking() - require.NotNil(t, got) - assert.Equal(t, "adaptive", got.Effort) - assert.Equal(t, 0, got.Tokens) - // Original must not be mutated. 
- assert.Equal(t, 4096, in.Tokens) - assert.Empty(t, in.Effort) + assert.Same(t, in, c.coerceAdaptiveThinking()) }) t.Run("opus-4-7 adaptive budget is preserved as-is", func(t *testing.T) { @@ -454,6 +457,15 @@ func TestCoerceAdaptiveThinking(t *testing.T) { assert.Same(t, in, c.coerceAdaptiveThinking()) }) + t.Run("opus-4-8 token budget is coerced to adaptive", func(t *testing.T) { + in := &latest.ThinkingBudget{Tokens: 4096} + c := clientWithModel("claude-opus-4-8", in, nil) + got := c.coerceAdaptiveThinking() + require.NotNil(t, got) + assert.Equal(t, "adaptive", got.Effort) + assert.Equal(t, 0, got.Tokens) + }) + // Disabled or non-positive token budgets must NOT be silently coerced to // adaptive thinking on Opus 4.6/4.7 — the user has either explicitly // disabled thinking or supplied an invalid value. diff --git a/pkg/model/provider/bedrock/client.go b/pkg/model/provider/bedrock/client.go index fb6bcce1e..53368c6f7 100644 --- a/pkg/model/provider/bedrock/client.go +++ b/pkg/model/provider/bedrock/client.go @@ -17,11 +17,13 @@ import ( "github.com/docker/docker-agent/pkg/chat" "github.com/docker/docker-agent/pkg/config/latest" + "github.com/docker/docker-agent/pkg/effort" "github.com/docker/docker-agent/pkg/environment" "github.com/docker/docker-agent/pkg/httpclient" "github.com/docker/docker-agent/pkg/model/provider/base" "github.com/docker/docker-agent/pkg/model/provider/options" "github.com/docker/docker-agent/pkg/model/provider/providerutil" + "github.com/docker/docker-agent/pkg/modelinfo" "github.com/docker/docker-agent/pkg/modelsdev" "github.com/docker/docker-agent/pkg/tools" ) @@ -320,20 +322,7 @@ func (c *Client) interleavedThinkingEnabled() bool { // It mirrors the validation in buildAdditionalModelRequestFields but without // side effects (no logging), so it can safely be used to gate inference config. 
func (c *Client) isThinkingEnabled() bool { - if c.ModelConfig.ThinkingBudget == nil { - return false - } - tokens := c.ModelConfig.ThinkingBudget.Tokens - if t, ok := c.ModelConfig.ThinkingBudget.EffortTokens(); ok { - tokens = t - } - if tokens < 1024 { - return false - } - if c.ModelConfig.MaxTokens != nil && tokens >= int(*c.ModelConfig.MaxTokens) { - return false - } - return true + return c.thinkingConfig() != nil } func (c *Client) promptCachingEnabled() bool { @@ -355,45 +344,100 @@ func (c *Client) buildAdditionalModelRequestFields() document.Interface { } // Configure thinking budget if present and valid - if budget := c.ModelConfig.ThinkingBudget; budget != nil { - tokens := budget.Tokens - if t, ok := budget.EffortTokens(); ok { - tokens = t + if thinking := c.thinkingConfig(); thinking != nil { + fields["thinking"] = thinking.thinking + if thinking.outputEffort != "" { + fields["output_config"] = map[string]any{"effort": thinking.outputEffort} } - valid := tokens > 0 - if valid && tokens < 1024 { - slog.Warn("Bedrock thinking_budget below minimum (1024), ignoring", "tokens", tokens) - valid = false - } - if valid && c.ModelConfig.MaxTokens != nil && tokens >= int(*c.ModelConfig.MaxTokens) { - slog.Warn("Bedrock thinking_budget must be less than max_tokens, ignoring", - "thinking_budget", tokens, - "max_tokens", *c.ModelConfig.MaxTokens) - valid = false + switch { + case thinking.adaptive: + slog.Debug("Bedrock request using adaptive thinking", "effort", thinking.outputEffort) + case c.interleavedThinkingEnabled(): + fields["anthropic_beta"] = []string{"interleaved-thinking-2025-05-14"} + slog.Debug("Bedrock request using interleaved thinking beta") + default: + slog.Warn("Bedrock thinking_budget is set but interleaved_thinking is explicitly disabled; " + + "the anthropic_beta header will not be sent, which may cause the thinking budget to be ignored") } + } - if valid { - slog.Debug("Bedrock request using thinking_budget", "budget_tokens", tokens) - 
fields["thinking"] = map[string]any{ - "type": "enabled", - "budget_tokens": tokens, - } + if len(fields) == 0 { + return nil + } + return document.NewLazyDocument(fields) +} - if c.interleavedThinkingEnabled() { - fields["anthropic_beta"] = []string{"interleaved-thinking-2025-05-14"} - slog.Debug("Bedrock request using interleaved thinking beta") - } else { - slog.Warn("Bedrock thinking_budget is set but interleaved_thinking is explicitly disabled; " + - "the anthropic_beta header will not be sent, which may cause the thinking budget to be ignored") - } +type bedrockThinkingConfig struct { + thinking map[string]any + adaptive bool + outputEffort string +} + +func (c *Client) thinkingConfig() *bedrockThinkingConfig { + budget := c.ModelConfig.ThinkingBudget + if budget == nil || budget.IsDisabled() { + return nil + } + + if effortStr, ok := bedrockThinkingEffort(budget); ok { + return &bedrockThinkingConfig{ + thinking: map[string]any{"type": "adaptive"}, + adaptive: true, + outputEffort: effortStr, } } - if len(fields) == 0 { + if modelinfo.RejectsTokenThinking(c.ModelConfig.Model) { + if budget.Tokens <= 0 { + return nil + } + slog.Warn("Bedrock: model rejects token-based thinking budgets; switching to adaptive thinking", + "model", c.ModelConfig.Model, + "thinking_budget_tokens", budget.Tokens) + return &bedrockThinkingConfig{ + thinking: map[string]any{"type": "adaptive"}, + adaptive: true, + outputEffort: "high", + } + } + + tokens := budget.Tokens + if t, ok := budget.EffortTokens(); ok { + tokens = t + } + if tokens < 1024 { + slog.Warn("Bedrock thinking_budget below minimum (1024), ignoring", "tokens", tokens) return nil } - return document.NewLazyDocument(fields) + if c.ModelConfig.MaxTokens != nil && tokens >= int(*c.ModelConfig.MaxTokens) { + slog.Warn("Bedrock thinking_budget must be less than max_tokens, ignoring", + "thinking_budget", tokens, + "max_tokens", *c.ModelConfig.MaxTokens) + return nil + } + + slog.Debug("Bedrock request using 
thinking_budget", "budget_tokens", tokens) + return &bedrockThinkingConfig{ + thinking: map[string]any{ + "type": "enabled", + "budget_tokens": tokens, + }, + } +} + +func bedrockThinkingEffort(b *latest.ThinkingBudget) (string, bool) { + if b == nil { + return "", false + } + if e, ok := b.AdaptiveEffort(); ok { + return e, true + } + l, ok := b.EffortLevel() + if !ok { + return "", false + } + return effort.ForAnthropic(l) } func getProviderOpt[T any](opts map[string]any, key string) T { diff --git a/pkg/model/provider/bedrock/client_test.go b/pkg/model/provider/bedrock/client_test.go index fda3fc397..bdb60bf9e 100644 --- a/pkg/model/provider/bedrock/client_test.go +++ b/pkg/model/provider/bedrock/client_test.go @@ -2,6 +2,7 @@ package bedrock import ( "encoding/base64" + "encoding/json" "net/http" "net/http/httptest" "testing" @@ -643,6 +644,82 @@ func TestBuildAdditionalModelRequestFields_Enabled(t *testing.T) { result := client.buildAdditionalModelRequestFields() require.NotNil(t, result, "expected document for valid thinking_budget") + fields := documentToMap(t, result) + thinking, ok := fields["thinking"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "enabled", thinking["type"]) + assert.InDelta(t, float64(16384), thinking["budget_tokens"], 0) +} + +func documentToMap(t *testing.T, doc any) map[string]any { + t.Helper() + d, ok := doc.(interface{ MarshalSmithyDocument() ([]byte, error) }) + require.True(t, ok, "expected smithy document") + data, err := d.MarshalSmithyDocument() + require.NoError(t, err) + var result map[string]any + require.NoError(t, json.Unmarshal(data, &result)) + return result +} + +func TestBuildAdditionalModelRequestFields_AdaptiveForOpus48(t *testing.T) { + t.Parallel() + + maxTokens := int64(64000) + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Provider: "amazon-bedrock", + Model: "anthropic.claude-opus-4-8", + MaxTokens: &maxTokens, + ThinkingBudget: &latest.ThinkingBudget{ + Tokens: 
32768, + }, + }, + }, + } + + result := client.buildAdditionalModelRequestFields() + + require.NotNil(t, result) + fields := documentToMap(t, result) + thinking, ok := fields["thinking"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "adaptive", thinking["type"]) + assert.NotContains(t, thinking, "budget_tokens") + outputConfig, ok := fields["output_config"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "high", outputConfig["effort"]) + assert.NotContains(t, fields, "anthropic_beta") +} + +func TestBuildAdditionalModelRequestFields_AdaptiveEffort(t *testing.T) { + t.Parallel() + + maxTokens := int64(64000) + client := &Client{ + Config: base.Config{ + ModelConfig: latest.ModelConfig{ + Provider: "amazon-bedrock", + Model: "anthropic.claude-opus-4-8", + MaxTokens: &maxTokens, + ThinkingBudget: &latest.ThinkingBudget{ + Effort: "adaptive/xhigh", + }, + }, + }, + } + + result := client.buildAdditionalModelRequestFields() + + require.NotNil(t, result) + fields := documentToMap(t, result) + thinking, ok := fields["thinking"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "adaptive", thinking["type"]) + outputConfig, ok := fields["output_config"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "xhigh", outputConfig["effort"]) } func TestBuildAdditionalModelRequestFields_Nil(t *testing.T) { diff --git a/pkg/modelinfo/modelinfo.go b/pkg/modelinfo/modelinfo.go index 827657c3b..229cfbae0 100644 --- a/pkg/modelinfo/modelinfo.go +++ b/pkg/modelinfo/modelinfo.go @@ -35,6 +35,7 @@ package modelinfo import ( "context" "log/slog" + "strconv" "strings" "time" @@ -87,19 +88,37 @@ func AlwaysReasons(modelID string) bool { // `thinking.type=enabled` (token-based extended thinking) and instead requires // `thinking.type=adaptive`. // -// Currently Claude Opus 4.6 and 4.7 (and dated variants like -// claude-opus-4-7-20251101). For these models the agent transparently -// switches a token-based budget to adaptive thinking. 
+// Claude Opus 4.7+ only supports adaptive thinking. Opus 4.6 also accepts +// adaptive thinking, but it still accepts token budgets, so it is not coerced. // // See https://platform.claude.com/docs/en/build-with-claude/adaptive-thinking func RejectsTokenThinking(modelID string) bool { - m := normalize(modelID) - for _, prefix := range []string{"claude-opus-4-6", "claude-opus-4-7"} { - if m == prefix || strings.HasPrefix(m, prefix+"-") { - return true - } + m := normalizeClaudeName(modelID) + return matchesClaudeFamilyAtLeast(m, "opus", 4, 7) +} + +func matchesClaudeFamilyAtLeast(modelID, family string, major, minor int) bool { + prefix := "claude-" + family + "-" + if !strings.HasPrefix(modelID, prefix) { + return false } - return false + rest := strings.TrimPrefix(modelID, prefix) + parts := strings.Split(rest, "-") + if len(parts) < 2 || len(parts[1]) != 1 { + return false + } + gotMajor, err := strconv.Atoi(parts[0]) + if err != nil { + return false + } + gotMinor, err := strconv.Atoi(parts[1]) + if err != nil { + return false + } + if gotMajor != major { + return gotMajor > major + } + return gotMinor >= minor } // UsesThinkingLevel reports whether a Google Gemini model uses level-based @@ -198,6 +217,14 @@ func normalize(modelID string) string { return strings.ToLower(strings.TrimSpace(modelID)) } +func normalizeClaudeName(modelID string) string { + m := normalize(modelID) + if i := strings.Index(m, "claude-"); i >= 0 { + return m[i:] + } + return m +} + // isOSeries reports whether the (already-normalized) identifier names an // OpenAI o-series reasoning model (o1/o3/o4 and their variants). 
func isOSeries(m string) bool { diff --git a/pkg/modelinfo/modelinfo_test.go b/pkg/modelinfo/modelinfo_test.go index 5984591d4..75e3a9529 100644 --- a/pkg/modelinfo/modelinfo_test.go +++ b/pkg/modelinfo/modelinfo_test.go @@ -133,15 +133,17 @@ func TestRejectsTokenThinking(t *testing.T) { model string want bool }{ - {"claude-opus-4-6", true}, + {"claude-opus-4-6", false}, {"claude-opus-4-7", true}, - {"claude-opus-4-6-20251101", true}, + {"claude-opus-4-6-20251101", false}, {"claude-opus-4-7-20260101", true}, + {"claude-opus-4-8", true}, + {"claude-opus-4-8-20260601", true}, + {"claude-opus-4-9", true}, {"CLAUDE-OPUS-4-7", true}, // case-insensitive - {" claude-opus-4-6 ", true}, // trims whitespace + {" claude-opus-4-7 ", true}, // trims whitespace {"claude-opus-4-5", false}, {"claude-opus-4-5-20251015", false}, - {"claude-opus-4-8", false}, {"claude-sonnet-4-7", false}, {"claude-sonnet-4-5", false}, {"claude-haiku-4-5", false},