diff --git a/crates/goose-cli/src/commands/configure.rs b/crates/goose-cli/src/commands/configure.rs index ce6cb63acadc..3817d04cc8c1 100644 --- a/crates/goose-cli/src/commands/configure.rs +++ b/crates/goose-cli/src/commands/configure.rs @@ -776,8 +776,9 @@ pub async fn configure_provider_dialog() -> anyhow::Result { .item("low", "Low - Better latency, lighter reasoning", "") .item("medium", "Medium - Moderate thinking", "") .item("high", "High - Deep reasoning", "") + .item("xhigh", "XHigh - Extended coding and agentic work", "") .item("max", "Max - No constraints on thinking depth", "") - .initial_value("off") + .initial_value("high") .interact()?; config.set_goose_thinking_effort(effort)?; } diff --git a/crates/goose/src/config/base.rs b/crates/goose/src/config/base.rs index f26d9bdab0ef..0f83818737ea 100644 --- a/crates/goose/src/config/base.rs +++ b/crates/goose/src/config/base.rs @@ -1100,7 +1100,7 @@ config_value!(CURSOR_AGENT_COMMAND, String, "cursor-agent"); config_value!(CODEX_COMMAND, String, "codex"); config_value!(CODEX_ENABLE_SKILLS, String, "true"); config_value!(CODEX_SKIP_GIT_CHECK, String, "false"); -config_value!(CHATGPT_CODEX_REASONING_EFFORT, String, "medium"); +config_value!(CHATGPT_CODEX_REASONING_EFFORT, String); config_value!(GOOSE_SEARCH_PATHS, Vec); config_value!(GOOSE_MODE, GooseMode); diff --git a/crates/goose/src/model.rs b/crates/goose/src/model.rs index eeb54562ffaa..02b366a4c200 100644 --- a/crates/goose/src/model.rs +++ b/crates/goose/src/model.rs @@ -17,6 +17,7 @@ pub enum ThinkingEffort { Low, Medium, High, + XHigh, Max, } @@ -28,7 +29,8 @@ impl FromStr for ThinkingEffort { "low" => Ok(Self::Low), "medium" | "med" => Ok(Self::Medium), "high" => Ok(Self::High), - "max" | "xhigh" => Ok(Self::Max), + "xhigh" => Ok(Self::XHigh), + "max" => Ok(Self::Max), other => Err(format!("unknown thinking effort: '{other}'")), } } @@ -41,6 +43,7 @@ impl fmt::Display for ThinkingEffort { Self::Low => write!(f, "low"), Self::Medium => write!(f, "medium"), Self::High => write!(f, "high"), + Self::XHigh => write!(f, "xhigh"), Self::Max => write!(f, "max"), } } @@ -445,7 +448,7 @@ impl ModelConfig { "low" => ThinkingEffort::Low, "medium" => ThinkingEffort::Medium, "high" => ThinkingEffort::High, - "xhigh" => ThinkingEffort::Max, + "xhigh" => ThinkingEffort::XHigh, _ => return, }; self.model_name = parts[..parts.len() - 1].join("-"); @@ -794,7 +797,7 @@ mod tests { ]); let config = ModelConfig::new("gpt-5.4-xhigh").unwrap(); assert_eq!(config.model_name, "gpt-5.4"); - assert_eq!(config.thinking_effort(), Some(ThinkingEffort::Max)); + assert_eq!(config.thinking_effort(), Some(ThinkingEffort::XHigh)); } #[test] @@ -857,7 +860,7 @@ mod tests { ); assert_eq!("med".parse::(), Ok(ThinkingEffort::Medium)); assert_eq!("max".parse::(), Ok(ThinkingEffort::Max)); - assert_eq!("xhigh".parse::(), Ok(ThinkingEffort::Max)); + assert_eq!("xhigh".parse::(), Ok(ThinkingEffort::XHigh)); assert!("invalid".parse::().is_err()); } } diff --git a/crates/goose/src/providers/anthropic.rs b/crates/goose/src/providers/anthropic.rs index 3a990645522c..4b7a86f90513 100644 --- a/crates/goose/src/providers/anthropic.rs +++ b/crates/goose/src/providers/anthropic.rs @@ -30,6 +30,8 @@ const ANTHROPIC_PROVIDER_NAME: &str = "anthropic"; pub const ANTHROPIC_DEFAULT_MODEL: &str = "claude-sonnet-4-5"; const ANTHROPIC_DEFAULT_FAST_MODEL: &str = "claude-haiku-4-5"; const ANTHROPIC_KNOWN_MODELS: &[&str] = &[ + // Claude 4.7 models + "claude-opus-4-7", // Claude 4.6 models "claude-opus-4-6", "claude-sonnet-4-6", diff --git a/crates/goose/src/providers/chatgpt_codex.rs b/crates/goose/src/providers/chatgpt_codex.rs index 1aa29862b01b..2fc83c73b4d8 100644 --- a/crates/goose/src/providers/chatgpt_codex.rs +++ b/crates/goose/src/providers/chatgpt_codex.rs @@ -209,38 +209,41 @@ fn build_input_items(messages: &[Message]) -> Result> { Ok(items) } -fn get_reasoning_effort(model_name: &str) -> String { - let config = crate::config::Config::global(); - let effort = config - .get_chatgpt_codex_reasoning_effort() - .map(String::from) - .unwrap_or_else(|_| "medium".to_string()); - +fn configured_reasoning_effort(model_name: &str, config: &crate::config::Config) -> Option { + let effort = config.get_chatgpt_codex_reasoning_effort().ok()?; let valid_levels = reasoning_levels_for_model(model_name); + if valid_levels.contains(&effort.as_str()) { - effort + Some(effort) } else { tracing::warn!( - "Invalid CHATGPT_CODEX_REASONING_EFFORT '{}' for model '{}', using 'medium'", + "Invalid CHATGPT_CODEX_REASONING_EFFORT '{}' for model '{}', omitting reasoning effort", effort, model_name ); - "medium".to_string() + None } } fn reasoning_effort_for_config(model_config: &ModelConfig) -> Option { + reasoning_effort_for_config_with_config(model_config, crate::config::Config::global()) +} + +fn reasoning_effort_for_config_with_config( + model_config: &ModelConfig, + config: &crate::config::Config, +) -> Option { use crate::model::ThinkingEffort; - model_config - .thinking_effort() - .map(|effort| { + match model_config.thinking_effort() { + Some(effort) => { let valid_levels = reasoning_levels_for_model(&model_config.model_name); let preferred_levels: &[&str] = match effort { ThinkingEffort::Off => return None, ThinkingEffort::Low => &["low", "medium", "high", "xhigh"], ThinkingEffort::Medium => &["medium", "high", "low", "xhigh"], ThinkingEffort::High => &["high", "medium", "xhigh", "low"], + ThinkingEffort::XHigh => &["xhigh", "high", "medium", "low"], ThinkingEffort::Max => &["xhigh", "high", "medium", "low"], }; @@ -248,8 +251,9 @@ fn reasoning_effort_for_config(model_config: &ModelConfig) -> Option { .iter() .find(|level| valid_levels.contains(level)) .map(|level| (*level).to_string()) - }) - .unwrap_or_else(|| Some(get_reasoning_effort(&model_config.model_name))) + } + None => configured_reasoning_effort(&model_config.model_name, config), + } } fn create_codex_request( @@ -1242,6 +1246,21 @@ mod tests { assert!(payload.get("reasoning_effort").is_none()); } + #[test] + fn test_create_codex_request_omits_reasoning_when_effort_unset() { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("CHATGPT_CODEX_REASONING_EFFORT", None::<&str>), + ]); + let config_file = tempfile::NamedTempFile::new().unwrap(); + let secrets_file = tempfile::NamedTempFile::new().unwrap(); + let empty_config = + crate::config::Config::new_with_file_secrets(config_file.path(), secrets_file.path()) + .unwrap(); + + assert!(configured_reasoning_effort("gpt-5.2-codex", &empty_config).is_none()); + } + #[test_case( JwtClaims { chatgpt_account_id: Some("account-1".to_string()), diff --git a/crates/goose/src/providers/codex.rs b/crates/goose/src/providers/codex.rs index 8b420d4146e3..0d2223f64c15 100644 --- a/crates/goose/src/providers/codex.rs +++ b/crates/goose/src/providers/codex.rs @@ -69,7 +69,7 @@ impl CodexProvider { "low" => Some(crate::model::ThinkingEffort::Low), "medium" => Some(crate::model::ThinkingEffort::Medium), "high" => Some(crate::model::ThinkingEffort::High), - "xhigh" => Some(crate::model::ThinkingEffort::Max), + "xhigh" => Some(crate::model::ThinkingEffort::XHigh), _ => None, }) } @@ -79,14 +79,12 @@ impl CodexProvider { effort: Option, ) -> Option { use crate::model::ThinkingEffort; - match effort - .or_else(Self::legacy_reasoning_effort) - .unwrap_or(ThinkingEffort::High) - { + match effort.or_else(Self::legacy_reasoning_effort)? { ThinkingEffort::Off => Some("none".to_string()), ThinkingEffort::Low => Some("low".to_string()), ThinkingEffort::Medium => Some("medium".to_string()), ThinkingEffort::High => Some("high".to_string()), + ThinkingEffort::XHigh => Some("xhigh".to_string()), ThinkingEffort::Max => Some("xhigh".to_string()), } } @@ -1259,7 +1257,7 @@ mod tests { ); assert_eq!( CodexProvider::map_thinking_effort("gpt-5.2-codex", None), - Some("high".to_string()) + None ); } diff --git a/crates/goose/src/providers/formats/anthropic.rs b/crates/goose/src/providers/formats/anthropic.rs index 92803dea4b9b..38a553d01c4b 100644 --- a/crates/goose/src/providers/formats/anthropic.rs +++ b/crates/goose/src/providers/formats/anthropic.rs @@ -69,7 +69,13 @@ impl AnthropicFormatOptions { pub fn supports_adaptive_thinking(model_name: &str) -> bool { let lower = model_name.to_lowercase(); - lower.contains("claude-opus-4-6") || lower.contains("claude-sonnet-4-6") + lower.contains("claude-opus-4-7") + || lower.contains("claude-opus-4-6") + || lower.contains("claude-sonnet-4-6") +} + +fn is_claude_opus_47(model_name: &str) -> bool { + model_name.to_lowercase().contains("claude-opus-4-7") } pub fn thinking_type(model_config: &ModelConfig) -> ThinkingType { @@ -112,6 +118,7 @@ const IS_ERROR_FIELD: &str = "is_error"; const SIGNATURE_FIELD: &str = "signature"; const DATA_FIELD: &str = "data"; const EVENT_MESSAGE_START: &str = "message_start"; +const DEFAULT_PRESERVED_THINKING_BUDGET_TOKENS: i32 = 16000; const EVENT_MESSAGE_DELTA: &str = "message_delta"; const EVENT_MESSAGE_STOP: &str = "message_stop"; const EVENT_CONTENT_BLOCK_START: &str = "content_block_start"; @@ -491,36 +498,90 @@ pub fn get_usage(data: &Value) -> Result { } } -pub fn thinking_effort(model_config: &ModelConfig) -> ThinkingEffort { - model_config - .thinking_effort() - .unwrap_or(ThinkingEffort::High) +pub fn thinking_effort(model_config: &ModelConfig) -> Option { + model_config.thinking_effort() +} + +pub fn adaptive_effort_value(model_config: &ModelConfig) -> Option<&'static str> { + adaptive_effort_value_for_model(&model_config.model_name, model_config.thinking_effort()) +} + +fn adaptive_effort_value_for_model( + model_name: &str, + effort: Option, +) -> Option<&'static str> { + match effort? { + ThinkingEffort::Off => None, + ThinkingEffort::Low => Some("low"), + ThinkingEffort::Medium => Some("medium"), + ThinkingEffort::High => Some("high"), + ThinkingEffort::XHigh if is_claude_opus_47(model_name) => Some("xhigh"), + ThinkingEffort::XHigh => Some("high"), + ThinkingEffort::Max => Some("max"), + } +} + +pub fn thinking_budget_tokens(model_config: &ModelConfig) -> Option { + thinking_budget_tokens_for_effort(model_config, model_config.thinking_effort()) } -pub fn thinking_budget_tokens(model_config: &ModelConfig) -> i32 { - if let Some(request_param) = model_config +fn preserved_thinking_budget_tokens(model_config: &ModelConfig) -> i32 { + let request_budget = model_config .request_params .as_ref() .and_then(|params| params.get("budget_tokens")) - .and_then(|v| serde_json::from_value::(v.clone()).ok()) - { - return request_param.max(1024); + .and_then(|v| serde_json::from_value::(v.clone()).ok()); + + preserved_thinking_budget_tokens_for_values( + request_budget, + legacy_thinking_budget_tokens(), + model_config.thinking_effort(), + ) +} + +fn thinking_budget_tokens_for_effort( + model_config: &ModelConfig, + effort: Option, +) -> Option { + let request_budget = model_config + .request_params + .as_ref() + .and_then(|params| params.get("budget_tokens")) + .and_then(|v| serde_json::from_value::(v.clone()).ok()); + + thinking_budget_tokens_for_values(request_budget, legacy_thinking_budget_tokens(), effort) +} + +fn thinking_budget_tokens_for_values( + request_budget: Option, + legacy_budget: Option, + effort: Option, +) -> Option { + if let Some(request_budget) = request_budget { + return Some(request_budget.max(1024)); } - if let Some(budget) = legacy_thinking_budget_tokens() { - return budget; + if let Some(legacy_budget) = legacy_budget { + return Some(legacy_budget); } - let effort = model_config - .thinking_effort() - .unwrap_or(ThinkingEffort::High); - match effort { + Some(match effort? { ThinkingEffort::Off => 1024, ThinkingEffort::Low => 4000, ThinkingEffort::Medium => 10000, ThinkingEffort::High => 16000, + ThinkingEffort::XHigh => 24000, ThinkingEffort::Max => 32000, - } + }) +} + +fn preserved_thinking_budget_tokens_for_values( + request_budget: Option, + legacy_budget: Option, + effort: Option, +) -> i32 { + thinking_budget_tokens_for_values(request_budget, legacy_budget, effort) + .unwrap_or(DEFAULT_PRESERVED_THINKING_BUDGET_TOKENS) } fn legacy_thinking_budget_tokens() -> Option { @@ -542,36 +603,50 @@ fn apply_thinking_config( let obj = payload.as_object_mut().unwrap(); match thinking_type(model_config) { ThinkingType::Adaptive => { - obj.insert("thinking".to_string(), json!({"type": "adaptive"})); - let effort = thinking_effort(model_config).to_string(); - obj.insert("output_config".to_string(), json!({"effort": effort})); - } - ThinkingType::Enabled => { - let budget_tokens = thinking_budget_tokens(model_config); - - obj.insert("max_tokens".to_string(), json!(max_tokens + budget_tokens)); obj.insert( "thinking".to_string(), - json!({ - "type": "enabled", - "budget_tokens": budget_tokens - }), + json!({"type": "adaptive", "display": "summarized"}), ); + if let Some(effort) = adaptive_effort_value(model_config) { + obj.insert("output_config".to_string(), json!({"effort": effort})); + } + } + ThinkingType::Enabled => { + if let Some(budget_tokens) = thinking_budget_tokens(model_config) { + obj.insert("max_tokens".to_string(), json!(max_tokens + budget_tokens)); + obj.insert( + "thinking".to_string(), + json!({ + "type": "enabled", + "budget_tokens": budget_tokens + }), + ); + } } ThinkingType::Disabled => {} } if options.preserve_thinking_context { if !obj.contains_key("thinking") { - let budget_tokens = thinking_budget_tokens(model_config); - obj.insert("max_tokens".to_string(), json!(max_tokens + budget_tokens)); - obj.insert( - "thinking".to_string(), - json!({ - "type": "enabled", - "budget_tokens": budget_tokens - }), - ); + if supports_adaptive_thinking(&model_config.model_name) { + obj.insert( + "thinking".to_string(), + json!({"type": "adaptive", "display": "summarized"}), + ); + if let Some(effort) = adaptive_effort_value(model_config) { + obj.insert("output_config".to_string(), json!({"effort": effort})); + } + } else { + let budget_tokens = preserved_thinking_budget_tokens(model_config); + obj.insert("max_tokens".to_string(), json!(max_tokens + budget_tokens)); + obj.insert( + "thinking".to_string(), + json!({ + "type": "enabled", + "budget_tokens": budget_tokens + }), + ); + } } if let Some(thinking) = obj.get_mut("thinking").and_then(|t| t.as_object_mut()) { @@ -634,10 +709,17 @@ pub fn create_request_with_options( } if let Some(temp) = model_config.temperature { - payload - .as_object_mut() - .unwrap() - .insert("temperature".to_string(), json!(temp)); + if is_claude_opus_47(&model_config.model_name) { + tracing::warn!( + "Temperature is not supported for {}, omitting configured temperature", + model_config.model_name + ); + } else { + payload + .as_object_mut() + .unwrap() + .insert("temperature".to_string(), json!(temp)); + } } apply_thinking_config(&mut payload, model_config, max_tokens, options); @@ -1185,12 +1267,86 @@ mod tests { let payload = create_request(&config, "system", &messages, &[])?; assert_eq!(payload["thinking"]["type"], "adaptive"); + assert_eq!(payload["thinking"]["display"], "summarized"); assert_eq!(payload["output_config"]["effort"], "high"); assert!(payload.get("budget_tokens").is_none()); Ok(()) } + #[test] + fn test_create_request_adaptive_thinking_for_opus_47_max_effort() -> Result<()> { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + + let mut config = cfg_with_effort("claude-opus-4-7", "max"); + config.max_tokens = Some(4096); + let messages = vec![Message::user().with_text("Hello")]; + let payload = create_request(&config, "system", &messages, &[])?; + + assert_eq!(payload["thinking"]["type"], "adaptive"); + assert_eq!(payload["thinking"]["display"], "summarized"); + assert_eq!(payload["output_config"]["effort"], "max"); + assert_eq!(payload["max_tokens"], 4096); + + Ok(()) + } + + #[test] + fn test_create_request_adaptive_thinking_for_opus_47_xhigh_effort() -> Result<()> { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + + let mut config = cfg_with_effort("claude-opus-4-7", "xhigh"); + config.max_tokens = Some(4096); + let messages = vec![Message::user().with_text("Hello")]; + let payload = create_request(&config, "system", &messages, &[])?; + + assert_eq!(payload["thinking"]["type"], "adaptive"); + assert_eq!(payload["thinking"]["display"], "summarized"); + assert_eq!(payload["output_config"]["effort"], "xhigh"); + assert_eq!(payload["max_tokens"], 4096); + + Ok(()) + } + + #[test] + fn test_create_request_adaptive_thinking_for_46_xhigh_effort_downgrades() -> Result<()> { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + + let mut config = cfg_with_effort("claude-sonnet-4-6", "xhigh"); + config.max_tokens = Some(4096); + let messages = vec![Message::user().with_text("Hello")]; + let payload = create_request(&config, "system", &messages, &[])?; + + assert_eq!(payload["thinking"]["type"], "adaptive"); + assert_eq!(payload["thinking"]["display"], "summarized"); + assert_eq!(payload["output_config"]["effort"], "high"); + assert_eq!(payload["max_tokens"], 4096); + + Ok(()) + } + + #[test] + fn test_adaptive_effort_omits_unset_default() { + assert_eq!( + adaptive_effort_value_for_model("claude-opus-4-7", None), + None + ); + } + + #[test] + fn test_create_request_omits_temperature_for_opus_47() -> Result<()> { + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + + let mut config = cfg("claude-opus-4-7"); + config.temperature = Some(0.2); + let messages = vec![Message::user().with_text("Hello")]; + let payload = create_request(&config, "system", &messages, &[])?; + + assert!(payload.get("temperature").is_none()); + + Ok(()) + } + #[test] fn test_create_request_enabled_thinking_with_budget() -> Result<()> { let _guard = env_lock::lock_env([ @@ -1230,8 +1386,10 @@ mod tests { } #[test] - fn test_create_request_preserves_thinking_context_for_compatible_models() -> Result<()> { + fn test_create_request_preserves_thinking_context_without_effort_for_compatible_models( + ) -> Result<()> { let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), ("CLAUDE_THINKING_TYPE", None::<&str>), ("CLAUDE_THINKING_ENABLED", None::<&str>), ("ANTHROPIC_THINKING_BUDGET", None::<&str>), @@ -1271,6 +1429,19 @@ mod tests { Ok(()) } + #[test] + fn test_thinking_budget_omits_unset_default() { + assert_eq!(thinking_budget_tokens_for_values(None, None, None), None); + } + + #[test] + fn test_preserved_thinking_context_uses_budget_when_effort_unset() { + assert_eq!( + preserved_thinking_budget_tokens_for_values(None, None, None), + DEFAULT_PRESERVED_THINKING_BUDGET_TOKENS + ); + } + #[test] fn test_create_request_model_params_enable_preserved_thinking_context() -> Result<()> { let _guard = env_lock::lock_env([ @@ -1284,6 +1455,7 @@ mod tests { let mut params = std::collections::HashMap::new(); params.insert("preserve_thinking_context".to_string(), json!(true)); + params.insert("thinking_effort".to_string(), json!("high")); let mut config = cfg("glm-4.7"); config.request_params = Some(params); @@ -1301,6 +1473,47 @@ mod tests { Ok(()) } + #[test] + fn test_create_request_preserves_thinking_context_with_opus_47_adaptive() -> Result<()> { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("CLAUDE_THINKING_TYPE", None::<&str>), + ("CLAUDE_THINKING_ENABLED", None::<&str>), + ("ANTHROPIC_THINKING_BUDGET", None::<&str>), + ("CLAUDE_THINKING_BUDGET", None::<&str>), + ("ANTHROPIC_PRESERVE_THINKING_CONTEXT", None::<&str>), + ("ANTHROPIC_PRESERVE_UNSIGNED_THINKING", None::<&str>), + ]); + + let mut config = cfg_with_effort("claude-opus-4-7", "max"); + config.max_tokens = Some(4096); + let messages = vec![ + Message::assistant().with_content(MessageContent::thinking("internal", "")), + Message::user().with_text("Continue"), + ]; + + let payload = create_request_with_options( + &config, + "system", + &messages, + &[], + AnthropicFormatOptions { + preserve_unsigned_thinking: true, + preserve_thinking_context: true, + }, + )?; + + assert_eq!(payload["thinking"]["type"], "adaptive"); + assert_eq!(payload["thinking"]["display"], "summarized"); + assert_eq!(payload["thinking"]["clear_thinking"], false); + assert_eq!(payload["output_config"]["effort"], "max"); + assert!(payload["thinking"].get("budget_tokens").is_none()); + assert_eq!(payload["max_tokens"], 4096); + assert_eq!(payload["messages"][0]["content"][0]["type"], "thinking"); + + Ok(()) + } + #[test] fn test_tool_error_handling_maintains_pairing() { use crate::conversation::message::Message; @@ -1451,6 +1664,10 @@ mod tests { thinking_type(&cfg_with_effort("claude-opus-4-6", "high")), ThinkingType::Adaptive ); + assert_eq!( + thinking_type(&cfg_with_effort("claude-opus-4-7", "max")), + ThinkingType::Adaptive + ); // Adaptive model with off → disabled assert_eq!( thinking_type(&cfg_with_effort("claude-opus-4-6", "off")), @@ -1476,7 +1693,7 @@ mod tests { ("CLAUDE_THINKING_BUDGET", None::<&str>), ]); let config = cfg_with_effort("claude-3-7-sonnet-20250219", "high"); - assert_eq!(thinking_budget_tokens(&config), 8192); + assert_eq!(thinking_budget_tokens(&config), Some(8192)); } #[test] diff --git a/crates/goose/src/providers/formats/databricks.rs b/crates/goose/src/providers/formats/databricks.rs index 7918c75004eb..ca982b9c4104 100644 --- a/crates/goose/src/providers/formats/databricks.rs +++ b/crates/goose/src/providers/formats/databricks.rs @@ -1,7 +1,7 @@ use crate::conversation::message::{Message, MessageContent}; use crate::model::ModelConfig; use crate::providers::formats::anthropic::{ - thinking_budget_tokens, thinking_effort, thinking_type, ThinkingType, + adaptive_effort_value, thinking_budget_tokens, thinking_type, ThinkingType, }; use crate::providers::utils::{ convert_image, detect_image_path, extract_reasoning_effort, is_openai_responses_model, @@ -237,27 +237,27 @@ fn apply_claude_thinking_config(payload: &mut Value, model_config: &ModelConfig) match thinking_type(model_config) { ThinkingType::Adaptive => { obj.insert("thinking".to_string(), json!({ "type": "adaptive" })); - obj.insert( - "output_config".to_string(), - json!({ "effort": thinking_effort(model_config).to_string() }), - ); + if let Some(effort) = adaptive_effort_value(model_config) { + obj.insert("output_config".to_string(), json!({ "effort": effort })); + } obj.insert( "max_completion_tokens".to_string(), json!(model_config.max_output_tokens()), ); } ThinkingType::Enabled => { - let budget_tokens = thinking_budget_tokens(model_config); - let max_tokens = model_config.max_output_tokens() + budget_tokens; - obj.insert("max_tokens".to_string(), json!(max_tokens)); - obj.insert( - "thinking".to_string(), - json!({ - "type": "enabled", - "budget_tokens": budget_tokens - }), - ); - obj.insert("temperature".to_string(), json!(2)); + if let Some(budget_tokens) = thinking_budget_tokens(model_config) { + let max_tokens = model_config.max_output_tokens() + budget_tokens; + obj.insert("max_tokens".to_string(), json!(max_tokens)); + obj.insert( + "thinking".to_string(), + json!({ + "type": "enabled", + "budget_tokens": budget_tokens + }), + ); + obj.insert("temperature".to_string(), json!(2)); + } } ThinkingType::Disabled => { if let Some(temp) = model_config.temperature { @@ -1114,19 +1114,12 @@ mod tests { #[test] fn test_create_request_reasoning_effort_xhigh() -> anyhow::Result<()> { - let model_config = ModelConfig { - model_name: "o3-xhigh".to_string(), - context_limit: Some(4096), - temperature: None, - max_tokens: Some(1024), - toolshim: false, - toolshim_model: None, - fast_model_config: None, - request_params: None, - reasoning: None, - }; + let _guard = env_lock::lock_env([("GOOSE_THINKING_EFFORT", None::<&str>)]); + + let mut model_config = ModelConfig::new_or_fail("gpt-5.4-xhigh"); + model_config.max_tokens = Some(1024); let request = create_request(&model_config, "system", &[], &[], &ImageFormat::OpenAi)?; - assert_eq!(request["model"], "o3"); + assert_eq!(request["model"], "gpt-5.4"); assert_eq!(request["reasoning_effort"], "xhigh"); Ok(()) } @@ -1209,10 +1202,17 @@ mod tests { #[test] fn test_create_request_enabled_thinking_budget_tracks_effort() -> anyhow::Result<()> { + let _guard = env_lock::lock_env([ + ("GOOSE_THINKING_EFFORT", None::<&str>), + ("ANTHROPIC_THINKING_BUDGET", None::<&str>), + ("CLAUDE_THINKING_BUDGET", None::<&str>), + ]); + for (effort, expected_budget) in [ ("low", 4000), ("medium", 10000), ("high", 16000), + ("xhigh", 24000), ("max", 32000), ] { let mut model_config = ModelConfig::new_or_fail("databricks-claude-3-7-sonnet"); diff --git a/crates/goose/src/providers/formats/google.rs b/crates/goose/src/providers/formats/google.rs index b35c2db504a0..b0de7afebaeb 100644 --- a/crates/goose/src/providers/formats/google.rs +++ b/crates/goose/src/providers/formats/google.rs @@ -553,7 +553,9 @@ fn get_thinking_config(model_config: &ModelConfig) -> Option { ThinkingEffort::Off | ThinkingEffort::Low | ThinkingEffort::Medium => { ThinkingLevel::Low } - ThinkingEffort::High | ThinkingEffort::Max => ThinkingLevel::High, + ThinkingEffort::High | ThinkingEffort::XHigh | ThinkingEffort::Max => { + ThinkingLevel::High + } }; Some(ThinkingConfig { diff --git a/crates/goose/src/providers/formats/openrouter.rs b/crates/goose/src/providers/formats/openrouter.rs index 22ac7465b252..d7d7ef35bae6 100644 --- a/crates/goose/src/providers/formats/openrouter.rs +++ b/crates/goose/src/providers/formats/openrouter.rs @@ -94,6 +94,7 @@ fn reasoning_effort_for_openrouter(effort: ThinkingEffort) -> &'static str { ThinkingEffort::Low => "low", ThinkingEffort::Medium => "medium", ThinkingEffort::High => "high", + ThinkingEffort::XHigh => "xhigh", ThinkingEffort::Max => "xhigh", } } diff --git a/crates/goose/src/providers/utils.rs b/crates/goose/src/providers/utils.rs index 87be4af7515f..1c18d3a105cc 100644 --- a/crates/goose/src/providers/utils.rs +++ b/crates/goose/src/providers/utils.rs @@ -251,6 +251,7 @@ pub fn openai_reasoning_effort_for_thinking( ThinkingEffort::Low => &["low", "medium", "high", "xhigh"], ThinkingEffort::Medium => &["medium", "high", "low", "xhigh"], ThinkingEffort::High => &["high", "medium", "xhigh", "low"], + ThinkingEffort::XHigh => &["xhigh", "high", "medium", "low"], ThinkingEffort::Max => &["xhigh", "high", "medium", "low"], }; diff --git a/ui/desktop/openapi.json b/ui/desktop/openapi.json index 7cebf3353e59..90f267e14516 100644 --- a/ui/desktop/openapi.json +++ b/ui/desktop/openapi.json @@ -8676,6 +8676,7 @@ "low", "medium", "high", + "xhigh", "max" ] }, diff --git a/ui/desktop/src/api/types.gen.ts b/ui/desktop/src/api/types.gen.ts index b5535a239fb6..10463baaefb0 100644 --- a/ui/desktop/src/api/types.gen.ts +++ b/ui/desktop/src/api/types.gen.ts @@ -1502,7 +1502,7 @@ export type ThinkingContent = { thinking: string; }; -export type ThinkingEffort = 'off' | 'low' | 'medium' | 'high' | 'max'; +export type ThinkingEffort = 'off' | 'low' | 'medium' | 'high' | 'xhigh' | 'max'; export type TokenState = { accumulatedCost?: number | null; diff --git a/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx b/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx index e127fd5e92da..0e831c010b1b 100644 --- a/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx +++ b/ui/desktop/src/components/settings/models/subcomponents/SwitchModelModal.tsx @@ -26,6 +26,8 @@ import { getPredefinedModelsFromEnv, shouldShowPredefinedModels } from '../prede import type { ProviderType, ThinkingEffort } from '../../../../api'; import { trackModelChanged } from '../../../../utils/analytics'; +const DEFAULT_THINKING_EFFORT: ThinkingEffort = 'high'; + const i18n = defineMessages({ thinkingEffortOff: { id: 'switchModelModal.thinkingEffortOff', @@ -51,6 +53,10 @@ const i18n = defineMessages({ id: 'switchModelModal.claudeEffortHigh', defaultMessage: 'High - Deep reasoning (default)', }, + claudeEffortXHigh: { + id: 'switchModelModal.claudeEffortXHigh', + defaultMessage: 'XHigh - Extended coding and agentic work', + }, claudeEffortMax: { id: 'switchModelModal.claudeEffortMax', defaultMessage: 'Max - No constraints on thinking depth', @@ -260,6 +266,7 @@ export const SwitchModelModal = ({ { value: 'low', label: intl.formatMessage(i18n.claudeEffortLow) }, { value: 'medium', label: intl.formatMessage(i18n.claudeEffortMedium) }, { value: 'high', label: intl.formatMessage(i18n.claudeEffortHigh) }, + { value: 'xhigh', label: intl.formatMessage(i18n.claudeEffortXHigh) }, { value: 'max', label: intl.formatMessage(i18n.claudeEffortMax) }, ]; @@ -413,12 +420,16 @@ export const SwitchModelModal = ({ }; if (showThinkingControl) { - const effort = thinkingEffort ?? modelObj.request_params?.thinking_effort ?? 'off'; - modelObj = { - ...modelObj, - request_params: { ...modelObj.request_params, thinking_effort: effort }, - }; - upsert('GOOSE_THINKING_EFFORT', effort, false).catch(console.warn); + const effort = (thinkingEffort ?? + modelObj.request_params?.thinking_effort ?? + DEFAULT_THINKING_EFFORT) as ThinkingEffort; + if (effort) { + modelObj = { + ...modelObj, + request_params: { ...modelObj.request_params, thinking_effort: effort }, + }; + upsert('GOOSE_THINKING_EFFORT', effort, false).catch(console.warn); + } } const success = await changeModel(sessionId, modelObj); @@ -704,6 +715,11 @@ export const SwitchModelModal = ({ } }; + const selectedThinkingEffort = + thinkingEffort ?? + (selectedPredefinedModel?.request_params?.thinking_effort as ThinkingEffort | undefined) ?? + DEFAULT_THINKING_EFFORT; + const thinkingEffortControl = showThinkingControl && (