diff --git a/scripts/codex_audit_service.py b/scripts/codex_audit_service.py index f712281..0dc03e3 100644 --- a/scripts/codex_audit_service.py +++ b/scripts/codex_audit_service.py @@ -1,20 +1,9 @@ #!/usr/bin/env python3 -"""AI Gateway — unified service for LLM analysis and Codex execution. - -Architecture: - - POST /v1/codex-audit/jobs ──▶ AiGateway - { │ - "task": "analyze" | "execute", │──▶ LlmAdapter (API call, no repo) - "model": "claude-sonnet-4-6", │──▶ CodexAdapter (codex exec, needs repo) - "prompt": "...", │──▶ FutureAdapter (extensible) - ... │ - } │ - -Benefits: - - API keys live on the VPS (one place), not in N repos - - New AI backends = new adapter class, no service changes - - Same auth, same job lifecycle, same polling for all tasks +"""Codex audit service — authenticated VPS facade for Codex execution. + +The VPS service intentionally runs only Codex. Claude/GPT direct API fallbacks +remain in caller-side GitHub workflows/scripts so provider API keys do not live +in, or pass through, this service. """ from __future__ import annotations @@ -50,7 +39,7 @@ _JWKS_CACHE_EXPIRES_AT = 0.0 _JOB_WRITE_LOCK = threading.Lock() -SUPPORTED_TASKS = frozenset({"analyze", "execute"}) +SUPPORTED_TASKS = frozenset({"execute"}) AUDIT_EXECUTE_TASKS = frozenset({"monthly_snapshot_audit", "long_horizon_signal_shadow"}) CODEX_REVIEW_TASKS = frozenset({"pr_review", "review"}) SUPPORTED_MODES = frozenset({"review_only", "review_and_fix"}) @@ -59,7 +48,7 @@ TASK_COMPLEXITY_MEDIUM = "medium" TASK_COMPLEXITY_HIGH = "high" TASK_COMPLEXITY_LEVELS = (TASK_COMPLEXITY_LOW, TASK_COMPLEXITY_MEDIUM, TASK_COMPLEXITY_HIGH) -AI_GATEWAY_LLM_DEFAULT_MODEL = "claude-sonnet-4-6" +AI_GATEWAY_LLM_DEFAULT_MODEL = "gpt-5.4" AI_GATEWAY_LLM_DEFAULT_MODEL_LOW = os.environ.get( "AI_GATEWAY_LLM_LOW_COMPLEXITY_MODEL", "gpt-5.4-mini" ).strip() @@ -67,7 +56,7 @@ "AI_GATEWAY_LLM_MEDIUM_COMPLEXITY_MODEL", AI_GATEWAY_LLM_DEFAULT_MODEL ).strip() AI_GATEWAY_LLM_DEFAULT_MODEL_HIGH = os.environ.get( - "AI_GATEWAY_LLM_HIGH_COMPLEXITY_MODEL", "claude-fable-5" + "AI_GATEWAY_LLM_HIGH_COMPLEXITY_MODEL", "gpt-5.5" ).strip() @@ -77,10 +66,8 @@ class AiAdapter(ABC): """Base adapter for AI backends. - Each adapter implements one AI backend: - - LlmAdapter: calls Claude/GPT API directly (text-only, no repo) - - CodexAdapter: runs codex exec (code changes, repo checkout) - - FutureAdapter: your custom backend + The service currently exposes only the Codex backend. Caller-side scripts + may still perform Claude/GPT direct API fallback outside the VPS service. The adapter receives: - prompt: the full instruction text @@ -245,7 +232,6 @@ def _build_env(self) -> dict[str, str]: # ── Adapter Registry ───────────────────────────────────────────────── _ADAPTER_REGISTRY: dict[str, AiAdapter] = { - "analyze": LlmAdapter(), "execute": CodexAdapter(), } @@ -283,13 +269,7 @@ def resolve_adapter(task: str, model: str) -> AiAdapter: def _detect_task_from_model(model: str) -> str: - """Determine the task type from the model name. - Claude/GPT models → analyze (API call). - Others (codex, empty) → execute (codex CLI). - """ - m = (model or "").lower().strip() - if m.startswith("claude") or m.startswith("gpt"): - return "analyze" + """The VPS service is Codex-only; API model names do not select API adapters.""" return "execute" @@ -823,11 +803,11 @@ def _handle_post_job(self) -> None: # Execute tasks run async (slow, repo clone) self._handle_async_job(payload) else: - # Analyze tasks run sync (fast, API call) + # Non-Codex tasks are rejected during validation; this is defensive. self._handle_sync_task(payload) def _handle_sync_task(self, payload: dict[str, Any]) -> None: - """Analyze tasks: call LLM API directly, return result inline.""" + """Defensive path for unsupported sync tasks.""" try: output = _run_task(payload, repo_dir=None) _json_response(self, HTTPStatus.OK, {"status": "succeeded", "output": output}) diff --git a/tests/test_codex_audit_service_complexity.py b/tests/test_codex_audit_service_complexity.py index 327bd0d..1326c36 100644 --- a/tests/test_codex_audit_service_complexity.py +++ b/tests/test_codex_audit_service_complexity.py @@ -34,8 +34,8 @@ def setUp(self) -> None: self._orig_high_char = _service.TASK_COMPLEXITY_HIGH_PROMPT_THRESHOLD _service.AI_GATEWAY_LLM_DEFAULT_MODEL_LOW = "gpt-test-low" - _service.AI_GATEWAY_LLM_DEFAULT_MODEL_MEDIUM = "claude-test-medium" - _service.AI_GATEWAY_LLM_DEFAULT_MODEL_HIGH = "claude-test-high" + _service.AI_GATEWAY_LLM_DEFAULT_MODEL_MEDIUM = "gpt-test-medium" + _service.AI_GATEWAY_LLM_DEFAULT_MODEL_HIGH = "gpt-test-high" _service.TASK_COMPLEXITY_MEDIUM_LINE_THRESHOLD = 40 _service.TASK_COMPLEXITY_HIGH_LINE_THRESHOLD = 80 _service.TASK_COMPLEXITY_MEDIUM_PROMPT_THRESHOLD = 120 @@ -60,7 +60,7 @@ def test_review_alias_uses_complexity_routing(self) -> None: }, "review", ) - self.assertEqual(model, "claude-test-high") + self.assertEqual(model, "gpt-test-high") def test_auto_complexity_uses_prompt_estimation(self) -> None: payload = { @@ -70,16 +70,17 @@ def test_auto_complexity_uses_prompt_estimation(self) -> None: "changed_lines": 120, } model = _service._resolve_model(payload, "pr_review") - self.assertEqual(model, "claude-test-high") + self.assertEqual(model, "gpt-test-high") def test_task_requires_async_review_and_execute(self) -> None: self.assertTrue(_service._task_requires_async("pr_review")) self.assertTrue(_service._task_requires_async("review")) self.assertTrue(_service._task_requires_async("execute")) self.assertTrue(_service._task_requires_async("monthly_snapshot_audit")) - self.assertFalse(_service._task_requires_async("analyze")) self.assertEqual(_service._adapter_task("execute", ""), "execute") self.assertEqual(_service._adapter_task("pr_review", ""), "execute") + with self.assertRaisesRegex(ValueError, "Unsupported task='analyze'"): + _service.resolve_adapter("analyze", "") def test_direct_api_model_for_complexity_reads_provider_specific_env(self) -> None: with mock.patch.dict(