Skip to content
This repository was archived by the owner on Jul 3, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 13 additions & 33 deletions scripts/codex_audit_service.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,9 @@
#!/usr/bin/env python3
"""AI Gateway — unified service for LLM analysis and Codex execution.

Architecture:

POST /v1/codex-audit/jobs ──▶ AiGateway
{ │
"task": "analyze" | "execute", │──▶ LlmAdapter (API call, no repo)
"model": "claude-sonnet-4-6", │──▶ CodexAdapter (codex exec, needs repo)
"prompt": "...", │──▶ FutureAdapter (extensible)
... │
} │

Benefits:
- API keys live on the VPS (one place), not in N repos
- New AI backends = new adapter class, no service changes
- Same auth, same job lifecycle, same polling for all tasks
"""Codex audit service — authenticated VPS facade for Codex execution.

The VPS service intentionally runs only Codex. Claude/GPT direct API fallbacks
remain in caller-side GitHub workflows/scripts so provider API keys do not live
in, or pass through, this service.
"""

from __future__ import annotations
Expand Down Expand Up @@ -50,7 +39,7 @@
_JWKS_CACHE_EXPIRES_AT = 0.0
_JOB_WRITE_LOCK = threading.Lock()

SUPPORTED_TASKS = frozenset({"analyze", "execute"})
SUPPORTED_TASKS = frozenset({"execute"})
AUDIT_EXECUTE_TASKS = frozenset({"monthly_snapshot_audit", "long_horizon_signal_shadow"})
CODEX_REVIEW_TASKS = frozenset({"pr_review", "review"})
SUPPORTED_MODES = frozenset({"review_only", "review_and_fix"})
Expand All @@ -59,15 +48,15 @@
TASK_COMPLEXITY_MEDIUM = "medium"
TASK_COMPLEXITY_HIGH = "high"
TASK_COMPLEXITY_LEVELS = (TASK_COMPLEXITY_LOW, TASK_COMPLEXITY_MEDIUM, TASK_COMPLEXITY_HIGH)
# Default model per complexity tier. Each tier reads its own environment
# variable at import time and strips surrounding whitespace from the value;
# the medium tier falls back to AI_GATEWAY_LLM_DEFAULT_MODEL when unset.
AI_GATEWAY_LLM_DEFAULT_MODEL = "gpt-5.4"
AI_GATEWAY_LLM_DEFAULT_MODEL_LOW = os.environ.get(
    "AI_GATEWAY_LLM_LOW_COMPLEXITY_MODEL", "gpt-5.4-mini"
).strip()
AI_GATEWAY_LLM_DEFAULT_MODEL_MEDIUM = os.environ.get(
    "AI_GATEWAY_LLM_MEDIUM_COMPLEXITY_MODEL", AI_GATEWAY_LLM_DEFAULT_MODEL
).strip()
AI_GATEWAY_LLM_DEFAULT_MODEL_HIGH = os.environ.get(
    "AI_GATEWAY_LLM_HIGH_COMPLEXITY_MODEL", "gpt-5.5"
).strip()


Expand All @@ -77,10 +66,8 @@
class AiAdapter(ABC):
"""Base adapter for AI backends.

Each adapter implements one AI backend:
- LlmAdapter: calls Claude/GPT API directly (text-only, no repo)
- CodexAdapter: runs codex exec (code changes, repo checkout)
- FutureAdapter: your custom backend
The service currently exposes only the Codex backend. Caller-side scripts
may still perform Claude/GPT direct API fallback outside the VPS service.

The adapter receives:
- prompt: the full instruction text
Expand Down Expand Up @@ -245,7 +232,6 @@ def _build_env(self) -> dict[str, str]:
# ── Adapter Registry ─────────────────────────────────────────────────

# Maps a task name to the adapter instance that serves it. Only the Codex
# backend is registered: the service is Codex-only, so "analyze" has no entry.
_ADAPTER_REGISTRY: dict[str, AiAdapter] = {
    "execute": CodexAdapter(),
}

Expand Down Expand Up @@ -283,13 +269,7 @@ def resolve_adapter(task: str, model: str) -> AiAdapter:


def _detect_task_from_model(model: str) -> str:
"""Determine the task type from the model name.
Claude/GPT models → analyze (API call).
Others (codex, empty) → execute (codex CLI).
"""
m = (model or "").lower().strip()
if m.startswith("claude") or m.startswith("gpt"):
return "analyze"
"""The VPS service is Codex-only; API model names do not select API adapters."""
return "execute"


Expand Down Expand Up @@ -823,11 +803,11 @@ def _handle_post_job(self) -> None:
# Execute tasks run async (slow, repo clone)
self._handle_async_job(payload)
else:
# Analyze tasks run sync (fast, API call)
# Non-Codex tasks are rejected during validation; this is defensive.
self._handle_sync_task(payload)

def _handle_sync_task(self, payload: dict[str, Any]) -> None:
"""Analyze tasks: call LLM API directly, return result inline."""
"""Defensive path for unsupported sync tasks."""
try:
output = _run_task(payload, repo_dir=None)
_json_response(self, HTTPStatus.OK, {"status": "succeeded", "output": output})
Expand Down
11 changes: 6 additions & 5 deletions tests/test_codex_audit_service_complexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def setUp(self) -> None:
self._orig_high_char = _service.TASK_COMPLEXITY_HIGH_PROMPT_THRESHOLD

_service.AI_GATEWAY_LLM_DEFAULT_MODEL_LOW = "gpt-test-low"
_service.AI_GATEWAY_LLM_DEFAULT_MODEL_MEDIUM = "claude-test-medium"
_service.AI_GATEWAY_LLM_DEFAULT_MODEL_HIGH = "claude-test-high"
_service.AI_GATEWAY_LLM_DEFAULT_MODEL_MEDIUM = "gpt-test-medium"
_service.AI_GATEWAY_LLM_DEFAULT_MODEL_HIGH = "gpt-test-high"
_service.TASK_COMPLEXITY_MEDIUM_LINE_THRESHOLD = 40
_service.TASK_COMPLEXITY_HIGH_LINE_THRESHOLD = 80
_service.TASK_COMPLEXITY_MEDIUM_PROMPT_THRESHOLD = 120
Expand All @@ -60,7 +60,7 @@ def test_review_alias_uses_complexity_routing(self) -> None:
},
"review",
)
self.assertEqual(model, "claude-test-high")
self.assertEqual(model, "gpt-test-high")

def test_auto_complexity_uses_prompt_estimation(self) -> None:
payload = {
Expand All @@ -70,16 +70,17 @@ def test_auto_complexity_uses_prompt_estimation(self) -> None:
"changed_lines": 120,
}
model = _service._resolve_model(payload, "pr_review")
self.assertEqual(model, "claude-test-high")
self.assertEqual(model, "gpt-test-high")

def test_task_requires_async_review_and_execute(self) -> None:
    """Check async classification, adapter-task routing, and 'analyze' rejection."""
    # Review, execute and audit tasks must all be flagged as async.
    for async_task in ("pr_review", "review", "execute", "monthly_snapshot_audit"):
        self.assertTrue(_service._task_requires_async(async_task))
    self.assertFalse(_service._task_requires_async("analyze"))

    # With no model given, both tasks resolve to the "execute" adapter task.
    for routed_task in ("execute", "pr_review"):
        self.assertEqual(_service._adapter_task(routed_task, ""), "execute")

    expected_error = "Unsupported task='analyze'"
    with self.assertRaisesRegex(ValueError, expected_error):
        _service.resolve_adapter("analyze", "")

def test_direct_api_model_for_complexity_reads_provider_specific_env(self) -> None:
with mock.patch.dict(
Expand Down
Loading