diff --git a/src/paperscout/errors.py b/src/paperscout/errors.py index 675e9e2..23b0ece 100644 --- a/src/paperscout/errors.py +++ b/src/paperscout/errors.py @@ -17,3 +17,11 @@ class FailureCategory(str, enum.Enum): class ConfigurationError(Exception): """Permanent misconfiguration (credentials, required integration settings).""" + + +class IndexRefreshError(Exception): + """Transient index download failure (retry-eligible).""" + + def __init__(self, category: FailureCategory, message: str = "") -> None: + super().__init__(message) + self.category = category diff --git a/src/paperscout/models.py b/src/paperscout/models.py index e63413d..3138787 100644 --- a/src/paperscout/models.py +++ b/src/paperscout/models.py @@ -192,6 +192,14 @@ class CycleStatus(str, Enum): FAILED = "failed" +@dataclass(frozen=True, slots=True) +class IndexRefreshResult: + """Outcome of ``WG21Index.refresh()`` / ``fetch()``.""" + + papers: dict[str, Paper] + stale: bool = False + + @dataclass(frozen=True, slots=True) class CycleResult: """Discriminated probe cycle result (success vs empty vs failed).""" diff --git a/src/paperscout/monitor.py b/src/paperscout/monitor.py index 643bd0b..4c8c703 100644 --- a/src/paperscout/monitor.py +++ b/src/paperscout/monitor.py @@ -18,8 +18,8 @@ from .concurrency import run_blocking_io from .config import Settings, settings -from .errors import ConfigurationError, FailureCategory -from .models import CycleResult, CycleStatus, Paper, PerUserMatches, ProbeHit +from .errors import ConfigurationError, FailureCategory, IndexRefreshError +from .models import CycleResult, CycleStatus, IndexRefreshResult, Paper, PerUserMatches, ProbeHit from .protocols import ( SOURCE_ISO_PROBE, SOURCE_OPEN_STD, @@ -187,6 +187,7 @@ def __init__( self._last_cycle_status: CycleStatus | None = None self._last_cycle_error: str | None = None self._last_ops_alert: float | None = None + self._index_stale = False self._health_lock = threading.Lock() self._health_snapshot: SchedulerSnapshot | None = None @@ -233,12 +234,24 @@ def _log_index_diff(self, diff: DiffResult) -> None: async def _poll_sources(self, *, baseline: bool = False) -> tuple[DiffResult, list[ProbeHit]]: diff = DiffResult(new_papers=[], updated_papers=[]) probe_hits: list[ProbeHit] = [] + self._index_stale = False for source in self.sources: if not self._source_enabled(source.source_id): continue - current = await source.fetch() + raw = await source.fetch() + if source.source_id == SOURCE_WG21_INDEX: + refresh_result = cast(IndexRefreshResult, raw) + current = refresh_result.papers + if refresh_result.stale: + log.warning( + "INDEX-STALE papers=%d (poll using persisted cache)", + len(current), + ) + self._index_stale = True + else: + current = raw if baseline: self._snapshots[source.source_id] = current if source.source_id == SOURCE_ISO_PROBE: @@ -253,8 +266,7 @@ async def _poll_sources(self, *, baseline: bool = False) -> tuple[DiffResult, li if source.source_id == SOURCE_WG21_INDEX: diff = result - papers = cast(dict[str, Paper], current) - log.info("INDEX-LOAD papers=%d", len(papers)) + log.info("INDEX-LOAD papers=%d", len(current)) self._log_index_diff(diff) elif source.source_id == SOURCE_ISO_PROBE: cycle = cast(CycleResult, current) @@ -293,6 +305,15 @@ def _mark_poll_successful_if_probe_ok(self) -> None: if self._last_cycle_status is not CycleStatus.FAILED: self._last_successful_poll = time.time() + def _advance_staleness_clock_if_ok(self) -> None: + """Advance staleness clock when index is fresh and probe cycle did not fail.""" + if self._index_stale: + return + if self.cfg.enable_iso_probe: + self._mark_poll_successful_if_probe_ok() + else: + self._last_successful_poll = time.time() + def _publish_health_snapshot(self) -> None: """Publish immutable snapshot for cross-thread health reads (event loop only).""" lsp = self._last_successful_poll @@ -371,11 +392,8 @@ async def poll_once(self) -> PollResult: if not self._seeded: seed_result = await self.seed() if not seed_result.had_prior_state: - if self.cfg.enable_iso_probe: - # Stats already recorded in seed() after run_cycle. - self._mark_poll_successful_if_probe_ok() - else: - self._last_successful_poll = time.time() + self._advance_staleness_clock_if_ok() + if not self.cfg.enable_iso_probe: self._record_probe_cycle_completion() self._publish_health_snapshot() return PollResult( @@ -415,11 +433,8 @@ async def poll_once(self) -> PollResult: ) if self.notify_callback: self.notify_callback(result) - if self.cfg.enable_iso_probe: - # Stats already recorded in seed() after run_cycle. - self._mark_poll_successful_if_probe_ok() - else: - self._last_successful_poll = time.time() + self._advance_staleness_clock_if_ok() + if not self.cfg.enable_iso_probe: self._record_probe_cycle_completion() self._publish_health_snapshot() return result @@ -512,11 +527,8 @@ async def poll_once(self) -> PollResult: len(dp_transitions), len(per_user_matches), ) - if self.cfg.enable_iso_probe: - # Stats already recorded above after run_cycle. - self._mark_poll_successful_if_probe_ok() - else: - self._last_successful_poll = time.time() + self._advance_staleness_clock_if_ok() + if not self.cfg.enable_iso_probe: self._record_probe_cycle_completion() self._publish_health_snapshot() return result @@ -581,6 +593,13 @@ async def run_forever(self, shutdown_event: asyncio.Event | None = None) -> None exc, ) return + except IndexRefreshError as exc: + log.error( + "POLL-ERROR failure_category=%s poll=%d %s", + exc.category.value, + self._poll_count, + exc, + ) except httpx.TimeoutException as exc: log.error( "POLL-ERROR failure_category=%s poll=%d %s", diff --git a/src/paperscout/sources.py b/src/paperscout/sources.py index e9475b7..501dd26 100644 --- a/src/paperscout/sources.py +++ b/src/paperscout/sources.py @@ -17,8 +17,8 @@ import httpx from .config import Settings, settings -from .errors import FailureCategory -from .models import CycleResult, CycleStatus, Paper, ProbeHit, Tier +from .errors import ConfigurationError, FailureCategory, IndexRefreshError +from .models import CycleResult, CycleStatus, IndexRefreshResult, Paper, ProbeHit, Tier from .protocols import SOURCE_ISO_PROBE, SOURCE_OPEN_STD, SOURCE_WG21_INDEX from .storage import PaperCache, ProbeState, UserWatchlist @@ -54,34 +54,51 @@ def __init__(self, pool, cfg: Settings | None = None): self._max_p: int = 0 # absolute highest P-number self._sorted_p_nums: list[int] = [] # sorted unique P-numbers, for gap analysis - async def refresh(self) -> dict[str, Paper]: + async def refresh(self) -> IndexRefreshResult: """Load index from cache or network; populate ``self.papers``.""" cached = self._cache.read_if_fresh() - if cached is not None: - log.info("Loaded %d entries from cache", len(cached)) - self.papers = self._parse_and_index(cached) - return self.papers - - raw = await self._download() - if raw is not None: - self._cache.write(raw) - log.info("Downloaded and cached %d entries", len(raw)) - self.papers = self._parse_and_index(raw) - return self.papers - - stale = self._cache.read() - if stale is not None: + if cached: + papers, max_rev, max_p = self._parse_raw(cached) + if papers: + self._commit_index(papers, max_rev, max_p) + log.info("Loaded %d entries from cache", len(papers)) + return IndexRefreshResult(self.papers, stale=False) + + raw, category = await self._download() + if raw: + papers, max_rev, max_p = self._parse_raw(raw) + if papers: + self._cache.write(raw) + self._commit_index(papers, max_rev, max_p) + log.info("Downloaded and cached %d entries", len(papers)) + return IndexRefreshResult(self.papers, stale=False) log.warning( - "INDEX-STALE-FALLBACK entries=%d (download failed; using persisted cache)", - len(stale), + "INDEX-FETCH failure_category=%s url=%s payload produced zero parseable papers", + FailureCategory.CONFIGURATION.value, + WG21_INDEX_URL, ) - self.papers = self._parse_and_index(stale) - return self.papers + category = FailureCategory.CONFIGURATION + + stale = self._cache.read() + if stale: + papers, max_rev, max_p = self._parse_raw(stale) + if papers: + self._commit_index(papers, max_rev, max_p) + log.warning( + "INDEX-STALE-FALLBACK entries=%d (download failed; using persisted cache)", + len(papers), + ) + return IndexRefreshResult(self.papers, stale=True) log.error("No index data available") - return self.papers + self._raise_on_download_failure(category or FailureCategory.CONFIGURATION) - async def _download(self) -> dict | None: + def _raise_on_download_failure(self, category: FailureCategory) -> None: + if category is FailureCategory.CONFIGURATION: + raise ConfigurationError("No index data available") + raise IndexRefreshError(category=category) + + async def _download(self) -> tuple[dict | None, FailureCategory | None]: timeout = httpx.Timeout(self._cfg.wg21_index_timeout_s) try: async with httpx.AsyncClient( @@ -93,9 +110,20 @@ async def _download(self) -> dict | None: resp.raise_for_status() data = resp.json() if isinstance(data, dict): - return data - log.warning("Index response is not a dict") - return None + if not data: + log.warning( + "INDEX-FETCH failure_category=%s url=%s empty index payload", + FailureCategory.CONFIGURATION.value, + WG21_INDEX_URL, + ) + return None, FailureCategory.CONFIGURATION + return data, None + log.warning( + "INDEX-FETCH failure_category=%s url=%s response is not a dict", + FailureCategory.CONFIGURATION.value, + WG21_INDEX_URL, + ) + return None, FailureCategory.CONFIGURATION except httpx.TimeoutException as exc: log.warning( "INDEX-FETCH failure_category=%s url=%s %s", @@ -103,7 +131,7 @@ async def _download(self) -> dict | None: WG21_INDEX_URL, exc, ) - return None + return None, FailureCategory.TIMEOUT except httpx.HTTPStatusError as exc: cat = ( FailureCategory.RATE_LIMIT @@ -116,17 +144,25 @@ async def _download(self) -> dict | None: WG21_INDEX_URL, exc.response.status_code, ) - return None - except (httpx.HTTPError, ValueError) as exc: + return None, cat + except httpx.HTTPError as exc: log.error( "INDEX-FETCH failure_category=%s url=%s %s", FailureCategory.NETWORK.value, WG21_INDEX_URL, exc, ) - return None + return None, FailureCategory.NETWORK + except ValueError as exc: + log.error( + "INDEX-FETCH failure_category=%s url=%s %s", + FailureCategory.CONFIGURATION.value, + WG21_INDEX_URL, + exc, + ) + return None, FailureCategory.CONFIGURATION - def _parse_and_index(self, raw: dict) -> dict[str, Paper]: + def _parse_raw(self, raw: dict) -> tuple[dict[str, Paper], dict[int, int], int]: papers: dict[str, Paper] = {} max_rev: dict[int, int] = {} max_p = 0 @@ -141,9 +177,17 @@ def _parse_and_index(self, raw: dict) -> dict[str, Paper]: prev = max_rev.get(paper.number, -1) if paper.revision > prev: max_rev[paper.number] = paper.revision + return papers, max_rev, max_p + + def _commit_index(self, papers: dict[str, Paper], max_rev: dict[int, int], max_p: int) -> None: + self.papers = papers self._max_rev = max_rev self._max_p = max_p self._sorted_p_nums = sorted(max_rev.keys()) + + def _parse_and_index(self, raw: dict) -> dict[str, Paper]: + papers, max_rev, max_p = self._parse_raw(raw) + self._commit_index(papers, max_rev, max_p) return papers def highest_p_number(self) -> int: @@ -188,8 +232,8 @@ def get_papers_snapshot(self) -> Mapping[str, Paper]: """Read-only mapping copy of ``papers`` (not the live dict object).""" return MappingProxyType(dict(self.papers)) - async def fetch(self) -> dict[str, Paper]: - """DataSource: load index and return the current paper map.""" + async def fetch(self) -> IndexRefreshResult: + """DataSource: load index and return papers plus stale signal.""" return await self.refresh() def diff( diff --git a/tests/test_callback_protocols.py b/tests/test_callback_protocols.py index bca724e..7a9e3bc 100644 --- a/tests/test_callback_protocols.py +++ b/tests/test_callback_protocols.py @@ -8,7 +8,7 @@ from pathlib import Path from unittest.mock import AsyncMock, MagicMock -from paperscout.models import CycleResult, CycleStatus +from paperscout.models import CycleResult, CycleStatus, IndexRefreshResult from paperscout.monitor import DiffResult, PollResult, Scheduler from paperscout.protocols import SOURCE_ISO_PROBE, SOURCE_WG21_INDEX, NotifyCallback, OpsAlertFn from paperscout.sources import ISOProber, WG21Index @@ -45,7 +45,7 @@ def on_ops_alert(message: str) -> None: wg21 = MagicMock(spec=WG21Index) wg21.source_id = SOURCE_WG21_INDEX - wg21.fetch = AsyncMock(return_value={}) + wg21.fetch = AsyncMock(return_value=IndexRefreshResult({}, stale=False)) wg21.diff = MagicMock(return_value=DiffResult([], [])) iso = MagicMock(spec=ISOProber) diff --git a/tests/test_datasource_protocol.py b/tests/test_datasource_protocol.py index 8f15021..cd84fcc 100644 --- a/tests/test_datasource_protocol.py +++ b/tests/test_datasource_protocol.py @@ -6,7 +6,7 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock -from paperscout.models import CycleResult, CycleStatus, Paper +from paperscout.models import CycleResult, CycleStatus, IndexRefreshResult, Paper from paperscout.monitor import DiffResult, Scheduler from paperscout.protocols import SOURCE_ISO_PROBE, SOURCE_WG21_INDEX, DataSource from paperscout.sources import ISOProber, WG21Index @@ -67,7 +67,7 @@ async def test_scheduler_polls_custom_datasource(self, fake_pool): ) wg21 = MagicMock(spec=WG21Index) wg21.source_id = SOURCE_WG21_INDEX - wg21.fetch = AsyncMock(return_value={}) + wg21.fetch = AsyncMock(return_value=IndexRefreshResult({}, stale=False)) wg21.diff = MagicMock(return_value=DiffResult([], [])) iso = MagicMock(spec=ISOProber) diff --git a/tests/test_monitor.py b/tests/test_monitor.py index 9711e1e..fe68369 100644 --- a/tests/test_monitor.py +++ b/tests/test_monitor.py @@ -11,10 +11,11 @@ import httpx import pytest -from paperscout.errors import ConfigurationError +from paperscout.errors import ConfigurationError, FailureCategory, IndexRefreshError from paperscout.models import ( CycleResult, CycleStatus, + IndexRefreshResult, MatchReason, Paper, PerUserMatches, @@ -29,7 +30,7 @@ from paperscout.protocols import SOURCE_ISO_PROBE, SOURCE_WG21_INDEX from paperscout.sources import ISOProber, WG21Index from paperscout.storage import ProbeState, UserWatchlist -from tests.conftest import make_test_settings +from tests.conftest import SAMPLE_INDEX_DATA, make_test_settings def _wait_for_timeout(awaitable, timeout=None): @@ -211,7 +212,7 @@ def _make_mock_wg21() -> MagicMock: mock.papers = {} async def _fetch(): - return dict(mock.papers) + return IndexRefreshResult(dict(mock.papers), stale=False) mock.fetch = AsyncMock(side_effect=_fetch) mock.diff = lambda previous, current: diff_snapshots(previous or {}, current) @@ -260,6 +261,22 @@ def _make_scheduler(fake_pool, **cfg_overrides): return scheduler, wg21, iso, user_watchlist, state +def _make_scheduler_with_real_wg21(fake_pool, **cfg_overrides): + cfg = make_test_settings(**cfg_overrides) + wg21 = WG21Index(fake_pool, cfg=cfg) + iso = _make_mock_iso() + user_watchlist = MagicMock(spec=UserWatchlist) + user_watchlist.matches_for_users.return_value = {} + state = ProbeState(fake_pool) + scheduler = Scheduler( + sources=[wg21, iso], + user_watchlist=user_watchlist, + state=state, + cfg=cfg, + ) + return scheduler, wg21, iso, user_watchlist, state + + class TestScheduler: async def test_poll_once_seeds_on_first_call(self, fake_pool): scheduler, index, prober, _, _ = _make_scheduler(fake_pool) @@ -778,3 +795,87 @@ def capture_wait_for(awaitable, timeout=None): assert len(slept) == 1 assert slept[0] == pytest.approx(300.0) + + async def test_poll_once_propagates_index_refresh_error(self, fake_pool): + scheduler, wg21, _, _, _ = _make_scheduler_with_real_wg21(fake_pool, enable_iso_probe=False) + req = MagicMock() + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=httpx.ReadTimeout("timed out", request=req)) + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: + mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) + with pytest.raises(IndexRefreshError) as exc_info: + await scheduler.poll_once() + assert exc_info.value.category is FailureCategory.TIMEOUT + assert wg21.papers == {} + + async def test_poll_once_uses_stale_index(self, fake_pool, caplog): + scheduler, wg21, prober, _, _ = _make_scheduler_with_real_wg21(fake_pool) + wg21._cache.write(SAMPLE_INDEX_DATA) + wg21._cache.ttl_seconds = 0 + req = MagicMock() + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=httpx.ReadTimeout("timed out", request=req)) + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: + mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) + with caplog.at_level(logging.WARNING, logger="paperscout.monitor"): + await scheduler.poll_once() + assert "P2300R10" in wg21.papers + assert "INDEX-STALE" in caplog.text + assert scheduler._last_successful_poll is None + prober.fetch.assert_called_once() + + prober.fetch.reset_mock() + await scheduler.poll_once() + assert scheduler._last_successful_poll is None + prober.fetch.assert_called_once() + + async def test_run_forever_continues_after_index_refresh_error(self, fake_pool, caplog): + scheduler, _, _, _, _ = _make_scheduler_with_real_wg21( + fake_pool, + enable_iso_probe=False, + poll_interval_minutes=0, + poll_overrun_cooldown_seconds=0, + ) + shutdown_event = asyncio.Event() + call_count = 0 + req = MagicMock() + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=httpx.ReadTimeout("timed out", request=req)) + real_poll_once = scheduler.poll_once + + async def counting_poll_once(): + nonlocal call_count + call_count += 1 + if call_count >= 2: + shutdown_event.set() + return await real_poll_once() + + scheduler.poll_once = counting_poll_once + with caplog.at_level(logging.ERROR, logger="paperscout.monitor"): + with patch("asyncio.wait_for", _wait_for_timeout): + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: + mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) + await scheduler.run_forever(shutdown_event) + assert call_count == 2 + assert "failure_category=TIMEOUT" in caplog.text + + async def test_run_forever_halts_on_configuration_error_from_index(self, fake_pool, caplog): + scheduler, _, _, _, _ = _make_scheduler_with_real_wg21(fake_pool, enable_iso_probe=False) + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.json = MagicMock(return_value=[1, 2, 3]) + mock_resp.raise_for_status = MagicMock() + mock_resp.headers = {} + mock_client = AsyncMock() + mock_client.get = AsyncMock(return_value=mock_resp) + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: + mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) + with caplog.at_level(logging.CRITICAL, logger="paperscout.monitor"): + with patch("asyncio.sleep", AsyncMock(side_effect=AssertionError("sleep reached"))): + await scheduler.run_forever() + assert "failure_category=CONFIGURATION" in caplog.text + assert "POLL-FATAL" in caplog.text diff --git a/tests/test_sources.py b/tests/test_sources.py index b8a714e..43756ba 100644 --- a/tests/test_sources.py +++ b/tests/test_sources.py @@ -12,6 +12,7 @@ import pytest from paperscout.config import override_settings +from paperscout.errors import ConfigurationError, FailureCategory, IndexRefreshError from paperscout.models import CycleStatus, Paper from paperscout.sources import ( ISOProber, @@ -92,33 +93,41 @@ def _old_lm() -> datetime: class TestWG21Index: async def test_refresh_downloads_when_no_cache(self, fake_pool): index = WG21Index(fake_pool) - with patch.object(index, "_download", AsyncMock(return_value=SAMPLE_INDEX_DATA)): - papers = await index.refresh() - assert "P2300R10" in papers - assert "N4950" in papers + with patch.object(index, "_download", AsyncMock(return_value=(SAMPLE_INDEX_DATA, None))): + result = await index.refresh() + assert result.stale is False + assert "P2300R10" in result.papers + assert "N4950" in result.papers async def test_refresh_uses_cache_when_fresh(self, fake_pool): index = WG21Index(fake_pool) index._cache.write(SAMPLE_INDEX_DATA) mock_download = AsyncMock() with patch.object(index, "_download", mock_download): - papers = await index.refresh() + result = await index.refresh() mock_download.assert_not_called() - assert "P2300R10" in papers + assert result.stale is False + assert "P2300R10" in result.papers async def test_refresh_falls_back_to_stale_cache(self, fake_pool): index = WG21Index(fake_pool) index._cache.write(SAMPLE_INDEX_DATA) index._cache.ttl_seconds = 0 - with patch.object(index, "_download", AsyncMock(return_value=None)): - papers = await index.refresh() - assert "P2300R10" in papers - - async def test_refresh_returns_empty_when_no_data(self, fake_pool): + with patch.object( + index, "_download", AsyncMock(return_value=(None, FailureCategory.NETWORK)) + ): + result = await index.refresh() + assert result.stale is True + assert "P2300R10" in result.papers + + async def test_refresh_raises_on_transient_failure_when_no_data(self, fake_pool): index = WG21Index(fake_pool) - with patch.object(index, "_download", AsyncMock(return_value=None)): - papers = await index.refresh() - assert papers == {} + with patch.object( + index, "_download", AsyncMock(return_value=(None, FailureCategory.NETWORK)) + ): + with pytest.raises(IndexRefreshError) as exc_info: + await index.refresh() + assert exc_info.value.category is FailureCategory.NETWORK async def test_download_success(self, fake_pool): index = WG21Index(fake_pool) @@ -127,18 +136,22 @@ async def test_download_success(self, fake_pool): with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) - result = await index._download() - assert result == SAMPLE_INDEX_DATA + data, category = await index._download() + assert data == SAMPLE_INDEX_DATA + assert category is None - async def test_download_non_dict_response(self, fake_pool): + async def test_download_non_dict_response(self, fake_pool, caplog): index = WG21Index(fake_pool) mock_resp = _make_response(200, json_data=[1, 2, 3]) mock_client = _make_async_client(get_resp=mock_resp) with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) - result = await index._download() - assert result is None + with caplog.at_level(logging.WARNING, logger="paperscout.sources"): + data, category = await index._download() + assert data is None + assert category is FailureCategory.CONFIGURATION + assert "failure_category=CONFIGURATION" in caplog.text async def test_download_http_error(self, fake_pool, caplog): index = WG21Index(fake_pool) @@ -148,8 +161,9 @@ async def test_download_http_error(self, fake_pool, caplog): mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) with caplog.at_level(logging.ERROR, logger="paperscout.sources"): - result = await index._download() - assert result is None + data, category = await index._download() + assert data is None + assert category is FailureCategory.NETWORK assert "failure_category=NETWORK" in caplog.text async def test_download_http_status_429_emits_rate_limit(self, fake_pool, caplog): @@ -160,8 +174,9 @@ async def test_download_http_status_429_emits_rate_limit(self, fake_pool, caplog mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) with caplog.at_level(logging.ERROR, logger="paperscout.sources"): - result = await index._download() - assert result is None + data, category = await index._download() + assert data is None + assert category is FailureCategory.RATE_LIMIT assert "failure_category=RATE_LIMIT" in caplog.text async def test_download_http_status_500_emits_network(self, fake_pool, caplog): @@ -172,8 +187,9 @@ async def test_download_http_status_500_emits_network(self, fake_pool, caplog): mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) with caplog.at_level(logging.ERROR, logger="paperscout.sources"): - result = await index._download() - assert result is None + data, category = await index._download() + assert data is None + assert category is FailureCategory.NETWORK assert "failure_category=NETWORK" in caplog.text async def test_download_uses_wg21_index_timeout_from_settings(self, fake_pool): @@ -204,12 +220,129 @@ async def test_refresh_timeout_then_stale_fallback(self, fake_pool, caplog): mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) with caplog.at_level(logging.WARNING, logger="paperscout.sources"): - papers = await index.refresh() + result = await index.refresh() - assert "P2300R10" in papers + assert result.stale is True + assert "P2300R10" in result.papers assert "failure_category=TIMEOUT" in caplog.text assert "INDEX-STALE-FALLBACK" in caplog.text + async def test_refresh_timeout_raises_when_no_stale_cache(self, fake_pool): + index = WG21Index(fake_pool) + req = MagicMock() + mock_client = AsyncMock() + mock_client.get = AsyncMock(side_effect=httpx.ReadTimeout("timed out", request=req)) + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: + mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) + with pytest.raises(IndexRefreshError) as exc_info: + await index.refresh() + assert exc_info.value.category is FailureCategory.TIMEOUT + + async def test_refresh_429_raises_rate_limit(self, fake_pool): + index = WG21Index(fake_pool) + mock_resp = _make_response(429, json_data={}) + mock_client = _make_async_client(get_resp=mock_resp) + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: + mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) + with pytest.raises(IndexRefreshError) as exc_info: + await index.refresh() + assert exc_info.value.category is FailureCategory.RATE_LIMIT + + async def test_refresh_stale_fallback_sets_stale_signal(self, fake_pool): + index = WG21Index(fake_pool) + index._cache.write(SAMPLE_INDEX_DATA) + index._cache.ttl_seconds = 0 + with patch.object( + index, "_download", AsyncMock(return_value=(None, FailureCategory.TIMEOUT)) + ): + result = await index.refresh() + assert result.stale is True + assert len(result.papers) > 0 + + async def test_refresh_no_data_raises_configuration_error(self, fake_pool): + index = WG21Index(fake_pool) + mock_resp = _make_response(200, json_data=[1, 2, 3]) + mock_client = _make_async_client(get_resp=mock_resp) + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: + mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) + with pytest.raises(ConfigurationError, match="No index data available"): + await index.refresh() + + async def test_refresh_empty_cache_does_not_return_fresh_success(self, fake_pool): + index = WG21Index(fake_pool) + index._cache.write({}) + with patch.object( + index, "_download", AsyncMock(return_value=(None, FailureCategory.NETWORK)) + ): + with pytest.raises(IndexRefreshError) as exc_info: + await index.refresh() + assert exc_info.value.category is FailureCategory.NETWORK + + async def test_refresh_unparseable_fresh_cache_falls_through(self, fake_pool): + index = WG21Index(fake_pool) + index._cache.write({"bad": "not-a-dict", "also": 1}) + with patch.object( + index, "_download", AsyncMock(return_value=(None, FailureCategory.NETWORK)) + ): + with pytest.raises(IndexRefreshError) as exc_info: + await index.refresh() + assert exc_info.value.category is FailureCategory.NETWORK + + async def test_refresh_unparseable_download_preserves_stale_cache(self, fake_pool): + index = WG21Index(fake_pool) + index._cache.write(SAMPLE_INDEX_DATA) + index._cache.ttl_seconds = 0 + with patch.object( + index, + "_download", + AsyncMock(return_value=({"bad": "not-a-dict"}, None)), + ): + result = await index.refresh() + assert result.stale is True + assert "P2300R10" in result.papers + + async def test_refresh_rejected_parse_preserves_index_metadata(self, fake_pool): + index = WG21Index(fake_pool) + index._parse_and_index(SAMPLE_INDEX_DATA) + original_max_p = index._max_p + original_papers = dict(index.papers) + index._cache.write({"bad": "not-a-dict"}) + with patch.object( + index, "_download", AsyncMock(return_value=(None, FailureCategory.NETWORK)) + ): + with pytest.raises(IndexRefreshError): + await index.refresh() + assert index._max_p == original_max_p + assert index.papers == original_papers + + async def test_download_json_parse_error_emits_configuration(self, fake_pool, caplog): + index = WG21Index(fake_pool) + mock_resp = _make_response(200) + mock_resp.json = MagicMock(side_effect=ValueError("bad json")) + mock_client = _make_async_client(get_resp=mock_resp) + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: + mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) + with caplog.at_level(logging.ERROR, logger="paperscout.sources"): + data, category = await index._download() + assert data is None + assert category is FailureCategory.CONFIGURATION + assert "failure_category=CONFIGURATION" in caplog.text + + async def test_refresh_malformed_json_raises_configuration_error(self, fake_pool): + index = WG21Index(fake_pool) + mock_resp = _make_response(200) + mock_resp.json = MagicMock(side_effect=ValueError("bad json")) + mock_client = _make_async_client(get_resp=mock_resp) + with patch("paperscout.sources.httpx.AsyncClient") as mock_cls: + mock_cls.return_value.__aenter__ = AsyncMock(return_value=mock_client) + mock_cls.return_value.__aexit__ = AsyncMock(return_value=False) + with pytest.raises(ConfigurationError, match="No index data available"): + await index.refresh() + def test_parse_and_index(self, fake_pool): index = WG21Index(fake_pool) papers = index._parse_and_index(SAMPLE_INDEX_DATA)