From e2325734fbba44153a90d188705dbc9fccea804e Mon Sep 17 00:00:00 2001 From: kirilklein Date: Sat, 30 May 2026 23:13:11 +0200 Subject: [PATCH 1/3] Fix pathologically slow assertion diffs for large inputs (#8998) Comparing very large strings, lists, or dataclasses in an ``assert`` could hang for a long time (sometimes minutes) while pytest built the failure diff. The cost comes from ``difflib.ndiff``: its character-level "fancy replace" step is quadratic in the size of the differing region, and the underlying ``SequenceMatcher`` is quadratic in the number of lines (a large nested structure can pretty-print to hundreds of thousands of lines). Add a deterministic size heuristic (no wall-clock timeouts, per the maintainer discussion in the issue): when the input is too large for ``ndiff`` to be fast, fall back to a coarser line-level ``unified_diff``, capped to a bounded number of lines so it always completes in milliseconds, and note this in the output. Smaller comparisons keep the existing detailed ``ndiff`` output unchanged. --- AUTHORS | 1 + changelog/8998.bugfix.rst | 3 ++ src/_pytest/assertion/_compare_sequence.py | 5 ++ src/_pytest/assertion/_diff.py | 61 ++++++++++++++++++++++ src/_pytest/assertion/compare_text.py | 12 +++-- testing/test_assertion.py | 51 ++++++++++++++++++ 6 files changed, 129 insertions(+), 4 deletions(-) create mode 100644 changelog/8998.bugfix.rst create mode 100644 src/_pytest/assertion/_diff.py diff --git a/AUTHORS b/AUTHORS index 972f39aa45e..06ba837f43d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -267,6 +267,7 @@ Kevin J. Foley Kian Eliasi Kian-Meng Ang Kim Soo +Kiril Klein Kodi B. Arfer Kojo Idrissa Kostis Anagnostopoulos diff --git a/changelog/8998.bugfix.rst b/changelog/8998.bugfix.rst new file mode 100644 index 00000000000..bd3e51f7fcc --- /dev/null +++ b/changelog/8998.bugfix.rst @@ -0,0 +1,3 @@ +Assertion failures comparing very large strings, lists, or dataclasses no longer hang for a long time (sometimes minutes) while building the diff. + +When the inputs are large enough that :func:`difflib.ndiff` would be pathologically slow, pytest now falls back to a faster line-level diff and notes this in the output. diff --git a/src/_pytest/assertion/_compare_sequence.py b/src/_pytest/assertion/_compare_sequence.py index cd0043bf7ce..78976655d4f 100644 --- a/src/_pytest/assertion/_compare_sequence.py +++ b/src/_pytest/assertion/_compare_sequence.py @@ -6,6 +6,8 @@ from _pytest._io.pprint import PrettyPrinter from _pytest._io.saferepr import saferepr +from _pytest.assertion._diff import fast_unified_diff +from _pytest.assertion._diff import ndiff_too_slow from _pytest.assertion._typing import _HighlightFunc from _pytest.compat import running_on_ci @@ -27,6 +29,9 @@ def _compare_eq_iterable( yield "" yield "Full diff:" + if ndiff_too_slow(left_formatting, right_formatting): + yield from fast_unified_diff(left_formatting, right_formatting, highlighter) + return # "right" is the expected base against which we compare "left", # see https://github.com/pytest-dev/pytest/issues/3333 yield from highlighter( diff --git a/src/_pytest/assertion/_diff.py b/src/_pytest/assertion/_diff.py new file mode 100644 index 00000000000..763dc48c918 --- /dev/null +++ b/src/_pytest/assertion/_diff.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from collections.abc import Iterator +from collections.abc import Sequence + +from _pytest.assertion._typing import _HighlightFunc + + +# Above this combined input size (in characters), ``difflib.ndiff`` becomes +# pathologically slow: its character-level "fancy replace" step is quadratic in +# the size of the differing region, so a few tens of kilobytes of differing text +# can hang for minutes (see issue #8998). +NDIFF_MAX_INPUT_SIZE = 10_000 + +# Above this number of lines, both ``ndiff`` and ``unified_diff`` get slow, +# since the underlying ``SequenceMatcher`` is quadratic in the number of lines +# (a large nested structure can pretty-print to hundreds of thousands of lines). +# We both fall back and cap the fallback's input at this many lines. +DIFF_MAX_LINES = 1_000 + + +def ndiff_too_slow(left_lines: Sequence[str], right_lines: Sequence[str]) -> bool: + """Return True if ``difflib.ndiff`` would likely be pathologically slow.""" + if len(left_lines) > DIFF_MAX_LINES or len(right_lines) > DIFF_MAX_LINES: + return True + size = sum(len(line) for line in left_lines) + sum( + len(line) for line in right_lines + ) + return size > NDIFF_MAX_INPUT_SIZE + + +def fast_unified_diff( + left_lines: Sequence[str], + right_lines: Sequence[str], + highlighter: _HighlightFunc, +) -> Iterator[str]: + """Yield a fast, coarse line-level diff for inputs too large for ``ndiff``. + + Unlike ``ndiff`` this does not produce character-level "?" guide lines, and + it only diffs the first ``DIFF_MAX_LINES`` lines of each side, but it + completes in milliseconds where ``ndiff`` would hang (see issue #8998). + + "right" is the expected base against which we compare "left", + see https://github.com/pytest-dev/pytest/issues/3333. + """ + from difflib import unified_diff + + yield ( + f"Diff too large to compute in full (over {NDIFF_MAX_INPUT_SIZE} " + "characters); showing a faster line-level diff instead:" + ) + left = [line.rstrip("\n") for line in left_lines[:DIFF_MAX_LINES]] + right = [line.rstrip("\n") for line in right_lines[:DIFF_MAX_LINES]] + hidden = max(len(left_lines), len(right_lines)) - DIFF_MAX_LINES + if hidden > 0: + yield f"Diffing only the first {DIFF_MAX_LINES} lines; {hidden} more hidden" + diff = unified_diff(right, left, n=3, lineterm="") + # The first two lines are the always-empty "--- "/"+++ " file headers. + next(diff, None) + next(diff, None) + yield from highlighter("\n".join(diff), lexer="diff").splitlines() diff --git a/src/_pytest/assertion/compare_text.py b/src/_pytest/assertion/compare_text.py index 31096444ba6..92e9e209f69 100644 --- a/src/_pytest/assertion/compare_text.py +++ b/src/_pytest/assertion/compare_text.py @@ -3,6 +3,8 @@ from collections.abc import Iterator from _pytest._io.saferepr import saferepr +from _pytest.assertion._diff import fast_unified_diff +from _pytest.assertion._diff import ndiff_too_slow from _pytest.assertion._typing import _AssertionTextDiffStyle from _pytest.assertion._typing import _HighlightFunc from _pytest.assertion.highlight import dummy_highlighter @@ -75,13 +77,15 @@ def _diff_text( left = repr(str(left)) right = repr(str(right)) yield "Strings contain only whitespace, escaping them using repr()" + left_lines = left.splitlines(keepends) + right_lines = right.splitlines(keepends) + if ndiff_too_slow(left_lines, right_lines): + yield from fast_unified_diff(left_lines, right_lines, highlighter) + return # "right" is the expected base against which we compare "left", # see https://github.com/pytest-dev/pytest/issues/3333 yield from highlighter( - "\n".join( - line.strip("\n") - for line in ndiff(right.splitlines(keepends), left.splitlines(keepends)) - ), + "\n".join(line.strip("\n") for line in ndiff(right_lines, left_lines)), lexer="diff", ).splitlines() diff --git a/testing/test_assertion.py b/testing/test_assertion.py index c25487bdf33..79c576d9547 100644 --- a/testing/test_assertion.py +++ b/testing/test_assertion.py @@ -17,6 +17,7 @@ from _pytest.assertion import truncate from _pytest.assertion import util from _pytest.assertion._compare_any import _compare_eq_cls +from _pytest.assertion._diff import ndiff_too_slow from _pytest.assertion.compare_text import _compare_eq_text from _pytest.config import Config as _Config from _pytest.monkeypatch import MonkeyPatch @@ -459,6 +460,19 @@ def callequal( ) +class TestNdiffTooSlow: + """Heuristic guarding against pathologically slow diffs (#8998).""" + + def test_small_input_uses_ndiff(self) -> None: + assert ndiff_too_slow(["spam"], ["eggs"]) is False + + def test_many_characters_is_too_slow(self) -> None: + assert ndiff_too_slow(["a" * 6000], ["b" * 6000]) is True + + def test_many_lines_is_too_slow(self) -> None: + assert ndiff_too_slow(["x"] * 1001, ["y"]) is True + + class TestAssert_reprcompare: def test_different_types(self) -> None: assert callequal([0, 1], "foo") is None @@ -513,6 +527,32 @@ def test_text_skipping_verbose(self) -> None: assert "- " + "a" * 50 + "eggs" in lines assert "+ " + "a" * 50 + "spam" in lines + def test_text_diff_large_input_skips_ndiff(self) -> None: + # A single huge differing line is above the character cutoff and falls + # back to a fast line-level diff instead of the pathologically slow + # ndiff (#8998). + left = "a" + "x" * 20000 + right = "b" + "y" * 20000 + lines = callequal(left, right, verbose=1) + assert lines is not None + assert any("Diff too large to compute in full" in line for line in lines) + # The character-level "?" guide lines produced by ndiff must not appear. + assert not any(line.startswith("? ") for line in lines) + + def test_text_diff_many_lines_skips_ndiff(self) -> None: + # Many lines are above the line cutoff and fall back, capping the + # number of lines actually diffed (#8998). + left = "\n".join(f"left line {i}" for i in range(2000)) + right = "\n".join(f"right line {i}" for i in range(2000)) + lines = callequal(left, right, verbose=1) + assert lines is not None + assert any("Diff too large to compute in full" in line for line in lines) + assert any("Diffing only the first 1000 lines" in line for line in lines) + assert not any(line.startswith("? ") for line in lines) + # The fallback still shows which lines differ. + assert "-right line 0" in lines + assert "+left line 0" in lines + def test_multiline_text_diff(self) -> None: left = "foo\nspam\nbar" right = "foo\neggs\nbar" @@ -673,6 +713,17 @@ def test_iterable_quiet(self) -> None: "Use -v to get more diff", ] + def test_iterable_large_input_skips_ndiff(self) -> None: + # Large iterables fall back to a fast line-level diff instead of the + # pathologically slow ndiff over their pprint output (#8998). + left = [f"item-{i}" for i in range(2000)] + right = [f"other-{i}" for i in range(2000)] + lines = callequal(left, right, verbose=1) + assert lines is not None + assert "Full diff:" in lines + assert any("Diff too large to compute in full" in line for line in lines) + assert not any(line.startswith("? ") for line in lines) + def test_iterable_full_diff_ci( self, monkeypatch: MonkeyPatch, pytester: Pytester ) -> None: From 88ba6e1d95d0c75d6e68a89732a4832c55b3b6ff Mon Sep 17 00:00:00 2001 From: kirilklein Date: Wed, 17 Jun 2026 20:56:44 +0200 Subject: [PATCH 2/3] Address review: keep detailed (fancy) diff and cheaper heuristic (#8998) Responding to review feedback on the size heuristic and fallback: - Show a real ``ndiff`` over a bounded prefix instead of a coarse ``unified_diff``, so the character-level diff is kept for the part shown (the fallback no longer drops to a "non-fancy" line diff). - Bound the input to ``ndiff`` by both line and character count: its "fancy replace" cost grows with the product of the two, so a few hundred similar lines (e.g. a pretty-printed list of repeated values) could still take seconds. Lower DIFF_MAX_LINES accordingly so the worst case stays under ~1s. - The "too slow" checks now short-circuit instead of measuring the whole input, and the text check counts line separators instead of splitting the string into a list first. - Fix the fallback message, which wrongly claimed only the character limit was exceeded when it could be either limit. - Tests shrink the limits via monkeypatch instead of building huge data. --- changelog/8998.bugfix.rst | 2 +- src/_pytest/assertion/_compare_sequence.py | 8 +- src/_pytest/assertion/_diff.py | 109 ++++++++++++++------- src/_pytest/assertion/compare_text.py | 17 ++-- testing/test_assertion.py | 82 +++++++++------- 5 files changed, 133 insertions(+), 85 deletions(-) diff --git a/changelog/8998.bugfix.rst b/changelog/8998.bugfix.rst index bd3e51f7fcc..534db4e1151 100644 --- a/changelog/8998.bugfix.rst +++ b/changelog/8998.bugfix.rst @@ -1,3 +1,3 @@ Assertion failures comparing very large strings, lists, or dataclasses no longer hang for a long time (sometimes minutes) while building the diff. -When the inputs are large enough that :func:`difflib.ndiff` would be pathologically slow, pytest now falls back to a faster line-level diff and notes this in the output. +When the inputs are large enough that :func:`difflib.ndiff` would be pathologically slow, pytest now runs it over a bounded prefix of the input instead, so the detailed (character-level) diff is kept for the part shown while the rest is truncated with a note. diff --git a/src/_pytest/assertion/_compare_sequence.py b/src/_pytest/assertion/_compare_sequence.py index 78976655d4f..478e521687a 100644 --- a/src/_pytest/assertion/_compare_sequence.py +++ b/src/_pytest/assertion/_compare_sequence.py @@ -6,8 +6,8 @@ from _pytest._io.pprint import PrettyPrinter from _pytest._io.saferepr import saferepr -from _pytest.assertion._diff import fast_unified_diff -from _pytest.assertion._diff import ndiff_too_slow +from _pytest.assertion._diff import ndiff_too_slow_for_lines +from _pytest.assertion._diff import truncated_ndiff from _pytest.assertion._typing import _HighlightFunc from _pytest.compat import running_on_ci @@ -29,8 +29,8 @@ def _compare_eq_iterable( yield "" yield "Full diff:" - if ndiff_too_slow(left_formatting, right_formatting): - yield from fast_unified_diff(left_formatting, right_formatting, highlighter) + if ndiff_too_slow_for_lines(left_formatting, right_formatting): + yield from truncated_ndiff(left_formatting, right_formatting, highlighter) return # "right" is the expected base against which we compare "left", # see https://github.com/pytest-dev/pytest/issues/3333 diff --git a/src/_pytest/assertion/_diff.py b/src/_pytest/assertion/_diff.py index 763dc48c918..e41cb12b6a6 100644 --- a/src/_pytest/assertion/_diff.py +++ b/src/_pytest/assertion/_diff.py @@ -2,60 +2,93 @@ from collections.abc import Iterator from collections.abc import Sequence +from itertools import chain from _pytest.assertion._typing import _HighlightFunc -# Above this combined input size (in characters), ``difflib.ndiff`` becomes -# pathologically slow: its character-level "fancy replace" step is quadratic in -# the size of the differing region, so a few tens of kilobytes of differing text -# can hang for minutes (see issue #8998). -NDIFF_MAX_INPUT_SIZE = 10_000 +# Past these limits ``difflib.ndiff`` becomes pathologically slow: its +# character-level "fancy replace" step compares every pair of similar lines in a +# differing block, so its cost grows with the *product* of the line count and +# the character count. A few hundred similar lines can already take seconds, and +# the pretty-printed form of a large list/dataclass takes minutes (see issue +# #8998). The limits below keep ``ndiff`` under roughly a second in the worst +# case. Above them we still run ``ndiff`` -- so the detailed diff is kept -- but +# only over a bounded prefix of the input. +NDIFF_MAX_INPUT_SIZE = 10_000 # characters (left + right) +DIFF_MAX_LINES = 100 # lines (left + right) -# Above this number of lines, both ``ndiff`` and ``unified_diff`` get slow, -# since the underlying ``SequenceMatcher`` is quadratic in the number of lines -# (a large nested structure can pretty-print to hundreds of thousands of lines). -# We both fall back and cap the fallback's input at this many lines. -DIFF_MAX_LINES = 1_000 +def ndiff_too_slow_for_text(left: str, right: str) -> bool: + """Whether ``ndiff`` would be pathologically slow for these strings. -def ndiff_too_slow(left_lines: Sequence[str], right_lines: Sequence[str]) -> bool: - """Return True if ``difflib.ndiff`` would likely be pathologically slow.""" - if len(left_lines) > DIFF_MAX_LINES or len(right_lines) > DIFF_MAX_LINES: + Counts line separators instead of splitting into lines, so the check stays + cheap even for huge inputs. + """ + if left.count("\n") + right.count("\n") > DIFF_MAX_LINES: return True - size = sum(len(line) for line in left_lines) + sum( - len(line) for line in right_lines - ) - return size > NDIFF_MAX_INPUT_SIZE + return len(left) + len(right) > NDIFF_MAX_INPUT_SIZE + + +def ndiff_too_slow_for_lines( + left_lines: Sequence[str], right_lines: Sequence[str] +) -> bool: + """Whether ``ndiff`` would be pathologically slow for these lines. + + Exits as soon as a limit is exceeded instead of measuring the whole input. + """ + if len(left_lines) + len(right_lines) > DIFF_MAX_LINES: + return True + size = 0 + for line in chain(left_lines, right_lines): + size += len(line) + if size > NDIFF_MAX_INPUT_SIZE: + return True + return False -def fast_unified_diff( +def truncated_ndiff( left_lines: Sequence[str], right_lines: Sequence[str], highlighter: _HighlightFunc, ) -> Iterator[str]: - """Yield a fast, coarse line-level diff for inputs too large for ``ndiff``. - - Unlike ``ndiff`` this does not produce character-level "?" guide lines, and - it only diffs the first ``DIFF_MAX_LINES`` lines of each side, but it - completes in milliseconds where ``ndiff`` would hang (see issue #8998). + """Yield an ``ndiff`` over a bounded prefix of the input (issue #8998). - "right" is the expected base against which we compare "left", - see https://github.com/pytest-dev/pytest/issues/3333. + The character-level diff is kept, but only for a slice small enough to + compute quickly; the rest of the input is dropped. """ - from difflib import unified_diff + from difflib import ndiff + left = _bounded_prefix(left_lines, DIFF_MAX_LINES // 2, NDIFF_MAX_INPUT_SIZE // 2) + right = _bounded_prefix(right_lines, DIFF_MAX_LINES // 2, NDIFF_MAX_INPUT_SIZE // 2) yield ( - f"Diff too large to compute in full (over {NDIFF_MAX_INPUT_SIZE} " - "characters); showing a faster line-level diff instead:" + f"Diff too large to show in full (over {NDIFF_MAX_INPUT_SIZE} characters " + f"or {DIFF_MAX_LINES} lines); showing a truncated diff:" ) - left = [line.rstrip("\n") for line in left_lines[:DIFF_MAX_LINES]] - right = [line.rstrip("\n") for line in right_lines[:DIFF_MAX_LINES]] - hidden = max(len(left_lines), len(right_lines)) - DIFF_MAX_LINES - if hidden > 0: - yield f"Diffing only the first {DIFF_MAX_LINES} lines; {hidden} more hidden" - diff = unified_diff(right, left, n=3, lineterm="") - # The first two lines are the always-empty "--- "/"+++ " file headers. - next(diff, None) - next(diff, None) - yield from highlighter("\n".join(diff), lexer="diff").splitlines() + # "right" is the expected base against which we compare "left", + # see https://github.com/pytest-dev/pytest/issues/3333 + yield from highlighter( + "\n".join(line.rstrip("\n") for line in ndiff(right, left)), + lexer="diff", + ).splitlines() + + +def _bounded_prefix(lines: Sequence[str], max_lines: int, max_chars: int) -> list[str]: + """Return the longest prefix of ``lines`` within both limits. + + The line that would cross the character limit is included truncated, so a + single huge line still yields some (bounded) output. + """ + kept: list[str] = [] + chars = 0 + for line in lines: + if len(kept) >= max_lines: + break + room = max_chars - chars + if len(line) > room: + if room > 0: + kept.append(line[:room]) + break + kept.append(line) + chars += len(line) + return kept diff --git a/src/_pytest/assertion/compare_text.py b/src/_pytest/assertion/compare_text.py index 92e9e209f69..1cd16c4d941 100644 --- a/src/_pytest/assertion/compare_text.py +++ b/src/_pytest/assertion/compare_text.py @@ -3,8 +3,8 @@ from collections.abc import Iterator from _pytest._io.saferepr import saferepr -from _pytest.assertion._diff import fast_unified_diff -from _pytest.assertion._diff import ndiff_too_slow +from _pytest.assertion._diff import ndiff_too_slow_for_text +from _pytest.assertion._diff import truncated_ndiff from _pytest.assertion._typing import _AssertionTextDiffStyle from _pytest.assertion._typing import _HighlightFunc from _pytest.assertion.highlight import dummy_highlighter @@ -77,15 +77,18 @@ def _diff_text( left = repr(str(left)) right = repr(str(right)) yield "Strings contain only whitespace, escaping them using repr()" - left_lines = left.splitlines(keepends) - right_lines = right.splitlines(keepends) - if ndiff_too_slow(left_lines, right_lines): - yield from fast_unified_diff(left_lines, right_lines, highlighter) + if ndiff_too_slow_for_text(left, right): + yield from truncated_ndiff( + left.splitlines(keepends), right.splitlines(keepends), highlighter + ) return # "right" is the expected base against which we compare "left", # see https://github.com/pytest-dev/pytest/issues/3333 yield from highlighter( - "\n".join(line.strip("\n") for line in ndiff(right_lines, left_lines)), + "\n".join( + line.strip("\n") + for line in ndiff(right.splitlines(keepends), left.splitlines(keepends)) + ), lexer="diff", ).splitlines() diff --git a/testing/test_assertion.py b/testing/test_assertion.py index 79c576d9547..71018b4b3a6 100644 --- a/testing/test_assertion.py +++ b/testing/test_assertion.py @@ -14,10 +14,12 @@ from _pytest import outcomes import _pytest.assertion as plugin +from _pytest.assertion import _diff from _pytest.assertion import truncate from _pytest.assertion import util from _pytest.assertion._compare_any import _compare_eq_cls -from _pytest.assertion._diff import ndiff_too_slow +from _pytest.assertion._diff import ndiff_too_slow_for_lines +from _pytest.assertion._diff import ndiff_too_slow_for_text from _pytest.assertion.compare_text import _compare_eq_text from _pytest.config import Config as _Config from _pytest.monkeypatch import MonkeyPatch @@ -463,14 +465,19 @@ def callequal( class TestNdiffTooSlow: """Heuristic guarding against pathologically slow diffs (#8998).""" - def test_small_input_uses_ndiff(self) -> None: - assert ndiff_too_slow(["spam"], ["eggs"]) is False + def test_small_input_is_not_too_slow(self) -> None: + assert ndiff_too_slow_for_text("spam", "eggs") is False + assert ndiff_too_slow_for_lines(["spam"], ["eggs"]) is False - def test_many_characters_is_too_slow(self) -> None: - assert ndiff_too_slow(["a" * 6000], ["b" * 6000]) is True + def test_too_many_characters(self, monkeypatch: MonkeyPatch) -> None: + monkeypatch.setattr(_diff, "NDIFF_MAX_INPUT_SIZE", 5) + assert ndiff_too_slow_for_text("abc", "abcd") is True + assert ndiff_too_slow_for_lines(["abc"], ["abcd"]) is True - def test_many_lines_is_too_slow(self) -> None: - assert ndiff_too_slow(["x"] * 1001, ["y"]) is True + def test_too_many_lines(self, monkeypatch: MonkeyPatch) -> None: + monkeypatch.setattr(_diff, "DIFF_MAX_LINES", 3) + assert ndiff_too_slow_for_text("a\nb\nc\nd\ne", "f") is True + assert ndiff_too_slow_for_lines(["a", "b", "c", "d"], ["e"]) is True class TestAssert_reprcompare: @@ -527,31 +534,33 @@ def test_text_skipping_verbose(self) -> None: assert "- " + "a" * 50 + "eggs" in lines assert "+ " + "a" * 50 + "spam" in lines - def test_text_diff_large_input_skips_ndiff(self) -> None: - # A single huge differing line is above the character cutoff and falls - # back to a fast line-level diff instead of the pathologically slow - # ndiff (#8998). - left = "a" + "x" * 20000 - right = "b" + "y" * 20000 + def test_text_diff_large_input_is_truncated(self, monkeypatch: MonkeyPatch) -> None: + # Inputs over the character limit show a fast, truncated diff instead + # of the pathologically slow full ndiff (#8998). + monkeypatch.setattr(_diff, "NDIFF_MAX_INPUT_SIZE", 40) + left = "the answer is 41\nand a tail" + "x" * 100 + right = "the answer is 42\nand a tail" + "x" * 100 lines = callequal(left, right, verbose=1) assert lines is not None - assert any("Diff too large to compute in full" in line for line in lines) - # The character-level "?" guide lines produced by ndiff must not appear. - assert not any(line.startswith("? ") for line in lines) - - def test_text_diff_many_lines_skips_ndiff(self) -> None: - # Many lines are above the line cutoff and fall back, capping the - # number of lines actually diffed (#8998). - left = "\n".join(f"left line {i}" for i in range(2000)) - right = "\n".join(f"right line {i}" for i in range(2000)) + assert any("Diff too large to show in full" in line for line in lines) + # ndiff is still used, so the character-level detail is kept. + assert any(line.startswith("? ") for line in lines) + + def test_text_diff_many_lines_is_truncated(self, monkeypatch: MonkeyPatch) -> None: + # Inputs over the line limit are diffed over a bounded prefix only, so + # far fewer than all the lines are shown (#8998). + monkeypatch.setattr(_diff, "DIFF_MAX_LINES", 4) + left = "\n".join(f"left line {i}" for i in range(50)) + right = "\n".join(f"right line {i}" for i in range(50)) lines = callequal(left, right, verbose=1) assert lines is not None - assert any("Diff too large to compute in full" in line for line in lines) - assert any("Diffing only the first 1000 lines" in line for line in lines) - assert not any(line.startswith("? ") for line in lines) - # The fallback still shows which lines differ. - assert "-right line 0" in lines - assert "+left line 0" in lines + assert any("Diff too large to show in full" in line for line in lines) + # The fallback still shows which of the first lines differ. + assert "- right line 0" in lines + assert "+ left line 0" in lines + # Only a bounded prefix is diffed, not all 50 lines. + differing = [line for line in lines if line.startswith(("- ", "+ "))] + assert 0 < len(differing) < 50 def test_multiline_text_diff(self) -> None: left = "foo\nspam\nbar" @@ -713,16 +722,19 @@ def test_iterable_quiet(self) -> None: "Use -v to get more diff", ] - def test_iterable_large_input_skips_ndiff(self) -> None: - # Large iterables fall back to a fast line-level diff instead of the - # pathologically slow ndiff over their pprint output (#8998). - left = [f"item-{i}" for i in range(2000)] - right = [f"other-{i}" for i in range(2000)] + def test_iterable_large_input_is_truncated(self, monkeypatch: MonkeyPatch) -> None: + # Large iterables show a truncated diff over a bounded prefix of their + # pprint output instead of the pathologically slow full ndiff (#8998). + monkeypatch.setattr(_diff, "DIFF_MAX_LINES", 6) + left = [f"item-{i}" for i in range(50)] + right = [f"other-{i}" for i in range(50)] lines = callequal(left, right, verbose=1) assert lines is not None assert "Full diff:" in lines - assert any("Diff too large to compute in full" in line for line in lines) - assert not any(line.startswith("? ") for line in lines) + assert any("Diff too large to show in full" in line for line in lines) + # Only a bounded prefix is diffed, not all 50+ pprint lines. + differing = [line for line in lines if line.startswith(("- ", "+ "))] + assert 0 < len(differing) < 50 def test_iterable_full_diff_ci( self, monkeypatch: MonkeyPatch, pytester: Pytester From 76143eb7ef1e767b18f3ef4cd3b7605d78f2ceab Mon Sep 17 00:00:00 2001 From: kirilklein Date: Wed, 17 Jun 2026 21:04:06 +0200 Subject: [PATCH 3/3] Cover _bounded_prefix edge branches (#8998) Add a direct unit test exercising all four branches of _bounded_prefix (within limits, line cap, char-truncated line, and exact-fill drop) so patch coverage stays complete. --- testing/test_assertion.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/testing/test_assertion.py b/testing/test_assertion.py index 3a4fb41e62f..58b58f92357 100644 --- a/testing/test_assertion.py +++ b/testing/test_assertion.py @@ -479,6 +479,27 @@ def test_too_many_lines(self, monkeypatch: MonkeyPatch) -> None: assert ndiff_too_slow_for_text("a\nb\nc\nd\ne", "f") is True assert ndiff_too_slow_for_lines(["a", "b", "c", "d"], ["e"]) is True + def test_bounded_prefix(self) -> None: + # All lines fit within both limits: everything is kept. + assert _diff._bounded_prefix(["a", "b"], max_lines=10, max_chars=100) == [ + "a", + "b", + ] + # The line limit stops collection. + assert _diff._bounded_prefix(["a", "b", "c"], max_lines=2, max_chars=100) == [ + "a", + "b", + ] + # The line crossing the character limit is kept truncated. + assert _diff._bounded_prefix(["abc", "defgh"], max_lines=10, max_chars=4) == [ + "abc", + "d", + ] + # When the character limit is exactly full, the next line is dropped. + assert _diff._bounded_prefix(["abcd", "e"], max_lines=10, max_chars=4) == [ + "abcd" + ] + class TestAssert_reprcompare: def test_different_types(self) -> None: