diff --git a/AUTHORS b/AUTHORS index 972f39aa45e..06ba837f43d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -267,6 +267,7 @@ Kevin J. Foley Kian Eliasi Kian-Meng Ang Kim Soo +Kiril Klein Kodi B. Arfer Kojo Idrissa Kostis Anagnostopoulos diff --git a/changelog/8998.bugfix.rst b/changelog/8998.bugfix.rst new file mode 100644 index 00000000000..534db4e1151 --- /dev/null +++ b/changelog/8998.bugfix.rst @@ -0,0 +1,3 @@ +Assertion failures comparing very large strings, lists, or dataclasses no longer hang for a long time (sometimes minutes) while building the diff. + +When the inputs are large enough that :func:`difflib.ndiff` would be pathologically slow, pytest now runs it over a bounded prefix of the input instead, so the detailed (character-level) diff is kept for the part shown while the rest is truncated with a note. diff --git a/src/_pytest/assertion/_compare_sequence.py b/src/_pytest/assertion/_compare_sequence.py index cd0043bf7ce..478e521687a 100644 --- a/src/_pytest/assertion/_compare_sequence.py +++ b/src/_pytest/assertion/_compare_sequence.py @@ -6,6 +6,8 @@ from _pytest._io.pprint import PrettyPrinter from _pytest._io.saferepr import saferepr +from _pytest.assertion._diff import ndiff_too_slow_for_lines +from _pytest.assertion._diff import truncated_ndiff from _pytest.assertion._typing import _HighlightFunc from _pytest.compat import running_on_ci @@ -27,6 +29,9 @@ def _compare_eq_iterable( yield "" yield "Full diff:" + if ndiff_too_slow_for_lines(left_formatting, right_formatting): + yield from truncated_ndiff(left_formatting, right_formatting, highlighter) + return # "right" is the expected base against which we compare "left", # see https://github.com/pytest-dev/pytest/issues/3333 yield from highlighter( diff --git a/src/_pytest/assertion/_diff.py b/src/_pytest/assertion/_diff.py new file mode 100644 index 00000000000..e41cb12b6a6 --- /dev/null +++ b/src/_pytest/assertion/_diff.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +from 
# Past these limits ``difflib.ndiff`` becomes pathologically slow: its
# character-level "fancy replace" step compares every pair of similar lines in a
# differing block, so its cost grows with the *product* of the line count and
# the character count. A few hundred similar lines can already take seconds, and
# the pretty-printed form of a large list/dataclass takes minutes (see issue
# #8998). The limits below keep ``ndiff`` under roughly a second in the worst
# case. Above them we still run ``ndiff`` -- so the detailed diff is kept -- but
# only over a bounded window of the input.
NDIFF_MAX_INPUT_SIZE = 10_000  # characters (left + right)
DIFF_MAX_LINES = 100  # lines (left + right)


def ndiff_too_slow_for_text(left: str, right: str) -> bool:
    """Return whether ``ndiff`` would be pathologically slow for these strings.

    Lines are counted with :meth:`str.splitlines`, the same way the text is
    split before it is handed to ``ndiff``, so every separator it recognises
    (carriage returns, form feeds, ...) is counted and not only newlines, and
    the count agrees with :func:`ndiff_too_slow_for_lines`.

    The O(1) size check runs first: once it has passed, both strings together
    hold at most ``NDIFF_MAX_INPUT_SIZE`` characters, so splitting them stays
    cheap no matter how large the original inputs were.
    """
    if len(left) + len(right) > NDIFF_MAX_INPUT_SIZE:
        return True
    return len(left.splitlines()) + len(right.splitlines()) > DIFF_MAX_LINES


def ndiff_too_slow_for_lines(
    left_lines: Sequence[str], right_lines: Sequence[str]
) -> bool:
    """Return whether ``ndiff`` would be pathologically slow for these lines.

    The line count is checked first (O(1)); the character count is then
    accumulated line by line and the function returns as soon as the limit is
    exceeded, instead of measuring the whole input.
    """
    if len(left_lines) + len(right_lines) > DIFF_MAX_LINES:
        return True
    size = 0
    for line in chain(left_lines, right_lines):
        size += len(line)
        if size > NDIFF_MAX_INPUT_SIZE:
            return True
    return False


def truncated_ndiff(
    left_lines: Sequence[str],
    right_lines: Sequence[str],
    highlighter: _HighlightFunc,
) -> Iterator[str]:
    """Yield an ``ndiff`` over a bounded window of the input (issue #8998).

    The character-level diff is kept, but only for a slice small enough to
    compute quickly. The window starts at the first line where the two sides
    differ: the identical leading lines are skipped (and reported), because a
    window taken from the very start could otherwise consist solely of
    identical lines and show no difference at all. Everything after the window
    is dropped.

    :param left_lines: Lines of the left-hand (actual) value.
    :param right_lines: Lines of the right-hand (expected) value.
    :param highlighter:
        Called as ``highlighter(text, lexer="diff")``; must return the text to
        show, which is then split back into lines.
    :returns:
        An iterator over the explanation lines: a notice that the diff is
        truncated, an optional notice about skipped identical lines, then the
        ``ndiff`` output for the window.
    """
    from difflib import ndiff

    # Find where the two sides first diverge. This is a single linear pass over
    # lines that already exist in memory, negligible next to ``ndiff`` itself.
    skipped = 0
    for left_line, right_line in zip(left_lines, right_lines):
        if left_line != right_line:
            break
        skipped += 1
    if skipped:
        left_lines = left_lines[skipped:]
        right_lines = right_lines[skipped:]

    # Each side gets half of the overall budget.
    max_lines = DIFF_MAX_LINES // 2
    max_chars = NDIFF_MAX_INPUT_SIZE // 2
    left = _bounded_prefix(left_lines, max_lines, max_chars)
    right = _bounded_prefix(right_lines, max_lines, max_chars)

    yield (
        f"Diff too large to show in full (over {NDIFF_MAX_INPUT_SIZE} characters "
        f"or {DIFF_MAX_LINES} lines); showing a truncated diff:"
    )
    if skipped:
        plural = "" if skipped == 1 else "s"
        yield f"Skipping {skipped} identical leading line{plural} in diff"
    # "right" is the expected base against which we compare "left",
    # see https://github.com/pytest-dev/pytest/issues/3333
    yield from highlighter(
        "\n".join(line.rstrip("\n") for line in ndiff(right, left)),
        lexer="diff",
    ).splitlines()


def _bounded_prefix(lines: Sequence[str], max_lines: int, max_chars: int) -> list[str]:
    """Return the longest prefix of ``lines`` within both limits.

    At most ``max_lines`` lines and ``max_chars`` characters (summed over the
    kept lines) are returned. The line that would cross the character limit is
    included truncated, so a single huge line still yields some (bounded)
    output; if no room is left at all, that line is dropped instead.
    """
    kept: list[str] = []
    chars = 0
    for line in lines:
        if len(kept) >= max_lines:
            break
        room = max_chars - chars
        if len(line) > room:
            # Keep whatever still fits of the crossing line, then stop.
            if room > 0:
                kept.append(line[:room])
            break
        kept.append(line)
        chars += len(line)
    return kept
class TestNdiffTooSlow:
    """Checks for the size heuristic that guards against slow diffs (#8998)."""

    def test_small_input_is_not_too_slow(self) -> None:
        # Tiny inputs must stay on the regular, full-``ndiff`` path.
        text_verdict = ndiff_too_slow_for_text("spam", "eggs")
        lines_verdict = ndiff_too_slow_for_lines(["spam"], ["eggs"])
        assert text_verdict is False
        assert lines_verdict is False

    def test_too_many_characters(self, monkeypatch: MonkeyPatch) -> None:
        # Shrink the character budget so seven characters already exceed it.
        monkeypatch.setattr(_diff, "NDIFF_MAX_INPUT_SIZE", 5)
        text_verdict = ndiff_too_slow_for_text("abc", "abcd")
        lines_verdict = ndiff_too_slow_for_lines(["abc"], ["abcd"])
        assert text_verdict is True
        assert lines_verdict is True

    def test_too_many_lines(self, monkeypatch: MonkeyPatch) -> None:
        # Shrink the line budget so a handful of lines already exceed it.
        monkeypatch.setattr(_diff, "DIFF_MAX_LINES", 3)
        text_verdict = ndiff_too_slow_for_text("a\nb\nc\nd\ne", "f")
        lines_verdict = ndiff_too_slow_for_lines(["a", "b", "c", "d"], ["e"])
        assert text_verdict is True
        assert lines_verdict is True

    def test_bounded_prefix(self) -> None:
        cases = [
            # All lines fit within both limits: everything is kept.
            (["a", "b"], 10, 100, ["a", "b"]),
            # The line limit stops collection.
            (["a", "b", "c"], 2, 100, ["a", "b"]),
            # The line crossing the character limit is kept truncated.
            (["abc", "defgh"], 10, 4, ["abc", "d"]),
            # When the character limit is exactly full, the next line is dropped.
            (["abcd", "e"], 10, 4, ["abcd"]),
        ]
        for lines, line_limit, char_limit, expected in cases:
            kept = _diff._bounded_prefix(
                lines, max_lines=line_limit, max_chars=char_limit
            )
            assert kept == expected
+ monkeypatch.setattr(_diff, "NDIFF_MAX_INPUT_SIZE", 40) + left = "the answer is 41\nand a tail" + "x" * 100 + right = "the answer is 42\nand a tail" + "x" * 100 + lines = callequal(left, right, verbose=1) + assert lines is not None + assert any("Diff too large to show in full" in line for line in lines) + # ndiff is still used, so the character-level detail is kept. + assert any(line.startswith("? ") for line in lines) + + def test_text_diff_many_lines_is_truncated(self, monkeypatch: MonkeyPatch) -> None: + # Inputs over the line limit are diffed over a bounded prefix only, so + # far fewer than all the lines are shown (#8998). + monkeypatch.setattr(_diff, "DIFF_MAX_LINES", 4) + left = "\n".join(f"left line {i}" for i in range(50)) + right = "\n".join(f"right line {i}" for i in range(50)) + lines = callequal(left, right, verbose=1) + assert lines is not None + assert any("Diff too large to show in full" in line for line in lines) + # The fallback still shows which of the first lines differ. + assert "- right line 0" in lines + assert "+ left line 0" in lines + # Only a bounded prefix is diffed, not all 50 lines. + differing = [line for line in lines if line.startswith(("- ", "+ "))] + assert 0 < len(differing) < 50 + def test_multiline_text_diff(self) -> None: left = "foo\nspam\nbar" right = "foo\neggs\nbar" @@ -673,6 +743,20 @@ def test_iterable_quiet(self) -> None: "Use -v to get more diff", ] + def test_iterable_large_input_is_truncated(self, monkeypatch: MonkeyPatch) -> None: + # Large iterables show a truncated diff over a bounded prefix of their + # pprint output instead of the pathologically slow full ndiff (#8998). 
+ monkeypatch.setattr(_diff, "DIFF_MAX_LINES", 6) + left = [f"item-{i}" for i in range(50)] + right = [f"other-{i}" for i in range(50)] + lines = callequal(left, right, verbose=1) + assert lines is not None + assert "Full diff:" in lines + assert any("Diff too large to show in full" in line for line in lines) + # Only a bounded prefix is diffed, not all 50+ pprint lines. + differing = [line for line in lines if line.startswith(("- ", "+ "))] + assert 0 < len(differing) < 50 + def test_iterable_full_diff_ci( self, monkeypatch: MonkeyPatch, pytester: Pytester ) -> None: