From 05e9cb6686485d71f8c565013294a469bbc5c705 Mon Sep 17 00:00:00 2001 From: Guy Vago Date: Tue, 14 Apr 2026 18:37:25 +0300 Subject: [PATCH 1/3] fix(patch): guard against None match in hunk header extraction In `decouple_and_convert_to_hunks_with_lines_numbers` and `extract_hunk_lines_from_patch`, the call to `extract_hunk_headers(match)` was outside proper `if match:` guards. If a line starts with `@@` but doesn't match `RE_HUNK_HEADER`, `match` is None, causing an `AttributeError` crash on `match.groups()`. Move the `extract_hunk_headers` call inside the match guard in both functions, and skip malformed hunk header lines gracefully. --- pr_agent/algo/git_patch_processing.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 553914e8d9..ff1e0e969c 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -375,8 +375,9 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str: old_content_lines = [] if match: prev_header_line = header_line - - section_header, size1, size2, start1, start2 = extract_hunk_headers(match) + section_header, size1, size2, start1, start2 = extract_hunk_headers(match) + else: + continue # skip lines that start with @@ but don't match the hunk header pattern elif line.startswith('+'): new_content_lines.append(line) @@ -430,6 +431,9 @@ def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, s header_line = line match = RE_HUNK_HEADER.match(line) + if not match: + skip_hunk = True + continue section_header, size1, size2, start1, start2 = extract_hunk_headers(match) From 89fbb48fcb23d86f0f230778f04ece3bea85af4e Mon Sep 17 00:00:00 2001 From: Guy Vago Date: Wed, 15 Apr 2026 11:36:32 +0300 Subject: [PATCH 2/3] fix(patch): flush previous hunk before overwriting match on malformed @@ lines The decouple_and_convert_to_hunks_with_lines_numbers() function overwrote `match` with the new RE_HUNK_HEADER result before checking whether the previous hunk needed to be finalized. When a malformed @@ line produced match=None, the flush condition `if match and (new/old_content_lines)` was False, silently dropping the previous hunk's content. Fix: save `prev_match` before overwriting and use it for the flush decision. Also use `prev_header_line` instead of `match`/`header_line` in the post-loop finalization, so a trailing malformed @@ cannot suppress the last valid hunk. Adds 7 unit tests covering malformed @@ scenarios: crash safety, content preservation, trailing malformed headers, line-number accuracy, all-malformed patches, and deletion-only hunks. --- pr_agent/algo/git_patch_processing.py | 11 +- tests/unittest/test_malformed_hunk_header.py | 119 +++++++++++++++++++ 2 files changed, 126 insertions(+), 4 deletions(-) create mode 100644 tests/unittest/test_malformed_hunk_header.py diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index ff1e0e969c..25815e3f87 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -354,8 +354,9 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str: if line.startswith('@@'): header_line = line + prev_match = match # save previous match before overwriting match = RE_HUNK_HEADER.match(line) - if match and (new_content_lines or old_content_lines): # found a new hunk, split the previous lines + if prev_match and (new_content_lines or old_content_lines): # flush the previous hunk if prev_header_line: patch_with_lines_str += f'\n{prev_header_line}\n' is_plus_lines = is_minus_lines = False @@ -392,9 +393,11 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str: new_content_lines.append(line) old_content_lines.append(line) - # finishing last hunk - if match and new_content_lines: - patch_with_lines_str += f'\n{header_line}\n' + # finishing last hunk — use prev_header_line (not match/header_line) because + # match may have been set to None by a trailing malformed @@ line, and + # header_line may point to that malformed line instead of the last valid hunk + if prev_header_line and new_content_lines: + patch_with_lines_str += f'\n{prev_header_line}\n' is_plus_lines = is_minus_lines = False if new_content_lines: is_plus_lines = any([line.startswith('+') for line in new_content_lines]) diff --git a/tests/unittest/test_malformed_hunk_header.py b/tests/unittest/test_malformed_hunk_header.py new file mode 100644 index 0000000000..7270940f6b --- /dev/null +++ b/tests/unittest/test_malformed_hunk_header.py @@ -0,0 +1,119 @@ +""" +Tests for malformed @@ hunk headers in decouple_and_convert_to_hunks_with_lines_numbers(). + +Verifies that: +1. A malformed @@ line does not crash the function. +2. Content from valid hunks before and after a malformed @@ is preserved. +3. When a malformed @@ is the last @@-starting line, the preceding hunk is + still finalized correctly. +""" + +from pr_agent.algo.git_patch_processing import decouple_and_convert_to_hunks_with_lines_numbers +from pr_agent.config_loader import get_settings + +get_settings(use_context=False).set("CONFIG.CLI_MODE", True) + + +class _FakeFile: + """Minimal file object expected by the function under test.""" + + def __init__(self, filename="test_file.py"): + self.filename = filename + + +class TestMalformedHunkHeader: + def test_malformed_hunk_does_not_crash(self): + """A patch whose only @@ line is malformed should not raise.""" + patch = "@@ THIS IS NOT A VALID HUNK HEADER @@\n+added line\n-removed line" + result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile()) + assert isinstance(result, str) + + def test_valid_hunk_before_malformed_is_preserved(self): + """Content from a valid hunk that precedes a malformed @@ must appear in output.""" + patch = ( + "@@ -1,3 +1,4 @@ section\n" + " context\n" + "+added_line\n" + " more_context\n" + "@@ MALFORMED @@ not a real header\n" + ) + result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile()) + # The valid hunk's added line must be present + assert "+added_line" in result + assert "__new hunk__" in result + + def test_malformed_hunk_between_two_valid_hunks(self): + """A malformed @@ between two valid hunks must not drop either hunk's content.""" + patch = ( + "@@ -1,3 +1,4 @@ first_section\n" + " ctx1\n" + "+add1\n" + " ctx2\n" + "@@ GARBAGE @@ not valid\n" + "@@ -10,2 +11,3 @@ second_section\n" + " ctx3\n" + "+add2\n" + ) + result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile()) + assert "+add1" in result, "First hunk content was dropped" + assert "+add2" in result, "Second hunk content was dropped" + + def test_trailing_malformed_hunk_does_not_drop_last_valid(self): + """When a malformed @@ is the LAST @@-line, the previous hunk must still be finalized.""" + patch = ( + "@@ -5,3 +5,4 @@ my_func\n" + " existing_line\n" + "+new_line\n" + " another_existing\n" + "@@ INVALID HEADER\n" + ) + result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile()) + # The valid hunk before the malformed trailing @@ must be present + assert "+new_line" in result + assert "__new hunk__" in result + # The malformed header text should NOT appear as a hunk header in the output + assert "INVALID HEADER" not in result + + def test_line_numbers_correct_with_malformed_between(self): + """Line numbers from valid hunks are correct even when a malformed @@ sits between them.""" + patch = ( + "@@ -1,2 +1,3 @@ header1\n" + " line_a\n" + "+line_b\n" + "@@ NOT_VALID\n" + "@@ -10,2 +11,3 @@ header2\n" + " line_c\n" + "+line_d\n" + ) + result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile()) + # First hunk starts at new-line 1: "1 line_a", "2 +line_b" + assert "1 line_a" in result + assert "2 +line_b" in result + # Second hunk starts at new-line 11: "11 line_c", "12 +line_d" + assert "11 line_c" in result + assert "12 +line_d" in result + + def test_only_malformed_hunks_returns_file_header_only(self): + """A patch with ONLY malformed @@ lines should return just the file header.""" + patch = ( + "@@ BROKEN1 @@\n" + "+orphan_add\n" + "@@ BROKEN2 @@\n" + "-orphan_del\n" + ) + result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile()) + # No valid hunk was ever parsed, so no __new hunk__ / __old hunk__ sections + assert "__new hunk__" not in result + assert "__old hunk__" not in result + + def test_deletion_only_hunk_before_malformed(self): + """A hunk with only deletions before a malformed @@ is still finalized.""" + patch = ( + "@@ -1,3 +1,2 @@ section\n" + " context\n" + "-removed_line\n" + "@@ NOT VALID @@\n" + ) + result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile()) + assert "-removed_line" in result + assert "__old hunk__" in result From 7ede47cc1b1c9343ea7b595645249dfddcb46fa4 Mon Sep 17 00:00:00 2001 From: Guy Vago Date: Thu, 16 Apr 2026 11:17:12 +0300 Subject: [PATCH 3/3] fix: clear hunk buffers unconditionally on every @@ line Orphan lines between a malformed @@ (match=None) and the next valid @@ were leaking into the next hunk. The buffer reset was inside the `if prev_match:` flush block, so when prev_match was None (set by the malformed @@), the buffers were never cleared. Move the buffer reset outside the conditional so it runs on every @@ encounter, and add a test that verifies orphan lines are discarded. --- pr_agent/algo/git_patch_processing.py | 7 +++-- tests/unittest/test_malformed_hunk_header.py | 32 ++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py index 25815e3f87..3f559896bf 100644 --- a/pr_agent/algo/git_patch_processing.py +++ b/pr_agent/algo/git_patch_processing.py @@ -372,8 +372,11 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str: patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n' for line_old in old_content_lines: patch_with_lines_str += f"{line_old}\n" - new_content_lines = [] - old_content_lines = [] + # Always reset buffers on any @@ line — whether prev_match flushed + # or not. This prevents orphan lines (collected after a malformed @@ + # that set match=None) from leaking into the next valid hunk. + new_content_lines = [] + old_content_lines = [] if match: prev_header_line = header_line section_header, size1, size2, start1, start2 = extract_hunk_headers(match) diff --git a/tests/unittest/test_malformed_hunk_header.py b/tests/unittest/test_malformed_hunk_header.py index 7270940f6b..3091539f49 100644 --- a/tests/unittest/test_malformed_hunk_header.py +++ b/tests/unittest/test_malformed_hunk_header.py @@ -117,3 +117,35 @@ def test_deletion_only_hunk_before_malformed(self): result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile()) assert "-removed_line" in result assert "__old hunk__" in result + + def test_orphan_lines_after_malformed_not_joined_to_next_hunk(self): + """Orphan lines between a malformed @@ and the next valid @@ must be discarded. + + Without clearing the buffers unconditionally on every @@ line, orphan + lines collected after a malformed @@ (where match=None) leak into the + next valid hunk because prev_match is None so the flush block is + skipped and the buffers are never reset. + """ + patch = ( + "@@ -1,3 +1,4 @@ first_section\n" + " ctx1\n" + "+add1\n" + " ctx2\n" + "@@ MALFORMED @@ not a real header\n" + "+orphan_line_should_be_discarded\n" + "-orphan_del_should_be_discarded\n" + "@@ -10,2 +11,3 @@ second_section\n" + " ctx3\n" + "+add2\n" + ) + result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile()) + # Valid hunk content must be present + assert "+add1" in result, "First hunk content was dropped" + assert "+add2" in result, "Second hunk content was dropped" + # Orphan lines must NOT appear in any hunk + assert "orphan_line_should_be_discarded" not in result, ( + "Orphan line after malformed @@ leaked into the next hunk" + ) + assert "orphan_del_should_be_discarded" not in result, ( + "Orphan deletion after malformed @@ leaked into the next hunk" + )