From 05e9cb6686485d71f8c565013294a469bbc5c705 Mon Sep 17 00:00:00 2001
From: Guy Vago <guyvago@gmail.com>
Date: Tue, 14 Apr 2026 18:37:25 +0300
Subject: [PATCH 1/3] fix(patch): guard against None match in hunk header
 extraction

In `decouple_and_convert_to_hunks_with_lines_numbers` and
`extract_hunk_lines_from_patch`, the call to `extract_hunk_headers(match)`
was outside proper `if match:` guards. If a line starts with `@@` but
doesn't match `RE_HUNK_HEADER`, `match` is None, causing an
`AttributeError` crash on `match.groups()`.

Move the `extract_hunk_headers` call inside the match guard in both
functions, and skip malformed hunk header lines gracefully.
---
 pr_agent/algo/git_patch_processing.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py
index 553914e8d9..ff1e0e969c 100644
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -375,8 +375,9 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
                 old_content_lines = []
             if match:
                 prev_header_line = header_line
-
-            section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
+                section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
+            else:
+                continue  # skip lines that start with @@ but don't match the hunk header pattern
 
         elif line.startswith('+'):
             new_content_lines.append(line)
@@ -430,6 +431,9 @@ def extract_hunk_lines_from_patch(patch: str, file_name, line_start, line_end, s
                 header_line = line
 
                 match = RE_HUNK_HEADER.match(line)
+                if not match:
+                    skip_hunk = True
+                    continue
 
                 section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
 

From 89fbb48fcb23d86f0f230778f04ece3bea85af4e Mon Sep 17 00:00:00 2001
From: Guy Vago <guyvago@gmail.com>
Date: Wed, 15 Apr 2026 11:36:32 +0300
Subject: [PATCH 2/3] fix(patch): flush previous hunk before overwriting match
 on malformed @@ lines

The decouple_and_convert_to_hunks_with_lines_numbers() function overwrote
`match` with the new RE_HUNK_HEADER result before checking whether the
previous hunk needed to be finalized. When a malformed @@ line produced
match=None, the flush condition `if match and (new/old_content_lines)` was
False, silently dropping the previous hunk's content.

Fix: save `prev_match` before overwriting and use it for the flush decision.
Also use `prev_header_line` instead of `match`/`header_line` in the
post-loop finalization, so a trailing malformed @@ cannot suppress the
last valid hunk.

Adds 7 unit tests covering malformed @@ scenarios: crash safety,
content preservation, trailing malformed headers, line-number accuracy,
all-malformed patches, and deletion-only hunks.
---
 pr_agent/algo/git_patch_processing.py        |  11 +-
 tests/unittest/test_malformed_hunk_header.py | 119 +++++++++++++++++++
 2 files changed, 126 insertions(+), 4 deletions(-)
 create mode 100644 tests/unittest/test_malformed_hunk_header.py

diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py
index ff1e0e969c..25815e3f87 100644
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -354,8 +354,9 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
 
         if line.startswith('@@'):
             header_line = line
+            prev_match = match  # save previous match before overwriting
             match = RE_HUNK_HEADER.match(line)
-            if match and (new_content_lines or old_content_lines):  # found a new hunk, split the previous lines
+            if prev_match and (new_content_lines or old_content_lines):  # flush the previous hunk
                 if prev_header_line:
                     patch_with_lines_str += f'\n{prev_header_line}\n'
                 is_plus_lines = is_minus_lines = False
@@ -392,9 +393,11 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
             new_content_lines.append(line)
             old_content_lines.append(line)
 
-    # finishing last hunk
-    if match and new_content_lines:
-        patch_with_lines_str += f'\n{header_line}\n'
+    # finishing last hunk — use prev_header_line (not match/header_line) because
+    # match may have been set to None by a trailing malformed @@ line, and
+    # header_line may point to that malformed line instead of the last valid hunk
+    if prev_header_line and new_content_lines:
+        patch_with_lines_str += f'\n{prev_header_line}\n'
         is_plus_lines = is_minus_lines = False
         if new_content_lines:
             is_plus_lines = any([line.startswith('+') for line in new_content_lines])
diff --git a/tests/unittest/test_malformed_hunk_header.py b/tests/unittest/test_malformed_hunk_header.py
new file mode 100644
index 0000000000..7270940f6b
--- /dev/null
+++ b/tests/unittest/test_malformed_hunk_header.py
@@ -0,0 +1,119 @@
+"""
+Tests for malformed @@ hunk headers in decouple_and_convert_to_hunks_with_lines_numbers().
+
+Verifies that:
+1. A malformed @@ line does not crash the function.
+2. Content from valid hunks before and after a malformed @@ is preserved.
+3. When a malformed @@ is the last @@-starting line, the preceding hunk is
+   still finalized correctly.
+"""
+
+from pr_agent.algo.git_patch_processing import decouple_and_convert_to_hunks_with_lines_numbers
+from pr_agent.config_loader import get_settings
+
+get_settings(use_context=False).set("CONFIG.CLI_MODE", True)
+
+
+class _FakeFile:
+    """Minimal file object expected by the function under test."""
+
+    def __init__(self, filename="test_file.py"):
+        self.filename = filename
+
+
+class TestMalformedHunkHeader:
+    def test_malformed_hunk_does_not_crash(self):
+        """A patch whose only @@ line is malformed should not raise."""
+        patch = "@@ THIS IS NOT A VALID HUNK HEADER @@\n+added line\n-removed line"
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        assert isinstance(result, str)
+
+    def test_valid_hunk_before_malformed_is_preserved(self):
+        """Content from a valid hunk that precedes a malformed @@ must appear in output."""
+        patch = (
+            "@@ -1,3 +1,4 @@ section\n"
+            " context\n"
+            "+added_line\n"
+            " more_context\n"
+            "@@ MALFORMED @@ not a real header\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # The valid hunk's added line must be present
+        assert "+added_line" in result
+        assert "__new hunk__" in result
+
+    def test_malformed_hunk_between_two_valid_hunks(self):
+        """A malformed @@ between two valid hunks must not drop either hunk's content."""
+        patch = (
+            "@@ -1,3 +1,4 @@ first_section\n"
+            " ctx1\n"
+            "+add1\n"
+            " ctx2\n"
+            "@@ GARBAGE @@ not valid\n"
+            "@@ -10,2 +11,3 @@ second_section\n"
+            " ctx3\n"
+            "+add2\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        assert "+add1" in result, "First hunk content was dropped"
+        assert "+add2" in result, "Second hunk content was dropped"
+
+    def test_trailing_malformed_hunk_does_not_drop_last_valid(self):
+        """When a malformed @@ is the LAST @@-line, the previous hunk must still be finalized."""
+        patch = (
+            "@@ -5,3 +5,4 @@ my_func\n"
+            " existing_line\n"
+            "+new_line\n"
+            " another_existing\n"
+            "@@ INVALID HEADER\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # The valid hunk before the malformed trailing @@ must be present
+        assert "+new_line" in result
+        assert "__new hunk__" in result
+        # The malformed header text should NOT appear as a hunk header in the output
+        assert "INVALID HEADER" not in result
+
+    def test_line_numbers_correct_with_malformed_between(self):
+        """Line numbers from valid hunks are correct even when a malformed @@ sits between them."""
+        patch = (
+            "@@ -1,2 +1,3 @@ header1\n"
+            " line_a\n"
+            "+line_b\n"
+            "@@ NOT_VALID\n"
+            "@@ -10,2 +11,3 @@ header2\n"
+            " line_c\n"
+            "+line_d\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # First hunk starts at new-line 1: "1  line_a", "2 +line_b"
+        assert "1  line_a" in result
+        assert "2 +line_b" in result
+        # Second hunk starts at new-line 11: "11  line_c", "12 +line_d"
+        assert "11  line_c" in result
+        assert "12 +line_d" in result
+
+    def test_only_malformed_hunks_returns_file_header_only(self):
+        """A patch with ONLY malformed @@ lines should return just the file header."""
+        patch = (
+            "@@ BROKEN1 @@\n"
+            "+orphan_add\n"
+            "@@ BROKEN2 @@\n"
+            "-orphan_del\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # No valid hunk was ever parsed, so no __new hunk__ / __old hunk__ sections
+        assert "__new hunk__" not in result
+        assert "__old hunk__" not in result
+
+    def test_deletion_only_hunk_before_malformed(self):
+        """A hunk with only deletions before a malformed @@ is still finalized."""
+        patch = (
+            "@@ -1,3 +1,2 @@ section\n"
+            " context\n"
+            "-removed_line\n"
+            "@@ NOT VALID @@\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        assert "-removed_line" in result
+        assert "__old hunk__" in result

From 7ede47cc1b1c9343ea7b595645249dfddcb46fa4 Mon Sep 17 00:00:00 2001
From: Guy Vago <guyvago@gmail.com>
Date: Thu, 16 Apr 2026 11:17:12 +0300
Subject: [PATCH 3/3] fix: clear hunk buffers unconditionally on every @@ line

Orphan lines between a malformed @@ (match=None) and the next valid @@
were leaking into the next hunk. The buffer reset was inside the
`if prev_match:` flush block, so when prev_match was None (set by
the malformed @@), the buffers were never cleared.

Move the buffer reset outside the conditional so it runs on every @@
encounter, and add a test that verifies orphan lines are discarded.
---
 pr_agent/algo/git_patch_processing.py        |  7 +++--
 tests/unittest/test_malformed_hunk_header.py | 32 ++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/pr_agent/algo/git_patch_processing.py b/pr_agent/algo/git_patch_processing.py
index 25815e3f87..3f559896bf 100644
--- a/pr_agent/algo/git_patch_processing.py
+++ b/pr_agent/algo/git_patch_processing.py
@@ -372,8 +372,11 @@ def decouple_and_convert_to_hunks_with_lines_numbers(patch: str, file) -> str:
                     patch_with_lines_str = patch_with_lines_str.rstrip() + '\n__old hunk__\n'
                     for line_old in old_content_lines:
                         patch_with_lines_str += f"{line_old}\n"
-                new_content_lines = []
-                old_content_lines = []
+            # Always reset buffers on any @@ line — whether prev_match flushed
+            # or not. This prevents orphan lines (collected after a malformed @@
+            # that set match=None) from leaking into the next valid hunk.
+            new_content_lines = []
+            old_content_lines = []
             if match:
                 prev_header_line = header_line
                 section_header, size1, size2, start1, start2 = extract_hunk_headers(match)
diff --git a/tests/unittest/test_malformed_hunk_header.py b/tests/unittest/test_malformed_hunk_header.py
index 7270940f6b..3091539f49 100644
--- a/tests/unittest/test_malformed_hunk_header.py
+++ b/tests/unittest/test_malformed_hunk_header.py
@@ -117,3 +117,35 @@ def test_deletion_only_hunk_before_malformed(self):
         result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
         assert "-removed_line" in result
         assert "__old hunk__" in result
+
+    def test_orphan_lines_after_malformed_not_joined_to_next_hunk(self):
+        """Orphan lines between a malformed @@ and the next valid @@ must be discarded.
+
+        Without clearing the buffers unconditionally on every @@ line, orphan
+        lines collected after a malformed @@ (where match=None) leak into the
+        next valid hunk because prev_match is None so the flush block is
+        skipped and the buffers are never reset.
+        """
+        patch = (
+            "@@ -1,3 +1,4 @@ first_section\n"
+            " ctx1\n"
+            "+add1\n"
+            " ctx2\n"
+            "@@ MALFORMED @@ not a real header\n"
+            "+orphan_line_should_be_discarded\n"
+            "-orphan_del_should_be_discarded\n"
+            "@@ -10,2 +11,3 @@ second_section\n"
+            " ctx3\n"
+            "+add2\n"
+        )
+        result = decouple_and_convert_to_hunks_with_lines_numbers(patch, _FakeFile())
+        # Valid hunk content must be present
+        assert "+add1" in result, "First hunk content was dropped"
+        assert "+add2" in result, "Second hunk content was dropped"
+        # Orphan lines must NOT appear in any hunk
+        assert "orphan_line_should_be_discarded" not in result, (
+            "Orphan line after malformed @@ leaked into the next hunk"
+        )
+        assert "orphan_del_should_be_discarded" not in result, (
+            "Orphan deletion after malformed @@ leaked into the next hunk"
+        )