diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index b3f54920d0..2e388d3bcd 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -2,6 +2,8 @@ import hashlib import re import urllib.parse +from datetime import datetime, timezone +from types import SimpleNamespace from typing import Any, Optional, Tuple, Union from urllib.parse import parse_qs, urlparse @@ -15,18 +17,76 @@ from ..algo.file_filter import filter_ignored from ..algo.git_patch_processing import decode_if_bytes from ..algo.language_handler import is_valid_file -from ..algo.utils import (clip_tokens, +from ..algo.utils import (PRReviewHeader, clip_tokens, find_line_number_of_relevant_line_in_file, load_large_diff) from ..config_loader import get_settings from ..log import get_logger -from .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider +from .git_provider import MAX_FILES_ALLOWED_FULL, GitProvider, IncrementalPR class DiffNotFoundError(Exception): """Raised when the diff for a merge request cannot be found.""" pass + +def _parse_gitlab_iso_datetime(value) -> Optional[datetime]: + """Parse a GitLab ISO 8601 datetime string into a naive UTC datetime. + + GitLab returns timestamps with timezone info (e.g. "2024-01-15T14:30:00.000+02:00"). + We normalise to naive UTC so they can be compared with PyGithub-style naive datetimes + used in the shared incremental-review code path. + """ + if value is None: + return None + if isinstance(value, datetime): + if value.tzinfo is not None: + return value.astimezone(timezone.utc).replace(tzinfo=None) + return value + if not isinstance(value, str): + return None + try: + s = value.replace("Z", "+00:00") + dt = datetime.fromisoformat(s) + if dt.tzinfo is not None: + dt = dt.astimezone(timezone.utc).replace(tzinfo=None) + return dt + except (ValueError, AttributeError): + return None + + +class _GitlabIncrementalCommit: + """Adapter exposing a GitLab ProjectCommit with the attribute shape PyGithub Commit objects use. + + Shared incremental-review code reads `.sha` and `.commit.author.date`; we mimic that surface + so the provider can plug into `IncrementalPR.first_new_commit_sha` and the threshold checks + in `_can_run_incremental_review` without further branching. + """ + + def __init__(self, gl_commit): + self._gl_commit = gl_commit + self.sha = getattr(gl_commit, 'id', None) + date = _parse_gitlab_iso_datetime( + getattr(gl_commit, 'committed_date', None) + or getattr(gl_commit, 'authored_date', None) + or getattr(gl_commit, 'created_at', None) + ) + self.commit = SimpleNamespace(author=SimpleNamespace(date=date)) + + +class _GitlabIncrementalNote: + """Adapter exposing a GitLab note (issue comment) with attributes the reviewer expects. + + The reviewer reads `.created_at` (datetime) and `.html_url` from `previous_review`. + """ + + def __init__(self, note, mr_web_url: Optional[str] = None): + self._note = note + self.id = getattr(note, 'id', None) + self.body = getattr(note, 'body', '') or '' + self.created_at = _parse_gitlab_iso_datetime(getattr(note, 'created_at', None)) + self.html_url = f"{mr_web_url}#note_{self.id}" if mr_web_url else "" + class GitLabProvider(GitProvider): def __init__(self, merge_request_url: Optional[str] = None, incremental: Optional[bool] = False): @@ -347,6 +407,208 @@ def _set_merge_request(self, merge_request_url: str): get_logger().error(f"Could not get diff for merge request {self.id_mr}") raise DiffNotFoundError(f"Could not get diff for merge request {self.id_mr}") from e + # Anchor-note prefixes per incremental "kind". An incremental run looks for the most recent + # prior note matching ANY of these prefixes and uses its timestamp as the timeline anchor. + _INCREMENTAL_ANCHOR_PREFIXES = { + "review": ( + PRReviewHeader.REGULAR.value, # "## PR Reviewer Guide" + PRReviewHeader.INCREMENTAL.value, # "## Incremental PR Reviewer Guide" + ), + "suggestions": ( + "## PR Code Suggestions ✨", # summary-table mode + "**Suggestion:**", # commitable-suggestions inline mode + ), + } + + def get_incremental_commits(self, incremental: Optional[IncrementalPR] = None, kind: str = "review"): + """Populate state needed for an incremental run. + + Mirrors `GithubProvider.get_incremental_commits` for `/review -i`, and also supports + `/improve -i` via `kind="suggestions"` — in that case we anchor on the most recent prior + `## PR Code Suggestions` or inline `**Suggestion:**` note instead of a review note, so + re-runs of `/improve` only act on commits added since the last suggestions pass. + """ + if incremental is None: + incremental = IncrementalPR(False) + self.incremental = incremental + if not self.incremental.is_incremental: + return + self.unreviewed_files_set = {} + self._incremental_kind = kind + self._get_incremental_commits() + + def _get_incremental_commits(self): + if not getattr(self, 'mr_commits', None): + # gitlab returns commits newest-first; reverse to match PyGithub's oldest-first ordering + self.mr_commits = list(self.mr.commits())[::-1] + + kind = getattr(self, '_incremental_kind', 'review') + prefixes = self._INCREMENTAL_ANCHOR_PREFIXES.get(kind, ()) + self.previous_review = self._find_anchor_note(prefixes) if prefixes else None + if not self.previous_review: + get_logger().info( + f"No previous {kind} comment found, will fall back to a full run" + ) + self.incremental.is_incremental = False + return + + self.incremental.commits_range = self.get_commit_range() + if not self.incremental.commits_range: + # Disambiguate two cases: + # - last_seen_commit is set: we successfully walked the commit timeline and + # found that all commits are at-or-before the previous review, i.e. legitimately + # no new commits since the last review. Keep is_incremental=True so the reviewer + # surfaces "Incremental Review Skipped — no files changed". + # - last_seen_commit is unset: we couldn't anchor any commit on the timeline + # (the previous review's timestamp didn't parse, or every post-review commit was + # dateless). Fall back to a full review rather than silently dropping the run. + if self.incremental.last_seen_commit is None: + get_logger().info( + "Could not establish a commit timeline against the previous review " + "(missing/unparseable timestamps); falling back to a full review" + ) + self.incremental.is_incremental = False + return + + last_seen_sha = self.incremental.last_seen_commit_sha + try: + head_sha = self.mr.diff_refs['head_sha'] + except (KeyError, TypeError, AttributeError): + head_sha = None + self._incremental_head_sha = head_sha + + if not last_seen_sha or not head_sha: + # The previous review predates every commit on the branch (or refs unavailable); + # nothing to anchor an incremental diff against, fall back to a full review. + get_logger().info( + "Incremental review cannot anchor a base commit (no last_seen_sha or head_sha); " + "falling back to a full review" + ) + self.incremental.is_incremental = False + return + + try: + project = self.gl.projects.get(self.id_project) + compare_result = project.repository_compare(last_seen_sha, head_sha) + except Exception as e: + get_logger().error( + f"Failed to compare commits {last_seen_sha}..{head_sha} for incremental review: {e}" + ) + self.incremental.is_incremental = False + return + + if isinstance(compare_result, dict): + diffs = compare_result.get('diffs', []) or [] + else: + diffs = getattr(compare_result, 'diffs', []) or [] + + # `repository_compare(last_seen_sha, head_sha)` walks every commit on the path between + # the two SHAs, so if `git merge ` was run on the MR branch since the last + # incremental pass, files that only changed in the target branch (and were brought in + # via the merge) appear in `diffs` — even though they are not part of the MR's own + # contribution and would never appear in a full /review. + # + # `mr.changes()` is anchored on the MR's merge-base with target, so it correctly excludes + # target-side changes. Intersect file paths to drop "phantom" files brought in via merge. + mr_change_paths = None + try: + mr_change_paths = { + c.get('new_path') + for c in self.mr.changes().get('changes', []) + if c.get('new_path') + } + except Exception as e: + get_logger().warning( + f"Could not fetch mr.changes() to filter incremental scope; " + f"merge-from-target changes may leak into the review: {e}" + ) + + for diff in diffs: + # `repository_compare` normally yields dict entries, but defend against object-shaped + # responses too — otherwise a stricter library or stubbed client silently empties the + # incremental set and we degrade to "no new files". + if isinstance(diff, dict): + new_path = diff.get('new_path') + else: + new_path = getattr(diff, 'new_path', None) + if not new_path: + continue + if mr_change_paths is not None and new_path not in mr_change_paths: + get_logger().debug( + f"Excluding {new_path} from incremental scope: not part of the MR diff " + f"(likely brought in via a merge from the target branch)" + ) + continue + self.unreviewed_files_set[new_path] = diff + + def get_commit_range(self): + last_review_time = getattr(self.previous_review, 'created_at', None) + if last_review_time is None: + return [] + first_new_commit_index = None + for index in range(len(self.mr_commits) - 1, -1, -1): + adapter = _GitlabIncrementalCommit(self.mr_commits[index]) + commit_time = adapter.commit.author.date + if commit_time is None: + # A commit without a parseable timestamp cannot be placed on the timeline; + # skip it so it never lands in last_seen_commit (PRReviewer compares that + # date with `>`, which would TypeError against None). + get_logger().warning( + f"Skipping commit {adapter.sha} with unparseable timestamp during incremental review" + ) + continue + if commit_time > last_review_time: + self.incremental.first_new_commit = adapter + first_new_commit_index = index + else: + self.incremental.last_seen_commit = adapter + break + return self.mr_commits[first_new_commit_index:] if first_new_commit_index is not None else [] + + def get_previous_review(self, *, full: bool, incremental: bool): + if not (full or incremental): + raise ValueError("At least one of full or incremental must be True") + prefixes = [] + if full: + prefixes.append(PRReviewHeader.REGULAR.value) + if incremental: + prefixes.append(PRReviewHeader.INCREMENTAL.value) + return self._find_anchor_note(prefixes) + + def _find_anchor_note(self, prefixes): + """Return the most recent MR note whose body starts with any of `prefixes`. + + Used by incremental flows (`/review -i`, `/improve -i`) to find the timestamp + we anchor the commit timeline on. Returns a `_GitlabIncrementalNote` adapter + with `.created_at` parsed to a naive UTC datetime (possibly `None` when the + GitLab payload had an unexpected shape), or `None` if no match. + + We rely on GitLab returning notes in `created_at DESC` order (the API default) + and take the first match — re-sorting locally with a `datetime.min` fallback + for unparseable timestamps would silently demote the newest match in favour + of an older parseable one, leading to commits being re-reviewed. If the chosen + anchor's timestamp doesn't parse, `_get_incremental_commits` falls back to a + full run via the existing `last_seen_commit is None` branch. + """ + if not prefixes: + return None + # Use hasattr (not truthy) so a legitimately empty notes list still counts as cached; + # otherwise we'd re-fetch from GitLab on every call for MRs that have no notes. + if not hasattr(self, '_incremental_notes_cache'): + try: + self._incremental_notes_cache = list(self.mr.notes.list(get_all=True)) + except Exception as e: + get_logger().error(f"Failed to list MR notes for incremental review: {e}") + return None + mr_web_url = getattr(self.mr, 'web_url', None) + for note in self._incremental_notes_cache: + body = getattr(note, 'body', None) + if not isinstance(body, str): + continue + if any(body.startswith(prefix) for prefix in prefixes): + return _GitlabIncrementalNote(note, mr_web_url=mr_web_url) + return None + def get_pr_file_content(self, file_path: str, branch: str) -> str: try: file_obj = self.gl.projects.get(self.id_project).files.get(file_path, branch) @@ -405,9 +667,28 @@ def get_diff_files(self) -> list[FilePatchInfo]: if self.diff_files: return self.diff_files - # filter files using [ignore] patterns - raw_changes = self.mr.changes().get('changes', []) - raw_changes = self._expand_submodule_changes(raw_changes) + incremental_active = bool( + getattr(self, 'incremental', None) + and getattr(self.incremental, 'is_incremental', False) + and getattr(self, 'unreviewed_files_set', None) + ) + + if incremental_active: + raw_changes = list(self.unreviewed_files_set.values()) + # Apply submodule expansion symmetrically with the full-review path so that + # `GITLAB.EXPAND_SUBMODULE_DIFFS` keeps working under `/review -i`. + raw_changes = self._expand_submodule_changes(raw_changes) + base_sha_for_content = self.incremental.last_seen_commit_sha + # `_incremental_head_sha` is populated by `_get_incremental_commits()` whenever + # incremental_active is true; we still guard for defensive callers. + head_sha_for_content = getattr(self, '_incremental_head_sha', None) + if not head_sha_for_content: + head_sha_for_content = (self.mr.diff_refs or {}).get('head_sha') + else: + raw_changes = self.mr.changes().get('changes', []) + raw_changes = self._expand_submodule_changes(raw_changes) + base_sha_for_content = self.mr.diff_refs['base_sha'] + head_sha_for_content = self.mr.diff_refs['head_sha'] diffs_original = raw_changes diffs = filter_ignored(diffs_original, 'gitlab') if diffs != diffs_original: @@ -432,8 +713,8 @@ def get_diff_files(self) -> list[FilePatchInfo]: # allow only a limited number of files to be fully loaded. We can manage the rest with diffs only counter_valid += 1 if counter_valid < MAX_FILES_ALLOWED_FULL or not diff['diff']: - original_file_content_str = self.get_pr_file_content(diff['old_path'], self.mr.diff_refs['base_sha']) - new_file_content_str = self.get_pr_file_content(diff['new_path'], self.mr.diff_refs['head_sha']) + original_file_content_str = self.get_pr_file_content(diff['old_path'], base_sha_for_content) + new_file_content_str = self.get_pr_file_content(diff['new_path'], head_sha_for_content) else: if counter_valid == MAX_FILES_ALLOWED_FULL: get_logger().info(f"Too many files in PR, will avoid loading full content for rest of files") @@ -477,6 +758,10 @@ def get_diff_files(self) -> list[FilePatchInfo]: return diff_files def get_files(self) -> list: + if (getattr(self, 'incremental', None) + and getattr(self.incremental, 'is_incremental', False) + and getattr(self, 'unreviewed_files_set', None)): + return list(self.unreviewed_files_set.keys()) if not self.git_files: raw_changes = self.mr.changes().get('changes', []) raw_changes = self._expand_submodule_changes(raw_changes) diff --git a/pr_agent/tools/pr_code_suggestions.py b/pr_agent/tools/pr_code_suggestions.py index 6372396c8b..47f08f76bf 100644 --- a/pr_agent/tools/pr_code_suggestions.py +++ b/pr_agent/tools/pr_code_suggestions.py @@ -24,7 +24,7 @@ from pr_agent.git_providers import (AzureDevopsProvider, GithubProvider, GitLabProvider, get_git_provider, get_git_provider_with_context) -from pr_agent.git_providers.git_provider import get_main_pr_language, GitProvider +from pr_agent.git_providers.git_provider import GitProvider, IncrementalPR, get_main_pr_language from pr_agent.log import get_logger from pr_agent.servers.help import HelpMessage from pr_agent.tools.pr_description import insert_br_after_x_chars @@ -36,6 +36,33 @@ def __init__(self, pr_url: str, cli_mode=False, args: list = None, ai_handler: partial[BaseAiHandler,] = LiteLLMAIHandler): self.git_provider = get_git_provider_with_context(pr_url) + self.pr_url = pr_url # set early so the no-op log line in `run()` can reference it + self.args = args + self.incremental = self._parse_incremental(args) + self._incremental_empty_scope = False + # When invoked as `/improve -i`, narrow `git_provider.get_diff_files()` to the files + # changed since the previous suggestions pass. Falls back to full when the provider + # doesn't support incremental scope or no prior suggestion comment exists. + if self.incremental.is_incremental and hasattr(self.git_provider, "get_incremental_commits"): + try: + self.git_provider.get_incremental_commits(self.incremental, kind="suggestions") + except TypeError: + # Older provider signature without the `kind` kwarg — skip incremental scope. + get_logger().info( + "Provider does not support kind-based incremental commits; " + "running /improve on the full MR diff" + ) + self.incremental = IncrementalPR(False) + # If incremental is active but the scope came back empty (no files changed since the + # previous suggestions pass), short-circuit init now. `run()` checks the same flag and + # exits without touching the model. This avoids a wasted `mr.changes()` round-trip via + # `get_files()` — when `unreviewed_files_set` is `{}` it's falsy and `get_files()` falls + # back to the full MR file list, which is pure waste on the "nothing new" path. + if (self.incremental.is_incremental + and hasattr(self.git_provider, "unreviewed_files_set") + and not self.git_provider.unreviewed_files_set): + self._incremental_empty_scope = True + return self.main_language = get_main_pr_language( self.git_provider.get_languages(), self.git_provider.get_files() ) @@ -46,7 +73,6 @@ def __init__(self, pr_url: str, cli_mode=False, args: list = None, self.ai_handler.main_pr_language = self.main_language self.patches_diff = None self.prediction = None - self.pr_url = pr_url self.cli_mode = cli_mode self.pr_description, self.pr_description_files = ( self.git_provider.get_pr_description(split_changes_walkthrough=True)) @@ -90,8 +116,24 @@ def __init__(self, pr_url: str, cli_mode=False, args: list = None, self.progress = build_progress_comment() self.progress_response = None + @staticmethod + def _parse_incremental(args): + """Parse the `-i` flag for `/improve` exactly like `PRReviewer.parse_incremental`.""" + is_incremental = bool(args and len(args) >= 1 and args[0] == "-i") + return IncrementalPR(is_incremental) + async def run(self): try: + if getattr(self, "_incremental_empty_scope", False): + # Set by `__init__` when incremental anchored cleanly but no files changed + # since the previous suggestions pass. Skip silently — re-running on the + # full MR diff here would just re-post the same inline suggestions. + get_logger().info( + f"Incremental /improve for {self.pr_url}: no files changed since the previous " + f"suggestions pass; skipping" + ) + return None + if not self.git_provider.get_files(): get_logger().info(f"PR has no files: {self.pr_url}, skipping code suggestions") return None diff --git a/tests/unittest/test_gitlab_provider.py b/tests/unittest/test_gitlab_provider.py index c3864264d8..91dd485f37 100644 --- a/tests/unittest/test_gitlab_provider.py +++ b/tests/unittest/test_gitlab_provider.py @@ -1,3 +1,4 @@ +from datetime import datetime from unittest.mock import MagicMock, patch import pytest @@ -5,7 +6,13 @@ from gitlab.exceptions import GitlabGetError from gitlab.v4.objects import Project, ProjectFile -from pr_agent.git_providers.gitlab_provider import GitLabProvider +from pr_agent.git_providers.git_provider import IncrementalPR +from pr_agent.git_providers.gitlab_provider import ( + GitLabProvider, + _GitlabIncrementalCommit, + _GitlabIncrementalNote, + _parse_gitlab_iso_datetime, +) class TestGitLabProvider: @@ -232,3 +239,399 @@ def test_get_line_link_handles_file_and_line_ranges(self, gitlab_provider): assert gitlab_provider.get_line_link("src/app.py", 10, 12) == ( "https://gitlab.com/group/repo/-/blob/feature/cache/src/app.py?ref_type=heads#L10-12" ) + + +class TestGitLabIncrementalHelpers: + """Pure-function tests for the incremental-review helpers.""" + + @pytest.mark.parametrize("value,expected", [ + ("2024-05-01T10:00:00.000Z", datetime(2024, 5, 1, 10, 0, 0)), + ("2024-05-01T12:00:00+02:00", datetime(2024, 5, 1, 10, 0, 0)), + ("2024-05-01T10:00:00", datetime(2024, 5, 1, 10, 0, 0)), + (datetime(2024, 5, 1, 10, 0, 0), datetime(2024, 5, 1, 10, 0, 0)), + (None, None), + ("not a date", None), + (12345, None), + ]) + def test_parse_iso_datetime(self, value, expected): + assert _parse_gitlab_iso_datetime(value) == expected + + def test_commit_adapter_exposes_pygithub_shape(self): + gl_commit = MagicMock() + gl_commit.id = "abc123" + gl_commit.committed_date = "2024-05-01T10:00:00.000Z" + gl_commit.authored_date = "2024-04-30T10:00:00.000Z" + + adapter = _GitlabIncrementalCommit(gl_commit) + + assert adapter.sha == "abc123" + # committed_date takes precedence over authored_date + assert adapter.commit.author.date == datetime(2024, 5, 1, 10, 0, 0) + + def test_commit_adapter_falls_back_to_authored_date(self): + gl_commit = MagicMock(spec=["id", "authored_date"]) + gl_commit.id = "abc" + gl_commit.authored_date = "2024-04-30T10:00:00Z" + + adapter = _GitlabIncrementalCommit(gl_commit) + + assert adapter.commit.author.date == datetime(2024, 4, 30, 10, 0, 0) + + def test_note_adapter_builds_html_url(self): + note = MagicMock() + note.id = 42 + note.body = "## PR Reviewer Guide 🔍\n..." + note.created_at = "2024-05-01T10:00:00Z" + + adapter = _GitlabIncrementalNote(note, mr_web_url="https://gitlab.com/x/y/-/merge_requests/1") + + assert adapter.id == 42 + assert adapter.html_url == "https://gitlab.com/x/y/-/merge_requests/1#note_42" + assert adapter.created_at == datetime(2024, 5, 1, 10, 0, 0) + + +class TestGitLabIncrementalReview: + """Tests for the GitLab incremental-review flow.""" + + @pytest.fixture + def mock_gitlab_client(self): + return MagicMock() + + @pytest.fixture + def mock_project(self): + return MagicMock() + + @pytest.fixture + def gitlab_provider(self, mock_gitlab_client, mock_project): + with patch('pr_agent.git_providers.gitlab_provider.gitlab.Gitlab', return_value=mock_gitlab_client), \ + patch('pr_agent.git_providers.gitlab_provider.get_settings') as mock_settings: + mock_settings.return_value.get.side_effect = lambda key, default=None: { + "GITLAB.URL": "https://gitlab.com", + "GITLAB.PERSONAL_ACCESS_TOKEN": "fake_token", + }.get(key, default) + mock_gitlab_client.projects.get.return_value = mock_project + provider = GitLabProvider("https://gitlab.com/test/repo/-/merge_requests/1") + provider.gl = mock_gitlab_client + provider.id_project = "test/repo" + provider.mr = MagicMock() + provider.mr.web_url = "https://gitlab.com/test/repo/-/merge_requests/1" + provider.mr.diff_refs = {"base_sha": "base", "head_sha": "head", "start_sha": "base"} + return provider + + @staticmethod + def _make_note(note_id, body, created_at): + n = MagicMock() + n.id = note_id + n.body = body + n.created_at = created_at + return n + + @staticmethod + def _make_commit(sha, committed_date): + c = MagicMock(spec=["id", "committed_date", "authored_date", "created_at"]) + c.id = sha + c.committed_date = committed_date + c.authored_date = committed_date + c.created_at = committed_date + return c + + def test_get_incremental_commits_no_previous_review_falls_back(self, gitlab_provider): + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(1, "Just a comment", "2024-05-01T10:00:00Z"), + ] + gitlab_provider.mr.commits.return_value = [ + self._make_commit("c1", "2024-05-02T10:00:00Z"), + ] + + gitlab_provider.get_incremental_commits(IncrementalPR(True)) + + assert gitlab_provider.incremental.is_incremental is False + + def test_get_incremental_commits_picks_commits_after_review(self, gitlab_provider, mock_project): + # Previous review at T=10:00. Commit c0 at 09:00 (before), c1 and c2 at 11:00 (after). + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(7, "## PR Reviewer Guide 🔍\nbody", "2024-05-01T10:00:00Z"), + self._make_note(1, "older note", "2024-04-01T10:00:00Z"), + ] + # gitlab returns commits newest-first + gitlab_provider.mr.commits.return_value = [ + self._make_commit("c2", "2024-05-01T11:30:00Z"), + self._make_commit("c1", "2024-05-01T11:00:00Z"), + self._make_commit("c0", "2024-05-01T09:00:00Z"), + ] + mock_project.repository_compare.return_value = { + "diffs": [ + {"new_path": "a.py", "old_path": "a.py", "diff": "@@ -1 +1 @@\n-old\n+new\n", + "new_file": False, "deleted_file": False, "renamed_file": False}, + {"new_path": "b.py", "old_path": "b.py", "diff": "@@ ... @@", + "new_file": True, "deleted_file": False, "renamed_file": False}, + ] + } + # mr.changes() is intersected with repository_compare to exclude files brought in + # via a merge from the target branch. Here both files are part of the MR. + gitlab_provider.mr.changes.return_value = { + "changes": [{"new_path": "a.py"}, {"new_path": "b.py"}] + } + + gitlab_provider.get_incremental_commits(IncrementalPR(True)) + + assert gitlab_provider.incremental.is_incremental is True + assert gitlab_provider.incremental.first_new_commit_sha == "c1" + assert gitlab_provider.incremental.last_seen_commit_sha == "c0" + assert set(gitlab_provider.unreviewed_files_set.keys()) == {"a.py", "b.py"} + mock_project.repository_compare.assert_called_once_with("c0", "head") + + def test_get_incremental_commits_no_new_commits_yields_empty_set(self, gitlab_provider, mock_project): + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(7, "## PR Reviewer Guide 🔍\nbody", "2024-05-01T20:00:00Z"), + ] + gitlab_provider.mr.commits.return_value = [ + self._make_commit("c0", "2024-05-01T09:00:00Z"), + ] + + gitlab_provider.get_incremental_commits(IncrementalPR(True)) + + # is_incremental stays True so the reviewer publishes the "no new files" message; + # unreviewed_files_set is empty. + assert gitlab_provider.incremental.is_incremental is True + assert gitlab_provider.unreviewed_files_set == {} + mock_project.repository_compare.assert_not_called() + + def test_get_incremental_commits_no_anchor_commit_falls_back(self, gitlab_provider, mock_project): + # All commits are after the previous review -> no last_seen_commit -> can't anchor. + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(7, "## PR Reviewer Guide 🔍\nbody", "2024-05-01T08:00:00Z"), + ] + gitlab_provider.mr.commits.return_value = [ + self._make_commit("c1", "2024-05-01T11:00:00Z"), + ] + + gitlab_provider.get_incremental_commits(IncrementalPR(True)) + + assert gitlab_provider.incremental.is_incremental is False + mock_project.repository_compare.assert_not_called() + + def test_get_files_uses_incremental_set_when_active(self, gitlab_provider): + gitlab_provider.incremental = IncrementalPR(True) + gitlab_provider.unreviewed_files_set = {"a.py": {"new_path": "a.py"}} + + assert gitlab_provider.get_files() == ["a.py"] + gitlab_provider.mr.changes.assert_not_called() + + def test_get_files_falls_back_to_mr_changes_when_not_incremental(self, gitlab_provider): + gitlab_provider.incremental = IncrementalPR(False) + gitlab_provider.git_files = None + gitlab_provider.mr.changes.return_value = {"changes": [{"new_path": "x.py"}]} + + assert gitlab_provider.get_files() == ["x.py"] + + def test_get_previous_review_returns_most_recent_match(self, gitlab_provider): + from pr_agent.algo.utils import PRReviewHeader + + # GitLab returns notes in created_at-DESC order. The helper relies on that order + # (no local sort) — the unrelated newest note must be skipped, the newer matching + # note must win over the older matching note. + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(3, "unrelated", "2024-06-01T10:00:00Z"), + self._make_note(2, f"{PRReviewHeader.REGULAR.value} 🔍\nnew", "2024-05-01T10:00:00Z"), + self._make_note(1, f"{PRReviewHeader.REGULAR.value} 🔍\nold", "2024-04-01T10:00:00Z"), + ] + + result = gitlab_provider.get_previous_review(full=True, incremental=True) + + assert result is not None + assert result.id == 2 + + def test_master_merge_files_are_excluded_from_incremental_scope(self, gitlab_provider, mock_project): + # Reproduction of the MR !1115 bug: user ran `git merge master` on the feature branch, + # which brought CI/config changes into the branch via a merge commit. Those files are + # NOT part of mr.changes() (the MR's actual contribution against its merge-base), but + # repository_compare(last_seen, head) walks through the merge and surfaces them. + # The fix intersects with mr.changes() to drop these "phantom" files. + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(7, "## PR Reviewer Guide 🔍\nbody", "2024-05-01T10:00:00Z"), + ] + gitlab_provider.mr.commits.return_value = [ + self._make_commit("merge", "2024-05-01T11:30:00Z"), # merge from master + self._make_commit("feat", "2024-05-01T11:00:00Z"), # author commit on feature + self._make_commit("c0", "2024-05-01T09:00:00Z"), # anchor (pre-review) + ] + mock_project.repository_compare.return_value = { + "diffs": [ + # MR's own change to a frontend file — must be reviewed + {"new_path": "src/feature.js", "old_path": "src/feature.js", "diff": "@@ ... @@", + "new_file": False, "deleted_file": False, "renamed_file": False}, + # Pulled in via merge from master, NOT part of the MR — must be excluded + {"new_path": ".gitlab-ci.yml", "old_path": ".gitlab-ci.yml", "diff": "@@ ... @@", + "new_file": False, "deleted_file": False, "renamed_file": False}, + ] + } + # mr.changes() returns only the MR's actual contribution; .gitlab-ci.yml is absent + # because the change to it lives in the target branch already. + gitlab_provider.mr.changes.return_value = { + "changes": [{"new_path": "src/feature.js"}] + } + + gitlab_provider.get_incremental_commits(IncrementalPR(True)) + + assert gitlab_provider.incremental.is_incremental is True + assert set(gitlab_provider.unreviewed_files_set.keys()) == {"src/feature.js"} + assert ".gitlab-ci.yml" not in gitlab_provider.unreviewed_files_set + + def test_commit_with_unparseable_date_is_skipped_not_anchored(self, gitlab_provider, mock_project): + # Anchor commit (c0) has a valid date older than the review; a stray dateless + # commit (cX) sits between the new commits and must not become last_seen_commit. + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(7, "## PR Reviewer Guide 🔍\nbody", "2024-05-01T10:00:00Z"), + ] + bad_commit = MagicMock(spec=["id", "committed_date", "authored_date", "created_at"]) + bad_commit.id = "cX" + bad_commit.committed_date = "not-a-date" + bad_commit.authored_date = None + bad_commit.created_at = None + gitlab_provider.mr.commits.return_value = [ + self._make_commit("c1", "2024-05-01T11:00:00Z"), + bad_commit, + self._make_commit("c0", "2024-05-01T09:00:00Z"), + ] + mock_project.repository_compare.return_value = { + "diffs": [{"new_path": "a.py", "old_path": "a.py", "diff": "@@ ... @@", + "new_file": False, "deleted_file": False, "renamed_file": False}], + } + + gitlab_provider.get_incremental_commits(IncrementalPR(True)) + + # The dateless commit must be ignored: anchor falls through to c0 (valid date). + assert gitlab_provider.incremental.is_incremental is True + assert gitlab_provider.incremental.last_seen_commit_sha == "c0" + assert gitlab_provider.incremental.last_seen_commit.commit.author.date is not None + assert gitlab_provider.incremental.first_new_commit_sha == "c1" + + def test_unparseable_review_timestamp_falls_back_to_full(self, gitlab_provider, mock_project): + # If the previous review's created_at didn't parse, we can't position commits on the + # timeline; we must fall back to a full review rather than silently report "no new files". + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(7, "## PR Reviewer Guide 🔍\nbody", "not-a-date"), + ] + gitlab_provider.mr.commits.return_value = [ + self._make_commit("c1", "2024-05-01T11:00:00Z"), + ] + + gitlab_provider.get_incremental_commits(IncrementalPR(True)) + + assert gitlab_provider.incremental.is_incremental is False + mock_project.repository_compare.assert_not_called() + + def test_all_post_review_commits_dateless_falls_back_to_full(self, gitlab_provider, mock_project): + # If every commit after the previous review has an unparseable timestamp, we can't + # anchor a last_seen_commit. The fix must fall back to full review, not produce a + # spurious "Incremental Review Skipped" message. + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(7, "## PR Reviewer Guide 🔍\nbody", "2024-05-01T10:00:00Z"), + ] + bad1 = MagicMock(spec=["id", "committed_date", "authored_date", "created_at"]) + bad1.id, bad1.committed_date, bad1.authored_date, bad1.created_at = "cX1", None, None, None + bad2 = MagicMock(spec=["id", "committed_date", "authored_date", "created_at"]) + bad2.id, bad2.committed_date, bad2.authored_date, bad2.created_at = "cX2", "garbage", None, None + gitlab_provider.mr.commits.return_value = [bad1, bad2] + + gitlab_provider.get_incremental_commits(IncrementalPR(True)) + + assert gitlab_provider.incremental.is_incremental is False + mock_project.repository_compare.assert_not_called() + + def test_get_previous_review_caches_empty_notes_list(self, gitlab_provider): + # An MR with no notes must still cache the result; falsy-checks would re-fetch each call. + gitlab_provider.mr.notes.list.return_value = [] + + first = gitlab_provider.get_previous_review(full=True, incremental=True) + second = gitlab_provider.get_previous_review(full=True, incremental=True) + + assert first is None and second is None + assert gitlab_provider.mr.notes.list.call_count == 1 + + def test_incremental_kind_suggestions_anchors_on_suggestion_note(self, gitlab_provider, mock_project): + # When kind="suggestions", we anchor on the latest /improve output (either the + # "## PR Code Suggestions ✨" summary or an inline "**Suggestion:**" note), + # NOT on a /review note posted later in the same CI run. + gitlab_provider.mr.notes.list.return_value = [ + # Most recent: a review-incremental note posted AFTER the last /improve run. + self._make_note(9, "## Incremental PR Reviewer Guide 🔍\nbody", "2026-05-15T10:05:00Z"), + # The actual /improve anchor we want to pick. + self._make_note(8, "**Suggestion:** Используйте Number вместо parseInt...", "2026-05-15T10:00:00Z"), + # An older /review note that should NOT win over the suggestion above. + self._make_note(5, "## PR Reviewer Guide 🔍\nold", "2026-05-15T09:00:00Z"), + ] + gitlab_provider.mr.commits.return_value = [ + self._make_commit("c2", "2026-05-15T10:10:00Z"), # after the suggestion note + self._make_commit("c0", "2026-05-15T09:30:00Z"), # before the suggestion note + ] + mock_project.repository_compare.return_value = { + "diffs": [{"new_path": "a.py", "old_path": "a.py", "diff": "@@ ... @@", + "new_file": False, "deleted_file": False, "renamed_file": False}], + } + gitlab_provider.mr.changes.return_value = {"changes": [{"new_path": "a.py"}]} + + gitlab_provider.get_incremental_commits(IncrementalPR(True), kind="suggestions") + + assert gitlab_provider.incremental.is_incremental is True + assert gitlab_provider.incremental.first_new_commit_sha == "c2" + assert gitlab_provider.incremental.last_seen_commit_sha == "c0" + mock_project.repository_compare.assert_called_once_with("c0", "head") + + def test_incremental_kind_suggestions_falls_back_when_no_prior_suggestion(self, gitlab_provider, mock_project): + # A /review note exists, but no /improve has ever run. /improve -i must fall back to + # a full pass, not anchor on the review note. + gitlab_provider.mr.notes.list.return_value = [ + self._make_note(5, "## PR Reviewer Guide 🔍\nbody", "2026-05-15T09:00:00Z"), + ] + gitlab_provider.mr.commits.return_value = [ + self._make_commit("c0", "2026-05-15T08:30:00Z"), + ] + + gitlab_provider.get_incremental_commits(IncrementalPR(True), kind="suggestions") + + assert gitlab_provider.incremental.is_incremental is False + mock_project.repository_compare.assert_not_called() + + def test_get_incremental_commits_default_kind_is_review(self, gitlab_provider, mock_project): + # Sanity-check backward compatibility: no kind kwarg ⇒ behaves like a review run. + gitlab_provider.mr.notes.list.return_value = [ + # A /improve note that must be IGNORED in default (review) mode. + self._make_note(8, "**Suggestion:** xyz", "2026-05-15T10:00:00Z"), + ] + gitlab_provider.mr.commits.return_value = [ + self._make_commit("c0", "2026-05-15T09:30:00Z"), + ] + + gitlab_provider.get_incremental_commits(IncrementalPR(True)) + + # No review note exists -> fallback to full review (NOT anchoring on the suggestion note). + assert gitlab_provider.incremental.is_incremental is False + mock_project.repository_compare.assert_not_called() + + def test_incremental_get_diff_files_expands_submodule_changes(self, gitlab_provider): + # Set up incremental state directly to isolate get_diff_files behaviour. + gitlab_provider.incremental = IncrementalPR(True) + gitlab_provider.unreviewed_files_set = { + "libs/sub": {"new_path": "libs/sub", "old_path": "libs/sub", + "diff": "-Subproject commit aaa\n+Subproject commit bbb\n", + "new_file": False, "deleted_file": False, "renamed_file": False} + } + gitlab_provider._incremental_head_sha = "head" + gitlab_provider.incremental.last_seen_commit = _GitlabIncrementalCommit( + self._make_commit("c0", "2024-05-01T09:00:00Z") + ) + + expanded = [{ + "new_path": "libs/sub/file.py", "old_path": "libs/sub/file.py", + "diff": "@@ ... @@", "new_file": False, "deleted_file": False, "renamed_file": False, + }] + with patch.object(gitlab_provider, "_expand_submodule_changes", return_value=expanded) as m_exp, \ + patch.object(gitlab_provider, "get_pr_file_content", return_value=""): + files = gitlab_provider.get_diff_files() + + # _expand_submodule_changes was called with the incremental raw_changes, + # and the resulting file list reflects the expanded entries. + m_exp.assert_called_once() + assert [f.filename for f in files] == ["libs/sub/file.py"]