diff --git a/pr_agent/git_providers/azuredevops_provider.py b/pr_agent/git_providers/azuredevops_provider.py index b9d2f3990c..1d05ac5a3e 100644 --- a/pr_agent/git_providers/azuredevops_provider.py +++ b/pr_agent/git_providers/azuredevops_provider.py @@ -1,5 +1,6 @@ from __future__ import annotations +import datetime as _dt import os from typing import Optional, Tuple from urllib.parse import urlparse @@ -8,12 +9,12 @@ from ..algo.file_filter import filter_ignored from ..algo.language_handler import is_valid_file -from ..algo.utils import (PRDescriptionHeader, clip_tokens, +from ..algo.utils import (PRDescriptionHeader, PRReviewHeader, clip_tokens, find_line_number_of_relevant_line_in_file, load_large_diff) from ..config_loader import get_settings from ..log import get_logger -from .git_provider import GitProvider +from .git_provider import GitProvider, IncrementalPR AZURE_DEVOPS_AVAILABLE = True ADO_APP_CLIENT_DEFAULT_ID = "499b84ac-1321-427f-aa17-267ca6975798/.default" @@ -32,6 +33,32 @@ AZURE_DEVOPS_AVAILABLE = False +def _to_naive_utc(dt): + if dt is None: + return None + if getattr(dt, "tzinfo", None) is not None: + return dt.astimezone(_dt.timezone.utc).replace(tzinfo=None) + return dt + + +class _AzureCommitInner: + def __init__(self, raw): + self.message = getattr(raw, "comment", "") or "" + author = getattr(raw, "author", None) + author_date = _to_naive_utc(getattr(author, "date", None)) if author else None + self.author = type("_AzureAuthor", (), {"date": author_date})() + + +class _AzureCommitAdapter: + """Mimics PyGithub `Commit` shape (.sha, .commit.author.date, .commit.message, .parents).""" + + def __init__(self, raw): + self.sha = raw.commit_id + self.commit_id = raw.commit_id + self.commit = _AzureCommitInner(raw) + self.parents = list(getattr(raw, "parents", None) or []) + + class AzureDevopsProvider(GitProvider): def __init__( @@ -51,6 +78,9 @@ def __init__( self.pr = None self.temp_comments = [] self.incremental = incremental + self.unreviewed_files_map = {} + self.pr_commits = None + self.previous_review = None if pr_url: self.set_pr(pr_url) @@ -158,6 +188,135 @@ def set_pr(self, pr_url: str): self.workspace_slug, self.repo_slug, self.pr_num = self._parse_pr_url(pr_url) self.pr = self._get_pr() + def get_incremental_commits(self, incremental=None): + if incremental is None: + incremental = IncrementalPR(False) + self.incremental = incremental + if self.incremental.is_incremental: + self.unreviewed_files_map = {} + self._get_incremental_commits() + + def _get_incremental_commits(self): + if not self.pr_commits: + raw = list(self.azure_devops_client.get_pull_request_commits( + project=self.workspace_slug, + repository_id=self.repo_slug, + pull_request_id=self.pr_num, + )) + # Azure returns newest-first; oldest-first matches GitHub iteration order. + raw.reverse() + self.pr_commits = [_AzureCommitAdapter(c) for c in raw] + + self.previous_review = self.get_previous_review(full=True, incremental=True) + if not self.previous_review: + get_logger().info("No previous review found, will review the entire PR") + self.incremental.is_incremental = False + return + + self.incremental.commits_range = self._get_commit_range() + if self.incremental.commits_range is None: + return + candidate_paths = [] + had_errors = False + non_merge_seen = False + for commit in self.incremental.commits_range: + if len(commit.parents) > 1: + get_logger().info(f"Skipping merge commit {commit.sha}") + continue + non_merge_seen = True + try: + changes_obj = self.azure_devops_client.get_changes( + project=self.workspace_slug, + repository_id=self.repo_slug, + commit_id=commit.commit_id, + ) + except Exception as e: + had_errors = True + get_logger().warning(f"Failed to fetch changes for {commit.commit_id}: {e}") + continue + for change in (getattr(changes_obj, "changes", None) or []): + try: + item = change["item"] + except (KeyError, TypeError): + additional = getattr(change, "additional_properties", None) or {} + item = additional.get("item") or {} + if not isinstance(item, dict) or item.get("gitObjectType") == "tree": + continue + path = item.get("path") + if path: + candidate_paths.append(path) + + if candidate_paths: + deduped = list(dict.fromkeys(candidate_paths)) + filtered = filter_ignored(deduped, "azure") + for path in filtered: + if is_valid_file(path): + self.unreviewed_files_map[path] = path + elif had_errors and self.incremental.commits_range: + get_logger().warning( + "Failed to fetch changes for incremental commits; falling back to full review." + ) + self.incremental.is_incremental = False + elif self.incremental.commits_range and not non_merge_seen: + get_logger().info( + "Incremental range only contains merge commits; falling back to full review." + ) + self.incremental.is_incremental = False + + def _get_commit_range(self): + last_review_time = _to_naive_utc(getattr(self.previous_review, "created_at", None)) + if last_review_time is None or not self.pr_commits: + get_logger().info( + "Cannot compute incremental commit range " + "(missing previous review timestamp or PR commits); falling back to full review." + ) + self.incremental.is_incremental = False + return None + first_new_commit_index = None + saw_reliable_date = False + for index in range(len(self.pr_commits) - 1, -1, -1): + cdate = self.pr_commits[index].commit.author.date + if cdate is None: + continue + saw_reliable_date = True + if cdate > last_review_time: + self.incremental.first_new_commit = self.pr_commits[index] + first_new_commit_index = index + else: + self.incremental.last_seen_commit = self.pr_commits[index] + break + if not saw_reliable_date: + get_logger().info( + "All PR commit author dates are missing; cannot compute incremental range. " + "Falling back to full review." + ) + self.incremental.is_incremental = False + return None + return self.pr_commits[first_new_commit_index:] if first_new_commit_index is not None else [] + + def get_previous_review(self, *, full: bool, incremental: bool): + if not (full or incremental): + raise ValueError("At least one of full or incremental must be True") + prefixes = [] + if full: + prefixes.append(PRReviewHeader.REGULAR.value) + if incremental: + prefixes.append(PRReviewHeader.INCREMENTAL.value) + matches = [] + for comment in self.get_issue_comments(): + body = getattr(comment, "body", None) + if body and any(body.startswith(p) for p in prefixes): + matches.append(comment) + if not matches: + return None + latest = max( + matches, + key=lambda c: _to_naive_utc(getattr(c, "published_date", None)) or _dt.datetime.min, + ) + latest.html_url = self.get_comment_url(latest) + latest.created_at = _to_naive_utc(getattr(latest, "published_date", None)) + return latest + def get_repo_settings(self): try: contents = self.azure_devops_client.get_item_content( @@ -175,6 +334,13 @@ def get_repo_settings(self): return "" def get_files(self): + if (isinstance(getattr(self, "incremental", None), IncrementalPR) + and self.incremental.is_incremental + and self.unreviewed_files_map): + return list(self.unreviewed_files_map.keys()) + return self._get_files_full() + + def _get_files_full(self): files = [] for i in self.azure_devops_client.get_pull_request_commits( project=self.workspace_slug, @@ -194,9 +360,17 @@ def get_files(self): def get_diff_files(self) -> list[FilePatchInfo]: try: - if self.diff_files: + if self.diff_files is not None: return self.diff_files + if self.pr.last_merge_commit is None or self.pr.last_merge_target_commit is None: + get_logger().info( + f"PR {self.pr_num} has no last_merge_commit/last_merge_target_commit; " + f"cannot compute diff files." + ) + self.diff_files = [] + return [] + base_sha = self.pr.last_merge_target_commit head_sha = self.pr.last_merge_commit @@ -254,7 +428,7 @@ def get_diff_files(self) -> list[FilePatchInfo]: # diffs = list(set(diffs)) diffs_original = diffs - diffs = filter_ignored(diffs_original, 'azure') + diffs = filter_ignored(diffs_original, "azure") if diffs_original != diffs: try: get_logger().info(f"Filtered out [ignore] files for pull request:", extra= @@ -263,6 +437,15 @@ def get_diff_files(self) -> list[FilePatchInfo]: except Exception: pass + incremental_active = ( + isinstance(getattr(self, "incremental", None), IncrementalPR) + and self.incremental.is_incremental + and bool(self.unreviewed_files_map) + and self.incremental.last_seen_commit_sha + ) + if incremental_active: + diffs = [f for f in diffs if f in self.unreviewed_files_map] + invalid_files_names = [] for file in diffs: if not is_valid_file(file): @@ -301,29 +484,53 @@ def get_diff_files(self) -> list[FilePatchInfo]: elif "rename" in diff_types[file]: # diff_type can be `rename` | `edit, rename` edit_type = EDIT_TYPE.RENAMED - version = GitVersionDescriptor( - version=base_sha.commit_id, version_type="commit" - ) if edit_type == EDIT_TYPE.ADDED or edit_type == EDIT_TYPE.RENAMED: original_file_content_str = "" + elif incremental_active: + inc_version = GitVersionDescriptor( + version=self.incremental.last_seen_commit_sha, version_type="commit" + ) + try: + inc_original = self.azure_devops_client.get_item( + repository_id=self.repo_slug, + path=file, + project=self.workspace_slug, + version_descriptor=inc_version, + download=False, + include_content=True, + ) + original_file_content_str = inc_original.content or "" + except Exception as error: + get_logger().warning( + f"Failed to retrieve original of {file} at {self.incremental.last_seen_commit_sha}: {error}" + ) + original_file_content_str = "" else: + base_version = GitVersionDescriptor( + version=base_sha.commit_id, version_type="commit" + ) try: - original_file_content_str = self.azure_devops_client.get_item( + base_original = self.azure_devops_client.get_item( repository_id=self.repo_slug, path=file, project=self.workspace_slug, - version_descriptor=version, + version_descriptor=base_version, download=False, include_content=True, ) - original_file_content_str = original_file_content_str.content + original_file_content_str = base_original.content except Exception as error: - get_logger().error(f"Failed to retrieve original file content of {file} at version {version}", error=error) + get_logger().error( + f"Failed to retrieve original file content of {file} at version {base_version}", + error=error, + ) original_file_content_str = "" patch = load_large_diff( file, new_file_content_str, original_file_content_str, show_warning=False ).rstrip() + if incremental_active: + self.unreviewed_files_map[file] = patch # count number of lines added and removed patch_lines = patch.splitlines(keepends=True) diff --git a/pr_agent/git_providers/gitea_provider.py b/pr_agent/git_providers/gitea_provider.py index 89a6248e9b..d3d7f20624 100644 --- a/pr_agent/git_providers/gitea_provider.py +++ b/pr_agent/git_providers/gitea_provider.py @@ -64,7 +64,7 @@ def __init__(self, url: Optional[str] = None): self.diff_files = [] self.incremental = IncrementalPR(False) self.comments_list = [] - self.unreviewed_files_set = dict() + self.unreviewed_files_map = dict() if "pulls" in url: self.pr_url = url @@ -475,9 +475,9 @@ def get_diff_files(self) -> List[FilePatchInfo]: # Get file content from this pr head_file = self.file_contents.get(filename,"") - if self.incremental.is_incremental and self.unreviewed_files_set: + if self.incremental.is_incremental and self.unreviewed_files_map: base_file = self._get_file_content_from_latest_commit(filename) - self.unreviewed_files_set[filename] = patch + self.unreviewed_files_map[filename] = patch else: if avoid_load: base_file = "" diff --git a/pr_agent/git_providers/github_provider.py b/pr_agent/git_providers/github_provider.py index fa52b7dc05..901f66b5d1 100644 --- a/pr_agent/git_providers/github_provider.py +++ b/pr_agent/git_providers/github_provider.py @@ -79,7 +79,7 @@ def _get_issue_handle(self, issue_url) -> Optional[Issue]: def get_incremental_commits(self, incremental=IncrementalPR(False)): self.incremental = incremental if self.incremental.is_incremental: - self.unreviewed_files_set = dict() + self.unreviewed_files_map = dict() self._get_incremental_commits() def is_supported(self, capability: str) -> bool: @@ -162,7 +162,7 @@ def _get_incremental_commits(self): if commit.commit.message.startswith(f"Merge branch '{self._get_repo().default_branch}'"): get_logger().info(f"Skipping merge commit {commit.commit.message}") continue - self.unreviewed_files_set.update({file.filename: file for file in commit.files}) + self.unreviewed_files_map.update({file.filename: file for file in commit.files}) else: get_logger().info("No previous review found, will review the entire PR") self.incremental.is_incremental = False @@ -194,8 +194,8 @@ def get_previous_review(self, *, full: bool, incremental: bool): return self.comments[index] def get_files(self): - if self.incremental.is_incremental and self.unreviewed_files_set: - return self.unreviewed_files_set.values() + if self.incremental.is_incremental and self.unreviewed_files_map: + return self.unreviewed_files_map.values() try: git_files = context.get("git_files", None) if git_files: @@ -296,10 +296,10 @@ def get_diff_files(self) -> list[FilePatchInfo]: else: new_file_content_str = self._get_pr_file_content(file, self.pr.head.sha) # communication with GitHub - if self.incremental.is_incremental and self.unreviewed_files_set: + if self.incremental.is_incremental and self.unreviewed_files_map: original_file_content_str = self._get_pr_file_content(file, self.incremental.last_seen_commit_sha) patch = load_large_diff(file.filename, new_file_content_str, original_file_content_str) - self.unreviewed_files_set[file.filename] = patch + self.unreviewed_files_map[file.filename] = patch else: if avoid_load: original_file_content_str = "" diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py index c4917f3597..2b1f358011 100644 --- a/pr_agent/tools/pr_reviewer.py +++ b/pr_agent/tools/pr_reviewer.py @@ -123,8 +123,11 @@ async def run(self) -> None: get_logger().info(f"PR has no files: {self.pr_url}, skipping review") return None - if self.incremental.is_incremental and not self._can_run_incremental_review(): - return None + if self.incremental.is_incremental: + can_run = self._can_run_incremental_review() + # If the gate disabled incremental (e.g., commits_range is None), fall through to full review. + if not can_run and self.incremental.is_incremental: + return None # if isinstance(self.args, list) and self.args and self.args[0] == 'auto_approve': # get_logger().info(f'Auto approve flow PR: {self.pr_url} ...') @@ -139,11 +142,15 @@ async def run(self) -> None: # ticket extraction if exists await extract_and_cache_pr_tickets(self.git_provider, self.vars) - if self.incremental.is_incremental and hasattr(self.git_provider, "unreviewed_files_set") and not self.git_provider.unreviewed_files_set: + if ( + self.incremental.is_incremental + and hasattr(self.git_provider, "unreviewed_files_map") + and not self.git_provider.unreviewed_files_map + ): get_logger().info(f"Incremental review is enabled for {self.pr_url} but there are no new files") previous_review_url = "" - if hasattr(self.git_provider, "previous_review"): - previous_review_url = self.git_provider.previous_review.html_url + if hasattr(self.git_provider, "previous_review") and self.git_provider.previous_review is not None: + previous_review_url = getattr(self.git_provider.previous_review, "html_url", "") or "" if get_settings().config.publish_output: self.git_provider.publish_comment(f"Incremental Review Skipped\n" f"No files were changed since the [previous PR Review]({previous_review_url})") @@ -337,6 +344,13 @@ def _can_run_incremental_review(self) -> bool: if not hasattr(self.git_provider, "get_incremental_commits"): get_logger().info(f"Incremental review is not supported for {get_settings().config.git_provider}") return False + if self.incremental.commits_range is None: + get_logger().info( + f"Incremental review not initialized for {get_settings().config.git_provider}; " + f"falling back to full review." + ) + self.incremental.is_incremental = False + return False # checking if there are enough commits to start the review num_new_commits = len(self.incremental.commits_range) num_commits_threshold = get_settings().pr_reviewer.minimal_commits_for_incremental_review diff --git a/tests/unittest/test_azure_devops_incremental.py b/tests/unittest/test_azure_devops_incremental.py new file mode 100644 index 0000000000..a58f2603a2 --- /dev/null +++ b/tests/unittest/test_azure_devops_incremental.py @@ -0,0 +1,188 @@ +import datetime as _dt +from unittest.mock import MagicMock, patch + +from pr_agent.git_providers import AzureDevopsProvider +from pr_agent.git_providers.azuredevops_provider import ( + _AzureCommitAdapter, + _to_naive_utc, +) +from pr_agent.git_providers.git_provider import IncrementalPR + + +def _raw_commit(commit_id, comment, author_date, parents=None): + raw = MagicMock() + raw.commit_id = commit_id + raw.comment = comment + raw.author = MagicMock() + raw.author.date = author_date + raw.parents = parents or [] + return raw + + +def _comment(body, published_date): + c = MagicMock() + c.body = body + c.content = body + c.published_date = published_date + c.thread_id = 7 + return c + + +class TestNaiveUtc: + def test_strips_tz_from_aware(self): + aware = _dt.datetime(2024, 1, 1, 12, 0, tzinfo=_dt.timezone.utc) + naive = _to_naive_utc(aware) + assert naive.tzinfo is None + assert naive == _dt.datetime(2024, 1, 1, 12, 0) + + def test_passes_naive_through(self): + naive = _dt.datetime(2024, 1, 1, 12, 0) + assert _to_naive_utc(naive) == naive + + def test_none_returns_none(self): + assert _to_naive_utc(None) is None + + +class TestAzureCommitAdapter: + def test_exposes_github_shape(self): + date = _dt.datetime(2024, 1, 1, 12, 0, tzinfo=_dt.timezone.utc) + raw = _raw_commit("abc123", "fix bug", date, parents=["p1"]) + adapter = _AzureCommitAdapter(raw) + assert adapter.sha == "abc123" + assert adapter.commit_id == "abc123" + assert adapter.commit.message == "fix bug" + assert adapter.commit.author.date.tzinfo is None + assert adapter.parents == ["p1"] + + def test_handles_missing_author(self): + raw = MagicMock() + raw.commit_id = "x" + raw.comment = "" + raw.author = None + raw.parents = None + adapter = _AzureCommitAdapter(raw) + assert adapter.commit.author.date is None + assert adapter.parents == [] + + +class TestGetIncrementalCommits: + def _make_provider(self): + with patch.object( + AzureDevopsProvider, "_get_azure_devops_client", + return_value=(MagicMock(), MagicMock()), + ): + provider = AzureDevopsProvider() + provider.workspace_slug = "ws" + provider.repo_slug = "repo" + provider.pr_num = 1 + provider.pr_url = "https://dev.azure.com/o/ws/_git/repo/pullrequest/1" + return provider + + def test_no_previous_review_disables_incremental(self): + provider = self._make_provider() + provider.azure_devops_client.get_pull_request_commits = MagicMock(return_value=[]) + provider.get_issue_comments = MagicMock(return_value=[]) + + inc = IncrementalPR(True) + provider.get_incremental_commits(inc) + + assert provider.incremental.is_incremental is False + assert provider.previous_review is None + + def test_populates_commits_range_and_files(self): + provider = self._make_provider() + + review_time = _dt.datetime(2024, 6, 1, 10, 0, tzinfo=_dt.timezone.utc) + old = _raw_commit( + "old1", "first", _dt.datetime(2024, 5, 1, tzinfo=_dt.timezone.utc), parents=["p0"], + ) + new1 = _raw_commit( + "new1", "second", _dt.datetime(2024, 6, 2, tzinfo=_dt.timezone.utc), parents=["old1"], + ) + new2 = _raw_commit( + "new2", "third", _dt.datetime(2024, 6, 3, tzinfo=_dt.timezone.utc), parents=["new1"], + ) + # Azure returns newest-first. + provider.azure_devops_client.get_pull_request_commits = MagicMock( + return_value=[new2, new1, old] + ) + + prev = _comment("## PR Reviewer Guide\nbody", review_time) + provider.get_issue_comments = MagicMock(return_value=[prev]) + + changes_obj = MagicMock() + changes_obj.changes = [ + {"item": {"path": "/foo.py", "gitObjectType": "blob"}}, + {"item": {"path": "/bar.py", "gitObjectType": "blob"}}, + {"item": {"path": "/somedir", "gitObjectType": "tree"}}, + ] + provider.azure_devops_client.get_changes = MagicMock(return_value=changes_obj) + + inc = IncrementalPR(True) + provider.get_incremental_commits(inc) + + assert provider.incremental.is_incremental + assert len(provider.incremental.commits_range) == 2 + assert provider.incremental.first_new_commit.sha == "new1" + assert provider.incremental.last_seen_commit.sha == "old1" + assert provider.incremental.last_seen_commit_sha == "old1" + assert "/foo.py" in provider.unreviewed_files_map + assert "/bar.py" in provider.unreviewed_files_map + assert "/somedir" not in provider.unreviewed_files_map + assert prev.html_url == provider.get_comment_url(prev) + + def test_skips_merge_commits(self): + provider = self._make_provider() + review_time = _dt.datetime(2024, 6, 1, tzinfo=_dt.timezone.utc) + merge = _raw_commit( + "m1", "merge", _dt.datetime(2024, 6, 2, tzinfo=_dt.timezone.utc), + parents=["a", "b"], + ) + old = _raw_commit("o", "old", _dt.datetime(2024, 5, 1, tzinfo=_dt.timezone.utc)) + provider.azure_devops_client.get_pull_request_commits = MagicMock( + return_value=[merge, old] + ) + provider.get_issue_comments = MagicMock( + return_value=[_comment("## PR Reviewer Guide", review_time)] + ) + provider.azure_devops_client.get_changes = MagicMock() + + provider.get_incremental_commits(IncrementalPR(True)) + provider.azure_devops_client.get_changes.assert_not_called() + + def test_all_merge_commits_falls_back_to_full(self): + provider = self._make_provider() + review_time = _dt.datetime(2024, 6, 1, tzinfo=_dt.timezone.utc) + merge1 = _raw_commit( + "m1", "merge1", _dt.datetime(2024, 6, 2, tzinfo=_dt.timezone.utc), + parents=["a", "b"], + ) + merge2 = _raw_commit( + "m2", "merge2", _dt.datetime(2024, 6, 3, tzinfo=_dt.timezone.utc), + parents=["c", "d"], + ) + provider.azure_devops_client.get_pull_request_commits = MagicMock( + return_value=[merge2, merge1] + ) + provider.get_issue_comments = MagicMock( + return_value=[_comment("## PR Reviewer Guide", review_time)] + ) + provider.azure_devops_client.get_changes = MagicMock() + + provider.get_incremental_commits(IncrementalPR(True)) + + assert provider.incremental.is_incremental is False + provider.azure_devops_client.get_changes.assert_not_called() + + +class TestPrReviewerGuard: + def test_can_run_returns_false_when_commits_range_none(self): + from pr_agent.tools.pr_reviewer import PRReviewer + reviewer = PRReviewer.__new__(PRReviewer) + reviewer.is_auto = False + reviewer.pr_url = "u" + reviewer.incremental = IncrementalPR(True) + # incremental.commits_range stays None — provider has the method but didn't populate it. + reviewer.git_provider = MagicMock(spec=["get_incremental_commits"]) + # Should not raise NoneType len() error. + assert reviewer._can_run_incremental_review() is False