From 38f43b2cba3ead853ff95fd20d3ed2390f11dec5 Mon Sep 17 00:00:00 2001 From: jclee Date: Sun, 3 May 2026 14:36:37 +0900 Subject: [PATCH 1/2] fix(stability): close mkstemp fd, add request timeouts, use context manager - pr_agent/git_providers/utils.py: close mkstemp fd before remove (prevents fd leak under load when applying repo settings). - pr_agent/servers/bitbucket_app.py: open atlassian-connect.json with context manager; add timeout=30 on bitbucket commits API; replace bare except. - pr_agent/servers/github_polling.py: add timeout=30 on GitHub PR comments fetch (was hanging connection risk). - pr_agent/git_providers/gerrit_provider.py: add timeout=30 on patch upload POST. Identified during full-project stabilization audit. --- pr_agent/git_providers/gerrit_provider.py | 3 ++- pr_agent/git_providers/utils.py | 5 ++++- pr_agent/servers/bitbucket_app.py | 9 +++++---- pr_agent/servers/github_polling.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/pr_agent/git_providers/gerrit_provider.py b/pr_agent/git_providers/gerrit_provider.py index ced150c915..91b0faaf1d 100644 --- a/pr_agent/git_providers/gerrit_provider.py +++ b/pr_agent/git_providers/gerrit_provider.py @@ -165,7 +165,8 @@ def upload_patch(patch, path): headers={ "Content-Type": "application/json", "Authorization": f"Bearer {patch_server_token}", - } + }, + timeout=30, ) response.raise_for_status() patch_server_endpoint = patch_server_endpoint.rstrip("/") diff --git a/pr_agent/git_providers/utils.py b/pr_agent/git_providers/utils.py index 1e64b9578d..49dad2cba4 100644 --- a/pr_agent/git_providers/utils.py +++ b/pr_agent/git_providers/utils.py @@ -35,7 +35,10 @@ def apply_repo_settings(pr_url): category = 'local' try: fd, repo_settings_file = tempfile.mkstemp(suffix='.toml') - os.write(fd, repo_settings) + try: + os.write(fd, repo_settings) + finally: + os.close(fd) try: dynconf_kwargs = {'core_loaders': [], # DISABLE default loaders, otherwise will load toml files more than once. 
diff --git a/pr_agent/servers/bitbucket_app.py b/pr_agent/servers/bitbucket_app.py index 272332767e..ca59c4ed55 100644 --- a/pr_agent/servers/bitbucket_app.py +++ b/pr_agent/servers/bitbucket_app.py @@ -61,12 +61,13 @@ async def get_bearer_token(shared_secret: str, client_key: str): @router.get("/") async def handle_manifest(request: Request, response: Response): cur_dir = os.path.dirname(os.path.abspath(__file__)) - manifest = open(os.path.join(cur_dir, "atlassian-connect.json"), "rt").read() + with open(os.path.join(cur_dir, "atlassian-connect.json"), "rt") as f: + manifest = f.read() try: manifest = manifest.replace("app_key", get_settings().bitbucket.app_key) manifest = manifest.replace("base_url", get_settings().bitbucket.base_url) - except: - get_logger().error("Failed to replace api_key in Bitbucket manifest, trying to continue") + except Exception: + get_logger().error("Failed to replace api_key in Bitbucket manifest, trying to continue", exc_info=True) manifest_obj = json.loads(manifest) return JSONResponse(manifest_obj) @@ -101,7 +102,7 @@ async def _validate_time_from_last_commit_to_pr_update(data: dict) -> bool: 'Authorization': f'Bearer {bearer_token}', 'Accept': 'application/json' } - response = requests.get(commits_api, headers=headers) + response = requests.get(commits_api, headers=headers, timeout=30) if response.status_code != 200: get_logger().warning(f"Bitbucket commits API returned {response.status_code} for {commits_api}") return False diff --git a/pr_agent/servers/github_polling.py b/pr_agent/servers/github_polling.py index ab02339109..8b5731f3f9 100644 --- a/pr_agent/servers/github_polling.py +++ b/pr_agent/servers/github_polling.py @@ -114,7 +114,7 @@ async def is_valid_notification(notification, headers, handled_ids, session, use else: # we could not find the user tag in the latest comment. 
Check previous comments # get all comments in the PR requests_url = f"{pr_url}/comments".replace("pulls", "issues") - comments_response = requests.get(requests_url, headers=headers) + comments_response = requests.get(requests_url, headers=headers, timeout=30) comments = comments_response.json()[::-1] max_comment_to_scan = 4 for comment in comments[:max_comment_to_scan]: From c91353467ee13c4ef3b2928d3e3f33551ae41996 Mon Sep 17 00:00:00 2001 From: jclee Date: Sun, 3 May 2026 14:38:55 +0900 Subject: [PATCH 2/2] fix(stability): replace bare except, add timeout on rate-limit calls - pr_agent/algo/token_handler.py: TokenHandler.__init__ declared vars: dict = {} as its default, so a single mutable dict was shared by every call that omitted vars. Switch to a None sentinel and create the dict inside the function. - pr_agent/algo/utils.py: get_rate_limit_status / validate_and_await_rate_limit used a bare except: that silently swallowed all errors, and the rate-limit requests.get calls had no timeout. Use except Exception: + exc_info logging and timeout=10s on both rate-limit GET calls. Found during full-project stabilization audit. --- pr_agent/algo/token_handler.py | 4 +++- pr_agent/algo/utils.py | 11 ++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/pr_agent/algo/token_handler.py b/pr_agent/algo/token_handler.py index cb313f023f..3a42097c9c 100644 --- a/pr_agent/algo/token_handler.py +++ b/pr_agent/algo/token_handler.py @@ -56,7 +56,7 @@ class TokenHandler: CLAUDE_MODEL = "claude-3-7-sonnet-20250219" CLAUDE_MAX_CONTENT_SIZE = 9_000_000 # Maximum allowed content size (9MB) for Claude API - def __init__(self, pr=None, vars: dict = {}, system="", user=""): + def __init__(self, pr=None, vars: dict = None, system="", user=""): """ Initializes the TokenHandler object. @@ -66,6 +66,8 @@ def __init__(self, pr=None, vars: dict = {}, system="", user=""): - system: The system string. - user: The user string. 
""" + if vars is None: + vars = {} self.encoder = TokenEncoder.get_token_encoder() if pr is not None: diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py index 3e8576753f..0d44d6d679 100644 --- a/pr_agent/algo/utils.py +++ b/pr_agent/algo/utils.py @@ -1204,15 +1204,16 @@ def get_rate_limit_status(github_token) -> dict: "Authorization": f"token {github_token}" } - response = requests.get(RATE_LIMIT_URL, headers=HEADERS) + response = requests.get(RATE_LIMIT_URL, headers=HEADERS, timeout=10) try: rate_limit_info = response.json() if rate_limit_info.get('message') == 'Rate limiting is not enabled.': # for github enterprise return {'resources': {}} response.raise_for_status() # Check for HTTP errors - except: # retry + except Exception: # retry + get_logger().warning("Rate limit check failed, retrying once", exc_info=True) time.sleep(0.1) - response = requests.get(RATE_LIMIT_URL, headers=HEADERS) + response = requests.get(RATE_LIMIT_URL, headers=HEADERS, timeout=10) return response.json() return rate_limit_info @@ -1249,8 +1250,8 @@ def validate_and_await_rate_limit(github_token): time.sleep(sleep_time_sec + 1) rate_limit_status = get_rate_limit_status(github_token) return rate_limit_status - except: - get_logger().error("Error in rate limit") + except Exception: + get_logger().error("Error in rate limit", exc_info=True) return None