diff --git a/.githooks/commit-msg b/.githooks/commit-msg new file mode 100755 index 0000000..4179e46 --- /dev/null +++ b/.githooks/commit-msg @@ -0,0 +1,14 @@ +#!/bin/sh +set -eu + +MSG_FILE="$1" + +if grep -qiE '^Co-Authored-By:' "$MSG_FILE"; then + echo "ERROR: Co-Authored-By trailers are forbidden in this repository." >&2 + exit 1 +fi + +if grep -qiE '\b(claude|anthropic|openai|gpt-[0-9]|copilot|cursor|codex)\b' "$MSG_FILE"; then + echo "ERROR: AI assistant names are forbidden in commit messages for this repository." >&2 + exit 1 +fi diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100755 index 0000000..55c0691 --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,19 @@ +#!/bin/sh +set -eu + +ALLOWED_NAME="Xiangyue-Zhang" +ALLOWED_EMAIL="85532891+Xiangyue-Zhang@users.noreply.github.com" + +AUTHOR_IDENT="$(git var GIT_AUTHOR_IDENT)" +AUTHOR_NAME="$(printf '%s' "$AUTHOR_IDENT" | sed -E 's/^(.*) <.*$/\1/')" +AUTHOR_EMAIL="$(printf '%s' "$AUTHOR_IDENT" | sed -E 's/^.* <([^>]*)>.*$/\1/')" + +if [ "$AUTHOR_NAME" != "$ALLOWED_NAME" ]; then + echo "ERROR: author name must be '$ALLOWED_NAME' but is '$AUTHOR_NAME'" >&2 + exit 1 +fi + +if [ "$AUTHOR_EMAIL" != "$ALLOWED_EMAIL" ]; then + echo "ERROR: author email must be '$ALLOWED_EMAIL' but is '$AUTHOR_EMAIL'" >&2 + exit 1 +fi diff --git a/.githooks/pre-push b/.githooks/pre-push new file mode 100755 index 0000000..9b88c5d --- /dev/null +++ b/.githooks/pre-push @@ -0,0 +1,49 @@ +#!/bin/sh +set -eu + +ALLOWED_NAME="Xiangyue-Zhang" +ALLOWED_EMAIL="85532891+Xiangyue-Zhang@users.noreply.github.com" + +check_commit() { + sha="$1" + author_name="$(git show -s --format='%an' "$sha")" + author_email="$(git show -s --format='%ae' "$sha")" + message="$(git show -s --format='%B' "$sha")" + + if [ "$author_name" != "$ALLOWED_NAME" ]; then + echo "ERROR: commit $sha has author '$author_name', expected '$ALLOWED_NAME'" >&2 + exit 1 + fi + + if [ "$author_email" != "$ALLOWED_EMAIL" ]; then + echo "ERROR: commit $sha has email '$author_email', expected '$ALLOWED_EMAIL'" >&2 + exit 1 + fi + + if printf '%s\n' "$message" | grep -qiE '^Co-Authored-By:'; then + echo "ERROR: commit $sha contains forbidden Co-Authored-By trailer" >&2 + exit 1 + fi + + if printf '%s\n' "$message" | grep -qiE '\b(claude|anthropic|openai|gpt-[0-9]|copilot|cursor|codex)\b'; then + echo "ERROR: commit $sha contains forbidden AI assistant name in message" >&2 + exit 1 + fi +} + +while read -r local_ref local_sha remote_ref remote_sha +do + if [ "$local_sha" = "0000000000000000000000000000000000000000" ]; then + continue + fi + + if [ "$remote_sha" = "0000000000000000000000000000000000000000" ]; then + range="$local_sha" + else + range="$remote_sha..$local_sha" + fi + + for sha in $(git rev-list "$range"); do + check_commit "$sha" + done +done diff --git a/.github/workflows/contributor-guard.yml b/.github/workflows/contributor-guard.yml index 3823961..517ee42 100644 --- a/.github/workflows/contributor-guard.yml +++ b/.github/workflows/contributor-guard.yml @@ -46,7 +46,7 @@ jobs: FAIL=0 ALLOWED_NAME="Xiangyue-Zhang" - ALLOWED_EMAILS="85532891\\+Xiangyue-Zhang@users\\.noreply\\.github\\.com|Xiangyue-Zhang@users\\.noreply\\.github\\.com" + ALLOWED_EMAIL="85532891+Xiangyue-Zhang@users.noreply.github.com" for sha in $(git log --format='%H' "$RANGE" 2>/dev/null || git log --format='%H' HEAD); do AN=$(git show -s --format='%an' "$sha") @@ -63,8 +63,8 @@ jobs: fi # Check author email - if ! echo "$AE" | grep -qE "^($ALLOWED_EMAILS)$"; then - echo " ❌ author email '$AE' is not in allow-list" + if [ "$AE" != "$ALLOWED_EMAIL" ]; then + echo " ❌ author email '$AE' is not '$ALLOWED_EMAIL'" FAIL=1 fi @@ -76,8 +76,8 @@ jobs: # Check for AI assistant names in commit message if echo "$MSG" | grep -qiE '\b(claude|anthropic|openai|gpt-[0-9]|copilot|cursor|codex)\b'; then - echo " ⚠️ commit message mentions AI assistant name (allowed if intentional, but flagged)" - # warn only — do not fail, since legitimate features may mention these + echo " ❌ commit message mentions forbidden AI assistant name" + FAIL=1 fi echo "" diff --git a/README.md b/README.md index 1c8d0e2..e3f2363 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,11 @@ ## Recent Updates +**2026-04-09** +- Reduced token growth by resetting leader context between cycles. +- Added a lightweight fallback to avoid repeated no-progress loops. +- Hardened tool execution against path traversal and shell injection. + **2026-04-08** - Added progress tracking exports for experiment monitoring. - Supports optional Obsidian sync for a live dashboard plus daily notes. diff --git a/config.yaml b/config.yaml index 823c833..795fc47 100644 --- a/config.yaml +++ b/config.yaml @@ -18,6 +18,7 @@ agent: max_cycles: -1 # -1 = run forever max_steps_per_cycle: 3 # Max worker dispatches per cycle cooldown_interval: 300 # Smart cooldown polling interval (seconds) + no_progress_fallback_threshold: 3 # Back off after repeated no-progress cycles on the same plan memory: brief_max_chars: 3000 # Tier 1: PROJECT_BRIEF cap diff --git a/core/__init__.py b/core/__init__.py index b2412b3..3b51888 100644 --- a/core/__init__.py +++ b/core/__init__.py @@ -6,5 +6,5 @@ from .agents import AgentDispatcher from .tools import ToolRegistry -__version__ = "0.1.0" +__version__ = "0.1.1" __all__ = ["ResearchLoop", "MemoryManager", "ExperimentMonitor", "AgentDispatcher", "ToolRegistry"] diff --git a/core/loop.py b/core/loop.py index 585754c..7131a57 100644 --- a/core/loop.py +++ b/core/loop.py @@ -63,7 +63,10 @@ def __init__(self, config: dict, project_dir: str): self.cycle_count = self._load_cycle_counter() self.max_cycles = config.get("agent", {}).get("max_cycles", -1) self.cooldown = config.get("agent", {}).get("cooldown_interval", 300) + self.no_progress_fallback_threshold = config.get("agent", {}).get("no_progress_fallback_threshold", 3) self._running = True + self._no_progress_streak = 0 + self._last_no_progress_signature = "" # Graceful shutdown signal.signal(signal.SIGTERM, self._handle_signal) @@ -83,6 +86,9 @@ def run(self): logger.info(f"=== Cycle {self.cycle_count} ===") try: + # Keep leader context bounded to one cycle. + self.dispatcher.reset_leader_history() + # Check for human directive directive = self._consume_directive() self._update_state( @@ -96,9 +102,18 @@ def run(self): # THINK: Analyze and plan think_result = self._think(directive) + think_result = self._apply_no_progress_fallback(think_result, directive) if think_result.get("action") == "wait": logger.info("THINK decided to wait. Entering cooldown.") + self._update_state( + { + "cycle": self.cycle_count, + "status": "waiting", + "updated_at": time.time(), + "suggested_next_step": think_result.get("reason", ""), + } + ) self._smart_cooldown() continue @@ -146,6 +161,7 @@ def run(self): "last_error": "", } ) + self._record_cycle_outcome(think_result, execute_result, reflect_result) self._refresh_obsidian(reflect_result=reflect_result, directive=directive) except Exception as e: @@ -248,6 +264,69 @@ def _refresh_obsidian(self, reflect_result: dict, directive: Optional[str]): directive=directive, ) + def _plan_signature(self, plan: dict) -> str: + """Build a stable signature for repeated-plan detection.""" + normalized = { + "action": plan.get("action", ""), + "agent": plan.get("agent", ""), + "task": " ".join(plan.get("task", "").split())[:300], + "hypothesis": " ".join(plan.get("hypothesis", "").split())[:200], + } + return json.dumps(normalized, sort_keys=True, ensure_ascii=True) + + def _apply_no_progress_fallback(self, think_result: dict, directive: Optional[str]) -> dict: + """Back off if the same experiment plan keeps repeating without progress.""" + if directive or self.no_progress_fallback_threshold <= 0: + return think_result + + if think_result.get("action") != "experiment": + return think_result + + signature = self._plan_signature(think_result) + if ( + self._no_progress_streak >= self.no_progress_fallback_threshold + and signature == self._last_no_progress_signature + ): + reason = ( + f"Fallback triggered after {self._no_progress_streak} no-progress cycles on the same plan. " + "Backing off to avoid empty loops until new signal arrives." + ) + logger.warning(reason) + self.memory.log_decision(reason) + return { + "action": "wait", + "reason": reason, + "decision": reason, + } + + return think_result + + def _record_cycle_outcome(self, think_result: dict, execute_result: dict, reflect_result: dict): + """Track whether repeated cycles are producing real progress.""" + if think_result.get("action") != "experiment": + if think_result.get("action") != "wait": + self._no_progress_streak = 0 + self._last_no_progress_signature = "" + return + + signature = self._plan_signature(think_result) + made_progress = bool( + execute_result.get("experiment_launched") + or execute_result.get("final_metrics") + or reflect_result.get("milestone") + ) + + if made_progress: + self._no_progress_streak = 0 + self._last_no_progress_signature = "" + return + + if signature == self._last_no_progress_signature: + self._no_progress_streak += 1 + else: + self._last_no_progress_signature = signature + self._no_progress_streak = 1 + def _smart_cooldown(self): """Poll at short intervals instead of fixed long wait.""" logger.info(f"Smart cooldown: polling every {self.cooldown}s") diff --git a/core/tools.py b/core/tools.py index 42167eb..011534a 100644 --- a/core/tools.py +++ b/core/tools.py @@ -9,6 +9,7 @@ import subprocess import json import logging +import shlex from pathlib import Path from typing import Optional @@ -28,7 +29,7 @@ class ToolRegistry: """ def __init__(self, workspace: Path): - self.workspace = Path(workspace) + self.workspace = Path(workspace).resolve() self._protected_files = {"state.json", "MEMORY_LOG.md", "PROJECT_BRIEF.md", ".lock"} def get_tools_for(self, agent_type: str) -> list[dict]: @@ -177,17 +178,59 @@ def _tool_log_memory(self) -> dict: # --- Tool Implementations --- + def _resolve_workspace_path(self, path: str) -> Path: + """Resolve a user-supplied relative path and keep it inside the workspace.""" + if path is None or not str(path).strip(): + raise ValueError("Path cannot be empty") + + requested = Path(path) + if requested.is_absolute(): + raise ValueError("Path must be relative to workspace") + + resolved = (self.workspace / requested).resolve(strict=False) + + try: + resolved.relative_to(self.workspace) + except ValueError as exc: + raise ValueError(f"Path escapes workspace: {path}") from exc + + return resolved + + def _parse_command(self, command: str) -> list[str]: + """Parse command text into argv without invoking a shell.""" + if not command or not command.strip(): + raise ValueError("Command cannot be empty") + + try: + argv = shlex.split(command) + except ValueError as exc: + raise ValueError(f"Invalid command syntax: {exc}") from exc + + if not argv: + raise ValueError("Command cannot be empty") + + dangerous_bins = { + "rm", + "sudo", + "su", + "mkfs", + "dd", + "shutdown", + "reboot", + "poweroff", + "halt", + } + if Path(argv[0]).name in dangerous_bins: + raise ValueError(f"Blocked executable: {argv[0]}") + + return argv + def _exec_run_shell(self, command: str, timeout: int = 120) -> str: """Execute a shell command with timeout.""" - # Safety: block dangerous commands - dangerous = ["rm -rf /", "mkfs", "> /dev/sd", "dd if=/dev/zero"] - if any(d in command for d in dangerous): - return json.dumps({"error": "Blocked: dangerous command"}) - try: + argv = self._parse_command(command) result = subprocess.run( - command, - shell=True, + argv, capture_output=True, text=True, timeout=timeout, @@ -207,17 +250,17 @@ def _exec_launch_experiment(self, command: str, log_file: str, gpu: str = None) if gpu: env["CUDA_VISIBLE_DEVICES"] = gpu - log_path = self.workspace / log_file + argv = self._parse_command(command) + log_path = self._resolve_workspace_path(log_file) log_path.parent.mkdir(parents=True, exist_ok=True) with open(log_path, "w") as f: proc = subprocess.Popen( - f"nohup {command}", - shell=True, + argv, stdout=f, stderr=subprocess.STDOUT, env=env, - preexec_fn=os.setsid, + start_new_session=True, cwd=str(self.workspace), ) @@ -228,14 +271,14 @@ def _exec_write_file(self, path: str, content: str) -> str: if Path(path).name in self._protected_files: return json.dumps({"error": f"Cannot overwrite protected file: {path}"}) - file_path = self.workspace / path + file_path = self._resolve_workspace_path(path) file_path.parent.mkdir(parents=True, exist_ok=True) file_path.write_text(content) return json.dumps({"status": "written", "path": str(file_path), "bytes": len(content)}) def _exec_read_file(self, path: str) -> str: """Read file contents.""" - file_path = self.workspace / path + file_path = self._resolve_workspace_path(path) if not file_path.exists(): return json.dumps({"error": f"File not found: {path}"}) content = file_path.read_text() @@ -243,7 +286,7 @@ def _exec_read_file(self, path: str) -> str: def _exec_list_files(self, path: str = ".") -> str: """List directory contents.""" - dir_path = self.workspace / path + dir_path = self._resolve_workspace_path(path) if not dir_path.is_dir(): return json.dumps({"error": f"Not a directory: {path}"}) files = sorted([f.name for f in dir_path.iterdir()]) diff --git a/tests/test_loop_fallback.py b/tests/test_loop_fallback.py new file mode 100644 index 0000000..26ba618 --- /dev/null +++ b/tests/test_loop_fallback.py @@ -0,0 +1,59 @@ +import tempfile +import unittest +from pathlib import Path +from unittest.mock import Mock + +from core.loop import ResearchLoop + + +class ResearchLoopFallbackTests(unittest.TestCase): + def setUp(self): + self.tempdir = tempfile.TemporaryDirectory() + self.project_dir = Path(self.tempdir.name) + (self.project_dir / "PROJECT_BRIEF.md").write_text("Test brief") + self.loop = ResearchLoop( + config={ + "project": {"workspace": "workspace"}, + "agent": { + "max_cycles": 1, + "cooldown_interval": 0, + "no_progress_fallback_threshold": 2, + }, + "obsidian": {"enabled": False}, + }, + project_dir=str(self.project_dir), + ) + + def tearDown(self): + self.tempdir.cleanup() + + def test_run_resets_leader_history_each_cycle(self): + self.loop.dispatcher.reset_leader_history = Mock() + self.loop._think = lambda directive=None: {"action": "wait", "reason": "idle"} + self.loop._smart_cooldown = lambda: None + + self.loop.run() + + self.loop.dispatcher.reset_leader_history.assert_called_once() + + def test_repeated_no_progress_plan_triggers_wait_fallback(self): + plan = { + "action": "experiment", + "agent": "code", + "task": "Retry the same broken command", + "hypothesis": "It might work this time", + } + execute_result = {"experiment_launched": False} + reflect_result = {} + + self.loop._record_cycle_outcome(plan, execute_result, reflect_result) + self.loop._record_cycle_outcome(plan, execute_result, reflect_result) + + fallback = self.loop._apply_no_progress_fallback(plan, directive=None) + + self.assertEqual(fallback["action"], "wait") + self.assertIn("Fallback triggered", fallback["reason"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_tools_security.py b/tests/test_tools_security.py new file mode 100644 index 0000000..164b3ee --- /dev/null +++ b/tests/test_tools_security.py @@ -0,0 +1,76 @@ +import importlib.util +import json +import tempfile +import unittest +from pathlib import Path + + +def load_tool_registry(): + module_path = Path(__file__).resolve().parents[1] / "core" / "tools.py" + spec = importlib.util.spec_from_file_location("core.tools", module_path) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + spec.loader.exec_module(module) + return module.ToolRegistry + + +ToolRegistry = load_tool_registry() + + +class ToolRegistrySecurityTests(unittest.TestCase): + def setUp(self): + self.tempdir = tempfile.TemporaryDirectory() + self.workspace = Path(self.tempdir.name) / "workspace" + self.workspace.mkdir() + self.registry = ToolRegistry(self.workspace) + + def tearDown(self): + self.tempdir.cleanup() + + def test_write_file_rejects_path_traversal(self): + result = json.loads( + self.registry.execute_tool("write_file", {"path": "../escape.txt", "content": "owned"}) + ) + self.assertIn("error", result) + self.assertIn("escapes workspace", result["error"]) + self.assertFalse((self.workspace.parent / "escape.txt").exists()) + + def test_read_file_rejects_absolute_path(self): + result = json.loads(self.registry.execute_tool("read_file", {"path": "/etc/hosts"})) + self.assertIn("error", result) + self.assertIn("relative to workspace", result["error"]) + + def test_list_files_rejects_parent_escape(self): + result = json.loads(self.registry.execute_tool("list_files", {"path": ".."})) + self.assertIn("error", result) + self.assertIn("escapes workspace", result["error"]) + + def test_run_shell_does_not_execute_shell_injection_payload(self): + result = json.loads( + self.registry.execute_tool("run_shell", {"command": "echo hello; touch injected.txt"}) + ) + self.assertEqual(result["returncode"], 0) + self.assertIn("hello; touch injected.txt", result["stdout"]) + self.assertFalse((self.workspace / "injected.txt").exists()) + + def test_run_shell_blocks_dangerous_binaries(self): + result = json.loads(self.registry.execute_tool("run_shell", {"command": "rm -rf tmp"})) + self.assertIn("error", result) + self.assertIn("Blocked executable", result["error"]) + + def test_launch_experiment_rejects_log_path_traversal(self): + result = json.loads( + self.registry.execute_tool( + "launch_experiment", + { + "command": 'python3 -c "print(\'hi\')"', + "log_file": "../outside.log", + }, + ) + ) + self.assertIn("error", result) + self.assertIn("escapes workspace", result["error"]) + + +if __name__ == "__main__": + unittest.main()