diff --git a/docs/cli.md b/docs/cli.md index 21a44c8b..bb94f30f 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -44,14 +44,27 @@ Global flags (apply to all subcommands): `--verbose, -v` (enable info-level logg - `--dry-run` - Print the planned set without calling the ADO API. - `--org / --project / --pat` - ADO context overrides (same semantics as the other lifecycle commands). - `--definition-ids ` - Explicit pipeline definition IDs (comma-separated; skips local-fixture auto-detection). + - `--all-repos` - **Project-scope mode.** Activates Preview-driven discovery and operates on every ado-aw pipeline ADO knows about in the project — direct ado-aw definitions *and* consumer pipelines that include ado-aw templates — regardless of which repo their root YAML lives in. Mutually exclusive with `--definition-ids`. Ignores local lock files for matching (uses ADO Pipeline Preview to find marker steps). + - `--source ` - **Filter by template.** Restricts to definitions whose `# ado-aw-metadata` marker references the given source path (e.g. `agents/security-scan.md`). Activates the discovery code path; pairs with `--all-repos` to scope across the whole project. Mutually exclusive with `--definition-ids`. - `secrets list [PATH]` - List variable names and their `isSecret` / `allowOverride` flags on every matched definition. **Never prints values.** - `--json` - Emit machine-readable JSON. - `--org / --project / --pat / --definition-ids` - As above. + - `--all-repos / --source ` - As for `secrets set` (project-scope discovery). - `secrets delete [PATH]` - Delete the named variable from every matched definition. No-op when the variable is absent. - `--dry-run` - Print the planned deletion plan without calling the ADO API. - `--org / --project / --pat / --definition-ids` - As above. + - `--all-repos / --source ` - As for `secrets set` (project-scope discovery). + +### Project-scope discovery (`--all-repos` / `--source`) + +`secrets set / list / delete` accept two opt-in flags that activate **Preview-driven discovery** instead of the default lexical local-fixture matching. They are the surface that solves token management for templates that get included by other pipelines. + +- **`--all-repos`** — search every definition in the ADO project. With it, you can `secrets set GITHUB_TOKEN --all-repos` from anywhere; no local checkout of the consumer pipelines is needed. +- **`--source `** — filter results to definitions whose `# ado-aw-metadata` marker references that template. Useful for fan-out: `secrets set GITHUB_TOKEN --source agents/security-scan.md` rotates the token on every consumer pipeline that includes that template. + +Both flags route through `ado-aw`'s `discover_ado_aw_pipelines` machinery, which calls ADO's Pipeline Preview API per definition and scans the expanded YAML for an `ado-aw-marker` step that every compiled pipeline now carries. `--definition-ids` remains the explicit-ID escape hatch and is mutually exclusive with these flags. `enable`, `disable`, and `remove` are **not** changed — they retain their source-scoped safety semantics. - `enable [PATH]` - Register an ADO build definition for each compiled pipeline discovered under `PATH` (or the current directory) and ensure it is `enabled`. For each fixture, matches against the existing ADO definitions by `yamlFilename` first, then by sanitized display name; creates a new definition when neither matches, flips `queueStatus` to `enabled` when an existing definition is `disabled` / `paused`, and skips when it is already `enabled`. Fail-soft per fixture; exits non-zero if any fixture failed. diff --git a/src/ado/discovery.rs b/src/ado/discovery.rs new file mode 100644 index 00000000..22531b13 --- /dev/null +++ b/src/ado/discovery.rs @@ -0,0 +1,1240 @@ +//! Preview-driven discovery of ado-aw pipelines. +//! +//! Replaces the lexical `match_definitions_in` approach (which only +//! finds definitions whose root YAML is itself an ado-aw lock file) +//! with a content-marker scan over expanded YAML returned by ADO's +//! Pipeline Preview API. Picks up consumer pipelines (definitions whose +//! root YAML is hand-written but `include`s an ado-aw template) and is +//! rename-resilient because the marker is in the YAML content rather +//! than the definition's `process.yamlFilename` field. +//! +//! ## Algorithm +//! +//! 1. List every definition in the project via `list_definitions`. +//! 2. Filter by [`DiscoveryScope`] — e.g., `CurrentRepo` matches +//! `repository.url` against the resolved git remote. +//! 3. Fast path: when a definition's `process.yamlFilename` matches one +//! of the supplied local lock files, parse the local file's +//! `# @ado-aw` header and mark the definition `Direct` without +//! spending a Preview call. +//! 4. Otherwise: POST to `/_apis/pipelines/{id}/preview` and scan the +//! response's `finalYaml` for `# ado-aw-metadata: {…}` markers +//! (see [`crate::detect::parse_marker_step`]). +//! 5. Classify per [`DiscoveryStatus`]. +//! +//! ## Empirical grounding +//! +//! The Preview API was validated against the live `msazuresphere/4x4` +//! project (definition 2434, `OS Release Readiness`) — a 56 KB +//! `finalYaml` was returned, comments inside step bodies preserved, +//! top-of-document comments stripped. The marker-step design in +//! `src/compile/extensions/ado_aw_marker.rs` solves the stripping +//! problem by embedding the marker inside a bash heredoc. + +use anyhow::{Context, Result}; +use log::{debug, warn}; +use serde::Deserialize; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use tokio::sync::Semaphore; + +use super::{ + AdoAuth, AdoContext, DefinitionSummary, MatchMethod, MatchedDefinition, list_definitions, +}; +use crate::detect::{MarkerMetadata, parse_marker_step}; + +/// Default permits used to throttle concurrent Preview HTTP calls. +/// Tunable via the `ADO_AW_PREVIEW_CONCURRENCY` environment variable so +/// users hitting ADO rate limits in large projects can dial it down +/// without a code change. +const DEFAULT_PREVIEW_CONCURRENCY: usize = 8; + +// ─── Public types ──────────────────────────────────────────────────── + +/// Which ADO definitions to consider during discovery. +#[derive(Debug, Clone)] +pub enum DiscoveryScope { + /// Only definitions whose backing repository URL matches the + /// resolved git remote of the current working directory. Default + /// for project-scope CLI commands. + CurrentRepo, + /// Every definition in the project, regardless of repository. + AllRepos, + /// A pre-resolved list of definition IDs; bypasses listing and + /// scope filtering entirely. + /// + /// **Reserved for future use.** No production callsite constructs + /// this variant today — the `--definition-ids` CLI flag is handled + /// by `crate::ado::resolve_definitions` (the legacy lexical + /// matcher), which short-circuits before discovery is invoked. The + /// variant exists so callers that want to feed an explicit ID list + /// into the discovery pipeline (e.g. for future automation that + /// has pre-filtered definitions) don't need a parallel API. + Explicit(Vec), +} + +/// Classification of a single ADO definition with respect to ado-aw. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DiscoveryStatus { + /// The definition's root YAML is itself an ado-aw lock file — + /// either via the local-fixture fast-path, or because the expanded + /// YAML's first marker step appears at the root level. + Direct, + /// The definition's root YAML is hand-written but includes one or + /// more ado-aw templates (detected by markers inside the expanded + /// YAML). + Consumer, + /// Preview returned 400, typically because the consumer's + /// `parameters:` block has required fields with no defaults. The + /// definition may still be an ado-aw consumer; we just couldn't + /// confirm without supplying parameter values. + UnknownRequiredParams, + /// Preview returned 403 — the calling identity lacks read access + /// on the definition or one of its referenced repos. + UnknownForbidden, + /// Preview returned 404 — the definition disappeared between + /// `list_definitions` and `preview_pipeline` (race with a + /// concurrent delete). Tracked as a distinct status so it can be + /// filtered out of the skip-warning counts: there's no operator + /// action to take for a definition that no longer exists. + NotFound, + /// Preview returned some other error (5xx, network failure, etc.). + PreviewFailed(String), + /// Preview succeeded but no ado-aw marker was found in the + /// expanded YAML; the definition is not an ado-aw pipeline. + NotAdoAw, +} + +/// Result of discovery for a single ADO definition. +#[derive(Debug, Clone)] +pub struct DiscoveredPipeline { + pub definition_id: u64, + pub definition_name: String, + pub repository_url: Option, + pub queue_status: Option, + pub markers: Vec, + pub status: DiscoveryStatus, +} + +// ─── Preview API client ────────────────────────────────────────────── + +/// Typed error from a `preview_pipeline` call so callers can map ADO +/// failure modes to [`DiscoveryStatus`] without re-inspecting HTTP +/// response bodies. +#[derive(Debug, Clone)] +pub enum PreviewError { + /// 400 from ADO — usually means the pipeline declares required + /// `parameters:` without defaults. + RequiredParams, + /// 403 — calling identity lacks read access. + Forbidden, + /// 404 — definition does not exist (or is hidden from the caller). + NotFound, + /// Any other failure (5xx, network, malformed JSON). + Other(String), +} + +impl std::fmt::Display for PreviewError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PreviewError::RequiredParams => write!( + f, + "preview returned 400 (required parameters without defaults)" + ), + PreviewError::Forbidden => write!(f, "preview returned 403 (forbidden)"), + PreviewError::NotFound => write!(f, "preview returned 404 (not found)"), + PreviewError::Other(msg) => write!(f, "preview failed: {msg}"), + } + } +} + +/// JSON shape of the Pipeline Preview response. Only `finalYaml` is +/// consumed; other fields (`yaml`, `id`, etc.) are intentionally +/// ignored — Preview also returns the un-rendered yaml under `yaml` +/// which is the wrong surface for marker discovery. +#[derive(Debug, Deserialize)] +struct PreviewResponse { + #[serde(rename = "finalYaml", default)] + final_yaml: Option, +} + +/// Call ADO's Pipeline Preview API and return the expanded `finalYaml`. +/// +/// Uses the `/_apis/pipelines/{id}/preview?api-version=7.1-preview.1` +/// endpoint (not the legacy build-definitions surface — Preview is the +/// only documented public way to get expanded YAML). +pub async fn preview_pipeline( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + definition_id: u64, +) -> std::result::Result { + let url = format!( + "{}/{}/_apis/pipelines/{}/preview?api-version=7.1-preview.1", + ctx.org_url.trim_end_matches('/'), + percent_encoding::utf8_percent_encode(&ctx.project, super::PATH_SEGMENT), + definition_id + ); + + let body = serde_json::json!({ + "previewRun": true, + "templateParameters": {} + }); + + debug!("POST {} (preview definition {})", url, definition_id); + + let resp = auth + .apply(client.post(&url).json(&body)) + .send() + .await + .map_err(|e| PreviewError::Other(format!("network error: {e}")))?; + + let status = resp.status(); + if status.as_u16() == 400 { + return Err(PreviewError::RequiredParams); + } + if status.as_u16() == 403 || status.as_u16() == 401 { + return Err(PreviewError::Forbidden); + } + if status.as_u16() == 404 { + return Err(PreviewError::NotFound); + } + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + return Err(PreviewError::Other(format!( + "HTTP {} from preview endpoint: {}", + status, + body.chars().take(500).collect::() + ))); + } + + let parsed: PreviewResponse = resp + .json() + .await + .map_err(|e| PreviewError::Other(format!("malformed preview JSON: {e}")))?; + + parsed + .final_yaml + .ok_or_else(|| PreviewError::Other("preview response missing finalYaml field".to_string())) +} + +// ─── Discovery driver ──────────────────────────────────────────────── + +/// Discover every ado-aw pipeline in scope. +/// +/// `local_lock_paths` enables the fast-path: definitions whose +/// `process.yamlFilename` matches one of these paths skip the Preview +/// call and are classified `Direct` by reading the local file's +/// `# @ado-aw` header (cheap, no HTTP). When `None` or empty, every +/// definition in scope is Previewed. +pub async fn discover_ado_aw_pipelines( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + scope: DiscoveryScope, + local_lock_paths: Option<&[PathBuf]>, +) -> Result> { + let definitions = list_definitions(client, ctx, auth) + .await + .context("Failed to list ADO definitions for discovery")?; + + let filtered = apply_scope_filter(definitions, &scope, &ctx.repo_url()); + + // Build a (normalized yamlFilename → local lock path) map for the + // fast-path. Path comparison uses the same normalization as + // `match_definitions_in`. + let lock_map = build_lock_path_map(local_lock_paths); + + // Resolve concurrency from env; warn (not silently clamp) when an + // operator sets `=0`, since the deadlock-avoidance `.max(1)` would + // mask the typo and leave the user wondering why throughput hasn't + // changed. + let raw_permits = std::env::var("ADO_AW_PREVIEW_CONCURRENCY") + .ok() + .and_then(|v| v.parse::().ok()); + let permits = match raw_permits { + Some(0) => { + warn!( + "ADO_AW_PREVIEW_CONCURRENCY=0 would deadlock the Preview semaphore; \ + clamping to 1. Set a positive integer to control concurrency.", + ); + 1 + } + Some(n) => n, + None => DEFAULT_PREVIEW_CONCURRENCY, + }; + let semaphore = Arc::new(Semaphore::new(permits.max(1))); + + let mut handles = Vec::with_capacity(filtered.len()); + for def in filtered { + let local_match = def + .process + .as_ref() + .and_then(|p| p.yaml_filename.as_ref()) + .and_then(|f| lock_map.get(&super::normalize_ado_yaml_path(f)).cloned()); + + let sem = Arc::clone(&semaphore); + let client = client.clone(); + let ctx = ctx.clone(); + let auth = auth.clone(); + + handles.push(tokio::spawn(async move { + let _permit = sem.acquire_owned().await.expect("semaphore not closed"); + classify_definition(&client, &ctx, &auth, def, local_match).await + })); + } + + let mut results = Vec::with_capacity(handles.len()); + for handle in handles { + match handle.await { + Ok(p) => results.push(p), + Err(e) => warn!("Discovery worker panicked: {e}"), + } + } + + Ok(results) +} + +/// Pure scope filter, factored out so it's exercised by unit tests. +fn apply_scope_filter( + definitions: Vec, + scope: &DiscoveryScope, + current_repo_url: &Option, +) -> Vec { + match scope { + DiscoveryScope::AllRepos => definitions, + DiscoveryScope::Explicit(ids) => definitions + .into_iter() + .filter(|d| ids.contains(&d.id)) + .collect(), + DiscoveryScope::CurrentRepo => { + let Some(current) = current_repo_url else { + // No git remote → nothing matches CurrentRepo. Caller + // can fall back to AllRepos or an explicit ID list. + return Vec::new(); + }; + let target = normalize_repo_url(current); + definitions + .into_iter() + .filter(|d| { + d.repository + .as_ref() + .and_then(|r| r.url.as_ref()) + .map(|u| normalize_repo_url(u) == target) + .unwrap_or(false) + }) + .collect() + } + } +} + +/// Normalize a repo URL for equality comparison. +/// +/// Two normalizations are applied so the comparison is robust to the +/// shape ADO returns: +/// +/// 1. **Percent-decode** the URL so a project named e.g. `My Project` +/// compares equal whether ADO returned `My%20Project` or the (rare +/// but legal) decoded `My Project`. Lossy decoding on invalid UTF-8 +/// keeps us forward-compatible — anything ADO can return, we can +/// compare. +/// 2. **ASCII-lowercase** because ADO is case-insensitive on org / +/// project / repo identifiers, and trim any trailing `/`. +/// +/// Without (1), the comparison would silently fail for any project +/// name containing a percent-reserved character if `ado_ctx.repo_url()` +/// emitted the encoded form and `repository.url` returned the decoded +/// form (or vice-versa). +fn normalize_repo_url(url: &str) -> String { + let decoded = percent_encoding::percent_decode_str(url) + .decode_utf8_lossy() + .into_owned(); + decoded.trim_end_matches('/').to_ascii_lowercase() +} + +/// Build a `(normalized yamlFilename → local lock path)` lookup table +/// from `--source agents/foo.lock.yml` or similar inputs. +/// +/// The key is produced by [`super::normalize_ado_yaml_path`], the same +/// helper applied to ADO's returned `process.yamlFilename` during +/// classification. Routing both sides through one function future-proofs +/// the lookup: if `normalize_ado_yaml_path` ever gains case-folding or +/// URL-decoding, the keys stay in sync without a second edit. +fn build_lock_path_map(local_lock_paths: Option<&[PathBuf]>) -> HashMap { + let mut map = HashMap::new(); + let Some(paths) = local_lock_paths else { + return map; + }; + for path in paths { + let key = super::normalize_ado_yaml_path(&path.to_string_lossy()); + map.insert(key, path.clone()); + } + map +} + +async fn classify_definition( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + def: DefinitionSummary, + local_match: Option, +) -> DiscoveredPipeline { + let repository_url = def.repository.as_ref().and_then(|r| r.url.clone()); + + // Fast path: if process.yamlFilename matched a local lock file, + // parse it directly. Avoids one HTTP round-trip per ado-aw-owned + // definition. The local file existing but not parsing falls + // through to Preview as a defensive measure. + if let Some(local_path) = local_match + && let Some(meta) = parse_local_lock(&local_path).await + { + return DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![meta], + status: DiscoveryStatus::Direct, + }; + } + + match preview_pipeline(client, ctx, auth, def.id).await { + Ok(final_yaml) => { + let markers = parse_marker_step(&final_yaml); + let status = if markers.is_empty() { + DiscoveryStatus::NotAdoAw + } else if is_direct_match(&def, &markers) { + DiscoveryStatus::Direct + } else { + DiscoveryStatus::Consumer + }; + DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers, + status, + } + } + Err(PreviewError::RequiredParams) => DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![], + status: DiscoveryStatus::UnknownRequiredParams, + }, + Err(PreviewError::Forbidden) => DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![], + status: DiscoveryStatus::UnknownForbidden, + }, + Err(PreviewError::NotFound) => { + // Definition was deleted between `list_definitions` and the + // Preview call (TOCTOU race with a concurrent delete). + // Track as a distinct status so it's excluded from the + // "Preview Failed" warning — there's no operator action to + // take for a definition that no longer exists. + debug!( + "Definition {} ({}) disappeared between list and preview (404); skipping", + def.id, def.name + ); + DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![], + status: DiscoveryStatus::NotFound, + } + } + Err(e) => DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![], + status: DiscoveryStatus::PreviewFailed(e.to_string()), + }, + } +} + +/// Heuristic: classify as `Direct` if the definition's root YAML is +/// itself an ado-aw lock file, otherwise `Consumer`. The check uses +/// the definition's `process.yamlFilename` as a proxy — if the source +/// markdown referenced by the marker has the same stem as the root +/// YAML, the definition is the direct owner. Anything else is a +/// consumer that pulls the template in via `template:` indirection. +/// +/// Returns `false` for the marker-less case — `classify_definition` +/// only routes here when at least one marker was found, but defensive +/// against future callers. +fn is_direct_match(def: &DefinitionSummary, markers: &[MarkerMetadata]) -> bool { + if markers.is_empty() { + // 0 markers means "not ado-aw at all", which is neither direct + // nor consumer. Belt-and-braces — `classify_definition` + // currently guards this, but the guard could move. + return false; + } + if markers.len() > 1 { + // A compiled ado-aw pipeline's expanded YAML carries exactly + // one marker — its own Agent-job prepare step. More than one + // means the YAML was produced by expanding a consumer that + // `template:`-includes multiple ado-aw lock files (each + // contributing its own marker step). None of those templates + // are the consumer's own root YAML, so it can't be Direct. + return false; + } + let marker = &markers[0]; + let Some(yaml_filename) = def.process.as_ref().and_then(|p| p.yaml_filename.as_ref()) else { + return false; + }; + let yaml_normalized = super::normalize_ado_yaml_path(yaml_filename); + + // Map e.g. `agents/foo.md` → `agents/foo.lock.yml` and compare to + // the definition's root YAML. Convention: `.md` compiles to + // `.lock.yml`. + // + // Equality is required — an earlier version also accepted + // `yaml_normalized.ends_with("/{stem}")` for a defensive + // tail-match, but that produced false-positives when an unrelated + // pipeline happened to live under a same-named lock file in a + // different directory (e.g. marker `agents/foo.md` + yamlFilename + // `other/agents/foo.lock.yml` would mislabel a Consumer as Direct). + // Both `marker.source` and the post-`normalize_ado_yaml_path` + // form of `yaml_filename` are repo-root-relative without a leading + // slash, so strict equality is the correct check. + // + // Non-`.md` sources are treated conservatively as `Consumer`: this + // branch is unreachable today (the compiler always emits `.md` + // source paths) but stays defensive against future extensions that + // allow `.yaml` / `.json` / etc. agent sources. Returning `false` + // here means the definition will be classified as `Consumer` + // rather than `Direct`, which is the safe default — a write + // command still acts on it, just labelled differently in the + // summary. + let Some(stem) = marker + .source + .strip_suffix(".md") + .map(|s| format!("{s}.lock.yml")) + else { + return false; + }; + yaml_normalized == stem +} + +/// Decide whether a marker's `(org, repo)` identifies the same +/// repository as the discovery context. Empty marker fields (legacy +/// markers produced before the org/repo embed landed, or markers from +/// non-ADO compile environments) are treated as wildcards so existing +/// deployments are not silently excluded. Once those lock files are +/// recompiled, the match becomes strict. +fn marker_origin_matches( + marker: &MarkerMetadata, + current_org_lc: &str, + current_repo_lc: &str, +) -> bool { + if marker.org.is_empty() && marker.repo.is_empty() { + return true; + } + // Marker fields are already lower-cased at emit time. Be defensive + // anyway — round-tripping through serde_json doesn't change case + // but a hand-edited fixture or future producer might. + marker.org.eq_ignore_ascii_case(current_org_lc) + && marker.repo.eq_ignore_ascii_case(current_repo_lc) +} + +async fn parse_local_lock(path: &Path) -> Option { + let content = tokio::fs::read_to_string(path).await.ok()?; + // Two surfaces, in order of preference: + // (1) the structural marker step — survives Preview expansion and + // is identical to what the Preview path would parse; + // (2) the legacy top-of-file `# @ado-aw` header — kept for + // backward compat with pre-marker-extension lock files. + if let Some(meta) = parse_marker_step(&content).into_iter().next() { + return Some(meta); + } + // Fall back to the legacy header. + for line in content.lines().take(5) { + if let Some(h) = crate::detect::parse_header_line(line) { + return Some(MarkerMetadata { + schema: 0, + source: h.source, + org: String::new(), + repo: String::new(), + version: h.version, + target: String::new(), + }); + } + } + None +} + +// ─── Adapters into the existing CLI types ──────────────────────────── + +/// Convert a [`DiscoveredPipeline`] into a [`MatchedDefinition`], the +/// shape used by every CLI command (`list`, `secrets set/list/delete`, +/// etc.). This keeps the rest of the codebase unchanged when commands +/// opt into discovery via `--all-repos` / `--source`. +/// +/// Returns `None` for any classification that isn't safely actionable +/// by a write command. In particular `UnknownRequiredParams`, +/// `UnknownForbidden`, and `PreviewFailed` are dropped because we +/// have no markers to attach (so we can't even tell the user which +/// template a write would affect); `NotAdoAw` is dropped because it +/// isn't ado-aw at all. Callers that want a richer summary (e.g. a +/// future `list --all-repos`) should inspect `DiscoveredPipeline` +/// directly rather than going through this adapter. +pub fn discovered_to_matched(d: &DiscoveredPipeline) -> Option { + match d.status { + DiscoveryStatus::Direct | DiscoveryStatus::Consumer => {} + DiscoveryStatus::NotAdoAw + | DiscoveryStatus::NotFound + | DiscoveryStatus::UnknownForbidden + | DiscoveryStatus::UnknownRequiredParams + | DiscoveryStatus::PreviewFailed(_) => return None, + } + + // Join every marker's source path so consumers that include + // multiple templates show up honestly in the CLI summary instead + // of silently truncating to whichever marker happened to be + // first. Also apply the canonical pipeline-command neutraliser: + // the `yaml_path` ends up in `print_matched_summary` (which writes + // to stdout), and if `ado-aw secrets set --all-repos` is ever + // invoked from inside an ADO pipeline step, an attacker-controlled + // marker source path containing `##vso[` would otherwise be + // processed by the agent's logging-command scanner. Reusing the + // shared helper keeps this in sync with every other sanitisation + // surface (front matter, safe outputs, agent stats). + let yaml_path = if d.markers.is_empty() { + String::new() + } else { + let joined = d + .markers + .iter() + .map(|m| m.source.as_str()) + .collect::>() + .join(", "); + crate::sanitize::neutralize_pipeline_commands(&joined) + }; + + Some(MatchedDefinition { + id: d.definition_id, + name: d.definition_name.clone(), + match_method: MatchMethod::Discovery, + yaml_path, + queue_status: d.queue_status.clone(), + }) +} + +/// CLI-facing wrapper: run Preview-driven discovery with the given +/// scope, optionally filter to consumers of a single template source, +/// and return the result as `Vec`. +/// +/// `source_filter` filters discovery results so only definitions whose +/// markers reference that source path are kept. Match is by exact +/// equality on the normalized source string in the marker JSON. +/// Normalisation is applied to the user-supplied value too +/// ([`crate::compile::normalize_source_path`] — forward-slash separators, +/// CR/LF stripping, leading `./` collapsed) so the common variants +/// (`./agents/foo.md`, `agents\foo.md` on Windows) match. Matching is +/// **case-sensitive** even on Windows; pass the path in the same case +/// it was compiled with. +/// +/// Source-only matching is ambiguous when two repos in the same ADO +/// project happen to define a file of the same name (e.g. both have +/// `agents/foo.md`). To disambiguate, the marker carries the ADO +/// `org` and `repo` of the compiling repository (lower-cased). When +/// `source_filter` is active, the marker's `(org, repo)` must also +/// equal `ctx`'s — i.e. the operator gets only consumers whose +/// template originated in the **current repo**. Markers with empty +/// `org` / `repo` (legacy or non-ADO compilers) match leniently so +/// pre-existing deployments are not silently excluded; once everything +/// is recompiled with this version, the match becomes strict. +/// +/// Skip-summary warnings are emitted differently depending on whether +/// `source_filter` is active: +/// +/// - **Unfiltered (`--all-repos` alone)**: every `UnknownRequiredParams` +/// / `UnknownForbidden` / `PreviewFailed` definition is counted — +/// under `--all-repos` the user is operating on every ado-aw pipeline +/// in scope, so each failure represents a real skip. +/// +/// - **Filtered (`--source `)**: we can't tell whether a failed +/// definition would have been a consumer of `path` because we never +/// got markers out of it. Emitting per-status counts would mislead +/// the user into thinking they're missing consumers of their +/// template. Instead, emit a single conservative warning ("N +/// definitions could not be inspected; consumers of `` among +/// them may have been silently skipped") so the operator is informed +/// without being told false specifics. +pub async fn resolve_definitions_via_discovery( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + scope: DiscoveryScope, + local_lock_paths: Option<&[PathBuf]>, + source_filter: Option<&str>, +) -> Result> { + let discovered = discover_ado_aw_pipelines(client, ctx, auth, scope, local_lock_paths).await?; + + // Normalize the user-supplied `--source` value through the same + // canonical form the compiler uses for the marker JSON's `source` + // field. Without this, `--source ./agents/foo.md` or + // `--source agents\foo.md` (Windows) silently matches nothing + // because the marker stores `agents/foo.md`. + let normalized_filter: Option = + source_filter.map(|s| crate::compile::normalize_source_path(Path::new(s))); + + // Origin scoping: when filtering by `--source`, also require the + // marker's (org, repo) to identify the current repository. This + // disambiguates the source field when two repos in the same + // project define files of the same name. Lower-cased to align with + // the marker's lower-casing at emit time (ADO identifiers are + // case-insensitive). Markers with empty fields (legacy / non-ADO + // compiles) match leniently so already-deployed pipelines remain + // discoverable until they are recompiled. + let current_org = ctx + .org_name() + .map(|s| s.to_ascii_lowercase()) + .unwrap_or_default(); + let current_repo = ctx.repo_name.to_ascii_lowercase(); + + // Pass 1: classify each discovered definition into "keep / skip + // silently / skip with reason". The previous shape stuffed all of + // this into a side-effecting `.filter()` closure that mutated + // counters while deciding inclusion — explicit two-pass form keeps + // the counts honestly derived from the same iteration and makes it + // obvious what ends up in the returned vec. + // + // The per-status counters (`uninspectable_required_params` / + // `_forbidden` / `_failed`) tally Preview failures by reason. They + // intentionally do NOT distinguish "ado-aw consumer" from + // "unrelated pipeline" — Preview failed for these, so we have no + // markers to tell which is which. A non-ado-aw project may have + // hundreds of definitions that legitimately require + // templateParameters; we can't claim any of them were ado-aw + // consumers without inspecting them, so the warning text below is + // written to be honest about that uncertainty. + let mut uninspectable_required_params = 0usize; + let mut uninspectable_forbidden = 0usize; + let mut uninspectable_failed = 0usize; + let mut selected: Vec = Vec::with_capacity(discovered.len()); + + for d in discovered { + let matches_filter = match normalized_filter.as_deref() { + Some(src) => d + .markers + .iter() + .any(|m| m.source == src && marker_origin_matches(m, ¤t_org, ¤t_repo)), + None => true, + }; + + // Tally uninspectable statuses *before* the actionability + // guard below so the count is accurate regardless of whether + // the definition makes it into `selected`. + match d.status { + DiscoveryStatus::UnknownRequiredParams => uninspectable_required_params += 1, + DiscoveryStatus::UnknownForbidden => uninspectable_forbidden += 1, + DiscoveryStatus::PreviewFailed(_) => uninspectable_failed += 1, + _ => {} + } + + // Drop non-actionable statuses up-front instead of letting + // them ride along in `selected` only to be filtered out by + // `discovered_to_matched` at the end. In an `--all-repos` run + // against a large project where most definitions are + // `NotAdoAw` or `PreviewFailed`, this keeps the intermediate + // vec tight and makes the filter pass's intent obvious. + let actionable = matches!( + d.status, + DiscoveryStatus::Direct | DiscoveryStatus::Consumer + ); + + if matches_filter && actionable { + selected.push(d); + } + } + + let uninspectable = + uninspectable_required_params + uninspectable_forbidden + uninspectable_failed; + + // Pass 2: emit a single warning that's honest about uncertainty, + // and surface the per-status breakdown at debug level for + // operators who want to know whether the misses were + // permission-related or template-parameter-related. + // + // Previously we emitted three separate warn-level messages keyed + // on the per-status counts (e.g. "Discovery skipped N definitions + // whose Pipeline Preview requires templateParameters") — but in + // `--all-repos` mode that's misleading: a project with hundreds of + // non-ado-aw pipelines that legitimately require parameters would + // make the operator think they'd missed N ado-aw consumers, when + // none of them were ado-aw in the first place. We can't tell + // which is which without successful Preview output. + if uninspectable > 0 { + match normalized_filter.as_deref() { + Some(src) => warn!( + "Discovery could not inspect {uninspectable} definition(s) (Preview failure, \ + forbidden, or required-parameters); any consumers of `{src}` among them have \ + been silently skipped. Re-run with --debug for per-definition reasons.", + ), + None => warn!( + "Discovery could not inspect {uninspectable} definition(s) (Preview failure, \ + forbidden, or required-parameters); any ado-aw pipelines among them have been \ + silently skipped. Re-run with --debug for per-definition reasons.", + ), + } + debug!( + "Uninspectable breakdown: {uninspectable_required_params} required-parameters, \ + {uninspectable_forbidden} forbidden, {uninspectable_failed} other Preview errors.", + ); + } + + Ok(selected.iter().filter_map(discovered_to_matched).collect()) +} + +// AdoContext helper: derive the resolved git remote URL for +// `CurrentRepo` scoping. Lives here (rather than on `AdoContext`) +// because the context only stores org+project+repo_name today; +// reconstructing the URL is a local detail of discovery. +// +// Percent-encodes `project` and `repo_name` to match the form ADO +// returns in `repository.url` — without this, projects whose names +// contain spaces or other reserved chars would silently match nothing +// because the lowercase comparison can't reconcile e.g. `my project` +// with `my%20project`. +impl AdoContext { + fn repo_url(&self) -> Option { + if self.repo_name.is_empty() { + return None; + } + Some(format!( + "{}/{}/_git/{}", + self.org_url.trim_end_matches('/'), + percent_encoding::utf8_percent_encode(&self.project, super::PATH_SEGMENT), + percent_encoding::utf8_percent_encode(&self.repo_name, super::PATH_SEGMENT), + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ado::{ProcessInfo, Repository}; + + fn def_with( + id: u64, + name: &str, + yaml_filename: Option<&str>, + repo_url: Option<&str>, + ) -> DefinitionSummary { + DefinitionSummary { + id, + name: name.to_string(), + process: yaml_filename.map(|f| ProcessInfo { + yaml_filename: Some(f.to_string()), + }), + queue_status: None, + path: None, + repository: repo_url.map(|u| Repository { + url: Some(u.to_string()), + name: None, + repo_type: None, + id: None, + }), + revision: None, + } + } + + // ── apply_scope_filter ─────────────────────────────────────────── + + #[test] + fn scope_all_repos_returns_everything() { + let defs = vec![ + def_with(1, "a", None, Some("https://dev.azure.com/o/p/_git/a")), + def_with(2, "b", None, Some("https://dev.azure.com/o/p/_git/b")), + ]; + let kept = apply_scope_filter(defs, &DiscoveryScope::AllRepos, &None); + assert_eq!(kept.len(), 2); + } + + #[test] + fn scope_explicit_filters_by_id() { + let defs = vec![ + def_with(1, "a", None, None), + def_with(2, "b", None, None), + def_with(3, "c", None, None), + ]; + let kept = apply_scope_filter(defs, &DiscoveryScope::Explicit(vec![1, 3]), &None); + assert_eq!(kept.iter().map(|d| d.id).collect::>(), vec![1, 3]); + } + + #[test] + fn scope_current_repo_matches_normalized_url() { + let defs = vec![ + def_with(1, "a", None, Some("https://dev.azure.com/Org/P/_git/Repo")), + def_with(2, "b", None, Some("https://dev.azure.com/Org/P/_git/Other")), + ]; + let current = Some("https://dev.azure.com/org/p/_git/repo/".to_string()); + let kept = apply_scope_filter(defs, &DiscoveryScope::CurrentRepo, ¤t); + assert_eq!(kept.len(), 1); + assert_eq!(kept[0].id, 1); + } + + #[test] + fn scope_current_repo_with_no_remote_returns_empty() { + let defs = vec![def_with( + 1, + "a", + None, + Some("https://dev.azure.com/o/p/_git/x"), + )]; + let kept = apply_scope_filter(defs, &DiscoveryScope::CurrentRepo, &None); + assert!(kept.is_empty()); + } + + #[test] + fn scope_current_repo_excludes_definitions_without_repository() { + let defs = vec![ + def_with(1, "a", None, None), + def_with(2, "b", None, Some("https://dev.azure.com/o/p/_git/x")), + ]; + let current = Some("https://dev.azure.com/o/p/_git/x".to_string()); + let kept = apply_scope_filter(defs, &DiscoveryScope::CurrentRepo, ¤t); + assert_eq!(kept.len(), 1); + assert_eq!(kept[0].id, 2); + } + + // ── is_direct_match ────────────────────────────────────────────── + + #[test] + fn direct_when_yaml_filename_matches_marker_stem() { + let def = def_with(1, "a", Some("/agents/foo.lock.yml"), None); + let markers = vec![MarkerMetadata { + schema: 1, + source: "agents/foo.md".to_string(), + version: "0.30.0".to_string(), + target: "standalone".to_string(), + ..Default::default() + }]; + assert!(is_direct_match(&def, &markers)); + } + + #[test] + fn direct_when_yaml_filename_has_leading_slash() { + // ADO sometimes returns yamlFilename with a leading slash. The + // `normalize_ado_yaml_path` helper strips it, so equality with + // the derived `.lock.yml` still holds. + let def = def_with(1, "a", Some("/agents/foo.lock.yml"), None); + let markers = vec![MarkerMetadata { + schema: 1, + source: "agents/foo.md".to_string(), + version: "0.30.0".to_string(), + target: "standalone".to_string(), + ..Default::default() + }]; + assert!(is_direct_match(&def, &markers)); + } + + #[test] + fn consumer_when_same_stem_in_different_directory() { + // Regression: previously `yaml_normalized.ends_with("/{stem}")` + // would mislabel a Consumer pipeline as Direct whenever a + // same-named lock file lived under any unrelated prefix + // (e.g. marker `agents/foo.md` + yamlFilename + // `other/agents/foo.lock.yml`). The fix requires strict + // equality after normalisation. + let def = def_with(1, "a", Some("other/agents/foo.lock.yml"), None); + let markers = vec![MarkerMetadata { + schema: 1, + source: "agents/foo.md".to_string(), + version: "0.30.0".to_string(), + target: "standalone".to_string(), + ..Default::default() + }]; + assert!(!is_direct_match(&def, &markers)); + } + + #[test] + fn consumer_when_yaml_filename_does_not_match_marker() { + let def = def_with(1, "a", Some("/release-readiness.yml"), None); + let markers = vec![MarkerMetadata { + schema: 1, + source: "agents/foo.md".to_string(), + version: "0.30.0".to_string(), + target: "stage".to_string(), + ..Default::default() + }]; + assert!(!is_direct_match(&def, &markers)); + } + + #[test] + fn consumer_when_multiple_markers_present() { + let def = def_with(1, "a", Some("/agents/foo.lock.yml"), None); + let markers = vec![ + MarkerMetadata { + schema: 1, + source: "agents/foo.md".to_string(), + version: "0.30.0".to_string(), + target: "stage".to_string(), + ..Default::default() + }, + MarkerMetadata { + schema: 1, + source: "agents/bar.md".to_string(), + version: "0.30.0".to_string(), + target: "job".to_string(), + ..Default::default() + }, + ]; + // Multiple markers = at least one template is being included + // alongside something else; not a single direct ownership. + assert!(!is_direct_match(&def, &markers)); + } + + // ── build_lock_path_map ────────────────────────────────────────── + + #[test] + fn lock_map_normalizes_paths() { + let paths = vec![ + PathBuf::from("agents\\foo.lock.yml"), + PathBuf::from("/agents/bar.lock.yml"), + ]; + let map = build_lock_path_map(Some(&paths)); + assert!(map.contains_key("agents/foo.lock.yml")); + assert!(map.contains_key("agents/bar.lock.yml")); + } + + #[test] + fn lock_map_empty_for_none() { + assert!(build_lock_path_map(None).is_empty()); + } + + // ── PreviewError ───────────────────────────────────────────────── + + #[test] + fn preview_error_display_is_actionable() { + assert!( + PreviewError::RequiredParams + .to_string() + .contains("required parameters") + ); + assert!(PreviewError::Forbidden.to_string().contains("403")); + assert!(PreviewError::NotFound.to_string().contains("404")); + } + + // ── source_filter normalization ────────────────────────────────── + + #[test] + fn source_filter_normalization_matches_marker_form() { + // The marker stores normalized form (`agents/foo.md`). Verify + // that the same normalization applied to user input produces + // matchable strings for the common variants. + use crate::compile::normalize_source_path; + use std::path::Path; + + let canonical = normalize_source_path(Path::new("agents/foo.md")); + assert_eq!(canonical, "agents/foo.md"); + + // Leading `./` is stripped. + assert_eq!( + normalize_source_path(Path::new("./agents/foo.md")), + canonical + ); + + // Backslashes are normalized to forward slashes. + assert_eq!( + normalize_source_path(Path::new(r"agents\foo.md")), + canonical + ); + } + + // ── marker_origin_matches ──────────────────────────────────────── + + #[test] + fn origin_matches_strict_when_marker_has_org_and_repo() { + let marker = MarkerMetadata { + org: "myorg".to_string(), + repo: "templates-a".to_string(), + source: "agents/foo.md".to_string(), + ..Default::default() + }; + assert!(marker_origin_matches(&marker, "myorg", "templates-a")); + assert!(!marker_origin_matches(&marker, "myorg", "templates-b")); + assert!(!marker_origin_matches(&marker, "otherorg", "templates-a")); + } + + #[test] + fn origin_matches_case_insensitively() { + // ADO identifiers are case-insensitive. Marker fields are + // lower-cased at emit time, but a fixture or hand-edited + // marker might carry uppercase — accept either. + let marker = MarkerMetadata { + org: "MyOrg".to_string(), + repo: "Templates-A".to_string(), + source: "agents/foo.md".to_string(), + ..Default::default() + }; + assert!(marker_origin_matches(&marker, "myorg", "templates-a")); + } + + #[test] + fn origin_matches_leniently_when_marker_org_repo_empty() { + // Legacy markers (pre-org/repo embed) and markers compiled + // outside an ADO checkout carry empty org/repo. Match anything + // so existing deployments keep working until recompiled. + let marker = MarkerMetadata { + source: "agents/foo.md".to_string(), + ..Default::default() + }; + assert!(marker_origin_matches(&marker, "myorg", "templates-a")); + assert!(marker_origin_matches(&marker, "", "")); + } + + #[test] + fn origin_matches_strictly_when_only_one_field_empty() { + // If only one half of (org, repo) is set, we treat the marker + // as non-legacy and require both to match. Pre-empts a + // malformed fixture passing through the lenient path. + let half_marker = MarkerMetadata { + org: "myorg".to_string(), + repo: String::new(), + source: "agents/foo.md".to_string(), + ..Default::default() + }; + assert!(!marker_origin_matches(&half_marker, "myorg", "templates-a")); + } + + // ── discovered_to_matched ──────────────────────────────────────── + + fn discovered(status: DiscoveryStatus) -> DiscoveredPipeline { + DiscoveredPipeline { + definition_id: 42, + definition_name: "test".to_string(), + repository_url: None, + queue_status: None, + markers: vec![], + status, + } + } + + #[test] + fn discovered_to_matched_drops_not_found() { + // 404 from Preview (definition deleted in flight) must not + // surface as a matched definition that a write command would + // act on — there's nothing to act on. + assert!(discovered_to_matched(&discovered(DiscoveryStatus::NotFound)).is_none()); + } + + #[test] + fn discovered_to_matched_drops_unactionable_statuses() { + for status in [ + DiscoveryStatus::NotAdoAw, + DiscoveryStatus::NotFound, + DiscoveryStatus::UnknownForbidden, + DiscoveryStatus::UnknownRequiredParams, + DiscoveryStatus::PreviewFailed("boom".to_string()), + ] { + assert!( + discovered_to_matched(&discovered(status.clone())).is_none(), + "expected {status:?} to map to None" + ); + } + } + + #[test] + fn discovered_to_matched_keeps_direct_and_consumer() { + assert!(discovered_to_matched(&discovered(DiscoveryStatus::Direct)).is_some()); + assert!(discovered_to_matched(&discovered(DiscoveryStatus::Consumer)).is_some()); + } + + #[test] + fn discovered_to_matched_joins_multiple_marker_sources() { + // A consumer that includes two templates must surface both + // sources in the yaml_path summary, not silently truncate to + // whichever happened to be first. + let mut d = discovered(DiscoveryStatus::Consumer); + d.markers = vec![ + MarkerMetadata { + schema: 1, + source: "agents/a.md".to_string(), + version: "1.0".to_string(), + target: "job".to_string(), + ..Default::default() + }, + MarkerMetadata { + schema: 1, + source: "agents/b.md".to_string(), + version: "1.0".to_string(), + target: "stage".to_string(), + ..Default::default() + }, + ]; + let matched = discovered_to_matched(&d).expect("Consumer kept"); + assert!( + matched.yaml_path.contains("agents/a.md") && matched.yaml_path.contains("agents/b.md"), + "expected both marker sources in yaml_path, got: {}", + matched.yaml_path + ); + } + + #[test] + fn discovered_to_matched_sanitises_vso_in_yaml_path() { + // The yaml_path ends up in stdout via print_matched_summary. + // If the CLI is invoked from inside an ADO pipeline step, an + // attacker-controlled marker source path containing `##vso[` + // would otherwise be processed by the agent's logging-command + // scanner. + // + // The canonical `neutralize_pipeline_commands` wraps the + // prefix in backticks (`` `##vso[` ``) — the literal `##vso[` + // token no longer matches the agent's scanner. The canonical + // helper's own behaviour is exhaustively tested in + // `src/sanitize.rs`; this test is just the integration point. + let mut d = discovered(DiscoveryStatus::Consumer); + d.markers = vec![MarkerMetadata { + schema: 1, + source: "agents/##vso[task.setvariable variable=X]value.md".to_string(), + version: "1.0".to_string(), + target: "job".to_string(), + ..Default::default() + }]; + let matched = discovered_to_matched(&d).expect("Consumer kept"); + assert!( + !matched.yaml_path.contains("agents/##vso["), + "raw ##vso[ leaked into yaml_path: {}", + matched.yaml_path, + ); + assert!( + matched.yaml_path.contains("`##vso[`"), + "expected `##vso[` neutralised via canonical backtick-wrap: {}", + matched.yaml_path, + ); + } + + // ── normalize_repo_url ─────────────────────────────────────────── + + #[test] + fn normalize_repo_url_is_encoding_independent() { + // ADO usually returns percent-encoded URLs (`My%20Project`), + // but the comparison must work whichever shape both sides + // happen to be in. + let encoded = "https://dev.azure.com/Org/My%20Project/_git/Repo"; + let decoded = "https://dev.azure.com/Org/My Project/_git/Repo"; + assert_eq!(normalize_repo_url(encoded), normalize_repo_url(decoded)); + } + + #[test] + fn normalize_repo_url_is_case_insensitive_and_trims_trailing_slash() { + assert_eq!( + normalize_repo_url("https://dev.azure.com/Org/P/_git/Repo/"), + normalize_repo_url("https://dev.azure.com/org/p/_git/repo") + ); + } +} diff --git a/src/ado/mod.rs b/src/ado/mod.rs index 5f3bd6de..f000a5d4 100644 --- a/src/ado/mod.rs +++ b/src/ado/mod.rs @@ -16,6 +16,8 @@ use std::path::Path; use crate::detect; +pub mod discovery; + /// ADO resource ID for minting ADO-scoped tokens via Azure CLI. const ADO_RESOURCE_ID: &str = "499b84ac-1321-427f-aa17-267ca6975798"; @@ -77,6 +79,17 @@ pub struct AdoContext { pub repo_name: String, } +impl AdoContext { + /// Extract just the org slug from `org_url` (e.g. + /// `https://dev.azure.com/MyOrg/` → `Some("MyOrg")`). Mirrors the + /// inline parse in `CompileContext::ado_org`; lives here so + /// non-compile callers (Preview-driven discovery) can reuse it. + pub fn org_name(&self) -> Option<&str> { + let org = self.org_url.trim_end_matches('/').rsplit('/').next()?; + if org.is_empty() { None } else { Some(org) } + } +} + /// Parse the ADO org, project, and repo from a git remote URL. /// /// Supports: @@ -209,6 +222,38 @@ pub struct DefinitionSummary { /// `includeAllProperties=true`. May be absent on older API versions. #[serde(default)] pub path: Option, + /// Backing git repository (URL, name, type, id). Populated by ADO's + /// list endpoint without any extra query parameters. Used by + /// project-scope discovery to filter definitions by the current + /// git remote (`DiscoveryScope::CurrentRepo`). + #[serde(default)] + pub repository: Option, + /// Monotonic revision counter ADO bumps on every definition edit. + /// Deserialised here so a future Preview-driven discovery cache + /// can key on `(definition_id, revision)`. **No caching is + /// implemented yet** — see the discovery module for the current + /// in-process behaviour. Track in a follow-up before depending on + /// this for staleness checks. + #[serde(default)] + pub revision: Option, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct Repository { + /// Browse URL of the backing repo (e.g. + /// `https://dev.azure.com/{org}/{project}/_git/{repo}`). Used for + /// `DiscoveryScope::CurrentRepo` filtering. + #[serde(default)] + pub url: Option, + /// Human-readable repo name. + #[serde(default)] + pub name: Option, + /// Repository provider (e.g. `"TfsGit"`, `"GitHub"`). + #[serde(rename = "type", default)] + pub repo_type: Option, + /// Backing repository ID (GUID for TfsGit, owner/repo for GitHub). + #[serde(default)] + pub id: Option, } #[derive(Debug, Deserialize)] @@ -223,6 +268,11 @@ pub enum MatchMethod { YamlPath, PipelineName, Explicit, + /// Found via Preview-driven discovery (Workstream P). Uniquely + /// identifies definitions that ADO knows about but where the + /// caller has no corresponding local lock file — i.e. consumer + /// pipelines and ado-aw definitions in other repos. + Discovery, } impl std::fmt::Display for MatchMethod { @@ -231,6 +281,7 @@ impl std::fmt::Display for MatchMethod { MatchMethod::YamlPath => write!(f, "yaml-path"), MatchMethod::PipelineName => write!(f, "pipeline-name"), MatchMethod::Explicit => write!(f, "explicit"), + MatchMethod::Discovery => write!(f, "discovery"), } } } @@ -1366,6 +1417,8 @@ mod tests { process: None, queue_status: None, path: None, + repository: None, + revision: None, } } @@ -1378,6 +1431,8 @@ mod tests { }), queue_status: None, path: None, + repository: None, + revision: None, } } diff --git a/src/compile/common.rs b/src/compile/common.rs index e5078235..016cae8f 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -1220,14 +1220,31 @@ pub const HEADER_MARKER: &str = "# @ado-aw"; /// and auto-discovery recompile work regardless of how the user invoked the /// compiler (relative path, absolute path, etc.). Path separators are /// normalized to forward slashes for cross-platform consistency. -pub fn generate_header_comment(input_path: &std::path::Path) -> String { - let version = env!("CARGO_PKG_VERSION"); - - // If the caller supplied an absolute path (e.g. `ado-aw compile - // /repo/agents/foo.md`), make it relative to the current working directory - // so that `--source agents/foo.md` filters can match it. When the path is - // not under the CWD (unusual), fall back to the original path rather than - // silently producing a wrong value. +/// +/// Normalise a source markdown path into the canonical form embedded +/// in the `# ado-aw-metadata:` JSON marker and the `# @ado-aw` YAML +/// header comment. +/// +/// Applies forward-slash separator normalisation, strips CR/LF, and +/// collapses any leading `./`. Does **not** escape `"` — JSON encoding +/// is the caller's job (`serde_json::json!` handles it for the marker; +/// [`generate_header_comment`] escapes for its YAML comment surface). +/// Previously this also escaped `"`, but that produced a literal `\"` +/// inside the marker JSON (serde_json then escaped the leading +/// backslash on top, storing a spurious extra backslash on every +/// round-trip). The header comment now applies its own escape inline. +/// +/// Shared between [`generate_header_comment`], the always-on +/// `ado-aw-marker` compiler extension, and the `--source` filter in +/// Preview-driven discovery so all surfaces agree on the canonical +/// form of a source path. +/// +/// Absolute inputs (e.g. `ado-aw compile /repo/agents/foo.md`) are +/// converted to a path relative to the current working directory so that +/// `--source agents/foo.md` filters can match them. When the absolute path +/// is not under the CWD (unusual), falls back to the original input rather +/// than silently producing a wrong value. +pub fn normalize_source_path(input_path: &std::path::Path) -> String { let relative: std::borrow::Cow = if input_path.is_absolute() { std::env::current_dir() .ok() @@ -1241,8 +1258,7 @@ pub fn generate_header_comment(input_path: &std::path::Path) -> String { let mut source_path = relative .to_string_lossy() .replace('\\', "/") - .replace(['\n', '\r'], "") - .replace('"', "\\\""); + .replace(['\n', '\r'], ""); // Strip redundant leading "./" prefixes to prevent accumulation when // compile_all_pipelines re-joins paths through Path::new(".").join(source). @@ -1250,6 +1266,17 @@ pub fn generate_header_comment(input_path: &std::path::Path) -> String { source_path = source_path[2..].to_string(); } + source_path +} + +pub fn generate_header_comment(input_path: &std::path::Path) -> String { + let version = env!("CARGO_PKG_VERSION"); + // The header comment embeds the source inside double quotes + // (`source="…"`); escape `"` so legacy `parse_header_line` consumers + // can still recover the original. The JSON marker does not need + // this — serde_json escapes JSON-meaningful chars on its own. + let source_path = normalize_source_path(input_path).replace('"', "\\\""); + format!( "# This file is auto-generated by ado-aw. Do not edit manually.\n\ # @ado-aw source=\"{}\" version={}\n", @@ -2194,12 +2221,13 @@ pub fn generate_teardown_job( pub fn generate_prepare_steps( prepare_steps: &[serde_yaml::Value], extensions: &[super::extensions::Extension], + ctx: &CompileContext, ) -> Result { let mut parts = Vec::new(); // Extension prepare steps and prompt supplements (runtimes + first-party tools) for ext in extensions { - for step in ext.prepare_steps() { + for step in ext.prepare_steps(ctx) { parts.push(step); } if let Some(prompt) = ext.prompt_supplement() { @@ -3021,7 +3049,7 @@ pub async fn compile_shared( .is_some_and(|cm| cm.is_enabled()); let parameters = build_parameters(&front_matter.parameters, has_memory); let parameters_yaml = generate_parameters(¶meters)?; - let prepare_steps = generate_prepare_steps(&front_matter.steps, extensions)?; + let prepare_steps = generate_prepare_steps(&front_matter.steps, extensions, ctx)?; let finalize_steps = generate_finalize_steps(&front_matter.post_steps); let pr_expression = pr_filters.and_then(|f| f.expression.as_deref()); let pipeline_expression = pipeline_filters.and_then(|f| f.expression.as_deref()); @@ -3237,8 +3265,7 @@ pub async fn compile_template_target( let extensions = super::extensions::collect_extensions(front_matter); // Build compile context for MCPG config generation - let input_dir = input_path.parent().unwrap_or(Path::new(".")); - let ctx = CompileContext::new(front_matter, input_dir).await?; + let ctx = CompileContext::new(front_matter, input_path).await?; // Generate stage prefix for job-name uniqueness and template parameters let stage_prefix = generate_stage_prefix(&front_matter.name); @@ -5982,7 +6009,8 @@ safe-outputs: "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); + let result = generate_prepare_steps(&[], &exts, &ctx).unwrap(); assert!( !result.is_empty(), "memory steps must be emitted when cache-memory enabled" @@ -5997,7 +6025,8 @@ safe-outputs: fn test_generate_prepare_steps_without_memory_and_no_steps_has_safeoutputs_prompt() { let fm = minimal_front_matter(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); + let result = generate_prepare_steps(&[], &exts, &ctx).unwrap(); // SafeOutputs always contributes a prompt supplement assert!( result.contains("Safe Outputs"), @@ -6011,7 +6040,8 @@ safe-outputs: "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); + let result = generate_prepare_steps(&[], &exts, &ctx).unwrap(); assert!( result.contains("DownloadPipelineArtifact"), "memory steps must include the artifact download task" @@ -6026,9 +6056,10 @@ safe-outputs: fn test_generate_prepare_steps_without_memory_with_user_steps() { let fm = minimal_front_matter(); let exts = crate::compile::extensions::collect_extensions(&fm); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); let step: serde_yaml::Value = serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); - let result = generate_prepare_steps(&[step], &exts).unwrap(); + let result = generate_prepare_steps(&[step], &exts, &ctx).unwrap(); assert!(!result.is_empty(), "user steps should be present"); assert!( !result.contains("agent_memory"), @@ -6042,9 +6073,10 @@ safe-outputs: "---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); let step: serde_yaml::Value = serde_yaml::from_str("bash: echo hello\ndisplayName: greet").unwrap(); - let result = generate_prepare_steps(&[step], &exts).unwrap(); + let result = generate_prepare_steps(&[step], &exts, &ctx).unwrap(); assert!( result.contains("agent_memory"), "memory reference must be present" @@ -6061,7 +6093,8 @@ safe-outputs: "---\nname: test\ndescription: test\nruntimes:\n lean: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); + let result = generate_prepare_steps(&[], &exts, &ctx).unwrap(); assert!(result.contains("elan-init.sh"), "should include elan installer"); assert!(result.contains("Lean 4"), "should include Lean prompt"); assert!(result.contains("--default-toolchain stable"), "should default to stable"); @@ -6074,7 +6107,8 @@ safe-outputs: "---\nname: test\ndescription: test\nruntimes:\n lean:\n toolchain: \"leanprover/lean4:v4.29.1\"\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); + let result = generate_prepare_steps(&[], &exts, &ctx).unwrap(); assert!( result.contains("--default-toolchain leanprover/lean4:v4.29.1"), "should use specified toolchain" @@ -6087,7 +6121,8 @@ safe-outputs: "---\nname: test\ndescription: test\nruntimes:\n lean: true\ntools:\n cache-memory: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); - let result = generate_prepare_steps(&[], &exts).unwrap(); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); + let result = generate_prepare_steps(&[], &exts, &ctx).unwrap(); assert!(result.contains("agent_memory"), "memory steps present"); assert!(result.contains("elan-init.sh"), "lean install present"); assert!(result.contains("Lean 4"), "lean prompt present"); @@ -6099,6 +6134,7 @@ safe-outputs: fn test_generate_awf_mounts_no_extensions() { let fm = minimal_front_matter(); let exts = crate::compile::extensions::collect_extensions(&fm); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); let result = generate_awf_mounts(&exts); assert_eq!(result, "\\", "no mounts should produce bare continuation"); } @@ -6109,6 +6145,7 @@ safe-outputs: "---\nname: test\ndescription: test\nruntimes:\n lean: true\n---\n", ).unwrap(); let exts = crate::compile::extensions::collect_extensions(&fm); + let ctx = crate::compile::extensions::CompileContext::for_test(&fm); let result = generate_awf_mounts(&exts); assert!(result.contains("--mount"), "should contain --mount flag"); assert!(result.contains(".elan"), "should reference .elan directory"); diff --git a/src/compile/extensions/ado_aw_marker.rs b/src/compile/extensions/ado_aw_marker.rs new file mode 100644 index 00000000..7ed7b247 --- /dev/null +++ b/src/compile/extensions/ado_aw_marker.rs @@ -0,0 +1,417 @@ +//! Always-on ado-aw marker extension. +//! +//! Injects a single informational step into the prepare phase of the +//! Agent job of every compiled pipeline. The step's bash body carries a +//! machine-readable JSON metadata blob keyed by a `# ado-aw-metadata:` +//! prefix, plus a runtime `echo` for build-log visibility. +//! +//! Why `prepare_steps` (Agent job) and not `setup_steps` (Setup job): +//! a Setup-job injection would force every compiled pipeline to spin +//! up a dedicated pool agent just to emit a metadata comment, even for +//! pipelines that have no other reason to need a Setup job. The Agent +//! job is always present, so `prepare_steps` is free. +//! +//! Why a step (and not a top-of-file comment): ADO's Pipeline Preview +//! API strips top-of-document leading comments during YAML expansion +//! (verified empirically against live def 2434 in `msazuresphere/4x4`). +//! Comments embedded inside step bodies are preserved verbatim. The +//! marker has to live inside a step body to survive Preview-driven +//! discovery. +//! +//! Why JSON inside the marker: forward-compatible schema. New fields +//! (e.g., compiler-derived secrets list) can be added without breaking +//! older parsers, mirroring gh-aw's `# gh-aw-metadata: {...}` shape. + +use super::{CompileContext, CompilerExtension, ExtensionPhase}; + +// ─── ado-aw marker (always-on, internal) ───────────────────────────── + +/// Always-on internal extension that embeds machine-readable +/// `# ado-aw-metadata: {…}` JSON inside an injected Agent-job prepare +/// step. +/// +/// The metadata is the canonical surface consumed by Preview-driven +/// project-scope discovery in [`crate::ado`]. Discovery enumerates ADO +/// definitions, expands each via the Pipeline Preview API, and greps +/// the result for this marker. +pub struct AdoAwMarkerExtension; + +impl CompilerExtension for AdoAwMarkerExtension { + fn name(&self) -> &str { + "ado-aw-marker" + } + + fn phase(&self) -> ExtensionPhase { + // Tool phase keeps the marker step grouped with the other + // always-on internal extensions (GitHub, SafeOutputs). The + // marker has no execution dependency on anything else; the + // phase choice is purely about emit order. + ExtensionPhase::Tool + } + + fn prepare_steps(&self, ctx: &CompileContext) -> Vec { + // Inject the marker step into the Agent job's prepare phase + // (NOT a separate Setup job). Setup-job injection would force + // every compiled pipeline to spin up an extra agent pool job + // just to emit a metadata comment — wasteful for pipelines + // that have no other reason to need a Setup job. prepare_steps + // lands inside the always-present Agent job's + // `{{ prepare_steps }}` block, so it costs zero extra + // jobs/agents/pool time. + // + // In unit-test contexts that build a CompileContext without an + // input_path (e.g. CompileContext::for_test), skip the marker. + // Production paths always populate input_path via + // CompileContext::new. + let Some(input_path) = ctx.input_path else { + return vec![]; + }; + + let source = super::super::common::normalize_source_path(input_path); + let version = env!("CARGO_PKG_VERSION"); + let target = ctx.front_matter.target.as_str(); + + // ADO origin of the source markdown — disambiguates the + // `source` field when two repos in the same project happen to + // have files of the same name (e.g. both define `agents/foo.md`). + // Lower-cased so case-insensitive ADO identifiers compare cleanly. + // Empty strings when no ADO context could be inferred — production + // runs always have one thanks to the non-GitHub-remote guard, but + // unit-test contexts via `CompileContext::for_test` will not. + let org = ctx + .ado_org() + .map(|s| s.to_ascii_lowercase()) + .unwrap_or_default(); + let repo = ctx + .ado_context + .as_ref() + .map(|c| c.repo_name.to_ascii_lowercase()) + .unwrap_or_default(); + + let metadata_json = serde_json::json!({ + "schema": 1, + "source": source, + "org": org, + "repo": repo, + "version": version, + "target": target, + }) + .to_string(); + + // The `# ado-aw-metadata:` line is the parse target for + // discovery. The `echo` makes the same information visible in + // the build log at runtime, which is a free human-discoverability + // bonus and costs nothing because the step runs in milliseconds. + // + // The echo's user-controlled values go through two sanitisations: + // + // 1. `crate::sanitize::neutralize_pipeline_commands` neutralises + // `##vso[` and `##[` prefixes by wrapping them in backticks. + // The ADO build agent scans stdout for those sequences and + // treats them as logging commands (e.g. `task.setvariable`). + // An attacker who controls a markdown filename could + // otherwise inject a logging command into the build log via + // the echoed source path. Reusing the canonical helper keeps + // this in sync with the rest of the sanitisation surfaces. + // + // 2. `bash_single_quote_escape` applies the `'\''` idiom so a + // filename containing `'` (e.g. `agents/foo's.md`) doesn't + // produce syntactically broken bash. `version` and `target` + // are controlled inputs and can't contain either. + // + // `org` and `repo` are derived from ADO remote parsing, which + // already restricts them to a safe character set, but we apply + // the same defence-in-depth pattern for consistency. + let echo_source = + bash_single_quote_escape(&crate::sanitize::neutralize_pipeline_commands(&source)); + let echo_org = + bash_single_quote_escape(&crate::sanitize::neutralize_pipeline_commands(&org)); + let echo_repo = + bash_single_quote_escape(&crate::sanitize::neutralize_pipeline_commands(&repo)); + let step = format!( + "- bash: |\n \ + # ado-aw-metadata: {metadata}\n \ + echo 'ado-aw metadata: source={echo_source} org={echo_org} repo={echo_repo} version={version} target={target}'\n \ + displayName: \"ado-aw\"\n", + metadata = metadata_json, + echo_source = echo_source, + echo_org = echo_org, + echo_repo = echo_repo, + version = version, + target = target, + ); + + vec![step] + } +} + +/// Escape any `'` in `s` so it can be safely embedded inside a single-quoted +/// bash string. Replaces each `'` with `'\''` (close-quote, escaped quote, +/// reopen-quote — the canonical idiom). +fn bash_single_quote_escape(s: &str) -> String { + s.replace('\'', "'\\''") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::compile::extensions::CompileContext; + use crate::compile::types::FrontMatter; + use std::path::Path; + + fn parse_fm(yaml: &str) -> FrontMatter { + serde_yaml::from_str(yaml).expect("front matter parses") + } + + #[test] + fn returns_no_step_when_input_path_absent() { + let fm = parse_fm("name: t\ndescription: x\n"); + let ctx = CompileContext::for_test(&fm); + let steps = AdoAwMarkerExtension.prepare_steps(&ctx); + assert!( + steps.is_empty(), + "expected no marker when input_path is None" + ); + } + + #[test] + fn emits_single_step_with_canonical_displayname() { + // Production path: CompileContext::new populates input_path. + // Simulate by hand for this unit test. + let fm = parse_fm("name: t\ndescription: x\n"); + let input_path = Path::new("agents/foo.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.prepare_steps(&ctx); + assert_eq!(steps.len(), 1); + let step = &steps[0]; + assert!( + step.contains("displayName: \"ado-aw\""), + "step missing displayName:\n{step}" + ); + assert!( + step.contains("# ado-aw-metadata:"), + "step missing JSON marker line:\n{step}" + ); + assert!( + step.contains("\"source\":\"agents/foo.md\""), + "step missing source field:\n{step}" + ); + assert!( + step.contains("\"target\":\"standalone\""), + "step missing target field:\n{step}" + ); + assert!( + step.contains("\"schema\":1"), + "step missing schema field:\n{step}" + ); + // No ado_context => org/repo emit as empty strings. + assert!( + step.contains("\"org\":\"\""), + "step missing org field:\n{step}" + ); + assert!( + step.contains("\"repo\":\"\""), + "step missing repo field:\n{step}" + ); + } + + #[test] + fn org_and_repo_embed_from_ado_context_lowercased() { + // When the compiler runs inside an ADO checkout (the production + // path — the non-GitHub-remote guard enforces this), the JSON + // marker carries `org` and `repo` so discovery can disambiguate + // a same-named `source` across two repos in the same project. + let fm = parse_fm("name: t\ndescription: x\n"); + let input_path = Path::new("agents/foo.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: Some(crate::ado::AdoContext { + org_url: "https://dev.azure.com/MyOrg".to_string(), + project: "MyProject".to_string(), + repo_name: "Templates-A".to_string(), + }), + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.prepare_steps(&ctx); + assert_eq!(steps.len(), 1); + let step = &steps[0]; + // ADO identifiers are case-insensitive; lowercase to make + // comparisons in discovery deterministic. + assert!( + step.contains("\"org\":\"myorg\""), + "expected lowercased org field:\n{step}" + ); + assert!( + step.contains("\"repo\":\"templates-a\""), + "expected lowercased repo field:\n{step}" + ); + // The echo line surfaces them too for build-log readability. + assert!( + step.contains("org=myorg repo=templates-a"), + "expected echo to include org/repo:\n{step}" + ); + } + + #[test] + fn target_field_reflects_front_matter() { + for (raw_target, expected) in [ + ("standalone", "standalone"), + ("1es", "1es"), + ("job", "job"), + ("stage", "stage"), + ] { + let yaml = format!("name: t\ndescription: x\ntarget: {raw_target}\n"); + let fm = parse_fm(&yaml); + let input_path = Path::new("agents/foo.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.prepare_steps(&ctx); + assert_eq!(steps.len(), 1, "target={raw_target}"); + assert!( + steps[0].contains(&format!("\"target\":\"{expected}\"")), + "expected target={expected} in step (raw input {raw_target}):\n{}", + steps[0] + ); + } + } + + #[test] + fn bash_single_quote_escape_idiom_is_correct() { + // Standard bash idiom: close-quote, escaped quote, reopen. + assert_eq!(bash_single_quote_escape("a'b"), "a'\\''b"); + assert_eq!(bash_single_quote_escape("''"), "'\\'''\\''"); + assert_eq!(bash_single_quote_escape("plain"), "plain"); + assert_eq!(bash_single_quote_escape(""), ""); + } + + #[test] + fn echo_line_handles_single_quote_in_source_path() { + // A markdown filename with `'` in it must produce syntactically + // valid bash. Without the escape, the generated step would + // break with "unexpected EOF while looking for matching `''". + let fm = parse_fm("name: t\ndescription: x\n"); + let input_path = Path::new("agents/foo's-agent.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.prepare_steps(&ctx); + assert_eq!(steps.len(), 1); + let step = &steps[0]; + assert!( + step.contains("echo 'ado-aw metadata: source=agents/foo'\\''s-agent.md "), + "single-quote in source should be escaped via the '\\'' idiom; got:\n{step}", + ); + // The JSON marker line should still carry the raw (un-bash-escaped) + // source — JSON has no quoting concern with `'`. + assert!( + step.contains("\"source\":\"agents/foo's-agent.md\""), + "JSON marker should carry raw source unchanged:\n{step}", + ); + } + + #[test] + fn echo_line_neutralises_vso_injection_attempt() { + // An attacker who controls a markdown filename must not be able + // to inject ADO logging commands into the build log via the + // echoed source path. The ADO agent scans stdout for `##vso[` + // and `##[` prefixes and treats matching sequences as task + // commands (setvariable, setoutput, etc.). + // + // Marker uses the canonical `crate::sanitize::neutralize_pipeline_commands` + // which backtick-wraps the prefix (`` `##vso[` ``) — the literal + // `##vso[` no longer starts a token in the agent's scanner. See + // `src/sanitize.rs` for the canonical helper's own tests. + let fm = parse_fm("name: t\ndescription: x\n"); + let input_path = Path::new("agents/##vso[task.setvariable variable=FOO]value.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.prepare_steps(&ctx); + assert_eq!(steps.len(), 1); + let step = &steps[0]; + + // Find the `echo` line specifically — the `# ado-aw-metadata` + // JSON line is allowed to carry the raw source (it's not echoed + // to stdout by ADO; it's a comment in the bash heredoc, not + // output at runtime). The JSON line *does* get written to the + // build log when ADO renders the step body, but as `# ...` + // comments inside the rendered yaml; those don't trip the + // logging-command scanner. + let echo_line = step + .lines() + .find(|l| l.trim_start().starts_with("echo 'ado-aw metadata:")) + .expect("must have echo line"); + // `neutralize_pipeline_commands` wraps the matched prefix in + // backticks, breaking the `##vso[` token at the start of the + // sequence. The agent's scanner is anchored to the literal + // prefix; the backtick-wrapped form passes through unprocessed. + assert!( + !echo_line.contains(" ##vso["), + "raw ##vso[ leaked into echo line (must be backtick-wrapped): {echo_line}" + ); + assert!( + echo_line.contains("`##vso[`"), + "expected `##vso[` neutralised via canonical backtick-wrap in echo line: {echo_line}" + ); + } + + #[test] + fn json_marker_quote_in_source_round_trips_correctly() { + // Regression: `normalize_source_path` previously escaped `"` to + // `\"` before embedding the path. `serde_json::json!` then + // double-encoded the backslash, so the marker JSON looked like + // `"source":"agents/foo\\\"bar.md"` — and the path returned by + // `parse_marker_step` carried a spurious `\` that did not exist + // in the original filename. The fix is to feed the canonical + // (unescaped) path into the JSON value and let serde_json do + // the JSON-level escaping. + let fm = parse_fm("name: t\ndescription: x\n"); + let input_path = Path::new(r#"agents/foo"bar.md"#); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.prepare_steps(&ctx); + assert_eq!(steps.len(), 1); + + // Parse the marker step back via the canonical discovery parser + // and confirm the source field reconstructs to the original + // path (forward-slash-normalised, no spurious backslashes). + let parsed = crate::detect::parse_marker_step(&steps[0]); + assert_eq!(parsed.len(), 1, "expected exactly one marker in step"); + assert_eq!( + parsed[0].source, r#"agents/foo"bar.md"#, + "marker source should round-trip without spurious backslash" + ); + } +} diff --git a/src/compile/extensions/mod.rs b/src/compile/extensions/mod.rs index 1d2964f3..da41b161 100644 --- a/src/compile/extensions/mod.rs +++ b/src/compile/extensions/mod.rs @@ -109,15 +109,31 @@ pub struct CompileContext<'a> { /// `nuget.config` should be resolved). `None` for unit-test contexts /// where no on-disk repo exists. pub compile_dir: Option<&'a Path>, + /// Path of the input markdown file being compiled (e.g. + /// `agents/release-readiness.md`). `None` for unit-test contexts. + /// Consumed by the always-on `ado-aw-marker` compiler extension to + /// embed source-path metadata in the compiled YAML. + pub input_path: Option<&'a Path>, } impl<'a> CompileContext<'a> { /// Build a fully-resolved compile context. /// /// Resolves the engine implementation from front matter and infers ADO - /// context from the git remote in `compile_dir`. Returns an error if - /// the engine identifier is unsupported. - pub async fn new(front_matter: &'a FrontMatter, compile_dir: &'a Path) -> Result { + /// context from the git remote in the directory containing `input_path`. + /// Returns an error if the engine identifier is unsupported. + pub async fn new(front_matter: &'a FrontMatter, input_path: &'a Path) -> Result { + // `Path::parent()` is subtle: for a bare filename like `foo.md` + // it returns `Some(Path::new(""))` rather than `None`, so the + // `unwrap_or(Path::new("."))` fallback wouldn't catch it. An + // empty path passed to `git -C ""` behaves differently from + // `git -C "."` (some platforms reject it, others quietly use + // the parent process's cwd), so we normalise both the + // `None` and empty-`Some` cases to `.`. + let compile_dir = match input_path.parent() { + Some(p) if !p.as_os_str().is_empty() => p, + _ => Path::new("."), + }; let engine = engine::get_engine(front_matter.engine.engine_id())?; let ado_context = Self::infer_ado_context(compile_dir).await; Ok(Self { @@ -126,6 +142,7 @@ impl<'a> CompileContext<'a> { ado_context, engine, compile_dir: Some(compile_dir), + input_path: Some(input_path), }) } @@ -175,6 +192,7 @@ impl<'a> CompileContext<'a> { ado_context: None, engine: crate::engine::Engine::Copilot, compile_dir: None, + input_path: None, } } @@ -191,6 +209,7 @@ impl<'a> CompileContext<'a> { }), engine: crate::engine::Engine::Copilot, compile_dir: None, + input_path: None, } } @@ -203,6 +222,7 @@ impl<'a> CompileContext<'a> { ado_context: None, engine: crate::engine::Engine::Copilot, compile_dir: Some(compile_dir), + input_path: None, } } } @@ -268,7 +288,11 @@ pub trait CompilerExtension { /// Pipeline steps (YAML strings) to run before the agent. /// /// Each element is a complete YAML step (e.g., `- bash: |...`). - fn prepare_steps(&self) -> Vec { + /// These are injected into the Agent job's `{{ prepare_steps }}` + /// block — no new job/stage is created, so always-on extensions + /// (like `ado-aw-marker`) can emit metadata steps with zero impact + /// on pipeline structure. + fn prepare_steps(&self, _ctx: &CompileContext) -> Vec { vec![] } @@ -541,8 +565,8 @@ macro_rules! extension_enum { fn prompt_supplement(&self) -> Option { match self { $( $Enum::$Variant(e) => e.prompt_supplement(), )+ } } - fn prepare_steps(&self) -> Vec { - match self { $( $Enum::$Variant(e) => e.prepare_steps(), )+ } + fn prepare_steps(&self, ctx: &CompileContext) -> Vec { + match self { $( $Enum::$Variant(e) => e.prepare_steps(ctx), )+ } } fn setup_steps(&self, ctx: &CompileContext) -> Result> { match self { $( $Enum::$Variant(e) => e.setup_steps(ctx), )+ } @@ -572,11 +596,13 @@ macro_rules! extension_enum { }; } +mod ado_aw_marker; mod github; mod safe_outputs; pub(crate) mod trigger_filters; // Re-export tool/runtime extensions from their colocated homes +pub use ado_aw_marker::AdoAwMarkerExtension; pub use crate::tools::azure_devops::AzureDevOpsExtension; pub use crate::tools::cache_memory::CacheMemoryExtension; pub use github::GitHubExtension; @@ -593,6 +619,7 @@ extension_enum! { /// Uses static dispatch (no `Box`) — each variant delegates to /// the inner type's [`CompilerExtension`] implementation. pub enum Extension { + AdoAwMarker(AdoAwMarkerExtension), GitHub(GitHubExtension), SafeOutputs(SafeOutputsExtension), Lean(LeanExtension), @@ -624,6 +651,7 @@ pub fn collect_extensions(front_matter: &FrontMatter) -> Vec { let mut extensions = Vec::new(); // ── Always-on internal extensions ── + extensions.push(Extension::AdoAwMarker(AdoAwMarkerExtension)); extensions.push(Extension::GitHub(GitHubExtension)); extensions.push(Extension::SafeOutputs(SafeOutputsExtension)); diff --git a/src/compile/extensions/tests.rs b/src/compile/extensions/tests.rs index 4f182c36..5592e3a7 100644 --- a/src/compile/extensions/tests.rs +++ b/src/compile/extensions/tests.rs @@ -88,8 +88,9 @@ fn test_awf_mount_serde_roundtrip() { fn test_collect_extensions_empty_front_matter() { let fm = minimal_front_matter(); let exts = collect_extensions(&fm); - // Always-on: GitHub + SafeOutputs - assert_eq!(exts.len(), 2); + // Always-on: ado-aw-marker + GitHub + SafeOutputs + assert_eq!(exts.len(), 3); + assert!(exts.iter().any(|e| e.name() == "ado-aw-marker")); assert!(exts.iter().any(|e| e.name() == "GitHub")); assert!(exts.iter().any(|e| e.name() == "SafeOutputs")); } @@ -100,7 +101,7 @@ fn test_collect_extensions_lean_enabled() { parse_markdown("---\nname: test\ndescription: test\nruntimes:\n lean: true\n---\n") .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 3); // GitHub + SafeOutputs + Lean + assert_eq!(exts.len(), 4); // ado-aw-marker + GitHub + SafeOutputs + Lean assert_eq!(exts[0].name(), "Lean 4"); // Runtime phase sorts first } @@ -110,7 +111,7 @@ fn test_collect_extensions_lean_disabled() { parse_markdown("---\nname: test\ndescription: test\nruntimes:\n lean: false\n---\n") .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 2); // Just always-on + assert_eq!(exts.len(), 3); // Just always-on } #[test] @@ -119,7 +120,7 @@ fn test_collect_extensions_azure_devops_enabled() { parse_markdown("---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n") .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 3); // GitHub + SafeOutputs + AzureDevOps + assert_eq!(exts.len(), 4); // ado-aw-marker + GitHub + SafeOutputs + AzureDevOps assert!(exts.iter().any(|e| e.name() == "Azure DevOps MCP")); } @@ -129,7 +130,7 @@ fn test_collect_extensions_cache_memory_enabled() { parse_markdown("---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n") .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 3); // GitHub + SafeOutputs + CacheMemory + assert_eq!(exts.len(), 4); // ado-aw-marker + GitHub + SafeOutputs + CacheMemory assert!(exts.iter().any(|e| e.name() == "Cache Memory")); } @@ -140,7 +141,7 @@ fn test_collect_extensions_all_enabled() { ) .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 5); // GitHub + SafeOutputs + Lean + AzureDevOps + CacheMemory + assert_eq!(exts.len(), 6); // ado-aw-marker + GitHub + SafeOutputs + Lean + AzureDevOps + CacheMemory assert_eq!(exts[0].name(), "Lean 4"); // Runtime phase first // All tool-phase extensions follow assert!(exts[1..].iter().all(|e| e.phase() == ExtensionPhase::Tool)); @@ -156,7 +157,7 @@ fn test_collect_extensions_runtimes_always_before_tools() { ) .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 5); // GitHub + SafeOutputs + Lean + AzureDevOps + CacheMemory + assert_eq!(exts.len(), 6); // ado-aw-marker + GitHub + SafeOutputs + Lean + AzureDevOps + CacheMemory // Find the boundary: last Runtime and first Tool let last_runtime_idx = exts @@ -206,7 +207,9 @@ fn test_lean_prompt_supplement() { #[test] fn test_lean_prepare_steps() { let ext = LeanExtension::new(LeanRuntimeConfig::Enabled(true)); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); assert_eq!(steps.len(), 1); assert!(steps[0].contains("elan-init.sh")); } @@ -323,7 +326,9 @@ fn test_ado_validate_duplicate_mcp_warning() { #[test] fn test_cache_memory_prepare_steps() { let ext = CacheMemoryExtension::new(CacheMemoryToolConfig::Enabled(true)); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); assert_eq!(steps.len(), 1); assert!(steps[0].contains("DownloadPipelineArtifact")); } @@ -400,7 +405,9 @@ fn test_python_prepare_steps() { let ext = crate::runtimes::python::PythonExtension::new( crate::runtimes::python::PythonRuntimeConfig::Enabled(true), ); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); assert_eq!(steps.len(), 1, "no auth step without feed-url/config"); assert!(steps[0].contains("UsePythonVersion@0")); } @@ -412,7 +419,9 @@ fn test_python_prepare_steps_with_feed_url() { ).unwrap(); let python = fm.runtimes.as_ref().unwrap().python.as_ref().unwrap(); let ext = crate::runtimes::python::PythonExtension::new(python.clone()); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); assert_eq!(steps.len(), 2); assert!(steps[0].contains("UsePythonVersion@0")); assert!(steps[1].contains("PipAuthenticate@1")); @@ -542,7 +551,9 @@ fn test_node_prepare_steps() { let ext = crate::runtimes::node::NodeExtension::new( crate::runtimes::node::NodeRuntimeConfig::Enabled(true), ); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); assert_eq!(steps.len(), 1, "no auth steps without feed-url/config"); assert!(steps[0].contains("NodeTool@0")); } @@ -554,7 +565,9 @@ fn test_node_prepare_steps_with_feed_url() { ).unwrap(); let node = fm.runtimes.as_ref().unwrap().node.as_ref().unwrap(); let ext = crate::runtimes::node::NodeExtension::new(node.clone()); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); assert_eq!(steps.len(), 3); assert!(steps[0].contains("NodeTool@0")); assert!(steps[1].contains("Ensure .npmrc")); @@ -707,7 +720,9 @@ fn test_dotnet_prepare_steps() { let ext = crate::runtimes::dotnet::DotnetExtension::new( crate::runtimes::dotnet::DotnetRuntimeConfig::Enabled(true), ); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); assert_eq!(steps.len(), 1, "no auth steps without feed-url/config"); assert!(steps[0].contains("UseDotNet@2")); assert!(steps[0].contains("packageType: 'sdk'")); @@ -720,7 +735,9 @@ fn test_dotnet_prepare_steps_with_feed_url() { ).unwrap(); let dotnet = fm.runtimes.as_ref().unwrap().dotnet.as_ref().unwrap(); let ext = crate::runtimes::dotnet::DotnetExtension::new(dotnet.clone()); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); assert_eq!(steps.len(), 3); assert!(steps[0].contains("UseDotNet@2")); assert!(steps[1].contains("Ensure nuget.config")); @@ -734,7 +751,9 @@ fn test_dotnet_prepare_steps_with_config_only() { ).unwrap(); let dotnet = fm.runtimes.as_ref().unwrap().dotnet.as_ref().unwrap(); let ext = crate::runtimes::dotnet::DotnetExtension::new(dotnet.clone()); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); // config: alone trusts the user-checked-in nuget.config — no shim, // just the auth step. assert_eq!(steps.len(), 2); @@ -795,7 +814,9 @@ fn test_dotnet_global_json_sentinel_emits_use_global_json() { let dotnet = fm.runtimes.as_ref().unwrap().dotnet.as_ref().unwrap(); assert!(dotnet.use_global_json()); let ext = crate::runtimes::dotnet::DotnetExtension::new(dotnet.clone()); - let steps = ext.prepare_steps(); + let fm = minimal_front_matter(); + let ctx = ctx_from(&fm); + let steps = ext.prepare_steps(&ctx); assert!(steps[0].contains("useGlobalJson: true")); assert!(!steps[0].contains("version:"), "explicit version must be omitted in global.json mode"); assert!(steps[0].contains("from global.json")); diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 1c698cbc..52f6c314 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -31,6 +31,7 @@ pub use common::parse_markdown; #[allow(unused_imports)] pub use common::{atomic_write, parse_markdown_detailed, reconstruct_source, ParsedSource}; pub use common::HEADER_MARKER; +pub use common::normalize_source_path; pub use common::ADO_MCP_ENTRYPOINT; pub use common::ADO_MCP_IMAGE; pub use common::ADO_MCP_PACKAGE; diff --git a/src/compile/onees.rs b/src/compile/onees.rs index ee45e31b..558ccbc4 100644 --- a/src/compile/onees.rs +++ b/src/compile/onees.rs @@ -47,8 +47,7 @@ impl Compiler for OneESCompiler { let extensions = super::extensions::collect_extensions(front_matter); // Build compile context for MCPG config generation - let input_dir = input_path.parent().unwrap_or(Path::new(".")); - let ctx = super::extensions::CompileContext::new(front_matter, input_dir).await?; + let ctx = super::extensions::CompileContext::new(front_matter, input_path).await?; // Generate values shared with standalone that are passed as extra replacements let allowed_domains = generate_allowed_domains(front_matter, &extensions)?; @@ -66,7 +65,7 @@ impl Compiler for OneESCompiler { // Generate 1ES-specific setup/teardown jobs(no per-job pool, uses templateContext). // These override the shared {{ setup_job }} / {{ teardown_job }} markers via // extra_replacements, which are applied before the shared replacements. - let setup_job = generate_setup_job(&front_matter.setup); + let setup_job = generate_setup_job(&front_matter.setup, &extensions, &ctx)?; let teardown_job = generate_teardown_job(&front_matter.teardown); let config = CompileConfig { @@ -102,14 +101,45 @@ impl Compiler for OneESCompiler { /// Generate setup job for 1ES template. /// Unlike standalone, 1ES jobs don't have per-job `pool:` — the pool is at /// the top-level `parameters.pool`. Jobs use `templateContext: type: buildJob`. -fn generate_setup_job(setup_steps: &[serde_yaml::Value]) -> String { - if setup_steps.is_empty() { - return String::new(); +/// +/// Extension `setup_steps()` are injected before user setup steps (mirrors the +/// shared `generate_setup_job` in common.rs). The always-on ado-aw-marker +/// extension is the primary contributor; user setup_steps are appended after. +fn generate_setup_job( + setup_steps: &[serde_yaml::Value], + extensions: &[super::extensions::Extension], + ctx: &super::extensions::CompileContext, +) -> anyhow::Result { + use super::extensions::CompilerExtension; + + // Collect setup_steps from ALL extensions + let mut ext_setup_steps: Vec = Vec::new(); + for ext in extensions { + ext_setup_steps.extend(ext.setup_steps(ctx)?); } - let steps_yaml = format_steps_yaml_indented(setup_steps, 6); + if setup_steps.is_empty() && ext_setup_steps.is_empty() { + return Ok(String::new()); + } - format!( + // Steps in the 1ES templateContext.steps block are indented 6 spaces. + let mut body = String::new(); + + if !ext_setup_steps.is_empty() { + let ext_steps_combined = ext_setup_steps.join("\n\n"); + let indented = indent_block(&ext_steps_combined, " "); + body.push_str(&indented); + if !body.ends_with('\n') { + body.push('\n'); + } + } + + if !setup_steps.is_empty() { + let user_steps_yaml = format_steps_yaml_indented(setup_steps, 6); + body.push_str(&user_steps_yaml); + } + + Ok(format!( r#"- job: Setup displayName: "Setup" templateContext: @@ -118,8 +148,23 @@ fn generate_setup_job(setup_steps: &[serde_yaml::Value]) -> String { - checkout: self {} "#, - steps_yaml - ) + body.trim_end_matches('\n') + )) +} + +/// Indent every non-empty line in `block` with `prefix`. +fn indent_block(block: &str, prefix: &str) -> String { + block + .lines() + .map(|line| { + if line.is_empty() { + String::new() + } else { + format!("{prefix}{line}") + } + }) + .collect::>() + .join("\n") } /// Generate teardown job for 1ES template. @@ -153,17 +198,27 @@ mod tests { #[test] fn test_generate_setup_job_empty_steps() { - let result = generate_setup_job(&[]); - assert!(result.is_empty(), "Empty setup steps should return empty string"); + let fm = parse_test_fm("name: t\ndescription: x\n"); + let ctx = super::super::extensions::CompileContext::for_test(&fm); + let result = generate_setup_job(&[], &[], &ctx).expect("call ok"); + assert!( + result.is_empty(), + "Empty setup steps with no extensions should return empty string" + ); } #[test] fn test_generate_setup_job_with_steps() { let step: serde_yaml::Value = serde_yaml::from_str("bash: echo setup").expect("valid yaml"); - let result = generate_setup_job(&[step]); + let fm = parse_test_fm("name: t\ndescription: x\n"); + let ctx = super::super::extensions::CompileContext::for_test(&fm); + let result = generate_setup_job(&[step], &[], &ctx).expect("call ok"); assert!(result.contains("Setup"), "Should define a Setup job"); - assert!(result.contains("displayName: \"Setup\""), "Should use simple display name"); + assert!( + result.contains("displayName: \"Setup\""), + "Should use simple display name" + ); assert!(result.contains("checkout: self"), "Should include self checkout"); assert!(result.contains("echo setup"), "Should include the step content"); assert!(result.contains("templateContext"), "Should include templateContext"); @@ -171,6 +226,10 @@ mod tests { assert!(!result.contains("pool:"), "Should not include per-job pool"); } + fn parse_test_fm(yaml: &str) -> crate::compile::types::FrontMatter { + serde_yaml::from_str(yaml).expect("parse fm") + } + // ─── generate_teardown_job ─────────────────────────────────────────────── #[test] diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 063c7dff..1cd44565 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -48,8 +48,7 @@ impl Compiler for StandaloneCompiler { let extensions = super::extensions::collect_extensions(front_matter); // Build compile context for MCPG config generation - let input_dir = input_path.parent().unwrap_or(std::path::Path::new(".")); - let ctx = super::extensions::CompileContext::new(front_matter, input_dir).await?; + let ctx = super::extensions::CompileContext::new(front_matter, input_path).await?; // Standalone-specific values let allowed_domains = generate_allowed_domains(front_matter, &extensions)?; diff --git a/src/compile/types.rs b/src/compile/types.rs index 02acc7ee..bb44ce4b 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -23,6 +23,20 @@ pub enum CompileTarget { Stage, } +impl CompileTarget { + /// Canonical lowercase string for this target (matches the serde rename). + /// Used by the always-on ado-aw-marker extension when emitting the + /// machine-readable metadata blob. + pub fn as_str(&self) -> &'static str { + match self { + Self::Standalone => "standalone", + Self::OneES => "1es", + Self::Job => "job", + Self::Stage => "stage", + } + } +} + /// Pool configuration - accepts both string and object formats /// /// Examples: diff --git a/src/detect.rs b/src/detect.rs index 16eb33df..a46433d3 100644 --- a/src/detect.rs +++ b/src/detect.rs @@ -5,6 +5,7 @@ use anyhow::{Context, Result}; use log::{debug, info}; +use serde::Deserialize; use std::path::{Path, PathBuf}; use tokio::io::AsyncBufReadExt; @@ -124,6 +125,100 @@ async fn try_detect_pipeline( Ok(None) } +/// Prefix of the machine-readable marker emitted into every compiled +/// pipeline by the always-on `ado-aw-marker` compiler extension. +/// +/// The marker is a `# ado-aw-metadata: { … JSON … }` line embedded +/// inside the bash body of an injected Agent-job prepare step. The +/// step body (unlike top-of-file YAML comments) survives ADO Pipeline +/// Preview expansion, so this prefix is the canonical surface +/// project-scope discovery searches for in expanded YAML. +pub const MARKER_STEP_PREFIX: &str = "# ado-aw-metadata:"; + +/// Parsed metadata from a `# ado-aw-metadata: {…}` marker step line. +/// +/// The schema is forward-compatible: unknown JSON fields are ignored, +/// and missing fields fall through to defaults (empty string / zero). +/// Callers that need a specific field should check it explicitly. +#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)] +pub struct MarkerMetadata { + /// Schema version; `1` for the initial release. + #[serde(default)] + pub schema: u32, + /// Source markdown path, normalised forward-slash form (e.g. + /// `agents/release-readiness.md`). + #[serde(default)] + pub source: String, + /// ADO organisation name the source markdown was compiled in + /// (e.g. `myorg`). Lowercased at emit time. Combined with + /// [`MarkerMetadata::repo`] this disambiguates the marker's + /// `source` field when two repos in the same ADO project happen + /// to have files of the same name (e.g. both define + /// `agents/foo.md`). Empty string when the compiler ran outside + /// an ADO checkout (rare in production thanks to the + /// non-GitHub-remote guard). + #[serde(default)] + pub org: String, + /// ADO repository name the source markdown was compiled in + /// (e.g. `templates-a`). Lowercased at emit time. See + /// [`MarkerMetadata::org`] for rationale. Empty string when the + /// compiler ran outside an ADO checkout. + #[serde(default)] + pub repo: String, + /// Compiler version that produced this YAML (`CARGO_PKG_VERSION`). + #[serde(default)] + pub version: String, + /// Compile target: `standalone` / `1es` / `job` / `stage`. + #[serde(default)] + pub target: String, +} + +/// Scan raw pipeline YAML for `# ado-aw-metadata: {…}` marker lines. +/// +/// Returns one [`MarkerMetadata`] per parseable hit. Multiple hits in a +/// single document indicate a consumer pipeline that includes more than +/// one ado-aw-generated template; project-scope discovery uses that to +/// classify the definition as `Consumer`. Malformed JSON entries are +/// skipped (logged at debug) rather than panicking — defensive against +/// future schema drift inside the JSON blob. +/// +/// Designed to be called against either: +/// - Raw compiled-on-disk lock-file YAML, or +/// - The `finalYaml` returned by ADO's Pipeline Preview API (which +/// strips top-of-file comments but preserves step bodies). +pub fn parse_marker_step(yaml: &str) -> Vec { + let mut results = Vec::new(); + + for line in yaml.lines() { + // Trim leading whitespace; the marker lives inside an indented + // bash heredoc, so the actual `#` may be indented arbitrarily. + let trimmed = line.trim_start(); + let Some(rest) = trimmed.strip_prefix(MARKER_STEP_PREFIX) else { + continue; + }; + + let json_str = rest.trim(); + if !json_str.starts_with('{') { + // Marker prefix matched but no JSON payload — likely + // documentation or a non-marker comment. Skip silently. + continue; + } + + match serde_json::from_str::(json_str) { + Ok(meta) => results.push(meta), + Err(e) => { + log::debug!( + "Skipping malformed ado-aw-metadata marker: {} (payload: {})", + e, + json_str + ); + } + } + } + + results +} + /// Parsed metadata from a `# @ado-aw` header line. pub struct HeaderMetadata { pub source: String, @@ -201,6 +296,86 @@ pub fn parse_header_line(line: &str) -> Option { mod tests { use super::*; + #[test] + fn test_parse_marker_step_single() { + let yaml = "\ +- bash: | + # ado-aw-metadata: {\"schema\":1,\"source\":\"agents/foo.md\",\"version\":\"0.28.0\",\"target\":\"standalone\"} + echo hello + displayName: \"ado-aw\" +"; + let metas = parse_marker_step(yaml); + assert_eq!(metas.len(), 1); + assert_eq!(metas[0].schema, 1); + assert_eq!(metas[0].source, "agents/foo.md"); + assert_eq!(metas[0].version, "0.28.0"); + assert_eq!(metas[0].target, "standalone"); + } + + #[test] + fn test_parse_marker_step_multiple() { + // A consumer pipeline pulling in two templates expands to two + // marker steps in finalYaml. + let yaml = "\ +jobs: + - job: a + steps: + - bash: | + # ado-aw-metadata: {\"schema\":1,\"source\":\"agents/a.md\",\"version\":\"0.28.0\",\"target\":\"job\"} + echo a + displayName: \"ado-aw\" + - job: b + steps: + - bash: | + # ado-aw-metadata: {\"schema\":1,\"source\":\"agents/b.md\",\"version\":\"0.27.0\",\"target\":\"job\"} + echo b + displayName: \"ado-aw\" +"; + let metas = parse_marker_step(yaml); + assert_eq!(metas.len(), 2); + let sources: Vec<&str> = metas.iter().map(|m| m.source.as_str()).collect(); + assert!(sources.contains(&"agents/a.md")); + assert!(sources.contains(&"agents/b.md")); + } + + #[test] + fn test_parse_marker_step_no_match_returns_empty() { + let yaml = "name: foo\njobs:\n - job: x\n steps:\n - bash: echo hi\n"; + assert!(parse_marker_step(yaml).is_empty()); + } + + #[test] + fn test_parse_marker_step_skips_malformed_json() { + // The prefix matches but the JSON is broken; the parser must + // skip silently rather than panic. A valid marker on a later + // line is still returned. + let yaml = "\ +- bash: | + # ado-aw-metadata: {not valid json + # ado-aw-metadata: {\"schema\":1,\"source\":\"agents/ok.md\",\"version\":\"1.0.0\",\"target\":\"stage\"} +"; + let metas = parse_marker_step(yaml); + assert_eq!(metas.len(), 1); + assert_eq!(metas[0].source, "agents/ok.md"); + } + + #[test] + fn test_parse_marker_step_tolerates_extra_json_fields() { + // Forward-compatibility: an unknown future field shouldn't + // break the parser. + let yaml = " # ado-aw-metadata: {\"schema\":2,\"source\":\"agents/x.md\",\"version\":\"1.0.0\",\"target\":\"standalone\",\"future_field\":[1,2,3]}\n"; + let metas = parse_marker_step(yaml); + assert_eq!(metas.len(), 1); + assert_eq!(metas[0].source, "agents/x.md"); + assert_eq!(metas[0].schema, 2); + } + + #[test] + fn test_parse_marker_step_ignores_prefix_without_json() { + let yaml = "# ado-aw-metadata: (manual documentation note, not a marker)\n"; + assert!(parse_marker_step(yaml).is_empty()); + } + #[test] fn test_parse_header_line_valid() { let line = "# @ado-aw source=agents/my-agent.md version=0.3.2"; diff --git a/src/enable.rs b/src/enable.rs index 726971d7..e2483516 100644 --- a/src/enable.rs +++ b/src/enable.rs @@ -453,6 +453,8 @@ mod tests { }), queue_status: status.map(str::to_string), path: None, + repository: None, + revision: None, } } diff --git a/src/list.rs b/src/list.rs index 03a00217..556066f5 100644 --- a/src/list.rs +++ b/src/list.rs @@ -313,6 +313,8 @@ mod tests { }), queue_status: status.map(str::to_string), path: folder.map(str::to_string), + repository: None, + revision: None, } } diff --git a/src/main.rs b/src/main.rs index 1defa470..7a4ce2dc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -64,6 +64,20 @@ enum SecretsCmd { /// Explicit definition IDs (skips local-fixture auto-detection). #[arg(long, value_delimiter = ',')] definition_ids: Option>, + /// Use Preview-driven discovery against every definition in the + /// project (not just those in the current repo). Implies the + /// discovery code path; ignores local lock files for matching. + #[arg(long, conflicts_with = "definition_ids")] + all_repos: bool, + /// Filter discovered definitions to consumers of one specific + /// ado-aw template (e.g. `agents/security-scan.md`). Activates + /// the discovery code path. **Without `--all-repos`, only + /// definitions in the current repository are searched** — pair + /// with `--all-repos` to search the full project. Path matching + /// is case-sensitive and forward-slash-normalised; on Windows, + /// pass the path in the same case it was compiled with. + #[arg(long, conflicts_with = "definition_ids")] + source: Option, }, /// List variable names + flags on every matched definition. Never prints values. List { @@ -78,6 +92,17 @@ enum SecretsCmd { json: bool, #[arg(long, value_delimiter = ',')] definition_ids: Option>, + /// Use Preview-driven discovery against every definition in the + /// project (not just those in the current repo). + #[arg(long, conflicts_with = "definition_ids")] + all_repos: bool, + /// Filter discovered definitions to consumers of one specific + /// ado-aw template. **Without `--all-repos`, only definitions + /// in the current repository are searched.** Path matching is + /// case-sensitive and forward-slash-normalised; on Windows, + /// pass the path in the same case it was compiled with. + #[arg(long, conflicts_with = "definition_ids")] + source: Option, }, /// Delete a named variable from every matched definition. Delete { @@ -93,6 +118,17 @@ enum SecretsCmd { dry_run: bool, #[arg(long, value_delimiter = ',')] definition_ids: Option>, + /// Use Preview-driven discovery against every definition in the + /// project. + #[arg(long, conflicts_with = "definition_ids")] + all_repos: bool, + /// Filter discovered definitions to consumers of one specific + /// ado-aw template. **Without `--all-repos`, only definitions + /// in the current repository are searched.** Path matching is + /// case-sensitive and forward-slash-normalised; on Windows, + /// pass the path in the same case it was compiled with. + #[arg(long, conflicts_with = "definition_ids")] + source: Option, }, } @@ -907,6 +943,8 @@ async fn main() -> Result<()> { value_stdin, dry_run, definition_ids, + all_repos, + source, } => { secrets::run_set(secrets::SetOptions { name: &name, @@ -919,6 +957,8 @@ async fn main() -> Result<()> { value_stdin, dry_run, definition_ids: definition_ids.as_deref(), + all_repos, + source: source.as_deref(), }) .await?; } @@ -929,6 +969,8 @@ async fn main() -> Result<()> { pat, json, definition_ids, + all_repos, + source, } => { secrets::run_list(secrets::ListOptions { org: org.as_deref(), @@ -937,6 +979,8 @@ async fn main() -> Result<()> { path: path.as_deref(), json, definition_ids: definition_ids.as_deref(), + all_repos, + source: source.as_deref(), }) .await?; } @@ -948,6 +992,8 @@ async fn main() -> Result<()> { pat, dry_run, definition_ids, + all_repos, + source, } => { secrets::run_delete(secrets::DeleteOptions { name: &name, @@ -957,6 +1003,8 @@ async fn main() -> Result<()> { path: path.as_deref(), dry_run, definition_ids: definition_ids.as_deref(), + all_repos, + source: source.as_deref(), }) .await?; } diff --git a/src/runtimes/dotnet/extension.rs b/src/runtimes/dotnet/extension.rs index 675eb54b..43a3cdb5 100644 --- a/src/runtimes/dotnet/extension.rs +++ b/src/runtimes/dotnet/extension.rs @@ -62,7 +62,7 @@ in the repository.\n" ) } - fn prepare_steps(&self) -> Vec { + fn prepare_steps(&self, _ctx: &CompileContext) -> Vec { let mut steps = vec![generate_dotnet_install(&self.config)]; // Emit ensure-nuget.config + NuGetAuthenticate when an internal feed // is configured. When only `config:` is set, the user-checked-in diff --git a/src/runtimes/lean/extension.rs b/src/runtimes/lean/extension.rs index 18a3378c..f7fe6e8d 100644 --- a/src/runtimes/lean/extension.rs +++ b/src/runtimes/lean/extension.rs @@ -52,7 +52,7 @@ the toolchain. Lean files use the `.lean` extension.\n" ) } - fn prepare_steps(&self) -> Vec { + fn prepare_steps(&self, _ctx: &CompileContext) -> Vec { vec![generate_lean_install(&self.config)] } diff --git a/src/runtimes/node/extension.rs b/src/runtimes/node/extension.rs index 1cac8abf..256eb6e3 100644 --- a/src/runtimes/node/extension.rs +++ b/src/runtimes/node/extension.rs @@ -54,7 +54,7 @@ Node.js is installed and available. Use `node` to run scripts, \ ) } - fn prepare_steps(&self) -> Vec { + fn prepare_steps(&self, _ctx: &CompileContext) -> Vec { let mut steps = vec![generate_node_install(&self.config)]; // Emit ensure-npmrc + npmAuthenticate only when an internal feed is configured if self.config.feed_url().is_some() || self.config.config().is_some() { diff --git a/src/runtimes/python/extension.rs b/src/runtimes/python/extension.rs index b5f34c54..873aab6f 100644 --- a/src/runtimes/python/extension.rs +++ b/src/runtimes/python/extension.rs @@ -55,7 +55,7 @@ management, install it first with `pip install uv`.\n" ) } - fn prepare_steps(&self) -> Vec { + fn prepare_steps(&self, _ctx: &CompileContext) -> Vec { let mut steps = vec![generate_python_install(&self.config)]; // Emit PipAuthenticate only when feed-url is set (config alone is not // sufficient — PipAuthenticate needs a feed to authenticate against) diff --git a/src/secrets.rs b/src/secrets.rs index b984be3b..98de5f19 100644 --- a/src/secrets.rs +++ b/src/secrets.rs @@ -19,9 +19,10 @@ use anyhow::{Context, Result}; use std::path::{Path, PathBuf}; use crate::ado::{ - AdoAuth, AdoContext, MatchedDefinition, PATH_SEGMENT, get_definition_full, - normalize_masked_secret_variable_values, resolve_ado_context, resolve_auth, - resolve_definitions, + AdoAuth, AdoContext, MatchedDefinition, PATH_SEGMENT, + discovery::{DiscoveryScope, resolve_definitions_via_discovery}, + get_definition_full, normalize_masked_secret_variable_values, resolve_ado_context, + resolve_auth, resolve_definitions, }; /// Description of one pipeline variable, for listing only. @@ -69,9 +70,7 @@ pub fn apply_variable_set( value: &str, allow_override: Option, ) -> serde_json::Value { - if definition.get("variables").is_none() - || !definition["variables"].is_object() - { + if definition.get("variables").is_none() || !definition["variables"].is_object() { definition["variables"] = serde_json::json!({}); } let resolved_override = allow_override.unwrap_or_else(|| { @@ -92,11 +91,11 @@ pub fn apply_variable_set( /// Pure: produce a copy of `definition` with the named variable /// removed. No-op if it wasn't present. -pub fn apply_variable_delete( - mut definition: serde_json::Value, - name: &str, -) -> serde_json::Value { - if let Some(vars) = definition.get_mut("variables").and_then(|v| v.as_object_mut()) { +pub fn apply_variable_delete(mut definition: serde_json::Value, name: &str) -> serde_json::Value { + if let Some(vars) = definition + .get_mut("variables") + .and_then(|v| v.as_object_mut()) + { vars.remove(name); } definition @@ -174,6 +173,133 @@ pub struct SetOptions<'a> { pub value_stdin: bool, pub dry_run: bool, pub definition_ids: Option<&'a [u64]>, + /// Use Preview-driven discovery across every definition in the + /// project (not just those whose root YAML is a local lock file). + pub all_repos: bool, + /// Filter discovery results to consumers of one specific ado-aw + /// template source path (e.g. `agents/security-scan.md`). When set + /// alongside `all_repos=false`, scopes discovery to the current repo. + pub source: Option<&'a str>, +} + +/// Decide between the legacy lexical resolver and Preview-driven +/// discovery based on which flags the caller passed. Returns +/// `Ok(Some(vec))` on success, `Ok(None)` only when the legacy path +/// signaled "no local fixtures found; exit clean". +async fn resolve_for_command( + client: &reqwest::Client, + ado_ctx: &AdoContext, + auth: &AdoAuth, + definition_ids: Option<&[u64]>, + all_repos: bool, + source_filter: Option<&str>, + repo_path: &Path, +) -> Result>> { + // Discovery code path: activated by --all-repos or --source. + // Explicit definition_ids always takes precedence (escape hatch). + if definition_ids.is_none() && (all_repos || source_filter.is_some()) { + // CurrentRepo scope without a resolvable git remote is a + // user-friendly footgun: discovery would silently return zero + // results. Surface a targeted hint *before* spending an HTTP + // round-trip on a doomed listing. + if !all_repos && ado_ctx.repo_name.is_empty() { + anyhow::bail!( + "--source filters by the current repository, but no Azure DevOps git remote \ + was detected in `{}`.\n\ + Either run from a checkout of an ADO repo, or pass --all-repos to search \ + the entire project.", + repo_path.display() + ); + } + + // `--source` requires an identifiable current (org, repo) so + // the marker-origin filter in discovery can disambiguate + // same-named source paths across repos. Without it, a strict + // marker (one carrying `org` / `repo`) would silently fail the + // origin check and the operator would see "no pipelines + // matched" with no explanation. The `!all_repos` guard above + // covers the missing-`repo_name` case for current-repo scope; + // here we also catch `--all-repos --source` paired with a + // missing or malformed `org_url` (e.g. `org_name()` resolves + // to `None`). + if source_filter.is_some() && (ado_ctx.org_name().is_none() || ado_ctx.repo_name.is_empty()) + { + anyhow::bail!( + "--source needs the current repository's Azure DevOps org and repo to \ + disambiguate same-named source paths across the project, but neither \ + could be resolved from `{}`.\n\ + Run from a checkout of an ADO repo, or use --definition-ids to act on \ + specific pipelines directly.", + repo_path.display() + ); + } + + let scope = if all_repos { + DiscoveryScope::AllRepos + } else { + DiscoveryScope::CurrentRepo + }; + + // Feed local lock files into discovery so its yamlFilename + // fast-path can skip a Preview call per locally-compiled + // pipeline. Best-effort: scan failures aren't fatal — discovery + // simply falls back to Preview for everything. + let local_lock_paths: Vec = match crate::detect::detect_pipelines(repo_path).await + { + Ok(detected) => detected.into_iter().map(|p| p.yaml_path).collect(), + Err(e) => { + log::debug!( + "Local-fixture scan failed during discovery ({e}); falling back to Preview \ + for every definition." + ); + Vec::new() + } + }; + let local_lock_slice = if local_lock_paths.is_empty() { + None + } else { + Some(local_lock_paths.as_slice()) + }; + + let matched = resolve_definitions_via_discovery( + client, + ado_ctx, + auth, + scope, + local_lock_slice, + source_filter, + ) + .await?; + return Ok(Some(matched)); + } + + // Legacy behaviour: explicit --definition-ids, or local-fixture + // lexical matching. Unchanged from before the discovery work. + resolve_definitions(client, ado_ctx, auth, definition_ids, repo_path).await +} + +/// Build the user-facing "no matches" hint, tailored to the flag +/// combination the caller used. Centralised here so `run_set`, +/// `run_list`, and `run_delete` keep the messages in sync. +fn empty_match_hint(all_repos: bool, source: Option<&str>) -> String { + match (all_repos, source) { + (false, Some(src)) => format!( + "No consumers of `{src}` were found in this repository. \ + If the template is consumed by pipelines in other repos in this \ + project, try `--all-repos` to widen the search." + ), + (true, Some(src)) => format!( + "No consumers of `{src}` were found anywhere in this project via \ + Preview-driven discovery. Run `ado-aw list --all-repos --source {src}` \ + to diagnose." + ), + (true, None) => "No ado-aw pipelines found in this project via Preview-driven discovery. \ + Run `ado-aw list --all-repos` to diagnose." + .to_string(), + (false, None) => "No ADO definitions matched any local fixture. Run `ado-aw list` to \ + diagnose, or try `--all-repos` to search the entire project." + .to_string(), + } } pub async fn run_set(opts: SetOptions<'_>) -> Result<()> { @@ -197,11 +323,13 @@ pub async fn run_set(opts: SetOptions<'_>) -> Result<()> { .build() .context("Failed to create HTTP client")?; - let Some(matched) = resolve_definitions( + let Some(matched) = resolve_for_command( &client, &ado_ctx, &auth, opts.definition_ids, + opts.all_repos, + opts.source, &repo_path, ) .await? @@ -210,10 +338,7 @@ pub async fn run_set(opts: SetOptions<'_>) -> Result<()> { }; if matched.is_empty() { - anyhow::bail!( - "No ADO definitions matched any local fixture. Run `ado-aw list` to \ - diagnose." - ); + anyhow::bail!("{}", empty_match_hint(opts.all_repos, opts.source)); } print_matched_summary(&matched); @@ -295,11 +420,7 @@ async fn apply_set_one( /// Resolve the variable value from the CLI inputs: explicit positional /// `value` first, then `--value-stdin` (reads exactly one line), then /// an interactive tty prompt with echo off. -fn resolve_value( - name: &str, - explicit: Option<&str>, - value_stdin: bool, -) -> Result { +fn resolve_value(name: &str, explicit: Option<&str>, value_stdin: bool) -> Result { if let Some(v) = explicit { return Ok(v.to_string()); } @@ -307,7 +428,10 @@ fn resolve_value( use std::io::BufRead; let mut line = String::new(); let stdin = std::io::stdin(); - stdin.lock().read_line(&mut line).context("Failed to read value from stdin")?; + stdin + .lock() + .read_line(&mut line) + .context("Failed to read value from stdin")?; let trimmed = line.trim_end_matches(['\r', '\n']).to_string(); if trimmed.is_empty() { anyhow::bail!("--value-stdin read an empty value"); @@ -329,6 +453,8 @@ pub struct ListOptions<'a> { pub path: Option<&'a Path>, pub json: bool, pub definition_ids: Option<&'a [u64]>, + pub all_repos: bool, + pub source: Option<&'a str>, } pub async fn run_list(opts: ListOptions<'_>) -> Result<()> { @@ -349,11 +475,13 @@ pub async fn run_list(opts: ListOptions<'_>) -> Result<()> { .build() .context("Failed to create HTTP client")?; - let Some(matched) = resolve_definitions( + let Some(matched) = resolve_for_command( &client, &ado_ctx, &auth, opts.definition_ids, + opts.all_repos, + opts.source, &repo_path, ) .await? @@ -362,10 +490,7 @@ pub async fn run_list(opts: ListOptions<'_>) -> Result<()> { }; if matched.is_empty() { - anyhow::bail!( - "No ADO definitions matched any local fixture. Run `ado-aw list` to \ - diagnose." - ); + anyhow::bail!("{}", empty_match_hint(opts.all_repos, opts.source)); } let mut payload = serde_json::json!({}); @@ -414,6 +539,8 @@ pub struct DeleteOptions<'a> { pub path: Option<&'a Path>, pub dry_run: bool, pub definition_ids: Option<&'a [u64]>, + pub all_repos: bool, + pub source: Option<&'a str>, } pub async fn run_delete(opts: DeleteOptions<'_>) -> Result<()> { @@ -436,11 +563,13 @@ pub async fn run_delete(opts: DeleteOptions<'_>) -> Result<()> { .build() .context("Failed to create HTTP client")?; - let Some(matched) = resolve_definitions( + let Some(matched) = resolve_for_command( &client, &ado_ctx, &auth, opts.definition_ids, + opts.all_repos, + opts.source, &repo_path, ) .await? @@ -449,10 +578,7 @@ pub async fn run_delete(opts: DeleteOptions<'_>) -> Result<()> { }; if matched.is_empty() { - anyhow::bail!( - "No ADO definitions matched any local fixture. Run `ado-aw list` to \ - diagnose." - ); + anyhow::bail!("{}", empty_match_hint(opts.all_repos, opts.source)); } print_matched_summary(&matched); @@ -471,7 +597,10 @@ pub async fn run_delete(opts: DeleteOptions<'_>) -> Result<()> { for m in &matched { match apply_delete_one(&client, &ado_ctx, &auth, m.id, opts.name).await { Ok(()) => { - println!(" ✓ '{}' removed from '{}' (id={})", opts.name, m.name, m.id); + println!( + " ✓ '{}' removed from '{}' (id={})", + opts.name, m.name, m.id + ); success += 1; } Err(e) => { @@ -547,6 +676,8 @@ pub async fn run_set_github_token( value_stdin: false, dry_run, definition_ids, + all_repos: false, + source: None, }) .await } @@ -604,6 +735,86 @@ mod tests { assert_eq!(out["variables"]["FOO"]["allowOverride"], false); } + // ============ resolve_for_command precondition ============ + + #[tokio::test] + async fn source_without_all_repos_bails_when_no_git_remote() { + // CurrentRepo scope + empty repo_name = no git remote was + // detected. `--source` users hitting this case must get a + // targeted error mentioning `--all-repos`, not a generic + // "no pipelines found" further down the pipeline. + let ctx = AdoContext { + org_url: "https://dev.azure.com/example".to_string(), + project: "p".to_string(), + repo_name: String::new(), + }; + let auth = AdoAuth::Pat("token".to_string()); + let client = reqwest::Client::builder().build().expect("client builds"); + let tmp = tempfile::tempdir().unwrap(); + + let err = resolve_for_command( + &client, + &ctx, + &auth, + None, + false, + Some("agents/foo.md"), + tmp.path(), + ) + .await + .expect_err("expected bail"); + + let msg = format!("{err}"); + assert!( + msg.contains("--all-repos"), + "error should suggest --all-repos; got: {msg}" + ); + assert!( + msg.contains("no Azure DevOps git remote"), + "error should explain the root cause; got: {msg}" + ); + } + + #[tokio::test] + async fn source_with_all_repos_bails_when_org_url_unresolvable() { + // `--all-repos --source` still needs the current (org, repo) + // to disambiguate same-named source paths via the marker's + // origin fields. If `org_url` doesn't yield an org slug (or + // `repo_name` is empty), the marker-origin filter would + // silently exclude every strict marker — surface a targeted + // error instead. + // + // Empty `org_url` is the realistic failure mode: a hand-built + // AdoContext from `--org "" --project p` or a corrupted ADO + // remote that parsed past `parse_ado_remote` would land here. + let ctx = AdoContext { + org_url: String::new(), // org_name() resolves to None + project: "p".to_string(), + repo_name: "some-repo".to_string(), + }; + let auth = AdoAuth::Pat("token".to_string()); + let client = reqwest::Client::builder().build().expect("client builds"); + let tmp = tempfile::tempdir().unwrap(); + + let err = resolve_for_command( + &client, + &ctx, + &auth, + None, + true, // --all-repos + Some("agents/foo.md"), + tmp.path(), + ) + .await + .expect_err("expected bail"); + + let msg = format!("{err}"); + assert!( + msg.contains("--source needs the current repository's"), + "error should explain the root cause; got: {msg}" + ); + } + #[test] fn set_preserves_other_variables() { let def = serde_json::json!({ diff --git a/src/tools/cache_memory/extension.rs b/src/tools/cache_memory/extension.rs index b58a2ee8..7fef1312 100644 --- a/src/tools/cache_memory/extension.rs +++ b/src/tools/cache_memory/extension.rs @@ -1,4 +1,4 @@ -use crate::compile::extensions::{CompilerExtension, ExtensionPhase}; +use crate::compile::extensions::{CompileContext, CompilerExtension, ExtensionPhase}; use crate::compile::types::CacheMemoryToolConfig; /// Cache memory tool extension. @@ -28,7 +28,7 @@ impl CompilerExtension for CacheMemoryExtension { ExtensionPhase::Tool } - fn prepare_steps(&self) -> Vec { + fn prepare_steps(&self, _ctx: &CompileContext) -> Vec { vec![generate_memory_download()] } diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 1ced57d8..f6fad546 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -3673,6 +3673,103 @@ fn assert_valid_yaml(compiled: &str, fixture_name: &str) { ); } +// ─── ado-aw marker step (always-on extension) ─────────────────────────────── + +/// Assert that compiled YAML carries exactly one `# ado-aw-metadata: {…}` +/// marker line whose JSON includes the expected source path and target. +/// +/// The marker step is injected by the always-on `ado-aw-marker` compiler +/// extension and is the canonical surface project-scope discovery uses +/// to find ado-aw pipelines in expanded YAML (see `src/detect.rs::parse_marker_step`). +fn assert_marker_step_present( + compiled: &str, + expected_source_suffix: &str, + expected_target: &str, + fixture_name: &str, +) { + let marker_lines: Vec<&str> = compiled + .lines() + .filter(|l| l.trim_start().starts_with("# ado-aw-metadata:")) + .collect(); + assert_eq!( + marker_lines.len(), + 1, + "{fixture_name}: expected exactly one '# ado-aw-metadata:' marker in compiled YAML, found {}\nLines: {:#?}", + marker_lines.len(), + marker_lines, + ); + let line = marker_lines[0]; + assert!( + line.contains(&format!("\"target\":\"{expected_target}\"")), + "{fixture_name}: marker line does not declare target={expected_target}: {line}" + ); + assert!( + line.contains("\"schema\":1"), + "{fixture_name}: marker line missing schema=1: {line}" + ); + assert!( + line.contains(&format!("\"source\":\"")) && line.contains(expected_source_suffix), + "{fixture_name}: marker line does not include source suffix {expected_source_suffix}: {line}" + ); + // The runtime echo on the next line should mirror the same data + // (this is the human-facing build-log surface). + assert!( + compiled.contains("ado-aw metadata: source="), + "{fixture_name}: compiled YAML missing runtime echo line for ado-aw marker" + ); + // displayName: "ado-aw" identifies the injected step uniquely. + assert!( + compiled.contains("displayName: \"ado-aw\""), + "{fixture_name}: compiled YAML missing displayName: \"ado-aw\" on injected step" + ); +} + +#[test] +fn test_marker_step_present_in_standalone_target() { + let compiled = compile_fixture("minimal-agent.md"); + assert_marker_step_present(&compiled, "minimal-agent.md", "standalone", "minimal-agent.md"); +} + +#[test] +fn test_marker_step_present_in_1es_target() { + let compiled = compile_fixture("1es-test-agent.md"); + assert_marker_step_present(&compiled, "1es-test-agent.md", "1es", "1es-test-agent.md"); +} + +#[test] +fn test_marker_step_present_in_job_target() { + let compiled = compile_fixture("job-agent.md"); + assert_marker_step_present(&compiled, "job-agent.md", "job", "job-agent.md"); +} + +#[test] +fn test_marker_step_present_in_stage_target() { + let compiled = compile_fixture("stage-agent.md"); + assert_marker_step_present(&compiled, "stage-agent.md", "stage", "stage-agent.md"); +} + +/// Regression: the always-on `ado-aw-marker` extension used to inject +/// the marker step via `setup_steps`, which forced every compiled +/// pipeline to spawn a dedicated Setup job (a whole pool agent + the +/// extra build-log noise) just to emit a single metadata comment. +/// After moving emission to `prepare_steps`, the marker lives inside +/// the always-present Agent job — a minimal fixture without `setup:`, +/// PR filters, or other extensions that need Setup must NOT emit a +/// `- job: Setup` block. +#[test] +fn test_marker_does_not_create_setup_job_for_minimal_pipeline() { + let compiled = compile_fixture("minimal-agent.md"); + assert!( + !compiled.contains("- job: Setup"), + "minimal pipeline must not emit a Setup job just for the ado-aw marker; got:\n{compiled}" + ); + // Still must carry the marker — just inside the Agent job now. + assert!( + compiled.contains("# ado-aw-metadata:"), + "minimal pipeline must still carry the marker line:\n{compiled}" + ); +} + /// Test that the 1ES fixture produces valid YAML with correct structure #[test] fn test_1es_compiled_output_is_valid_yaml() {