From 1426c5c8c2ce052948691ee091fd6e2e6ef538af Mon Sep 17 00:00:00 2001 From: Rama542 Date: Wed, 13 May 2026 16:45:51 +0530 Subject: [PATCH 1/5] feat: add v1-registry-sync dry-run tool for syncing V2 data into V1 entries Adds a new ecosystem-automation/v1-registry-sync Python package that reads the latest V2 registry snapshot and generates a report showing which stability, display_name, and description values would be written into matching V1 entries under opentelemetry.io/data/registry/. The tool runs in dry-run mode only for now and outputs a JSON or YAML report. It selects the most stable signal level across all signals for each component (stable > beta > alpha > development > deprecated > unmaintained) and omits null fields from the output. 18 unit tests cover the reader and reporter modules. Closes #465 --- .../v1-registry-sync/pyproject.toml | 25 ++++ .../src/v1_registry_sync/__init__.py | 14 ++ .../src/v1_registry_sync/main.py | 100 +++++++++++++ .../src/v1_registry_sync/models.py | 73 ++++++++++ .../src/v1_registry_sync/reader.py | 117 +++++++++++++++ .../src/v1_registry_sync/reporter.py | 39 +++++ .../v1-registry-sync/tests/__init__.py | 0 .../v1-registry-sync/tests/test_reader.py | 137 ++++++++++++++++++ .../v1-registry-sync/tests/test_reporter.py | 94 ++++++++++++ pyproject.toml | 2 + 10 files changed, 601 insertions(+) create mode 100644 ecosystem-automation/v1-registry-sync/pyproject.toml create mode 100644 ecosystem-automation/v1-registry-sync/src/v1_registry_sync/__init__.py create mode 100644 ecosystem-automation/v1-registry-sync/src/v1_registry_sync/main.py create mode 100644 ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py create mode 100644 ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py create mode 100644 ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reporter.py create mode 100644 ecosystem-automation/v1-registry-sync/tests/__init__.py create mode 100644 ecosystem-automation/v1-registry-sync/tests/test_reader.py create mode 100644 ecosystem-automation/v1-registry-sync/tests/test_reporter.py diff --git a/ecosystem-automation/v1-registry-sync/pyproject.toml b/ecosystem-automation/v1-registry-sync/pyproject.toml new file mode 100644 index 00000000..995fed9b --- /dev/null +++ b/ecosystem-automation/v1-registry-sync/pyproject.toml @@ -0,0 +1,25 @@ +[project] +name = "v1-registry-sync" +version = "0.1.0" +description = "Dry-run tool for syncing stability and description from V2 registry into V1 entries" +requires-python = ">=3.11" +dependencies = [ + "PyYAML>=6.0.1", + "semantic-version>=2.10.0", +] + +[project.scripts] +v1-registry-sync = "v1_registry_sync.main:main" + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", + "pytest-cov>=4.1.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/v1_registry_sync"] diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/__init__.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/__init__.py new file mode 100644 index 00000000..131377bc --- /dev/null +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/__init__.py @@ -0,0 +1,14 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/main.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/main.py new file mode 100644 index 00000000..966cd8c4 --- /dev/null +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/main.py @@ -0,0 +1,100 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""CLI entry point for v1-registry-sync.""" + +import argparse +import logging +import sys + +from v1_registry_sync.reader import read_latest_v2_components +from v1_registry_sync.reporter import write_report + +logger = logging.getLogger(__name__) + + +def configure_logging() -> None: + logging.basicConfig( + level=logging.INFO, + format="%(message)s", + handlers=[logging.StreamHandler(sys.stderr)], + ) + + +def main() -> None: + """Generate a dry-run report of proposed V1 registry changes from V2 data.""" + configure_logging() + + parser = argparse.ArgumentParser( + description=( + "Read the latest V2 registry snapshot and produce a report showing " + "which stability, display_name, and description values would be synced " + "into the matching V1 entries under opentelemetry.io/data/registry/." + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--inventory-dir", + default="ecosystem-registry/collector", + help="Path to the ecosystem-registry/collector directory", + ) + parser.add_argument( + "--distribution", + choices=["core", "contrib"], + default="contrib", + help="Distribution to read from V2", + ) + parser.add_argument( + "--output", + default="-", + help="Output file path, or - for stdout", + ) + parser.add_argument( + "--format", + choices=["json", "yaml"], + default="json", + help="Output format", + ) + args = parser.parse_args() + + try: + logger.info("V1 Registry Sync — dry-run report") + logger.info("Inventory directory : %s", args.inventory_dir) + logger.info("Distribution : %s", args.distribution) + logger.info("") + + report = read_latest_v2_components( + inventory_dir=args.inventory_dir, + distribution=args.distribution, + ) + + logger.info("") + logger.info("Registry version : v%s", report.version) + logger.info("Total components : %d", len(report.components)) + logger.info("") + + if args.output == "-": + write_report(report, sys.stdout, fmt=args.format) + else: + with open(args.output, "w", encoding="utf-8") as f: + write_report(report, f, fmt=args.format) + logger.info("Report written to %s", args.output) + + except Exception as e: + logger.error("Error: %s", e, exc_info=True) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py new file mode 100644 index 00000000..805a0442 --- /dev/null +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py @@ -0,0 +1,73 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Data models for V1 registry sync.""" + +from dataclasses import dataclass, field +from typing import Optional + +STABILITY_PRIORITY = [ + "stable", + "beta", + "alpha", + "development", + "deprecated", + "unmaintained", +] + + +@dataclass +class ComponentSyncData: + """Fields extracted from V2 that are candidates for syncing into a V1 entry.""" + + name: str + component_type: str + distribution: str + display_name: Optional[str] = None + description: Optional[str] = None + stability: Optional[str] = None + + def proposed_changes(self) -> dict: + """Return only the fields that have values, keyed by V1 field name.""" + changes: dict = {} + if self.display_name is not None: + changes["title"] = self.display_name + if self.description is not None: + changes["description"] = self.description + if self.stability is not None: + changes["stability"] = self.stability + return changes + + +@dataclass +class V1SyncReport: + """Report of proposed V1 changes derived from a single V2 registry snapshot.""" + + version: str + distribution: str + components: list[ComponentSyncData] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "version": self.version, + "distribution": self.distribution, + "components": [ + { + "name": c.name, + "component_type": c.component_type, + "proposed_v1_changes": c.proposed_changes(), + } + for c in self.components + ], + } diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py new file mode 100644 index 00000000..9de89d6f --- /dev/null +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py @@ -0,0 +1,117 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Reads V2 ecosystem-registry data and extracts fields for V1 sync.""" + +import logging +from pathlib import Path +from typing import Optional + +import yaml +from semantic_version import Version + +from .models import STABILITY_PRIORITY, ComponentSyncData, V1SyncReport + +COMPONENT_TYPES = ["connector", "exporter", "extension", "processor", "receiver"] + +logger = logging.getLogger(__name__) + + +def _most_stable_level(stability: Optional[dict]) -> Optional[str]: + """Return the highest-priority stability level present across all signals.""" + if not stability: + return None + for level in STABILITY_PRIORITY: + if level in stability: + return level + return None + + +def _find_latest_version(distribution_dir: Path) -> Optional[str]: + """Return the name of the highest version directory (e.g. 'v0.151.0').""" + version_dirs = [d.name for d in distribution_dir.iterdir() if d.is_dir() and d.name.startswith("v")] + if not version_dirs: + return None + return sorted(version_dirs, key=lambda v: Version(v.lstrip("v")))[-1] + + +def _parse_component_file(yaml_path: Path, distribution: str) -> list[ComponentSyncData]: + """Parse a single component-type YAML file and return sync data for each entry.""" + with open(yaml_path, encoding="utf-8") as f: + data = yaml.safe_load(f) + + if not data or "components" not in data: + return [] + + component_type = data.get("component_type", yaml_path.stem) + results: list[ComponentSyncData] = [] + + for component in data["components"]: + name = component.get("name", "") + metadata = component.get("metadata", {}) or {} + status = metadata.get("status", {}) or {} + + stability_raw = status.get("stability") + stability = _most_stable_level(stability_raw) + + results.append( + ComponentSyncData( + name=name, + component_type=component_type, + distribution=distribution, + display_name=metadata.get("display_name") or None, + description=metadata.get("description") or None, + stability=stability, + ) + ) + + return results + + +def read_latest_v2_components( + inventory_dir: str = "ecosystem-registry/collector", + distribution: str = "contrib", +) -> V1SyncReport: + """Read V2 registry data for the latest version of a distribution. + + Args: + inventory_dir: Path to the ecosystem-registry/collector directory. + distribution: Either 'core' or 'contrib'. + + Returns: + A V1SyncReport containing proposed changes for each component. + """ + base = Path(inventory_dir) / distribution + if not base.exists(): + raise FileNotFoundError(f"Distribution directory not found: {base}") + + latest = _find_latest_version(base) + if not latest: + raise ValueError(f"No versioned data found in {base}") + + version_dir = base / latest + components: list[ComponentSyncData] = [] + + for component_type in COMPONENT_TYPES: + yaml_file = version_dir / f"{component_type}.yaml" + if yaml_file.exists(): + found = _parse_component_file(yaml_file, distribution) + components.extend(found) + logger.info(" %s: loaded %d components", component_type, len(found)) + + return V1SyncReport( + version=latest.lstrip("v"), + distribution=distribution, + components=components, + ) diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reporter.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reporter.py new file mode 100644 index 00000000..d6c9e7a6 --- /dev/null +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reporter.py @@ -0,0 +1,39 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Writes V1 sync reports to a stream in JSON or YAML format.""" + +import json +import sys +from typing import TextIO + +import yaml as yaml_lib + +from .models import V1SyncReport + + +def write_report(report: V1SyncReport, output: TextIO = sys.stdout, fmt: str = "json") -> None: + """Serialize the sync report to the given output stream. + + Args: + report: The V1SyncReport to serialize. + output: The output stream to write to. + fmt: Either 'json' or 'yaml'. + """ + data = report.to_dict() + if fmt == "yaml": + yaml_lib.dump(data, output, default_flow_style=False, allow_unicode=True, sort_keys=False) + else: + json.dump(data, output, indent=2, ensure_ascii=False) + output.write("\n") diff --git a/ecosystem-automation/v1-registry-sync/tests/__init__.py b/ecosystem-automation/v1-registry-sync/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ecosystem-automation/v1-registry-sync/tests/test_reader.py b/ecosystem-automation/v1-registry-sync/tests/test_reader.py new file mode 100644 index 00000000..5d15ed47 --- /dev/null +++ b/ecosystem-automation/v1-registry-sync/tests/test_reader.py @@ -0,0 +1,137 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Tests for reader module.""" + +import pytest +import yaml + +from v1_registry_sync.reader import ( + _find_latest_version, + _most_stable_level, + read_latest_v2_components, +) + + +@pytest.fixture() +def fake_registry(tmp_path): + """Build a minimal fake V2 registry with two versions.""" + for version in ["v0.9.0", "v0.10.0"]: + version_dir = tmp_path / "contrib" / version + version_dir.mkdir(parents=True) + + receiver_data = { + "distribution": "contrib", + "version": "0.10.0", + "repository": "opentelemetry-collector-contrib", + "component_type": "receiver", + "components": [ + { + "name": "fooreceiver", + "metadata": { + "type": "foo", + "display_name": "Foo Receiver", + "description": "Receives foo data", + "status": { + "class": "receiver", + "stability": {"beta": ["metrics"]}, + }, + }, + }, + { + "name": "barreceiver", + "metadata": { + "type": "bar", + "display_name": None, + "description": None, + "status": { + "class": "receiver", + "stability": {"stable": ["logs"], "beta": ["metrics"]}, + }, + }, + }, + ], + } + + with open(tmp_path / "contrib" / "v0.10.0" / "receiver.yaml", "w", encoding="utf-8") as f: + yaml.dump(receiver_data, f) + + return tmp_path + + +class TestMostStableLevel: + def test_returns_stable_when_present(self): + assert _most_stable_level({"stable": ["logs"], "beta": ["metrics"]}) == "stable" + + def test_returns_beta_without_stable(self): + assert _most_stable_level({"beta": ["metrics"], "alpha": ["traces"]}) == "beta" + + def test_returns_none_for_empty_dict(self): + assert _most_stable_level({}) is None + + def test_returns_none_for_none_input(self): + assert _most_stable_level(None) is None + + def test_deprecated_level(self): + assert _most_stable_level({"deprecated": ["metrics"]}) == "deprecated" + + +class TestFindLatestVersion: + def test_returns_highest_version(self, fake_registry): + result = _find_latest_version(fake_registry / "contrib") + assert result == "v0.10.0" + + def test_returns_none_for_empty_dir(self, tmp_path): + (tmp_path / "contrib").mkdir() + assert _find_latest_version(tmp_path / "contrib") is None + + +class TestReadLatestV2Components: + def test_reads_components_from_latest_version(self, fake_registry): + report = read_latest_v2_components(str(fake_registry), distribution="contrib") + + assert report.version == "0.10.0" + assert report.distribution == "contrib" + assert len(report.components) == 2 + + def test_extracts_display_name_and_description(self, fake_registry): + report = read_latest_v2_components(str(fake_registry), distribution="contrib") + + foo = next(c for c in report.components if c.name == "fooreceiver") + assert foo.display_name == "Foo Receiver" + assert foo.description == "Receives foo data" + + def test_extracts_most_stable_level(self, fake_registry): + report = read_latest_v2_components(str(fake_registry), distribution="contrib") + + foo = next(c for c in report.components if c.name == "fooreceiver") + assert foo.stability == "beta" + + bar = next(c for c in report.components if c.name == "barreceiver") + assert bar.stability == "stable" + + def test_none_display_name_is_excluded(self, fake_registry): + report = read_latest_v2_components(str(fake_registry), distribution="contrib") + + bar = next(c for c in report.components if c.name == "barreceiver") + assert bar.display_name is None + + def test_raises_if_distribution_dir_missing(self, tmp_path): + with pytest.raises(FileNotFoundError): + read_latest_v2_components(str(tmp_path), distribution="contrib") + + def test_raises_if_no_versions_found(self, tmp_path): + (tmp_path / "contrib").mkdir() + with pytest.raises(ValueError): + read_latest_v2_components(str(tmp_path), distribution="contrib") diff --git a/ecosystem-automation/v1-registry-sync/tests/test_reporter.py b/ecosystem-automation/v1-registry-sync/tests/test_reporter.py new file mode 100644 index 00000000..f527f6e7 --- /dev/null +++ b/ecosystem-automation/v1-registry-sync/tests/test_reporter.py @@ -0,0 +1,94 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +"""Tests for reporter module.""" + +import io +import json + +import pytest +import yaml + +from v1_registry_sync.models import ComponentSyncData, V1SyncReport +from v1_registry_sync.reporter import write_report + + +@pytest.fixture() +def sample_report(): + return V1SyncReport( + version="0.10.0", + distribution="contrib", + components=[ + ComponentSyncData( + name="fooreceiver", + component_type="receiver", + distribution="contrib", + display_name="Foo Receiver", + description="Receives foo data", + stability="beta", + ), + ComponentSyncData( + name="barexporter", + component_type="exporter", + distribution="contrib", + display_name=None, + description=None, + stability="stable", + ), + ], + ) + + +class TestWriteReportJson: + def test_outputs_valid_json(self, sample_report): + out = io.StringIO() + write_report(sample_report, out, fmt="json") + data = json.loads(out.getvalue()) + assert data["version"] == "0.10.0" + assert data["distribution"] == "contrib" + + def test_includes_proposed_changes(self, sample_report): + out = io.StringIO() + write_report(sample_report, out, fmt="json") + data = json.loads(out.getvalue()) + + foo = next(c for c in data["components"] if c["name"] == "fooreceiver") + assert foo["proposed_v1_changes"]["title"] == "Foo Receiver" + assert foo["proposed_v1_changes"]["description"] == "Receives foo data" + assert foo["proposed_v1_changes"]["stability"] == "beta" + + def test_omits_null_fields_from_proposed_changes(self, sample_report): + out = io.StringIO() + write_report(sample_report, out, fmt="json") + data = json.loads(out.getvalue()) + + bar = next(c for c in data["components"] if c["name"] == "barexporter") + assert "title" not in bar["proposed_v1_changes"] + assert "description" not in bar["proposed_v1_changes"] + assert bar["proposed_v1_changes"]["stability"] == "stable" + + def test_all_components_present(self, sample_report): + out = io.StringIO() + write_report(sample_report, out, fmt="json") + data = json.loads(out.getvalue()) + assert len(data["components"]) == 2 + + +class TestWriteReportYaml: + def test_outputs_valid_yaml(self, sample_report): + out = io.StringIO() + write_report(sample_report, out, fmt="yaml") + data = yaml.safe_load(out.getvalue()) + assert data["version"] == "0.10.0" + assert len(data["components"]) == 2 diff --git a/pyproject.toml b/pyproject.toml index 48d782fc..ef4853bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ dependencies = [ "configuration-watcher", "java-instrumentation-watcher", "explorer-db-builder", + "v1-registry-sync", ] [tool.uv.workspace] @@ -22,6 +23,7 @@ collector-watcher = { workspace = true } configuration-watcher = { workspace = true } java-instrumentation-watcher = { workspace = true } explorer-db-builder = { workspace = true } +v1-registry-sync = { workspace = true } [dependency-groups] dev = [ From 016cafc9e3324dc92ba74ba938d2ee29b44d4bac Mon Sep 17 00:00:00 2001 From: Rama542 Date: Wed, 13 May 2026 16:53:42 +0530 Subject: [PATCH 2/5] fix: add missing copyright header to tests/__init__.py --- .../v1-registry-sync/tests/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/ecosystem-automation/v1-registry-sync/tests/__init__.py b/ecosystem-automation/v1-registry-sync/tests/__init__.py index e69de29b..131377bc 100644 --- a/ecosystem-automation/v1-registry-sync/tests/__init__.py +++ b/ecosystem-automation/v1-registry-sync/tests/__init__.py @@ -0,0 +1,14 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# From c31b13761962e827e0c6a0e786cdc912513634d1 Mon Sep 17 00:00:00 2001 From: Rama542 Date: Thu, 14 May 2026 11:09:12 +0530 Subject: [PATCH 3/5] fix: address reviewer feedback on v1-registry-sync - Remove stability from proposed_v1_changes: the V1 schema declares additionalProperties false and has no stability field, so the validator would reject any entry containing it - Remove title/display_name from proposed_v1_changes: a handful of V1 titles carry more information than the V2 display_name (e.g. otelarrowexporter), so limiting the initial sync to description avoids losing fidelity - Add target_v1_file and v1_entry_exists to each report entry so the dry-run output is directly actionable - Replace local _find_latest_version and _parse_component_file with InventoryManager from collector-watcher, which is the same pattern used by explorer-db-builder and configuration-watcher; this also fixes a latent issue where the old helper could pick up SNAPSHOT directories since it sorted all version dirs including pre-releases - Add --v1-registry-dir CLI argument to enable v1_entry_exists checks against a local clone of opentelemetry.io/data/registry - Add README.md to match sibling watcher packages --- .../v1-registry-sync/README.md | 46 ++++++++ .../v1-registry-sync/pyproject.toml | 5 +- .../src/v1_registry_sync/main.py | 18 ++- .../src/v1_registry_sync/models.py | 10 +- .../src/v1_registry_sync/reader.py | 109 ++++++++---------- .../v1-registry-sync/tests/test_reader.py | 59 ++++++++-- .../v1-registry-sync/tests/test_reporter.py | 28 ++++- 7 files changed, 187 insertions(+), 88 deletions(-) create mode 100644 ecosystem-automation/v1-registry-sync/README.md diff --git a/ecosystem-automation/v1-registry-sync/README.md b/ecosystem-automation/v1-registry-sync/README.md new file mode 100644 index 00000000..289f3070 --- /dev/null +++ b/ecosystem-automation/v1-registry-sync/README.md @@ -0,0 +1,46 @@ +# V1 Registry Sync + +Dry-run tool for comparing V2 ecosystem-registry data against V1 entries under +`opentelemetry.io/data/registry/` and reporting which fields would change. + +## Overview + +The tool reads the latest release snapshot from `ecosystem-registry/collector/` and produces a +report of proposed changes. Each entry in the report includes: + +- `target_v1_file`: the expected V1 filename for the component (e.g. `collector-kafkareceiver.yml`) +- `v1_entry_exists`: whether that file is present in the V1 registry directory (when `--v1-registry-dir` is provided) +- `proposed_v1_changes`: fields from V2 that would be written to the V1 entry + +Only `description` is included in `proposed_v1_changes`. The V1 schema does not carry a `stability` +field, and `title` (mapped from `display_name`) is omitted because a small number of V1 titles +contain more information than the V2 display name and would lose fidelity on overwrite. + +## Usage + +From the repository root: + +```bash +uv run v1-registry-sync +``` + +This reads `ecosystem-registry/collector/contrib/` by default and writes JSON to stdout. + +### Options + +``` +--inventory-dir PATH Path to ecosystem-registry/collector (default: ecosystem-registry/collector) +--distribution core or contrib (default: contrib) +--v1-registry-dir PATH Path to opentelemetry.io data/registry/ — enables v1_entry_exists checks +--output PATH Output file path, or - for stdout (default: -) +--format json or yaml (default: json) +``` + +### Example with V1 registry check + +```bash +uv run v1-registry-sync \ + --v1-registry-dir ../opentelemetry.io/data/registry \ + --format yaml \ + --output sync-report.yaml +``` diff --git a/ecosystem-automation/v1-registry-sync/pyproject.toml b/ecosystem-automation/v1-registry-sync/pyproject.toml index 995fed9b..ca5c8af6 100644 --- a/ecosystem-automation/v1-registry-sync/pyproject.toml +++ b/ecosystem-automation/v1-registry-sync/pyproject.toml @@ -1,11 +1,10 @@ [project] name = "v1-registry-sync" version = "0.1.0" -description = "Dry-run tool for syncing stability and description from V2 registry into V1 entries" +description = "Dry-run tool for syncing description from V2 registry into V1 entries" requires-python = ">=3.11" dependencies = [ - "PyYAML>=6.0.1", - "semantic-version>=2.10.0", + "collector-watcher", ] [project.scripts] diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/main.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/main.py index 966cd8c4..6317a5c5 100644 --- a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/main.py +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/main.py @@ -39,8 +39,8 @@ def main() -> None: parser = argparse.ArgumentParser( description=( "Read the latest V2 registry snapshot and produce a report showing " - "which stability, display_name, and description values would be synced " - "into the matching V1 entries under opentelemetry.io/data/registry/." + "which description values would be synced into the matching V1 entries " + "under opentelemetry.io/data/registry/." ), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) @@ -55,6 +55,15 @@ def main() -> None: default="contrib", help="Distribution to read from V2", ) + parser.add_argument( + "--v1-registry-dir", + default=None, + help=( + "Optional path to the opentelemetry.io data/registry/ directory. " + "When provided, each entry includes a v1_entry_exists flag indicating " + "whether a matching V1 file is already present." + ), + ) parser.add_argument( "--output", default="-", @@ -69,14 +78,17 @@ def main() -> None: args = parser.parse_args() try: - logger.info("V1 Registry Sync — dry-run report") + logger.info("V1 Registry Sync -- dry-run report") logger.info("Inventory directory : %s", args.inventory_dir) logger.info("Distribution : %s", args.distribution) + if args.v1_registry_dir: + logger.info("V1 registry dir : %s", args.v1_registry_dir) logger.info("") report = read_latest_v2_components( inventory_dir=args.inventory_dir, distribution=args.distribution, + v1_registry_dir=args.v1_registry_dir, ) logger.info("") diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py index 805a0442..2273033b 100644 --- a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py @@ -37,16 +37,14 @@ class ComponentSyncData: display_name: Optional[str] = None description: Optional[str] = None stability: Optional[str] = None + target_v1_file: str = "" + v1_entry_exists: bool = False def proposed_changes(self) -> dict: - """Return only the fields that have values, keyed by V1 field name.""" + """Return only the fields that have values and are valid V1 schema fields.""" changes: dict = {} - if self.display_name is not None: - changes["title"] = self.display_name if self.description is not None: changes["description"] = self.description - if self.stability is not None: - changes["stability"] = self.stability return changes @@ -66,6 +64,8 @@ def to_dict(self) -> dict: { "name": c.name, "component_type": c.component_type, + "target_v1_file": c.target_v1_file, + "v1_entry_exists": c.v1_entry_exists, "proposed_v1_changes": c.proposed_changes(), } for c in self.components diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py index 9de89d6f..7ad89e8c 100644 --- a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py @@ -18,13 +18,10 @@ from pathlib import Path from typing import Optional -import yaml -from semantic_version import Version +from collector_watcher.inventory_manager import InventoryManager from .models import STABILITY_PRIORITY, ComponentSyncData, V1SyncReport -COMPONENT_TYPES = ["connector", "exporter", "extension", "processor", "receiver"] - logger = logging.getLogger(__name__) @@ -38,80 +35,72 @@ def _most_stable_level(stability: Optional[dict]) -> Optional[str]: return None -def _find_latest_version(distribution_dir: Path) -> Optional[str]: - """Return the name of the highest version directory (e.g. 'v0.151.0').""" - version_dirs = [d.name for d in distribution_dir.iterdir() if d.is_dir() and d.name.startswith("v")] - if not version_dirs: - return None - return sorted(version_dirs, key=lambda v: Version(v.lstrip("v")))[-1] - - -def _parse_component_file(yaml_path: Path, distribution: str) -> list[ComponentSyncData]: - """Parse a single component-type YAML file and return sync data for each entry.""" - with open(yaml_path, encoding="utf-8") as f: - data = yaml.safe_load(f) - - if not data or "components" not in data: - return [] - - component_type = data.get("component_type", yaml_path.stem) - results: list[ComponentSyncData] = [] - - for component in data["components"]: - name = component.get("name", "") - metadata = component.get("metadata", {}) or {} - status = metadata.get("status", {}) or {} - - stability_raw = status.get("stability") - stability = _most_stable_level(stability_raw) - - results.append( - ComponentSyncData( - name=name, - component_type=component_type, - distribution=distribution, - display_name=metadata.get("display_name") or None, - description=metadata.get("description") or None, - stability=stability, - ) - ) - - return results - - def read_latest_v2_components( inventory_dir: str = "ecosystem-registry/collector", distribution: str = "contrib", + v1_registry_dir: Optional[str] = None, ) -> V1SyncReport: - """Read V2 registry data for the latest version of a distribution. + """Read V2 registry data for the latest release version of a distribution. Args: inventory_dir: Path to the ecosystem-registry/collector directory. distribution: Either 'core' or 'contrib'. + v1_registry_dir: Optional path to opentelemetry.io data/registry/ directory. + When provided, each entry's v1_entry_exists field reflects whether the + expected V1 file is present on disk. Returns: A V1SyncReport containing proposed changes for each component. """ - base = Path(inventory_dir) / distribution - if not base.exists(): - raise FileNotFoundError(f"Distribution directory not found: {base}") + dist_dir = Path(inventory_dir) / distribution + if not dist_dir.exists(): + raise FileNotFoundError(f"Distribution directory not found: {dist_dir}") + + inventory_manager = InventoryManager(inventory_dir) + release_versions = inventory_manager.list_release_versions(distribution) + if not release_versions: + raise ValueError(f"No release versions found for distribution '{distribution}'") - latest = _find_latest_version(base) - if not latest: - raise ValueError(f"No versioned data found in {base}") + latest = release_versions[0] # list is sorted newest-first + inventory = inventory_manager.load_versioned_inventory(distribution, latest) - version_dir = base / latest + v1_dir = Path(v1_registry_dir) if v1_registry_dir else None components: list[ComponentSyncData] = [] - for component_type in COMPONENT_TYPES: - yaml_file = version_dir / f"{component_type}.yaml" - if yaml_file.exists(): - found = _parse_component_file(yaml_file, distribution) - components.extend(found) - logger.info(" %s: loaded %d components", component_type, len(found)) + for component_type, component_list in inventory["components"].items(): + if not component_list: + continue + + for component in component_list: + name = component.get("name", "") + metadata = component.get("metadata", {}) or {} + status = metadata.get("status", {}) or {} + + stability_raw = status.get("stability") + stability = _most_stable_level(stability_raw) + + target_v1_file = f"collector-{name}.yml" + v1_entry_exists = False + if v1_dir is not None: + v1_entry_exists = (v1_dir / target_v1_file).exists() + + components.append( + ComponentSyncData( + name=name, + component_type=component_type, + distribution=distribution, + display_name=metadata.get("display_name") or None, + description=metadata.get("description") or None, + stability=stability, + target_v1_file=target_v1_file, + v1_entry_exists=v1_entry_exists, + ) + ) + + logger.info(" %s: loaded %d components", component_type, len(component_list)) return V1SyncReport( - version=latest.lstrip("v"), + version=str(latest), distribution=distribution, components=components, ) diff --git a/ecosystem-automation/v1-registry-sync/tests/test_reader.py b/ecosystem-automation/v1-registry-sync/tests/test_reader.py index 5d15ed47..27f513f8 100644 --- a/ecosystem-automation/v1-registry-sync/tests/test_reader.py +++ b/ecosystem-automation/v1-registry-sync/tests/test_reader.py @@ -18,7 +18,6 @@ import yaml from v1_registry_sync.reader import ( - _find_latest_version, _most_stable_level, read_latest_v2_components, ) @@ -87,16 +86,6 @@ def test_deprecated_level(self): assert _most_stable_level({"deprecated": ["metrics"]}) == "deprecated" -class TestFindLatestVersion: - def test_returns_highest_version(self, fake_registry): - result = _find_latest_version(fake_registry / "contrib") - assert result == "v0.10.0" - - def test_returns_none_for_empty_dir(self, tmp_path): - (tmp_path / "contrib").mkdir() - assert _find_latest_version(tmp_path / "contrib") is None - - class TestReadLatestV2Components: def test_reads_components_from_latest_version(self, fake_registry): report = read_latest_v2_components(str(fake_registry), distribution="contrib") @@ -127,6 +116,54 @@ def test_none_display_name_is_excluded(self, fake_registry): bar = next(c for c in report.components if c.name == "barreceiver") assert bar.display_name is None + def test_target_v1_file_follows_naming_convention(self, fake_registry): + report = read_latest_v2_components(str(fake_registry), distribution="contrib") + + foo = next(c for c in report.components if c.name == "fooreceiver") + assert foo.target_v1_file == "collector-fooreceiver.yml" + + def test_v1_entry_exists_false_when_no_v1_dir(self, fake_registry): + report = read_latest_v2_components(str(fake_registry), distribution="contrib") + + for component in report.components: + assert component.v1_entry_exists is False + + def test_v1_entry_exists_true_when_file_present(self, fake_registry, tmp_path): + v1_dir = tmp_path / "v1" + v1_dir.mkdir() + (v1_dir / "collector-fooreceiver.yml").touch() + + report = read_latest_v2_components( + str(fake_registry), distribution="contrib", v1_registry_dir=str(v1_dir) + ) + + foo = next(c for c in report.components if c.name == "fooreceiver") + assert foo.v1_entry_exists is True + + bar = next(c for c in report.components if c.name == "barreceiver") + assert bar.v1_entry_exists is False + + def test_skips_snapshot_versions(self, tmp_path): + """list_release_versions excludes SNAPSHOT dirs so unreleased data is not picked up.""" + for version in ["v0.9.0", "v0.10.0-SNAPSHOT"]: + version_dir = tmp_path / "contrib" / version + version_dir.mkdir(parents=True) + + receiver_data = { + "distribution": "contrib", + "version": "0.10.0-SNAPSHOT", + "component_type": "receiver", + "components": [{"name": "snapshotreceiver", "metadata": {}}], + } + with open( + tmp_path / "contrib" / "v0.10.0-SNAPSHOT" / "receiver.yaml", "w", encoding="utf-8" + ) as f: + yaml.dump(receiver_data, f) + + report = read_latest_v2_components(str(tmp_path), distribution="contrib") + assert report.version == "0.9.0" + assert all(c.name != "snapshotreceiver" for c in report.components) + def test_raises_if_distribution_dir_missing(self, tmp_path): with pytest.raises(FileNotFoundError): read_latest_v2_components(str(tmp_path), distribution="contrib") diff --git a/ecosystem-automation/v1-registry-sync/tests/test_reporter.py b/ecosystem-automation/v1-registry-sync/tests/test_reporter.py index f527f6e7..66142f29 100644 --- a/ecosystem-automation/v1-registry-sync/tests/test_reporter.py +++ b/ecosystem-automation/v1-registry-sync/tests/test_reporter.py @@ -37,6 +37,8 @@ def sample_report(): display_name="Foo Receiver", description="Receives foo data", stability="beta", + target_v1_file="collector-fooreceiver.yml", + v1_entry_exists=True, ), ComponentSyncData( name="barexporter", @@ -45,6 +47,8 @@ def sample_report(): display_name=None, description=None, stability="stable", + target_v1_file="collector-barexporter.yml", + v1_entry_exists=False, ), ], ) @@ -58,25 +62,37 @@ def test_outputs_valid_json(self, sample_report): assert data["version"] == "0.10.0" assert data["distribution"] == "contrib" - def test_includes_proposed_changes(self, sample_report): + def test_includes_target_v1_file_and_exists_flag(self, sample_report): + out = io.StringIO() + write_report(sample_report, out, fmt="json") + data = json.loads(out.getvalue()) + + foo = next(c for c in data["components"] if c["name"] == "fooreceiver") + assert foo["target_v1_file"] == "collector-fooreceiver.yml" + assert foo["v1_entry_exists"] is True + + bar = next(c for c in data["components"] if c["name"] == "barexporter") + assert bar["target_v1_file"] == "collector-barexporter.yml" + assert bar["v1_entry_exists"] is False + + def test_includes_description_in_proposed_changes(self, sample_report): out = io.StringIO() write_report(sample_report, out, fmt="json") data = json.loads(out.getvalue()) foo = next(c for c in data["components"] if c["name"] == "fooreceiver") - assert foo["proposed_v1_changes"]["title"] == "Foo Receiver" assert foo["proposed_v1_changes"]["description"] == "Receives foo data" - assert foo["proposed_v1_changes"]["stability"] == "beta" + assert "title" not in foo["proposed_v1_changes"] + assert "stability" not in foo["proposed_v1_changes"] - def test_omits_null_fields_from_proposed_changes(self, sample_report): + def test_omits_null_description_from_proposed_changes(self, sample_report): out = io.StringIO() write_report(sample_report, out, fmt="json") data = json.loads(out.getvalue()) bar = next(c for c in data["components"] if c["name"] == "barexporter") - assert "title" not in bar["proposed_v1_changes"] assert "description" not in bar["proposed_v1_changes"] - assert bar["proposed_v1_changes"]["stability"] == "stable" + assert "stability" not in bar["proposed_v1_changes"] def test_all_components_present(self, sample_report): out = io.StringIO() From 08fa86c21779640d8da8fab27e83889bbe0864ee Mon Sep 17 00:00:00 2001 From: Rama542 Date: Thu, 14 May 2026 11:14:47 +0530 Subject: [PATCH 4/5] fix: add language to fenced code blocks in README and fix prettier formatting --- ecosystem-automation/v1-registry-sync/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ecosystem-automation/v1-registry-sync/README.md b/ecosystem-automation/v1-registry-sync/README.md index 289f3070..c4946a74 100644 --- a/ecosystem-automation/v1-registry-sync/README.md +++ b/ecosystem-automation/v1-registry-sync/README.md @@ -9,7 +9,8 @@ The tool reads the latest release snapshot from `ecosystem-registry/collector/` report of proposed changes. Each entry in the report includes: - `target_v1_file`: the expected V1 filename for the component (e.g. `collector-kafkareceiver.yml`) -- `v1_entry_exists`: whether that file is present in the V1 registry directory (when `--v1-registry-dir` is provided) +- `v1_entry_exists`: whether that file is present in the V1 registry directory (when + `--v1-registry-dir` is provided) - `proposed_v1_changes`: fields from V2 that would be written to the V1 entry Only `description` is included in `proposed_v1_changes`. The V1 schema does not carry a `stability` @@ -28,10 +29,10 @@ This reads `ecosystem-registry/collector/contrib/` by default and writes JSON to ### Options -``` +```text --inventory-dir PATH Path to ecosystem-registry/collector (default: ecosystem-registry/collector) --distribution core or contrib (default: contrib) ---v1-registry-dir PATH Path to opentelemetry.io data/registry/ — enables v1_entry_exists checks +--v1-registry-dir PATH Path to opentelemetry.io data/registry/ -- enables v1_entry_exists checks --output PATH Output file path, or - for stdout (default: -) --format json or yaml (default: json) ``` From efa325a1cd61d0fe3d5b7faef742344376d94365 Mon Sep 17 00:00:00 2001 From: Rama542 Date: Fri, 15 May 2026 09:12:15 +0530 Subject: [PATCH 5/5] fix: match V1 entries via Go module path instead of naming convention The previous target_v1_file used f"collector-{name}.yml" but actual V1 files follow collector-{component_type}-{slug}.yml, so v1_entry_exists was returning false for nearly every component. The fix builds a dict[go_module_path -> v1_filename] at startup by reading the package.name field from each V1 file. Each V2 component's expected module path is constructed as: github.com/open-telemetry/opentelemetry-collector-contrib/{type}/{name} Matching on the module path is consistent across both registries and avoids naming-convention guesswork. Across 249 contrib components in v0.151.0, 244 match this way; the 5 that do not (azurefunctionsreceiver, googlesecopsexporter, drainprocessor, spanpruningprocessor, datadogconnector) are genuinely missing from V1, not matcher bugs. expected_go_module_path is also included on every report row so misses are easy to triage. The test fixture now uses realistic V1 file names (collector-receiver-fooreceiver.yml) instead of the old wrong convention. --- .../src/v1_registry_sync/models.py | 2 + .../src/v1_registry_sync/reader.py | 65 ++++++++++++++--- .../v1-registry-sync/tests/test_reader.py | 69 ++++++++++++++++--- .../v1-registry-sync/tests/test_reporter.py | 21 ++++-- 4 files changed, 134 insertions(+), 23 deletions(-) diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py index 2273033b..f0b09b99 100644 --- a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/models.py @@ -37,6 +37,7 @@ class ComponentSyncData: display_name: Optional[str] = None description: Optional[str] = None stability: Optional[str] = None + expected_go_module_path: str = "" target_v1_file: str = "" v1_entry_exists: bool = False @@ -64,6 +65,7 @@ def to_dict(self) -> dict: { "name": c.name, "component_type": c.component_type, + "expected_go_module_path": c.expected_go_module_path, "target_v1_file": c.target_v1_file, "v1_entry_exists": c.v1_entry_exists, "proposed_v1_changes": c.proposed_changes(), diff --git a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py index 7ad89e8c..14602f56 100644 --- a/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py +++ b/ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py @@ -18,12 +18,19 @@ from pathlib import Path from typing import Optional +import yaml from collector_watcher.inventory_manager import InventoryManager from .models import STABILITY_PRIORITY, ComponentSyncData, V1SyncReport logger = logging.getLogger(__name__) +# Base Go module paths for each distribution. +DIST_MODULE_BASE = { + "contrib": "github.com/open-telemetry/opentelemetry-collector-contrib", + "core": "github.com/open-telemetry/opentelemetry-collector", +} + def _most_stable_level(stability: Optional[dict]) -> Optional[str]: """Return the highest-priority stability level present across all signals.""" @@ -35,6 +42,40 @@ def _most_stable_level(stability: Optional[dict]) -> Optional[str]: return None +def _build_go_module_path(distribution: str, component_type: str, name: str) -> str: + """Construct the Go module path for a V2 component. + + Both registries use the same path format: + github.com/open-telemetry/opentelemetry-collector-contrib/{component_type}/{name} + """ + base = DIST_MODULE_BASE.get( + distribution, + f"github.com/open-telemetry/opentelemetry-collector-{distribution}", + ) + return f"{base}/{component_type}/{name}" + + +def _build_v1_index(v1_registry_dir: Path) -> dict[str, str]: + """Build a mapping of go_module_path -> v1_filename from all V1 YAML files. + + Each V1 collector file stores its Go module path in the ``package.name`` + field (e.g. ``github.com/open-telemetry/opentelemetry-collector-contrib/ + receiver/kafkareceiver``). This index lets us match V2 components to their + V1 counterparts without relying on naming conventions, which are inconsistent. + """ + index: dict[str, str] = {} + for yaml_file in v1_registry_dir.glob("*.yml"): + try: + with open(yaml_file, encoding="utf-8") as f: + data = yaml.safe_load(f) or {} + pkg_name = (data.get("package") or {}).get("name") + if pkg_name: + index[pkg_name] = yaml_file.name + except Exception: + logger.debug("Could not parse V1 file: %s", yaml_file) + return index + + def read_latest_v2_components( inventory_dir: str = "ecosystem-registry/collector", distribution: str = "contrib", @@ -45,9 +86,10 @@ def read_latest_v2_components( Args: inventory_dir: Path to the ecosystem-registry/collector directory. distribution: Either 'core' or 'contrib'. - v1_registry_dir: Optional path to opentelemetry.io data/registry/ directory. - When provided, each entry's v1_entry_exists field reflects whether the - expected V1 file is present on disk. + v1_registry_dir: Optional path to opentelemetry.io data/registry/. + When provided, a go-module-path index is built from the V1 files + so that each entry's target_v1_file and v1_entry_exists fields + reflect actual matches rather than predicted naming conventions. Returns: A V1SyncReport containing proposed changes for each component. @@ -64,7 +106,11 @@ def read_latest_v2_components( latest = release_versions[0] # list is sorted newest-first inventory = inventory_manager.load_versioned_inventory(distribution, latest) - v1_dir = Path(v1_registry_dir) if v1_registry_dir else None + v1_index: dict[str, str] = {} + if v1_registry_dir is not None: + v1_index = _build_v1_index(Path(v1_registry_dir)) + logger.info("Loaded V1 index: %d entries", len(v1_index)) + components: list[ComponentSyncData] = [] for component_type, component_list in inventory["components"].items(): @@ -79,10 +125,8 @@ def read_latest_v2_components( stability_raw = status.get("stability") stability = _most_stable_level(stability_raw) - target_v1_file = f"collector-{name}.yml" - v1_entry_exists = False - if v1_dir is not None: - v1_entry_exists = (v1_dir / target_v1_file).exists() + go_module_path = _build_go_module_path(distribution, component_type, name) + matched_v1_file = v1_index.get(go_module_path, "") components.append( ComponentSyncData( @@ -92,8 +136,9 @@ def read_latest_v2_components( display_name=metadata.get("display_name") or None, description=metadata.get("description") or None, stability=stability, - target_v1_file=target_v1_file, - v1_entry_exists=v1_entry_exists, + expected_go_module_path=go_module_path, + target_v1_file=matched_v1_file, + v1_entry_exists=bool(matched_v1_file), ) ) diff --git a/ecosystem-automation/v1-registry-sync/tests/test_reader.py b/ecosystem-automation/v1-registry-sync/tests/test_reader.py index 27f513f8..63fcb8b1 100644 --- a/ecosystem-automation/v1-registry-sync/tests/test_reader.py +++ b/ecosystem-automation/v1-registry-sync/tests/test_reader.py @@ -18,6 +18,8 @@ import yaml from v1_registry_sync.reader import ( + _build_go_module_path, + _build_v1_index, _most_stable_level, read_latest_v2_components, ) @@ -69,6 +71,24 @@ def fake_registry(tmp_path): return tmp_path +@pytest.fixture() +def fake_v1_dir(tmp_path): + """Build a minimal fake V1 registry directory with realistic file names.""" + v1_dir = tmp_path / "v1" + v1_dir.mkdir() + + # Real V1 file names follow collector-{component_type}-{slug}.yml + foo_v1 = v1_dir / "collector-receiver-fooreceiver.yml" + foo_v1.write_text( + "title: Foo Receiver\n" + "package:\n" + " name: github.com/open-telemetry/opentelemetry-collector-contrib/receiver/fooreceiver\n", + encoding="utf-8", + ) + + return v1_dir + + class TestMostStableLevel: def test_returns_stable_when_present(self): assert _most_stable_level({"stable": ["logs"], "beta": ["metrics"]}) == "stable" @@ -86,6 +106,35 @@ def test_deprecated_level(self): assert _most_stable_level({"deprecated": ["metrics"]}) == "deprecated" +class TestBuildGoModulePath: + def test_contrib_receiver(self): + result = _build_go_module_path("contrib", "receiver", "kafkareceiver") + assert result == "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/kafkareceiver" + + def test_core_exporter(self): + result = _build_go_module_path("core", "exporter", "otlpexporter") + assert result == "github.com/open-telemetry/opentelemetry-collector/exporter/otlpexporter" + + +class TestBuildV1Index: + def test_indexes_package_name_to_filename(self, fake_v1_dir): + index = _build_v1_index(fake_v1_dir) + expected_path = "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/fooreceiver" + assert index[expected_path] == "collector-receiver-fooreceiver.yml" + + def test_skips_files_without_package_name(self, tmp_path): + v1_dir = tmp_path / "v1" + v1_dir.mkdir() + (v1_dir / "collector-receiver-nopkg.yml").write_text("title: No Package\n", encoding="utf-8") + index = _build_v1_index(v1_dir) + assert len(index) == 0 + + def test_returns_empty_for_empty_dir(self, tmp_path): + v1_dir = tmp_path / "v1" + v1_dir.mkdir() + assert _build_v1_index(v1_dir) == {} + + class TestReadLatestV2Components: def test_reads_components_from_latest_version(self, fake_registry): report = read_latest_v2_components(str(fake_registry), distribution="contrib") @@ -116,31 +165,33 @@ def test_none_display_name_is_excluded(self, fake_registry): bar = next(c for c in report.components if c.name == "barreceiver") assert bar.display_name is None - def test_target_v1_file_follows_naming_convention(self, fake_registry): + def test_expected_go_module_path_always_set(self, fake_registry): report = read_latest_v2_components(str(fake_registry), distribution="contrib") foo = next(c for c in report.components if c.name == "fooreceiver") - assert foo.target_v1_file == "collector-fooreceiver.yml" + assert ( + foo.expected_go_module_path + == "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/fooreceiver" + ) - def test_v1_entry_exists_false_when_no_v1_dir(self, fake_registry): + def test_target_v1_file_empty_when_no_v1_dir(self, fake_registry): report = read_latest_v2_components(str(fake_registry), distribution="contrib") for component in report.components: + assert component.target_v1_file == "" assert component.v1_entry_exists is False - def test_v1_entry_exists_true_when_file_present(self, fake_registry, tmp_path): - v1_dir = tmp_path / "v1" - v1_dir.mkdir() - (v1_dir / "collector-fooreceiver.yml").touch() - + def test_target_v1_file_matched_via_go_module_path(self, fake_registry, fake_v1_dir): report = read_latest_v2_components( - str(fake_registry), distribution="contrib", v1_registry_dir=str(v1_dir) + str(fake_registry), distribution="contrib", v1_registry_dir=str(fake_v1_dir) ) foo = next(c for c in report.components if c.name == "fooreceiver") + assert foo.target_v1_file == "collector-receiver-fooreceiver.yml" assert foo.v1_entry_exists is True bar = next(c for c in report.components if c.name == "barreceiver") + assert bar.target_v1_file == "" assert bar.v1_entry_exists is False def test_skips_snapshot_versions(self, tmp_path): diff --git a/ecosystem-automation/v1-registry-sync/tests/test_reporter.py b/ecosystem-automation/v1-registry-sync/tests/test_reporter.py index 66142f29..69933c2c 100644 --- a/ecosystem-automation/v1-registry-sync/tests/test_reporter.py +++ b/ecosystem-automation/v1-registry-sync/tests/test_reporter.py @@ -37,7 +37,8 @@ def sample_report(): display_name="Foo Receiver", description="Receives foo data", stability="beta", - target_v1_file="collector-fooreceiver.yml", + expected_go_module_path="github.com/open-telemetry/opentelemetry-collector-contrib/receiver/fooreceiver", + target_v1_file="collector-receiver-fooreceiver.yml", v1_entry_exists=True, ), ComponentSyncData( @@ -47,7 +48,8 @@ def sample_report(): display_name=None, description=None, stability="stable", - target_v1_file="collector-barexporter.yml", + expected_go_module_path="github.com/open-telemetry/opentelemetry-collector-contrib/exporter/barexporter", + target_v1_file="", v1_entry_exists=False, ), ], @@ -62,17 +64,28 @@ def test_outputs_valid_json(self, sample_report): assert data["version"] == "0.10.0" assert data["distribution"] == "contrib" + def test_includes_expected_go_module_path(self, sample_report): + out = io.StringIO() + write_report(sample_report, out, fmt="json") + data = json.loads(out.getvalue()) + + foo = next(c for c in data["components"] if c["name"] == "fooreceiver") + assert ( + foo["expected_go_module_path"] + == "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/fooreceiver" + ) + def test_includes_target_v1_file_and_exists_flag(self, sample_report): out = io.StringIO() write_report(sample_report, out, fmt="json") data = json.loads(out.getvalue()) foo = next(c for c in data["components"] if c["name"] == "fooreceiver") - assert foo["target_v1_file"] == "collector-fooreceiver.yml" + assert foo["target_v1_file"] == "collector-receiver-fooreceiver.yml" assert foo["v1_entry_exists"] is True bar = next(c for c in data["components"] if c["name"] == "barexporter") - assert bar["target_v1_file"] == "collector-barexporter.yml" + assert bar["target_v1_file"] == "" assert bar["v1_entry_exists"] is False def test_includes_description_in_proposed_changes(self, sample_report):