Skip to content
Merged
25 changes: 25 additions & 0 deletions ecosystem-automation/v1-registry-sync/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Packaging metadata for the v1-registry-sync command-line tool.
[project]
name = "v1-registry-sync"
version = "0.1.0"
description = "Dry-run tool for syncing stability and description from V2 registry into V1 entries"
requires-python = ">=3.11"
# Runtime dependencies: PyYAML reads/writes YAML, semantic-version orders
# the versioned registry directories.
dependencies = [
"PyYAML>=6.0.1",
"semantic-version>=2.10.0",
]

# Console command name mapped to its "module:function" entry point.
[project.scripts]
v1-registry-sync = "v1_registry_sync.main:main"

# Extras used only for running the test suite.
[project.optional-dependencies]
dev = [
"pytest>=8.0.0",
"pytest-cov>=4.1.0",
]

# The package is built with hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# The importable package lives under src/, so point the wheel build at it.
[tool.hatch.build.targets.wheel]
packages = ["src/v1_registry_sync"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
100 changes: 100 additions & 0 deletions ecosystem-automation/v1-registry-sync/src/v1_registry_sync/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""CLI entry point for v1-registry-sync."""

import argparse
import logging
import sys

from v1_registry_sync.reader import read_latest_v2_components
from v1_registry_sync.reporter import write_report

# Module-level logger; main() reports progress and errors through it.
logger = logging.getLogger(__name__)


def configure_logging() -> None:
    """Configure root logging: INFO level, message-only format, sent to stderr."""
    stderr_handler = logging.StreamHandler(sys.stderr)
    logging.basicConfig(
        handlers=[stderr_handler],
        format="%(message)s",
        level=logging.INFO,
    )


def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the v1-registry-sync command line."""
    cli = argparse.ArgumentParser(
        description=(
            "Read the latest V2 registry snapshot and produce a report showing "
            "which stability, display_name, and description values would be synced "
            "into the matching V1 entries under opentelemetry.io/data/registry/."
        ),
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    cli.add_argument(
        "--inventory-dir",
        default="ecosystem-registry/collector",
        help="Path to the ecosystem-registry/collector directory",
    )
    cli.add_argument(
        "--distribution",
        choices=["core", "contrib"],
        default="contrib",
        help="Distribution to read from V2",
    )
    cli.add_argument(
        "--output",
        default="-",
        help="Output file path, or - for stdout",
    )
    cli.add_argument(
        "--format",
        choices=["json", "yaml"],
        default="json",
        help="Output format",
    )
    return cli


def main() -> None:
    """Run the CLI: read the latest V2 snapshot and emit a dry-run sync report.

    Progress messages go through the module logger. The report itself is
    written to stdout when ``--output`` is ``-``, otherwise to the given file.
    Any exception is logged with its traceback and the process exits with
    status 1.
    """
    configure_logging()
    args = _build_parser().parse_args()

    try:
        logger.info("V1 Registry Sync — dry-run report")
        logger.info("Inventory directory : %s", args.inventory_dir)
        logger.info("Distribution : %s", args.distribution)
        logger.info("")

        report = read_latest_v2_components(
            inventory_dir=args.inventory_dir,
            distribution=args.distribution,
        )

        logger.info("")
        logger.info("Registry version : v%s", report.version)
        logger.info("Total components : %d", len(report.components))
        logger.info("")

        destination = args.output
        if destination != "-":
            with open(destination, "w", encoding="utf-8") as report_file:
                write_report(report, report_file, fmt=args.format)
            logger.info("Report written to %s", destination)
        else:
            write_report(report, sys.stdout, fmt=args.format)

    except Exception as exc:
        # Top-level boundary: log with traceback, then signal failure to the shell.
        logger.exception("Error: %s", exc)
        sys.exit(1)


# Run the CLI when this module is executed directly as a script.
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Data models for V1 registry sync."""

from dataclasses import dataclass, field
from typing import Optional

# Stability level names in priority order, highest priority first.
# Order matters: consumers that walk this list front to back pick the
# earliest entry they find.
STABILITY_PRIORITY = [
"stable",
"beta",
"alpha",
"development",
"deprecated",
"unmaintained",
]


@dataclass
class ComponentSyncData:
    """V2-derived values for one component that may be synced into its V1 entry."""

    name: str
    component_type: str
    distribution: str
    display_name: Optional[str] = None
    description: Optional[str] = None
    stability: Optional[str] = None

    def proposed_changes(self) -> dict:
        """Map V1 field names to the values that are set (i.e. not ``None``).

        ``display_name`` is reported under the V1 key ``title``; the other
        two fields keep their names. Key order is title, description,
        stability.
        """
        v1_candidates = (
            ("title", self.display_name),
            ("description", self.description),
            ("stability", self.stability),
        )
        return {v1_key: value for v1_key, value in v1_candidates if value is not None}


@dataclass
class V1SyncReport:
    """Proposed V1 changes gathered from one V2 registry snapshot."""

    version: str
    distribution: str
    components: list[ComponentSyncData] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Return the report as plain dicts and lists, ready for serialization.

        Each component is reduced to its name, component type, and the
        result of its ``proposed_changes()``.
        """
        entries = []
        for component in self.components:
            entries.append(
                {
                    "name": component.name,
                    "component_type": component.component_type,
                    "proposed_v1_changes": component.proposed_changes(),
                }
            )
        return {
            "version": self.version,
            "distribution": self.distribution,
            "components": entries,
        }
117 changes: 117 additions & 0 deletions ecosystem-automation/v1-registry-sync/src/v1_registry_sync/reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Reads V2 ecosystem-registry data and extracts fields for V1 sync."""

import logging
from pathlib import Path
from typing import Optional

import yaml
from semantic_version import Version

from .models import STABILITY_PRIORITY, ComponentSyncData, V1SyncReport

# Component kinds; read_latest_v2_components looks for one "<type>.yaml"
# file per entry inside the selected version directory.
COMPONENT_TYPES = ["connector", "exporter", "extension", "processor", "receiver"]

# Module-level logger used for progress messages while loading files.
logger = logging.getLogger(__name__)


def _most_stable_level(stability: Optional[dict]) -> Optional[str]:
    """Pick the first level from STABILITY_PRIORITY that appears in *stability*.

    Returns ``None`` when *stability* is empty or ``None``, or when none of
    the known levels is present in it.
    """
    if not stability:
        return None
    present_levels = (level for level in STABILITY_PRIORITY if level in stability)
    return next(present_levels, None)


def _find_latest_version(distribution_dir: Path) -> Optional[str]:
    """Return the name of the highest version directory (e.g. 'v0.151.0').

    Only sub-directories whose name is ``v`` followed by a valid semantic
    version are considered. Other directories that merely start with "v"
    are skipped with a warning instead of aborting the whole run.

    Args:
        distribution_dir: Directory containing one sub-directory per version.

    Returns:
        The directory name (including the leading "v") of the highest
        version, or ``None`` if no valid version directory exists.
    """
    candidates = []
    for entry in distribution_dir.iterdir():
        if not entry.is_dir() or not entry.name.startswith("v"):
            continue
        try:
            # removeprefix drops exactly one "v"; lstrip would strip them all.
            parsed = Version(entry.name.removeprefix("v"))
        except ValueError:
            logger.warning("Skipping directory with non-semver name: %s", entry)
            continue
        candidates.append((parsed, entry.name))

    if not candidates:
        return None
    return max(candidates, key=lambda pair: pair[0])[1]


def _parse_component_file(yaml_path: Path, distribution: str) -> list[ComponentSyncData]:
    """Parse a single component-type YAML file and return sync data for each entry.

    Args:
        yaml_path: Path to the component-type YAML file.
        distribution: Distribution name recorded on every returned entry.

    Returns:
        One ComponentSyncData per mapping found under the top-level
        ``components`` key. An empty list is returned when the document is
        empty, is not a mapping, or has a missing, null, or empty
        ``components`` value.
    """
    with open(yaml_path, encoding="utf-8") as f:
        data = yaml.safe_load(f)

    # safe_load yields None for an empty file and may yield a list or scalar.
    if not isinstance(data, dict):
        return []
    raw_components = data.get("components")
    if not raw_components:
        # Covers a missing key as well as "components:" with no value (None).
        return []

    # Fall back to the file name when the key is absent or has no value.
    component_type = data.get("component_type") or yaml_path.stem
    results: list[ComponentSyncData] = []

    for component in raw_components:
        if not isinstance(component, dict):
            logger.warning("Skipping malformed component entry in %s: %r", yaml_path, component)
            continue

        # "or {}" also covers keys that are present but explicitly null.
        metadata = component.get("metadata") or {}
        status = metadata.get("status") or {}

        results.append(
            ComponentSyncData(
                name=component.get("name", ""),
                component_type=component_type,
                distribution=distribution,
                # Empty strings are normalized to None so they are not proposed.
                display_name=metadata.get("display_name") or None,
                description=metadata.get("description") or None,
                stability=_most_stable_level(status.get("stability")),
            )
        )

    return results


def read_latest_v2_components(
    inventory_dir: str = "ecosystem-registry/collector",
    distribution: str = "contrib",
) -> V1SyncReport:
    """Load every component of the newest V2 snapshot for one distribution.

    Args:
        inventory_dir: Path to the ecosystem-registry/collector directory.
        distribution: Either 'core' or 'contrib'.

    Returns:
        A V1SyncReport holding the snapshot version (without the leading
        "v"), the distribution, and the components read from each
        ``<component_type>.yaml`` file that exists.

    Raises:
        FileNotFoundError: If ``<inventory_dir>/<distribution>`` does not exist.
        ValueError: If no version directory is found there.
    """
    distribution_root = Path(inventory_dir) / distribution
    if not distribution_root.exists():
        raise FileNotFoundError(f"Distribution directory not found: {distribution_root}")

    latest_name = _find_latest_version(distribution_root)
    if not latest_name:
        raise ValueError(f"No versioned data found in {distribution_root}")

    snapshot_dir = distribution_root / latest_name
    collected: list[ComponentSyncData] = []

    for kind in COMPONENT_TYPES:
        candidate = snapshot_dir / f"{kind}.yaml"
        if not candidate.exists():
            # Not every snapshot has a file for every component type.
            continue
        parsed = _parse_component_file(candidate, distribution)
        collected.extend(parsed)
        logger.info(" %s: loaded %d components", kind, len(parsed))

    return V1SyncReport(
        version=latest_name.lstrip("v"),
        distribution=distribution,
        components=collected,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Writes V1 sync reports to a stream in JSON or YAML format."""

import json
import sys
from typing import TextIO

import yaml as yaml_lib

from .models import V1SyncReport


def write_report(report: V1SyncReport, output: TextIO = sys.stdout, fmt: str = "json") -> None:
    """Write *report* to *output* as YAML or JSON.

    Args:
        report: The V1SyncReport to serialize.
        output: Text stream that receives the serialized report.
        fmt: 'yaml' selects YAML; any other value produces JSON.
    """
    payload = report.to_dict()
    if fmt != "yaml":
        json.dump(payload, output, indent=2, ensure_ascii=False)
        # json.dump does not end the document with a newline; add one.
        output.write("\n")
        return
    yaml_lib.dump(payload, output, default_flow_style=False, allow_unicode=True, sort_keys=False)
14 changes: 14 additions & 0 deletions ecosystem-automation/v1-registry-sync/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Copyright The OpenTelemetry Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
Loading