Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions .chloggen/awsecscontainermetrics-instance-level.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
component: receiver/awsecscontainermetrics

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add instance_level_metrics config option to collect metrics from all tasks on an ECS Managed Instance

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [48396]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
  When enabled, the receiver queries the /tasks and /tasks/stats endpoints available to
  Managed Daemon Services, collecting container metrics for all tasks on the instance
  instead of only the receiver's own task.

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
14 changes: 14 additions & 0 deletions receiver/awsecscontainermetricsreceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,20 @@ This receiver collects task metadata and container stats at a fixed interval and

default: `20s`

#### instance_level_metrics:

When enabled, the receiver collects metrics for all tasks running on the instance by querying the `/tasks` and `/tasks/stats` endpoints. This requires the receiver to run as a [Managed Daemon Service](https://docs.aws.amazon.com/AmazonECS/latest/developerguide/ecs-managed-instances-daemon.html) on ECS Managed Instances, which provides access to instance-wide task metadata and stats.

default: `false`

Example:

```yaml
receivers:
  awsecscontainermetrics:
    collection_interval: 20s
    instance_level_metrics: true
```

## Enabling the AWS ECS Container Metrics Receiver

Expand Down
5 changes: 5 additions & 0 deletions receiver/awsecscontainermetricsreceiver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ type Config struct {
// CollectionInterval is the interval at which metrics should be collected
CollectionInterval time.Duration `mapstructure:"collection_interval"`

// InstanceLevelMetrics enables collection of metrics for all tasks on the instance.
// This requires the receiver to run as a Managed Daemon Service on ECS Managed Instances,
// which provides access to the /tasks and /tasks/stats endpoints.
InstanceLevelMetrics bool `mapstructure:"instance_level_metrics"`

// prevent unkeyed literal initialization
_ struct{}
}
3 changes: 3 additions & 0 deletions receiver/awsecscontainermetricsreceiver/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@ properties:
description: CollectionInterval is the interval at which metrics should be collected
type: string
format: duration
instance_level_metrics:
description: InstanceLevelMetrics enables collection of metrics for all tasks on the instance. This requires the receiver to run as a Managed Daemon Service on ECS Managed Instances, which provides access to the /tasks and /tasks/stats endpoints.
type: boolean
7 changes: 7 additions & 0 deletions receiver/awsecscontainermetricsreceiver/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ func TestLoadConfig(t *testing.T) {
CollectionInterval: 10 * time.Second,
},
},
{
id: component.NewIDWithName(metadata.Type, "instance_level"),
expected: &Config{
CollectionInterval: defaultCollectionInterval,
InstanceLevelMetrics: true,
},
},
}

for _, tt := range tests {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ import "time"

const TaskStatsPath = "/task/stats"

// Paths of the instance-wide endpoints that GetInstanceStats queries to cover
// all tasks on the instance.
const (
// InstanceStatsPath is the endpoint path returning container stats for all tasks.
InstanceStatsPath = "/tasks/stats"
// InstanceMetadataPath is the endpoint path returning metadata for all tasks.
InstanceMetadataPath = "/tasks"
)

// ContainerStats defines the structure for container stats
type ContainerStats struct {
Name string `json:"name"`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package awsecscontainermetrics // import "github.com/open-telemetry/opentelemetr
import (
"encoding/json"
"fmt"
"maps"

"go.uber.org/zap"

Expand Down Expand Up @@ -49,3 +50,59 @@ func (p *StatsProvider) GetStats() (map[string]*ContainerStats, ecsutil.TaskMeta

return stats, metadata, nil
}

// GetInstanceStats returns one TaskStatsEntry per task reported by the instance
// metadata endpoint (/tasks), each carrying the stats of that task's containers
// as reported by the instance stats endpoint (/tasks/stats). Both endpoints are
// available to Managed Daemon Services.
//
// Containers without a matching stats record are left out of the entry's Stats
// map. An error is returned when either endpoint cannot be read or decoded.
func (p *StatsProvider) GetInstanceStats() ([]TaskStatsEntry, error) {
	rawMetadata, err := p.rc.GetResponse(InstanceMetadataPath)
	if err != nil {
		return nil, fmt.Errorf("cannot read data from instance metadata endpoint: %w", err)
	}

	var tasks []ecsutil.TaskMetadata
	if err = json.Unmarshal(rawMetadata, &tasks); err != nil {
		return nil, fmt.Errorf("cannot unmarshal instance task metadata: %w", err)
	}

	rawStats, err := p.rc.GetResponse(InstanceStatsPath)
	if err != nil {
		return nil, fmt.Errorf("cannot read data from instance stats endpoint: %w", err)
	}

	var statsChunks []map[string]*ContainerStats
	if err = json.Unmarshal(rawStats, &statsChunks); err != nil {
		return nil, fmt.Errorf("cannot unmarshal instance stats: %w", err)
	}

	// Merge the decoded stats maps into a single lookup keyed by container ID.
	statsByID := make(map[string]*ContainerStats)
	for _, chunk := range statsChunks {
		maps.Copy(statsByID, chunk)
	}

	// Pair every task's metadata with the stats of the containers it lists.
	entries := make([]TaskStatsEntry, 0, len(tasks))
	for i := range tasks {
		task := &tasks[i]
		matched := make(map[string]*ContainerStats)
		for j := range task.Containers {
			dockerID := task.Containers[j].DockerID
			containerStats, found := statsByID[dockerID]
			if !found {
				continue
			}
			matched[dockerID] = containerStats
		}
		entries = append(entries, TaskStatsEntry{
			Stats:    matched,
			Metadata: *task,
		})
	}

	return entries, nil
}

// TaskStatsEntry pairs a task's metadata with the container stats for that task.
type TaskStatsEntry struct {
// Stats holds the stats of the task's containers, keyed by container Docker ID.
Stats map[string]*ContainerStats
// Metadata is the task's metadata as decoded from the instance metadata endpoint.
Metadata ecsutil.TaskMetadata
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,13 @@ func (f testRestClient) GetResponse(path string) ([]byte, error) {
return []byte("wrong-json-body"), nil
}

if path == TaskStatsPath {
switch path {
case TaskStatsPath:
return os.ReadFile("../../testdata/task_stats.json")
case InstanceMetadataPath:
return os.ReadFile("../../testdata/instance_tasks_metadata.json")
case InstanceStatsPath:
return os.ReadFile("../../testdata/instance_tasks_stats.json")
}

return nil, nil
Expand Down Expand Up @@ -77,3 +82,42 @@ func TestGetStats(t *testing.T) {
})
}
}

// TestGetInstanceStats exercises GetInstanceStats against the test REST client
// for a successful read, a failing endpoint, and a body that is not valid JSON.
func TestGetInstanceStats(t *testing.T) {
	tests := []struct {
		name      string
		client    ecsutil.RestClient
		wantError string
	}{
		{
			name:      "success",
			client:    &testRestClient{T: t},
			wantError: "",
		},
		{
			name:      "failure",
			client:    &testRestClient{T: t, fail: true},
			wantError: "cannot read data from instance metadata endpoint: failed",
		},
		{
			name:      "invalid-json",
			client:    &testRestClient{T: t, invalidJSON: true},
			wantError: "cannot unmarshal instance task metadata: invalid character 'w' looking for beginning of value",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			provider := NewStatsProvider(tt.client, zap.NewNop())
			entries, err := provider.GetInstanceStats()

			if tt.wantError != "" {
				// EqualError reports a clean failure when err is nil, whereas
				// calling err.Error() directly would panic with a nil dereference.
				require.EqualError(t, err, tt.wantError)
				return
			}

			require.NoError(t, err)
			require.Len(t, entries, 2)
			assert.Equal(t, "arn:aws:ecs:eu-west-1:111222333444:cluster/mi-cluster", entries[0].Metadata.Cluster)
			assert.NotEmpty(t, entries[0].Stats)
			assert.NotEmpty(t, entries[1].Stats)
		})
	}
}
28 changes: 28 additions & 0 deletions receiver/awsecscontainermetricsreceiver/receiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ func (aecmr *awsEcsContainerMetricsReceiver) Shutdown(context.Context) error {
// collectDataFromEndpoint creates a fresh stats provider and runs one collection
// pass: instance-level collection when InstanceLevelMetrics is enabled in the
// config, task-level collection otherwise.
func (aecmr *awsEcsContainerMetricsReceiver) collectDataFromEndpoint(ctx context.Context) error {
	aecmr.provider = awsecscontainermetrics.NewStatsProvider(aecmr.restClient, aecmr.logger)

	if !aecmr.config.InstanceLevelMetrics {
		return aecmr.collectTaskLevelMetrics(ctx)
	}
	return aecmr.collectInstanceLevelMetrics(ctx)
}

func (aecmr *awsEcsContainerMetricsReceiver) collectTaskLevelMetrics(ctx context.Context) error {
stats, metadata, err := aecmr.provider.GetStats()
if err != nil {
aecmr.logger.Error("Failed to collect stats", zap.Error(err))
Expand All @@ -91,3 +99,23 @@ func (aecmr *awsEcsContainerMetricsReceiver) collectDataFromEndpoint(ctx context

return nil
}

// collectInstanceLevelMetrics fetches the stats entries for all tasks from the
// provider and forwards the metrics built for each entry to the next consumer.
// It logs and returns a provider error, and returns the first consumer error
// without processing the remaining metrics.
func (aecmr *awsEcsContainerMetricsReceiver) collectInstanceLevelMetrics(ctx context.Context) error {
	taskEntries, err := aecmr.provider.GetInstanceStats()
	if err != nil {
		aecmr.logger.Error("Failed to collect instance-level stats", zap.Error(err))
		return err
	}

	for i := range taskEntries {
		entry := &taskEntries[i]
		for _, md := range awsecscontainermetrics.MetricsData(entry.Stats, entry.Metadata, aecmr.logger) {
			if consumeErr := aecmr.nextConsumer.ConsumeMetrics(ctx, md); consumeErr != nil {
				return consumeErr
			}
		}
	}

	return nil
}
29 changes: 28 additions & 1 deletion receiver/awsecscontainermetricsreceiver/receiver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,13 @@ func (f fakeRestClient) GetResponse(path string) ([]byte, error) {
if body, err := ecsutiltest.GetTestdataResponseByPath(f.T, path); body != nil || err != nil {
return body, err
}
if path == awsecscontainermetrics.TaskStatsPath {
switch path {
case awsecscontainermetrics.TaskStatsPath:
return os.ReadFile("testdata/task_stats.json")
case awsecscontainermetrics.InstanceMetadataPath:
return os.ReadFile("testdata/instance_tasks_metadata.json")
case awsecscontainermetrics.InstanceStatsPath:
return os.ReadFile("testdata/instance_tasks_stats.json")
}
return nil, nil
}
Expand Down Expand Up @@ -90,6 +95,28 @@ func TestCollectDataFromEndpointWithConsumerError(t *testing.T) {
require.EqualError(t, err, "Test Error for Metrics Consumer")
}

func TestCollectInstanceLevelMetrics(t *testing.T) {
cfg := createDefaultConfig().(*Config)
cfg.InstanceLevelMetrics = true
sink := new(consumertest.MetricsSink)
metricsReceiver, err := newAWSECSContainermetrics(
zap.NewNop(),
cfg,
sink,
&fakeRestClient{t},
)

require.NoError(t, err)
require.NotNil(t, metricsReceiver)

r := metricsReceiver.(*awsEcsContainerMetricsReceiver)
ctx := t.Context()

err = r.collectDataFromEndpoint(ctx)
require.NoError(t, err)
require.Positive(t, sink.DataPointCount())
}

type invalidFakeClient struct{}

func (invalidFakeClient) GetResponse(string) ([]byte, error) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
awsecscontainermetrics:
awsecscontainermetrics/collection_interval_settings:
  collection_interval: 10s
awsecscontainermetrics/instance_level:
  instance_level_metrics: true
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
[
{
"Cluster": "arn:aws:ecs:eu-west-1:111222333444:cluster/mi-cluster",
"TaskARN": "arn:aws:ecs:eu-west-1:111222333444:task/mi-cluster/081d2c75d3a2419d95ab6f8979fb61fb",
"Family": "daemon-app-monitor",
"Revision": "2",
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 0.25,
"Memory": 512
},
"PullStartedAt": "2026-05-14T17:56:14.38242749Z",
"PullStoppedAt": "2026-05-14T17:56:29.673031691Z",
"AvailabilityZone": "eu-west-1b",
"LaunchType": "MANAGED_INSTANCES",
"Containers": [
{
"DockerId": "081d2c75d3a2419d95ab6f8979fb61fb-1547245599",
"Name": "monitor-agent",
"DockerName": "monitor-agent",
"Image": "111222333444.dkr.ecr.eu-west-1.amazonaws.com/app-monitor:latest",
"ImageID": "sha256:fb9bbacc4f8c120a057a64f84ab8c2dfde29bef164544f9fbcba7406c679e887",
"Labels": {
"com.amazonaws.ecs.cluster": "arn:aws:ecs:eu-west-1:111222333444:cluster/mi-cluster",
"com.amazonaws.ecs.container-name": "monitor-agent",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:eu-west-1:111222333444:task/mi-cluster/081d2c75d3a2419d95ab6f8979fb61fb",
"com.amazonaws.ecs.task-definition-family": "daemon-app-monitor",
"com.amazonaws.ecs.task-definition-version": "2"
},
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 2
},
"CreatedAt": "2026-05-14T17:56:31.901628055Z",
"StartedAt": "2026-05-14T17:56:31.901628055Z",
"Type": "NORMAL",
"ContainerARN": "arn:aws:ecs:eu-west-1:111222333444:container/mi-cluster/081d2c75d3a2419d95ab6f8979fb61fb/be452a3e-7694-43fe-ad76-9d91f6a021e1"
}
],
"Group": "daemon:app-monitor"
},
{
"Cluster": "arn:aws:ecs:eu-west-1:111222333444:cluster/mi-cluster",
"TaskARN": "arn:aws:ecs:eu-west-1:111222333444:task/mi-cluster/45f36d5449924905a16c3d03c1fba67a",
"Family": "web-frontend",
"Revision": "1",
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 1,
"Memory": 2048
},
"PullStartedAt": "2026-05-14T17:56:45.700683062Z",
"PullStoppedAt": "2026-05-14T17:56:47.679100551Z",
"AvailabilityZone": "eu-west-1b",
"LaunchType": "MANAGED_INSTANCES",
"Containers": [
{
"DockerId": "45f36d5449924905a16c3d03c1fba67a-1950351559",
"Name": "nginx-proxy",
"DockerName": "nginx-proxy",
"Image": "111222333444.dkr.ecr.eu-west-1.amazonaws.com/web-frontend:latest",
"ImageID": "sha256:fb9bbacc4f8c120a057a64f84ab8c2dfde29bef164544f9fbcba7406c679e887",
"Labels": {
"com.amazonaws.ecs.cluster": "arn:aws:ecs:eu-west-1:111222333444:cluster/mi-cluster",
"com.amazonaws.ecs.container-name": "nginx-proxy",
"com.amazonaws.ecs.task-arn": "arn:aws:ecs:eu-west-1:111222333444:task/mi-cluster/45f36d5449924905a16c3d03c1fba67a",
"com.amazonaws.ecs.task-definition-family": "web-frontend",
"com.amazonaws.ecs.task-definition-version": "1"
},
"DesiredStatus": "RUNNING",
"KnownStatus": "RUNNING",
"Limits": {
"CPU": 2
},
"CreatedAt": "2026-05-14T17:56:47.93414642Z",
"StartedAt": "2026-05-14T17:56:47.93414642Z",
"Type": "NORMAL",
"ContainerARN": "arn:aws:ecs:eu-west-1:111222333444:container/mi-cluster/45f36d5449924905a16c3d03c1fba67a/d6b0a829-c58e-4e99-8069-57385c0ec17e"
}
],
"ServiceName": "web-frontend-service",
"Group": "service:web-frontend-service"
}
]
Loading