diff --git a/.chloggen/fix-genainormalizer-openinference-messages.yaml b/.chloggen/fix-genainormalizer-openinference-messages.yaml new file mode 100644 index 0000000000000..8827a6bdf2c37 --- /dev/null +++ b/.chloggen/fix-genainormalizer-openinference-messages.yaml @@ -0,0 +1,29 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: bug_fix + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) +component: processor/genainormalizer + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Fix OpenInference message normalization for flattened indexed attributes + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [48421] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + OpenInference represents messages as flattened indexed span attributes + (e.g. llm.input_messages.0.message.role) which were not matched by the + exact-key lookup. The processor now reconstructs these into GenAI semconv + JSON message strings (gen_ai.input.messages / gen_ai.output.messages). + +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/processor/genainormalizerprocessor/README.md b/processor/genainormalizerprocessor/README.md index 4f8dbea37e259..c290019705859 100644 --- a/processor/genainormalizerprocessor/README.md +++ b/processor/genainormalizerprocessor/README.md @@ -96,8 +96,8 @@ Attribute renames: | `llm.token_count.completion` | `gen_ai.usage.output_tokens` | | `llm.model_name` | `gen_ai.request.model` | | `llm.provider` | `gen_ai.provider.name` | -| `llm.input_messages` | `gen_ai.input.messages` | -| `llm.output_messages` | `gen_ai.output.messages` | +| `llm.input_messages.N.message.*` | `gen_ai.input.messages` (reconstructed as JSON, see below) | +| `llm.output_messages.N.message.*` | `gen_ai.output.messages` (reconstructed as JSON, see below) | | `embedding.model_name` | `gen_ai.request.model` | | `tool.name` | `gen_ai.tool.name` | | `tool.description` | `gen_ai.tool.description` | @@ -127,6 +127,21 @@ When a mapped attribute lands on `gen_ai.operation.name`, the string value is no Target reference: [OTel GenAI operation names](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/). +### Message reconstruction + +OpenInference represents messages as flattened indexed span attributes (e.g., `llm.input_messages.0.message.role`, `llm.input_messages.0.message.content`). The processor reconstructs these into a single JSON string attribute following the [GenAI input messages schema](https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#messages) and sets it as `gen_ai.input.messages` (or `gen_ai.output.messages`). + +Supported OpenInference message fields: + +- `llm.{input,output}_messages.N.message.role` +- `llm.{input,output}_messages.N.message.content` +- `llm.{input,output}_messages.N.message.tool_calls.M.tool_call.id` +- `llm.{input,output}_messages.N.message.tool_calls.M.tool_call.function.name` +- `llm.{input,output}_messages.N.message.tool_calls.M.tool_call.function.arguments` +- `llm.{input,output}_messages.N.message.tool_call_id` + +Messages with a `tool_call_id` are emitted with role `tool` in the GenAI output. + ## Relationship to other processors The [`schemaprocessor`](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/processor/schemaprocessor) translates between OTel semantic convention versions using `schema_url` and the OTel schema file format. Source conventions normalized by this processor do not set `schema_url` and do not publish OTel schema files, so `schemaprocessor` cannot be used for this translation today. diff --git a/processor/genainormalizerprocessor/internal/openinference/mappings.go b/processor/genainormalizerprocessor/internal/openinference/mappings.go index cba845844fff5..cf9918994c2a2 100644 --- a/processor/genainormalizerprocessor/internal/openinference/mappings.go +++ b/processor/genainormalizerprocessor/internal/openinference/mappings.go @@ -22,9 +22,8 @@ var LookupTable = map[string]string{ "llm.model_name": otelsemconv.GenAIRequestModel, "llm.provider": otelsemconv.GenAIProviderName, - // Input/output content - "llm.input_messages": otelsemconv.GenAIInputMessages, - "llm.output_messages": otelsemconv.GenAIOutputMessages, + // Input/output content: flattened message attributes (llm.input_messages.N.message.*) + // are handled by ReconstructMessages, not by simple key rename. // Embeddings "embedding.model_name": otelsemconv.GenAIRequestModel, diff --git a/processor/genainormalizerprocessor/internal/openinference/messages.go b/processor/genainormalizerprocessor/internal/openinference/messages.go new file mode 100644 index 0000000000000..29571941a094f --- /dev/null +++ b/processor/genainormalizerprocessor/internal/openinference/messages.go @@ -0,0 +1,289 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package openinference // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/genainormalizerprocessor/internal/openinference" + +import ( + "encoding/json" + "fmt" + "sort" + "strconv" + "strings" + + "go.opentelemetry.io/collector/pdata/pcommon" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/genainormalizerprocessor/internal/otelsemconv" +) + +// messagePrefix pairs an OpenInference flattened attribute prefix with the +// GenAI semconv target key it should be reconstructed into. +type messagePrefix struct { + prefix string + target string +} + +var messagePrefixes = []messagePrefix{ + {"llm.input_messages.", otelsemconv.GenAIInputMessages}, + {"llm.output_messages.", otelsemconv.GenAIOutputMessages}, +} + +// chatMessage is the GenAI semconv ChatMessage JSON structure. +type chatMessage struct { + Role string `json:"role"` + Parts []interface{} `json:"parts"` +} + +type textPart struct { + Type string `json:"type"` + Content string `json:"content"` +} + +type toolCallRequestPart struct { + Type string `json:"type"` + ID string `json:"id,omitempty"` + Name string `json:"name"` + Arguments interface{} `json:"arguments,omitempty"` +} + +type toolCallResponsePart struct { + Type string `json:"type"` + ID string `json:"id,omitempty"` + Response string `json:"response"` +} + +// toolCallFields collects the parts of a single tool call from flattened attrs. +type toolCallFields struct { + id string + name string + arguments string +} + +// messageFields collects all parsed fields for a single message. +type messageFields struct { + role string + content string + toolCallID string + toolCalls map[int]*toolCallFields +} + +// ReconstructMessages scans attrs for OpenInference flattened message attributes +// (llm.input_messages.N.message.* and llm.output_messages.N.message.*), +// reconstructs them into GenAI semconv JSON, and sets the target attributes. +// Returns true if any attributes were written. +func ReconstructMessages(attrs pcommon.Map, removeOriginals, overwrite bool) bool { + wrote := false + for _, mp := range messagePrefixes { + if reconstructPrefix(attrs, mp.prefix, mp.target, removeOriginals, overwrite) { + wrote = true + } + } + return wrote +} + +func reconstructPrefix(attrs pcommon.Map, prefix, target string, removeOriginals, overwrite bool) bool { + messages := make(map[int]*messageFields) + var keysToRemove []string + + attrs.Range(func(k string, v pcommon.Value) bool { + if !strings.HasPrefix(k, prefix) { + return true + } + rest := k[len(prefix):] + idx, fieldPath, ok := parseIndexedField(rest) + if !ok { + return true + } + + mf, exists := messages[idx] + if !exists { + mf = &messageFields{toolCalls: make(map[int]*toolCallFields)} + messages[idx] = mf + } + + applyField(mf, fieldPath, v.AsString()) + keysToRemove = append(keysToRemove, k) + return true + }) + + if len(messages) == 0 { + return false + } + + if _, existed := attrs.Get(target); existed && !overwrite { + return false + } + + result := buildMessages(messages) + jsonBytes, err := json.Marshal(result) + if err != nil { + return false + } + + attrs.PutStr(target, string(jsonBytes)) + + if removeOriginals { + for _, k := range keysToRemove { + attrs.Remove(k) + } + } + + return true +} + +// parseIndexedField splits "N.message.field.path" into (N, "field.path", true). +func parseIndexedField(s string) (int, string, bool) { + dotIdx := strings.IndexByte(s, '.') + if dotIdx < 0 { + return 0, "", false + } + idx, err := strconv.Atoi(s[:dotIdx]) + if err != nil { + return 0, "", false + } + rest := s[dotIdx+1:] + // Strip the "message." prefix that OpenInference always includes + const messagePrefix = "message." + if !strings.HasPrefix(rest, messagePrefix) { + return 0, "", false + } + return idx, rest[len(messagePrefix):], true +} + +// applyField sets the appropriate field on messageFields based on the field path. +func applyField(mf *messageFields, fieldPath, value string) { + switch { + case fieldPath == "role": + mf.role = value + case fieldPath == "content": + mf.content = value + case fieldPath == "tool_call_id": + mf.toolCallID = value + case strings.HasPrefix(fieldPath, "tool_calls."): + parseToolCallField(mf, fieldPath[len("tool_calls."):], value) + } +} + +// parseToolCallField parses "M.tool_call.{id|function.name|function.arguments}". +func parseToolCallField(mf *messageFields, s, value string) { + dotIdx := strings.IndexByte(s, '.') + if dotIdx < 0 { + return + } + idx, err := strconv.Atoi(s[:dotIdx]) + if err != nil { + return + } + rest := s[dotIdx+1:] + const tcPrefix = "tool_call." + if !strings.HasPrefix(rest, tcPrefix) { + return + } + field := rest[len(tcPrefix):] + + tc, exists := mf.toolCalls[idx] + if !exists { + tc = &toolCallFields{} + mf.toolCalls[idx] = tc + } + + switch field { + case "id": + tc.id = value + case "function.name": + tc.name = value + case "function.arguments": + tc.arguments = value + } +} + +// buildMessages converts the parsed message map into a sorted slice of +// GenAI semconv ChatMessage objects. +func buildMessages(messages map[int]*messageFields) []chatMessage { + indices := make([]int, 0, len(messages)) + for idx := range messages { + indices = append(indices, idx) + } + sort.Ints(indices) + + result := make([]chatMessage, 0, len(indices)) + for _, idx := range indices { + mf := messages[idx] + msg := buildSingleMessage(mf) + result = append(result, msg) + } + return result +} + +func buildSingleMessage(mf *messageFields) chatMessage { + msg := chatMessage{Role: mf.role} + + if mf.toolCallID != "" { + // Tool result message + if mf.role == "user" { + msg.Role = "tool" + } + msg.Parts = []interface{}{ + toolCallResponsePart{ + Type: "tool_call_response", + ID: mf.toolCallID, + Response: mf.content, + }, + } + return msg + } + + if len(mf.toolCalls) > 0 { + // Assistant message with tool calls + tcIndices := make([]int, 0, len(mf.toolCalls)) + for idx := range mf.toolCalls { + tcIndices = append(tcIndices, idx) + } + sort.Ints(tcIndices) + + parts := make([]interface{}, 0, len(tcIndices)) + for _, idx := range tcIndices { + tc := mf.toolCalls[idx] + part := toolCallRequestPart{ + Type: "tool_call", + ID: tc.id, + Name: tc.name, + } + if tc.arguments != "" { + var parsed interface{} + if err := json.Unmarshal([]byte(tc.arguments), &parsed); err == nil { + part.Arguments = parsed + } else { + part.Arguments = tc.arguments + } + } + parts = append(parts, part) + } + msg.Parts = parts + return msg + } + + if mf.content != "" { + msg.Parts = []interface{}{ + textPart{Type: "text", Content: mf.content}, + } + } else { + msg.Parts = []interface{}{} + } + + return msg +} + +// MessageAggregator implements the processor's attributeAggregator interface +// for OpenInference flattened message attributes. +type MessageAggregator struct{} + +// AggregateAttributes reconstructs flattened OpenInference message attributes +// into GenAI semconv JSON message strings. +func (MessageAggregator) AggregateAttributes(attrs pcommon.Map, removeOriginals, overwrite bool) bool { + return ReconstructMessages(attrs, removeOriginals, overwrite) +} + +// FormatMessageKey is a helper for tests to construct flattened attribute keys. +func FormatMessageKey(direction string, idx int, field string) string { + return fmt.Sprintf("llm.%s_messages.%d.message.%s", direction, idx, field) +} diff --git a/processor/genainormalizerprocessor/internal/openinference/messages_test.go b/processor/genainormalizerprocessor/internal/openinference/messages_test.go new file mode 100644 index 0000000000000..bcf5c6db5501f --- /dev/null +++ b/processor/genainormalizerprocessor/internal/openinference/messages_test.go @@ -0,0 +1,192 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package openinference + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/genainormalizerprocessor/internal/otelsemconv" +) + +func newAttrs() pcommon.Map { + return pcommon.NewMap() +} + +func TestReconstructMessages_SimpleUserAssistant(t *testing.T) { + attrs := newAttrs() + attrs.PutStr("llm.input_messages.0.message.role", "user") + attrs.PutStr("llm.input_messages.0.message.content", "Hello") + attrs.PutStr("llm.input_messages.1.message.role", "assistant") + attrs.PutStr("llm.input_messages.1.message.content", "Hi there!") + + wrote := ReconstructMessages(attrs, true, false) + require.True(t, wrote) + + v, ok := attrs.Get(otelsemconv.GenAIInputMessages) + require.True(t, ok) + + var messages []chatMessage + require.NoError(t, json.Unmarshal([]byte(v.Str()), &messages)) + require.Len(t, messages, 2) + + assert.Equal(t, "user", messages[0].Role) + assert.Equal(t, "assistant", messages[1].Role) + + // Verify content parts + part0 := messages[0].Parts[0].(map[string]interface{}) + assert.Equal(t, "text", part0["type"]) + assert.Equal(t, "Hello", part0["content"]) + + // Originals removed + _, ok = attrs.Get("llm.input_messages.0.message.role") + assert.False(t, ok) +} + +func TestReconstructMessages_ToolCallFlow(t *testing.T) { + attrs := newAttrs() + // User message + attrs.PutStr("llm.input_messages.0.message.role", "user") + attrs.PutStr("llm.input_messages.0.message.content", "What is the weather?") + // Assistant message with tool call + attrs.PutStr("llm.input_messages.1.message.role", "assistant") + attrs.PutStr("llm.input_messages.1.message.tool_calls.0.tool_call.id", "call_123") + attrs.PutStr("llm.input_messages.1.message.tool_calls.0.tool_call.function.name", "get_weather") + attrs.PutStr("llm.input_messages.1.message.tool_calls.0.tool_call.function.arguments", `{"location":"Seattle"}`) + // Tool result message + attrs.PutStr("llm.input_messages.2.message.role", "user") + attrs.PutStr("llm.input_messages.2.message.tool_call_id", "call_123") + attrs.PutStr("llm.input_messages.2.message.content", `{"weather":"sunny"}`) + + wrote := ReconstructMessages(attrs, true, false) + require.True(t, wrote) + + v, ok := attrs.Get(otelsemconv.GenAIInputMessages) + require.True(t, ok) + + var messages []json.RawMessage + require.NoError(t, json.Unmarshal([]byte(v.Str()), &messages)) + require.Len(t, messages, 3) + + // Check tool result message has role "tool" (remapped from "user") + var msg2 map[string]interface{} + require.NoError(t, json.Unmarshal(messages[2], &msg2)) + assert.Equal(t, "tool", msg2["role"]) + parts := msg2["parts"].([]interface{}) + part := parts[0].(map[string]interface{}) + assert.Equal(t, "tool_call_response", part["type"]) + assert.Equal(t, "call_123", part["id"]) + + // Check assistant tool call message + var msg1 map[string]interface{} + require.NoError(t, json.Unmarshal(messages[1], &msg1)) + assert.Equal(t, "assistant", msg1["role"]) + parts1 := msg1["parts"].([]interface{}) + tc := parts1[0].(map[string]interface{}) + assert.Equal(t, "tool_call", tc["type"]) + assert.Equal(t, "get_weather", tc["name"]) + assert.Equal(t, "call_123", tc["id"]) +} + +func TestReconstructMessages_OutputMessages(t *testing.T) { + attrs := newAttrs() + attrs.PutStr("llm.output_messages.0.message.role", "assistant") + attrs.PutStr("llm.output_messages.0.message.content", "The answer is 42.") + + wrote := ReconstructMessages(attrs, true, false) + require.True(t, wrote) + + v, ok := attrs.Get(otelsemconv.GenAIOutputMessages) + require.True(t, ok) + + var messages []chatMessage + require.NoError(t, json.Unmarshal([]byte(v.Str()), &messages)) + require.Len(t, messages, 1) + assert.Equal(t, "assistant", messages[0].Role) +} + +func TestReconstructMessages_NoMatchReturnsNoWrite(t *testing.T) { + attrs := newAttrs() + attrs.PutStr("http.method", "GET") + + wrote := ReconstructMessages(attrs, true, false) + assert.False(t, wrote) +} + +func TestReconstructMessages_OverwriteFalseSkipsExisting(t *testing.T) { + attrs := newAttrs() + attrs.PutStr(otelsemconv.GenAIInputMessages, "existing") + attrs.PutStr("llm.input_messages.0.message.role", "user") + attrs.PutStr("llm.input_messages.0.message.content", "Hello") + + wrote := ReconstructMessages(attrs, false, false) + assert.False(t, wrote) + + v, _ := attrs.Get(otelsemconv.GenAIInputMessages) + assert.Equal(t, "existing", v.Str()) +} + +func TestReconstructMessages_OverwriteTrueReplacesExisting(t *testing.T) { + attrs := newAttrs() + attrs.PutStr(otelsemconv.GenAIInputMessages, "existing") + attrs.PutStr("llm.input_messages.0.message.role", "user") + attrs.PutStr("llm.input_messages.0.message.content", "Hello") + + wrote := ReconstructMessages(attrs, false, true) + require.True(t, wrote) + + v, _ := attrs.Get(otelsemconv.GenAIInputMessages) + assert.NotEqual(t, "existing", v.Str()) +} + +func TestReconstructMessages_RemoveOriginalsFalseKeepsSources(t *testing.T) { + attrs := newAttrs() + attrs.PutStr("llm.input_messages.0.message.role", "user") + attrs.PutStr("llm.input_messages.0.message.content", "Hello") + + wrote := ReconstructMessages(attrs, false, false) + require.True(t, wrote) + + _, ok := attrs.Get("llm.input_messages.0.message.role") + assert.True(t, ok) +} + +func TestReconstructMessages_MultipleToolCalls(t *testing.T) { + attrs := newAttrs() + attrs.PutStr("llm.input_messages.0.message.role", "assistant") + attrs.PutStr("llm.input_messages.0.message.tool_calls.0.tool_call.id", "call_1") + attrs.PutStr("llm.input_messages.0.message.tool_calls.0.tool_call.function.name", "tool_a") + attrs.PutStr("llm.input_messages.0.message.tool_calls.0.tool_call.function.arguments", `{}`) + attrs.PutStr("llm.input_messages.0.message.tool_calls.1.tool_call.id", "call_2") + attrs.PutStr("llm.input_messages.0.message.tool_calls.1.tool_call.function.name", "tool_b") + attrs.PutStr("llm.input_messages.0.message.tool_calls.1.tool_call.function.arguments", `{"x":1}`) + + wrote := ReconstructMessages(attrs, true, false) + require.True(t, wrote) + + v, _ := attrs.Get(otelsemconv.GenAIInputMessages) + var messages []map[string]interface{} + require.NoError(t, json.Unmarshal([]byte(v.Str()), &messages)) + require.Len(t, messages, 1) + + parts := messages[0]["parts"].([]interface{}) + require.Len(t, parts, 2) + assert.Equal(t, "tool_a", parts[0].(map[string]interface{})["name"]) + assert.Equal(t, "tool_b", parts[1].(map[string]interface{})["name"]) +} + +func TestFormatMessageKey(t *testing.T) { + assert.Equal(t, + "llm.input_messages.0.message.role", + FormatMessageKey("input", 0, "role"), + ) + assert.Equal(t, + "llm.output_messages.2.message.content", + FormatMessageKey("output", 2, "content"), + ) +} diff --git a/processor/genainormalizerprocessor/processor.go b/processor/genainormalizerprocessor/processor.go index 2765e1b10495c..7bbbd91e89915 100644 --- a/processor/genainormalizerprocessor/processor.go +++ b/processor/genainormalizerprocessor/processor.go @@ -20,10 +20,18 @@ type valueTransformer interface { TransformValue(targetKey, value string) string } +// attributeAggregator reconstructs structured attributes from multiple source +// attributes. For example, OpenInference flattened message attributes are +// aggregated into GenAI semconv JSON message strings. +type attributeAggregator interface { + AggregateAttributes(attrs pcommon.Map, removeOriginals, overwrite bool) bool +} + // sourceNormalizer holds per-source state used during normalization. type sourceNormalizer struct { lookupTable map[string]string transformValue valueTransformer + aggregator attributeAggregator removeOriginals bool overwrite bool } @@ -39,6 +47,7 @@ func newSourceNormalizer(src Source) sourceNormalizer { if src.Name == SourceOpenInference { sn.lookupTable = openinference.LookupTable sn.transformValue = openinference.Transformer{} + sn.aggregator = openinference.MessageAggregator{} } return sn } @@ -80,8 +89,8 @@ func (p *genaiNormalizerProcessor) processTraces(_ context.Context, td ptrace.Tr return td, nil } -// normalizeAttributes applies the source's rename rules to attrs. It returns -// true if at least one attribute was written. +// normalizeAttributes applies the source's rename rules and attribute +// aggregation to attrs. It returns true if at least one attribute was written. func (sn *sourceNormalizer) normalizeAttributes(attrs pcommon.Map) bool { type rename struct { from string @@ -96,10 +105,6 @@ func (sn *sourceNormalizer) normalizeAttributes(attrs pcommon.Map) bool { return true }) - if len(renames) == 0 { - return false - } - wrote := false for _, r := range renames { val, ok := attrs.Get(r.from) @@ -129,6 +134,13 @@ func (sn *sourceNormalizer) normalizeAttributes(attrs pcommon.Map) bool { attrs.Remove(r.from) } } + + if sn.aggregator != nil { + if sn.aggregator.AggregateAttributes(attrs, sn.removeOriginals, sn.overwrite) { + wrote = true + } + } + return wrote } diff --git a/processor/genainormalizerprocessor/processor_test.go b/processor/genainormalizerprocessor/processor_test.go index 5b7f5d7e9a9d1..d1be072aa9dc6 100644 --- a/processor/genainormalizerprocessor/processor_test.go +++ b/processor/genainormalizerprocessor/processor_test.go @@ -484,3 +484,64 @@ func TestNormalize_OpenInferenceEndToEnd(t *testing.T) { assert.False(t, ok, "expected %s to be removed", k) } } + +// TestNormalize_OpenInferenceFlattenedMessages exercises the flattened +// OpenInference message attributes end-to-end, matching the real-world +// format from issue #48421. +func TestNormalize_OpenInferenceFlattenedMessages(t *testing.T) { + cfg := &Config{ + Sources: []Source{{Name: SourceOpenInference, RemoveOriginals: true}}, + } + sink := new(consumertest.TracesSink) + p, err := createTracesProcessor(t.Context(), processortest.NewNopSettings(metadata.Type), cfg, sink) + require.NoError(t, err) + + td, span := newSpan() + attrs := span.Attributes() + attrs.PutStr("llm.model_name", "claude-sonnet-4-6") + attrs.PutStr("openinference.span.kind", "LLM") + // Flattened input messages (from the issue's real-world example) + attrs.PutStr("llm.input_messages.0.message.role", "user") + attrs.PutStr("llm.input_messages.0.message.content", "What is the weather like in San Francisco?") + attrs.PutStr("llm.input_messages.1.message.role", "assistant") + attrs.PutStr("llm.input_messages.1.message.tool_calls.0.tool_call.id", "toolu_01") + attrs.PutStr("llm.input_messages.1.message.tool_calls.0.tool_call.function.name", "get_weather") + attrs.PutStr("llm.input_messages.1.message.tool_calls.0.tool_call.function.arguments", `{"location":"San Francisco, CA"}`) + attrs.PutStr("llm.input_messages.2.message.role", "user") + attrs.PutStr("llm.input_messages.2.message.tool_call_id", "toolu_01") + attrs.PutStr("llm.input_messages.2.message.content", `{"weather":"sunny","temperature":"75"}`) + // Flattened output message + attrs.PutStr("llm.output_messages.0.message.role", "assistant") + attrs.PutStr("llm.output_messages.0.message.content", "The weather in San Francisco is sunny with 75°F.") + + require.NoError(t, p.ConsumeTraces(t.Context(), td)) + out := sink.AllTraces()[0].ResourceSpans().At(0).ScopeSpans().At(0).Spans().At(0).Attributes() + + // gen_ai.input.messages must be populated + inputVal, ok := out.Get("gen_ai.input.messages") + require.True(t, ok, "gen_ai.input.messages must be set") + assert.Contains(t, inputVal.Str(), `"role":"user"`) + assert.Contains(t, inputVal.Str(), `"role":"assistant"`) + assert.Contains(t, inputVal.Str(), `"role":"tool"`) + + // gen_ai.output.messages must be populated + outputVal, ok := out.Get("gen_ai.output.messages") + require.True(t, ok, "gen_ai.output.messages must be set") + assert.Contains(t, outputVal.Str(), "sunny") + + // Flattened originals removed + for _, k := range []string{ + "llm.input_messages.0.message.role", + "llm.input_messages.0.message.content", + "llm.input_messages.1.message.tool_calls.0.tool_call.id", + "llm.output_messages.0.message.role", + } { + _, ok := out.Get(k) + assert.False(t, ok, "expected %s to be removed", k) + } + + // Scalar renames still work alongside message reconstruction + v, ok := out.Get("gen_ai.request.model") + require.True(t, ok) + assert.Equal(t, "claude-sonnet-4-6", v.Str()) +}