From 3bda7899f1bdffb48cc831cbb68c62ad069a9a2c Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Tue, 12 May 2026 13:34:04 -0400 Subject: [PATCH 01/16] create transformprocessor/internal/logparsingfuncs directory --- ...cessor-internal-log-parsing-functions.yaml | 27 +++++++++++++++++++ .github/CODEOWNERS | 1 + .../internal/logparsingfuncs/metadata.yaml | 3 +++ 3 files changed, 31 insertions(+) create mode 100644 .chloggen/feat_transform-processor-internal-log-parsing-functions.yaml create mode 100644 processor/transformprocessor/internal/logparsingfuncs/metadata.yaml diff --git a/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml b/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml new file mode 100644 index 0000000000000..dbc2480ad8076 --- /dev/null +++ b/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) +component: processor/transformprocessor + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add an internal directory for log parsing functions in the transform processor. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [#44908] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. 
+# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [] diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index bd3a5663504d5..da296724a6ef1 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -243,6 +243,7 @@ processor/spanpruningprocessor/ @open-telemetry processor/sumologicprocessor/ @open-telemetry/collector-contrib-approvers @rnishtala-sumo @pankaj101A @jagan2221 processor/tailsamplingprocessor/ @open-telemetry/collector-contrib-approvers @portertech @jmacd @csmarchbanks @carsonip processor/transformprocessor/ @open-telemetry/collector-contrib-approvers @TylerHelmuth @evan-bradley @edmocosta @bogdandrutu +processor/transformprocessor/internal/logparsingfuncs @open-telemetry/collector-contrib-approvers @Caleb-Hurshman @Dylan-M processor/unrollprocessor/ @open-telemetry/collector-contrib-approvers @axw @schmikei @rnishtala-sumo receiver/activedirectorydsreceiver/ @open-telemetry/collector-contrib-approvers @pjanotti receiver/aerospikereceiver/ @open-telemetry/collector-contrib-approvers @antonblock diff --git a/processor/transformprocessor/internal/logparsingfuncs/metadata.yaml b/processor/transformprocessor/internal/logparsingfuncs/metadata.yaml new file mode 100644 index 0000000000000..471080122b385 --- /dev/null +++ b/processor/transformprocessor/internal/logparsingfuncs/metadata.yaml @@ -0,0 +1,3 @@ +status: + codeowners: + active: [Caleb-Hurshman] From cfa2948b2703674dc763a55b278b8cfa4ac452b9 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Tue, 12 May 2026 13:40:44 -0400 Subject: [PATCH 02/16] add Dylan-M as codeowner --- .../transformprocessor/internal/logparsingfuncs/metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/processor/transformprocessor/internal/logparsingfuncs/metadata.yaml 
b/processor/transformprocessor/internal/logparsingfuncs/metadata.yaml index 471080122b385..d0b3692f4a695 100644 --- a/processor/transformprocessor/internal/logparsingfuncs/metadata.yaml +++ b/processor/transformprocessor/internal/logparsingfuncs/metadata.yaml @@ -1,3 +1,3 @@ status: codeowners: - active: [Caleb-Hurshman] + active: [Caleb-Hurshman, Dylan-M] From 4f9d0ec41ce653c559565e4326ddfcaa12be8b4e Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Tue, 12 May 2026 14:21:08 -0400 Subject: [PATCH 03/16] generate labels, codeowners --- ...at_transform-processor-internal-log-parsing-functions.yaml | 4 ++-- .github/CODEOWNERS | 2 +- .github/ISSUE_TEMPLATE/beta_stability.yaml | 1 + .github/ISSUE_TEMPLATE/bug_report.yaml | 1 + .github/ISSUE_TEMPLATE/feature_request.yaml | 1 + .github/ISSUE_TEMPLATE/other.yaml | 1 + .github/ISSUE_TEMPLATE/unmaintained.yaml | 1 + .github/component_labels.txt | 1 + 8 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml b/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml index dbc2480ad8076..bf3a3e60bec66 100644 --- a/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml +++ b/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml @@ -4,13 +4,13 @@ change_type: enhancement # The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) -component: processor/transformprocessor +component: processor/transform # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). note: Add an internal directory for log parsing functions in the transform processor. # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. -issues: [#44908] +issues: [44908] # (Optional) One or more lines of additional information to render under the primary note. 
# These lines will be padded with 2 spaces and then inserted directly into the document. diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index da296724a6ef1..e85ba46536434 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -243,7 +243,7 @@ processor/spanpruningprocessor/ @open-telemetry processor/sumologicprocessor/ @open-telemetry/collector-contrib-approvers @rnishtala-sumo @pankaj101A @jagan2221 processor/tailsamplingprocessor/ @open-telemetry/collector-contrib-approvers @portertech @jmacd @csmarchbanks @carsonip processor/transformprocessor/ @open-telemetry/collector-contrib-approvers @TylerHelmuth @evan-bradley @edmocosta @bogdandrutu -processor/transformprocessor/internal/logparsingfuncs @open-telemetry/collector-contrib-approvers @Caleb-Hurshman @Dylan-M +processor/transformprocessor/internal/logparsingfuncs/ @open-telemetry/collector-contrib-approvers @Caleb-Hurshman @Dylan-M processor/unrollprocessor/ @open-telemetry/collector-contrib-approvers @axw @schmikei @rnishtala-sumo receiver/activedirectorydsreceiver/ @open-telemetry/collector-contrib-approvers @pjanotti receiver/aerospikereceiver/ @open-telemetry/collector-contrib-approvers @antonblock diff --git a/.github/ISSUE_TEMPLATE/beta_stability.yaml b/.github/ISSUE_TEMPLATE/beta_stability.yaml index dc992eea44a6a..a8cf88c7bd67a 100644 --- a/.github/ISSUE_TEMPLATE/beta_stability.yaml +++ b/.github/ISSUE_TEMPLATE/beta_stability.yaml @@ -240,6 +240,7 @@ body: - processor/sumologic - processor/tailsampling - processor/transform + - processor/transform/internal/logparsingfuncs - processor/unroll - receiver/activedirectoryds - receiver/aerospike diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml index 4fc1232d8ec6b..c5fa9794b4d9d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -243,6 +243,7 @@ body: - processor/sumologic - processor/tailsampling - processor/transform + - 
processor/transform/internal/logparsingfuncs - processor/unroll - receiver/activedirectoryds - receiver/aerospike diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml index 99fb70a74129d..1aaf918dbed00 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.yaml +++ b/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -237,6 +237,7 @@ body: - processor/sumologic - processor/tailsampling - processor/transform + - processor/transform/internal/logparsingfuncs - processor/unroll - receiver/activedirectoryds - receiver/aerospike diff --git a/.github/ISSUE_TEMPLATE/other.yaml b/.github/ISSUE_TEMPLATE/other.yaml index 34727aeebc205..03e78dc8c1973 100644 --- a/.github/ISSUE_TEMPLATE/other.yaml +++ b/.github/ISSUE_TEMPLATE/other.yaml @@ -237,6 +237,7 @@ body: - processor/sumologic - processor/tailsampling - processor/transform + - processor/transform/internal/logparsingfuncs - processor/unroll - receiver/activedirectoryds - receiver/aerospike diff --git a/.github/ISSUE_TEMPLATE/unmaintained.yaml b/.github/ISSUE_TEMPLATE/unmaintained.yaml index 8f7ce35cd6105..f6ddad95c23e8 100644 --- a/.github/ISSUE_TEMPLATE/unmaintained.yaml +++ b/.github/ISSUE_TEMPLATE/unmaintained.yaml @@ -242,6 +242,7 @@ body: - processor/sumologic - processor/tailsampling - processor/transform + - processor/transform/internal/logparsingfuncs - processor/unroll - receiver/activedirectoryds - receiver/aerospike diff --git a/.github/component_labels.txt b/.github/component_labels.txt index f377c5c37426e..b4ceaa3ac6344 100644 --- a/.github/component_labels.txt +++ b/.github/component_labels.txt @@ -224,6 +224,7 @@ processor/spanpruningprocessor processor/spanpruning processor/sumologicprocessor processor/sumologic processor/tailsamplingprocessor processor/tailsampling processor/transformprocessor processor/transform +processor/transformprocessor/internal/logparsingfuncs processor/transform/internal/logparsingfuncs processor/unrollprocessor processor/unroll 
receiver/activedirectorydsreceiver receiver/activedirectoryds receiver/aerospikereceiver receiver/aerospike From b1c334dc305920b98f3210b15f8dae99d81d5a9e Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Tue, 12 May 2026 14:47:05 -0400 Subject: [PATCH 04/16] make generate-chloggen-components --- .chloggen/config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.chloggen/config.yaml b/.chloggen/config.yaml index 0c87c8f876875..b3c98ea2ce5f1 100644 --- a/.chloggen/config.yaml +++ b/.chloggen/config.yaml @@ -227,6 +227,7 @@ components: - processor/tail_sampling - processor/tencentcvmdetector - processor/transform + - processor/transformprocessor/internal/logparsingfuncs - processor/unroll - processor/upclouddetector - processor/vultrdetector From 74ab0f66493a31c0b4f7abb6a5d0fa24e32cefcf Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Wed, 13 May 2026 13:34:54 -0400 Subject: [PATCH 05/16] remove unnecessary chlog entry --- ...cessor-internal-log-parsing-functions.yaml | 27 ------------------- 1 file changed, 27 deletions(-) delete mode 100644 .chloggen/feat_transform-processor-internal-log-parsing-functions.yaml diff --git a/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml b/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml deleted file mode 100644 index bf3a3e60bec66..0000000000000 --- a/.chloggen/feat_transform-processor-internal-log-parsing-functions.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Use this changelog template to create an entry for release notes. - -# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' -change_type: enhancement - -# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) -component: processor/transform - -# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). -note: Add an internal directory for log parsing functions in the transform processor. 
- -# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. -issues: [44908] - -# (Optional) One or more lines of additional information to render under the primary note. -# These lines will be padded with 2 spaces and then inserted directly into the document. -# Use pipe (|) for multiline entries. -subtext: - -# If your change doesn't affect end users or the exported elements of any package, -# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. -# Optional: The change log or logs in which this entry should be included. -# e.g. '[user]' or '[user, api]' -# Include 'user' if the change is relevant to end users. -# Include 'api' if there is a change to a library API. -# Default: '[user]' -change_logs: [] From c746c4164bc20cf99acd2e933f3e697ae7cb92b2 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Thu, 11 Dec 2025 11:49:15 -0500 Subject: [PATCH 06/16] initial implementation --- .chloggen/feat_ottl-leef-parser.yaml | 27 + pkg/ottl/ottlfuncs/README.md | 16 + pkg/ottl/ottlfuncs/func_parse_leef.go | 266 ++++++ pkg/ottl/ottlfuncs/func_parse_leef_test.go | 917 +++++++++++++++++++++ pkg/ottl/ottlfuncs/functions.go | 1 + 5 files changed, 1227 insertions(+) create mode 100644 .chloggen/feat_ottl-leef-parser.yaml create mode 100644 pkg/ottl/ottlfuncs/func_parse_leef.go create mode 100644 pkg/ottl/ottlfuncs/func_parse_leef_test.go diff --git a/.chloggen/feat_ottl-leef-parser.yaml b/.chloggen/feat_ottl-leef-parser.yaml new file mode 100644 index 0000000000000..c1f8547cd0938 --- /dev/null +++ b/.chloggen/feat_ottl-leef-parser.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. 
receiver/filelog) +component: pkg/ottl/ottlfuncs + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add `ParseLEEF` function to parse Log Event Extended Format (LEEF) messages. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [44908] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index 9b8b19faa8e43..6f2fefabcad67 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -528,6 +528,7 @@ Available Converters: - [ParseInt](#parseint) - [ParseJSON](#parsejson) - [ParseKeyValue](#parsekeyvalue) +- [ParseLEEF](#parseleef) - [ParseSeverity](#parseseverity) - [ParseSimplifiedXML](#parsesimplifiedxml) - [ParseXML](#parsexml) @@ -1777,6 +1778,21 @@ Examples: - `ParseKeyValue("k1!v1_k2!v2_k3!v3", "!", "_")` - `ParseKeyValue(log.attributes["pairs"])` +### ParseLEEF + +`ParseLEEF(target)` + +The `ParseLEEF` Converter returns a `pcommon.Map` that is a result of parsing the target string as Log Event Extended Format (LEEF). + +`target` is a Getter that returns a string. If the returned string is empty, nil, or cannot be parsed as LEEF, an error will be returned. 
+
+`ParseLEEF` can parse both LEEF 1.0 and LEEF 2.0. Attributes are separated by a tab in LEEF 1.0 and, in LEEF 2.0, by the delimiter declared in the header (a tab when that field is empty).
+
+Examples:
+
+- `ParseLEEF("<30>Aug 19 12:33:31 ibm.guardium.test guard_sender[4486]: LEEF:1.0|IBM|Guardium|8.0|Login failures|ruleID=20026\truleDesc=Login failures\tseverity=INFO\tdevTime=2013-8-19 6:34:41\tserverType=DB2\tclassification=\tcategory=\tdbProtocolVersion=3.0\tusrName=\tsourceProgram=DB2JCC_APPLICATION\tstart=1376908481000\tdbUser=user\tdst=10.30.2.124\tdstPort=50000\tsrc=10.30.5.152\tsrcPort=38754\tprotocol=TCP\ttype=LOGIN_FAILED\tviolationID=15\tsql=\terror=08001-XXXX:30082-01")`
+- `ParseLEEF("<25>Jun 11 13:47:19 ibm.guardium.test guard_sender[3432]: LEEF:1.0|IBM|Guardium|8.0|Unauthorized Users on Cardholder Objects - Alert|ruleID=159\truleDesc=Unauthorized Users on Cardholder Objects - Alert\tseverity=MED\tdevTime=2013-6-11 12:46:21\tserverType=MS SQL SERVER\tclassification=Violation\tcategory=PCI\tdbProtocolVersion=8.0\tusrName=\tsourceProgram=ABCDEF.EXE\tstart=1370965581000\tdbUser=SYSTEM\tdst=172.16.107.92\tdstPort=1433\tsrc=172.16.107.92\tsrcPort=60621\tprotocol=TCP\ttype=SQL_LANG\tviolationID=0\tsql=SELECT * FROM EPOAgentHandlerAssignment INNER JOIN EPOAgentHandlerAssignmentPriority ON (EPOAgentHandlerAssignment.AutoID = EPOAgentHandlerAssignmentPriority.AssignmentID) ORDER BY EPOAgentHandlerAssignmentPriority.Priority ASC\terror=TDS_MS-")`
+
 ### ParseSeverity
 
 `ParseSeverity(target, severityMapping)`
diff --git a/pkg/ottl/ottlfuncs/func_parse_leef.go b/pkg/ottl/ottlfuncs/func_parse_leef.go
new file mode 100644
index 0000000000000..7d6acbce20921
--- /dev/null
+++ b/pkg/ottl/ottlfuncs/func_parse_leef.go
@@ -0,0 +1,282 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs"
+
+import (
+	"context"
+	"encoding/hex"
+	"errors"
+	"fmt"
+	"strings"
+
+	"go.opentelemetry.io/collector/pdata/pcommon"
+
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
+)
+
+// ParseLEEFArguments holds the arguments of the ParseLEEF converter.
+type ParseLEEFArguments[K any] struct {
+	// Target supplies the string to parse as a LEEF message.
+	Target ottl.StringGetter[K]
+}
+
+// NewParseLEEFFactory returns the factory for the "ParseLEEF" converter.
+func NewParseLEEFFactory[K any]() ottl.Factory[K] {
+	return ottl.NewFactory("ParseLEEF", &ParseLEEFArguments[K]{}, createParseLEEFFunction[K])
+}
+
+// createParseLEEFFunction builds the ParseLEEF expression from its arguments.
+// It returns an error when oArgs is not a *ParseLEEFArguments[K].
+func createParseLEEFFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) {
+	args, ok := oArgs.(*ParseLEEFArguments[K])
+	if !ok {
+		return nil, errors.New("ParseLEEFFactory args must be of type *ParseLEEFArguments[K]")
+	}
+
+	return parseLEEF(args.Target), nil
+}
+
+// parseLEEF returns an expression that reads the target string and parses it
+// as a LEEF message. An empty target is rejected with an error.
+func parseLEEF[K any](target ottl.StringGetter[K]) ottl.ExprFunc[K] {
+	return func(ctx context.Context, tCtx K) (any, error) {
+		source, err := target.Get(ctx, tCtx)
+		if err != nil {
+			return nil, err
+		}
+
+		if source == "" {
+			return nil, errors.New("cannot parse empty LEEF message")
+		}
+
+		return parseLEEFMessage(source)
+	}
+}
+
+// parseLEEFMessage parses a LEEF 1.0 or 2.0 message, optionally preceded by a
+// syslog header, into a map holding the header fields ("version", "vendor",
+// "product_name", "product_version", "event_id") and an "attributes" map.
+func parseLEEFMessage(message string) (pcommon.Map, error) {
+	// Handle optional syslog header by finding "LEEF:" in the message
+	// The syslog header (if present) precedes the LEEF header and is separated by a space
+	leefStart := strings.Index(message, "LEEF:")
+	if leefStart == -1 {
+		return pcommon.Map{}, errors.New("invalid LEEF message: 'LEEF:' not found")
+	}
+
+	// Everything before "LEEF:" (the syslog header, if any) is discarded. The
+	// version field runs up to the first pipe; the rest is parsed per version.
+	versionField, remainder, found := strings.Cut(message[leefStart:], "|")
+	if !found {
+		return pcommon.Map{}, errors.New("invalid LEEF message: missing pipe delimiter in header")
+	}
+
+	version, err := parseLEEFVersion(versionField)
+	if err != nil {
+		return pcommon.Map{}, err
+	}
+
+	var header leefHeader
+	var attributes string
+
+	switch version {
+	case "1.0":
+		header, attributes, err = parseLEEF1Header(remainder)
+	case "2.0":
+		header, attributes, err = parseLEEF2Header(remainder)
+	default:
+		return pcommon.Map{}, fmt.Errorf("unsupported LEEF version: %s", version)
+	}
+
+	if err != nil {
+		return pcommon.Map{}, err
+	}
+
+	header.version = version
+
+	// parseLEEFAttributes yields an empty map for an empty attribute section.
+	parsedAttrs, err := parseLEEFAttributes(attributes, header.delimiter)
+	if err != nil {
+		return pcommon.Map{}, err
+	}
+
+	return buildLEEFResult(header, parsedAttrs)
+}
+
+// leefHeader holds the pipe-delimited header fields of a LEEF message.
+type leefHeader struct {
+	version        string
+	vendor         string
+	productName    string
+	productVersion string
+	eventID        string
+	delimiter      string // separator between key=value attribute pairs
+}
+
+// parseLEEFVersion extracts the version from the "LEEF:<version>" header
+// field. Only versions 1.0 and 2.0 are accepted.
+func parseLEEFVersion(field string) (string, error) {
+	if !strings.HasPrefix(field, "LEEF:") {
+		return "", fmt.Errorf("invalid LEEF message: must start with 'LEEF:', got %q", field)
+	}
+
+	version := strings.TrimPrefix(field, "LEEF:")
+	if version != "1.0" && version != "2.0" {
+		return "", fmt.Errorf("unsupported LEEF version: %s (supported: 1.0, 2.0)", version)
+	}
+
+	return version, nil
+}
+
+// parseLEEF1Header parses the header fields that follow the version field of
+// a LEEF 1.0 message and returns them with the raw attribute section.
+func parseLEEF1Header(remainder string) (leefHeader, string, error) {
+	// LEEF 1.0: Vendor|Product|Version|EventID|attributes
+	// Attributes are tab-delimited
+	parts := strings.SplitN(remainder, "|", 5)
+	if len(parts) < 4 {
+		return leefHeader{}, "", fmt.Errorf("invalid LEEF 1.0 header: expected at least 4 fields (vendor, product, version, eventID), got %d", len(parts))
+	}
+
+	header := leefHeader{
+		vendor:         parts[0],
+		productName:    parts[1],
+		productVersion: parts[2],
+		eventID:        parts[3],
+		delimiter:      "\t", // LEEF 1.0 uses tab as default delimiter
+	}
+
+	var attributes string
+	if len(parts) == 5 {
+		attributes = parts[4]
+	}
+
+	return header, attributes, nil
+}
+
+// parseLEEF2Header parses the header fields that follow the version field of
+// a LEEF 2.0 message, including the delimiter field, and returns them with
+// the raw attribute section.
+func parseLEEF2Header(remainder string) (leefHeader, string, error) {
+	// LEEF 2.0: Vendor|Product|Version|EventID|Delimiter|attributes
+	// or: Vendor|Product|Version|EventID||attributes (empty delimiter means tab)
+	parts := strings.SplitN(remainder, "|", 6)
+	if len(parts) < 5 {
+		return leefHeader{}, "", fmt.Errorf("invalid LEEF 2.0 header: expected at least 5 fields (vendor, product, version, eventID, delimiter), got %d", len(parts))
+	}
+
+	delimiter, err := parseDelimiter(parts[4])
+	if err != nil {
+		return leefHeader{}, "", fmt.Errorf("invalid LEEF 2.0 delimiter: %w", err)
+	}
+
+	header := leefHeader{
+		vendor:         parts[0],
+		productName:    parts[1],
+		productVersion: parts[2],
+		eventID:        parts[3],
+		delimiter:      delimiter,
+	}
+
+	var attributes string
+	if len(parts) == 6 {
+		attributes = parts[5]
+	}
+
+	return header, attributes, nil
+}
+
+// parseDelimiter resolves the LEEF 2.0 delimiter field to the string that
+// separates attribute pairs. The field may be empty (tab), a literal
+// delimiter, or a hexadecimal character code prefixed with "0x" or "x".
+func parseDelimiter(spec string) (string, error) {
+	// Empty delimiter defaults to tab
+	if spec == "" {
+		return "\t", nil
+	}
+
+	// "0x"-prefixed hex delimiter (e.g., "0x09" for tab, "0x5e" for caret).
+	// The prefix is unambiguous, so malformed digits are reported as an error.
+	if strings.HasPrefix(spec, "0x") || strings.HasPrefix(spec, "0X") {
+		return decodeHexDelimiter(spec, spec[2:])
+	}
+
+	// "x"-prefixed hex delimiter (e.g., "x5e" for caret). A lone "x", or an "x"
+	// followed by something that does not decode to a single byte, falls
+	// through and is kept as a literal delimiter.
+	if len(spec) > 1 && (spec[0] == 'x' || spec[0] == 'X') {
+		if delimiter, err := decodeHexDelimiter(spec, spec[1:]); err == nil {
+			return delimiter, nil
+		}
+	}
+
+	// Any other value is used verbatim. Multi-character delimiters are allowed
+	// for backwards compatibility.
+	return spec, nil
+}
+
+// decodeHexDelimiter converts the hex digits of the delimiter field spec into
+// a single-byte delimiter. An odd number of digits is padded with a leading
+// zero and leading zero bytes are ignored, so "9", "09" and "0009" all
+// decode to a tab.
+func decodeHexDelimiter(spec, digits string) (string, error) {
+	if digits == "" {
+		return "", errors.New("empty hex value")
+	}
+	if len(digits)%2 != 0 {
+		digits = "0" + digits
+	}
+	decoded, err := hex.DecodeString(digits)
+	if err != nil {
+		return "", fmt.Errorf("invalid hex delimiter %q: %w", spec, err)
+	}
+	for len(decoded) > 1 && decoded[0] == 0 {
+		decoded = decoded[1:]
+	}
+	if len(decoded) != 1 {
+		return "", fmt.Errorf("hex delimiter must decode to a single byte, got %d bytes", len(decoded))
+	}
+	return string(decoded), nil
+}
+
+// parseLEEFAttributes splits the attribute section on delimiter and returns
+// the key=value pairs as a map. Pairs are trimmed of surrounding whitespace;
+// empty pairs, pairs without '=' and pairs with an empty key are skipped.
+func parseLEEFAttributes(attributes, delimiter string) (map[string]any, error) {
+	result := make(map[string]any)
+	if attributes == "" {
+		return result, nil
+	}
+
+	for _, pair := range strings.Split(attributes, delimiter) {
+		// Only the first '=' separates key from value, so values may contain '='.
+		key, value, found := strings.Cut(strings.TrimSpace(pair), "=")
+		if !found || key == "" {
+			continue
+		}
+
+		result[key] = value
+	}
+
+	return result, nil
+}
+
+// buildLEEFResult assembles the result map from the header fields and the
+// parsed attributes, which are stored under the "attributes" key.
+func buildLEEFResult(header leefHeader, attributes map[string]any) (pcommon.Map, error) {
+	result := pcommon.NewMap()
+
+	result.PutStr("version", header.version)
+	result.PutStr("vendor", header.vendor)
+	result.PutStr("product_name", header.productName)
+	result.PutStr("product_version", header.productVersion)
+	result.PutStr("event_id", header.eventID)
+
+	attrsMap := result.PutEmptyMap("attributes")
+	if err := attrsMap.FromRaw(attributes); err != nil {
+		return pcommon.Map{}, fmt.Errorf("failed to convert attributes: %w", err)
+	}
+
+	return result, nil
+}
diff --git a/pkg/ottl/ottlfuncs/func_parse_leef_test.go b/pkg/ottl/ottlfuncs/func_parse_leef_test.go
new file mode 100644
index 0000000000000..30af993214289
--- /dev/null
+++ b/pkg/ottl/ottlfuncs/func_parse_leef_test.go
@@ -0,0 +1,917 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package ottlfuncs
+
+import (
+	"context"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"go.opentelemetry.io/collector/pdata/pcommon"
+
+	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
+)
+
+func Test_parseLEEF(t *testing.T) {
+	tests := []struct {
+		name     string
+		target   ottl.StringGetter[any]
+		expected map[string]any
+	}{
+		{
+			name: "LEEF 1.0 simple",
+			target: ottl.StandardStringGetter[any]{
+				Getter: func(context.Context, any) (any, error) {
+					return "LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=10.50.1.1\tdst=2.10.20.20\tsev=5", nil
+				},
+			},
+			expected: map[string]any{
+				"version":         "1.0",
+				"vendor":          "Microsoft",
+				"product_name":    "MSExchange",
+				"product_version": "4.0 SP1",
+				"event_id":        "15345",
+				"attributes": map[string]any{
+					"src": "10.50.1.1",
+					"dst": "2.10.20.20",
+					"sev": "5",
+				},
+			},
+		},
+		{
+			name: "LEEF 1.0 with many attributes",
+			target:
ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|QRadar|QRM|1.0|NEW_PORT_DISCOVERED|src=7.5.6.6\tdst=172.50.123.1\tsev=5\tcat=anomaly\tsrcPort=3881\tdstPort=21\tusrName=joe.black", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "QRadar", + "product_name": "QRM", + "product_version": "1.0", + "event_id": "NEW_PORT_DISCOVERED", + "attributes": map[string]any{ + "src": "7.5.6.6", + "dst": "172.50.123.1", + "sev": "5", + "cat": "anomaly", + "srcPort": "3881", + "dstPort": "21", + "usrName": "joe.black", + }, + }, + }, + { + name: "LEEF 1.0 header only no attributes", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor|Product|1.0|EventID|", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "EventID", + "attributes": map[string]any{}, + }, + }, + { + name: "LEEF 1.0 no trailing pipe", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor|Product|1.0|EventID", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "EventID", + "attributes": map[string]any{}, + }, + }, + { + name: "LEEF 2.0 with caret delimiter", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=10.0.1.8^dst=10.0.0.5^sev=5", nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "Lancope", + "product_name": "StealthWatch", + "product_version": "1.0", + "event_id": "41", + "attributes": map[string]any{ + "src": "10.0.1.8", + "dst": "10.0.0.5", + "sev": "5", + }, + }, + }, + { + name: "LEEF 2.0 with hex tab delimiter", + target: ottl.StandardStringGetter[any]{ + Getter: 
func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|100|0x09|key1=val1\tkey2=val2", nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "100", + "attributes": map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + }, + { + name: "LEEF 2.0 with hex caret delimiter", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|100|0x5e|key1=val1^key2=val2", nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "100", + "attributes": map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + }, + { + name: "LEEF 2.0 with empty delimiter defaults to tab", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|100||key1=val1\tkey2=val2", nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "100", + "attributes": map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + }, + { + name: "LEEF 2.0 header only", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|EventID|^|", nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "EventID", + "attributes": map[string]any{}, + }, + }, + { + name: "LEEF 2.0 no trailing pipe after delimiter", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|EventID|^", nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "Vendor", + "product_name": "Product", + 
"product_version": "1.0", + "event_id": "EventID", + "attributes": map[string]any{}, + }, + }, + { + name: "attribute value with spaces", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor|Product|1.0|Event|msg=This is a message with spaces\tsrc=1.2.3.4", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "Event", + "attributes": map[string]any{ + "msg": "This is a message with spaces", + "src": "1.2.3.4", + }, + }, + }, + { + name: "attribute value with equals sign", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor|Product|1.0|Event|url=http://example.com?foo=bar\tsrc=1.2.3.4", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "Event", + "attributes": map[string]any{ + "url": "http://example.com?foo=bar", + "src": "1.2.3.4", + }, + }, + }, + { + name: "attribute with empty value", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor|Product|1.0|Event|key1=\tkey2=value2", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "Event", + "attributes": map[string]any{ + "key1": "", + "key2": "value2", + }, + }, + }, + { + name: "LEEF 2.0 uppercase hex", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|100|0X5E|key1=val1^key2=val2", nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "100", + "attributes": map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + }, + { + 
name: "header fields with special characters", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor-Name_123|Product.Name|1.0-beta|Event_ID_123|key=value", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Vendor-Name_123", + "product_name": "Product.Name", + "product_version": "1.0-beta", + "event_id": "Event_ID_123", + "attributes": map[string]any{ + "key": "value", + }, + }, + }, + { + name: "real world QRadar example", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|IBM|QRadar|7.3.2|Authentication|^|src=192.168.1.100^dst=10.0.0.1^usrName=admin^cat=auth^sev=3^devTime=Jan 15 2024 10:30:45^devTimeFormat=MMM dd yyyy HH:mm:ss", nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "IBM", + "product_name": "QRadar", + "product_version": "7.3.2", + "event_id": "Authentication", + "attributes": map[string]any{ + "src": "192.168.1.100", + "dst": "10.0.0.1", + "usrName": "admin", + "cat": "auth", + "sev": "3", + "devTime": "Jan 15 2024 10:30:45", + "devTimeFormat": "MMM dd yyyy HH:mm:ss", + }, + }, + }, + { + name: "network security event", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Cisco|ASA|9.8|FirewallDeny|src=10.1.1.1\tdst=192.168.1.1\tsrcPort=12345\tdstPort=443\tproto=TCP\tsev=7", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Cisco", + "product_name": "ASA", + "product_version": "9.8", + "event_id": "FirewallDeny", + "attributes": map[string]any{ + "src": "10.1.1.1", + "dst": "192.168.1.1", + "srcPort": "12345", + "dstPort": "443", + "proto": "TCP", + "sev": "7", + }, + }, + }, + { + name: "duplicate delimiter in attributes section", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|Event|^|key1=val1^^key2=val2", 
nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "Event", + "attributes": map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + }, + { + name: "trailing delimiter in attributes", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor|Product|1.0|Event|key1=val1\tkey2=val2\t", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "Event", + "attributes": map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + }, + { + name: "leading delimiter in attributes", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor|Product|1.0|Event|\tkey1=val1\tkey2=val2", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Vendor", + "product_name": "Product", + "product_version": "1.0", + "event_id": "Event", + "attributes": map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + }, + { + name: "IBM Guardium login failure event with syslog header", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + // Full sample from https://www.ibm.com/docs/en/dsm?topic=guardium-sample-event-messages + // Includes syslog header (RFC 3164 format) + // Note: LEEF 1.0 uses tab delimiter for attributes per spec at + // https://www.ibm.com/docs/en/dsm?topic=overview-leef-event-components + return "<30>Aug 19 12:33:31 ibm.guardium.test guard_sender[4486]: LEEF:1.0|IBM|Guardium|8.0|Login failures|ruleID=20026\truleDesc=Login failures\tseverity=INFO\tdevTime=2013-8-19 
6:34:41\tserverType=DB2\tclassification=\tcategory=\tdbProtocolVersion=3.0\tusrName=\tsourceProgram=DB2JCC_APPLICATION\tstart=1376908481000\tdbUser=user\tdst=10.30.2.124\tdstPort=50000\tsrc=10.30.5.152\tsrcPort=38754\tprotocol=TCP\ttype=LOGIN_FAILED\tviolationID=15\tsql=\terror=08001-XXXX:30082-01", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "IBM", + "product_name": "Guardium", + "product_version": "8.0", + "event_id": "Login failures", + "attributes": map[string]any{ + "ruleID": "20026", + "ruleDesc": "Login failures", + "severity": "INFO", + "devTime": "2013-8-19 6:34:41", + "serverType": "DB2", + "classification": "", + "category": "", + "dbProtocolVersion": "3.0", + "usrName": "", + "sourceProgram": "DB2JCC_APPLICATION", + "start": "1376908481000", + "dbUser": "user", + "dst": "10.30.2.124", + "dstPort": "50000", + "src": "10.30.5.152", + "srcPort": "38754", + "protocol": "TCP", + "type": "LOGIN_FAILED", + "violationID": "15", + "sql": "", + "error": "08001-XXXX:30082-01", + }, + }, + }, + { + name: "IBM Guardium unauthorized access event with syslog header", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + // Full sample from https://www.ibm.com/docs/en/dsm?topic=guardium-sample-event-messages + // Includes syslog header (RFC 3164 format) + // Note: LEEF 1.0 uses tab delimiter for attributes per spec at + // https://www.ibm.com/docs/en/dsm?topic=overview-leef-event-components + return "<25>Jun 11 13:47:19 ibm.guardium.test guard_sender[3432]: LEEF:1.0|IBM|Guardium|8.0|Unauthorized Users on Cardholder Objects - Alert|ruleID=159\truleDesc=Unauthorized Users on Cardholder Objects - Alert\tseverity=MED\tdevTime=2013-6-11 12:46:21\tserverType=MS SQL 
SERVER\tclassification=Violation\tcategory=PCI\tdbProtocolVersion=8.0\tusrName=\tsourceProgram=ABCDEF.EXE\tstart=1370965581000\tdbUser=SYSTEM\tdst=172.16.107.92\tdstPort=1433\tsrc=172.16.107.92\tsrcPort=60621\tprotocol=TCP\ttype=SQL_LANG\tviolationID=0\tsql=SELECT * FROM EPOAgentHandlerAssignment INNER JOIN EPOAgentHandlerAssignmentPriority ON (EPOAgentHandlerAssignment.AutoID = EPOAgentHandlerAssignmentPriority.AssignmentID) ORDER BY EPOAgentHandlerAssignmentPriority.Priority ASC\terror=TDS_MS-", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "IBM", + "product_name": "Guardium", + "product_version": "8.0", + "event_id": "Unauthorized Users on Cardholder Objects - Alert", + "attributes": map[string]any{ + "ruleID": "159", + "ruleDesc": "Unauthorized Users on Cardholder Objects - Alert", + "severity": "MED", + "devTime": "2013-6-11 12:46:21", + "serverType": "MS SQL SERVER", + "classification": "Violation", + "category": "PCI", + "dbProtocolVersion": "8.0", + "usrName": "", + "sourceProgram": "ABCDEF.EXE", + "start": "1370965581000", + "dbUser": "SYSTEM", + "dst": "172.16.107.92", + "dstPort": "1433", + "src": "172.16.107.92", + "srcPort": "60621", + "protocol": "TCP", + "type": "SQL_LANG", + "violationID": "0", + "sql": "SELECT * FROM EPOAgentHandlerAssignment INNER JOIN EPOAgentHandlerAssignmentPriority ON (EPOAgentHandlerAssignment.AutoID = EPOAgentHandlerAssignmentPriority.AssignmentID) ORDER BY EPOAgentHandlerAssignmentPriority.Priority ASC", + "error": "TDS_MS-", + }, + }, + }, + { + name: "syslog header RFC 5424 format", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + // RFC 5424 syslog format with structured data + return "<113>1 2019-01-18T11:07:53.520+07:00 hostname LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=10.0.1.8^dst=10.0.0.5^sev=5", nil + }, + }, + expected: map[string]any{ + "version": "2.0", + "vendor": "Lancope", + "product_name": "StealthWatch", + "product_version": "1.0", 
+ "event_id": "41", + "attributes": map[string]any{ + "src": "10.0.1.8", + "dst": "10.0.0.5", + "sev": "5", + }, + }, + }, + { + name: "syslog header simple", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "<13>Jan 18 11:07:53 192.168.1.1 LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=192.0.2.0\tdst=172.50.123.1", nil + }, + }, + expected: map[string]any{ + "version": "1.0", + "vendor": "Microsoft", + "product_name": "MSExchange", + "product_version": "4.0 SP1", + "event_id": "15345", + "attributes": map[string]any{ + "src": "192.0.2.0", + "dst": "172.50.123.1", + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + exprFunc := parseLEEF(tt.target) + result, err := exprFunc(t.Context(), nil) + require.NoError(t, err) + + resultMap, ok := result.(pcommon.Map) + require.True(t, ok, "result should be pcommon.Map") + + // Check top-level fields + assertMapValue(t, resultMap, "version", tt.expected["version"]) + assertMapValue(t, resultMap, "vendor", tt.expected["vendor"]) + assertMapValue(t, resultMap, "product_name", tt.expected["product_name"]) + assertMapValue(t, resultMap, "product_version", tt.expected["product_version"]) + assertMapValue(t, resultMap, "event_id", tt.expected["event_id"]) + + // Check attributes + expectedAttrs := tt.expected["attributes"].(map[string]any) + attrsVal, ok := resultMap.Get("attributes") + require.True(t, ok, "attributes field should exist") + attrsMap := attrsVal.Map() + assert.Equal(t, len(expectedAttrs), attrsMap.Len(), "attributes count mismatch") + + for k, v := range expectedAttrs { + attrVal, ok := attrsMap.Get(k) + assert.True(t, ok, "attribute %q should exist", k) + assert.Equal(t, v, attrVal.Str(), "attribute %q value mismatch", k) + } + }) + } +} + +func Test_parseLEEF_error(t *testing.T) { + tests := []struct { + name string + target ottl.StringGetter[any] + expectedError string + }{ + { + name: "empty input", + target: 
ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "", nil + }, + }, + expectedError: "cannot parse empty LEEF message", + }, + { + name: "not a LEEF message", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "CEF:0|Vendor|Product|1.0|100|Event Name|5|src=1.2.3.4", nil + }, + }, + expectedError: "'LEEF:' not found", + }, + { + name: "unsupported LEEF version", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:3.0|Vendor|Product|1.0|EventID|key=value", nil + }, + }, + expectedError: "unsupported LEEF version: 3.0", + }, + { + name: "invalid LEEF version format", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:abc|Vendor|Product|1.0|EventID|key=value", nil + }, + }, + expectedError: "unsupported LEEF version: abc", + }, + { + name: "missing pipes in header", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|OnlyVendor", nil + }, + }, + expectedError: "invalid LEEF 1.0 header: expected at least 4 fields", + }, + { + name: "LEEF 1.0 too few header fields", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor|Product", nil + }, + }, + expectedError: "invalid LEEF 1.0 header: expected at least 4 fields", + }, + { + name: "LEEF 2.0 too few header fields", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0", nil + }, + }, + expectedError: "invalid LEEF 2.0 header: expected at least 5 fields", + }, + { + name: "no pipe delimiter at all", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0", nil + }, + }, + expectedError: "missing pipe delimiter in header", + }, + { + name: "invalid hex delimiter 
- odd length", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|EventID|0x9|key=value", nil + }, + }, + expectedError: "invalid hex delimiter", + }, + { + name: "invalid hex delimiter - not hex", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|EventID|0xGG|key=value", nil + }, + }, + expectedError: "invalid hex delimiter", + }, + { + name: "invalid hex delimiter - too many bytes", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|EventID|0x0909|key=value", nil + }, + }, + expectedError: "hex delimiter must decode to a single byte", + }, + { + name: "invalid hex delimiter - empty hex", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:2.0|Vendor|Product|1.0|EventID|0x|key=value", nil + }, + }, + expectedError: "empty hex value", + }, + { + name: "plain text not LEEF", + target: ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "This is just plain text log message", nil + }, + }, + expectedError: "'LEEF:' not found", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + exprFunc := parseLEEF(tt.target) + _, err := exprFunc(t.Context(), nil) + require.Error(t, err) + assert.Contains(t, err.Error(), tt.expectedError) + }) + } +} + +func Test_parseLEEF_target_error(t *testing.T) { + target := ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return nil, assert.AnError + }, + } + exprFunc := parseLEEF(target) + _, err := exprFunc(t.Context(), nil) + require.Error(t, err) +} + +func Test_createParseLEEFFunction(t *testing.T) { + factory := NewParseLEEFFactory[any]() + assert.Equal(t, "ParseLEEF", factory.Name()) + + args := &ParseLEEFArguments[any]{ + Target: 
ottl.StandardStringGetter[any]{ + Getter: func(context.Context, any) (any, error) { + return "LEEF:1.0|Vendor|Product|1.0|Event|key=value", nil + }, + }, + } + + exprFunc, err := factory.CreateFunction(ottl.FunctionContext{}, args) + require.NoError(t, err) + + result, err := exprFunc(t.Context(), nil) + require.NoError(t, err) + assert.NotNil(t, result) +} + +func Test_createParseLEEFFunction_wrongArgs(t *testing.T) { + factory := NewParseLEEFFactory[any]() + + _, err := factory.CreateFunction(ottl.FunctionContext{}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "ParseLEEFFactory args must be of type *ParseLEEFArguments[K]") +} + +func Test_parseDelimiter(t *testing.T) { + tests := []struct { + name string + input string + expected string + hasError bool + }{ + { + name: "empty defaults to tab", + input: "", + expected: "\t", + }, + { + name: "single character", + input: "^", + expected: "^", + }, + { + name: "pipe character", + input: "|", + expected: "|", + }, + { + name: "hex tab", + input: "0x09", + expected: "\t", + }, + { + name: "hex caret lowercase", + input: "0x5e", + expected: "^", + }, + { + name: "hex caret uppercase", + input: "0x5E", + expected: "^", + }, + { + name: "hex with uppercase prefix", + input: "0X5e", + expected: "^", + }, + { + name: "hex space", + input: "0x20", + expected: " ", + }, + { + name: "multi-character delimiter", + input: "||", + expected: "||", + }, + { + name: "invalid hex - odd length", + input: "0x9", + hasError: true, + }, + { + name: "invalid hex - not hex chars", + input: "0xZZ", + hasError: true, + }, + { + name: "invalid hex - too many bytes", + input: "0x0909", + hasError: true, + }, + { + name: "invalid hex - empty", + input: "0x", + hasError: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := parseDelimiter(tt.input) + if tt.hasError { + require.Error(t, err) + } else { + require.NoError(t, err) + assert.Equal(t, tt.expected, result) + } + }) + } +} 
+ +func Test_parseLEEFAttributes(t *testing.T) { + tests := []struct { + name string + input string + delimiter string + expected map[string]any + }{ + { + name: "simple tab delimited", + input: "key1=val1\tkey2=val2", + delimiter: "\t", + expected: map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + { + name: "caret delimited", + input: "key1=val1^key2=val2^key3=val3", + delimiter: "^", + expected: map[string]any{ + "key1": "val1", + "key2": "val2", + "key3": "val3", + }, + }, + { + name: "empty attributes", + input: "", + delimiter: "\t", + expected: map[string]any{}, + }, + { + name: "value with equals", + input: "url=http://example.com?a=b", + delimiter: "\t", + expected: map[string]any{ + "url": "http://example.com?a=b", + }, + }, + { + name: "key without value skipped", + input: "key1=val1\tkeyonly\tkey2=val2", + delimiter: "\t", + expected: map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + { + name: "empty value", + input: "key1=\tkey2=val2", + delimiter: "\t", + expected: map[string]any{ + "key1": "", + "key2": "val2", + }, + }, + { + name: "whitespace handling", + input: " key1=val1 \t key2=val2 ", + delimiter: "\t", + expected: map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + { + name: "duplicate delimiters", + input: "key1=val1^^key2=val2", + delimiter: "^", + expected: map[string]any{ + "key1": "val1", + "key2": "val2", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := parseLEEFAttributes(tt.input, tt.delimiter) + require.NoError(t, err) + assert.Equal(t, tt.expected, result) + }) + } +} + +func assertMapValue(t *testing.T, m pcommon.Map, key string, expected any) { + t.Helper() + val, ok := m.Get(key) + require.True(t, ok, "key %q should exist", key) + assert.Equal(t, expected, val.Str(), "value for key %q mismatch", key) +} diff --git a/pkg/ottl/ottlfuncs/functions.go b/pkg/ottl/ottlfuncs/functions.go index ad2c849d92e6b..5d16d81829ad9 100644 --- 
a/pkg/ottl/ottlfuncs/functions.go +++ b/pkg/ottl/ottlfuncs/functions.go @@ -87,6 +87,7 @@ func converters[K any]() []ottl.Factory[K] { NewParseCSVFactory[K](), NewParseJSONFactory[K](), NewParseKeyValueFactory[K](), + NewParseLEEFFactory[K](), NewParseSimplifiedXMLFactory[K](), NewParseXMLFactory[K](), NewRemoveXMLFactory[K](), From 2ecb0eea7b689122b5752b2aa8933f4904021b8d Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Thu, 11 Dec 2025 16:26:39 -0500 Subject: [PATCH 07/16] lint fixes --- .chloggen/feat_ottl-leef-parser.yaml | 2 +- pkg/ottl/ottlfuncs/func_parse_leef.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.chloggen/feat_ottl-leef-parser.yaml b/.chloggen/feat_ottl-leef-parser.yaml index c1f8547cd0938..0e0ec7444721b 100644 --- a/.chloggen/feat_ottl-leef-parser.yaml +++ b/.chloggen/feat_ottl-leef-parser.yaml @@ -4,7 +4,7 @@ change_type: enhancement # The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) -component: pkg/ottl/ottlfuncs +component: pkg/ottl # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). note: Add `ParseLEEF` function to parse Log Event Extended Format (LEEF) messages. 
diff --git a/pkg/ottl/ottlfuncs/func_parse_leef.go b/pkg/ottl/ottlfuncs/func_parse_leef.go index 7d6acbce20921..1813c510c0b76 100644 --- a/pkg/ottl/ottlfuncs/func_parse_leef.go +++ b/pkg/ottl/ottlfuncs/func_parse_leef.go @@ -190,7 +190,7 @@ func parseDelimiter(spec string) (string, error) { // Hex-encoded delimiter (e.g., "0x09" for tab, "0x5e" for caret) if strings.HasPrefix(spec, "0x") || strings.HasPrefix(spec, "0X") { hexStr := spec[2:] - if len(hexStr) == 0 { + if hexStr == "" { return "", errors.New("empty hex value") } decoded, err := hex.DecodeString(hexStr) From 78ec4dabe915fc8240ed70fe28b1840aaf7a0323 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Thu, 11 Dec 2025 17:18:32 -0500 Subject: [PATCH 08/16] more lint fixes --- pkg/ottl/ottlfuncs/func_parse_leef.go | 13 ++++--------- pkg/ottl/ottlfuncs/func_parse_leef_test.go | 3 +-- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/pkg/ottl/ottlfuncs/func_parse_leef.go b/pkg/ottl/ottlfuncs/func_parse_leef.go index 1813c510c0b76..d35b88fe77c39 100644 --- a/pkg/ottl/ottlfuncs/func_parse_leef.go +++ b/pkg/ottl/ottlfuncs/func_parse_leef.go @@ -94,12 +94,7 @@ func parseLEEFMessage(message string) (pcommon.Map, error) { // Parse attributes if present var parsedAttrs map[string]any if attributes != "" { - parsedAttrs, err = parseLEEFAttributes(attributes, header.delimiter) - if err != nil { - return pcommon.Map{}, err - } - } else { - parsedAttrs = make(map[string]any) + parsedAttrs = parseLEEFAttributes(attributes, header.delimiter) } return buildLEEFResult(header, parsedAttrs) @@ -212,9 +207,9 @@ func parseDelimiter(spec string) (string, error) { return spec, nil } -func parseLEEFAttributes(attributes string, delimiter string) (map[string]any, error) { +func parseLEEFAttributes(attributes, delimiter string) map[string]any { if attributes == "" { - return make(map[string]any), nil + return make(map[string]any) } result := make(map[string]any) @@ -245,7 +240,7 @@ func parseLEEFAttributes(attributes 
string, delimiter string) (map[string]any, e result[key] = value } - return result, nil + return result } func buildLEEFResult(header leefHeader, attributes map[string]any) (pcommon.Map, error) { diff --git a/pkg/ottl/ottlfuncs/func_parse_leef_test.go b/pkg/ottl/ottlfuncs/func_parse_leef_test.go index 30af993214289..6e3607d236f23 100644 --- a/pkg/ottl/ottlfuncs/func_parse_leef_test.go +++ b/pkg/ottl/ottlfuncs/func_parse_leef_test.go @@ -902,8 +902,7 @@ func Test_parseLEEFAttributes(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - result, err := parseLEEFAttributes(tt.input, tt.delimiter) - require.NoError(t, err) + result := parseLEEFAttributes(tt.input, tt.delimiter) assert.Equal(t, tt.expected, result) }) } From c0f790930b4aaf27a0f582a67f8b229ba24d2e51 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Fri, 2 Jan 2026 12:04:55 -0500 Subject: [PATCH 09/16] lint fix --- pkg/ottl/ottlfuncs/func_parse_leef.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pkg/ottl/ottlfuncs/func_parse_leef.go b/pkg/ottl/ottlfuncs/func_parse_leef.go index d35b88fe77c39..694805777cbea 100644 --- a/pkg/ottl/ottlfuncs/func_parse_leef.go +++ b/pkg/ottl/ottlfuncs/func_parse_leef.go @@ -215,9 +215,7 @@ func parseLEEFAttributes(attributes, delimiter string) map[string]any { result := make(map[string]any) // Split by delimiter to get key=value pairs - pairs := strings.Split(attributes, delimiter) - - for _, pair := range pairs { + for pair := range strings.SplitSeq(attributes, delimiter) { pair = strings.TrimSpace(pair) if pair == "" { continue From 21d62697329231829e75a3496a453cfde6d3b7f6 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Mon, 11 May 2026 10:20:42 -0400 Subject: [PATCH 10/16] move implementation from OTTL to transform processor --- ...r.yaml => feat_transform-leef-parser.yaml} | 4 +- pkg/ottl/ottlfuncs/README.md | 16 -- pkg/ottl/ottlfuncs/functions.go | 1 - processor/transformprocessor/README.md | 28 +++ 
.../internal/logs}/func_parse_leef.go | 40 ++-- .../internal/logs}/func_parse_leef_test.go | 173 +++++++++--------- .../internal/logs/functions.go | 13 +- .../internal/logs/functions_test.go | 1 + 8 files changed, 141 insertions(+), 135 deletions(-) rename .chloggen/{feat_ottl-leef-parser.yaml => feat_transform-leef-parser.yaml} (91%) rename {pkg/ottl/ottlfuncs => processor/transformprocessor/internal/logs}/func_parse_leef.go (75%) rename {pkg/ottl/ottlfuncs => processor/transformprocessor/internal/logs}/func_parse_leef_test.go (79%) diff --git a/.chloggen/feat_ottl-leef-parser.yaml b/.chloggen/feat_transform-leef-parser.yaml similarity index 91% rename from .chloggen/feat_ottl-leef-parser.yaml rename to .chloggen/feat_transform-leef-parser.yaml index 0e0ec7444721b..b6bfdabef0ef9 100644 --- a/.chloggen/feat_ottl-leef-parser.yaml +++ b/.chloggen/feat_transform-leef-parser.yaml @@ -4,10 +4,10 @@ change_type: enhancement # The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) -component: pkg/ottl +component: processor/transform # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). -note: Add `ParseLEEF` function to parse Log Event Extended Format (LEEF) messages. +note: Add `parse_leef` function to parse Log Event Extended Format (LEEF) messages. # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. 
issues: [44908] diff --git a/pkg/ottl/ottlfuncs/README.md b/pkg/ottl/ottlfuncs/README.md index 6f2fefabcad67..9b8b19faa8e43 100644 --- a/pkg/ottl/ottlfuncs/README.md +++ b/pkg/ottl/ottlfuncs/README.md @@ -528,7 +528,6 @@ Available Converters: - [ParseInt](#parseint) - [ParseJSON](#parsejson) - [ParseKeyValue](#parsekeyvalue) -- [ParseLEEF](#parseleef) - [ParseSeverity](#parseseverity) - [ParseSimplifiedXML](#parsesimplifiedxml) - [ParseXML](#parsexml) @@ -1778,21 +1777,6 @@ Examples: - `ParseKeyValue("k1!v1_k2!v2_k3!v3", "!", "_")` - `ParseKeyValue(log.attributes["pairs"])` -### ParseLEEF - -`ParseLEEF(target)` - -The `ParseLEEF` Converter returns a `pcommon.Map` that is a result of parsing the target string as Log Event Extended Format (LEEF). - -`target` is a Getter that returns a string. If the returned string is empty, nil, or cannot be parsed as LEEF, an error will be returned. - -`ParseLEEF` can parse both LEEF 1.0 and LEEF 2.0. - -Examples: - -- `ParseLEEF("<30>Aug 19 12:33:31 ibm.guardium.test guard_sender[4486]: LEEF:1.0|IBM|Guardium|8.0|Login failures|ruleID=20026|ruleDesc=Login failures|severity=INFO|devTime=2013-8-19 6:34:41|serverType=DB2|classification=|category=|dbProtocolVersion=3.0|usrName=|sourceProgram=DB2JCC_APPLICATION|start=1376908481000|dbUser=user|dst=10.30.2.124|dstPort=50000|src=10.30.5.152|srcPort=38754|protocol=TCP|type=LOGIN_FAILED|violationID=15|sql=|error=08001-XXXX:30082-01")` -- `ParseLEEF("<25>Jun 11 13:47:19 ibm.guardium.test guard_sender[3432]: LEEF:1.0|IBM|Guardium|8.0|Unauthorized Users on Cardholder Objects - Alert|ruleID=159|ruleDesc=Unauthorized Users on Cardholder Objects - Alert|severity=MED|devTime=2013-6-11 12:46:21|serverType=MS SQL SERVER|classification=Violation|category=PCI|dbProtocolVersion=8.0|usrName=|sourceProgram=ABCDEF.EXE|start=1370965581000|dbUser=SYSTEM|dst=172.16.107.92|dstPort=1433|src=172.16.107.92|srcPort=60621|protocol=TCP|type=SQL_LANG|violationID=0|sql=SELECT * FROM EPOAgentHandlerAssignment INNER 
JOIN EPOAgentHandlerAssignmentPriority ON (EPOAgentHandlerAssignment.AutoID = EPOAgentHandlerAssignmentPriority.AssignmentID) ORDER BY EPOAgentHandlerAssignmentPriority.Priority ASC|error=TDS_MS-")` - ### ParseSeverity `ParseSeverity(target, severityMapping)` diff --git a/pkg/ottl/ottlfuncs/functions.go b/pkg/ottl/ottlfuncs/functions.go index 5d16d81829ad9..ad2c849d92e6b 100644 --- a/pkg/ottl/ottlfuncs/functions.go +++ b/pkg/ottl/ottlfuncs/functions.go @@ -87,7 +87,6 @@ func converters[K any]() []ottl.Factory[K] { NewParseCSVFactory[K](), NewParseJSONFactory[K](), NewParseKeyValueFactory[K](), - NewParseLEEFFactory[K](), NewParseSimplifiedXMLFactory[K](), NewParseXMLFactory[K](), NewRemoveXMLFactory[K](), diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 03877206a1316..f3998774b1ed6 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -273,6 +273,10 @@ In addition to the common OTTL functions, the processor defines its own function - [aggregate_on_attribute_value](#aggregate_on_attribute_value) - [merge_histogram_buckets](#merge_histogram_buckets) +**Logs only functions** + +- [parse_leef](#parse_leef) + **Traces only functions** - [set_semconv_span_name](#set_semconv_span_name) @@ -691,6 +695,30 @@ Examples: # counts: [5, 11, 1] ``` +### parse_leef + +`parse_leef(target)` + +The `parse_leef` function returns a `pcommon.Map` that is the result of parsing the `target` string as a [Log Event Extended Format (LEEF)](https://www.ibm.com/docs/en/dsm?topic=overview-leef-event-components) message. + +`target` is a Getter that returns a string. If the returned string is empty, or cannot be parsed as LEEF, an error will be returned. + +`parse_leef` can parse both LEEF 1.0 and LEEF 2.0 messages. The function is tolerant of an optional syslog header preceding the `LEEF:` token. 
The returned map has the following top-level fields: + +* `version` — the LEEF version (`"1.0"` or `"2.0"`). +* `vendor`, `product_name`, `product_version`, `event_id` — the LEEF header fields. +* `attributes` — a map of the parsed key/value attribute pairs. + +For LEEF 1.0 the attribute delimiter is always a tab. For LEEF 2.0 the delimiter is taken from the header and may be specified as a single character or as a hex value (e.g. `0x09`). + +Examples: + +- `parse_leef(body)` + +- `parse_leef("LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=10.50.1.1\tdst=2.10.20.20\tsev=5")` + +- `parse_leef("LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=10.0.1.8^dst=10.0.0.5^sev=5")` + ### set_semconv_span_name `set_semconv_span_name(semconvVersion, Optional[originalSpanNameAttribute])` diff --git a/pkg/ottl/ottlfuncs/func_parse_leef.go b/processor/transformprocessor/internal/logs/func_parse_leef.go similarity index 75% rename from pkg/ottl/ottlfuncs/func_parse_leef.go rename to processor/transformprocessor/internal/logs/func_parse_leef.go index 694805777cbea..b934971d93b14 100644 --- a/pkg/ottl/ottlfuncs/func_parse_leef.go +++ b/processor/transformprocessor/internal/logs/func_parse_leef.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package ottlfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" +package logs // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor/internal/logs" import ( "context" @@ -13,27 +13,28 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog" ) -type ParseLEEFArguments[K any] struct { - Target ottl.StringGetter[K] +type parseLEEFArguments struct { + Target ottl.StringGetter[*ottllog.TransformContext] } -func NewParseLEEFFactory[K any]() ottl.Factory[K] { - return 
ottl.NewFactory("ParseLEEF", &ParseLEEFArguments[K]{}, createParseLEEFFunction[K]) +func newParseLEEFFactory() ottl.Factory[*ottllog.TransformContext] { + return ottl.NewFactory("parse_leef", &parseLEEFArguments{}, createParseLEEFFunction) } -func createParseLEEFFunction[K any](_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[K], error) { - args, ok := oArgs.(*ParseLEEFArguments[K]) +func createParseLEEFFunction(_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[*ottllog.TransformContext], error) { + args, ok := oArgs.(*parseLEEFArguments) if !ok { - return nil, errors.New("ParseLEEFFactory args must be of type *ParseLEEFArguments[K]") + return nil, errors.New("parseLEEFFactory args must be of type *parseLEEFArguments") } return parseLEEF(args.Target), nil } -func parseLEEF[K any](target ottl.StringGetter[K]) ottl.ExprFunc[K] { - return func(ctx context.Context, tCtx K) (any, error) { +func parseLEEF(target ottl.StringGetter[*ottllog.TransformContext]) ottl.ExprFunc[*ottllog.TransformContext] { + return func(ctx context.Context, tCtx *ottllog.TransformContext) (any, error) { source, err := target.Get(ctx, tCtx) if err != nil { return nil, err @@ -48,17 +49,13 @@ func parseLEEF[K any](target ottl.StringGetter[K]) ottl.ExprFunc[K] { } func parseLEEFMessage(message string) (pcommon.Map, error) { - // Handle optional syslog header by finding "LEEF:" in the message - // The syslog header (if present) precedes the LEEF header and is separated by a space leefStart := strings.Index(message, "LEEF:") if leefStart == -1 { return pcommon.Map{}, errors.New("invalid LEEF message: 'LEEF:' not found") } - // Extract just the LEEF portion (skip syslog header if present) leefMessage := message[leefStart:] - // Find the first pipe to get the version field firstPipe := strings.Index(leefMessage, "|") if firstPipe == -1 { return pcommon.Map{}, errors.New("invalid LEEF message: missing pipe delimiter in header") @@ -70,7 +67,6 @@ func parseLEEFMessage(message 
string) (pcommon.Map, error) { return pcommon.Map{}, err } - // Parse the rest based on version remainder := leefMessage[firstPipe+1:] var header leefHeader @@ -91,7 +87,6 @@ func parseLEEFMessage(message string) (pcommon.Map, error) { header.version = version - // Parse attributes if present var parsedAttrs map[string]any if attributes != "" { parsedAttrs = parseLEEFAttributes(attributes, header.delimiter) @@ -123,8 +118,6 @@ func parseLEEFVersion(field string) (string, error) { } func parseLEEF1Header(remainder string) (leefHeader, string, error) { - // LEEF 1.0: Vendor|Product|Version|EventID|attributes - // Attributes are tab-delimited parts := strings.SplitN(remainder, "|", 5) if len(parts) < 4 { return leefHeader{}, "", fmt.Errorf("invalid LEEF 1.0 header: expected at least 4 fields (vendor, product, version, eventID), got %d", len(parts)) @@ -135,7 +128,7 @@ func parseLEEF1Header(remainder string) (leefHeader, string, error) { productName: parts[1], productVersion: parts[2], eventID: parts[3], - delimiter: "\t", // LEEF 1.0 uses tab as default delimiter + delimiter: "\t", } var attributes string @@ -147,8 +140,6 @@ func parseLEEF1Header(remainder string) (leefHeader, string, error) { } func parseLEEF2Header(remainder string) (leefHeader, string, error) { - // LEEF 2.0: Vendor|Product|Version|EventID|Delimiter|attributes - // or: Vendor|Product|Version|EventID||attributes (empty delimiter means tab) parts := strings.SplitN(remainder, "|", 6) if len(parts) < 5 { return leefHeader{}, "", fmt.Errorf("invalid LEEF 2.0 header: expected at least 5 fields (vendor, product, version, eventID, delimiter), got %d", len(parts)) @@ -177,12 +168,10 @@ func parseLEEF2Header(remainder string) (leefHeader, string, error) { } func parseDelimiter(spec string) (string, error) { - // Empty delimiter defaults to tab if spec == "" { return "\t", nil } - // Hex-encoded delimiter (e.g., "0x09" for tab, "0x5e" for caret) if strings.HasPrefix(spec, "0x") || strings.HasPrefix(spec, 
"0X") { hexStr := spec[2:] if hexStr == "" { @@ -198,12 +187,10 @@ func parseDelimiter(spec string) (string, error) { return string(decoded), nil } - // Single character delimiter if len(spec) == 1 { return spec, nil } - // For backwards compatibility, allow multi-character delimiters return spec, nil } @@ -214,17 +201,14 @@ func parseLEEFAttributes(attributes, delimiter string) map[string]any { result := make(map[string]any) - // Split by delimiter to get key=value pairs for pair := range strings.SplitSeq(attributes, delimiter) { pair = strings.TrimSpace(pair) if pair == "" { continue } - // Split on first '=' to get key and value eqIndex := strings.Index(pair, "=") if eqIndex == -1 { - // Key without value - skip or treat as empty value continue } diff --git a/pkg/ottl/ottlfuncs/func_parse_leef_test.go b/processor/transformprocessor/internal/logs/func_parse_leef_test.go similarity index 79% rename from pkg/ottl/ottlfuncs/func_parse_leef_test.go rename to processor/transformprocessor/internal/logs/func_parse_leef_test.go index 6e3607d236f23..35a13b545ea0d 100644 --- a/pkg/ottl/ottlfuncs/func_parse_leef_test.go +++ b/processor/transformprocessor/internal/logs/func_parse_leef_test.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package ottlfuncs +package logs import ( "context" @@ -12,18 +12,19 @@ import ( "go.opentelemetry.io/collector/pdata/pcommon" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog" ) func Test_parseLEEF(t *testing.T) { tests := []struct { name string - target ottl.StringGetter[any] + target ottl.StringGetter[*ottllog.TransformContext] expected map[string]any }{ { name: "LEEF 1.0 simple", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, 
*ottllog.TransformContext) (any, error) { return "LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=10.50.1.1\tdst=2.10.20.20\tsev=5", nil }, }, @@ -42,8 +43,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 1.0 with many attributes", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|QRadar|QRM|1.0|NEW_PORT_DISCOVERED|src=7.5.6.6\tdst=172.50.123.1\tsev=5\tcat=anomaly\tsrcPort=3881\tdstPort=21\tusrName=joe.black", nil }, }, @@ -66,8 +67,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 1.0 header only no attributes", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor|Product|1.0|EventID|", nil }, }, @@ -82,8 +83,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 1.0 no trailing pipe", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor|Product|1.0|EventID", nil }, }, @@ -98,8 +99,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 2.0 with caret delimiter", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=10.0.1.8^dst=10.0.0.5^sev=5", nil }, }, @@ -118,8 +119,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 2.0 with hex tab delimiter", - target: ottl.StandardStringGetter[any]{ - Getter: 
func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|100|0x09|key1=val1\tkey2=val2", nil }, }, @@ -137,8 +138,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 2.0 with hex caret delimiter", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|100|0x5e|key1=val1^key2=val2", nil }, }, @@ -156,8 +157,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 2.0 with empty delimiter defaults to tab", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|100||key1=val1\tkey2=val2", nil }, }, @@ -175,8 +176,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 2.0 header only", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|EventID|^|", nil }, }, @@ -191,8 +192,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 2.0 no trailing pipe after delimiter", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|EventID|^", nil }, }, @@ -207,8 +208,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "attribute value with spaces", - target: 
ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor|Product|1.0|Event|msg=This is a message with spaces\tsrc=1.2.3.4", nil }, }, @@ -226,8 +227,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "attribute value with equals sign", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor|Product|1.0|Event|url=http://example.com?foo=bar\tsrc=1.2.3.4", nil }, }, @@ -245,8 +246,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "attribute with empty value", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor|Product|1.0|Event|key1=\tkey2=value2", nil }, }, @@ -264,8 +265,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "LEEF 2.0 uppercase hex", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|100|0X5E|key1=val1^key2=val2", nil }, }, @@ -283,8 +284,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "header fields with special characters", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor-Name_123|Product.Name|1.0-beta|Event_ID_123|key=value", nil }, }, @@ -301,8 
+302,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "real world QRadar example", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|IBM|QRadar|7.3.2|Authentication|^|src=192.168.1.100^dst=10.0.0.1^usrName=admin^cat=auth^sev=3^devTime=Jan 15 2024 10:30:45^devTimeFormat=MMM dd yyyy HH:mm:ss", nil }, }, @@ -325,8 +326,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "network security event", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Cisco|ASA|9.8|FirewallDeny|src=10.1.1.1\tdst=192.168.1.1\tsrcPort=12345\tdstPort=443\tproto=TCP\tsev=7", nil }, }, @@ -348,8 +349,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "duplicate delimiter in attributes section", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|Event|^|key1=val1^^key2=val2", nil }, }, @@ -367,8 +368,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "trailing delimiter in attributes", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor|Product|1.0|Event|key1=val1\tkey2=val2\t", nil }, }, @@ -386,8 +387,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "leading delimiter in attributes", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + 
target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor|Product|1.0|Event|\tkey1=val1\tkey2=val2", nil }, }, @@ -405,8 +406,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "IBM Guardium login failure event with syslog header", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { // Full sample from https://www.ibm.com/docs/en/dsm?topic=guardium-sample-event-messages // Includes syslog header (RFC 3164 format) // Note: LEEF 1.0 uses tab delimiter for attributes per spec at @@ -447,8 +448,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "IBM Guardium unauthorized access event with syslog header", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { // Full sample from https://www.ibm.com/docs/en/dsm?topic=guardium-sample-event-messages // Includes syslog header (RFC 3164 format) // Note: LEEF 1.0 uses tab delimiter for attributes per spec at @@ -489,8 +490,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "syslog header RFC 5424 format", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { // RFC 5424 syslog format with structured data return "<113>1 2019-01-18T11:07:53.520+07:00 hostname LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=10.0.1.8^dst=10.0.0.5^sev=5", nil }, @@ -510,8 +511,8 @@ func Test_parseLEEF(t *testing.T) { }, { name: "syslog header simple", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, 
any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "<13>Jan 18 11:07:53 192.168.1.1 LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=192.0.2.0\tdst=172.50.123.1", nil }, }, @@ -564,13 +565,13 @@ func Test_parseLEEF(t *testing.T) { func Test_parseLEEF_error(t *testing.T) { tests := []struct { name string - target ottl.StringGetter[any] + target ottl.StringGetter[*ottllog.TransformContext] expectedError string }{ { name: "empty input", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "", nil }, }, @@ -578,8 +579,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "not a LEEF message", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "CEF:0|Vendor|Product|1.0|100|Event Name|5|src=1.2.3.4", nil }, }, @@ -587,8 +588,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "unsupported LEEF version", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:3.0|Vendor|Product|1.0|EventID|key=value", nil }, }, @@ -596,8 +597,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "invalid LEEF version format", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:abc|Vendor|Product|1.0|EventID|key=value", nil 
}, }, @@ -605,8 +606,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "missing pipes in header", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|OnlyVendor", nil }, }, @@ -614,8 +615,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "LEEF 1.0 too few header fields", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor|Product", nil }, }, @@ -623,8 +624,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "LEEF 2.0 too few header fields", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0", nil }, }, @@ -632,8 +633,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "no pipe delimiter at all", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0", nil }, }, @@ -641,8 +642,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "invalid hex delimiter - odd length", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|EventID|0x9|key=value", nil }, }, @@ -650,8 +651,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: 
"invalid hex delimiter - not hex", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|EventID|0xGG|key=value", nil }, }, @@ -659,8 +660,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "invalid hex delimiter - too many bytes", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|EventID|0x0909|key=value", nil }, }, @@ -668,8 +669,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "invalid hex delimiter - empty hex", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:2.0|Vendor|Product|1.0|EventID|0x|key=value", nil }, }, @@ -677,8 +678,8 @@ func Test_parseLEEF_error(t *testing.T) { }, { name: "plain text not LEEF", - target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "This is just plain text log message", nil }, }, @@ -697,8 +698,8 @@ func Test_parseLEEF_error(t *testing.T) { } func Test_parseLEEF_target_error(t *testing.T) { - target := ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + target := ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return nil, assert.AnError }, } @@ -708,12 +709,12 @@ func Test_parseLEEF_target_error(t 
*testing.T) { } func Test_createParseLEEFFunction(t *testing.T) { - factory := NewParseLEEFFactory[any]() - assert.Equal(t, "ParseLEEF", factory.Name()) + factory := newParseLEEFFactory() + assert.Equal(t, "parse_leef", factory.Name()) - args := &ParseLEEFArguments[any]{ - Target: ottl.StandardStringGetter[any]{ - Getter: func(context.Context, any) (any, error) { + args := &parseLEEFArguments{ + Target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { return "LEEF:1.0|Vendor|Product|1.0|Event|key=value", nil }, }, @@ -728,11 +729,11 @@ func Test_createParseLEEFFunction(t *testing.T) { } func Test_createParseLEEFFunction_wrongArgs(t *testing.T) { - factory := NewParseLEEFFactory[any]() + factory := newParseLEEFFactory() _, err := factory.CreateFunction(ottl.FunctionContext{}, nil) require.Error(t, err) - assert.Contains(t, err.Error(), "ParseLEEFFactory args must be of type *ParseLEEFArguments[K]") + assert.Contains(t, err.Error(), "parseLEEFFactory args must be of type *parseLEEFArguments") } func Test_parseDelimiter(t *testing.T) { diff --git a/processor/transformprocessor/internal/logs/functions.go b/processor/transformprocessor/internal/logs/functions.go index c536f7662985b..444716c9d3b5e 100644 --- a/processor/transformprocessor/internal/logs/functions.go +++ b/processor/transformprocessor/internal/logs/functions.go @@ -4,12 +4,21 @@ package logs // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor/internal/logs" import ( + "maps" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" ) func LogFunctions() map[string]ottl.Factory[*ottllog.TransformContext] { - // No logs-only functions yet. 
- return ottlfuncs.StandardFuncs[*ottllog.TransformContext]() + functions := ottlfuncs.StandardFuncs[*ottllog.TransformContext]() + + logFunctions := ottl.CreateFactoryMap( + newParseLEEFFactory(), + ) + + maps.Copy(functions, logFunctions) + + return functions } diff --git a/processor/transformprocessor/internal/logs/functions_test.go b/processor/transformprocessor/internal/logs/functions_test.go index c801f6f85eff2..9300e6bc40bc8 100644 --- a/processor/transformprocessor/internal/logs/functions_test.go +++ b/processor/transformprocessor/internal/logs/functions_test.go @@ -15,6 +15,7 @@ import ( func Test_LogFunctions(t *testing.T) { expected := ottlfuncs.StandardFuncs[*ottllog.TransformContext]() + expected["parse_leef"] = newParseLEEFFactory() actual := LogFunctions() require.Len(t, actual, len(expected)) for k := range actual { From 2027d54fa046fc96a4505a34e86bb5576b09576e Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Wed, 13 May 2026 14:59:54 -0400 Subject: [PATCH 11/16] move to dedicated internal directory --- .../internal/{logs => logparsingfuncs}/func_parse_leef.go | 4 ++-- .../{logs => logparsingfuncs}/func_parse_leef_test.go | 6 +++--- processor/transformprocessor/internal/logs/functions.go | 3 ++- .../transformprocessor/internal/logs/functions_test.go | 3 ++- 4 files changed, 9 insertions(+), 7 deletions(-) rename processor/transformprocessor/internal/{logs => logparsingfuncs}/func_parse_leef.go (96%) rename processor/transformprocessor/internal/{logs => logparsingfuncs}/func_parse_leef_test.go (99%) diff --git a/processor/transformprocessor/internal/logs/func_parse_leef.go b/processor/transformprocessor/internal/logparsingfuncs/func_parse_leef.go similarity index 96% rename from processor/transformprocessor/internal/logs/func_parse_leef.go rename to processor/transformprocessor/internal/logparsingfuncs/func_parse_leef.go index b934971d93b14..49684d5702316 100644 --- a/processor/transformprocessor/internal/logs/func_parse_leef.go +++ 
b/processor/transformprocessor/internal/logparsingfuncs/func_parse_leef.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package logs // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor/internal/logs" +package logparsingfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor/internal/logparsingfuncs" import ( "context" @@ -20,7 +20,7 @@ type parseLEEFArguments struct { Target ottl.StringGetter[*ottllog.TransformContext] } -func newParseLEEFFactory() ottl.Factory[*ottllog.TransformContext] { +func NewParseLEEFFactory() ottl.Factory[*ottllog.TransformContext] { return ottl.NewFactory("parse_leef", &parseLEEFArguments{}, createParseLEEFFunction) } diff --git a/processor/transformprocessor/internal/logs/func_parse_leef_test.go b/processor/transformprocessor/internal/logparsingfuncs/func_parse_leef_test.go similarity index 99% rename from processor/transformprocessor/internal/logs/func_parse_leef_test.go rename to processor/transformprocessor/internal/logparsingfuncs/func_parse_leef_test.go index 35a13b545ea0d..7f81d6359c5a1 100644 --- a/processor/transformprocessor/internal/logs/func_parse_leef_test.go +++ b/processor/transformprocessor/internal/logparsingfuncs/func_parse_leef_test.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package logs +package logparsingfuncs import ( "context" @@ -709,7 +709,7 @@ func Test_parseLEEF_target_error(t *testing.T) { } func Test_createParseLEEFFunction(t *testing.T) { - factory := newParseLEEFFactory() + factory := NewParseLEEFFactory() assert.Equal(t, "parse_leef", factory.Name()) args := &parseLEEFArguments{ @@ -729,7 +729,7 @@ func Test_createParseLEEFFunction(t *testing.T) { } func Test_createParseLEEFFunction_wrongArgs(t *testing.T) { - factory := newParseLEEFFactory() + factory := NewParseLEEFFactory() _, err := 
factory.CreateFunction(ottl.FunctionContext{}, nil) require.Error(t, err) diff --git a/processor/transformprocessor/internal/logs/functions.go b/processor/transformprocessor/internal/logs/functions.go index 444716c9d3b5e..c84b621798234 100644 --- a/processor/transformprocessor/internal/logs/functions.go +++ b/processor/transformprocessor/internal/logs/functions.go @@ -9,13 +9,14 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor/internal/logparsingfuncs" ) func LogFunctions() map[string]ottl.Factory[*ottllog.TransformContext] { functions := ottlfuncs.StandardFuncs[*ottllog.TransformContext]() logFunctions := ottl.CreateFactoryMap( - newParseLEEFFactory(), + logparsingfuncs.NewParseLEEFFactory(), ) maps.Copy(functions, logFunctions) diff --git a/processor/transformprocessor/internal/logs/functions_test.go b/processor/transformprocessor/internal/logs/functions_test.go index 9300e6bc40bc8..a1adde91dce48 100644 --- a/processor/transformprocessor/internal/logs/functions_test.go +++ b/processor/transformprocessor/internal/logs/functions_test.go @@ -11,11 +11,12 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/ottlfuncs" + "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor/internal/logparsingfuncs" ) func Test_LogFunctions(t *testing.T) { expected := ottlfuncs.StandardFuncs[*ottllog.TransformContext]() - expected["parse_leef"] = newParseLEEFFactory() + expected["parse_leef"] = logparsingfuncs.NewParseLEEFFactory() actual := LogFunctions() require.Len(t, actual, len(expected)) for k := range actual { From 
a97c80c38227bb40c0bee0573212a708d4c844b3 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Fri, 15 May 2026 10:40:29 -0400 Subject: [PATCH 12/16] CLF parsing func + tests --- .chloggen/transformprocessor-parse-clf.yaml | 30 ++ processor/transformprocessor/README.md | 366 ++++++++++-------- .../logparsingfuncs/func_parse_clf.go | 97 +++++ .../logparsingfuncs/func_parse_clf_test.go | 320 +++++++++++++++ .../internal/logs/functions.go | 1 + .../internal/logs/functions_test.go | 1 + 6 files changed, 653 insertions(+), 162 deletions(-) create mode 100644 .chloggen/transformprocessor-parse-clf.yaml create mode 100644 processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go create mode 100644 processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go diff --git a/.chloggen/transformprocessor-parse-clf.yaml b/.chloggen/transformprocessor-parse-clf.yaml new file mode 100644 index 0000000000000..83e6443f10448 --- /dev/null +++ b/.chloggen/transformprocessor-parse-clf.yaml @@ -0,0 +1,30 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog) +component: processor/transform + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add `parse_clf` OTTL function for parsing Common Log Format (CLF) HTTP access log entries. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [48349] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. 
+subtext: | + `parse_clf` is available in log statements and returns a map with the parsed + `remote_host`, `rfc931`, `authuser`, `timestamp`, `request`, `method`, + `request_uri`, `protocol`, `status`, and `bytes` fields. + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index f3998774b1ed6..351090ef39489 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -1,20 +1,23 @@ + # Transform Processor -| Status | | -| ------------- |-----------| -| Stability | [development]: profiles | -| | [beta]: traces, metrics, logs | -| Distributions | [contrib], [k8s] | -| Warnings | [Unsound Transformations, Identity Conflict, Orphaned Telemetry, Other](#warnings) | -| Issues | [![Open issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aopen%20label%3Aprocessor%2Ftransform%20&label=open&color=orange&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aopen+is%3Aissue+label%3Aprocessor%2Ftransform) [![Closed issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aclosed%20label%3Aprocessor%2Ftransform%20&label=closed&color=blue&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aclosed+is%3Aissue+label%3Aprocessor%2Ftransform) | -| Code coverage | 
[![codecov](https://codecov.io/github/open-telemetry/opentelemetry-collector-contrib/graph/main/badge.svg?component=processor_transform)](https://app.codecov.io/gh/open-telemetry/opentelemetry-collector-contrib/tree/main/?components%5B0%5D=processor_transform&displayType=list) | -| [Code Owners](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/CONTRIBUTING.md#becoming-a-code-owner) | [@TylerHelmuth](https://www.github.com/TylerHelmuth), [@evan-bradley](https://www.github.com/evan-bradley), [@edmocosta](https://www.github.com/edmocosta), [@bogdandrutu](https://www.github.com/bogdandrutu) \| Seeking more code owners! | -| Emeritus | [@anuraaga](https://www.github.com/anuraaga), [@kentquirk](https://www.github.com/kentquirk) | + +| Status | | +| -------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Stability | [development]: profiles | +| | [beta]: traces, metrics, logs | +| Distributions | [contrib], [k8s] | +| Warnings | [Unsound Transformations, Identity Conflict, Orphaned Telemetry, Other](#warnings) | +| Issues | [![Open 
issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aopen%20label%3Aprocessor%2Ftransform%20&label=open&color=orange&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aopen+is%3Aissue+label%3Aprocessor%2Ftransform) [![Closed issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aclosed%20label%3Aprocessor%2Ftransform%20&label=closed&color=blue&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aclosed+is%3Aissue+label%3Aprocessor%2Ftransform) | +| Code coverage | [![codecov](https://codecov.io/github/open-telemetry/opentelemetry-collector-contrib/graph/main/badge.svg?component=processor_transform)](https://app.codecov.io/gh/open-telemetry/opentelemetry-collector-contrib/tree/main/?components%5B0%5D=processor_transform&displayType=list) | +| [Code Owners](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/CONTRIBUTING.md#becoming-a-code-owner) | [@TylerHelmuth](https://www.github.com/TylerHelmuth), [@evan-bradley](https://www.github.com/evan-bradley), [@edmocosta](https://www.github.com/edmocosta), [@bogdandrutu](https://www.github.com/bogdandrutu) \| Seeking more code owners! 
| +| Emeritus | [@anuraaga](https://www.github.com/anuraaga), [@kentquirk](https://www.github.com/kentquirk) | [development]: https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/component-stability.md#development [beta]: https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/component-stability.md#beta [contrib]: https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-contrib [k8s]: https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-k8s + > [!NOTE] @@ -55,7 +58,7 @@ and allows you to configure a list of statements for the processor to execute. T Within each `` list, only certain OTTL Path prefixes can be used: | Signal | Path Prefix Values | -|--------------------|------------------------------------------------| +| ------------------ | ---------------------------------------------- | | trace_statements | `resource`, `scope`, `span`, and `spanevent` | | metric_statements | `resource`, `scope`, `metric`, and `datapoint` | | log_statements | `resource`, `scope`, and `log` | @@ -67,11 +70,11 @@ This means, for example, that you cannot use the Path `span.attributes` within t If the top-level `error_mode` is not specified, `propagate` will be used. The top-level `error_mode` can be overridden at statement group level, offering more granular control over error handling. If the statement group `error_mode` is not specified, the top-level `error_mode` is applied. -| error_mode | description | -|------------|---------------------------------------------------------------------------------------------------------------------------------------------| -| ignore | The processor ignores errors returned by statements, logs the error, and continues on to the next statement. This is the recommended mode. | -| silent | The processor ignores errors returned by statements, does not log the error, and continues on to the next statement. 
| -| propagate | The processor returns the error up the pipeline. This will result in the payload being dropped from the collector. | +| error_mode | description | +| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------ | +| ignore | The processor ignores errors returned by statements, logs the error, and continues on to the next statement. This is the recommended mode. | +| silent | The processor ignores errors returned by statements, does not log the error, and continues on to the next statement. | +| propagate | The processor returns the error up the pipeline. This will result in the payload being dropped from the collector. | ### Basic Config @@ -121,8 +124,8 @@ transform: - set(profile.original_payload_format, "json") ``` -In some situations a combination of Paths, functions, or enums is not allowed, and the solution -might require multiple [Advanced Config](#advanced-config) configuration groups. +In some situations a combination of Paths, functions, or enums is not allowed, and the solution +might require multiple [Advanced Config](#advanced-config) configuration groups. See [Context Inference](#context-inference) for more details. ### Advanced Config @@ -141,7 +144,7 @@ transform: _statements: - context: string error_mode: propagate - conditions: + conditions: - string - string statements: @@ -182,7 +185,7 @@ transform: ``` The Transform Processor will enforce that all the Paths, functions, and enums used in a group's `statements` are parsable. -In some situations a combination of Paths, functions, or enums is not allowed, and it might require multiple configuration groups. +In some situations a combination of Paths, functions, or enums is not allowed, and it might require multiple configuration groups. See [Context Inference](#context-inference) for more details. 
### Context inference @@ -239,7 +242,7 @@ The solution is to separate the statements into separate [Advanced Config](#adva ```yaml metric_statements: - statements: - - convert_sum_to_gauge() where metric.name == "system.processes.count" + - convert_sum_to_gauge() where metric.name == "system.processes.count" - statements: - limit(datapoint.attributes, 100, ["host.name"]) ``` @@ -251,7 +254,9 @@ You can learn more in-depth details on the capabilities and limitations of the O ## Supported functions: These common functions can be used for any Signal. + + - [OTTL Functions](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl/ottlfuncs) In addition to the common OTTL functions, the processor defines its own functions to help with transformations specific to this processor: @@ -275,6 +280,8 @@ In addition to the common OTTL functions, the processor defines its own function **Logs only functions** +- [parse_clf](#parse_clf) + - [parse_leef](#parse_leef) **Traces only functions** @@ -299,7 +306,7 @@ Examples: Converts incoming metrics of type "Gauge" to type "Sum", retaining the metric's datapoints and setting its aggregation temporality and monotonicity accordingly. Noop for metrics that are not of type "Gauge". -`aggregation_temporality` is a string (`"cumulative"` or `"delta"`) that specifies the resultant metric's aggregation temporality. `is_monotonic` is a boolean that specifies the resultant metric's monotonicity. +`aggregation_temporality` is a string (`"cumulative"` or `"delta"`) that specifies the resultant metric's aggregation temporality. `is_monotonic` is a boolean that specifies the resultant metric's monotonicity. **NOTE:** This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use at your own risk. 
@@ -307,12 +314,11 @@ Examples: - `convert_gauge_to_sum("cumulative", false)` - - `convert_gauge_to_sum("delta", true)` ### extract_count_metric -> [!NOTE] +> [!NOTE] > This function supports Histograms, ExponentialHistograms and Summaries. `extract_count_metric(is_monotonic, Optional[suffix])` @@ -327,7 +333,7 @@ The name for the new metric will be ``. The fields The new metric that is created will be passed to all subsequent statements in the metrics statements list. -> [!WARNING] +> [!WARNING] > This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use only if you're confident you know what the resulting monotonicity should be. Examples: @@ -338,7 +344,7 @@ Examples: ### extract_percentile_metric -> [!NOTE] +> [!NOTE] > This function supports Histograms and ExponentialHistograms. `extract_percentile_metric(percentile, Optional[suffix])` @@ -367,7 +373,7 @@ Examples: ### extract_sum_metric -> [!NOTE] +> [!NOTE] > This function supports Histograms, ExponentialHistograms and Summaries. `extract_sum_metric(is_monotonic, Optional[suffix])` @@ -382,7 +388,7 @@ The name for the new metric will be ``. The fields The new metric that is created will be passed to all subsequent statements in the metrics statements list. -> [!WARNING] +> [!WARNING] > This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use only if you're confident you know what the resulting monotonicity should be. Examples: @@ -402,7 +408,7 @@ The `convert_summary_count_val_to_sum` function creates a new Sum metric from a `suffix` is an optional string that defines the suffix for the metric name. By default, it is set to `_count`. 
For backward compatibility, this default does not follow the [semantic naming conventions](https://opentelemetry.io/docs/specs/semconv/general/naming/#general-naming-considerations) and should ideally be `.count` instead. This default is expected to change in a future release. -The name for the new metric will be ``. The fields that are copied are: `timestamp`, `starttimestamp`, `attributes`, and `description`. The new metric that is created will be passed to all functions in the metrics statements list. Function conditions will apply. +The name for the new metric will be ``. The fields that are copied are: `timestamp`, `starttimestamp`, `attributes`, and `description`. The new metric that is created will be passed to all functions in the metrics statements list. Function conditions will apply. **NOTE:** This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use at your own risk. @@ -441,7 +447,7 @@ The `convert_summary_sum_val_to_sum` function creates a new Sum metric from a Su `suffix` is an optional string that defines the suffix for the metric name. By default, it is set to `_sum`. For backward compatibility, this default does not follow the [semantic naming conventions](https://opentelemetry.io/docs/specs/semconv/general/naming/#general-naming-considerations) and should ideally be `.sum` instead. This default is expected to change in a future release. -The name for the new metric will be ``. The fields that are copied are: `timestamp`, `starttimestamp`, `attributes`, and `description`. The new metric that is created will be passed to all functions in the metrics statements list. Function conditions will apply. +The name for the new metric will be ``. The fields that are copied are: `timestamp`, `starttimestamp`, `attributes`, and `description`. 
The new metric that is created will be passed to all functions in the metrics statements list. Function conditions will apply. **NOTE:** This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use at your own risk. @@ -459,7 +465,7 @@ The `copy_metric` function copies the current metric, adding it to the end of th `name` is an optional string. `description` is an optional string. `unit` is an optional string. -The new metric will be exactly the same as the current metric. You can use the optional parameters to set the new metric's name, description, and unit. +The new metric will be exactly the same as the current metric. You can use the optional parameters to set the new metric's name, description, and unit. **NOTE:** The new metric is appended to the end of the metric slice and therefore will be included in all the metric statements. It is a best practice to ALWAYS include a Where clause when copying a metric that WILL NOT match the new metric. @@ -467,13 +473,11 @@ Examples: - `copy_metric(name="http.request.status_code", unit="s") where metric.name == "http.status_code` - - `copy_metric(desc="new desc") where metric.description == "old desc"` - ### convert_exponential_histogram_to_histogram -__Warning:__ The approach used in this function to convert exponential histograms to explicit histograms __is not__ part of the __OpenTelemetry Specification__. +**Warning:** The approach used in this function to convert exponential histograms to explicit histograms **is not** part of the **OpenTelemetry Specification**. `convert_exponential_histogram_to_histogram(distribution, [ExplicitBounds])` @@ -483,70 +487,74 @@ This function requires 2 arguments: - `distribution` - This argument defines the distribution algorithm used to allocate the exponential histogram datapoints into a new Explicit Histogram. 
There are 4 options: - - __upper__ - This approach identifies the highest possible value of each exponential bucket (_the upper bound_) and uses it to distribute the datapoints by comparing the upper bound of each bucket with the ExplicitBounds provided. This approach works better for small/narrow exponential histograms where the difference between the upper bounds and lower bounds are small. + - **upper** - This approach identifies the highest possible value of each exponential bucket (_the upper bound_) and uses it to distribute the datapoints by comparing the upper bound of each bucket with the ExplicitBounds provided. This approach works better for small/narrow exponential histograms where the difference between the upper bounds and lower bounds is small. + + _For example, Given:_ + + 1. count = 10 + 2. Boundaries: [5, 10, 15, 20, 25] + 3. Upper Bound: 15 + _Process:_ + 4. Start with zeros: [0, 0, 0, 0, 0] + 5. Iterate the boundaries and compare $upper = 15$ with each boundary: + - _For example, Given:_ - 1. count = 10 - 2. Boundaries: [5, 10, 15, 20, 25] - 3. Upper Bound: 15 - _Process:_ - 4. Start with zeros: [0, 0, 0, 0, 0] - 5. Iterate the boundaries and compare $upper = 15$ with each boundary: - $15>5$ (_skip_) - $15>10$ (_skip_) - $15<=15$ (allocate count to this boundary) - 6. Allocate count: [0, 0, __10__, 0, 0] - 7. Final Counts: [0, 0, __10__, 0, 0] - - __midpoint__ - This approach works in a similar way to the __upper__ approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket. The midpoint is identified by calculating the average of the upper and lower bounds. This approach also works better for small/narrow exponential histograms. + 6. Allocate count: [0, 0, __10__, 0, 0] + 7. Final Counts: [0, 0, __10__, 0, 0] + - **midpoint** - This approach works in a similar way to the **upper** approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket.
The midpoint is identified by calculating the average of the upper and lower bounds. This approach also works better for small/narrow exponential histograms. - >The __uniform__ and __random__ distribution algorithms both utilise the concept of intersecting boundaries. - Intersecting boundaries are any boundary in the `boundaries array` that falls between or on the lower and upper values of the Exponential Histogram boundaries. - _For Example:_ if you have an Exponential Histogram bucket with a lower bound of 10 and upper of 20, and your boundaries array is [5, 10, 15, 20, 25], the intersecting boundaries are 10, 15, and 20 because they lie within the range [10, 20]. + > The **uniform** and **random** distribution algorithms both utilise the concept of intersecting boundaries. + > Intersecting boundaries are any boundary in the `boundaries array` that falls between or on the lower and upper values of the Exponential Histogram boundaries. + > _For Example:_ if you have an Exponential Histogram bucket with a lower bound of 10 and upper of 20, and your boundaries array is [5, 10, 15, 20, 25], the intersecting boundaries are 10, 15, and 20 because they lie within the range [10, 20]. - - __uniform__ - This approach distributes the datapoints for each bucket uniformly across the intersecting __ExplicitBounds__. The algorithm works as follows: + - **uniform** - This approach distributes the datapoints for each bucket uniformly across the intersecting **ExplicitBounds**. The algorithm works as follows: - - If there are valid intersecting boundaries, the function evenly distributes the count across these boundaries. - - Calculate the count to be allocated to each boundary. - - If there is a remainder after dividing the count equally, it distributes the remainder by incrementing the count for some of the boundaries until the remainder is exhausted. + - If there are valid intersecting boundaries, the function evenly distributes the count across these boundaries. 
+ - Calculate the count to be allocated to each boundary. + - If there is a remainder after dividing the count equally, it distributes the remainder by incrementing the count for some of the boundaries until the remainder is exhausted. _For example Given:_ - 1. count = 10 - 2. Exponential Histogram Bounds: [10, 20] - 3. Boundaries: [5, 10, 15, 20, 25] - 4. Intersecting Boundaries: [10, 15, 20] - 5. Number of Intersecting Boundaries: 3 - 6. Using the formula: $count/numOfIntersections=10/3=3r1$ - - _Uniform Allocation:_ - - 7. Start with zeros: [0, 0, 0, 0, 0] - 8. Allocate 3 to each: [0, 3, 3, 3, 0] - 9. Distribute remainder $r$ 1: [0, 4, 3, 3, 0] - 10. Final Counts: [0, 4, 3, 3, 0] - - - __random__ - This approach distributes the datapoints for each bucket randomly across the intersecting __ExplicitBounds__. This approach works in a similar manner to the uniform distribution algorithm with the main difference being that points are distributed randomly instead of uniformly. This works as follows: - - If there are valid intersecting boundaries, calculate the proportion of the count that should be allocated to each boundary based on the overlap of the boundary with the provided range (lower to upper). - - For each boundary, a random fraction of the calculated proportion is allocated. - - Any remaining count (_due to rounding or random distribution_) is then distributed randomly among the intersecting boundaries. - - If the bucket range does not intersect with any boundaries, the entire count is assigned to the start boundary. - -- `ExplicitBounds` represents the list of bucket boundaries for the new histogram. This argument is __required__ and __cannot be empty__. - -__WARNINGS:__ - -- The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries and identify the best distribution approach for your data in order to minimize this loss. 
+ + 1. count = 10 + 2. Exponential Histogram Bounds: [10, 20] + 3. Boundaries: [5, 10, 15, 20, 25] + 4. Intersecting Boundaries: [10, 15, 20] + 5. Number of Intersecting Boundaries: 3 + 6. Using the formula: $count/numOfIntersections=10/3=3r1$ + + _Uniform Allocation:_ + + 7. Start with zeros: [0, 0, 0, 0, 0] + 8. Allocate 3 to each: [0, 3, 3, 3, 0] + 9. Distribute remainder $r$ 1: [0, 4, 3, 3, 0] + 10. Final Counts: [0, 4, 3, 3, 0] + + - **random** - This approach distributes the datapoints for each bucket randomly across the intersecting **ExplicitBounds**. This approach works in a similar manner to the uniform distribution algorithm with the main difference being that points are distributed randomly instead of uniformly. This works as follows: + - If there are valid intersecting boundaries, calculate the proportion of the count that should be allocated to each boundary based on the overlap of the boundary with the provided range (lower to upper). + - For each boundary, a random fraction of the calculated proportion is allocated. + - Any remaining count (_due to rounding or random distribution_) is then distributed randomly among the intersecting boundaries. + - If the bucket range does not intersect with any boundaries, the entire count is assigned to the start boundary. + +- `ExplicitBounds` represents the list of bucket boundaries for the new histogram. This argument is **required** and **cannot be empty**. + +**WARNINGS:** + +- The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries and identify the best distribution approach for your data in order to minimize this loss. For example, selecting Boundaries that are too high or too low may result histogram buckets that are too wide or too narrow, respectively. -- __Negative Bucket Counts__ are not supported in Explicit Histograms, as such negative bucket counts are ignored. 
+- **Negative Bucket Counts** are not supported in Explicit Histograms; as such, negative bucket counts are ignored. -- __ZeroCounts__ are only allocated if the ExplicitBounds array contains a zero boundary. That is, if the Explicit Boundaries that you provide does not start with `0`, the function will not allocate any zero counts from the Exponential Histogram. +- **ZeroCounts** are only allocated if the ExplicitBounds array contains a zero boundary. That is, if the Explicit Boundaries that you provide do not start with `0`, the function will not allocate any zero counts from the Exponential Histogram. This function should only be used when Exponential Histograms are not suitable for the downstream consumers or if upstream metric sources are unable to generate Explicit Histograms. -__Example__: +**Example**: - `convert_exponential_histogram_to_histogram("random", [0.0, 10.0, 100.0, 1000.0, 10000.0])` @@ -605,7 +613,7 @@ Examples: - `aggregate_on_attributes("max") where metric.name == "system.memory.usage"` - `aggregate_on_attributes("max", []) where metric.name == "system.memory.usage"` -The `aggregate_on_attributes` function can also be used in conjunction with +The `aggregate_on_attributes` function can also be used in conjunction with [keep_matching_keys](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/ottlfuncs#keep_matching_keys) or [delete_matching_keys](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/ottlfuncs#delete_matching_keys).
@@ -613,8 +621,8 @@ For example, to remove attribute keys matching a regex and aggregate the metrics ```yaml statements: - - delete_matching_keys(resource.attributes, "(?i).*myRegex.*") where metric.name == "system.memory.usage" - - aggregate_on_attributes("sum") where metric.name == "system.memory.usage" + - delete_matching_keys(resource.attributes, "(?i).*myRegex.*") where metric.name == "system.memory.usage" + - aggregate_on_attributes("sum") where metric.name == "system.memory.usage" ``` To aggregate only using a specified set of attributes, you can use `keep_matching_keys`. @@ -649,7 +657,7 @@ Examples: - `aggregate_on_attribute_value("sum", "attr1", ["val1", "val2"], "new_val") where metric.name == "system.memory.usage"` -The `aggregate_on_attribute_value` function can also be used in conjunction with +The `aggregate_on_attribute_value` function can also be used in conjunction with [keep_matching_keys](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/ottlfuncs#keep_matching_keys) or [delete_matching_keys](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/ottlfuncs#delete_matching_keys). @@ -657,8 +665,8 @@ For example, to remove attribute keys matching a regex and aggregate the metrics ```yaml statements: - - delete_matching_keys(resource.attributes, "(?i).*myRegex.*") where metric.name == "system.memory.usage" - - aggregate_on_attribute_value("sum", "attr1", ["val1", "val2"], "new_val") where metric.name == "system.memory.usage" + - delete_matching_keys(resource.attributes, "(?i).*myRegex.*") where metric.name == "system.memory.usage" + - aggregate_on_attribute_value("sum", "attr1", ["val1", "val2"], "new_val") where metric.name == "system.memory.usage" ``` To aggregate only using a specified set of attributes, you can use `keep_matching_keys`. 
@@ -672,12 +680,13 @@ The `merge_histogram_buckets` function merges a specific bucket of a histogram w `bound` is a float64 value that specifies which bucket boundary to remove. The function will merge the bucket that ends at this boundary with the next bucket. The function: -- Preserves the total count and sum of the histogram. -- Only works on histogram metrics (no-op for other metric types). -- Uses floating-point tolerance (epsilon = 1e-12) when matching the bound. -- Makes no changes if: - - The bound is not found. - - The histogram is empty. + +- Preserves the total count and sum of the histogram. +- Only works on histogram metrics (no-op for other metric types). +- Uses floating-point tolerance (epsilon = 1e-12) when matching the bound. +- Makes no changes if: + - The bound is not found. + - The histogram is empty. - The histogram structure is invalid (mismatched bounds and counts). Examples: @@ -685,7 +694,6 @@ Examples: ```yaml # Merge the bucket ending at 0.5 with the next bucket - merge_histogram_buckets(0.5) where metric.name == "http_request_duration" - # Given a histogram with: # bounds: [0.1, 0.5, 1.0] # counts: [5, 8, 3, 1] @@ -695,6 +703,37 @@ Examples: # counts: [5, 11, 1] ``` +### parse_clf + +`parse_clf(target)` + +The `parse_clf` function returns a `pcommon.Map` that is the result of parsing the `target` string as a [Common Log Format (CLF)](https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format) HTTP access log entry. + +`target` is a Getter that returns a string. If the returned string is empty, or cannot be parsed as CLF, an error will be returned. + +The CLF entry is expected to have the form: + +``` +remotehost rfc931 authuser [date] "request" status bytes +``` + +The returned map has the following fields: + +- `remote_host` — the client's DNS name or IP address. +- `rfc931` — the remote logname of the user (CLF uses `-` when unknown). +- `authuser` — the authenticated user (CLF uses `-` when unknown). 
+- `timestamp` — the contents of the bracketed date field, preserved as a string. +- `request` — the raw request line as sent by the client. +- `method`, `request_uri`, `protocol` — the parsed components of the request line, only set when the request line is well-formed. +- `status` — the HTTP status code as an integer. +- `bytes` — the content-length of the response as an integer. Omitted when CLF reports `-` (e.g. on a 304 response). + +Examples: + +- `parse_clf(body)` + +- `parse_clf("127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326")` + ### parse_leef `parse_leef(target)` @@ -705,9 +744,9 @@ The `parse_leef` function returns a `pcommon.Map` that is the result of parsing `parse_leef` can parse both LEEF 1.0 and LEEF 2.0 messages. The function is tolerant of an optional syslog header preceding the `LEEF:` token. The returned map has the following top-level fields: -* `version` — the LEEF version (`"1.0"` or `"2.0"`). -* `vendor`, `product_name`, `product_version`, `event_id` — the LEEF header fields. -* `attributes` — a map of the parsed key/value attribute pairs. +- `version` — the LEEF version (`"1.0"` or `"2.0"`). +- `vendor`, `product_name`, `product_version`, `event_id` — the LEEF header fields. +- `attributes` — a map of the parsed key/value attribute pairs. For LEEF 1.0 the attribute delimiter is always a tab. For LEEF 2.0 the delimiter is taken from the header and may be specified as a single character or as a hex value (e.g. `0x09`). @@ -729,52 +768,51 @@ The primary use case of the `set_semconv_span_name()` function is to address hig Parameters: -* `semconvVersion` is the version of the Semantic Conventions used to generate the `span.name`, older semconv attributes are supported. Versions `1.37.0` to `1.40.0` are supported. -* `originalSpanNameAttribute` is the optional name of the attribute used to copy the original `span.name` if different from the name derived from semantic conventions. 
+- `semconvVersion` is the version of the Semantic Conventions used to generate the `span.name`, older semconv attributes are supported. Versions `1.37.0` to `1.40.0` are supported. +- `originalSpanNameAttribute` is the optional name of the attribute used to copy the original `span.name` if different from the name derived from semantic conventions. Sanitization examples: -* Span with high-cardinality name but recommended semantic convention attributes - * Incoming span: - ``` - span.name: GET /api/v1/users/123 # /!\ high cardinality - span.kind: server - span.attributes - http.request.method: GET - http.route: /api/v1/users/{id} - url.path: /api/v1/users/123 - ``` - * Span name after applying `set_semconv_span_name("1.40.0")`: `GET /api/v1/users/{id}` - * No loss of information on `span.name` occurs because the recommended attribute `http.route` is present. -* Span with high-cardinality name lacking recommended semantic convention attribute `http.route` - * Incoming span: - ``` - span.name: GET /api/v1/users/123 # /!\ high cardinality - span.kind: server - span.attributes - http.request.method: GET - url.path: /api/v1/users/123 - ``` - * Span name after applying `set_semconv_span_name("1.40.0")`: `GET` - * Loss of information on `span.name` occurs because the recommended attribute `http.route` is missing. +- Span with high-cardinality name but recommended semantic convention attributes + - Incoming span: + ``` + span.name: GET /api/v1/users/123 # /!\ high cardinality + span.kind: server + span.attributes + http.request.method: GET + http.route: /api/v1/users/{id} + url.path: /api/v1/users/123 + ``` + - Span name after applying `set_semconv_span_name("1.40.0")`: `GET /api/v1/users/{id}` + - No loss of information on `span.name` occurs because the recommended attribute `http.route` is present. 
+- Span with high-cardinality name lacking recommended semantic convention attribute `http.route` + - Incoming span: + ``` + span.name: GET /api/v1/users/123 # /!\ high cardinality + span.kind: server + span.attributes + http.request.method: GET + url.path: /api/v1/users/123 + ``` + - Span name after applying `set_semconv_span_name("1.40.0")`: `GET` + - Loss of information on `span.name` occurs because the recommended attribute `http.route` is missing. Note that this loss of information is mitigated if the instrumentation produced attributes that contain the URL path like `url.path` or `url.full`. -* Compliant span name is unchanged - * Incoming span: - ``` - span.name: GET /api/v1/users/{id} - span.kind: server - span.attributes - http.request.method: GET - http.route: /api/v1/users/{id} - url.path: /api/v1/users/123 - ``` - * Span name after applying `set_semconv_span_name("1.40.0")`: `GET /api/v1/users/{id}` - +- Compliant span name is unchanged + - Incoming span: + ``` + span.name: GET /api/v1/users/{id} + span.kind: server + span.attributes + http.request.method: GET + http.route: /api/v1/users/{id} + url.path: /api/v1/users/123 + ``` + - Span name after applying `set_semconv_span_name("1.40.0")`: `GET /api/v1/users/{id}` Backward compatibility: `set_semconv_span_name` will map the following attributes to their equivalents per the v1.39.0 semantic conventions: | v1.40.0 Attribute | Older attribute | -|-----------------------|--------------------| +| --------------------- | ------------------ | | `http.request.method` | `http.method` | | `rpc.method` | `rpc.grpc.method` | | `rpc.service` | `rpc.grpc.service` | @@ -792,16 +830,19 @@ Examples: ## Examples ### Perform transformation if field does not exist + Set attribute `test` to `"pass"` if the attribute `test` does not exist: + ```yaml transform: error_mode: ignore trace_statements: - # accessing a map with a key that does not exist will return nil. 
+ # accessing a map with a key that does not exist will return nil. - set(span.attributes["test"], "pass") where span.attributes["test"] == nil -``` +``` ### Rename attribute + There are 2 ways to rename an attribute key: You can either set a new attribute and delete the old: @@ -811,8 +852,8 @@ transform: error_mode: ignore trace_statements: - set(resource.attributes["namespace"], resource.attributes["k8s.namespace.name"]) - - delete_key(resource.attributes, "k8s.namespace.name") -``` + - delete_key(resource.attributes, "k8s.namespace.name") +``` Or you can update the key using regex: @@ -821,9 +862,10 @@ transform: error_mode: ignore trace_statements: - replace_all_patterns(resource.attributes, "key", "k8s\\.namespace\\.name", "namespace") -``` +``` ### Move field to attribute + Set attribute `body` to the value of the log body: ```yaml @@ -831,10 +873,12 @@ transform: error_mode: ignore log_statements: - set(log.attributes["body"], log.body) -``` +``` ### Combine two attributes -Set attribute `test` to the value of attributes `"foo"` and `"bar"` combined. + +Set attribute `test` to the value of attributes `"foo"` and `"bar"` combined. + ```yaml transform: error_mode: ignore @@ -866,13 +910,13 @@ transform: - statements: # Parse body as JSON and merge the resulting map with the cache map, ignoring non-json bodies. # cache is a field exposed by OTTL that is a temporary storage place for complex operations. - - merge_maps(log.cache, ParseJSON(log.body), "upsert") where IsMatch(log.body, "^\\{") - + - merge_maps(log.cache, ParseJSON(log.body), "upsert") where IsMatch(log.body, "^\\{") + # Set attributes using the values merged into cache. # If the attribute doesn't exist in cache then nothing happens. - set(log.attributes["attr1"], log.cache["attr1"]) - set(log.attributes["attr2"], log.cache["attr2"]) - + # To access nested maps you can chain index ([]) operations. # If nested or attr3 do not exist in cache then nothing happens. 
- set(log.attributes["nested.attr3"], log.cache["nested"]["attr3"]) @@ -943,7 +987,7 @@ view into how OTTL views the underlying data. receivers: file_log: start_at: beginning - include: [ test.log ] + include: [test.log] processors: transform: @@ -952,7 +996,6 @@ processors: - set(resource.attributes["test"], "pass") - set(scope.attributes["test"], ["pass"]) - set(log.attributes["test"], true) - exporters: debug: @@ -985,15 +1028,14 @@ service: See [CONTRIBUTING.md](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/processor/transformprocessor/CONTRIBUTING.md). - ## Warnings -The Transform Processor uses the [OpenTelemetry Transformation Language](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/README.md) (OTTL) which allows users to modify all aspects of their telemetry. Some specific risks are listed below, but this is not an exhaustive list. In general, understand your data before using the Transform Processor. +The Transform Processor uses the [OpenTelemetry Transformation Language](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/README.md) (OTTL) which allows users to modify all aspects of their telemetry. Some specific risks are listed below, but this is not an exhaustive list. In general, understand your data before using the Transform Processor. -- [Unsound Transformations](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#unsound-transformations): Several Metric-only functions allow you to transform one metric data type to another or create new metrics from an existing metrics. Transformations between metric data types are not defined in the [metrics data model](https://github.com/open-telemetry/opentelemetry-specification/blob/main//specification/metrics/data-model.md). 
These functions have the expectation that you understand the incoming data and know that it can be meaningfully converted to a new metric data type or can meaningfully be used to create new metrics. - - Although the OTTL allows the `set` function to be used with `metric.data_type`, its implementation in the Transform Processor is NOOP. To modify a data type you must use a function specific to that purpose. -- [Identity Conflict](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#identity-conflict): Transformation of metrics have the potential to affect the identity of a metric leading to an Identity Crisis. Be especially cautious when transforming metric name and when reducing/changing existing attributes. Adding new attributes is safe. -- [Orphaned Telemetry](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#orphaned-telemetry): The processor allows you to modify `span_id`, `trace_id`, and `parent_span_id` for traces and `span_id`, and `trace_id` logs. Modifying these fields could lead to orphaned spans or logs. +- [Unsound Transformations](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#unsound-transformations): Several Metric-only functions allow you to transform one metric data type to another or create new metrics from an existing metrics. Transformations between metric data types are not defined in the [metrics data model](https://github.com/open-telemetry/opentelemetry-specification/blob/main//specification/metrics/data-model.md). These functions have the expectation that you understand the incoming data and know that it can be meaningfully converted to a new metric data type or can meaningfully be used to create new metrics. + - Although the OTTL allows the `set` function to be used with `metric.data_type`, its implementation in the Transform Processor is NOOP. 
To modify a data type you must use a function specific to that purpose. +- [Identity Conflict](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#identity-conflict): Transformation of metrics have the potential to affect the identity of a metric leading to an Identity Crisis. Be especially cautious when transforming metric name and when reducing/changing existing attributes. Adding new attributes is safe. +- [Orphaned Telemetry](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#orphaned-telemetry): The processor allows you to modify `span_id`, `trace_id`, and `parent_span_id` for traces and `span_id`, and `trace_id` logs. Modifying these fields could lead to orphaned spans or logs. ## Feature Gate @@ -1006,14 +1048,14 @@ This option is useful when applying transformations which alter the resource or The feature is currently only available for log processing. #### Example Usage - + `config.yaml`: - - ```yaml - transform: - flatten_data: true - log_statements: - - set(resource.attributes["to"], log.attributes["from"]) - ``` - - Run collector: `./otelcol --config config.yaml --feature-gates=transform.flatten.logs` + +```yaml +transform: + flatten_data: true + log_statements: + - set(resource.attributes["to"], log.attributes["from"]) +``` + +Run collector: `./otelcol --config config.yaml --feature-gates=transform.flatten.logs` diff --git a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go new file mode 100644 index 0000000000000..a87c4aa8d83f6 --- /dev/null +++ b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go @@ -0,0 +1,97 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package logparsingfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor/internal/logparsingfuncs" + +import ( 
+ "context" + "errors" + "fmt" + "regexp" + "strconv" + "strings" + + "go.opentelemetry.io/collector/pdata/pcommon" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog" +) + +type parseCLFArguments struct { + Target ottl.StringGetter[*ottllog.TransformContext] +} + +func NewParseCLFFactory() ottl.Factory[*ottllog.TransformContext] { + return ottl.NewFactory("parse_clf", &parseCLFArguments{}, createParseCLFFunction) +} + +func createParseCLFFunction(_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[*ottllog.TransformContext], error) { + args, ok := oArgs.(*parseCLFArguments) + if !ok { + return nil, errors.New("parseCLFFactory args must be of type *parseCLFArguments") + } + + return parseCLF(args.Target), nil +} + +func parseCLF(target ottl.StringGetter[*ottllog.TransformContext]) ottl.ExprFunc[*ottllog.TransformContext] { + return func(ctx context.Context, tCtx *ottllog.TransformContext) (any, error) { + source, err := target.Get(ctx, tCtx) + if err != nil { + return nil, err + } + + if source == "" { + return nil, errors.New("cannot parse empty CLF message") + } + + return parseCLFMessage(source) + } +} + +// clfRegex matches the Common Log Format: +// +// remotehost rfc931 authuser [date] "request" status bytes +// +// See https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format +var clfRegex = regexp.MustCompile(`^(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]*)" (\S+) (\S+)$`) + +func parseCLFMessage(message string) (pcommon.Map, error) { + matches := clfRegex.FindStringSubmatch(strings.TrimSpace(message)) + if matches == nil { + return pcommon.Map{}, errors.New("invalid CLF message: does not match expected format") + } + + result := pcommon.NewMap() + result.PutStr("remote_host", matches[1]) + result.PutStr("rfc931", matches[2]) + result.PutStr("authuser", matches[3]) + result.PutStr("timestamp", matches[4]) + + request := matches[5] + 
result.PutStr("request", request) + + if requestParts := strings.SplitN(request, " ", 3); len(requestParts) == 3 { + result.PutStr("method", requestParts[0]) + result.PutStr("request_uri", requestParts[1]) + result.PutStr("protocol", requestParts[2]) + } + + status := matches[6] + statusInt, err := strconv.ParseInt(status, 10, 64) + if err != nil { + return pcommon.Map{}, fmt.Errorf("invalid status code %q: %w", status, err) + } + result.PutInt("status", statusInt) + + bytesStr := matches[7] + if bytesStr != "-" { + bytesInt, err := strconv.ParseInt(bytesStr, 10, 64) + if err != nil { + return pcommon.Map{}, fmt.Errorf("invalid bytes value %q: %w", bytesStr, err) + } + result.PutInt("bytes", bytesInt) + } + + return result, nil +} diff --git a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go new file mode 100644 index 0000000000000..d62ccda8a47ac --- /dev/null +++ b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go @@ -0,0 +1,320 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package logparsingfuncs + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/pdata/pcommon" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog" +) + +func Test_parseCLF(t *testing.T) { + tests := []struct { + name string + input string + expected map[string]any + }{ + { + name: "canonical example from the W3C spec", + input: `127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] "GET /apache_pb.gif HTTP/1.0" 200 2326`, + expected: map[string]any{ + "remote_host": "127.0.0.1", + "rfc931": "-", + "authuser": "frank", + "timestamp": "10/Oct/2000:13:55:36 -0700", + "request": "GET /apache_pb.gif HTTP/1.0", + "method": "GET", + 
"request_uri": "/apache_pb.gif", + "protocol": "HTTP/1.0", + "status": int64(200), + "bytes": int64(2326), + }, + }, + { + name: "all dashes for unknown fields", + input: `- - - [10/Oct/2000:13:55:36 -0700] "GET / HTTP/1.1" 200 0`, + expected: map[string]any{ + "remote_host": "-", + "rfc931": "-", + "authuser": "-", + "timestamp": "10/Oct/2000:13:55:36 -0700", + "request": "GET / HTTP/1.1", + "method": "GET", + "request_uri": "/", + "protocol": "HTTP/1.1", + "status": int64(200), + "bytes": int64(0), + }, + }, + { + name: "bytes is dash (no content)", + input: `192.168.1.1 - - [10/Oct/2000:13:55:36 -0700] "GET /redirect HTTP/1.1" 304 -`, + expected: map[string]any{ + "remote_host": "192.168.1.1", + "rfc931": "-", + "authuser": "-", + "timestamp": "10/Oct/2000:13:55:36 -0700", + "request": "GET /redirect HTTP/1.1", + "method": "GET", + "request_uri": "/redirect", + "protocol": "HTTP/1.1", + "status": int64(304), + // bytes intentionally omitted + }, + }, + { + name: "IPv6 remote host", + input: `2001:db8::1 - - [10/Oct/2000:13:55:36 -0700] "POST /api/v1/users HTTP/1.1" 201 512`, + expected: map[string]any{ + "remote_host": "2001:db8::1", + "rfc931": "-", + "authuser": "-", + "timestamp": "10/Oct/2000:13:55:36 -0700", + "request": "POST /api/v1/users HTTP/1.1", + "method": "POST", + "request_uri": "/api/v1/users", + "protocol": "HTTP/1.1", + "status": int64(201), + "bytes": int64(512), + }, + }, + { + name: "hostname remote host with rfc931 user", + input: `client.example.com bob alice [25/Dec/2023:00:00:00 +0000] "DELETE /resource HTTP/2.0" 204 0`, + expected: map[string]any{ + "remote_host": "client.example.com", + "rfc931": "bob", + "authuser": "alice", + "timestamp": "25/Dec/2023:00:00:00 +0000", + "request": "DELETE /resource HTTP/2.0", + "method": "DELETE", + "request_uri": "/resource", + "protocol": "HTTP/2.0", + "status": int64(204), + "bytes": int64(0), + }, + }, + { + name: "request_uri with query string", + input: `10.0.0.1 - - [10/Oct/2000:13:55:36 -0700] 
"GET /search?q=hello+world&page=2 HTTP/1.1" 200 1024`, + expected: map[string]any{ + "remote_host": "10.0.0.1", + "rfc931": "-", + "authuser": "-", + "timestamp": "10/Oct/2000:13:55:36 -0700", + "request": "GET /search?q=hello+world&page=2 HTTP/1.1", + "method": "GET", + "request_uri": "/search?q=hello+world&page=2", + "protocol": "HTTP/1.1", + "status": int64(200), + "bytes": int64(1024), + }, + }, + { + name: "leading and trailing whitespace tolerated", + input: " 127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] \"GET / HTTP/1.1\" 200 42 ", + expected: map[string]any{ + "remote_host": "127.0.0.1", + "rfc931": "-", + "authuser": "-", + "timestamp": "10/Oct/2000:13:55:36 -0700", + "request": "GET / HTTP/1.1", + "method": "GET", + "request_uri": "/", + "protocol": "HTTP/1.1", + "status": int64(200), + "bytes": int64(42), + }, + }, + { + name: "malformed request line is preserved but not split", + input: `127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "MALFORMED" 400 0`, + expected: map[string]any{ + "remote_host": "127.0.0.1", + "rfc931": "-", + "authuser": "-", + "timestamp": "10/Oct/2000:13:55:36 -0700", + "request": "MALFORMED", + "status": int64(400), + "bytes": int64(0), + }, + }, + { + name: "empty request line", + input: `127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "" 408 0`, + expected: map[string]any{ + "remote_host": "127.0.0.1", + "rfc931": "-", + "authuser": "-", + "timestamp": "10/Oct/2000:13:55:36 -0700", + "request": "", + "status": int64(408), + "bytes": int64(0), + }, + }, + { + name: "large byte counts", + input: `127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET /download HTTP/1.1" 200 4294967296`, + expected: map[string]any{ + "remote_host": "127.0.0.1", + "rfc931": "-", + "authuser": "-", + "timestamp": "10/Oct/2000:13:55:36 -0700", + "request": "GET /download HTTP/1.1", + "method": "GET", + "request_uri": "/download", + "protocol": "HTTP/1.1", + "status": int64(200), + "bytes": int64(4294967296), + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, 
func(t *testing.T) { + target := ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { + return tt.input, nil + }, + } + exprFunc := parseCLF(target) + result, err := exprFunc(t.Context(), nil) + require.NoError(t, err) + + resultMap, ok := result.(pcommon.Map) + require.True(t, ok, "result should be pcommon.Map") + + assertCLFMap(t, resultMap, tt.expected) + }) + } +} + +func Test_parseCLF_errors(t *testing.T) { + tests := []struct { + name string + input string + expectedError string + }{ + { + name: "plain text", + input: "this is not a CLF message", + expectedError: "does not match expected format", + }, + { + name: "missing brackets around date", + input: `127.0.0.1 - - 10/Oct/2000:13:55:36 -0700 "GET / HTTP/1.1" 200 42`, + expectedError: "does not match expected format", + }, + { + name: "missing quotes around request", + input: `127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] GET / HTTP/1.1 200 42`, + expectedError: "does not match expected format", + }, + { + name: "non-numeric status", + input: `127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET / HTTP/1.1" OK 42`, + expectedError: "invalid status code", + }, + { + name: "non-numeric bytes", + input: `127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET / HTTP/1.1" 200 lots`, + expectedError: "invalid bytes value", + }, + { + name: "too few fields", + input: `127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET / HTTP/1.1" 200`, + expectedError: "does not match expected format", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + target := ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { + return tt.input, nil + }, + } + exprFunc := parseCLF(target) + _, err := exprFunc(t.Context(), nil) + require.Error(t, err) + assert.Contains(t, err.Error(), tt.expectedError) + }) + } +} + +func Test_parseCLF_empty(t *testing.T) { + target := 
ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { + return "", nil + }, + } + exprFunc := parseCLF(target) + _, err := exprFunc(t.Context(), nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "cannot parse empty CLF message") +} + +func Test_parseCLF_target_error(t *testing.T) { + target := ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { + return nil, assert.AnError + }, + } + exprFunc := parseCLF(target) + _, err := exprFunc(t.Context(), nil) + require.Error(t, err) +} + +func Test_createParseCLFFunction(t *testing.T) { + factory := NewParseCLFFactory() + assert.Equal(t, "parse_clf", factory.Name()) + + args := &parseCLFArguments{ + Target: ottl.StandardStringGetter[*ottllog.TransformContext]{ + Getter: func(context.Context, *ottllog.TransformContext) (any, error) { + return `127.0.0.1 - - [10/Oct/2000:13:55:36 -0700] "GET / HTTP/1.1" 200 42`, nil + }, + }, + } + + exprFunc, err := factory.CreateFunction(ottl.FunctionContext{}, args) + require.NoError(t, err) + + result, err := exprFunc(t.Context(), nil) + require.NoError(t, err) + assert.NotNil(t, result) +} + +func Test_createParseCLFFunction_wrongArgs(t *testing.T) { + factory := NewParseCLFFactory() + + _, err := factory.CreateFunction(ottl.FunctionContext{}, nil) + require.Error(t, err) + assert.Contains(t, err.Error(), "parseCLFFactory args must be of type *parseCLFArguments") +} + +func assertCLFMap(t *testing.T, m pcommon.Map, expected map[string]any) { + t.Helper() + + assert.Equal(t, len(expected), m.Len(), "field count mismatch; got map %v", m.AsRaw()) + + for k, v := range expected { + val, ok := m.Get(k) + require.True(t, ok, "key %q should exist", k) + switch want := v.(type) { + case string: + assert.Equal(t, want, val.Str(), "value for key %q mismatch", k) + case int64: + assert.Equal(t, want, val.Int(), "value for key %q 
mismatch", k) + default: + t.Fatalf("unexpected expected-value type for key %q: %T", k, v) + } + } +} diff --git a/processor/transformprocessor/internal/logs/functions.go b/processor/transformprocessor/internal/logs/functions.go index c84b621798234..2bcbe3c1b0ca9 100644 --- a/processor/transformprocessor/internal/logs/functions.go +++ b/processor/transformprocessor/internal/logs/functions.go @@ -16,6 +16,7 @@ func LogFunctions() map[string]ottl.Factory[*ottllog.TransformContext] { functions := ottlfuncs.StandardFuncs[*ottllog.TransformContext]() logFunctions := ottl.CreateFactoryMap( + logparsingfuncs.NewParseCLFFactory(), logparsingfuncs.NewParseLEEFFactory(), ) diff --git a/processor/transformprocessor/internal/logs/functions_test.go b/processor/transformprocessor/internal/logs/functions_test.go index a1adde91dce48..9fd58b75b7c75 100644 --- a/processor/transformprocessor/internal/logs/functions_test.go +++ b/processor/transformprocessor/internal/logs/functions_test.go @@ -16,6 +16,7 @@ import ( func Test_LogFunctions(t *testing.T) { expected := ottlfuncs.StandardFuncs[*ottllog.TransformContext]() + expected["parse_clf"] = logparsingfuncs.NewParseCLFFactory() expected["parse_leef"] = logparsingfuncs.NewParseLEEFFactory() actual := LogFunctions() require.Len(t, actual, len(expected)) From b5ac6d07e8e9904f8da827ae3a6e4025ba92eab2 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Fri, 15 May 2026 11:08:06 -0400 Subject: [PATCH 13/16] tweak changelog description --- .chloggen/transformprocessor-parse-clf.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.chloggen/transformprocessor-parse-clf.yaml b/.chloggen/transformprocessor-parse-clf.yaml index 83e6443f10448..4c41ac5ec4daf 100644 --- a/.chloggen/transformprocessor-parse-clf.yaml +++ b/.chloggen/transformprocessor-parse-clf.yaml @@ -7,7 +7,7 @@ change_type: enhancement component: processor/transform # A brief description of the change. 
Surround your text with quotes ("") if it needs to start with a backtick (`). -note: Add `parse_clf` OTTL function for parsing Common Log Format (CLF) HTTP access log entries. +note: Add `parse_clf` function for parsing Common Log Format (CLF) HTTP access log entries. # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. issues: [48349] From 1a7812afd467cbe278e5fa6753a9707309735093 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Fri, 15 May 2026 11:31:00 -0400 Subject: [PATCH 14/16] make generate --- processor/transformprocessor/README.md | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 351090ef39489..7ef913d0a21ee 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -1,23 +1,20 @@ - # Transform Processor - -| Status | | -| -------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| Stability | [development]: profiles | -| | [beta]: traces, metrics, logs | -| Distributions | [contrib], [k8s] | -| Warnings | [Unsound Transformations, Identity Conflict, Orphaned Telemetry, 
Other](#warnings) | -| Issues | [![Open issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aopen%20label%3Aprocessor%2Ftransform%20&label=open&color=orange&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aopen+is%3Aissue+label%3Aprocessor%2Ftransform) [![Closed issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aclosed%20label%3Aprocessor%2Ftransform%20&label=closed&color=blue&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aclosed+is%3Aissue+label%3Aprocessor%2Ftransform) | -| Code coverage | [![codecov](https://codecov.io/github/open-telemetry/opentelemetry-collector-contrib/graph/main/badge.svg?component=processor_transform)](https://app.codecov.io/gh/open-telemetry/opentelemetry-collector-contrib/tree/main/?components%5B0%5D=processor_transform&displayType=list) | -| [Code Owners](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/CONTRIBUTING.md#becoming-a-code-owner) | [@TylerHelmuth](https://www.github.com/TylerHelmuth), [@evan-bradley](https://www.github.com/evan-bradley), [@edmocosta](https://www.github.com/edmocosta), [@bogdandrutu](https://www.github.com/bogdandrutu) \| Seeking more code owners! 
| -| Emeritus | [@anuraaga](https://www.github.com/anuraaga), [@kentquirk](https://www.github.com/kentquirk) | +| Status | | +| ------------- |-----------| +| Stability | [development]: profiles | +| | [beta]: traces, metrics, logs | +| Distributions | [contrib], [k8s] | +| Warnings | [Unsound Transformations, Identity Conflict, Orphaned Telemetry, Other](#warnings) | +| Issues | [![Open issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aopen%20label%3Aprocessor%2Ftransform%20&label=open&color=orange&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aopen+is%3Aissue+label%3Aprocessor%2Ftransform) [![Closed issues](https://img.shields.io/github/issues-search/open-telemetry/opentelemetry-collector-contrib?query=is%3Aissue%20is%3Aclosed%20label%3Aprocessor%2Ftransform%20&label=closed&color=blue&logo=opentelemetry)](https://github.com/open-telemetry/opentelemetry-collector-contrib/issues?q=is%3Aclosed+is%3Aissue+label%3Aprocessor%2Ftransform) | +| Code coverage | [![codecov](https://codecov.io/github/open-telemetry/opentelemetry-collector-contrib/graph/main/badge.svg?component=processor_transform)](https://app.codecov.io/gh/open-telemetry/opentelemetry-collector-contrib/tree/main/?components%5B0%5D=processor_transform&displayType=list) | +| [Code Owners](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/CONTRIBUTING.md#becoming-a-code-owner) | [@TylerHelmuth](https://www.github.com/TylerHelmuth), [@evan-bradley](https://www.github.com/evan-bradley), [@edmocosta](https://www.github.com/edmocosta), [@bogdandrutu](https://www.github.com/bogdandrutu) \| Seeking more code owners! 
| +| Emeritus | [@anuraaga](https://www.github.com/anuraaga), [@kentquirk](https://www.github.com/kentquirk) | [development]: https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/component-stability.md#development [beta]: https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/component-stability.md#beta [contrib]: https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-contrib [k8s]: https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-k8s - > [!NOTE] From 3b2ebd3c3b11dc8f6a97bd1b995793481008ff98 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Fri, 15 May 2026 11:51:54 -0400 Subject: [PATCH 15/16] clean up README diff, improve test robustness --- processor/transformprocessor/README.md | 308 +++++++++--------- .../logparsingfuncs/func_parse_clf.go | 6 +- .../logparsingfuncs/func_parse_clf_test.go | 9 +- 3 files changed, 160 insertions(+), 163 deletions(-) diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 7ef913d0a21ee..496f4b2890803 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -55,7 +55,7 @@ and allows you to configure a list of statements for the processor to execute. T Within each `` list, only certain OTTL Path prefixes can be used: | Signal | Path Prefix Values | -| ------------------ | ---------------------------------------------- | +|--------------------|------------------------------------------------| | trace_statements | `resource`, `scope`, `span`, and `spanevent` | | metric_statements | `resource`, `scope`, `metric`, and `datapoint` | | log_statements | `resource`, `scope`, and `log` | @@ -67,11 +67,11 @@ This means, for example, that you cannot use the Path `span.attributes` within t If the top-level `error_mode` is not specified, `propagate` will be used. 
The top-level `error_mode` can be overridden at statement group level, offering more granular control over error handling. If the statement group `error_mode` is not specified, the top-level `error_mode` is applied. -| error_mode | description | -| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------ | -| ignore | The processor ignores errors returned by statements, logs the error, and continues on to the next statement. This is the recommended mode. | -| silent | The processor ignores errors returned by statements, does not log the error, and continues on to the next statement. | -| propagate | The processor returns the error up the pipeline. This will result in the payload being dropped from the collector. | +| error_mode | description | +|------------|---------------------------------------------------------------------------------------------------------------------------------------------| +| ignore | The processor ignores errors returned by statements, logs the error, and continues on to the next statement. This is the recommended mode. | +| silent | The processor ignores errors returned by statements, does not log the error, and continues on to the next statement. | +| propagate | The processor returns the error up the pipeline. This will result in the payload being dropped from the collector. | ### Basic Config @@ -121,8 +121,8 @@ transform: - set(profile.original_payload_format, "json") ``` -In some situations a combination of Paths, functions, or enums is not allowed, and the solution -might require multiple [Advanced Config](#advanced-config) configuration groups. +In some situations a combination of Paths, functions, or enums is not allowed, and the solution +might require multiple [Advanced Config](#advanced-config) configuration groups. See [Context Inference](#context-inference) for more details. 
### Advanced Config @@ -141,7 +141,7 @@ transform: _statements: - context: string error_mode: propagate - conditions: + conditions: - string - string statements: @@ -182,7 +182,7 @@ transform: ``` The Transform Processor will enforce that all the Paths, functions, and enums used in a group's `statements` are parsable. -In some situations a combination of Paths, functions, or enums is not allowed, and it might require multiple configuration groups. +In some situations a combination of Paths, functions, or enums is not allowed, and it might require multiple configuration groups. See [Context Inference](#context-inference) for more details. ### Context inference @@ -239,7 +239,7 @@ The solution is to separate the statements into separate [Advanced Config](#adva ```yaml metric_statements: - statements: - - convert_sum_to_gauge() where metric.name == "system.processes.count" + - convert_sum_to_gauge() where metric.name == "system.processes.count" - statements: - limit(datapoint.attributes, 100, ["host.name"]) ``` @@ -251,9 +251,7 @@ You can learn more in-depth details on the capabilities and limitations of the O ## Supported functions: These common functions can be used for any Signal. - - - [OTTL Functions](https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/pkg/ottl/ottlfuncs) In addition to the common OTTL functions, the processor defines its own functions to help with transformations specific to this processor: @@ -278,7 +276,6 @@ In addition to the common OTTL functions, the processor defines its own function **Logs only functions** - [parse_clf](#parse_clf) - - [parse_leef](#parse_leef) **Traces only functions** @@ -303,7 +300,7 @@ Examples: Converts incoming metrics of type "Gauge" to type "Sum", retaining the metric's datapoints and setting its aggregation temporality and monotonicity accordingly. Noop for metrics that are not of type "Gauge". 
-`aggregation_temporality` is a string (`"cumulative"` or `"delta"`) that specifies the resultant metric's aggregation temporality. `is_monotonic` is a boolean that specifies the resultant metric's monotonicity. +`aggregation_temporality` is a string (`"cumulative"` or `"delta"`) that specifies the resultant metric's aggregation temporality. `is_monotonic` is a boolean that specifies the resultant metric's monotonicity. **NOTE:** This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use at your own risk. @@ -311,11 +308,12 @@ Examples: - `convert_gauge_to_sum("cumulative", false)` + - `convert_gauge_to_sum("delta", true)` ### extract_count_metric -> [!NOTE] +> [!NOTE] > This function supports Histograms, ExponentialHistograms and Summaries. `extract_count_metric(is_monotonic, Optional[suffix])` @@ -330,7 +328,7 @@ The name for the new metric will be ``. The fields The new metric that is created will be passed to all subsequent statements in the metrics statements list. -> [!WARNING] +> [!WARNING] > This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use only if you're confident you know what the resulting monotonicity should be. Examples: @@ -341,7 +339,7 @@ Examples: ### extract_percentile_metric -> [!NOTE] +> [!NOTE] > This function supports Histograms and ExponentialHistograms. `extract_percentile_metric(percentile, Optional[suffix])` @@ -370,7 +368,7 @@ Examples: ### extract_sum_metric -> [!NOTE] +> [!NOTE] > This function supports Histograms, ExponentialHistograms and Summaries. `extract_sum_metric(is_monotonic, Optional[suffix])` @@ -385,7 +383,7 @@ The name for the new metric will be ``. 
The fields The new metric that is created will be passed to all subsequent statements in the metrics statements list. -> [!WARNING] +> [!WARNING] > This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use only if you're confident you know what the resulting monotonicity should be. Examples: @@ -405,7 +403,7 @@ The `convert_summary_count_val_to_sum` function creates a new Sum metric from a `suffix` is an optional string that defines the suffix for the metric name. By default, it is set to `_count`. For backward compatibility, this default does not follow the [semantic naming conventions](https://opentelemetry.io/docs/specs/semconv/general/naming/#general-naming-considerations) and should ideally be `.count` instead. This default is expected to change in a future release. -The name for the new metric will be ``. The fields that are copied are: `timestamp`, `starttimestamp`, `attributes`, and `description`. The new metric that is created will be passed to all functions in the metrics statements list. Function conditions will apply. +The name for the new metric will be ``. The fields that are copied are: `timestamp`, `starttimestamp`, `attributes`, and `description`. The new metric that is created will be passed to all functions in the metrics statements list. Function conditions will apply. **NOTE:** This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use at your own risk. @@ -444,7 +442,7 @@ The `convert_summary_sum_val_to_sum` function creates a new Sum metric from a Su `suffix` is an optional string that defines the suffix for the metric name. By default, it is set to `_sum`. 
For backward compatibility, this default does not follow the [semantic naming conventions](https://opentelemetry.io/docs/specs/semconv/general/naming/#general-naming-considerations) and should ideally be `.sum` instead. This default is expected to change in a future release. -The name for the new metric will be ``. The fields that are copied are: `timestamp`, `starttimestamp`, `attributes`, and `description`. The new metric that is created will be passed to all functions in the metrics statements list. Function conditions will apply. +The name for the new metric will be ``. The fields that are copied are: `timestamp`, `starttimestamp`, `attributes`, and `description`. The new metric that is created will be passed to all functions in the metrics statements list. Function conditions will apply. **NOTE:** This function may cause a metric to break semantics for [Sum metrics](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/data-model.md#sums). Use at your own risk. @@ -462,7 +460,7 @@ The `copy_metric` function copies the current metric, adding it to the end of th `name` is an optional string. `description` is an optional string. `unit` is an optional string. -The new metric will be exactly the same as the current metric. You can use the optional parameters to set the new metric's name, description, and unit. +The new metric will be exactly the same as the current metric. You can use the optional parameters to set the new metric's name, description, and unit. **NOTE:** The new metric is appended to the end of the metric slice and therefore will be included in all the metric statements. It is a best practice to ALWAYS include a Where clause when copying a metric that WILL NOT match the new metric. 
@@ -470,11 +468,13 @@ Examples: - `copy_metric(name="http.request.status_code", unit="s") where metric.name == "http.status_code` + - `copy_metric(desc="new desc") where metric.description == "old desc"` + ### convert_exponential_histogram_to_histogram -**Warning:** The approach used in this function to convert exponential histograms to explicit histograms **is not** part of the **OpenTelemetry Specification**. +__Warning:__ The approach used in this function to convert exponential histograms to explicit histograms __is not__ part of the __OpenTelemetry Specification__. `convert_exponential_histogram_to_histogram(distribution, [ExplicitBounds])` @@ -484,74 +484,70 @@ This function requires 2 arguments: - `distribution` - This argument defines the distribution algorithm used to allocate the exponential histogram datapoints into a new Explicit Histogram. There are 4 options: - - **upper** - This approach identifies the highest possible value of each exponential bucket (_the upper bound_) and uses it to distribute the datapoints by comparing the upper bound of each bucket with the ExplicitBounds provided. This approach works better for small/narrow exponential histograms where the difference between the upper bounds and lower bounds are small. - - _For example, Given:_ - - 1. count = 10 - 2. Boundaries: [5, 10, 15, 20, 25] - 3. Upper Bound: 15 - _Process:_ - 4. Start with zeros: [0, 0, 0, 0, 0] - 5. Iterate the boundaries and compare $upper = 15$ with each boundary: - + - __upper__ - This approach identifies the highest possible value of each exponential bucket (_the upper bound_) and uses it to distribute the datapoints by comparing the upper bound of each bucket with the ExplicitBounds provided. This approach works better for small/narrow exponential histograms where the difference between the upper bounds and lower bounds are small. + _For example, Given:_ + 1. count = 10 + 2. Boundaries: [5, 10, 15, 20, 25] + 3. Upper Bound: 15 + _Process:_ + 4. 
Start with zeros: [0, 0, 0, 0, 0] + 5. Iterate the boundaries and compare $upper = 15$ with each boundary: - $15>5$ (_skip_) - $15>10$ (_skip_) - $15<=15$ (allocate count to this boundary) + 6. Allocate count: [0, 0, __10__, 0, 0] + 7. Final Counts: [0, 0, __10__, 0, 0] - 6. Allocate count: [0, 0, __10__, 0, 0] - 7. Final Counts: [0, 0, __10__, 0, 0] + - __midpoint__ - This approach works in a similar way to the __upper__ approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket. The midpoint is identified by calculating the average of the upper and lower bounds. This approach also works better for small/narrow exponential histograms. - - **midpoint** - This approach works in a similar way to the **upper** approach, but instead of using the upper bound, it uses the midpoint of each exponential bucket. The midpoint is identified by calculating the average of the upper and lower bounds. This approach also works better for small/narrow exponential histograms. - > The **uniform** and **random** distribution algorithms both utilise the concept of intersecting boundaries. - > Intersecting boundaries are any boundary in the `boundaries array` that falls between or on the lower and upper values of the Exponential Histogram boundaries. - > _For Example:_ if you have an Exponential Histogram bucket with a lower bound of 10 and upper of 20, and your boundaries array is [5, 10, 15, 20, 25], the intersecting boundaries are 10, 15, and 20 because they lie within the range [10, 20]. + >The __uniform__ and __random__ distribution algorithms both utilise the concept of intersecting boundaries. + Intersecting boundaries are any boundary in the `boundaries array` that falls between or on the lower and upper values of the Exponential Histogram boundaries. 
+ _For Example:_ if you have an Exponential Histogram bucket with a lower bound of 10 and upper of 20, and your boundaries array is [5, 10, 15, 20, 25], the intersecting boundaries are 10, 15, and 20 because they lie within the range [10, 20]. - - **uniform** - This approach distributes the datapoints for each bucket uniformly across the intersecting **ExplicitBounds**. The algorithm works as follows: + - __uniform__ - This approach distributes the datapoints for each bucket uniformly across the intersecting __ExplicitBounds__. The algorithm works as follows: - - If there are valid intersecting boundaries, the function evenly distributes the count across these boundaries. - - Calculate the count to be allocated to each boundary. - - If there is a remainder after dividing the count equally, it distributes the remainder by incrementing the count for some of the boundaries until the remainder is exhausted. + - If there are valid intersecting boundaries, the function evenly distributes the count across these boundaries. + - Calculate the count to be allocated to each boundary. + - If there is a remainder after dividing the count equally, it distributes the remainder by incrementing the count for some of the boundaries until the remainder is exhausted. _For example Given:_ - - 1. count = 10 - 2. Exponential Histogram Bounds: [10, 20] - 3. Boundaries: [5, 10, 15, 20, 25] - 4. Intersecting Boundaries: [10, 15, 20] - 5. Number of Intersecting Boundaries: 3 - 6. Using the formula: $count/numOfIntersections=10/3=3r1$ - - _Uniform Allocation:_ - - 7. Start with zeros: [0, 0, 0, 0, 0] - 8. Allocate 3 to each: [0, 3, 3, 3, 0] - 9. Distribute remainder $r$ 1: [0, 4, 3, 3, 0] - 10. Final Counts: [0, 4, 3, 3, 0] - - - **random** - This approach distributes the datapoints for each bucket randomly across the intersecting **ExplicitBounds**. 
This approach works in a similar manner to the uniform distribution algorithm with the main difference being that points are distributed randomly instead of uniformly. This works as follows: - - If there are valid intersecting boundaries, calculate the proportion of the count that should be allocated to each boundary based on the overlap of the boundary with the provided range (lower to upper). - - For each boundary, a random fraction of the calculated proportion is allocated. - - Any remaining count (_due to rounding or random distribution_) is then distributed randomly among the intersecting boundaries. - - If the bucket range does not intersect with any boundaries, the entire count is assigned to the start boundary. - -- `ExplicitBounds` represents the list of bucket boundaries for the new histogram. This argument is **required** and **cannot be empty**. - -**WARNINGS:** - -- The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries and identify the best distribution approach for your data in order to minimize this loss. + 1. count = 10 + 2. Exponential Histogram Bounds: [10, 20] + 3. Boundaries: [5, 10, 15, 20, 25] + 4. Intersecting Boundaries: [10, 15, 20] + 5. Number of Intersecting Boundaries: 3 + 6. Using the formula: $count/numOfIntersections=10/3=3r1$ + + _Uniform Allocation:_ + + 7. Start with zeros: [0, 0, 0, 0, 0] + 8. Allocate 3 to each: [0, 3, 3, 3, 0] + 9. Distribute remainder $r$ 1: [0, 4, 3, 3, 0] + 10. Final Counts: [0, 4, 3, 3, 0] + + - __random__ - This approach distributes the datapoints for each bucket randomly across the intersecting __ExplicitBounds__. This approach works in a similar manner to the uniform distribution algorithm with the main difference being that points are distributed randomly instead of uniformly. 
This works as follows: + - If there are valid intersecting boundaries, calculate the proportion of the count that should be allocated to each boundary based on the overlap of the boundary with the provided range (lower to upper). + - For each boundary, a random fraction of the calculated proportion is allocated. + - Any remaining count (_due to rounding or random distribution_) is then distributed randomly among the intersecting boundaries. + - If the bucket range does not intersect with any boundaries, the entire count is assigned to the start boundary. + +- `ExplicitBounds` represents the list of bucket boundaries for the new histogram. This argument is __required__ and __cannot be empty__. + +__WARNINGS:__ + +- The process of converting an ExponentialHistogram to an Explicit Histogram is not perfect and may result in a loss of precision. It is important to define an appropriate set of bucket boundaries and identify the best distribution approach for your data in order to minimize this loss. For example, selecting Boundaries that are too high or too low may result histogram buckets that are too wide or too narrow, respectively. -- **Negative Bucket Counts** are not supported in Explicit Histograms, as such negative bucket counts are ignored. +- __Negative Bucket Counts__ are not supported in Explicit Histograms, as such negative bucket counts are ignored. -- **ZeroCounts** are only allocated if the ExplicitBounds array contains a zero boundary. That is, if the Explicit Boundaries that you provide does not start with `0`, the function will not allocate any zero counts from the Exponential Histogram. +- __ZeroCounts__ are only allocated if the ExplicitBounds array contains a zero boundary. That is, if the Explicit Boundaries that you provide does not start with `0`, the function will not allocate any zero counts from the Exponential Histogram. 
This function should only be used when Exponential Histograms are not suitable for the downstream consumers or if upstream metric sources are unable to generate Explicit Histograms. -**Example**: +__Example__: - `convert_exponential_histogram_to_histogram("random", [0.0, 10.0, 100.0, 1000.0, 10000.0])` @@ -610,7 +606,7 @@ Examples: - `aggregate_on_attributes("max") where metric.name == "system.memory.usage"` - `aggregate_on_attributes("max", []) where metric.name == "system.memory.usage"` -The `aggregate_on_attributes` function can also be used in conjunction with +The `aggregate_on_attributes` function can also be used in conjunction with [keep_matching_keys](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/ottlfuncs#keep_matching_keys) or [delete_matching_keys](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/ottlfuncs#delete_matching_keys). @@ -618,8 +614,8 @@ For example, to remove attribute keys matching a regex and aggregate the metrics ```yaml statements: - - delete_matching_keys(resource.attributes, "(?i).*myRegex.*") where metric.name == "system.memory.usage" - - aggregate_on_attributes("sum") where metric.name == "system.memory.usage" + - delete_matching_keys(resource.attributes, "(?i).*myRegex.*") where metric.name == "system.memory.usage" + - aggregate_on_attributes("sum") where metric.name == "system.memory.usage" ``` To aggregate only using a specified set of attributes, you can use `keep_matching_keys`. 
@@ -654,7 +650,7 @@ Examples: - `aggregate_on_attribute_value("sum", "attr1", ["val1", "val2"], "new_val") where metric.name == "system.memory.usage"` -The `aggregate_on_attribute_value` function can also be used in conjunction with +The `aggregate_on_attribute_value` function can also be used in conjunction with [keep_matching_keys](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/ottlfuncs#keep_matching_keys) or [delete_matching_keys](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/ottlfuncs#delete_matching_keys). @@ -662,8 +658,8 @@ For example, to remove attribute keys matching a regex and aggregate the metrics ```yaml statements: - - delete_matching_keys(resource.attributes, "(?i).*myRegex.*") where metric.name == "system.memory.usage" - - aggregate_on_attribute_value("sum", "attr1", ["val1", "val2"], "new_val") where metric.name == "system.memory.usage" + - delete_matching_keys(resource.attributes, "(?i).*myRegex.*") where metric.name == "system.memory.usage" + - aggregate_on_attribute_value("sum", "attr1", ["val1", "val2"], "new_val") where metric.name == "system.memory.usage" ``` To aggregate only using a specified set of attributes, you can use `keep_matching_keys`. @@ -677,13 +673,12 @@ The `merge_histogram_buckets` function merges a specific bucket of a histogram w `bound` is a float64 value that specifies which bucket boundary to remove. The function will merge the bucket that ends at this boundary with the next bucket. The function: - -- Preserves the total count and sum of the histogram. -- Only works on histogram metrics (no-op for other metric types). -- Uses floating-point tolerance (epsilon = 1e-12) when matching the bound. -- Makes no changes if: - - The bound is not found. - - The histogram is empty. +- Preserves the total count and sum of the histogram. +- Only works on histogram metrics (no-op for other metric types). 
+- Uses floating-point tolerance (epsilon = 1e-12) when matching the bound. +- Makes no changes if: + - The bound is not found. + - The histogram is empty. - The histogram structure is invalid (mismatched bounds and counts). Examples: @@ -691,6 +686,7 @@ Examples: ```yaml # Merge the bucket ending at 0.5 with the next bucket - merge_histogram_buckets(0.5) where metric.name == "http_request_duration" + # Given a histogram with: # bounds: [0.1, 0.5, 1.0] # counts: [5, 8, 3, 1] @@ -728,7 +724,6 @@ The returned map has the following fields: Examples: - `parse_clf(body)` - - `parse_clf("127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326")` ### parse_leef @@ -750,9 +745,7 @@ For LEEF 1.0 the attribute delimiter is always a tab. For LEEF 2.0 the delimiter Examples: - `parse_leef(body)` - - `parse_leef("LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=10.50.1.1\tdst=2.10.20.20\tsev=5")` - - `parse_leef("LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=10.0.1.8^dst=10.0.0.5^sev=5")` ### set_semconv_span_name @@ -765,51 +758,52 @@ The primary use case of the `set_semconv_span_name()` function is to address hig Parameters: -- `semconvVersion` is the version of the Semantic Conventions used to generate the `span.name`, older semconv attributes are supported. Versions `1.37.0` to `1.40.0` are supported. -- `originalSpanNameAttribute` is the optional name of the attribute used to copy the original `span.name` if different from the name derived from semantic conventions. +* `semconvVersion` is the version of the Semantic Conventions used to generate the `span.name`, older semconv attributes are supported. Versions `1.37.0` to `1.40.0` are supported. +* `originalSpanNameAttribute` is the optional name of the attribute used to copy the original `span.name` if different from the name derived from semantic conventions. 
Sanitization examples: -- Span with high-cardinality name but recommended semantic convention attributes - - Incoming span: - ``` - span.name: GET /api/v1/users/123 # /!\ high cardinality - span.kind: server - span.attributes - http.request.method: GET - http.route: /api/v1/users/{id} - url.path: /api/v1/users/123 - ``` - - Span name after applying `set_semconv_span_name("1.40.0")`: `GET /api/v1/users/{id}` - - No loss of information on `span.name` occurs because the recommended attribute `http.route` is present. -- Span with high-cardinality name lacking recommended semantic convention attribute `http.route` - - Incoming span: - ``` - span.name: GET /api/v1/users/123 # /!\ high cardinality - span.kind: server - span.attributes - http.request.method: GET - url.path: /api/v1/users/123 - ``` - - Span name after applying `set_semconv_span_name("1.40.0")`: `GET` - - Loss of information on `span.name` occurs because the recommended attribute `http.route` is missing. +* Span with high-cardinality name but recommended semantic convention attributes + * Incoming span: + ``` + span.name: GET /api/v1/users/123 # /!\ high cardinality + span.kind: server + span.attributes + http.request.method: GET + http.route: /api/v1/users/{id} + url.path: /api/v1/users/123 + ``` + * Span name after applying `set_semconv_span_name("1.40.0")`: `GET /api/v1/users/{id}` + * No loss of information on `span.name` occurs because the recommended attribute `http.route` is present. +* Span with high-cardinality name lacking recommended semantic convention attribute `http.route` + * Incoming span: + ``` + span.name: GET /api/v1/users/123 # /!\ high cardinality + span.kind: server + span.attributes + http.request.method: GET + url.path: /api/v1/users/123 + ``` + * Span name after applying `set_semconv_span_name("1.40.0")`: `GET` + * Loss of information on `span.name` occurs because the recommended attribute `http.route` is missing. 
Note that this loss of information is mitigated if the instrumentation produced attributes that contain the URL path like `url.path` or `url.full`. -- Compliant span name is unchanged - - Incoming span: - ``` - span.name: GET /api/v1/users/{id} - span.kind: server - span.attributes - http.request.method: GET - http.route: /api/v1/users/{id} - url.path: /api/v1/users/123 - ``` - - Span name after applying `set_semconv_span_name("1.40.0")`: `GET /api/v1/users/{id}` +* Compliant span name is unchanged + * Incoming span: + ``` + span.name: GET /api/v1/users/{id} + span.kind: server + span.attributes + http.request.method: GET + http.route: /api/v1/users/{id} + url.path: /api/v1/users/123 + ``` + * Span name after applying `set_semconv_span_name("1.40.0")`: `GET /api/v1/users/{id}` + Backward compatibility: `set_semconv_span_name` will map the following attributes to their equivalents per the v1.39.0 semantic conventions: | v1.40.0 Attribute | Older attribute | -| --------------------- | ------------------ | +|-----------------------|--------------------| | `http.request.method` | `http.method` | | `rpc.method` | `rpc.grpc.method` | | `rpc.service` | `rpc.grpc.service` | @@ -827,19 +821,16 @@ Examples: ## Examples ### Perform transformation if field does not exist - Set attribute `test` to `"pass"` if the attribute `test` does not exist: - ```yaml transform: error_mode: ignore trace_statements: - # accessing a map with a key that does not exist will return nil. + # accessing a map with a key that does not exist will return nil. 
- set(span.attributes["test"], "pass") where span.attributes["test"] == nil -``` +``` ### Rename attribute - There are 2 ways to rename an attribute key: You can either set a new attribute and delete the old: @@ -849,8 +840,8 @@ transform: error_mode: ignore trace_statements: - set(resource.attributes["namespace"], resource.attributes["k8s.namespace.name"]) - - delete_key(resource.attributes, "k8s.namespace.name") -``` + - delete_key(resource.attributes, "k8s.namespace.name") +``` Or you can update the key using regex: @@ -859,10 +850,9 @@ transform: error_mode: ignore trace_statements: - replace_all_patterns(resource.attributes, "key", "k8s\\.namespace\\.name", "namespace") -``` +``` ### Move field to attribute - Set attribute `body` to the value of the log body: ```yaml @@ -870,12 +860,10 @@ transform: error_mode: ignore log_statements: - set(log.attributes["body"], log.body) -``` +``` ### Combine two attributes - -Set attribute `test` to the value of attributes `"foo"` and `"bar"` combined. - +Set attribute `test` to the value of attributes `"foo"` and `"bar"` combined. ```yaml transform: error_mode: ignore @@ -907,13 +895,13 @@ transform: - statements: # Parse body as JSON and merge the resulting map with the cache map, ignoring non-json bodies. # cache is a field exposed by OTTL that is a temporary storage place for complex operations. - - merge_maps(log.cache, ParseJSON(log.body), "upsert") where IsMatch(log.body, "^\\{") - + - merge_maps(log.cache, ParseJSON(log.body), "upsert") where IsMatch(log.body, "^\\{") + # Set attributes using the values merged into cache. # If the attribute doesn't exist in cache then nothing happens. - set(log.attributes["attr1"], log.cache["attr1"]) - set(log.attributes["attr2"], log.cache["attr2"]) - + # To access nested maps you can chain index ([]) operations. # If nested or attr3 do not exist in cache then nothing happens. 
- set(log.attributes["nested.attr3"], log.cache["nested"]["attr3"]) @@ -984,7 +972,7 @@ view into how OTTL views the underlying data. receivers: file_log: start_at: beginning - include: [test.log] + include: [ test.log ] processors: transform: @@ -993,6 +981,7 @@ processors: - set(resource.attributes["test"], "pass") - set(scope.attributes["test"], ["pass"]) - set(log.attributes["test"], true) + exporters: debug: @@ -1025,14 +1014,15 @@ service: See [CONTRIBUTING.md](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/processor/transformprocessor/CONTRIBUTING.md). + ## Warnings -The Transform Processor uses the [OpenTelemetry Transformation Language](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/README.md) (OTTL) which allows users to modify all aspects of their telemetry. Some specific risks are listed below, but this is not an exhaustive list. In general, understand your data before using the Transform Processor. +The Transform Processor uses the [OpenTelemetry Transformation Language](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/main/pkg/ottl/README.md) (OTTL) which allows users to modify all aspects of their telemetry. Some specific risks are listed below, but this is not an exhaustive list. In general, understand your data before using the Transform Processor. -- [Unsound Transformations](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#unsound-transformations): Several Metric-only functions allow you to transform one metric data type to another or create new metrics from an existing metrics. Transformations between metric data types are not defined in the [metrics data model](https://github.com/open-telemetry/opentelemetry-specification/blob/main//specification/metrics/data-model.md). 
These functions have the expectation that you understand the incoming data and know that it can be meaningfully converted to a new metric data type or can meaningfully be used to create new metrics. - - Although the OTTL allows the `set` function to be used with `metric.data_type`, its implementation in the Transform Processor is NOOP. To modify a data type you must use a function specific to that purpose. -- [Identity Conflict](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#identity-conflict): Transformation of metrics have the potential to affect the identity of a metric leading to an Identity Crisis. Be especially cautious when transforming metric name and when reducing/changing existing attributes. Adding new attributes is safe. -- [Orphaned Telemetry](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#orphaned-telemetry): The processor allows you to modify `span_id`, `trace_id`, and `parent_span_id` for traces and `span_id`, and `trace_id` logs. Modifying these fields could lead to orphaned spans or logs. +- [Unsound Transformations](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#unsound-transformations): Several Metric-only functions allow you to transform one metric data type to another or create new metrics from an existing metrics. Transformations between metric data types are not defined in the [metrics data model](https://github.com/open-telemetry/opentelemetry-specification/blob/main//specification/metrics/data-model.md). These functions have the expectation that you understand the incoming data and know that it can be meaningfully converted to a new metric data type or can meaningfully be used to create new metrics. + - Although the OTTL allows the `set` function to be used with `metric.data_type`, its implementation in the Transform Processor is NOOP. 
To modify a data type you must use a function specific to that purpose. +- [Identity Conflict](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#identity-conflict): Transformation of metrics have the potential to affect the identity of a metric leading to an Identity Crisis. Be especially cautious when transforming metric name and when reducing/changing existing attributes. Adding new attributes is safe. +- [Orphaned Telemetry](https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/standard-warnings.md#orphaned-telemetry): The processor allows you to modify `span_id`, `trace_id`, and `parent_span_id` for traces and `span_id`, and `trace_id` logs. Modifying these fields could lead to orphaned spans or logs. ## Feature Gate @@ -1045,14 +1035,14 @@ This option is useful when applying transformations which alter the resource or The feature is currently only available for log processing. #### Example Usage - + `config.yaml`: - -```yaml -transform: - flatten_data: true - log_statements: - - set(resource.attributes["to"], log.attributes["from"]) -``` - -Run collector: `./otelcol --config config.yaml --feature-gates=transform.flatten.logs` + + ```yaml + transform: + flatten_data: true + log_statements: + - set(resource.attributes["to"], log.attributes["from"]) + ``` + + Run collector: `./otelcol --config config.yaml --feature-gates=transform.flatten.logs` diff --git a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go index a87c4aa8d83f6..387c93c51dc3b 100644 --- a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go +++ b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go @@ -59,7 +59,7 @@ var clfRegex = regexp.MustCompile(`^(\S+) (\S+) (\S+) \[([^\]]+)\] "([^"]*)" (\S func parseCLFMessage(message string) (pcommon.Map, error) { matches := 
clfRegex.FindStringSubmatch(strings.TrimSpace(message)) if matches == nil { - return pcommon.Map{}, errors.New("invalid CLF message: does not match expected format") + return pcommon.NewMap(), errors.New("invalid CLF message: does not match expected format") } result := pcommon.NewMap() @@ -80,7 +80,7 @@ func parseCLFMessage(message string) (pcommon.Map, error) { status := matches[6] statusInt, err := strconv.ParseInt(status, 10, 64) if err != nil { - return pcommon.Map{}, fmt.Errorf("invalid status code %q: %w", status, err) + return pcommon.NewMap(), fmt.Errorf("invalid status code %q: %w", status, err) } result.PutInt("status", statusInt) @@ -88,7 +88,7 @@ func parseCLFMessage(message string) (pcommon.Map, error) { if bytesStr != "-" { bytesInt, err := strconv.ParseInt(bytesStr, 10, 64) if err != nil { - return pcommon.Map{}, fmt.Errorf("invalid bytes value %q: %w", bytesStr, err) + return pcommon.NewMap(), fmt.Errorf("invalid bytes value %q: %w", bytesStr, err) } result.PutInt("bytes", bytesInt) } diff --git a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go index d62ccda8a47ac..55e6fbd1a118b 100644 --- a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go +++ b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go @@ -20,6 +20,7 @@ func Test_parseCLF(t *testing.T) { name string input string expected map[string]any + absent []string }{ { name: "canonical example from the W3C spec", @@ -66,8 +67,8 @@ func Test_parseCLF(t *testing.T) { "request_uri": "/redirect", "protocol": "HTTP/1.1", "status": int64(304), - // bytes intentionally omitted }, + absent: []string{"bytes"}, }, { name: "IPv6 remote host", @@ -145,6 +146,7 @@ func Test_parseCLF(t *testing.T) { "status": int64(400), "bytes": int64(0), }, + absent: []string{"method", "request_uri", "protocol"}, }, { name: "empty request line", @@ -158,6 +160,7 @@ 
func Test_parseCLF(t *testing.T) { "status": int64(408), "bytes": int64(0), }, + absent: []string{"method", "request_uri", "protocol"}, }, { name: "large byte counts", @@ -192,6 +195,10 @@ func Test_parseCLF(t *testing.T) { require.True(t, ok, "result should be pcommon.Map") assertCLFMap(t, resultMap, tt.expected) + for _, k := range tt.absent { + _, ok := resultMap.Get(k) + assert.False(t, ok, "key %q should be absent", k) + } }) } } From 978dac4a616164fa59850fce86c19e0b7021bac8 Mon Sep 17 00:00:00 2001 From: Caleb-Hurshman Date: Mon, 18 May 2026 10:08:49 -0400 Subject: [PATCH 16/16] update function naming -> ParseCLF --- .chloggen/transformprocessor-parse-clf.yaml | 4 +-- processor/transformprocessor/README.md | 28 +++++++++---------- .../logparsingfuncs/func_parse_clf.go | 2 +- .../logparsingfuncs/func_parse_clf_test.go | 2 +- .../internal/logs/functions_test.go | 4 +-- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.chloggen/transformprocessor-parse-clf.yaml b/.chloggen/transformprocessor-parse-clf.yaml index 4c41ac5ec4daf..f381a28128f88 100644 --- a/.chloggen/transformprocessor-parse-clf.yaml +++ b/.chloggen/transformprocessor-parse-clf.yaml @@ -7,7 +7,7 @@ change_type: enhancement component: processor/transform # A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). -note: Add `parse_clf` function for parsing Common Log Format (CLF) HTTP access log entries. +note: Add `ParseCLF` function for parsing Common Log Format (CLF) HTTP access log entries. # Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. issues: [48349] @@ -16,7 +16,7 @@ issues: [48349] # These lines will be padded with 2 spaces and then inserted directly into the document. # Use pipe (|) for multiline entries. 
subtext: | - `parse_clf` is available in log statements and returns a map with the parsed + `ParseCLF` is available in log statements and returns a map with the parsed `remote_host`, `rfc931`, `authuser`, `timestamp`, `request`, `method`, `request_uri`, `protocol`, `status`, and `bytes` fields. diff --git a/processor/transformprocessor/README.md b/processor/transformprocessor/README.md index 496f4b2890803..4bd87a6a06618 100644 --- a/processor/transformprocessor/README.md +++ b/processor/transformprocessor/README.md @@ -275,8 +275,8 @@ In addition to the common OTTL functions, the processor defines its own function **Logs only functions** -- [parse_clf](#parse_clf) -- [parse_leef](#parse_leef) +- [ParseCLF](#parseclf) +- [ParseLEEF](#parseleef) **Traces only functions** @@ -696,11 +696,11 @@ Examples: # counts: [5, 11, 1] ``` -### parse_clf +### ParseCLF -`parse_clf(target)` +`ParseCLF(target)` -The `parse_clf` function returns a `pcommon.Map` that is the result of parsing the `target` string as a [Common Log Format (CLF)](https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format) HTTP access log entry. +The `ParseCLF` function returns a `pcommon.Map` that is the result of parsing the `target` string as a [Common Log Format (CLF)](https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format) HTTP access log entry. `target` is a Getter that returns a string. If the returned string is empty, or cannot be parsed as CLF, an error will be returned. 
@@ -723,18 +723,18 @@ The returned map has the following fields: Examples: -- `parse_clf(body)` -- `parse_clf("127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326")` +- `ParseCLF(body)` +- `ParseCLF("127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326")` -### parse_leef +### ParseLEEF -`parse_leef(target)` +`ParseLEEF(target)` -The `parse_leef` function returns a `pcommon.Map` that is the result of parsing the `target` string as a [Log Event Extended Format (LEEF)](https://www.ibm.com/docs/en/dsm?topic=overview-leef-event-components) message. +The `ParseLEEF` function returns a `pcommon.Map` that is the result of parsing the `target` string as a [Log Event Extended Format (LEEF)](https://www.ibm.com/docs/en/dsm?topic=overview-leef-event-components) message. `target` is a Getter that returns a string. If the returned string is empty, or cannot be parsed as LEEF, an error will be returned. -`parse_leef` can parse both LEEF 1.0 and LEEF 2.0 messages. The function is tolerant of an optional syslog header preceding the `LEEF:` token. The returned map has the following top-level fields: +`ParseLEEF` can parse both LEEF 1.0 and LEEF 2.0 messages. The function is tolerant of an optional syslog header preceding the `LEEF:` token. The returned map has the following top-level fields: - `version` — the LEEF version (`"1.0"` or `"2.0"`). - `vendor`, `product_name`, `product_version`, `event_id` — the LEEF header fields. @@ -744,9 +744,9 @@ For LEEF 1.0 the attribute delimiter is always a tab. 
For LEEF 2.0 the delimiter Examples: -- `parse_leef(body)` -- `parse_leef("LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=10.50.1.1\tdst=2.10.20.20\tsev=5")` -- `parse_leef("LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=10.0.1.8^dst=10.0.0.5^sev=5")` +- `ParseLEEF(body)` +- `ParseLEEF("LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=10.50.1.1\tdst=2.10.20.20\tsev=5")` +- `ParseLEEF("LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=10.0.1.8^dst=10.0.0.5^sev=5")` ### set_semconv_span_name diff --git a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go index 387c93c51dc3b..2b55c6a6f6b75 100644 --- a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go +++ b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf.go @@ -22,7 +22,7 @@ type parseCLFArguments struct { } func NewParseCLFFactory() ottl.Factory[*ottllog.TransformContext] { - return ottl.NewFactory("parse_clf", &parseCLFArguments{}, createParseCLFFunction) + return ottl.NewFactory("ParseCLF", &parseCLFArguments{}, createParseCLFFunction) } func createParseCLFFunction(_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[*ottllog.TransformContext], error) { diff --git a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go index 55e6fbd1a118b..8ddd2a38ce5ad 100644 --- a/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go +++ b/processor/transformprocessor/internal/logparsingfuncs/func_parse_clf_test.go @@ -281,7 +281,7 @@ func Test_parseCLF_target_error(t *testing.T) { func Test_createParseCLFFunction(t *testing.T) { factory := NewParseCLFFactory() - assert.Equal(t, "parse_clf", factory.Name()) + assert.Equal(t, "ParseCLF", factory.Name()) args := &parseCLFArguments{ Target: ottl.StandardStringGetter[*ottllog.TransformContext]{ diff --git 
a/processor/transformprocessor/internal/logs/functions_test.go b/processor/transformprocessor/internal/logs/functions_test.go index 9fd58b75b7c75..48fb03ccede64 100644 --- a/processor/transformprocessor/internal/logs/functions_test.go +++ b/processor/transformprocessor/internal/logs/functions_test.go @@ -16,8 +16,8 @@ import ( func Test_LogFunctions(t *testing.T) { expected := ottlfuncs.StandardFuncs[*ottllog.TransformContext]() - expected["parse_clf"] = logparsingfuncs.NewParseCLFFactory() - expected["parse_leef"] = logparsingfuncs.NewParseLEEFFactory() + expected["ParseCLF"] = logparsingfuncs.NewParseCLFFactory() + expected["ParseLEEF"] = logparsingfuncs.NewParseLEEFFactory() actual := LogFunctions() require.Len(t, actual, len(expected)) for k := range actual {