Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .chloggen/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ components:
- processor/tail_sampling
- processor/tencentcvmdetector
- processor/transform
- processor/transformprocessor/internal/logparsingfuncs
- processor/unroll
- processor/upclouddetector
- processor/vultrdetector
Expand Down
27 changes: 27 additions & 0 deletions .chloggen/feat_transform-leef-parser.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
component: processor/transform

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add `ParseLEEF` function to parse Log Event Extended Format (LEEF) messages.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [44908]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
30 changes: 30 additions & 0 deletions .chloggen/transformprocessor-parse-clf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. receiver/filelog)
component: processor/transform

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add `ParseCLF` function for parsing Common Log Format (CLF) HTTP access log entries.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [48349]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
`ParseCLF` is available in log statements and returns a map with the parsed
`remote_host`, `rfc931`, `authuser`, `timestamp`, `request`, `method`,
`request_uri`, `protocol`, `status`, and `bytes` fields.

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ processor/spanpruningprocessor/ @open-telemetry
processor/sumologicprocessor/ @open-telemetry/collector-contrib-approvers @rnishtala-sumo @pankaj101A @jagan2221
processor/tailsamplingprocessor/ @open-telemetry/collector-contrib-approvers @portertech @jmacd @csmarchbanks @carsonip
processor/transformprocessor/ @open-telemetry/collector-contrib-approvers @TylerHelmuth @evan-bradley @edmocosta @bogdandrutu
processor/transformprocessor/internal/logparsingfuncs/ @open-telemetry/collector-contrib-approvers @Caleb-Hurshman @Dylan-M
processor/unrollprocessor/ @open-telemetry/collector-contrib-approvers @axw @schmikei @rnishtala-sumo
receiver/activedirectorydsreceiver/ @open-telemetry/collector-contrib-approvers @pjanotti
receiver/aerospikereceiver/ @open-telemetry/collector-contrib-approvers @antonblock
Expand Down
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/beta_stability.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ body:
- processor/sumologic
- processor/tailsampling
- processor/transform
- processor/transform/internal/logparsingfuncs
- processor/unroll
- receiver/activedirectoryds
- receiver/aerospike
Expand Down
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,7 @@ body:
- processor/sumologic
- processor/tailsampling
- processor/transform
- processor/transform/internal/logparsingfuncs
- processor/unroll
- receiver/activedirectoryds
- receiver/aerospike
Expand Down
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/feature_request.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ body:
- processor/sumologic
- processor/tailsampling
- processor/transform
- processor/transform/internal/logparsingfuncs
- processor/unroll
- receiver/activedirectoryds
- receiver/aerospike
Expand Down
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/other.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ body:
- processor/sumologic
- processor/tailsampling
- processor/transform
- processor/transform/internal/logparsingfuncs
- processor/unroll
- receiver/activedirectoryds
- receiver/aerospike
Expand Down
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/unmaintained.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ body:
- processor/sumologic
- processor/tailsampling
- processor/transform
- processor/transform/internal/logparsingfuncs
- processor/unroll
- receiver/activedirectoryds
- receiver/aerospike
Expand Down
1 change: 1 addition & 0 deletions .github/component_labels.txt
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ processor/spanpruningprocessor processor/spanpruning
processor/sumologicprocessor processor/sumologic
processor/tailsamplingprocessor processor/tailsampling
processor/transformprocessor processor/transform
processor/transformprocessor/internal/logparsingfuncs processor/transform/internal/logparsingfuncs
processor/unrollprocessor processor/unroll
receiver/activedirectorydsreceiver receiver/activedirectoryds
receiver/aerospikereceiver receiver/aerospike
Expand Down
57 changes: 57 additions & 0 deletions processor/transformprocessor/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,11 @@ In addition to the common OTTL functions, the processor defines its own function
- [aggregate_on_attribute_value](#aggregate_on_attribute_value)
- [merge_histogram_buckets](#merge_histogram_buckets)

**Logs only functions**

- [ParseCLF](#parseclf)
- [ParseLEEF](#parseleef)

**Traces only functions**

- [set_semconv_span_name](#set_semconv_span_name)
Expand Down Expand Up @@ -691,6 +696,58 @@ Examples:
# counts: [5, 11, 1]
```

### ParseCLF

`ParseCLF(target)`

The `ParseCLF` function returns a `pcommon.Map` that is the result of parsing the `target` string as a [Common Log Format (CLF)](https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format) HTTP access log entry.

`target` is a Getter that returns a string. If the returned string is empty, or cannot be parsed as CLF, an error will be returned.

The CLF entry is expected to have the form:

```
remotehost rfc931 authuser [date] "request" status bytes
```

The returned map has the following fields:

- `remote_host` — the client's DNS name or IP address.
- `rfc931` — the remote logname of the user (CLF uses `-` when unknown).
- `authuser` — the authenticated user (CLF uses `-` when unknown).
- `timestamp` — the contents of the bracketed date field, preserved as a string.
- `request` — the raw request line as sent by the client.
- `method`, `request_uri`, `protocol` — the parsed components of the request line, only set when the request line splits into three space-separated parts (e.g. `GET /index.html HTTP/1.0`).
- `status` — the HTTP status code as an integer.
- `bytes` — the content-length of the response as an integer. Omitted when CLF reports `-` (e.g. on a 304 response).

Examples:

- `ParseCLF(body)`
- `ParseCLF("127.0.0.1 - frank [10/Oct/2000:13:55:36 -0700] \"GET /apache_pb.gif HTTP/1.0\" 200 2326")`

### ParseLEEF

`ParseLEEF(target)`

The `ParseLEEF` function returns a `pcommon.Map` that is the result of parsing the `target` string as a [Log Event Extended Format (LEEF)](https://www.ibm.com/docs/en/dsm?topic=overview-leef-event-components) message.

`target` is a Getter that returns a string. If the returned string is empty, or cannot be parsed as LEEF, an error will be returned.

`ParseLEEF` can parse both LEEF 1.0 and LEEF 2.0 messages. The function is tolerant of an optional syslog header preceding the `LEEF:` token. The returned map has the following top-level fields:

- `version` — the LEEF version (`"1.0"` or `"2.0"`).
- `vendor`, `product_name`, `product_version`, `event_id` — the LEEF header fields.
- `attributes` — a map of the parsed key/value attribute pairs.

For LEEF 1.0 the attribute delimiter is always a tab. For LEEF 2.0 the delimiter is taken from the header and may be specified as a single character or as a hex value (e.g. `0x09`).

Examples:

- `ParseLEEF(body)`
- `ParseLEEF("LEEF:1.0|Microsoft|MSExchange|4.0 SP1|15345|src=10.50.1.1\tdst=2.10.20.20\tsev=5")`
- `ParseLEEF("LEEF:2.0|Lancope|StealthWatch|1.0|41|^|src=10.0.1.8^dst=10.0.0.5^sev=5")`

### set_semconv_span_name

`set_semconv_span_name(semconvVersion, Optional[originalSpanNameAttribute])`
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package logparsingfuncs // import "github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor/internal/logparsingfuncs"

import (
"context"
"errors"
"fmt"
"regexp"
"strconv"
"strings"

"go.opentelemetry.io/collector/pdata/pcommon"

"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl"
"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog"
)

// parseCLFArguments holds the arguments accepted by the ParseCLF function.
type parseCLFArguments struct {
	// Target yields the string that is parsed as a CLF entry.
	Target ottl.StringGetter[*ottllog.TransformContext]
}

// NewParseCLFFactory returns the OTTL factory that registers the ParseCLF
// function for the log transform context.
func NewParseCLFFactory() ottl.Factory[*ottllog.TransformContext] {
	const functionName = "ParseCLF"
	return ottl.NewFactory(functionName, &parseCLFArguments{}, createParseCLFFunction)
}

// createParseCLFFunction builds the ParseCLF expression from the supplied
// arguments. It returns an error when oArgs is not a *parseCLFArguments.
func createParseCLFFunction(_ ottl.FunctionContext, oArgs ottl.Arguments) (ottl.ExprFunc[*ottllog.TransformContext], error) {
	if clfArgs, isCLFArgs := oArgs.(*parseCLFArguments); isCLFArgs {
		return parseCLF(clfArgs.Target), nil
	}

	return nil, errors.New("parseCLFFactory args must be of type *parseCLFArguments")
}

// parseCLF returns an expression that reads a string from target and parses
// it as a CLF entry. The expression fails when the getter fails, when the
// string is empty, or when parseCLFMessage rejects the string.
func parseCLF(target ottl.StringGetter[*ottllog.TransformContext]) ottl.ExprFunc[*ottllog.TransformContext] {
	return func(ctx context.Context, tCtx *ottllog.TransformContext) (any, error) {
		raw, getErr := target.Get(ctx, tCtx)
		switch {
		case getErr != nil:
			return nil, getErr
		case raw == "":
			return nil, errors.New("cannot parse empty CLF message")
		}

		return parseCLFMessage(raw)
	}
}

// clfRegex matches the Common Log Format:
//
//	remotehost rfc931 authuser [date] "request" status bytes
//
// Capture groups, in order: remote host, rfc931 logname, authuser, date
// (without the surrounding brackets), request line (without the surrounding
// quotes), status, and bytes.
//
// The request group first tries an escape-aware form in which a backslash
// consumes the following character, so a request line containing an escaped
// quote (\") does not terminate the field early. If that form cannot produce
// a match for the whole line, the plain "anything but a quote" form is used
// instead, so a request that merely ends in a lone backslash still matches.
// The captured request is left exactly as written; escapes are not decoded.
//
// See https://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format
var clfRegex = regexp.MustCompile(`^(\S+) (\S+) (\S+) \[([^\]]+)\] "((?:[^"\\]|\\.)*|[^"]*)" (\S+) (\S+)$`)

// parseCLFMessage parses a single CLF entry into a map.
//
// Surrounding whitespace is trimmed before matching against clfRegex. On
// success the map contains remote_host, rfc931, authuser, timestamp, request
// and status; method, request_uri and protocol are added only when the request
// splits into three space-separated parts; bytes is added unless the field is
// "-". An error (with an empty map) is returned when the entry does not match
// the expected layout or when status or bytes is not a base-10 integer.
func parseCLFMessage(message string) (pcommon.Map, error) {
	fields := clfRegex.FindStringSubmatch(strings.TrimSpace(message))
	if fields == nil {
		return pcommon.NewMap(), errors.New("invalid CLF message: does not match expected format")
	}

	requestLine, rawStatus, rawBytes := fields[5], fields[6], fields[7]

	// Validate the numeric fields up front; status is checked before bytes so
	// the first reported error is the same one a field-by-field walk reports.
	statusCode, err := strconv.ParseInt(rawStatus, 10, 64)
	if err != nil {
		return pcommon.NewMap(), fmt.Errorf("invalid status code %q: %w", rawStatus, err)
	}

	// CLF writes "-" when no byte count is available.
	hasByteCount := rawBytes != "-"
	var byteCount int64
	if hasByteCount {
		byteCount, err = strconv.ParseInt(rawBytes, 10, 64)
		if err != nil {
			return pcommon.NewMap(), fmt.Errorf("invalid bytes value %q: %w", rawBytes, err)
		}
	}

	parsed := pcommon.NewMap()
	parsed.PutStr("remote_host", fields[1])
	parsed.PutStr("rfc931", fields[2])
	parsed.PutStr("authuser", fields[3])
	parsed.PutStr("timestamp", fields[4])
	parsed.PutStr("request", requestLine)

	// The request components are only exposed when all three are present.
	parts := strings.SplitN(requestLine, " ", 3)
	if len(parts) == 3 {
		parsed.PutStr("method", parts[0])
		parsed.PutStr("request_uri", parts[1])
		parsed.PutStr("protocol", parts[2])
	}

	parsed.PutInt("status", statusCode)
	if hasByteCount {
		parsed.PutInt("bytes", byteCount)
	}

	return parsed, nil
}
Loading
Loading