Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .chloggen/oracledb-sql-comment-extraction.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
change_type: enhancement
component: receiver/oracledb
note: Add SQL comment extraction support for APM correlation. Users can now configure `allowed_comment_keys` to extract key-value pairs from SQL query comments and include them as telemetry attributes for correlation with APM traces.
issues: [48338]
change_logs: [user]
106 changes: 106 additions & 0 deletions internal/common/sqlcomments/extractor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package sqlcomments // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/sqlcomments"

import (
"regexp"
"strings"
)

var (
	// leadingBlockCommentRegex matches one or more /* */ block comments at the
	// very start of the SQL text, allowing whitespace before, between and after
	// them. The (?s) flag makes "." match newlines so that a leading comment
	// spanning several lines is recognized instead of being silently ignored.
	leadingBlockCommentRegex = regexp.MustCompile(`(?s)^\s*(/\*.*?\*/\s*)+`)
	// commentContentRegex captures the text between the /* and */ delimiters of
	// each individual block comment. (?s) keeps multi-line comment bodies in a
	// single capture; the lazy quantifier stops at the first closing */.
	commentContentRegex = regexp.MustCompile(`(?s)/\*(.*?)\*/`)
)

// ExtractAndFilterComments extracts leading /* */ block comments from SQL,
// parses them as key=value pairs, and returns only allowed keys.
// Returns comma-separated filtered pairs, or empty string if no allowed keys found.
// Format: "key1=value1,key2=value2"
//
// Only block comments at the very start of sqlText (optionally preceded by
// whitespace) are inspected; a comment that follows any statement text is
// ignored. When several leading comments are present their contents are
// joined with "," and parsed as a single list.
//
// Parsing rules:
//   - Pairs are separated by ","; a value therefore cannot contain a comma.
//   - Each pair is split on its first "=" only, so a value may contain "=".
//   - Keys and values are trimmed of surrounding whitespace.
//   - Keys are compared to allowedKeys by exact string equality.
//   - Pairs are emitted in the order they appear in the comment text.
//
// This function is designed to be secure by default:
//   - If allowedKeys is empty or nil, returns empty string (no extraction)
//   - Only keys explicitly listed in allowedKeys are included in the result
//   - Duplicate keys use first occurrence only
//   - Malformed pairs (without =) are silently skipped
//
// Example:
//
//	sqlText := "/* key1=value1,key2=value2 */ SELECT * FROM users"
//	allowedKeys := []string{"key1"}
//	result := ExtractAndFilterComments(sqlText, allowedKeys)
//	// result == "key1=value1"
func ExtractAndFilterComments(sqlText string, allowedKeys []string) string {
// Early exit: if no allowed keys, return empty immediately (secure by default).
// This also avoids running the regexes when the feature is not configured.
if len(allowedKeys) == 0 {
return ""
}

// Grab the whole run of leading block comments (including the whitespace
// between them) as one string; empty means the SQL does not start with a comment.
matches := leadingBlockCommentRegex.FindString(sqlText)
if matches == "" {
return ""
}

// Strip /* and */ delimiters from all comments.
// Each element of commentMatches is one comment block; index 1 holds the
// captured text between the delimiters.
commentMatches := commentContentRegex.FindAllStringSubmatch(matches, -1)
if len(commentMatches) == 0 {
return ""
}

// Concatenate all comment contents, comma-separated, so that pairs from
// different comment blocks are parsed by the same split below.
var allComments strings.Builder
for i, match := range commentMatches {
if len(match) > 1 {
if i > 0 {
allComments.WriteString(",")
}
allComments.WriteString(strings.TrimSpace(match[1]))
}
}

// An empty result here means every leading comment was blank (e.g. "/**/").
commentContent := allComments.String()
if commentContent == "" {
return ""
}

// Parse key=value pairs and filter by allowed keys
pairs := strings.Split(commentContent, ",")
var filteredPairs []string
// seenKeys records keys already emitted so that later duplicates are dropped.
seenKeys := make(map[string]bool)

// Create a set of allowed keys for O(1) lookup
allowedSet := make(map[string]bool)
for _, key := range allowedKeys {
allowedSet[key] = true
}

for _, pair := range pairs {
pair = strings.TrimSpace(pair)
if pair == "" {
// Empty segment, e.g. from a blank comment block or a trailing comma.
continue
}

// Split by first = only, so any further "=" stays part of the value
parts := strings.SplitN(pair, "=", 2)
if len(parts) != 2 {
// Malformed pair (no "=" at all), skip it
continue
}

key := strings.TrimSpace(parts[0])
value := strings.TrimSpace(parts[1])

// Check if key is allowed and not already seen (use first occurrence)
if allowedSet[key] && !seenKeys[key] {
seenKeys[key] = true
filteredPairs = append(filteredPairs, key+"="+value)
}
}

// Join filtered pairs with comma (no space); a nil slice yields "".
return strings.Join(filteredPairs, ",")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return strings.Join(filteredPairs, ",")
return filteredPairs

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I would like to understand the suggested change better, please.
The SQL comments are designed to be carried as comma-separated key-value pairs in the query.comments attribute.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It goes hand in hand with the other comment.

}
202 changes: 202 additions & 0 deletions internal/common/sqlcomments/extractor_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

package sqlcomments

import (
"testing"
)

// TestExtractAndFilterComments runs ExtractAndFilterComments against a table
// of SQL inputs and allow-lists, one subtest per row, and checks the exact
// comma-separated result string.
func TestExtractAndFilterComments(t *testing.T) {
	cases := []struct {
		name        string
		sqlText     string
		allowedKeys []string
		want        string
	}{
		{
			name:        "single allowed key",
			sqlText:     "/* nr_service_guid=abc-123 */ SELECT * FROM t",
			allowedKeys: []string{"nr_service_guid"},
			want:        "nr_service_guid=abc-123",
		},
		{
			name:        "multiple allowed keys",
			sqlText:     "/* nr_service_guid=abc,app_id=xyz */ SELECT * FROM t",
			allowedKeys: []string{"nr_service_guid", "app_id"},
			want:        "nr_service_guid=abc,app_id=xyz",
		},
		{
			name:        "no matches",
			sqlText:     "/* other=val */ SELECT * FROM t",
			allowedKeys: []string{"nr_service_guid"},
			want:        "",
		},
		{
			// An explicitly empty (non-nil) allow-list disables extraction.
			name:        "empty allowlist",
			sqlText:     "/* nr_service_guid=abc */ SELECT * FROM t",
			allowedKeys: []string{},
			want:        "",
		},
		{
			// A nil allow-list behaves the same as an empty one.
			name:        "nil allowlist",
			sqlText:     "/* nr_service_guid=abc */ SELECT * FROM t",
			allowedKeys: nil,
			want:        "",
		},
		{
			name:        "multiple comments",
			sqlText:     "/* a=1 */ /* b=2 */ SELECT * FROM t",
			allowedKeys: []string{"a", "b"},
			want:        "a=1,b=2",
		},
		{
			name:        "not leading comment",
			sqlText:     "SELECT * FROM t /* a=1 */",
			allowedKeys: []string{"a"},
			want:        "",
		},
		{
			name:        "whitespace before comment",
			sqlText:     " /* nr_service_guid=abc */ SELECT * FROM t",
			allowedKeys: []string{"nr_service_guid"},
			want:        "nr_service_guid=abc",
		},
		{
			name:        "keys with spaces trimmed",
			sqlText:     "/* key1 = value1 , key2 = value2 */ SELECT * FROM t",
			allowedKeys: []string{"key1", "key2"},
			want:        "key1=value1,key2=value2",
		},
		{
			name:        "partial match filters correctly",
			sqlText:     "/* allowed=yes,notallowed=no,also_allowed=maybe */ SELECT * FROM t",
			allowedKeys: []string{"allowed", "also_allowed"},
			want:        "allowed=yes,also_allowed=maybe",
		},
		{
			// "invalid" is allow-listed but has no "=", so it must be dropped.
			name:        "malformed pairs skipped",
			sqlText:     "/* valid=1,invalid,another=2 */ SELECT * FROM t",
			allowedKeys: []string{"valid", "invalid", "another"},
			want:        "valid=1,another=2",
		},
		{
			name:        "empty comment",
			sqlText:     "/**/ SELECT * FROM t",
			allowedKeys: []string{"any"},
			want:        "",
		},
		{
			name:        "no comments",
			sqlText:     "SELECT * FROM t",
			allowedKeys: []string{"any"},
			want:        "",
		},
		{
			name:        "unclosed comment",
			sqlText:     "/* unclosed SELECT * FROM t",
			allowedKeys: []string{"any"},
			want:        "",
		},
		{
			name:        "values with special characters",
			sqlText:     `/* guid=abc-123-def,path=/api/v1/users */ SELECT * FROM t`,
			allowedKeys: []string{"guid", "path"},
			want:        "guid=abc-123-def,path=/api/v1/users",
		},
		{
			name:        "duplicate keys use first",
			sqlText:     "/* key=first,key=second */ SELECT * FROM t",
			allowedKeys: []string{"key"},
			want:        "key=first",
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := ExtractAndFilterComments(tc.sqlText, tc.allowedKeys)
			if got != tc.want {
				t.Errorf("ExtractAndFilterComments() = %q, want %q", got, tc.want)
			}
		})
	}
}
4 changes: 3 additions & 1 deletion receiver/oracledbreceiver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@ type TopQueryCollection struct {
MaxQuerySampleCount uint `mapstructure:"max_query_sample_count"`
TopQueryCount uint `mapstructure:"top_query_count"`
CollectionInterval time.Duration `mapstructure:"collection_interval"`
AllowedCommentKeys []string `mapstructure:"allowed_comment_keys"`
}

type QuerySample struct {
MaxRowsPerQuery uint64 `mapstructure:"max_rows_per_query"`
MaxRowsPerQuery uint64 `mapstructure:"max_rows_per_query"`
AllowedCommentKeys []string `mapstructure:"allowed_comment_keys"`

// prevent unkeyed literal initialization
_ struct{}
Expand Down
8 changes: 8 additions & 0 deletions receiver/oracledbreceiver/config.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,20 @@ $defs:
query_sample:
type: object
properties:
allowed_comment_keys:
type: array
items:
type: string
max_rows_per_query:
type: integer
x-customType: uint64
top_query_collection:
type: object
properties:
allowed_comment_keys:
type: array
items:
type: string
collection_interval:
type: string
format: duration
Expand Down
2 changes: 2 additions & 0 deletions receiver/oracledbreceiver/documentation.md
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ sample query
| oracledb.procedure_type | Type of the database object that a query is accessing. | Any Str | - |
| oracledb.osuser | Name of the operating system user that initiated or is running the Oracle database session. | Any Str | - |
| oracledb.duration_sec | Total time taken by a database query to execute. | Any Double | - |
| query.comments | Filtered SQL query comments extracted from leading block comments. Contains comma-separated key=value pairs for keys specified in allowed_comment_keys configuration. Used for correlation with APM traces. | Any Str | - |
| oracledb.query.started | The timestamp when the SQL statement started execution, in ISO 8601 format (UTC). | Any Str | - |
| oracledb.session.started | The timestamp when the session logged on, in ISO 8601 format (UTC). | Any Str | - |
| oracledb.session.duration | The total time in seconds that the session has been connected. | Any Double | - |
Expand Down Expand Up @@ -505,6 +506,7 @@ Collection of event metrics for top N queries, filtered based on the highest CPU
| oracledb.procedure_id | The identifier of the stored procedure or function being executed by the query. | Any Int | - |
| oracledb.procedure_name | Name of the database object that a query is accessing. | Any Str | - |
| oracledb.procedure_type | Type of the database object that a query is accessing. | Any Str | - |
| query.comments | Filtered SQL query comments extracted from leading block comments. Contains comma-separated key=value pairs for keys specified in allowed_comment_keys configuration. Used for correlation with APM traces. | Any Str | - |

## Resource Attributes

Expand Down
3 changes: 3 additions & 0 deletions receiver/oracledbreceiver/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ require (
github.com/DataDog/datadog-agent/pkg/obfuscate v0.77.0-devel.0.20260213154712-e02b9359151a
github.com/google/go-cmp v0.7.0
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.152.0
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.152.0
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.152.0
github.com/sijms/go-ora/v2 v2.9.0
Expand Down Expand Up @@ -82,6 +83,8 @@ retract (
v0.65.0
)

replace github.com/open-telemetry/opentelemetry-collector-contrib/internal/common => ../../internal/common

replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden => ../../pkg/golden

replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil => ../../pkg/pdatautil
Expand Down
Loading
Loading