diff --git a/.chloggen/oracledb-sql-comment-extraction.yaml b/.chloggen/oracledb-sql-comment-extraction.yaml new file mode 100644 index 0000000000000..34804c371165f --- /dev/null +++ b/.chloggen/oracledb-sql-comment-extraction.yaml @@ -0,0 +1,5 @@ +change_type: enhancement +component: receiver/oracledb +note: Add SQL comment extraction support for APM correlation. Users can now configure `allowed_comment_keys` to extract key-value pairs from SQL query comments and include them as telemetry attributes for correlation with APM traces. +issues: [48338] +change_logs: [user] diff --git a/internal/common/sqlcomments/extractor.go b/internal/common/sqlcomments/extractor.go new file mode 100644 index 0000000000000..c1230872f27fb --- /dev/null +++ b/internal/common/sqlcomments/extractor.go @@ -0,0 +1,106 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sqlcomments // import "github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/sqlcomments" + +import ( + "regexp" + "strings" +) + +var ( + // leadingBlockCommentRegex matches one or more leading /* */ block comments + leadingBlockCommentRegex = regexp.MustCompile(`^\s*(/\*.*?\*/\s*)+`) + // commentContentRegex extracts content between /* and */ delimiters + commentContentRegex = regexp.MustCompile(`/\*(.*?)\*/`) +) + +// ExtractAndFilterComments extracts leading /* */ block comments from SQL, +// parses them as key=value pairs, and returns only allowed keys. +// Returns comma-separated filtered pairs, or empty string if no allowed keys found. +// Format: "key1=value1,key2=value2" +// +// This function is designed to be secure by default: +// - If allowedKeys is empty or nil, returns empty string (no extraction) +// - Only keys explicitly listed in allowedKeys are included in the result +// - Duplicate keys use first occurrence only +// - Malformed pairs (without =) are silently skipped +// +// Example: +// +// sqlText := "/* key1=value1,key2=value2 */ SELECT * FROM users" +// allowedKeys := []string{"key1"} +// result := ExtractAndFilterComments(sqlText, allowedKeys) +// // result == "key1=value1" +func ExtractAndFilterComments(sqlText string, allowedKeys []string) string { + // Early exit: if no allowed keys, return empty immediately (secure by default) + if len(allowedKeys) == 0 { + return "" + } + + // Extract leading block comments using regex + matches := leadingBlockCommentRegex.FindString(sqlText) + if matches == "" { + return "" + } + + // Strip /* and */ delimiters from all comments + // Match each individual comment block + commentMatches := commentContentRegex.FindAllStringSubmatch(matches, -1) + if len(commentMatches) == 0 { + return "" + } + + // Concatenate all comment contents + var allComments strings.Builder + for i, match := range commentMatches { + if len(match) > 1 { + if i > 0 { + allComments.WriteString(",") + } + allComments.WriteString(strings.TrimSpace(match[1])) + } + } + + commentContent := allComments.String() + if commentContent == "" { + return "" + } + + // Parse key=value pairs and filter by allowed keys + pairs := strings.Split(commentContent, ",") + var filteredPairs []string + seenKeys := make(map[string]bool) + + // Create a set of allowed keys for O(1) lookup + allowedSet := make(map[string]bool) + for _, key := range allowedKeys { + allowedSet[key] = true + } + + for _, pair := range pairs { + pair = strings.TrimSpace(pair) + if pair == "" { + continue + } + + // Split by first = only + parts := strings.SplitN(pair, "=", 2) + if len(parts) != 2 { + // Malformed pair, skip it + continue + } + + key := strings.TrimSpace(parts[0]) + value := strings.TrimSpace(parts[1]) + + // Check if key is allowed and not already seen (use first occurrence) + if allowedSet[key] && !seenKeys[key] { + seenKeys[key] = true + filteredPairs = append(filteredPairs, key+"="+value) + } + } + + // Join filtered pairs with comma (no space) + return strings.Join(filteredPairs, ",") +} diff --git a/internal/common/sqlcomments/extractor_test.go b/internal/common/sqlcomments/extractor_test.go new file mode 100644 index 0000000000000..20114774a5a3a --- /dev/null +++ b/internal/common/sqlcomments/extractor_test.go @@ -0,0 +1,202 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package sqlcomments + +import ( + "testing" +) + +func TestExtractAndFilterComments(t *testing.T) { + t.Run("single allowed key", func(t *testing.T) { + sqlText := "/* nr_service_guid=abc-123 */ SELECT * FROM t" + allowedKeys := []string{"nr_service_guid"} + want := "nr_service_guid=abc-123" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("multiple allowed keys", func(t *testing.T) { + sqlText := "/* nr_service_guid=abc,app_id=xyz */ SELECT * FROM t" + allowedKeys := []string{"nr_service_guid", "app_id"} + want := "nr_service_guid=abc,app_id=xyz" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("no matches", func(t *testing.T) { + sqlText := "/* other=val */ SELECT * FROM t" + allowedKeys := []string{"nr_service_guid"} + want := "" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("empty allowlist", func(t *testing.T) { + sqlText := "/* nr_service_guid=abc */ SELECT * FROM t" + allowedKeys := []string{} + want := "" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("nil allowlist", func(t *testing.T) { + sqlText := "/* nr_service_guid=abc */ SELECT * FROM t" + var allowedKeys []string + want := "" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("multiple comments", func(t *testing.T) { + sqlText := "/* a=1 */ /* b=2 */ SELECT * FROM t" + allowedKeys := []string{"a", "b"} + want := "a=1,b=2" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("not leading comment", func(t *testing.T) { + sqlText := "SELECT * FROM t /* a=1 */" + allowedKeys := []string{"a"} + want := "" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("whitespace before comment", func(t *testing.T) { + sqlText := " /* nr_service_guid=abc */ SELECT * FROM t" + allowedKeys := []string{"nr_service_guid"} + want := "nr_service_guid=abc" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("keys with spaces trimmed", func(t *testing.T) { + sqlText := "/* key1 = value1 , key2 = value2 */ SELECT * FROM t" + allowedKeys := []string{"key1", "key2"} + want := "key1=value1,key2=value2" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("partial match filters correctly", func(t *testing.T) { + sqlText := "/* allowed=yes,notallowed=no,also_allowed=maybe */ SELECT * FROM t" + allowedKeys := []string{"allowed", "also_allowed"} + want := "allowed=yes,also_allowed=maybe" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("malformed pairs skipped", func(t *testing.T) { + sqlText := "/* valid=1,invalid,another=2 */ SELECT * FROM t" + allowedKeys := []string{"valid", "invalid", "another"} + want := "valid=1,another=2" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("empty comment", func(t *testing.T) { + sqlText := "/**/ SELECT * FROM t" + allowedKeys := []string{"any"} + want := "" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("no comments", func(t *testing.T) { + sqlText := "SELECT * FROM t" + allowedKeys := []string{"any"} + want := "" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("unclosed comment", func(t *testing.T) { + sqlText := "/* unclosed SELECT * FROM t" + allowedKeys := []string{"any"} + want := "" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("values with special characters", func(t *testing.T) { + sqlText := `/* guid=abc-123-def,path=/api/v1/users */ SELECT * FROM t` + allowedKeys := []string{"guid", "path"} + want := "guid=abc-123-def,path=/api/v1/users" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) + + t.Run("duplicate keys use first", func(t *testing.T) { + sqlText := "/* key=first,key=second */ SELECT * FROM t" + allowedKeys := []string{"key"} + want := "key=first" + + got := ExtractAndFilterComments(sqlText, allowedKeys) + + if got != want { + t.Errorf("ExtractAndFilterComments() = %q, want %q", got, want) + } + }) +} diff --git a/receiver/oracledbreceiver/config.go b/receiver/oracledbreceiver/config.go index 389d638766361..a1189c152edbb 100644 --- a/receiver/oracledbreceiver/config.go +++ b/receiver/oracledbreceiver/config.go @@ -33,10 +33,12 @@ type TopQueryCollection struct { MaxQuerySampleCount uint `mapstructure:"max_query_sample_count"` TopQueryCount uint `mapstructure:"top_query_count"` CollectionInterval time.Duration `mapstructure:"collection_interval"` + AllowedCommentKeys []string `mapstructure:"allowed_comment_keys"` } type QuerySample struct { - MaxRowsPerQuery uint64 `mapstructure:"max_rows_per_query"` + MaxRowsPerQuery uint64 `mapstructure:"max_rows_per_query"` + AllowedCommentKeys []string `mapstructure:"allowed_comment_keys"` // prevent unkeyed literal initialization _ struct{} diff --git a/receiver/oracledbreceiver/config.schema.yaml b/receiver/oracledbreceiver/config.schema.yaml index d0b47bc9424ba..a414eb3c1cb0a 100644 --- a/receiver/oracledbreceiver/config.schema.yaml +++ b/receiver/oracledbreceiver/config.schema.yaml @@ -2,12 +2,20 @@ $defs: query_sample: type: object properties: + allowed_comment_keys: + type: array + items: + type: string max_rows_per_query: type: integer x-customType: uint64 top_query_collection: type: object properties: + allowed_comment_keys: + type: array + items: + type: string collection_interval: type: string format: duration diff --git a/receiver/oracledbreceiver/documentation.md b/receiver/oracledbreceiver/documentation.md index d94e7e17297c3..0c537e1d5574a 100644 --- a/receiver/oracledbreceiver/documentation.md +++ b/receiver/oracledbreceiver/documentation.md @@ -465,6 +465,7 @@ sample query | oracledb.procedure_type | Type of the database object that a query is accessing. | Any Str | - | | oracledb.osuser | Name of the operating system user that initiated or is running the Oracle database session. | Any Str | - | | oracledb.duration_sec | Total time taken by a database query to execute. | Any Double | - | +| query.comments | Filtered SQL query comments extracted from leading block comments. Contains comma-separated key=value pairs for keys specified in allowed_comment_keys configuration. Used for correlation with APM traces. | Any Str | - | | oracledb.query.started | The timestamp when the SQL statement started execution, in ISO 8601 format (UTC). | Any Str | - | | oracledb.session.started | The timestamp when the session logged on, in ISO 8601 format (UTC). | Any Str | - | | oracledb.session.duration | The total time in seconds that the session has been connected. | Any Double | - | @@ -505,6 +506,7 @@ Collection of event metrics for top N queries, filtered based on the highest CPU | oracledb.procedure_id | The identifier of the stored procedure or function being executed by the query. | Any Int | - | | oracledb.procedure_name | Name of the database object that a query is accessing. | Any Str | - | | oracledb.procedure_type | Type of the database object that a query is accessing. | Any Str | - | +| query.comments | Filtered SQL query comments extracted from leading block comments. Contains comma-separated key=value pairs for keys specified in allowed_comment_keys configuration. Used for correlation with APM traces. | Any Str | - | ## Resource Attributes diff --git a/receiver/oracledbreceiver/go.mod b/receiver/oracledbreceiver/go.mod index 90f99f394eacf..642af83815c10 100644 --- a/receiver/oracledbreceiver/go.mod +++ b/receiver/oracledbreceiver/go.mod @@ -6,6 +6,7 @@ require ( github.com/DataDog/datadog-agent/pkg/obfuscate v0.77.0-devel.0.20260213154712-e02b9359151a github.com/google/go-cmp v0.7.0 github.com/hashicorp/golang-lru/v2 v2.0.7 + github.com/open-telemetry/opentelemetry-collector-contrib/internal/common v0.152.0 github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden v0.152.0 github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest v0.152.0 github.com/sijms/go-ora/v2 v2.9.0 @@ -82,6 +83,8 @@ retract ( v0.65.0 ) +replace github.com/open-telemetry/opentelemetry-collector-contrib/internal/common => ../../internal/common + replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden => ../../pkg/golden replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil => ../../pkg/pdatautil diff --git a/receiver/oracledbreceiver/internal/metadata/generated_logs.go b/receiver/oracledbreceiver/internal/metadata/generated_logs.go index 64934a3578e7b..91390ac65be88 100644 --- a/receiver/oracledbreceiver/internal/metadata/generated_logs.go +++ b/receiver/oracledbreceiver/internal/metadata/generated_logs.go @@ -18,7 +18,7 @@ type eventDbServerQuerySample struct { config EventConfig // event config provided by user. } -func (e *eventDbServerQuerySample) recordEvent(ctx context.Context, timestamp pcommon.Timestamp, dbQueryTextAttributeValue string, dbSystemNameAttributeValue string, userNameAttributeValue string, dbNamespaceAttributeValue string, clientAddressAttributeValue string, clientPortAttributeValue int64, networkPeerAddressAttributeValue string, networkPeerPortAttributeValue int64, oracledbPlanHashValueAttributeValue string, oracledbSQLIDAttributeValue string, oracledbChildNumberAttributeValue string, oracledbChildAddressAttributeValue string, oracledbSidAttributeValue string, oracledbSerialAttributeValue string, oracledbProcessAttributeValue string, oracledbSchemanameAttributeValue string, oracledbProgramAttributeValue string, oracledbModuleAttributeValue string, oracledbStatusAttributeValue string, oracledbStateAttributeValue string, oracledbWaitClassAttributeValue string, oracledbEventAttributeValue string, oracledbQueryWaitTimeAttributeValue float64, oracledbProcedureIDAttributeValue int64, oracledbProcedureNameAttributeValue string, oracledbProcedureTypeAttributeValue string, oracledbOsuserAttributeValue string, oracledbDurationSecAttributeValue float64, oracledbQueryStartedAttributeValue string, oracledbSessionStartedAttributeValue string, oracledbSessionDurationAttributeValue float64) { +func (e *eventDbServerQuerySample) recordEvent(ctx context.Context, timestamp pcommon.Timestamp, dbQueryTextAttributeValue string, dbSystemNameAttributeValue string, userNameAttributeValue string, dbNamespaceAttributeValue string, clientAddressAttributeValue string, clientPortAttributeValue int64, networkPeerAddressAttributeValue string, networkPeerPortAttributeValue int64, oracledbPlanHashValueAttributeValue string, oracledbSQLIDAttributeValue string, oracledbChildNumberAttributeValue string, oracledbChildAddressAttributeValue string, oracledbSidAttributeValue string, oracledbSerialAttributeValue string, oracledbProcessAttributeValue string, oracledbSchemanameAttributeValue string, oracledbProgramAttributeValue string, oracledbModuleAttributeValue string, oracledbStatusAttributeValue string, oracledbStateAttributeValue string, oracledbWaitClassAttributeValue string, oracledbEventAttributeValue string, oracledbQueryWaitTimeAttributeValue float64, oracledbProcedureIDAttributeValue int64, oracledbProcedureNameAttributeValue string, oracledbProcedureTypeAttributeValue string, oracledbOsuserAttributeValue string, oracledbDurationSecAttributeValue float64, queryCommentsAttributeValue string, oracledbQueryStartedAttributeValue string, oracledbSessionStartedAttributeValue string, oracledbSessionDurationAttributeValue float64) { if !e.config.Enabled { return } @@ -58,6 +58,7 @@ func (e *eventDbServerQuerySample) recordEvent(ctx context.Context, timestamp pc dp.Attributes().PutStr("oracledb.procedure_type", oracledbProcedureTypeAttributeValue) dp.Attributes().PutStr("oracledb.osuser", oracledbOsuserAttributeValue) dp.Attributes().PutDouble("oracledb.duration_sec", oracledbDurationSecAttributeValue) + dp.Attributes().PutStr("query.comments", queryCommentsAttributeValue) dp.Attributes().PutStr("oracledb.query.started", oracledbQueryStartedAttributeValue) dp.Attributes().PutStr("oracledb.session.started", oracledbSessionStartedAttributeValue) dp.Attributes().PutDouble("oracledb.session.duration", oracledbSessionDurationAttributeValue) @@ -84,7 +85,7 @@ type eventDbServerTopQuery struct { config EventConfig // event config provided by user. } -func (e *eventDbServerTopQuery) recordEvent(ctx context.Context, timestamp pcommon.Timestamp, dbSystemNameAttributeValue string, dbServerNameAttributeValue string, dbQueryTextAttributeValue string, oracledbQueryPlanAttributeValue string, oracledbSQLIDAttributeValue string, oracledbChildNumberAttributeValue string, oracledbChildAddressAttributeValue string, oracledbApplicationWaitTimeAttributeValue float64, oracledbBufferGetsAttributeValue int64, oracledbClusterWaitTimeAttributeValue float64, oracledbCommandTypeAttributeValue int64, oracledbConcurrencyWaitTimeAttributeValue float64, oracledbCPUTimeAttributeValue float64, oracledbDirectReadsAttributeValue int64, oracledbDirectWritesAttributeValue int64, oracledbDiskReadsAttributeValue int64, oracledbElapsedTimeAttributeValue float64, oracledbExecutionsAttributeValue int64, oracledbPhysicalReadBytesAttributeValue int64, oracledbPhysicalReadRequestsAttributeValue int64, oracledbPhysicalWriteBytesAttributeValue int64, oracledbPhysicalWriteRequestsAttributeValue int64, oracledbRowsProcessedAttributeValue int64, oracledbUserIoWaitTimeAttributeValue float64, oracledbProcedureExecutionCountAttributeValue int64, oracledbProcedureIDAttributeValue int64, oracledbProcedureNameAttributeValue string, oracledbProcedureTypeAttributeValue string) { +func (e *eventDbServerTopQuery) recordEvent(ctx context.Context, timestamp pcommon.Timestamp, dbSystemNameAttributeValue string, dbServerNameAttributeValue string, dbQueryTextAttributeValue string, oracledbQueryPlanAttributeValue string, oracledbSQLIDAttributeValue string, oracledbChildNumberAttributeValue string, oracledbChildAddressAttributeValue string, oracledbApplicationWaitTimeAttributeValue float64, oracledbBufferGetsAttributeValue int64, oracledbClusterWaitTimeAttributeValue float64, oracledbCommandTypeAttributeValue int64, oracledbConcurrencyWaitTimeAttributeValue float64, oracledbCPUTimeAttributeValue float64, oracledbDirectReadsAttributeValue int64, oracledbDirectWritesAttributeValue int64, oracledbDiskReadsAttributeValue int64, oracledbElapsedTimeAttributeValue float64, oracledbExecutionsAttributeValue int64, oracledbPhysicalReadBytesAttributeValue int64, oracledbPhysicalReadRequestsAttributeValue int64, oracledbPhysicalWriteBytesAttributeValue int64, oracledbPhysicalWriteRequestsAttributeValue int64, oracledbRowsProcessedAttributeValue int64, oracledbUserIoWaitTimeAttributeValue float64, oracledbProcedureExecutionCountAttributeValue int64, oracledbProcedureIDAttributeValue int64, oracledbProcedureNameAttributeValue string, oracledbProcedureTypeAttributeValue string, queryCommentsAttributeValue string) { if !e.config.Enabled { return } @@ -124,6 +125,7 @@ func (e *eventDbServerTopQuery) recordEvent(ctx context.Context, timestamp pcomm dp.Attributes().PutInt("oracledb.procedure_id", oracledbProcedureIDAttributeValue) dp.Attributes().PutStr("oracledb.procedure_name", oracledbProcedureNameAttributeValue) dp.Attributes().PutStr("oracledb.procedure_type", oracledbProcedureTypeAttributeValue) + dp.Attributes().PutStr("query.comments", queryCommentsAttributeValue) } @@ -271,11 +273,11 @@ func (lb *LogsBuilder) Emit(options ...ResourceLogsOption) plog.Logs { } // RecordDbServerQuerySampleEvent adds a log record of db.server.query_sample event. -func (lb *LogsBuilder) RecordDbServerQuerySampleEvent(ctx context.Context, timestamp pcommon.Timestamp, dbQueryTextAttributeValue string, dbSystemNameAttributeValue string, userNameAttributeValue string, dbNamespaceAttributeValue string, clientAddressAttributeValue string, clientPortAttributeValue int64, networkPeerAddressAttributeValue string, networkPeerPortAttributeValue int64, oracledbPlanHashValueAttributeValue string, oracledbSQLIDAttributeValue string, oracledbChildNumberAttributeValue string, oracledbChildAddressAttributeValue string, oracledbSidAttributeValue string, oracledbSerialAttributeValue string, oracledbProcessAttributeValue string, oracledbSchemanameAttributeValue string, oracledbProgramAttributeValue string, oracledbModuleAttributeValue string, oracledbStatusAttributeValue string, oracledbStateAttributeValue string, oracledbWaitClassAttributeValue string, oracledbEventAttributeValue string, oracledbQueryWaitTimeAttributeValue float64, oracledbProcedureIDAttributeValue int64, oracledbProcedureNameAttributeValue string, oracledbProcedureTypeAttributeValue string, oracledbOsuserAttributeValue string, oracledbDurationSecAttributeValue float64, oracledbQueryStartedAttributeValue string, oracledbSessionStartedAttributeValue string, oracledbSessionDurationAttributeValue float64) { - lb.eventDbServerQuerySample.recordEvent(ctx, timestamp, dbQueryTextAttributeValue, dbSystemNameAttributeValue, userNameAttributeValue, dbNamespaceAttributeValue, clientAddressAttributeValue, clientPortAttributeValue, networkPeerAddressAttributeValue, networkPeerPortAttributeValue, oracledbPlanHashValueAttributeValue, oracledbSQLIDAttributeValue, oracledbChildNumberAttributeValue, oracledbChildAddressAttributeValue, oracledbSidAttributeValue, oracledbSerialAttributeValue, oracledbProcessAttributeValue, oracledbSchemanameAttributeValue, oracledbProgramAttributeValue, oracledbModuleAttributeValue, oracledbStatusAttributeValue, oracledbStateAttributeValue, oracledbWaitClassAttributeValue, oracledbEventAttributeValue, oracledbQueryWaitTimeAttributeValue, oracledbProcedureIDAttributeValue, oracledbProcedureNameAttributeValue, oracledbProcedureTypeAttributeValue, oracledbOsuserAttributeValue, oracledbDurationSecAttributeValue, oracledbQueryStartedAttributeValue, oracledbSessionStartedAttributeValue, oracledbSessionDurationAttributeValue) +func (lb *LogsBuilder) RecordDbServerQuerySampleEvent(ctx context.Context, timestamp pcommon.Timestamp, dbQueryTextAttributeValue string, dbSystemNameAttributeValue string, userNameAttributeValue string, dbNamespaceAttributeValue string, clientAddressAttributeValue string, clientPortAttributeValue int64, networkPeerAddressAttributeValue string, networkPeerPortAttributeValue int64, oracledbPlanHashValueAttributeValue string, oracledbSQLIDAttributeValue string, oracledbChildNumberAttributeValue string, oracledbChildAddressAttributeValue string, oracledbSidAttributeValue string, oracledbSerialAttributeValue string, oracledbProcessAttributeValue string, oracledbSchemanameAttributeValue string, oracledbProgramAttributeValue string, oracledbModuleAttributeValue string, oracledbStatusAttributeValue string, oracledbStateAttributeValue string, oracledbWaitClassAttributeValue string, oracledbEventAttributeValue string, oracledbQueryWaitTimeAttributeValue float64, oracledbProcedureIDAttributeValue int64, oracledbProcedureNameAttributeValue string, oracledbProcedureTypeAttributeValue string, oracledbOsuserAttributeValue string, oracledbDurationSecAttributeValue float64, queryCommentsAttributeValue string, oracledbQueryStartedAttributeValue string, oracledbSessionStartedAttributeValue string, oracledbSessionDurationAttributeValue float64) { + lb.eventDbServerQuerySample.recordEvent(ctx, timestamp, dbQueryTextAttributeValue, dbSystemNameAttributeValue, userNameAttributeValue, dbNamespaceAttributeValue, clientAddressAttributeValue, clientPortAttributeValue, networkPeerAddressAttributeValue, networkPeerPortAttributeValue, oracledbPlanHashValueAttributeValue, oracledbSQLIDAttributeValue, oracledbChildNumberAttributeValue, oracledbChildAddressAttributeValue, oracledbSidAttributeValue, oracledbSerialAttributeValue, oracledbProcessAttributeValue, oracledbSchemanameAttributeValue, oracledbProgramAttributeValue, oracledbModuleAttributeValue, oracledbStatusAttributeValue, oracledbStateAttributeValue, oracledbWaitClassAttributeValue, oracledbEventAttributeValue, oracledbQueryWaitTimeAttributeValue, oracledbProcedureIDAttributeValue, oracledbProcedureNameAttributeValue, oracledbProcedureTypeAttributeValue, oracledbOsuserAttributeValue, oracledbDurationSecAttributeValue, queryCommentsAttributeValue, oracledbQueryStartedAttributeValue, oracledbSessionStartedAttributeValue, oracledbSessionDurationAttributeValue) } // RecordDbServerTopQueryEvent adds a log record of db.server.top_query event. -func (lb *LogsBuilder) RecordDbServerTopQueryEvent(ctx context.Context, timestamp pcommon.Timestamp, dbSystemNameAttributeValue string, dbServerNameAttributeValue string, dbQueryTextAttributeValue string, oracledbQueryPlanAttributeValue string, oracledbSQLIDAttributeValue string, oracledbChildNumberAttributeValue string, oracledbChildAddressAttributeValue string, oracledbApplicationWaitTimeAttributeValue float64, oracledbBufferGetsAttributeValue int64, oracledbClusterWaitTimeAttributeValue float64, oracledbCommandTypeAttributeValue int64, oracledbConcurrencyWaitTimeAttributeValue float64, oracledbCPUTimeAttributeValue float64, oracledbDirectReadsAttributeValue int64, oracledbDirectWritesAttributeValue int64, oracledbDiskReadsAttributeValue int64, oracledbElapsedTimeAttributeValue float64, oracledbExecutionsAttributeValue int64, oracledbPhysicalReadBytesAttributeValue int64, oracledbPhysicalReadRequestsAttributeValue int64, oracledbPhysicalWriteBytesAttributeValue int64, oracledbPhysicalWriteRequestsAttributeValue int64, oracledbRowsProcessedAttributeValue int64, oracledbUserIoWaitTimeAttributeValue float64, oracledbProcedureExecutionCountAttributeValue int64, oracledbProcedureIDAttributeValue int64, oracledbProcedureNameAttributeValue string, oracledbProcedureTypeAttributeValue string) { - lb.eventDbServerTopQuery.recordEvent(ctx, timestamp, dbSystemNameAttributeValue, dbServerNameAttributeValue, dbQueryTextAttributeValue, oracledbQueryPlanAttributeValue, oracledbSQLIDAttributeValue, oracledbChildNumberAttributeValue, oracledbChildAddressAttributeValue, oracledbApplicationWaitTimeAttributeValue, oracledbBufferGetsAttributeValue, oracledbClusterWaitTimeAttributeValue, oracledbCommandTypeAttributeValue, oracledbConcurrencyWaitTimeAttributeValue, oracledbCPUTimeAttributeValue, oracledbDirectReadsAttributeValue, oracledbDirectWritesAttributeValue, oracledbDiskReadsAttributeValue, oracledbElapsedTimeAttributeValue, oracledbExecutionsAttributeValue, oracledbPhysicalReadBytesAttributeValue, oracledbPhysicalReadRequestsAttributeValue, oracledbPhysicalWriteBytesAttributeValue, oracledbPhysicalWriteRequestsAttributeValue, oracledbRowsProcessedAttributeValue, oracledbUserIoWaitTimeAttributeValue, oracledbProcedureExecutionCountAttributeValue, oracledbProcedureIDAttributeValue, oracledbProcedureNameAttributeValue, oracledbProcedureTypeAttributeValue) +func (lb *LogsBuilder) RecordDbServerTopQueryEvent(ctx context.Context, timestamp pcommon.Timestamp, dbSystemNameAttributeValue string, dbServerNameAttributeValue string, dbQueryTextAttributeValue string, oracledbQueryPlanAttributeValue string, oracledbSQLIDAttributeValue string, oracledbChildNumberAttributeValue string, oracledbChildAddressAttributeValue string, oracledbApplicationWaitTimeAttributeValue float64, oracledbBufferGetsAttributeValue int64, oracledbClusterWaitTimeAttributeValue float64, oracledbCommandTypeAttributeValue int64, oracledbConcurrencyWaitTimeAttributeValue float64, oracledbCPUTimeAttributeValue float64, oracledbDirectReadsAttributeValue int64, oracledbDirectWritesAttributeValue int64, oracledbDiskReadsAttributeValue int64, oracledbElapsedTimeAttributeValue float64, oracledbExecutionsAttributeValue int64, oracledbPhysicalReadBytesAttributeValue int64, oracledbPhysicalReadRequestsAttributeValue int64, oracledbPhysicalWriteBytesAttributeValue int64, oracledbPhysicalWriteRequestsAttributeValue int64, oracledbRowsProcessedAttributeValue int64, oracledbUserIoWaitTimeAttributeValue float64, oracledbProcedureExecutionCountAttributeValue int64, oracledbProcedureIDAttributeValue int64, oracledbProcedureNameAttributeValue string, oracledbProcedureTypeAttributeValue string, queryCommentsAttributeValue string) { + lb.eventDbServerTopQuery.recordEvent(ctx, timestamp, dbSystemNameAttributeValue, dbServerNameAttributeValue, dbQueryTextAttributeValue, oracledbQueryPlanAttributeValue, oracledbSQLIDAttributeValue, oracledbChildNumberAttributeValue, oracledbChildAddressAttributeValue, oracledbApplicationWaitTimeAttributeValue, oracledbBufferGetsAttributeValue, oracledbClusterWaitTimeAttributeValue, oracledbCommandTypeAttributeValue, oracledbConcurrencyWaitTimeAttributeValue, oracledbCPUTimeAttributeValue, oracledbDirectReadsAttributeValue, oracledbDirectWritesAttributeValue, oracledbDiskReadsAttributeValue, oracledbElapsedTimeAttributeValue, oracledbExecutionsAttributeValue, oracledbPhysicalReadBytesAttributeValue, oracledbPhysicalReadRequestsAttributeValue, oracledbPhysicalWriteBytesAttributeValue, oracledbPhysicalWriteRequestsAttributeValue, oracledbRowsProcessedAttributeValue, oracledbUserIoWaitTimeAttributeValue, oracledbProcedureExecutionCountAttributeValue, oracledbProcedureIDAttributeValue, oracledbProcedureNameAttributeValue, oracledbProcedureTypeAttributeValue, queryCommentsAttributeValue) } diff --git a/receiver/oracledbreceiver/internal/metadata/generated_logs_test.go b/receiver/oracledbreceiver/internal/metadata/generated_logs_test.go index a4b638989b03e..479eda0e9b74c 100644 --- a/receiver/oracledbreceiver/internal/metadata/generated_logs_test.go +++ b/receiver/oracledbreceiver/internal/metadata/generated_logs_test.go @@ -131,10 +131,10 @@ func TestLogsBuilder(t *testing.T) { allEventsCount := 0 allEventsCount++ - lb.RecordDbServerQuerySampleEvent(ctx, timestamp, "db.query.text-val", "db.system.name-val", "user.name-val", "db.namespace-val", "client.address-val", 11, "network.peer.address-val", 17, "oracledb.plan_hash_value-val", "oracledb.sql_id-val", "oracledb.child_number-val", "oracledb.child_address-val", "oracledb.sid-val", "oracledb.serial-val", "oracledb.process-val", "oracledb.schemaname-val", "oracledb.program-val", "oracledb.module-val", "oracledb.status-val", "oracledb.state-val", "oracledb.wait_class-val", "oracledb.event-val", 24.100000, 21, "oracledb.procedure_name-val", "oracledb.procedure_type-val", "oracledb.osuser-val", 21.100000, "oracledb.query.started-val", "oracledb.session.started-val", 25.100000) + lb.RecordDbServerQuerySampleEvent(ctx, timestamp, "db.query.text-val", "db.system.name-val", "user.name-val", "db.namespace-val", "client.address-val", 11, "network.peer.address-val", 17, "oracledb.plan_hash_value-val", "oracledb.sql_id-val", "oracledb.child_number-val", "oracledb.child_address-val", "oracledb.sid-val", "oracledb.serial-val", "oracledb.process-val", "oracledb.schemaname-val", "oracledb.program-val", "oracledb.module-val", "oracledb.status-val", "oracledb.state-val", "oracledb.wait_class-val", "oracledb.event-val", 24.100000, 21, "oracledb.procedure_name-val", "oracledb.procedure_type-val", "oracledb.osuser-val", 21.100000, "query.comments-val", "oracledb.query.started-val", "oracledb.session.started-val", 25.100000) allEventsCount++ - lb.RecordDbServerTopQueryEvent(ctx, timestamp, "db.system.name-val", "db.server.name-val", "db.query.text-val", "oracledb.query_plan-val", "oracledb.sql_id-val", "oracledb.child_number-val", "oracledb.child_address-val", 30.100000, 20, 26.100000, 21, 30.100000, 17.100000, 21, 22, 19, 21.100000, 19, 28, 31, 29, 32, 23, 26.100000, 34, 21, "oracledb.procedure_name-val", "oracledb.procedure_type-val") + lb.RecordDbServerTopQueryEvent(ctx, timestamp, "db.system.name-val", "db.server.name-val", "db.query.text-val", "oracledb.query_plan-val", "oracledb.sql_id-val", "oracledb.child_number-val", "oracledb.child_address-val", 30.100000, 20, 26.100000, 21, 30.100000, 17.100000, 21, 22, 19, 21.100000, 19, 28, 31, 29, 32, 23, 26.100000, 34, 21, "oracledb.procedure_name-val", "oracledb.procedure_type-val", "query.comments-val") rb := lb.NewResourceBuilder() rb.SetHostName("host.name-val") @@ -253,6 +253,9 @@ func TestLogsBuilder(t *testing.T) { attrVal, ok = lr.Attributes().Get("oracledb.duration_sec") assert.True(t, ok) assert.Equal(t, 21.100000, attrVal.Double()) + attrVal, ok = lr.Attributes().Get("query.comments") + assert.True(t, ok) + assert.Equal(t, "query.comments-val", attrVal.Str()) attrVal, ok = lr.Attributes().Get("oracledb.query.started") assert.True(t, ok) assert.Equal(t, "oracledb.query.started-val", attrVal.Str()) @@ -353,6 +356,9 @@ func TestLogsBuilder(t *testing.T) { attrVal, ok = lr.Attributes().Get("oracledb.procedure_type") assert.True(t, ok) assert.Equal(t, "oracledb.procedure_type-val", attrVal.Str()) + attrVal, ok = lr.Attributes().Get("query.comments") + assert.True(t, ok) + assert.Equal(t, "query.comments-val", attrVal.Str()) } } }) diff --git a/receiver/oracledbreceiver/metadata.yaml b/receiver/oracledbreceiver/metadata.yaml index 14b7e5fbf644a..915728a5e7f56 100644 --- a/receiver/oracledbreceiver/metadata.yaml +++ b/receiver/oracledbreceiver/metadata.yaml @@ -181,6 +181,9 @@ attributes: oracledb.wait_class: description: The category of wait events a query or session is currently experiencing in Oracle Database. type: string + query.comments: + description: Filtered SQL query comments extracted from leading block comments. Contains comma-separated key=value pairs for keys specified in allowed_comment_keys configuration. Used for correlation with APM traces. + type: string session_status: description: Session status type: string @@ -232,6 +235,7 @@ events: - oracledb.procedure_type - oracledb.osuser - oracledb.duration_sec + - query.comments - oracledb.query.started - oracledb.session.started - oracledb.session.duration @@ -267,6 +271,7 @@ events: - oracledb.procedure_id - oracledb.procedure_name - oracledb.procedure_type + - query.comments metrics: oracledb.consistent_gets: description: Number of times a consistent read was requested for a block from the buffer cache. diff --git a/receiver/oracledbreceiver/scraper.go b/receiver/oracledbreceiver/scraper.go index d0fbae8096d49..ce815df9151d2 100644 --- a/receiver/oracledbreceiver/scraper.go +++ b/receiver/oracledbreceiver/scraper.go @@ -31,6 +31,7 @@ import ( "go.uber.org/multierr" "go.uber.org/zap" + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/sqlcomments" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/oracledbreceiver/internal/metadata" ) @@ -616,15 +617,16 @@ func (s *oracleScraper) collectStorageUsage(ctx context.Context, scrapeErrors *[ } type queryMetricCacheHit struct { - sqlID string - childNumber string - childAddress string - queryText string - metrics map[string]int64 - objectID int64 - objectName string - objectType string - commandType int64 + sqlID string + childNumber string + childAddress string + queryText string + queryComments string + metrics map[string]int64 + objectID int64 + objectName string + objectType string + commandType int64 } func (s *oracleScraper) scrapeLogs(ctx context.Context) (plog.Logs, error) { @@ -698,16 +700,20 @@ func (s *oracleScraper) collectTopNMetricData(ctx context.Context, logs plog.Log commandType, _ = strconv.ParseInt(row[commandTypeAttr], 10, 64) } + // Extract and filter comments from original SQL before obfuscation + queryComments := sqlcomments.ExtractAndFilterComments(row[sqlTextAttr], s.topQueryCollectCfg.AllowedCommentKeys) + hit := queryMetricCacheHit{ - sqlID: row[sqlIDAttr], - queryText: row[sqlTextAttr], - childNumber: row[childNumberAttr], - childAddress: row[childAddressAttr], - metrics: make(map[string]int64, len(metricNames)), - objectID: objectID, - objectName: row[objectNameAttr], - objectType: row[objectTypeAttr], - commandType: commandType, + sqlID: row[sqlIDAttr], + queryText: row[sqlTextAttr], + queryComments: queryComments, + childNumber: row[childNumberAttr], + childAddress: row[childAddressAttr], + metrics: make(map[string]int64, len(metricNames)), + objectID: objectID, + objectName: row[objectNameAttr], + objectType: row[objectTypeAttr], + commandType: commandType, } var possiblePurge bool @@ -758,7 +764,8 @@ func (s *oracleScraper) collectTopNMetricData(ctx context.Context, logs plog.Log rb := s.setupResourceBuilder(s.lb.NewResourceBuilder()) - for _, hit := range hits { + for i := range hits { + hit := &hits[i] planBytes, err := json.Marshal(childAddressToPlanMap[hit.childAddress]) if err != nil { s.logger.Error("Error marshaling plan data to JSON", zap.Error(err)) @@ -792,7 +799,8 @@ func (s *oracleScraper) collectTopNMetricData(ctx context.Context, logs plog.Log hit.metrics[procedureExecutionsMetric], hit.objectID, hit.objectName, - hit.objectType) + hit.objectType, + hit.queryComments) } hitCount := len(hits) @@ -893,10 +901,13 @@ func (s *oracleScraper) collectQuerySamples(ctx context.Context, logs plog.Logs) "traceparent": row[action], }) + // Extract and filter query comments from original SQL (before obfuscation) + queryComments := sqlcomments.ExtractAndFilterComments(row[sqlText], s.querySampleCfg.AllowedCommentKeys) + s.lb.RecordDbServerQuerySampleEvent(queryContext, timestamp, obfuscatedSQL, dbSystemNameVal, row[username], row[serviceName], row[hostName], clientPort, row[hostName], clientPort, queryPlanHashVal, row[sqlID], row[sqlChildNumber], row[childAddress], row[sid], row[serialNumber], row[process], row[schemaName], row[program], row[module], row[status], row[state], row[waitclass], row[event], waitTime, objID, row[objectName], row[objectType], - row[osUser], queryDuration, row[sqlExecStart], row[logonTime], sessionDurationSec) + row[osUser], queryDuration, queryComments, row[sqlExecStart], row[logonTime], sessionDurationSec) } s.lb.Emit(metadata.WithLogsResource(rb.Emit())).ResourceLogs().MoveAndAppendTo(logs.ResourceLogs()) @@ -910,14 +921,15 @@ func asFloatInSeconds(value int64) float64 { func (s *oracleScraper) obfuscateCacheHits(hits []queryMetricCacheHit) []queryMetricCacheHit { var obfuscatedHits []queryMetricCacheHit - for _, hit := range hits { + for i := range hits { + hit := &hits[i] // obfuscate and normalize the query text obfuscatedSQL, err := s.obfuscator.obfuscateSQLString(hit.queryText) if err != nil { s.logger.Warn("oracleScraper failed to obfuscate SQL query, skipping entry", zap.String("sql_id", hit.sqlID), zap.Error(err)) } else { hit.queryText = obfuscatedSQL - obfuscatedHits = append(obfuscatedHits, hit) + obfuscatedHits = append(obfuscatedHits, *hit) } } return obfuscatedHits @@ -931,9 +943,9 @@ func (s *oracleScraper) getChildAddressToPlanMap(ctx context.Context, hits []que var childAddressSlice []any placeholders := make([]string, len(hits)) - for i, hit := range hits { + for i := range hits { placeholders[i] = fmt.Sprintf("HEXTORAW(:%d)", i+1) - childAddressSlice = append(childAddressSlice, hit.childAddress) + childAddressSlice = append(childAddressSlice, hits[i].childAddress) } placeholdersCombined := strings.Join(placeholders, ", ") diff --git a/receiver/oracledbreceiver/scraper_test.go b/receiver/oracledbreceiver/scraper_test.go index edd407a7573fb..a4500103310dc 100644 --- a/receiver/oracledbreceiver/scraper_test.go +++ b/receiver/oracledbreceiver/scraper_test.go @@ -26,6 +26,7 @@ import ( "go.uber.org/zap/zapcore" "go.uber.org/zap/zaptest/observer" + "github.com/open-telemetry/opentelemetry-collector-contrib/internal/common/sqlcomments" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/golden" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatatest/plogtest" "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/oracledbreceiver/internal/metadata" @@ -484,6 +485,54 @@ func TestSamplesQuery(t *testing.T) { } } +func TestScraperWithQueryComments(t *testing.T) { + t.Run("query samples with allowed comments", func(t *testing.T) { + // Create a mock scraper with allowed comment keys configured + cfg := createDefaultConfig().(*Config) + cfg.QuerySample.AllowedCommentKeys = []string{"nr_service_guid", "app_id"} + + // This test verifies that when AllowedCommentKeys is configured, + // the comment extraction happens and is passed through the pipeline. + // The actual generated_logs code will handle adding the attribute. + + // We can verify that extractAndFilterComments is called correctly + sqlWithComment := "/* nr_service_guid=test-123,app_id=myapp */ SELECT * FROM test_table" + result := sqlcomments.ExtractAndFilterComments(sqlWithComment, cfg.QuerySample.AllowedCommentKeys) + + expected := "nr_service_guid=test-123,app_id=myapp" + if result != expected { + t.Errorf("Expected %q but got %q", expected, result) + } + }) + + t.Run("query samples without allowed comments", func(t *testing.T) { + // Create a mock scraper with empty allowed comment keys + cfg := createDefaultConfig().(*Config) + cfg.QuerySample.AllowedCommentKeys = []string{} + + // Verify secure by default: empty allowlist returns empty string + sqlWithComment := "/* nr_service_guid=test-123 */ SELECT * FROM test_table" + result := sqlcomments.ExtractAndFilterComments(sqlWithComment, cfg.QuerySample.AllowedCommentKeys) + + if result != "" { + t.Errorf("Expected empty string but got %q", result) + } + }) + + t.Run("query samples with non-matching comments", func(t *testing.T) { + cfg := createDefaultConfig().(*Config) + cfg.QuerySample.AllowedCommentKeys = []string{"nr_service_guid"} + + // SQL has comments but none match the allowlist + sqlWithComment := "/* other_key=value */ SELECT * FROM test_table" + result := sqlcomments.ExtractAndFilterComments(sqlWithComment, cfg.QuerySample.AllowedCommentKeys) + + if result != "" { + t.Errorf("Expected empty string but got %q", result) + } + }) +} + func TestGetInstanceId(t *testing.T) { localhostName, _ := os.Hostname() diff --git a/receiver/oracledbreceiver/testdata/expectedQueryTextAndPlanQuery.yaml b/receiver/oracledbreceiver/testdata/expectedQueryTextAndPlanQuery.yaml index 045d496f0c6ca..b92bfbe3a72bb 100644 --- a/receiver/oracledbreceiver/testdata/expectedQueryTextAndPlanQuery.yaml +++ b/receiver/oracledbreceiver/testdata/expectedQueryTextAndPlanQuery.yaml @@ -97,6 +97,9 @@ resourceLogs: - key: oracledb.procedure_type value: stringValue: PROCEDURE + - key: query.comments + value: + stringValue: "" body: {} eventName: db.server.top_query timeUnixNano: "1759142512777891000" diff --git a/receiver/oracledbreceiver/testdata/expectedSamplesFile.yaml b/receiver/oracledbreceiver/testdata/expectedSamplesFile.yaml index e4d64e8c890ea..38bd06a09ea66 100644 --- a/receiver/oracledbreceiver/testdata/expectedSamplesFile.yaml +++ b/receiver/oracledbreceiver/testdata/expectedSamplesFile.yaml @@ -97,6 +97,9 @@ resourceLogs: - key: oracledb.duration_sec value: doubleValue: 1 + - key: query.comments + value: + stringValue: "" - key: oracledb.query.started value: stringValue: "2026-01-01T12:00:00Z"