Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions gcp/modules/monitoring/fulcio/metrics.tf
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# Alerts specific to fulcio, rekor or dex should be in the appropriate `modules/monitoring/[service]` directory

resource "google_logging_metric" "fulcio_k8s_pod_restart_failing_container" {
count = var.create_logging_metrics ? 1 : 0
description = "Counts the number of k8s_pod resource logs that contain the \"restarting failed container\" message"
filter = "resource.labels.namespace_name=\"fulcio-system\"\nresource.type=k8s_pod AND severity>=WARNING\n\"Back-off restarting failed container\"\n"

Expand All @@ -32,6 +33,7 @@ resource "google_logging_metric" "fulcio_k8s_pod_restart_failing_container" {
}

resource "google_logging_metric" "k8s_pod_unschedulable" {
count = var.create_logging_metrics ? 1 : 0
description = "Counts the number of k8s_pod resource logs that contain the unschedulable message"
filter = "resource.labels.namespace_name=\"fulcio-system\"\nresource.type=k8s_pod AND severity>=WARNING\n\"unschedulable\"\n"

Expand Down
6 changes: 6 additions & 0 deletions gcp/modules/monitoring/fulcio/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,9 @@ variable "uptime_check_period" {
type = string
default = "60s"
}

# Switch for this module's google_logging_metric resources: each one sets
# `count` to 1 when this flag is true and to 0 otherwise.
variable "create_logging_metrics" {
  type        = bool
  default     = true
  description = "Whether to create logging metrics. Another instance of the monitoring module may already be managing logging metrics for this service."
}
8 changes: 8 additions & 0 deletions gcp/modules/monitoring/sigstore.tf
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ module "fulcio" {
prober_url = var.prober_fulcio_url
create_slos = var.create_slos
uptime_check_period = var.uptime_check_period
create_logging_metrics = var.fulcio_create_logging_metrics

depends_on = [
google_project_service.service
Expand All @@ -87,6 +88,7 @@ module "timestamp" {
prober_url = var.prober_timestamp_url
create_slos = var.create_slos
uptime_check_period = var.uptime_check_period
create_logging_metrics = var.timestamp_create_logging_metrics

depends_on = [
google_project_service.service
Expand Down Expand Up @@ -116,13 +118,19 @@ module "dex" {
module "tuf" {
  # Zero or one instance depending on var.tuf_enabled; when present the
  # module is addressed as module.tuf[0].
  count  = var.tuf_enabled ? 1 : 0
  source = "./tuf"

  tuf_url    = var.tuf_url
  project_id = var.project_id

  depends_on = [google_project_service.service]
}
# Maps the un-indexed address module.tuf onto module.tuf[0], so objects
# recorded in state under the old address are moved to the counted instance
# instead of being planned for destroy/re-create.
moved {
  to   = module.tuf[0]
  from = module.tuf
}

// Prober
module "prober" {
Expand Down
2 changes: 2 additions & 0 deletions gcp/modules/monitoring/timestamp/metrics.tf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
# This file contains alerts for the Timestamp Authority service

resource "google_logging_metric" "timestamp_k8s_pod_restart_failing_container" {
count = var.create_logging_metrics ? 1 : 0
description = "Counts the number of logs that contain the \"restarting failed container\" message"
filter = "resource.labels.namespace_name=\"tsa-system\"\nresource.type=k8s_pod AND severity>=WARNING\n\"Back-off restarting failed container\"\n"

Expand All @@ -31,6 +32,7 @@ resource "google_logging_metric" "timestamp_k8s_pod_restart_failing_container" {
}

resource "google_logging_metric" "k8s_pod_unschedulable" {
count = var.create_logging_metrics ? 1 : 0
description = "Counts the number of k8s_pod resource logs that contain the message \"unschedulable\""
filter = "resource.labels.namespace_name=\"tsa-system\"\nresource.type=k8s_pod AND severity>=WARNING\n\"unschedulable\"\n"

Expand Down
6 changes: 6 additions & 0 deletions gcp/modules/monitoring/timestamp/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,9 @@ variable "uptime_check_period" {
type = string
default = "60s"
}

# Switch for this module's google_logging_metric resources: each one sets
# `count` to 1 when this flag is true and to 0 otherwise.
variable "create_logging_metrics" {
  type        = bool
  default     = true
  description = "Whether to create logging metrics. Another instance of the monitoring module may already be managing logging metrics for this service."
}
18 changes: 18 additions & 0 deletions gcp/modules/monitoring/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,12 @@ variable "rekor_enabled" {
default = true
}

# Gates the `tuf` sub-module call: one instance when true, none when false.
variable "tuf_enabled" {
  type        = bool
  default     = true
  description = "Enable TUF monitoring"
}

variable "enable_k8s_cpu_utilization_alert" {
type = string
description = "whether to enable or disable the K8s CPU utilization alert"
Expand All @@ -152,3 +158,15 @@ variable "cloudsql_enabled" {
type = bool
default = true
}

# Forwarded to the `fulcio` sub-module as `create_logging_metrics`.
# The description names the service explicitly because this module exposes
# one such flag per service and "this service" is ambiguous at this level.
variable "fulcio_create_logging_metrics" {
  description = "Whether to create logging metrics for Fulcio. Another instance of the monitoring module may already be managing logging metrics for Fulcio."
  type        = bool
  default     = true
}

# Forwarded to the `timestamp` sub-module as `create_logging_metrics`.
# The description names the service explicitly because this module exposes
# one such flag per service and "this service" is ambiguous at this level.
variable "timestamp_create_logging_metrics" {
  description = "Whether to create logging metrics for the Timestamp Authority. Another instance of the monitoring module may already be managing logging metrics for the Timestamp Authority."
  type        = bool
  default     = true
}
3 changes: 3 additions & 0 deletions gcp/modules/sigstore/sigstore.tf
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ module "monitoring" {
enable_k8s_cpu_utilization_alert = var.enable_k8s_cpu_utilization_alert
uptime_check_period = var.monitoring.uptime_check_period
cloudsql_enabled = var.monitoring.cloudsql_enabled
tuf_enabled = var.monitoring.tuf_enabled
fulcio_create_logging_metrics = var.monitoring.fulcio_create_logging_metrics
timestamp_create_logging_metrics = var.monitoring.timestamp_create_logging_metrics

depends_on = [
module.gke-cluster,
Expand Down
58 changes: 32 additions & 26 deletions gcp/modules/sigstore/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -139,34 +139,40 @@ variable "tuf_main_page_suffix" {
# Monitoring and alerting settings, grouped into a single object. The
# `monitoring` module call reads individual attributes from it, e.g.
# uptime_check_period, cloudsql_enabled, tuf_enabled and the two
# *_create_logging_metrics flags.
variable "monitoring" {
description = "Monitoring and alerting"
# Attributes declared with optional(<type>, <default>) may be omitted by the
# caller and then take the stated default; the others must always be set.
type = object({
enabled = bool
fulcio_url = string
rekor_url = optional(string, "")
timestamp_url = string
dex_url = string
tuf_url = string
ctlog_url = optional(string, "")
notification_channel_ids = list(string)
timestamp_enabled = bool
ctlog_enabled = optional(bool, true)
rekor_enabled = optional(bool, true)
uptime_check_period = optional(string, "60s")
cloudsql_enabled = optional(bool, true)
enabled = bool
fulcio_url = string
rekor_url = optional(string, "")
timestamp_url = string
dex_url = string
tuf_url = string
ctlog_url = optional(string, "")
notification_channel_ids = list(string)
timestamp_enabled = bool
ctlog_enabled = optional(bool, true)
rekor_enabled = optional(bool, true)
uptime_check_period = optional(string, "60s")
cloudsql_enabled = optional(bool, true)
# The next three attributes are passed straight through to the monitoring
# module's tuf_enabled, fulcio_create_logging_metrics and
# timestamp_create_logging_metrics inputs.
tuf_enabled = optional(bool, true)
fulcio_create_logging_metrics = optional(bool, true)
timestamp_create_logging_metrics = optional(bool, true)
})
# Value used when the caller does not set `monitoring` at all: enabled is
# false and every URL is an example.com placeholder.
default = {
enabled = false
fulcio_url = "fulcio.example.com"
rekor_url = "rekor.example.com"
timestamp_url = "timestamp.example.com"
dex_url = "oauth2.example.com"
tuf_url = "tuf.example.com"
ctlog_url = "ctlog.example.com"
notification_channel_ids = []
timestamp_enabled = false
ctlog_enabled = true
rekor_enabled = true
uptime_check_period = "60s"
cloudsql_enabled = true
enabled = false
fulcio_url = "fulcio.example.com"
rekor_url = "rekor.example.com"
timestamp_url = "timestamp.example.com"
dex_url = "oauth2.example.com"
tuf_url = "tuf.example.com"
ctlog_url = "ctlog.example.com"
notification_channel_ids = []
timestamp_enabled = false
ctlog_enabled = true
rekor_enabled = true
uptime_check_period = "60s"
cloudsql_enabled = true
tuf_enabled = true
fulcio_create_logging_metrics = true
timestamp_create_logging_metrics = true
}
}

Expand Down