Terraform official site: datadog_monitor | Resources | DataDog/datadog | Terraform | Terraform Registry
resource "datadog_monitor" "foo" { name = "Name for monitor foo" type = "metric alert" message = "Monitor triggered. Notify: @hipchat-channel" escalation_message = "Escalation message @pagerduty" query = "avg(last_1h):avg:aws.ec2.cpu{environment:foo,host:foo} by {host} > 4" monitor_thresholds { warning = 2 critical = 4 } include_tags = true tags = ["foo:bar", "team:fooBar"] }
TerraformでDatadogのMonitorを書く (Writing Datadog Monitors with Terraform) #Terraform - Qiita
# monitor
resource "datadog_monitor" "keepalive" {
  name    = "[${var.project_name}][EC2] Linux: 死活監視 "
  type    = "service check"
  message = var.message
  query   = "\"datadog.agent.up\".over(\"datadog:enabled\",\"${var.project_account}\").by(\"host\").last(2).count_by_status()"

  monitor_thresholds {
    ok       = 1
    warning  = 1
    critical = 1
  }

  notify_no_data    = true
  no_data_timeframe = 2
  new_host_delay    = 300
  renotify_interval = 0
  timeout_h         = 0
  include_tags      = true
  notify_audit      = false

  tags = ["service:${var.project_name}"]
}
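The keepalive monitor above references three input variables. A minimal sketch of the corresponding declarations; the descriptions and the example message are illustrative, not taken from the original article:

# Illustrative declarations for the variables referenced above.
variable "project_name" {
  type        = string
  description = "Project name used in monitor names and tags"
}

variable "project_account" {
  type        = string
  description = "Tag value identifying the monitored account/hosts"
}

variable "message" {
  type        = string
  description = "Notification body, e.g. \"Agent is down on {{host.name}} @slack-alerts\""
}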
TerraformでDatadogのモニタリング監視を構築・改善した話 (Building and improving Datadog monitoring with Terraform)
resource "datadog_monitor" "container_web_cpu_utilization" { evaluation_delay = 0 include_tags = true message = <<-EOT {{#is_match "ecs_container_name.name" "prod"}} ### アラート内容 {{#is_warning}} - {{ecs_container_name}} のCPU使用率80%超過 {{/is_warning}} {{#is_alert}} - {{ecs_container_name}} のCPU使用率90%超過 {{/is_alert}} ### 対応 - AWSコンソールよりECSの{{ecs_container_name}}コンテナの状態を確認し、CPU使用率上昇の原因を特定すること https://ap-northeast-1.console.aws.amazon.com/ecs/v2/clusters/syukatsu-kaigi-jp/tasks?region=ap-northeast-1 ### 通知先 @slack-production-channel {{/is_match}} {{#is_match "ecs_container_name.name" "stg"}} ### アラート内容 {{#is_warning}} - {{ecs_container_name}} のCPU使用率80%超過 {{/is_warning}} {{#is_alert}} - {{ecs_container_name}} のCPU使用率90%超過 {{/is_alert}} ### 対応 - AWSコンソールよりECSの{{ecs_container_name}}コンテナの状態を確認し、CPU使用率上昇の原因を特定すること https://us-west-2.console.aws.amazon.com/ecs/v2/clusters/syukatsu-kaigi-stg-jp/tasks?region=us-west-2 ### 通知先 @slack-staging-channel {{/is_match}} EOT name = "CPU監視 {{ecs_container_name}}" new_group_delay = 0 no_data_timeframe = 0 notify_audit = false notify_by = [] notify_no_data = false priority = 0 query = "avg(last_30m):(avg:ecs.fargate.cpu.usage{ecs_container_name:*-container} by {ecs_container_name} / avg:ecs.fargate.cpu.limit{ecs_container_name:*-container} by {ecs_container_name}) * 0.0001 > 90" renotify_interval = 20 renotify_occurrences = 0 require_full_window = true tags = [ "service:<service_name>" ] timeout_h = 0 type = "query alert" monitor_thresholds { critical = "90" critical_recovery = "60" warning = "80" warning_recovery = "50" } }
DatadogのモニタをTerraformで管理してみる #Terraform - Qiita
resource "datadog_monitor" "rs_cpu_usage" { name = "[${var.environment}] CPU Usage is very high on {{host.name}}" type = "metric alert" message = "${var.notifies1} ${var.notifies2} ${var.notifies3}" query = "avg(last_5m):avg:azure.vm.percentage_cpu{type:microsoft.compute/virtualmachines} > 90" thresholds { "%" = "${var.detect_alert_count1}" warning = "80.0" critical = "90.0" } include_tags = "false" no_data_timeframe = "10" notify_no_data = "true" require_full_window = "false" }
[Datadog][Terraform]MonitorでMetricsのアラート設定をする (Setting up metric alerts with Monitors) – ADACHIN SERVER WIKI
ecs_alert.tf
resource "datadog_monitor" "ecs_cpu_alert" { name = "ecs_cpu_alert" type = "metric alert" query = "avg(last_5m):avg:aws.ecs.service.cpuutilization{clustername:hoge-${var.environment}} by {clustername} > 80" escalation_message = "ECS/Fargate CPU usage has exceeded 80%" notify_no_data = false notify_audit = false timeout_h = 1 include_tags = true monitor_thresholds { warning = 40 critical = 80 } message = <<-EOT @slack-alert-hoge {{#is_alert}} @slack-alert-hoge {{/is_alert}} {{#is_recovery}} @slack-alert-hoge {{/is_recovery}} EOT tags = [ "product:hoge", "service:hoge", "env:${var.environment}" ] } resource "datadog_monitor" "ecs_memory_alert" { name = "ecs_memory_alert" type = "metric alert" query = "avg(last_5m):avg:aws.ecs.service.memory_utilization{clustername:hoge-${var.environment}} by {servicename} > 80" escalation_message = "ECS/Fargate Memory usage has exceeded 80%" notify_no_data = false notify_audit = false timeout_h = 1 include_tags = true monitor_thresholds { warning = 70 critical = 80 } message = <<-EOT @slack-alert-hoge {{#is_alert}} @slack-alert-hoge {{/is_alert}} {{#is_recovery}} @slack-alert-hoge {{/is_recovery}} EOT tags = [ "product:hoge", "service:hoge", "env:${var.environment}" ] }
event_log_alert.tf
resource "datadog_monitor" "event_log_alert" { name = "event_log_alert" type = "event-v2 alert" query = "events(\"status:(error OR warn OR failed) AND hoge\").rollup(\"count\").last(\"5m\") > 0" notify_no_data = false notify_audit = false timeout_h = 1 monitor_thresholds { critical = 1 } message = <<-EOT @slack-alert-hoge {{#is_alert}} @slack-alert-hoge {{/is_alert}} {{#is_recovery}} @slack-alert-hoge {{/is_recovery}} EOT tags = [ "product:hoge", "service:hoge", "env:${var.environment}" ] }
cloudfront_5xx_error_rate_alert.tf
resource "datadog_monitor" "cloudfront_5xx_error_rate_alert" { name = "[hoge]cloudfront_5xx_error_rate_alert" type = "metric alert" query = "avg(last_5m):avg:aws.cloudfront.5xx_error_rate{distributionid:hoge OR distributionid:hoge OR distributionid:hoge} by {distributionid,aws_account} > 50" escalation_message = "CloudFront distributions have more than 50% 5xx error rate" notify_no_data = false notify_audit = false timeout_h = 1 include_tags = true monitor_thresholds { warning = 30 critical = 50 } message = <<-EOT @slack-alert-hoge {{#is_alert}} @slack-alert-hoge {{/is_alert}} {{#is_recovery}} @slack-alert-hoge {{/is_recovery}} EOT tags = [ "product:hoge", "service:hoge", "env:${var.environment}" ] }