解析 Prometheus 用的指标
如果能够直接解析 Prometheus 所采集的指标(文本暴露格式,exposition format),在数据分析等场景下会非常方便。为此,我试着实现了一下。
指标的解析
使用 Prometheus 提供的库(github.com/prometheus/common/expfmt),实现了如下所示的指标解析处理。
package main
import (
"fmt"
"log"
"strings"
"github.com/prometheus/common/expfmt"
)
// main parses a small inline sample written in the Prometheus text exposition
// format and prints, for each metric family, its key, name and type, followed
// by the labels and untyped value of every series in that family.
func main() {
	// Sample metrics in the text exposition format.
	data := `
sample_total 10
test_metric{label="t1", proc="a1"} 123.4
`
	p := expfmt.TextParser{}
	// Parse the text into a map of metric families.
	mf, err := p.TextToMetricFamilies(strings.NewReader(data))
	if err != nil {
		log.Fatal(err)
	}
	// Go map iteration order is unspecified, so families may print in any order.
	for k, v := range mf {
		fmt.Printf("key=%s, name=%s, type=%s \n", k, v.GetName(), v.GetType())
		// Walk every series of the family instead of indexing element 0 only:
		// this avoids an out-of-range panic on an empty family and does not
		// silently drop additional series.
		for _, m := range v.GetMetric() {
			// Print the labels of this series.
			for _, l := range m.GetLabel() {
				fmt.Printf("label name=%s, value=%s \n", l.GetName(), l.GetValue())
			}
			// Print the value; the getters are nil-safe, so a non-untyped
			// series yields 0 rather than a panic.
			fmt.Printf("untyped value=%f \n", m.GetUntyped().GetValue())
		}
		fmt.Println("---")
	}
}
执行结果如下所示。
执行结果
$ cd sample1
$ go run main.go
key=sample_total, name=sample_total, type=UNTYPED
untyped value=10.000000
---
key=test_metric, name=test_metric, type=UNTYPED
label name=label, value=t1
label name=proc, value=a1
untyped value=123.400000
---
提取和计算直方图。
接下来,针对 CoreDNS 指标中如下形式的直方图(Histogram)数据,实现了只提取这类数据、并计算相邻桶(bucket)累计值之差的处理。
# HELP coredns_dns_request_duration_seconds Histogram of the time (in seconds) each request took per zone.
# TYPE coredns_dns_request_duration_seconds histogram
coredns_dns_request_duration_seconds_bucket{server="dns://:53",view="",zone=".",le="0.00025"} 22
coredns_dns_request_duration_seconds_bucket{server="dns://:53",view="",zone=".",le="0.0005"} 22
...省略
通过判断 GetType 的返回值是否为 dto.MetricType_HISTOGRAM,可以判定该指标是否为 Histogram。
此外,对于直方图,解析后得到的名称不包含 _bucket 后缀(例如 coredns_dns_request_duration_seconds)。
package main
import (
"fmt"
"log"
"os"
"strings"
dto "github.com/prometheus/client_model/go"
"github.com/prometheus/common/expfmt"
)
// ラベルの連結
// joinLabel renders the given label pairs as a single comma-separated string
// in which each pair is written as "name:value". An empty input yields "".
func joinLabel(ps []*dto.LabelPair) string {
	parts := make([]string, len(ps))
	for i := range ps {
		parts[i] = ps[i].GetName() + ":" + ps[i].GetValue()
	}
	return strings.Join(parts, ",")
}
// main reads a metrics file in the Prometheus text exposition format, whose
// path is given as the first command-line argument, and for every histogram
// series prints the number of observations belonging to each individual
// bucket (the difference between consecutive cumulative bucket counts).
func main() {
	// Indexing os.Args[1] without this guard panics when no path is supplied.
	if len(os.Args) < 2 {
		log.Fatal("usage: go run main.go <metrics-file>")
	}
	file := os.Args[1]
	f, err := os.Open(file)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	p := expfmt.TextParser{}
	mf, err := p.TextToMetricFamilies(f)
	if err != nil {
		log.Fatal(err)
	}

	for k, v := range mf {
		// Only histogram families are of interest.
		if v.GetType() != dto.MetricType_HISTOGRAM {
			continue
		}
		for _, m := range v.GetMetric() {
			hist := m.GetHistogram()
			label := joinLabel(m.GetLabel())
			sum := hist.GetSampleSum() // sample sum reported for this series
			lastCount := uint64(0)
			for _, b := range hist.GetBucket() {
				// Bucket counts are cumulative; subtract the previous bucket's
				// count to get the observations that belong to this bucket alone.
				count := b.GetCumulativeCount() - lastCount
				lastCount = b.GetCumulativeCount()
				// Skip buckets that gained no observations of their own.
				if count > 0 {
					fmt.Printf(
						"name=%s, label=[%s], sum=%f, le=%f, count=%d \n",
						k, label, sum, b.GetUpperBound(), count,
					)
				}
			}
		}
	}
}
执行结果如下。
执行结果
$ cd sample2
$ go run main.go coredns_metrics.txt
name=coredns_forward_request_duration_seconds, label=[rcode:NOERROR,to:192.168.5.3:53], sum=3.013131, le=0.001000, count=1
name=coredns_forward_request_duration_seconds, label=[rcode:NOERROR,to:192.168.5.3:53], sum=3.013131, le=0.002000, count=1
name=coredns_forward_request_duration_seconds, label=[rcode:NOERROR,to:192.168.5.3:53], sum=3.013131, le=0.004000, count=2
name=coredns_forward_request_duration_seconds, label=[rcode:NOERROR,to:192.168.5.3:53], sum=3.013131, le=1.024000, count=3
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=0.000250, count=22
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=0.001000, count=2
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=0.004000, count=2
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=0.008000, count=1
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=1.024000, count=4
name=coredns_health_request_duration_seconds, label=[], sum=0.380166, le=0.000250, count=3
name=coredns_health_request_duration_seconds, label=[], sum=0.380166, le=0.002500, count=680
name=coredns_health_request_duration_seconds, label=[], sum=0.380166, le=0.025000, count=1
name=coredns_dns_response_size_bytes, label=[proto:udp,server:dns://:53,view:,zone:.], sum=3501.000000, le=100.000000, count=11
name=coredns_dns_response_size_bytes, label=[proto:udp,server:dns://:53,view:,zone:.], sum=3501.000000, le=200.000000, count=20
name=coredns_dns_request_size_bytes, label=[proto:udp,server:dns://:53,view:,zone:.], sum=1957.000000, le=100.000000, count=31