解析 Prometheus 用的指标

如果能够直接解析 Prometheus 所收集的指标(exposition 格式,即暴露格式),在数据分析等场景下会非常方便,因此我尝试实现了一下。

指标的解析

使用 Prometheus 提供的库(github.com/prometheus/common/expfmt)实现了指标的解析处理,代码如下所示。

package main

import (
	"fmt"
	"log"
	"strings"

	"github.com/prometheus/common/expfmt"
)

// main parses a small hard-coded sample in the Prometheus text exposition
// format and prints, for every metric family, its key, name and type followed
// by the labels and untyped value of the family's first metric.
func main() {
	// Sample metrics to parse.
	const input = `
sample_total 10
test_metric{label="t1", proc="a1"} 123.4
`

	var parser expfmt.TextParser
	// Parse the text into metric families keyed by name.
	families, err := parser.TextToMetricFamilies(strings.NewReader(input))
	if err != nil {
		log.Fatal(err)
	}

	for key, family := range families {
		fmt.Printf("key=%s, name=%s, type=%s \n", key, family.GetName(), family.GetType())

		// Only the first metric of each family is inspected.
		first := family.GetMetric()[0]

		// Print every label attached to that metric.
		for _, pair := range first.Label {
			fmt.Printf("label name=%s, value=%s \n", pair.GetName(), pair.GetValue())
		}

		// Print the metric's value, read as an untyped sample.
		fmt.Printf("untyped value=%f \n", first.Untyped.GetValue())

		fmt.Println("---")
	}
}

执行结果如下所示。

执行结果
$ cd sample1

$ go run main.go
key=sample_total, name=sample_total, type=UNTYPED 
untyped value=10.000000 
---
key=test_metric, name=test_metric, type=UNTYPED 
label name=label, value=t1 
label name=proc, value=a1 
untyped value=123.400000 
---

提取和计算直方图。

接下来,从 CoreDNS 的指标中只提取如下形式的直方图(Histogram)数据,并实现了计算各 bucket 差分的处理(bucket 的计数是累计值,因此用当前 bucket 的累计计数减去前一个 bucket 的累计计数)。

# HELP coredns_dns_request_duration_seconds Histogram of the time (in seconds) each request took per zone.
# TYPE coredns_dns_request_duration_seconds histogram
coredns_dns_request_duration_seconds_bucket{server="dns://:53",view="",zone=".",le="0.00025"} 22
coredns_dns_request_duration_seconds_bucket{server="dns://:53",view="",zone=".",le="0.0005"} 22
...省略

通过判断 GetType 的返回值是否为 dto.MetricType_HISTOGRAM,可以确定 Histogram。

此外,对于直方图,解析后得到的指标名称不包含 _bucket 后缀(例如 coredns_dns_request_duration_seconds)。

package main

import (
	"fmt"
	"log"
	"os"
	"strings"

	dto "github.com/prometheus/client_model/go"
	"github.com/prometheus/common/expfmt"
)
// joinLabel renders the given label pairs as "name:value" entries joined by
// commas. It returns an empty string when ps contains no pairs.
func joinLabel(ps []*dto.LabelPair) string {
	parts := make([]string, len(ps))

	for i, pair := range ps {
		parts[i] = pair.GetName() + ":" + pair.GetValue()
	}

	return strings.Join(parts, ",")
}

// main reads a Prometheus text-format metrics file, whose path is given as the
// first command-line argument, and prints the per-bucket sample counts of
// every histogram metric it contains.
//
// For each bucket the cumulative count of the previous bucket is subtracted,
// and only buckets with a non-zero difference are printed. The program exits
// through log.Fatal when the argument is missing, the file cannot be opened,
// or the content cannot be parsed.
func main() {
	// Without this guard, os.Args[1] panics with an index-out-of-range error
	// when the program is started without an argument.
	if len(os.Args) < 2 {
		log.Fatal("usage: go run main.go <metrics-file>")
	}

	f, err := os.Open(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	p := expfmt.TextParser{}

	mf, err := p.TextToMetricFamilies(f)
	if err != nil {
		log.Fatal(err)
	}

	for k, v := range mf {
		// Only histogram families are processed.
		if v.GetType() != dto.MetricType_HISTOGRAM {
			continue
		}

		for _, m := range v.GetMetric() {
			hist := m.GetHistogram()

			label := joinLabel(m.GetLabel())
			sum := hist.GetSampleSum() // sum of all observed values

			// Cumulative count of the previous bucket; starts at zero.
			lastCount := uint64(0)

			for _, b := range hist.GetBucket() {
				// Difference between this bucket's cumulative count and
				// the previous bucket's cumulative count.
				count := b.GetCumulativeCount() - lastCount
				lastCount = b.GetCumulativeCount()

				if count == 0 {
					continue
				}

				fmt.Printf(
					"name=%s, label=[%s], sum=%f, le=%f, count=%d \n",
					k, label, sum, b.GetUpperBound(), count,
				)
			}
		}
	}
}

执行结果如下。

执行结果
$ cd sample2

$ go run main.go coredns_metrics.txt
name=coredns_forward_request_duration_seconds, label=[rcode:NOERROR,to:192.168.5.3:53], sum=3.013131, le=0.001000, count=1 
name=coredns_forward_request_duration_seconds, label=[rcode:NOERROR,to:192.168.5.3:53], sum=3.013131, le=0.002000, count=1 
name=coredns_forward_request_duration_seconds, label=[rcode:NOERROR,to:192.168.5.3:53], sum=3.013131, le=0.004000, count=2 
name=coredns_forward_request_duration_seconds, label=[rcode:NOERROR,to:192.168.5.3:53], sum=3.013131, le=1.024000, count=3 
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=0.000250, count=22 
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=0.001000, count=2 
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=0.004000, count=2 
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=0.008000, count=1 
name=coredns_dns_request_duration_seconds, label=[server:dns://:53,view:,zone:.], sum=4.017252, le=1.024000, count=4 
name=coredns_health_request_duration_seconds, label=[], sum=0.380166, le=0.000250, count=3 
name=coredns_health_request_duration_seconds, label=[], sum=0.380166, le=0.002500, count=680 
name=coredns_health_request_duration_seconds, label=[], sum=0.380166, le=0.025000, count=1 
name=coredns_dns_response_size_bytes, label=[proto:udp,server:dns://:53,view:,zone:.], sum=3501.000000, le=100.000000, count=11 
name=coredns_dns_response_size_bytes, label=[proto:udp,server:dns://:53,view:,zone:.], sum=3501.000000, le=200.000000, count=20 
name=coredns_dns_request_size_bytes, label=[proto:udp,server:dns://:53,view:,zone:.], sum=1957.000000, le=100.000000, count=31 
广告
将在 10 秒后关闭
bannerAds