Skip to content

Instantly share code, notes, and snippets.

@tiagoapimenta
Last active April 20, 2018 16:09
Show Gist options
  • Save tiagoapimenta/3ecb4162e701252f4b1041bc7f9315e9 to your computer and use it in GitHub Desktop.
Save tiagoapimenta/3ecb4162e701252f4b1041bc7f9315e9 to your computer and use it in GitHub Desktop.
This patch should be applied on top of the 0.14.0 tag.
diff -rupN a/config/config.go b/config/config.go
--- a/config/config.go 2018-02-13 06:13:44.000000000 -0200
+++ b/config/config.go 2018-04-20 13:07:29.802965461 -0300
@@ -382,6 +382,7 @@ func (c *GlobalConfig) UnmarshalYAML(unm
type Route struct {
Receiver string `yaml:"receiver,omitempty" json:"receiver,omitempty"`
GroupBy []model.LabelName `yaml:"group_by,omitempty" json:"group_by,omitempty"`
+ SortBy []model.LabelName `yaml:"sort_by,omitempty" json:"sort_by,omitempty"`
Match map[string]string `yaml:"match,omitempty" json:"match,omitempty"`
MatchRE map[string]Regexp `yaml:"match_re,omitempty" json:"match_re,omitempty"`
@@ -424,6 +425,14 @@ func (r *Route) UnmarshalYAML(unmarshal
groupBy[ln] = struct{}{}
}
+ sortBy := map[model.LabelName]struct{}{}
+ for _, ln := range r.SortBy {
+ if _, ok := sortBy[ln]; ok {
+ return fmt.Errorf("duplicated label %q in sort_by", ln)
+ }
+ sortBy[ln] = struct{}{}
+ }
+
if r.GroupInterval != nil && time.Duration(*r.GroupInterval) == time.Duration(0) {
return fmt.Errorf("group_interval cannot be zero")
}
diff -rupN a/config/config_test.go b/config/config_test.go
--- a/config/config_test.go 2018-02-13 06:13:44.000000000 -0200
+++ b/config/config_test.go 2018-04-20 13:07:29.802965461 -0300
@@ -341,6 +341,10 @@ func TestEmptyFieldsAndRegex(t *testing.
"cluster",
"service",
},
+ SortBy: []model.LabelName{
+ "instance",
+ "severity",
+ },
Routes: []*Route{
{
Receiver: "team-X-mails",
diff -rupN a/config/testdata/conf.empty-fields.yml b/config/testdata/conf.empty-fields.yml
--- a/config/testdata/conf.empty-fields.yml 2018-02-13 06:13:44.000000000 -0200
+++ b/config/testdata/conf.empty-fields.yml 2018-04-20 13:07:29.802965461 -0300
@@ -15,6 +15,7 @@ templates:
route:
group_by: ['alertname', 'cluster', 'service']
+ sort_by: ['instance', 'severity']
receiver: team-X-mails
routes:
diff -rupN a/config/testdata/conf.good.yml b/config/testdata/conf.good.yml
--- a/config/testdata/conf.good.yml 2018-02-13 06:13:44.000000000 -0200
+++ b/config/testdata/conf.good.yml 2018-04-20 13:07:29.802965461 -0300
@@ -23,6 +23,10 @@ route:
# multiple alerts coming in for cluster=A and alertname=LatencyHigh would
# be batched into a single group.
group_by: ['alertname', 'cluster', 'service']
+
+ # The labels by which the alerts inside a group are sorted. Labels are
+ # compared in the order they are listed.
+ sort_by: ['instance', 'severity']
# When a new group of alerts is created by an incoming alert, wait at
# least 'group_wait' to send the initial notification.
diff -rupN a/config/testdata/conf.opsgenie-default-apikey.yml b/config/testdata/conf.opsgenie-default-apikey.yml
--- a/config/testdata/conf.opsgenie-default-apikey.yml 2018-02-13 06:13:44.000000000 -0200
+++ b/config/testdata/conf.opsgenie-default-apikey.yml 2018-04-20 13:07:29.802965461 -0300
@@ -3,6 +3,7 @@ global:
route:
group_by: ['alertname', 'cluster', 'service']
+ sort_by: ['instance', 'severity']
group_wait: 30s
group_interval: 5m
repeat_interval: 3h
diff -rupN a/config/testdata/conf.opsgenie-no-apikey.yml b/config/testdata/conf.opsgenie-no-apikey.yml
--- a/config/testdata/conf.opsgenie-no-apikey.yml 2018-02-13 06:13:44.000000000 -0200
+++ b/config/testdata/conf.opsgenie-no-apikey.yml 2018-04-20 13:07:29.802965461 -0300
@@ -1,5 +1,6 @@
route:
group_by: ['alertname', 'cluster', 'service']
+ sort_by: ['instance', 'severity']
group_wait: 30s
group_interval: 5m
repeat_interval: 3h
diff -rupN a/config/testdata/conf.victorops-default-apikey.yml b/config/testdata/conf.victorops-default-apikey.yml
--- a/config/testdata/conf.victorops-default-apikey.yml 2018-02-13 06:13:44.000000000 -0200
+++ b/config/testdata/conf.victorops-default-apikey.yml 2018-04-20 13:07:29.802965461 -0300
@@ -3,6 +3,7 @@ global:
route:
group_by: ['alertname', 'cluster', 'service']
+ sort_by: ['instance', 'severity']
group_wait: 30s
group_interval: 5m
repeat_interval: 3h
diff -rupN a/config/testdata/conf.victorops-no-apikey.yml b/config/testdata/conf.victorops-no-apikey.yml
--- a/config/testdata/conf.victorops-no-apikey.yml 2018-02-13 06:13:44.000000000 -0200
+++ b/config/testdata/conf.victorops-no-apikey.yml 2018-04-20 13:07:29.802965461 -0300
@@ -1,5 +1,6 @@
route:
group_by: ['alertname', 'cluster', 'service']
+ sort_by: ['instance', 'severity']
group_wait: 30s
group_interval: 5m
repeat_interval: 3h
diff -rupN a/dispatch/dispatch.go b/dispatch/dispatch.go
--- a/dispatch/dispatch.go 2018-02-13 06:13:44.000000000 -0200
+++ b/dispatch/dispatch.go 2018-04-20 13:07:29.802965461 -0300
@@ -424,6 +424,31 @@ func (ag *aggrGroup) flush(notify func(.
alertsSlice = append(alertsSlice, alert)
}
+ sortLabels := make([]model.LabelName, 0)
+ if ag.opts != nil {
+ sortLabels = (*ag.opts).SortBy
+ }
+
+ sort.SliceStable(alertsSlice, func(i, j int) bool {
+ // Compare by each sort_by label in order; the first label that tells the two alerts apart decides.
+ for _, ln := range sortLabels {
+ vi, hasI := alertsSlice[i].Labels[ln]
+ vj, hasJ := alertsSlice[j].Labels[ln]
+ if !hasI && !hasJ {
+ continue // neither alert carries this label: let the next key (or the fallback) decide
+ }
+ if !hasI || !hasJ {
+ return hasI // exactly one alert carries the label: that alert sorts first
+ }
+
+ if vi != vj {
+ return vi < vj
+ }
+ }
+
+ return alertsSlice[i].Labels.Before(alertsSlice[j].Labels) // tie on every sort_by label: order by full label set
+ })
+
ag.mtx.Unlock()
level.Debug(ag.logger).Log("msg", "Flushing", "alerts", fmt.Sprintf("%v", alertsSlice))
diff -rupN a/dispatch/route.go b/dispatch/route.go
--- a/dispatch/route.go 2018-02-13 06:13:44.000000000 -0200
+++ b/dispatch/route.go 2018-04-20 13:07:29.802965461 -0300
@@ -34,6 +34,9 @@ var DefaultRouteOpts = RouteOpts{
GroupBy: map[model.LabelName]struct{}{
model.AlertNameLabel: struct{}{},
},
+ SortBy: []model.LabelName{
+ model.InstanceLabel,
+ },
}
// A Route is a node that contains definitions of how to handle alerts.
@@ -71,6 +74,9 @@ func NewRoute(cr *config.Route, parent *
opts.GroupBy[ln] = struct{}{}
}
}
+ if cr.SortBy != nil {
+ opts.SortBy = cr.SortBy
+ }
if cr.GroupWait != nil {
opts.GroupWait = time.Duration(*cr.GroupWait)
}
@@ -159,6 +165,7 @@ type RouteOpts struct {
// What labels to group alerts by for notifications.
GroupBy map[model.LabelName]struct{}
+ SortBy []model.LabelName
// How long to wait to group matching alerts before sending
// a notificaiton
@@ -169,10 +176,14 @@ type RouteOpts struct {
func (ro *RouteOpts) String() string {
var labels []model.LabelName
+ var sortLabels []model.LabelName
for ln := range ro.GroupBy {
labels = append(labels, ln)
}
- return fmt.Sprintf("<RouteOpts send_to:%q group_by:%q timers:%q|%q>", ro.Receiver, labels, ro.GroupWait, ro.GroupInterval)
+ for _, ln := range ro.SortBy {
+ sortLabels = append(sortLabels, ln)
+ }
+ return fmt.Sprintf("<RouteOpts send_to:%q group_by:%q sort_by:%q timers:%q|%q>", ro.Receiver, labels, sortLabels, ro.GroupWait, ro.GroupInterval)
}
// MarshalJSON returns a JSON representation of the routing options.
@@ -180,11 +191,13 @@ func (ro *RouteOpts) MarshalJSON() ([]by
v := struct {
Receiver string `json:"receiver"`
GroupBy model.LabelNames `json:"groupBy"`
+ SortBy model.LabelNames `json:"sortBy"`
GroupWait time.Duration `json:"groupWait"`
GroupInterval time.Duration `json:"groupInterval"`
RepeatInterval time.Duration `json:"repeatInterval"`
}{
Receiver: ro.Receiver,
+ SortBy: ro.SortBy,
GroupWait: ro.GroupWait,
GroupInterval: ro.GroupInterval,
RepeatInterval: ro.RepeatInterval,
diff -rupN a/dispatch/route_test.go b/dispatch/route_test.go
--- a/dispatch/route_test.go 2018-02-13 06:13:44.000000000 -0200
+++ b/dispatch/route_test.go 2018-04-20 13:07:29.802965461 -0300
@@ -40,6 +40,7 @@ routes:
receiver: 'notify-testing'
group_by: []
+ sort_by: []
- match:
env: "production"
@@ -64,6 +65,7 @@ routes:
owner: 'team-(B|C)'
group_by: ['foo', 'bar']
+ sort_by: ['lipsum']
group_wait: 2m
receiver: 'notify-BC'
@@ -110,6 +112,7 @@ routes:
{
Receiver: "notify-A",
GroupBy: def.GroupBy,
+ SortBy: def.SortBy,
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
@@ -126,6 +129,7 @@ routes:
{
Receiver: "notify-A",
GroupBy: def.GroupBy,
+ SortBy: def.SortBy,
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
@@ -141,6 +145,7 @@ routes:
{
Receiver: "notify-BC",
GroupBy: lset("foo", "bar"),
+ SortBy: []model.LabelName{"lipsum"},
GroupWait: 2 * time.Minute,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
@@ -157,6 +162,7 @@ routes:
{
Receiver: "notify-testing",
GroupBy: lset(),
+ SortBy: []model.LabelName{},
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
@@ -173,6 +179,7 @@ routes:
{
Receiver: "notify-productionA",
GroupBy: def.GroupBy,
+ SortBy: def.SortBy,
GroupWait: 1 * time.Minute,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
@@ -180,6 +187,7 @@ routes:
{
Receiver: "notify-productionB",
GroupBy: lset("job"),
+ SortBy: def.SortBy,
GroupWait: 30 * time.Second,
GroupInterval: 5 * time.Minute,
RepeatInterval: 1 * time.Hour,
@@ -198,6 +206,7 @@ routes:
{
Receiver: "notify-def",
GroupBy: lset("role"),
+ SortBy: def.SortBy,
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
@@ -214,6 +223,7 @@ routes:
{
Receiver: "notify-testing",
GroupBy: lset("role"),
+ SortBy: def.SortBy,
GroupWait: def.GroupWait,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
@@ -231,6 +241,7 @@ routes:
{
Receiver: "notify-testing",
GroupBy: lset("role"),
+ SortBy: def.SortBy,
GroupWait: 2 * time.Minute,
GroupInterval: def.GroupInterval,
RepeatInterval: def.RepeatInterval,
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment