Add histogram validation in remote-read and during reducing resolution (#17561)
Some checks failed
buf.build / lint and publish (push) Has been cancelled
CI / Go tests (push) Has been cancelled
CI / More Go tests (push) Has been cancelled
CI / Go tests with previous Go version (push) Has been cancelled
CI / UI tests (push) Has been cancelled
CI / Go tests on Windows (push) Has been cancelled
CI / Mixins tests (push) Has been cancelled
CI / Build Prometheus for common architectures (0) (push) Has been cancelled
CI / Build Prometheus for common architectures (1) (push) Has been cancelled
CI / Build Prometheus for common architectures (2) (push) Has been cancelled
CI / Build Prometheus for all architectures (0) (push) Has been cancelled
CI / Build Prometheus for all architectures (1) (push) Has been cancelled
CI / Build Prometheus for all architectures (10) (push) Has been cancelled
CI / Build Prometheus for all architectures (11) (push) Has been cancelled
CI / Build Prometheus for all architectures (2) (push) Has been cancelled
CI / Build Prometheus for all architectures (3) (push) Has been cancelled
CI / Build Prometheus for all architectures (4) (push) Has been cancelled
CI / Build Prometheus for all architectures (5) (push) Has been cancelled
CI / Build Prometheus for all architectures (6) (push) Has been cancelled
CI / Build Prometheus for all architectures (7) (push) Has been cancelled
CI / Build Prometheus for all architectures (8) (push) Has been cancelled
CI / Build Prometheus for all architectures (9) (push) Has been cancelled
CI / Report status of build Prometheus for all architectures (push) Has been cancelled
CI / Check generated parser (push) Has been cancelled
CI / golangci-lint (push) Has been cancelled
CI / fuzzing (push) Has been cancelled
CI / codeql (push) Has been cancelled
CI / Publish main branch artifacts (push) Has been cancelled
CI / Publish release artefacts (push) Has been cancelled
CI / Publish UI on npm Registry (push) Has been cancelled
Scorecards supply-chain security / Scorecards analysis (push) Has been cancelled

ReduceResolution is currently called before validation during
ingestion. This will cause a panic if there are not enough buckets in
the histogram. If there are too many buckets, the spurious buckets are
ignored, and therefore the error in the input histogram is masked.

Furthermore, invalid negative offsets might cause problems, too.

Therefore, we need to do some minimal validation in reduceResolution.
Fortunately, it is easy and shouldn't slow things down. Sadly, it
requires to return errors, which triggers a bunch of code changes.
Even here is a bright side, we can get rud of a few panics. (Remember:
Don't panic!)

In different news, we haven't done a full validation of histograms
read via remote-read. This is not so much a security concern (as you
can throw off Prometheus easily by feeding it bogus data via
remote-read) but more that remote-read sources might be makeshift and
could accidentally create invalid histograms. We really don't want to
panic in that case. So this commit does not only add a check of the
spans and buckets as needed for resolution reduction but also a full
validation during remote-read.

Signed-off-by: beorn7 <beorn@grafana.com>
This commit is contained in:
Björn Rabenstein
2025-11-21 00:22:24 +01:00
committed by GitHub
parent fc27eef43f
commit b8d19543b8
14 changed files with 395 additions and 115 deletions

View File

@@ -164,8 +164,8 @@ func (h *FloatHistogram) CopyToSchema(targetSchema int32) *FloatHistogram {
Sum: h.Sum,
}
c.PositiveSpans, c.PositiveBuckets = reduceResolution(h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema, false, false)
c.NegativeSpans, c.NegativeBuckets = reduceResolution(h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, false, false)
c.PositiveSpans, c.PositiveBuckets = mustReduceResolution(h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema, false, false)
c.NegativeSpans, c.NegativeBuckets = mustReduceResolution(h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, false, false)
return &c
}
@@ -393,13 +393,13 @@ func (h *FloatHistogram) Add(other *FloatHistogram) (res *FloatHistogram, counte
switch {
case other.Schema < h.Schema:
hPositiveSpans, hPositiveBuckets = reduceResolution(hPositiveSpans, hPositiveBuckets, h.Schema, other.Schema, false, true)
hNegativeSpans, hNegativeBuckets = reduceResolution(hNegativeSpans, hNegativeBuckets, h.Schema, other.Schema, false, true)
hPositiveSpans, hPositiveBuckets = mustReduceResolution(hPositiveSpans, hPositiveBuckets, h.Schema, other.Schema, false, true)
hNegativeSpans, hNegativeBuckets = mustReduceResolution(hNegativeSpans, hNegativeBuckets, h.Schema, other.Schema, false, true)
h.Schema = other.Schema
case other.Schema > h.Schema:
otherPositiveSpans, otherPositiveBuckets = reduceResolution(otherPositiveSpans, otherPositiveBuckets, other.Schema, h.Schema, false, false)
otherNegativeSpans, otherNegativeBuckets = reduceResolution(otherNegativeSpans, otherNegativeBuckets, other.Schema, h.Schema, false, false)
otherPositiveSpans, otherPositiveBuckets = mustReduceResolution(otherPositiveSpans, otherPositiveBuckets, other.Schema, h.Schema, false, false)
otherNegativeSpans, otherNegativeBuckets = mustReduceResolution(otherNegativeSpans, otherNegativeBuckets, other.Schema, h.Schema, false, false)
}
h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, false, hPositiveSpans, hPositiveBuckets, otherPositiveSpans, otherPositiveBuckets)
@@ -459,12 +459,12 @@ func (h *FloatHistogram) Sub(other *FloatHistogram) (res *FloatHistogram, counte
switch {
case other.Schema < h.Schema:
hPositiveSpans, hPositiveBuckets = reduceResolution(hPositiveSpans, hPositiveBuckets, h.Schema, other.Schema, false, true)
hNegativeSpans, hNegativeBuckets = reduceResolution(hNegativeSpans, hNegativeBuckets, h.Schema, other.Schema, false, true)
hPositiveSpans, hPositiveBuckets = mustReduceResolution(hPositiveSpans, hPositiveBuckets, h.Schema, other.Schema, false, true)
hNegativeSpans, hNegativeBuckets = mustReduceResolution(hNegativeSpans, hNegativeBuckets, h.Schema, other.Schema, false, true)
h.Schema = other.Schema
case other.Schema > h.Schema:
otherPositiveSpans, otherPositiveBuckets = reduceResolution(otherPositiveSpans, otherPositiveBuckets, other.Schema, h.Schema, false, false)
otherNegativeSpans, otherNegativeBuckets = reduceResolution(otherNegativeSpans, otherNegativeBuckets, other.Schema, h.Schema, false, false)
otherPositiveSpans, otherPositiveBuckets = mustReduceResolution(otherPositiveSpans, otherPositiveBuckets, other.Schema, h.Schema, false, false)
otherNegativeSpans, otherNegativeBuckets = mustReduceResolution(otherNegativeSpans, otherNegativeBuckets, other.Schema, h.Schema, false, false)
}
h.PositiveSpans, h.PositiveBuckets = addBuckets(h.Schema, h.ZeroThreshold, true, hPositiveSpans, hPositiveBuckets, otherPositiveSpans, otherPositiveBuckets)
@@ -1582,25 +1582,40 @@ func addCustomBucketsWithMismatches(
}
// ReduceResolution reduces the float histogram's spans, buckets into target schema.
// The target schema must be smaller than the current float histogram's schema.
// This will panic if the histogram has custom buckets or if the target schema is
// a custom buckets schema.
func (h *FloatHistogram) ReduceResolution(targetSchema int32) *FloatHistogram {
// An error is returned in the following cases:
// - The target schema is not smaller than the current histogram's schema.
// - The histogram has custom buckets.
// - The target schema is a custom buckets schema.
// - Any spans have an invalid offset.
// - The spans are inconsistent with the number of buckets.
func (h *FloatHistogram) ReduceResolution(targetSchema int32) error {
// Note that the follow three returns are not returning a
// histogram.Error because they are programming errors.
if h.UsesCustomBuckets() {
panic("cannot reduce resolution when there are custom buckets")
return errors.New("cannot reduce resolution when there are custom buckets")
}
if IsCustomBucketsSchema(targetSchema) {
panic("cannot reduce resolution to custom buckets schema")
return errors.New("cannot reduce resolution to custom buckets schema")
}
if targetSchema >= h.Schema {
panic(fmt.Errorf("cannot reduce resolution from schema %d to %d", h.Schema, targetSchema))
return fmt.Errorf("cannot reduce resolution from schema %d to %d", h.Schema, targetSchema)
}
h.PositiveSpans, h.PositiveBuckets = reduceResolution(h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema, false, true)
h.NegativeSpans, h.NegativeBuckets = reduceResolution(h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, false, true)
var err error
if h.PositiveSpans, h.PositiveBuckets, err = reduceResolution(
h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema, false, true,
); err != nil {
return err
}
if h.NegativeSpans, h.NegativeBuckets, err = reduceResolution(
h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, false, true,
); err != nil {
return err
}
h.Schema = targetSchema
return h
return nil
}
// checkSchemaAndBounds checks if two histograms are compatible because they

View File

@@ -4141,14 +4141,16 @@ func createRandomSpans(rng *rand.Rand, spanNum int32) ([]Span, []float64) {
func TestFloatHistogramReduceResolution(t *testing.T) {
tcs := map[string]struct {
origin *FloatHistogram
target *FloatHistogram
origin *FloatHistogram
targetSchema int32
target *FloatHistogram
errorMsg string
}{
"valid float histogram": {
origin: &FloatHistogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: -2, Length: 4},
{Offset: 0, Length: 0},
{Offset: 3, Length: 2},
},
@@ -4160,10 +4162,11 @@ func TestFloatHistogramReduceResolution(t *testing.T) {
},
NegativeBuckets: []float64{1, 3, 1, 2, 1, 1},
},
targetSchema: -1,
target: &FloatHistogram{
Schema: -1,
PositiveSpans: []Span{
{Offset: 0, Length: 3},
{Offset: -1, Length: 3},
{Offset: 1, Length: 1},
},
PositiveBuckets: []float64{1, 4, 2, 2},
@@ -4174,12 +4177,58 @@ func TestFloatHistogramReduceResolution(t *testing.T) {
NegativeBuckets: []float64{1, 4, 2, 2},
},
},
"not enough buckets": {
origin: &FloatHistogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: -2, Length: 4},
{Offset: 0, Length: 0},
{Offset: 3, Length: 2},
},
PositiveBuckets: []float64{1, 3, 1, 2, 1},
},
targetSchema: -1,
errorMsg: "have 5 buckets but spans need more: histogram spans specify different number of buckets than provided",
},
"too many buckets": {
origin: &FloatHistogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: -2, Length: 4},
{Offset: 0, Length: 0},
{Offset: 3, Length: 2},
},
PositiveBuckets: []float64{1, 3, 1, 2, 1, 1, 5},
},
targetSchema: -1,
errorMsg: "spans need 6 buckets, have 7 buckets: histogram spans specify different number of buckets than provided",
},
"negative offset": {
origin: &FloatHistogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: -2, Length: 4},
{Offset: -1, Length: 0},
{Offset: 3, Length: 2},
},
PositiveBuckets: []float64{1, 3, 1, 2, 1, 1},
},
targetSchema: -1,
errorMsg: "span number 2 with offset -1: histogram has a span whose offset is negative",
},
}
for _, tc := range tcs {
target := tc.origin.ReduceResolution(tc.target.Schema)
require.Equal(t, tc.target, target)
// Check that receiver histogram was mutated:
require.Equal(t, tc.target, tc.origin)
for tn, tc := range tcs {
t.Run(tn, func(t *testing.T) {
err := tc.origin.ReduceResolution(tc.targetSchema)
if tc.errorMsg != "" {
require.Equal(t, tc.errorMsg, err.Error())
// The returned error should be a histogram.Error.
require.ErrorAs(t, err, &Error{})
return
}
require.NoError(t, err)
require.Equal(t, tc.target, tc.origin)
})
}
}

View File

@@ -738,6 +738,8 @@ var exponentialBounds = [][]float64{
// deltas. Set it to false if the buckets contain absolute counts.
// Set inplace to true to reuse input slices and avoid allocations (otherwise
// new slices will be allocated for result).
// The functions returns an error if there are too many or too few buckets for the spans
// or if any span except the first has a negative offset.
func reduceResolution[IBC InternalBucketCount](
originSpans []Span,
originBuckets []IBC,
@@ -745,7 +747,7 @@ func reduceResolution[IBC InternalBucketCount](
targetSchema int32,
deltaBuckets bool,
inplace bool,
) ([]Span, []IBC) {
) ([]Span, []IBC, error) {
var (
targetSpans []Span // The spans in the target schema.
targetBuckets []IBC // The bucket counts in the target schema.
@@ -764,10 +766,18 @@ func reduceResolution[IBC InternalBucketCount](
targetBuckets = originBuckets[:0]
}
for _, span := range originSpans {
for n, span := range originSpans {
if n > 0 && span.Offset < 0 {
return nil, nil, fmt.Errorf("span number %d with offset %d: %w", n+1, span.Offset, ErrHistogramSpanNegativeOffset)
}
// Determine the index of the first bucket in this span.
bucketIdx += span.Offset
for j := 0; j < int(span.Length); j++ {
// Protect against too few buckets in the origin.
if bucketCountIdx >= len(originBuckets) {
return nil, nil, fmt.Errorf("have %d buckets but spans need more: %w", len(originBuckets), ErrHistogramSpansBucketsMismatch)
}
// Determine the index of the bucket in the target schema from the index in the original schema.
targetBucketIdx = targetIdx(bucketIdx, originSchema, targetSchema)
@@ -826,12 +836,33 @@ func reduceResolution[IBC InternalBucketCount](
targetBuckets = append(targetBuckets, originBuckets[bucketCountIdx])
}
}
bucketIdx++
bucketCountIdx++
}
}
if bucketCountIdx != len(originBuckets) {
return nil, nil, fmt.Errorf("spans need %d buckets, have %d buckets: %w", bucketCountIdx, len(originBuckets), ErrHistogramSpansBucketsMismatch)
}
return targetSpans, targetBuckets, nil
}
// mustReduceResolution works like reduceResolution, but panics instead of
// returning an error. Use mustReduceResolution if you are sure that the spans
// and buckets are valid.
func mustReduceResolution[IBC InternalBucketCount](
originSpans []Span,
originBuckets []IBC,
originSchema,
targetSchema int32,
deltaBuckets bool,
inplace bool,
) ([]Span, []IBC) {
targetSpans, targetBuckets, err := reduceResolution(
originSpans, originBuckets, originSchema, targetSchema, deltaBuckets, inplace,
)
if err != nil {
panic(err)
}
return targetSpans, targetBuckets
}

View File

@@ -142,7 +142,7 @@ func TestReduceResolutionHistogram(t *testing.T) {
for _, tc := range cases {
spansCopy, bucketsCopy := slices.Clone(tc.spans), slices.Clone(tc.buckets)
spans, buckets := reduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, true, false)
spans, buckets := mustReduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, true, false)
require.Equal(t, tc.expectedSpans, spans)
require.Equal(t, tc.expectedBuckets, buckets)
// Verify inputs were not mutated:
@@ -151,7 +151,7 @@ func TestReduceResolutionHistogram(t *testing.T) {
// Output slices reuse input slices:
const inplace = true
spans, buckets = reduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, true, inplace)
spans, buckets = mustReduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, true, inplace)
require.Equal(t, tc.expectedSpans, spans)
require.Equal(t, tc.expectedBuckets, buckets)
// Verify inputs were mutated which is now expected:
@@ -190,7 +190,7 @@ func TestReduceResolutionFloatHistogram(t *testing.T) {
for _, tc := range cases {
spansCopy, bucketsCopy := slices.Clone(tc.spans), slices.Clone(tc.buckets)
spans, buckets := reduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, false, false)
spans, buckets := mustReduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, false, false)
require.Equal(t, tc.expectedSpans, spans)
require.Equal(t, tc.expectedBuckets, buckets)
// Verify inputs were not mutated:
@@ -199,7 +199,7 @@ func TestReduceResolutionFloatHistogram(t *testing.T) {
// Output slices reuse input slices:
const inplace = true
spans, buckets = reduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, false, inplace)
spans, buckets = mustReduceResolution(tc.spans, tc.buckets, tc.schema, tc.targetSchema, false, inplace)
require.Equal(t, tc.expectedSpans, spans)
require.Equal(t, tc.expectedBuckets, buckets)
// Verify inputs were mutated which is now expected:

View File

@@ -14,6 +14,7 @@
package histogram
import (
"errors"
"fmt"
"math"
"slices"
@@ -617,26 +618,37 @@ func (c *cumulativeBucketIterator) At() Bucket[uint64] {
}
// ReduceResolution reduces the histogram's spans, buckets into target schema.
// The target schema must be smaller than the current histogram's schema.
// This will panic if the histogram has custom buckets or if the target schema is
// a custom buckets schema.
func (h *Histogram) ReduceResolution(targetSchema int32) *Histogram {
// An error is returned in the following cases:
// - The target schema is not smaller than the current histogram's schema.
// - The histogram has custom buckets.
// - The target schema is a custom buckets schema.
// - Any spans have an invalid offset.
// - The spans are inconsistent with the number of buckets.
func (h *Histogram) ReduceResolution(targetSchema int32) error {
// Note that the follow three returns are not returning a
// histogram.Error because they are programming errors.
if h.UsesCustomBuckets() {
panic("cannot reduce resolution when there are custom buckets")
return errors.New("cannot reduce resolution when there are custom buckets")
}
if IsCustomBucketsSchema(targetSchema) {
panic("cannot reduce resolution to custom buckets schema")
return errors.New("cannot reduce resolution to custom buckets schema")
}
if targetSchema >= h.Schema {
panic(fmt.Errorf("cannot reduce resolution from schema %d to %d", h.Schema, targetSchema))
return fmt.Errorf("cannot reduce resolution from schema %d to %d", h.Schema, targetSchema)
}
h.PositiveSpans, h.PositiveBuckets = reduceResolution(
var err error
if h.PositiveSpans, h.PositiveBuckets, err = reduceResolution(
h.PositiveSpans, h.PositiveBuckets, h.Schema, targetSchema, true, true,
)
h.NegativeSpans, h.NegativeBuckets = reduceResolution(
); err != nil {
return err
}
if h.NegativeSpans, h.NegativeBuckets, err = reduceResolution(
h.NegativeSpans, h.NegativeBuckets, h.Schema, targetSchema, true, true,
)
); err != nil {
return err
}
h.Schema = targetSchema
return h
return nil
}

View File

@@ -1719,14 +1719,16 @@ func BenchmarkHistogramValidation(b *testing.B) {
func TestHistogramReduceResolution(t *testing.T) {
tcs := map[string]struct {
origin *Histogram
target *Histogram
origin *Histogram
targetSchema int32
target *Histogram
errorMsg string
}{
"valid histogram": {
origin: &Histogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: 0, Length: 4},
{Offset: -2, Length: 4},
{Offset: 0, Length: 0},
{Offset: 3, Length: 2},
},
@@ -1738,10 +1740,11 @@ func TestHistogramReduceResolution(t *testing.T) {
},
NegativeBuckets: []int64{1, 2, -2, 1, -1, 0},
},
targetSchema: -1,
target: &Histogram{
Schema: -1,
PositiveSpans: []Span{
{Offset: 0, Length: 3},
{Offset: -1, Length: 3},
{Offset: 1, Length: 1},
},
PositiveBuckets: []int64{1, 3, -2, 0},
@@ -1752,12 +1755,58 @@ func TestHistogramReduceResolution(t *testing.T) {
NegativeBuckets: []int64{1, 3, -2, 0},
},
},
"not enough buckets": {
origin: &Histogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: -2, Length: 4},
{Offset: 0, Length: 0},
{Offset: 3, Length: 2},
},
PositiveBuckets: []int64{1, 2, -2, 1, -1},
},
targetSchema: -1,
errorMsg: "have 5 buckets but spans need more: histogram spans specify different number of buckets than provided",
},
"too many buckets": {
origin: &Histogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: -2, Length: 4},
{Offset: 0, Length: 0},
{Offset: 3, Length: 2},
},
PositiveBuckets: []int64{1, 2, -2, 1, -1, 0, 3},
},
targetSchema: -1,
errorMsg: "spans need 6 buckets, have 7 buckets: histogram spans specify different number of buckets than provided",
},
"negative offset": {
origin: &Histogram{
Schema: 0,
PositiveSpans: []Span{
{Offset: -2, Length: 4},
{Offset: -1, Length: 0},
{Offset: 3, Length: 2},
},
PositiveBuckets: []int64{1, 2, -2, 1, -1, 0},
},
targetSchema: -1,
errorMsg: "span number 2 with offset -1: histogram has a span whose offset is negative",
},
}
for _, tc := range tcs {
target := tc.origin.ReduceResolution(tc.target.Schema)
require.Equal(t, tc.target, target)
// Check that receiver histogram was mutated:
require.Equal(t, tc.target, tc.origin)
for tn, tc := range tcs {
t.Run(tn, func(t *testing.T) {
err := tc.origin.ReduceResolution(tc.targetSchema)
if tc.errorMsg != "" {
require.Equal(t, tc.errorMsg, err.Error())
// The returned error should be a histogram.Error.
require.ErrorAs(t, err, &Error{})
return
}
require.NoError(t, err)
require.Equal(t, tc.target, tc.origin)
})
}
}

View File

@@ -352,7 +352,7 @@ func (p *NHCBParser) swapExemplars() {
}
// processNHCB converts the collated classic histogram series to NHCB and caches the info
// to be returned to callers. Retruns true if the conversion was successful.
// to be returned to callers. Returns true if the conversion was successful.
func (p *NHCBParser) processNHCB() bool {
if p.state != stateCollecting {
return false