POC: new native histogram extrapolation idea

Compensate for under/over estimating the left hand side (start side)
of buckets.
If the bucket-level interpolation is inside the duration to
start, then extrapolate only to this extra duration on the left.
Otherwise, if the duration to the overall zero (from the overall count)
is inside the bucket's zero time, then the bucket's estimate might be too
much; in that case, calculate the increase from zero through the start
time to get the increase from the start to the first sample.

Signed-off-by: György Krajcsovits <gyorgy.krajcsovits@grafana.com>
This commit is contained in:
György Krajcsovits
2025-06-27 15:19:51 +02:00
parent df4f1df43f
commit 31270ac35e
2 changed files with 97 additions and 15 deletions

View File

@@ -149,6 +149,9 @@ func extrapolatedRate(vals []parser.Value, args parser.Expressions, enh *EvalNod
if durationToStart >= extrapolationThreshold {
durationToStart = averageDurationBetweenSamples / 2
}
durationToZero := math.NaN()
if isCounter && resultFloat > 0 && len(samples.Floats) > 0 && samples.Floats[0].F >= 0 {
// Counters cannot be negative. If we have any slope at all
// (i.e. resultFloat went up), we can extrapolate the zero point
@@ -156,30 +159,98 @@ func extrapolatedRate(vals []parser.Value, args parser.Expressions, enh *EvalNod
// than the durationToStart, we take the zero point as the start
// of the series, thereby avoiding extrapolation to negative
// counter values.
// TODO(beorn7): Do this for histograms, too.
durationToZero := sampledInterval * (samples.Floats[0].F / resultFloat)
if durationToZero < durationToStart {
durationToStart = durationToZero
}
durationToZero = sampledInterval * (samples.Floats[0].F / resultFloat)
}
if isCounter && resultHistogram != nil && resultHistogram.Count > 0 && len(samples.Histograms) > 0 && samples.Histograms[0].H.Count >= 0 {
// Counters cannot be negative. If we have any slope at all
// (i.e. resultHistogram.Count went up), we can extrapolate the zero
// point of the counter. If the duration to the zero point is shorter
// than the durationToStart, we take the zero point as the start
// of the series, thereby avoiding extrapolation to negative
// counter values.
durationToZero = sampledInterval * (samples.Histograms[0].H.Count / resultHistogram.Count)
}
if durationToZero < durationToStart {
durationToStart = durationToZero
}
extrapolateToInterval += durationToStart
if durationToEnd >= extrapolationThreshold {
durationToEnd = averageDurationBetweenSamples / 2
}
extrapolateToInterval += durationToEnd
factor := extrapolateToInterval / sampledInterval
if len(samples.Floats) > 0 {
extrapolateToInterval += durationToStart + durationToEnd
factor := extrapolateToInterval / sampledInterval
if isRate {
factor /= ms.Range.Seconds()
}
resultFloat *= factor
return append(enh.Out, Sample{F: resultFloat}), annos
}
extrapolateToIntervalRight := extrapolateToInterval + durationToEnd
factorRight := extrapolateToIntervalRight / sampledInterval
factor := (extrapolateToInterval + durationToStart + durationToEnd) / sampledInterval
if isRate {
factor /= ms.Range.Seconds()
}
if resultHistogram == nil {
resultFloat *= factor
} else {
resultHistogram.Mul(factor)
factorRight /= ms.Range.Seconds()
}
return append(enh.Out, Sample{F: resultFloat, H: resultHistogram}), annos
// extrapolateBucket returns the extrapolated value for a single bucket.
// firstValue is the bucket's value in the first histogram sample and
// resultValue is the bucket's raw increase over the sampled interval.
//
// It relies on variables captured from the enclosing function: factor,
// factorRight, sampledInterval, durationToStart, durationToZero, isRate and
// ms. factor and factorRight are only read here; any per-bucket rate
// adjustment is applied to locals so that one bucket's computation cannot
// affect the next.
extrapolateBucket := func(firstValue, resultValue float64) float64 {
	if firstValue < 0 || resultValue <= 0 {
		// Without a non-negative first value and a positive increase no
		// per-bucket zero point can be derived; use the overall factor.
		return resultValue * factor
	}
	// Time before the first sample at which this bucket, following its own
	// slope, would have been zero.
	bucketDurationToStart := sampledInterval * (firstValue / resultValue)
	switch {
	case bucketDurationToStart < durationToStart:
		// This bucket extrapolates to zero later (more to the right on the
		// timeline) than the predicted start, avoid extrapolating below
		// zero.
		factorLeft := bucketDurationToStart / sampledInterval
		if isRate {
			// Normalise the left-hand factor only. factorRight is shared
			// by all buckets and must not be modified here.
			factorLeft /= ms.Range.Seconds()
		}
		return resultValue*factorLeft + resultValue*factorRight
	case durationToZero < bucketDurationToStart:
		// This bucket extrapolates to zero earlier (more to the left on the
		// timeline) than the overall count, avoid underestimating.
		// Note that if we don't know the duration to zero, then it is NaN,
		// the comparison above is false, and this case is skipped.
		compensateLeft := durationToStart / durationToZero
		if isRate {
			compensateLeft /= ms.Range.Seconds()
		}
		return firstValue*compensateLeft + resultValue*factorRight
	default:
		return resultValue * factor
	}
}
resultHistogram.ZeroCount = extrapolateBucket(samples.Histograms[0].H.ZeroCount, resultHistogram.ZeroCount)
resultHistogram.Count *= factor
resultHistogram.Sum *= factor
if len(resultHistogram.PositiveBuckets) != len(samples.Histograms[0].H.PositiveBuckets) {
panic("extrapolatedRate: Cannot handle different number of positive buckets")
}
if len(resultHistogram.NegativeBuckets) != len(samples.Histograms[0].H.NegativeBuckets) {
panic("extrapolatedRate: Cannot handle different number of negative buckets")
}
for i := range resultHistogram.PositiveBuckets {
resultHistogram.PositiveBuckets[i] = extrapolateBucket(samples.Histograms[0].H.PositiveBuckets[i], resultHistogram.PositiveBuckets[i])
}
for i := range resultHistogram.NegativeBuckets {
resultHistogram.NegativeBuckets[i] = extrapolateBucket(samples.Histograms[0].H.NegativeBuckets[i], resultHistogram.NegativeBuckets[i])
}
return append(enh.Out, Sample{H: resultHistogram}), annos
}
// histogramRate is a helper function for extrapolatedRate. It requires

View File

@@ -1373,3 +1373,14 @@ eval instant at 1m histogram_fraction(-Inf, +Inf, histogram_nan)
expect info msg: PromQL info: input to histogram_fraction has NaN observations, which are excluded from all fractions for metric name "histogram_nan"
{case="100% NaNs"} 0.0
{case="20% NaNs"} 0.8
clear
# Carefully chosen interval and range so that the zero point of the count is inside the
# interpolation period.
load 1m
metric {{schema:0 count:15.0 sum:25.0 buckets:[5 10]}} {{schema:0 count:2490.0 sum:75.0 buckets:[15 2475]}}x55
eval instant at 55m increase(metric[90m])
{} {{count:2490 sum:50.303030303030305 counter_reset_hint:gauge buckets:[15 2475]}}
# old result: {} {{count:2497.5 sum:50.45454545454545 counter_reset_hint:gauge buckets:[10.09090909090909 2487.409090909091]}}