Handle infinity buckets conservatively

Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
This commit is contained in:
Linas Medziunas
2026-01-27 09:37:27 +02:00
parent e326490882
commit 001fc1bfea
2 changed files with 218 additions and 52 deletions

View File

@@ -3140,39 +3140,56 @@ func scalarBinop(op parser.ItemType, lhs, rhs float64) float64 {
panic(fmt.Errorf("operator %q not allowed for Scalar operations", op))
}
func handleInfinityBuckets(b histogram.Bucket[float64], le float64) (float64, float64) {
var underCount, bucketMidpoint float64
func handleInfinityBuckets(isUpperTrim bool, b histogram.Bucket[float64], rhs float64) (underCount, bucketMidpoint float64) {
// Case 1: Bucket with lower bound (-Inf, upper]
if math.IsInf(b.Lower, -1) {
switch {
case le >= b.Upper:
// le is greater than or equal to upper bound. Full count applies.
underCount = b.Count
bucketMidpoint = b.Upper
case le < 0:
// le is negative and less than zero — nothing to keep.
underCount = b.Count * 0.5
bucketMidpoint = b.Upper
default:
// Interpolating with treated lower bound as 0 (linear).
fraction := le / b.Upper
underCount = b.Count * fraction
bucketMidpoint = le / 2
// TRIM_UPPER (</) - remove values greater than rhs
if isUpperTrim {
if rhs >= b.Upper {
// As the rhs is greater than the upper bound, we keep the entire current bucket.
return b.Count, 0
}
if rhs >= 0 && b.Upper > rhs && !math.IsInf(b.Upper, 1) {
// If upper is finite and positive, we treat lower as 0 (despite it de facto being -Inf).
// This is only possible with NHCB, so we can always use linear interpolation.
return b.Count * rhs / b.Upper, (rhs + b.Upper) / 2
}
// Otherwise, we are targeting a valid trim, but as we don't know the exact distribution of values that belong to an infinite bucket, we need to remove the entire bucket.
return 0, b.Upper
}
return underCount, bucketMidpoint
// TRIM_LOWER (>/) - remove values less than rhs
if rhs <= b.Lower {
// This cannot happen because the lower bound is -Inf. Returning the entire current bucket.
return b.Count, 0
}
if rhs >= 0 && b.Upper > rhs && !math.IsInf(b.Upper, 1) {
// If upper is finite and positive, we treat lower as 0 (despite it de facto being -Inf).
// This is only possible with NHCB, so we can always use linear interpolation.
return b.Count * (1 - rhs/b.Upper), rhs / 2
}
// Otherwise, we are targeting a valid trim, but as we don't know the exact distribution of values that belong to an infinite bucket, we need to remove the entire bucket.
return 0, b.Upper
}
// Case 2: Bucket with upper bound [lower, +Inf)
if math.IsInf(b.Upper, 1) {
if le <= b.Lower {
underCount = 0
bucketMidpoint = b.Lower
} else {
underCount = b.Count * 0.5
bucketMidpoint = b.Lower
if isUpperTrim {
// TRIM_UPPER (</) - remove values greater than rhs.
// We don't care about lower here, because:
// when rhs >= lower and the bucket extends to +Inf, some values in this bucket could be > rhs, so we conservatively remove the entire bucket;
// when rhs < lower, all values in this bucket are >= lower > rhs, so all values should be removed.
return 0, b.Lower
}
return underCount, bucketMidpoint
// TRIM_LOWER (>/) - remove values less than rhs.
if rhs <= b.Lower {
// rhs <= lower: all values in this bucket are >= lower >= rhs, so we keep the entire bucket.
return b.Count, 0
}
// lower < rhs: we are inside the infinity bucket, but as we don't know the exact distribution of values, we conservatively remove the entire bucket.
return 0, b.Lower
}
return underCount, bucketMidpoint
panic(fmt.Errorf("one of the buckets must be infinite for handleInfinityBuckets, got %v", b))
}
// computeSplit calculates the portion of the bucket's count <= rhs (trim point).
@@ -3204,14 +3221,14 @@ func computeSplit(b histogram.Bucket[float64], rhs float64, isPositive, isLinear
return b.Count * fraction
}
func computeBucketTrim(op parser.ItemType, b histogram.Bucket[float64], rhs float64, isPositive, isCustomBucket bool) (float64, float64) {
func computeBucketTrim(b histogram.Bucket[float64], rhs float64, isUpperTrim, isPositive, isCustomBucket bool) (float64, float64) {
if math.IsInf(b.Lower, -1) || math.IsInf(b.Upper, 1) {
return handleInfinityBuckets(b, rhs)
return handleInfinityBuckets(isUpperTrim, b, rhs)
}
underCount := computeSplit(b, rhs, isPositive, isCustomBucket)
if op == parser.TRIM_UPPER {
if isUpperTrim {
product := math.Abs(b.Lower) * math.Abs(rhs)
return underCount, computeMidpoint(b, product, isCustomBucket, isPositive)
}
@@ -3221,15 +3238,15 @@ func computeBucketTrim(op parser.ItemType, b histogram.Bucket[float64], rhs floa
}
// Helper function to trim native histogram buckets.
func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser.ItemType) {
// TODO: move trimHistogram to model/histogram/float_histogram.go (making it a method of FloatHistogram).
func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, isUpperTrim bool) {
updatedCount := 0.0
origSum := trimmedHist.Sum
removedSum := 0.0
hasPositive, hasNegative := false, false
isCustomBucket := trimmedHist.UsesCustomBuckets()
switch op {
case parser.TRIM_UPPER:
if isUpperTrim {
// Calculate the fraction to keep for buckets that contain the trim value.
// For TRIM_UPPER, we keep observations below the trim point (rhs).
// Example: histogram </ float.
@@ -3251,7 +3268,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
hasPositive = true
}
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, true, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, true, isCustomBucket)
switch {
case bucket.Upper <= rhs:
@@ -3281,7 +3298,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
}
hasNegative = true
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, false, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, false, isCustomBucket)
switch {
case bucket.Upper <= rhs:
@@ -3298,8 +3315,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
trimmedHist.NegativeBuckets[i] = 0
}
}
case parser.TRIM_LOWER:
} else { // !isUpperTrim
// For TRIM_LOWER, we keep observations above the trim point (rhs).
// Example: histogram >/ float.
for i, iter := 0, trimmedHist.PositiveBucketIterator(); iter.Next(); i++ {
@@ -3320,7 +3336,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
hasPositive = true
}
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, true, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, true, isCustomBucket)
switch {
case bucket.Lower >= rhs:
@@ -3347,7 +3363,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
}
hasNegative = true
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, false, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, false, isCustomBucket)
switch {
case bucket.Lower >= rhs:
@@ -3369,22 +3385,26 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
// Handle the zero count bucket.
if trimmedHist.ZeroCount > 0 {
keepCount := computeSplit(trimmedHist.ZeroBucket(), rhs, true, true)
if op == parser.TRIM_LOWER {
if !isUpperTrim {
keepCount = trimmedHist.ZeroCount - keepCount
}
trimmedHist.ZeroCount = keepCount
updatedCount += keepCount
}
// Apply new sum.
newSum := origSum - removedSum
newSum := 0.0
if updatedCount != 0 {
// Calculate new sum only when there are at least some observations remaining.
// Otherwise, make it zero.
newSum = origSum - removedSum
// Clamp correction.
if !hasNegative && newSum < 0 {
newSum = 0
}
if !hasPositive && newSum > 0 {
newSum = 0
// Clamp correction.
if !hasNegative && newSum < 0 {
newSum = 0
}
if !hasPositive && newSum > 0 {
newSum = 0
}
}
// Update the histogram's count and sum.
@@ -3459,11 +3479,11 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram
return 0, hlhs.Copy().Div(rhs).Compact(0), true, nil, nil
case parser.TRIM_UPPER:
trimmedHist := hlhs.Copy()
trimHistogram(trimmedHist, rhs, op)
trimHistogram(trimmedHist, rhs, true)
return 0, trimmedHist, true, nil, nil
case parser.TRIM_LOWER:
trimmedHist := hlhs.Copy()
trimHistogram(trimmedHist, rhs, op)
trimHistogram(trimmedHist, rhs, false)
return 0, trimmedHist, true, nil, nil
case parser.ADD, parser.SUB, parser.POW, parser.MOD, parser.EQLC, parser.NEQ, parser.GTR, parser.LSS, parser.GTE, parser.LTE, parser.ATAN2:
return 0, nil, false, nil, annotations.NewIncompatibleTypesInBinOpInfo("histogram", parser.ItemTypeStr[op], "float", pos)

View File

@@ -1924,9 +1924,27 @@ eval instant at 1m cbh >/ 15
eval instant at 1m cbh </ 7.5
cbh{} {{schema:-53 count:4 sum:27.5 custom_values:[5 10 15 20] buckets:[1 3]}}
# Custom buckets: trim uses half of overflow bucket in interpolation if cutoff is above last bucket
# Custom buckets: trim drops +Inf bucket entirely even if cutoff is above its lower bound
eval instant at 1m cbh </ 50
cbh{} {{schema:-53 count:14.5 sum:162.5 custom_values:[5 10 15 20] buckets:[1 6 4 3 0.5]}}
cbh{} {{schema:-53 count:14 sum:152.5 custom_values:[5 10 15 20] buckets:[1 6 4 3]}}
eval instant at 1m cbh </ -Inf
cbh{} {{schema:-53 custom_values:[5 10 15 20]}}
eval instant at 1m cbh >/ +Inf
cbh{} {{schema:-53 custom_values:[5 10 15 20]}}
eval instant at 1m cbh </ +Inf
cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}}
eval instant at 1m cbh >/ -Inf
cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}}
eval instant at 1m cbh >/ 0
cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}}
eval instant at 1m cbh </ 0
cbh {{schema:-53 custom_values:[5 10 15 20]}}
# Custom buckets: negative values
eval instant at 1m cbh_has_neg </ 2
@@ -1936,7 +1954,7 @@ eval instant at 1m cbh_has_neg </ -4
cbh_has_neg{} {{schema:-53 count:2.4 sum:6.5 custom_values:[-10 5 10 15 20] buckets:[2 0.4]}}
eval instant at 1m cbh_has_neg </ -15
cbh_has_neg{} {{schema:-53 count:1 sum:17.5 custom_values:[-10 5 10 15 20] buckets:[1]}}
cbh_has_neg{} {{schema:-53 custom_values:[-10 5 10 15 20]}}
# Zero Bucket Edge Case: Interpolation Around Zero
eval instant at 1m zero_bucket </ -0.005
@@ -1945,4 +1963,132 @@ eval instant at 1m zero_bucket </ -0.005
eval instant at 1m zero_bucket >/ 0
zero_bucket{} {{count:7.5 sum:-18.77081528017131 z_bucket:2.5 z_bucket_w:0.01 buckets:[2 3]}}
load 1m
cbh_one_bucket {{schema:-53 sum:100.0 count:100 buckets:[100]}}
# Skip [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket </ 10.0
cbh_one_bucket {{schema:-53 sum:0.0 count:0 buckets:[0]}}
# Skip [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket >/ 10.0
cbh_one_bucket {{schema:-53 sum:0.0 count:0 buckets:[0]}}
# Keep [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket </ +Inf
cbh_one_bucket {{schema:-53 sum:100 count:100 buckets:[100]}}
# Skip [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket >/ +Inf
cbh_one_bucket {{schema:-53 sum:0 count:0 buckets:[0]}}
# Keep [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket >/ -Inf
cbh_one_bucket {{schema:-53 sum:100 count:100 buckets:[100]}}
# Skip [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket </ -Inf
cbh_one_bucket {{schema:-53 sum:0 count:0 buckets:[0]}}
load 1m
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[1 100]}}
# Skip (0; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_zero </ 10.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:1 custom_values:[0] buckets:[1 0]}}
# Skip (0; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_zero </ 0.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:1 custom_values:[0] buckets:[1 0]}}
# Skip both buckets (1, 100).
eval instant at 1m cbh_two_buckets_split_at_zero </ -10.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:0.0 count:0 custom_values:[0] buckets:[0 0]}}
# Skip [-Inf, 0] bucket (1).
eval instant at 1m cbh_two_buckets_split_at_zero >/ -10.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[0 100]}}
# Skip [-Inf, 0] bucket (1).
eval instant at 1m cbh_two_buckets_split_at_zero >/ 0.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[0 100]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_zero >/ 10.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:0.0 count:0 custom_values:[0] buckets:[0 0]}}
load 1m
cbh_two_buckets_split_at_positive {{schema:-53 sum:33 count:101 custom_values:[5] buckets:[1 100]}}
# Skip (5; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_positive </ 10.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:-467.0 count:1 custom_values:[5] buckets:[1 0]}}
# Skip (5; +Inf] bucket (100) and 2/5 of [-Inf, 5] bucket (0.4 * 1).
eval instant at 1m cbh_two_buckets_split_at_positive </ 2.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:-469.1 count:0.4 custom_values:[5] buckets:[0.4 0]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_positive </ 0.0
cbh_two_buckets_split_at_positive {{schema:-53 custom_values:[5]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_positive </ -10.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:0.0 count:0 custom_values:[5] buckets:[0 0]}}
# Skip [-Inf, 5] bucket (1).
eval instant at 1m cbh_two_buckets_split_at_positive >/ -10.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:28.0 count:100 custom_values:[5] buckets:[0 100]}}
# Keep both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_positive >/ 0.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:33.0 count:101 custom_values:[5] buckets:[1 100]}}
# Keep (5, 100] bucket (100) and 3/5 of [-Inf, 5] bucket (0.6 * 1).
eval instant at 1m cbh_two_buckets_split_at_positive >/ 2.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:32.6 count:100.6 custom_values:[5] buckets:[0.6 100]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_positive >/ 10.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:0.0 count:0 custom_values:[5] buckets:[0 0]}}
load 1m
cbh_two_buckets_split_at_negative {{schema:-53 sum:33 count:101 custom_values:[-5] buckets:[1 100]}}
# Skip (-5; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_negative </ 10.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:533.0 count:1 custom_values:[-5] buckets:[1 0]}}
# Skip (-5; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_negative </ 0.0
cbh_two_buckets_split_at_negative {{schema:-53 count:1 sum:533 custom_values:[-5] buckets:[1 0]}}
# Skip (-5; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_negative </ -2.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:533.0 count:1 custom_values:[-5] buckets:[1 0]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_negative </ -10.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:0.0 count:0 custom_values:[-5] buckets:[0 0]}}
# Skip [-Inf, -5] bucket (1).
eval instant at 1m cbh_two_buckets_split_at_negative >/ -10.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:38.0 count:100 custom_values:[-5] buckets:[0 100]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_negative >/ -2.0
cbh_two_buckets_split_at_negative {{schema:-53 custom_values:[-5]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_negative >/ 0.0
cbh_two_buckets_split_at_negative {{schema:-53 custom_values:[-5]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_negative >/ 10.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:0.0 count:0 custom_values:[-5] buckets:[0 0]}}
clear