Handle infinity buckets conservatively

Signed-off-by: Linas Medziunas <linas.medziunas@gmail.com>
This commit is contained in:
Linas Medziunas
2026-01-27 09:37:27 +02:00
parent e326490882
commit 001fc1bfea
2 changed files with 218 additions and 52 deletions

View File

@@ -3140,39 +3140,56 @@ func scalarBinop(op parser.ItemType, lhs, rhs float64) float64 {
panic(fmt.Errorf("operator %q not allowed for Scalar operations", op))
}
func handleInfinityBuckets(b histogram.Bucket[float64], le float64) (float64, float64) {
var underCount, bucketMidpoint float64
func handleInfinityBuckets(isUpperTrim bool, b histogram.Bucket[float64], rhs float64) (underCount, bucketMidpoint float64) {
// Case 1: Bucket with lower bound (-Inf, upper]
if math.IsInf(b.Lower, -1) {
switch {
case le >= b.Upper:
// le is greater than or equal to upper bound. Full count applies.
underCount = b.Count
bucketMidpoint = b.Upper
case le < 0:
// le is negative and less than zero — nothing to keep.
underCount = b.Count * 0.5
bucketMidpoint = b.Upper
default:
// Interpolating with treated lower bound as 0 (linear).
fraction := le / b.Upper
underCount = b.Count * fraction
bucketMidpoint = le / 2
// TRIM_UPPER (</) - remove values greater than rhs
if isUpperTrim {
if rhs >= b.Upper {
// As the rhs is greater than the upper bound, we keep the entire current bucket.
return b.Count, 0
}
if rhs >= 0 && b.Upper > rhs && !math.IsInf(b.Upper, 1) {
// If upper is finite and positive, we treat lower as 0 (despite it de facto being -Inf).
// This is only possible with NHCB, so we can always use linear interpolation.
return b.Count * rhs / b.Upper, (rhs + b.Upper) / 2
}
// Otherwise, we are targeting a valid trim, but as we don't know the exact distribution of values that belong to an infinite bucket, we need to remove the entire bucket.
return 0, b.Upper
}
return underCount, bucketMidpoint
// TRIM_LOWER (>/) - remove values less than rhs
if rhs <= b.Lower {
// This cannot happen because the lower bound is -Inf. Returning the entire current bucket.
return b.Count, 0
}
if rhs >= 0 && b.Upper > rhs && !math.IsInf(b.Upper, 1) {
// If upper is finite and positive, we treat lower as 0 (despite it de facto being -Inf).
// This is only possible with NHCB, so we can always use linear interpolation.
return b.Count * (1 - rhs/b.Upper), rhs / 2
}
// Otherwise, we are targeting a valid trim, but as we don't know the exact distribution of values that belong to an infinite bucket, we need to remove the entire bucket.
return 0, b.Upper
}
// Case 2: Bucket with upper bound [lower, +Inf)
if math.IsInf(b.Upper, 1) {
if le <= b.Lower {
underCount = 0
bucketMidpoint = b.Lower
} else {
underCount = b.Count * 0.5
bucketMidpoint = b.Lower
if isUpperTrim {
// TRIM_UPPER (</) - remove values greater than rhs.
// We don't care about lower here, because:
// when rhs >= lower and the bucket extends to +Inf, some values in this bucket could be > rhs, so we conservatively remove the entire bucket;
// when rhs < lower, all values in this bucket are >= lower > rhs, so all values should be removed.
return 0, b.Lower
}
return underCount, bucketMidpoint
// TRIM_LOWER (>/) - remove values less than rhs.
if rhs <= b.Lower {
// rhs <= lower: all values in this bucket are >= lower >= rhs, so we keep the entire bucket.
return b.Count, 0
}
// lower < rhs: we are inside the infinity bucket, but as we don't know the exact distribution of values, we conservatively remove the entire bucket.
return 0, b.Lower
}
return underCount, bucketMidpoint
panic(fmt.Errorf("one of the buckets must be infinite for handleInfinityBuckets, got %v", b))
}
// computeSplit calculates the portion of the bucket's count <= rhs (trim point).
@@ -3204,14 +3221,14 @@ func computeSplit(b histogram.Bucket[float64], rhs float64, isPositive, isLinear
return b.Count * fraction
}
func computeBucketTrim(op parser.ItemType, b histogram.Bucket[float64], rhs float64, isPositive, isCustomBucket bool) (float64, float64) {
func computeBucketTrim(b histogram.Bucket[float64], rhs float64, isUpperTrim, isPositive, isCustomBucket bool) (float64, float64) {
if math.IsInf(b.Lower, -1) || math.IsInf(b.Upper, 1) {
return handleInfinityBuckets(b, rhs)
return handleInfinityBuckets(isUpperTrim, b, rhs)
}
underCount := computeSplit(b, rhs, isPositive, isCustomBucket)
if op == parser.TRIM_UPPER {
if isUpperTrim {
product := math.Abs(b.Lower) * math.Abs(rhs)
return underCount, computeMidpoint(b, product, isCustomBucket, isPositive)
}
@@ -3221,15 +3238,15 @@ func computeBucketTrim(op parser.ItemType, b histogram.Bucket[float64], rhs floa
}
// Helper function to trim native histogram buckets.
func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser.ItemType) {
// TODO: move trimHistogram to model/histogram/float_histogram.go (making it a method of FloatHistogram).
func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, isUpperTrim bool) {
updatedCount := 0.0
origSum := trimmedHist.Sum
removedSum := 0.0
hasPositive, hasNegative := false, false
isCustomBucket := trimmedHist.UsesCustomBuckets()
switch op {
case parser.TRIM_UPPER:
if isUpperTrim {
// Calculate the fraction to keep for buckets that contain the trim value.
// For TRIM_UPPER, we keep observations below the trim point (rhs).
// Example: histogram </ float.
@@ -3251,7 +3268,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
hasPositive = true
}
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, true, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, true, isCustomBucket)
switch {
case bucket.Upper <= rhs:
@@ -3281,7 +3298,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
}
hasNegative = true
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, false, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, false, isCustomBucket)
switch {
case bucket.Upper <= rhs:
@@ -3298,8 +3315,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
trimmedHist.NegativeBuckets[i] = 0
}
}
case parser.TRIM_LOWER:
} else { // !isUpperTrim
// For TRIM_LOWER, we keep observations above the trim point (rhs).
// Example: histogram >/ float.
for i, iter := 0, trimmedHist.PositiveBucketIterator(); iter.Next(); i++ {
@@ -3320,7 +3336,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
hasPositive = true
}
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, true, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, true, isCustomBucket)
switch {
case bucket.Lower >= rhs:
@@ -3347,7 +3363,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
}
hasNegative = true
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, false, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, false, isCustomBucket)
switch {
case bucket.Lower >= rhs:
@@ -3369,22 +3385,26 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
// Handle the zero count bucket.
if trimmedHist.ZeroCount > 0 {
keepCount := computeSplit(trimmedHist.ZeroBucket(), rhs, true, true)
if op == parser.TRIM_LOWER {
if !isUpperTrim {
keepCount = trimmedHist.ZeroCount - keepCount
}
trimmedHist.ZeroCount = keepCount
updatedCount += keepCount
}
// Apply new sum.
newSum := origSum - removedSum
newSum := 0.0
if updatedCount != 0 {
// Calculate new sum only when there are at least some observations remaining.
// Otherwise, make it zero.
newSum = origSum - removedSum
// Clamp correction.
if !hasNegative && newSum < 0 {
newSum = 0
}
if !hasPositive && newSum > 0 {
newSum = 0
// Clamp correction.
if !hasNegative && newSum < 0 {
newSum = 0
}
if !hasPositive && newSum > 0 {
newSum = 0
}
}
// Update the histogram's count and sum.
@@ -3459,11 +3479,11 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram
return 0, hlhs.Copy().Div(rhs).Compact(0), true, nil, nil
case parser.TRIM_UPPER:
trimmedHist := hlhs.Copy()
trimHistogram(trimmedHist, rhs, op)
trimHistogram(trimmedHist, rhs, true)
return 0, trimmedHist, true, nil, nil
case parser.TRIM_LOWER:
trimmedHist := hlhs.Copy()
trimHistogram(trimmedHist, rhs, op)
trimHistogram(trimmedHist, rhs, false)
return 0, trimmedHist, true, nil, nil
case parser.ADD, parser.SUB, parser.POW, parser.MOD, parser.EQLC, parser.NEQ, parser.GTR, parser.LSS, parser.GTE, parser.LTE, parser.ATAN2:
return 0, nil, false, nil, annotations.NewIncompatibleTypesInBinOpInfo("histogram", parser.ItemTypeStr[op], "float", pos)

View File

@@ -1924,9 +1924,27 @@ eval instant at 1m cbh >/ 15
eval instant at 1m cbh </ 7.5
cbh{} {{schema:-53 count:4 sum:27.5 custom_values:[5 10 15 20] buckets:[1 3]}}
# Custom buckets: trim uses half of overflow bucket in interpolation if cutoff is above last bucket
# Custom buckets: trim drops +Inf bucket entirely even if cutoff is above its lower bound
eval instant at 1m cbh </ 50
cbh{} {{schema:-53 count:14.5 sum:162.5 custom_values:[5 10 15 20] buckets:[1 6 4 3 0.5]}}
cbh{} {{schema:-53 count:14 sum:152.5 custom_values:[5 10 15 20] buckets:[1 6 4 3]}}
eval instant at 1m cbh </ -Inf
cbh{} {{schema:-53 custom_values:[5 10 15 20]}}
eval instant at 1m cbh >/ +Inf
cbh{} {{schema:-53 custom_values:[5 10 15 20]}}
eval instant at 1m cbh </ +Inf
cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}}
eval instant at 1m cbh >/ -Inf
cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}}
eval instant at 1m cbh >/ 0
cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}}
eval instant at 1m cbh </ 0
cbh {{schema:-53 custom_values:[5 10 15 20]}}
# Custom buckets: negative values
eval instant at 1m cbh_has_neg </ 2
@@ -1936,7 +1954,7 @@ eval instant at 1m cbh_has_neg </ -4
cbh_has_neg{} {{schema:-53 count:2.4 sum:6.5 custom_values:[-10 5 10 15 20] buckets:[2 0.4]}}
eval instant at 1m cbh_has_neg </ -15
cbh_has_neg{} {{schema:-53 count:1 sum:17.5 custom_values:[-10 5 10 15 20] buckets:[1]}}
cbh_has_neg{} {{schema:-53 custom_values:[-10 5 10 15 20]}}
# Zero Bucket Edge Case: Interpolation Around Zero
eval instant at 1m zero_bucket </ -0.005
@@ -1945,4 +1963,132 @@ eval instant at 1m zero_bucket </ -0.005
eval instant at 1m zero_bucket >/ 0
zero_bucket{} {{count:7.5 sum:-18.77081528017131 z_bucket:2.5 z_bucket_w:0.01 buckets:[2 3]}}
load 1m
cbh_one_bucket {{schema:-53 sum:100.0 count:100 buckets:[100]}}
# Skip [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket </ 10.0
cbh_one_bucket {{schema:-53 sum:0.0 count:0 buckets:[0]}}
# Skip [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket >/ 10.0
cbh_one_bucket {{schema:-53 sum:0.0 count:0 buckets:[0]}}
# Keep [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket </ +Inf
cbh_one_bucket {{schema:-53 sum:100 count:100 buckets:[100]}}
# Skip [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket >/ +Inf
cbh_one_bucket {{schema:-53 sum:0 count:0 buckets:[0]}}
# Keep [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket >/ -Inf
cbh_one_bucket {{schema:-53 sum:100 count:100 buckets:[100]}}
# Skip [-Inf; +Inf] bucket (100).
eval instant at 1m cbh_one_bucket </ -Inf
cbh_one_bucket {{schema:-53 sum:0 count:0 buckets:[0]}}
load 1m
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[1 100]}}
# Skip (0; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_zero </ 10.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:1 custom_values:[0] buckets:[1 0]}}
# Skip (0; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_zero </ 0.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:1 custom_values:[0] buckets:[1 0]}}
# Skip both buckets (1, 100).
eval instant at 1m cbh_two_buckets_split_at_zero </ -10.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:0.0 count:0 custom_values:[0] buckets:[0 0]}}
# Skip [-Inf, 0] bucket (1).
eval instant at 1m cbh_two_buckets_split_at_zero >/ -10.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[0 100]}}
# Skip [-Inf, 0] bucket (1).
eval instant at 1m cbh_two_buckets_split_at_zero >/ 0.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[0 100]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_zero >/ 10.0
cbh_two_buckets_split_at_zero {{schema:-53 sum:0.0 count:0 custom_values:[0] buckets:[0 0]}}
load 1m
cbh_two_buckets_split_at_positive {{schema:-53 sum:33 count:101 custom_values:[5] buckets:[1 100]}}
# Skip (5; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_positive </ 10.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:-467.0 count:1 custom_values:[5] buckets:[1 0]}}
# Skip (5; +Inf] bucket (100) and 2/5 of [-Inf, 5] bucket (0.4 * 1).
eval instant at 1m cbh_two_buckets_split_at_positive </ 2.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:-469.1 count:0.4 custom_values:[5] buckets:[0.4 0]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_positive </ 0.0
cbh_two_buckets_split_at_positive {{schema:-53 custom_values:[5]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_positive </ -10.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:0.0 count:0 custom_values:[5] buckets:[0 0]}}
# Skip [-Inf, 5] bucket (1).
eval instant at 1m cbh_two_buckets_split_at_positive >/ -10.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:28.0 count:100 custom_values:[5] buckets:[0 100]}}
# Keep both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_positive >/ 0.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:33.0 count:101 custom_values:[5] buckets:[1 100]}}
# Keep (5, 100] bucket (100) and 3/5 of [-Inf, 5] bucket (0.6 * 1).
eval instant at 1m cbh_two_buckets_split_at_positive >/ 2.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:32.6 count:100.6 custom_values:[5] buckets:[0.6 100]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_positive >/ 10.0
cbh_two_buckets_split_at_positive {{schema:-53 sum:0.0 count:0 custom_values:[5] buckets:[0 0]}}
load 1m
cbh_two_buckets_split_at_negative {{schema:-53 sum:33 count:101 custom_values:[-5] buckets:[1 100]}}
# Skip (-5; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_negative </ 10.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:533.0 count:1 custom_values:[-5] buckets:[1 0]}}
# Skip (-5; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_negative </ 0.0
cbh_two_buckets_split_at_negative {{schema:-53 count:1 sum:533 custom_values:[-5] buckets:[1 0]}}
# Skip (-5; +Inf] bucket (100).
eval instant at 1m cbh_two_buckets_split_at_negative </ -2.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:533.0 count:1 custom_values:[-5] buckets:[1 0]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_negative </ -10.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:0.0 count:0 custom_values:[-5] buckets:[0 0]}}
# Skip [-Inf, -5] bucket (1).
eval instant at 1m cbh_two_buckets_split_at_negative >/ -10.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:38.0 count:100 custom_values:[-5] buckets:[0 100]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_negative >/ -2.0
cbh_two_buckets_split_at_negative {{schema:-53 custom_values:[-5]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_negative >/ 0.0
cbh_two_buckets_split_at_negative {{schema:-53 custom_values:[-5]}}
# Skip both buckets (1 and 100).
eval instant at 1m cbh_two_buckets_split_at_negative >/ 10.0
cbh_two_buckets_split_at_negative {{schema:-53 sum:0.0 count:0 custom_values:[-5] buckets:[0 0]}}
clear