diff --git a/promql/engine.go b/promql/engine.go index 24aac9d905..8c4fffd508 100644 --- a/promql/engine.go +++ b/promql/engine.go @@ -3140,39 +3140,56 @@ func scalarBinop(op parser.ItemType, lhs, rhs float64) float64 { panic(fmt.Errorf("operator %q not allowed for Scalar operations", op)) } -func handleInfinityBuckets(b histogram.Bucket[float64], le float64) (float64, float64) { - var underCount, bucketMidpoint float64 - +func handleInfinityBuckets(isUpperTrim bool, b histogram.Bucket[float64], rhs float64) (underCount, bucketMidpoint float64) { + // Case 1: Bucket with lower bound (-Inf, upper] if math.IsInf(b.Lower, -1) { - switch { - case le >= b.Upper: - // le is greater than or equal to upper bound. Full count applies. - underCount = b.Count - bucketMidpoint = b.Upper - case le < 0: - // le is negative and less than zero — nothing to keep. - underCount = b.Count * 0.5 - bucketMidpoint = b.Upper - default: - // Interpolating with treated lower bound as 0 (linear). - fraction := le / b.Upper - underCount = b.Count * fraction - bucketMidpoint = le / 2 + // TRIM_UPPER (</) - remove values greater than rhs + if isUpperTrim { + if rhs >= b.Upper { + // As the rhs is greater than the upper bound, we keep the entire current bucket. + return b.Count, 0 + } + if rhs >= 0 && b.Upper > rhs && !math.IsInf(b.Upper, 1) { + // If upper is finite and positive, we treat lower as 0 (despite it de facto being -Inf). + // This is only possible with NHCB, so we can always use linear interpolation. + return b.Count * rhs / b.Upper, (rhs + b.Upper) / 2 + } + // Otherwise, we are targeting a valid trim, but as we don't know the exact distribution of values that belongs to an infinite bucket, we need to remove the entire bucket. + return 0, b.Upper } - return underCount, bucketMidpoint + // TRIM_LOWER (>/) - remove values less than rhs + if rhs <= b.Lower { + // Impossible to happen because the lower bound is -Inf. Returning the entire current bucket. 
+ return b.Count, 0 + } + if rhs >= 0 && b.Upper > rhs && !math.IsInf(b.Upper, 1) { + // If upper is finite and positive, we treat lower as 0 (despite it de facto being -Inf). + // This is only possible with NHCB, so we can always use linear interpolation. + return b.Count * (1 - rhs/b.Upper), rhs / 2 + } + // Otherwise, we are targeting a valid trim, but as we don't know the exact distribution of values that belongs to an infinite bucket, we need to remove the entire bucket. + return 0, b.Upper } + // Case 2: Bucket with upper bound [lower, +Inf) if math.IsInf(b.Upper, 1) { - if le <= b.Lower { - underCount = 0 - bucketMidpoint = b.Lower - } else { - underCount = b.Count * 0.5 - bucketMidpoint = b.Lower + if isUpperTrim { + // TRIM_UPPER (</) - remove values greater than rhs. + // When rhs >= lower and the bucket extends to +Inf, some values in this bucket could be > rhs, so we conservatively remove the entire bucket; + // when rhs < lower, all values in this bucket are >= lower > rhs, so all values should be removed. + return 0, b.Lower } - return underCount, bucketMidpoint + // TRIM_LOWER (>/) - remove values less than rhs. + if rhs <= b.Lower { + // rhs <= lower: all values in this bucket are >= lower >= rhs, so we keep the entire bucket. + return b.Count, 0 + } + // lower < rhs: we are inside the infinity bucket, but as we don't know the exact distribution of values, we conservatively remove the entire bucket. + return 0, b.Lower } - return underCount, bucketMidpoint + + panic(fmt.Errorf("one of the buckets must be infinite for handleInfinityBuckets, got %v", b)) } // computeSplit calculates the portion of the bucket's count <= rhs (trim point). 
@@ -3204,14 +3221,14 @@ func computeSplit(b histogram.Bucket[float64], rhs float64, isPositive, isLinear return b.Count * fraction } -func computeBucketTrim(op parser.ItemType, b histogram.Bucket[float64], rhs float64, isPositive, isCustomBucket bool) (float64, float64) { +func computeBucketTrim(b histogram.Bucket[float64], rhs float64, isUpperTrim, isPositive, isCustomBucket bool) (float64, float64) { if math.IsInf(b.Lower, -1) || math.IsInf(b.Upper, 1) { - return handleInfinityBuckets(b, rhs) + return handleInfinityBuckets(isUpperTrim, b, rhs) } underCount := computeSplit(b, rhs, isPositive, isCustomBucket) - if op == parser.TRIM_UPPER { + if isUpperTrim { product := math.Abs(b.Lower) * math.Abs(rhs) return underCount, computeMidpoint(b, product, isCustomBucket, isPositive) } @@ -3221,15 +3238,15 @@ func computeBucketTrim(op parser.ItemType, b histogram.Bucket[float64], rhs floa } // Helper function to trim native histogram buckets. -func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser.ItemType) { +// TODO: move trimHistogram to model/histogram/float_histogram.go (making it a method of FloatHistogram). +func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, isUpperTrim bool) { updatedCount := 0.0 origSum := trimmedHist.Sum removedSum := 0.0 hasPositive, hasNegative := false, false isCustomBucket := trimmedHist.UsesCustomBuckets() - switch op { - case parser.TRIM_UPPER: + if isUpperTrim { // Calculate the fraction to keep for buckets that contain the trim value. // For TRIM_UPPER, we keep observations below the trim point (rhs). // Example: histogram / float. 
for i, iter := 0, trimmedHist.PositiveBucketIterator(); iter.Next(); i++ { @@ -3320,7 +3336,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser hasPositive = true } - keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, true, isCustomBucket) + keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, true, isCustomBucket) switch { case bucket.Lower >= rhs: @@ -3347,7 +3363,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser } hasNegative = true - keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, false, isCustomBucket) + keepCount, bucketMidpoint := computeBucketTrim(bucket, rhs, isUpperTrim, false, isCustomBucket) switch { case bucket.Lower >= rhs: @@ -3369,22 +3385,26 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser // Handle the zero count bucket. if trimmedHist.ZeroCount > 0 { keepCount := computeSplit(trimmedHist.ZeroBucket(), rhs, true, true) - if op == parser.TRIM_LOWER { + if !isUpperTrim { keepCount = trimmedHist.ZeroCount - keepCount } trimmedHist.ZeroCount = keepCount updatedCount += keepCount } - // Apply new sum. - newSum := origSum - removedSum + newSum := 0.0 + if updatedCount != 0 { + // Calculate new sum only when there are at least some observations remaining. + // Otherwise, make it zero. + newSum = origSum - removedSum - // Clamp correction. - if !hasNegative && newSum < 0 { - newSum = 0 - } - if !hasPositive && newSum > 0 { - newSum = 0 + // Clamp correction. + if !hasNegative && newSum < 0 { + newSum = 0 + } + if !hasPositive && newSum > 0 { + newSum = 0 + } } // Update the histogram's count and sum. 
@@ -3459,11 +3479,11 @@ func vectorElemBinop(op parser.ItemType, lhs, rhs float64, hlhs, hrhs *histogram return 0, hlhs.Copy().Div(rhs).Compact(0), true, nil, nil case parser.TRIM_UPPER: trimmedHist := hlhs.Copy() - trimHistogram(trimmedHist, rhs, op) + trimHistogram(trimmedHist, rhs, true) return 0, trimmedHist, true, nil, nil case parser.TRIM_LOWER: trimmedHist := hlhs.Copy() - trimHistogram(trimmedHist, rhs, op) + trimHistogram(trimmedHist, rhs, false) return 0, trimmedHist, true, nil, nil case parser.ADD, parser.SUB, parser.POW, parser.MOD, parser.EQLC, parser.NEQ, parser.GTR, parser.LSS, parser.GTE, parser.LTE, parser.ATAN2: return 0, nil, false, nil, annotations.NewIncompatibleTypesInBinOpInfo("histogram", parser.ItemTypeStr[op], "float", pos) diff --git a/promql/promqltest/testdata/native_histograms.test b/promql/promqltest/testdata/native_histograms.test index 8e3856f666..66542d6747 100644 --- a/promql/promqltest/testdata/native_histograms.test +++ b/promql/promqltest/testdata/native_histograms.test @@ -1924,9 +1924,27 @@ eval instant at 1m cbh >/ 15 eval instant at 1m cbh / +Inf + cbh{} {{schema:-53 custom_values:[5 10 15 20]}} + +eval instant at 1m cbh / -Inf + cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}} + +eval instant at 1m cbh >/ 0 + cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}} + +eval instant at 1m cbh / 0 zero_bucket{} {{count:7.5 sum:-18.77081528017131 z_bucket:2.5 z_bucket_w:0.01 buckets:[2 3]}} +load 1m + cbh_one_bucket {{schema:-53 sum:100.0 count:100 buckets:[100]}} + +# Skip [-Inf; +Inf] bucket (100). +eval instant at 1m cbh_one_bucket / 10.0 + cbh_one_bucket {{schema:-53 sum:0.0 count:0 buckets:[0]}} + +# Keep [-Inf; +Inf] bucket (100). +eval instant at 1m cbh_one_bucket / +Inf + cbh_one_bucket {{schema:-53 sum:0 count:0 buckets:[0]}} + +# Keep [-Inf; +Inf] bucket (100). 
+eval instant at 1m cbh_one_bucket >/ -Inf + cbh_one_bucket {{schema:-53 sum:100 count:100 buckets:[100]}} + +# Skip [-Inf; +Inf] bucket (100). +eval instant at 1m cbh_one_bucket / -10.0 + cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[0 100]}} + +# Skip [-Inf, 0] bucket (1). +eval instant at 1m cbh_two_buckets_split_at_zero >/ 0.0 + cbh_two_buckets_split_at_zero {{schema:-53 sum:33.0 count:100 custom_values:[0] buckets:[0 100]}} + +# Skip both buckets (1 and 100). +eval instant at 1m cbh_two_buckets_split_at_zero >/ 10.0 + cbh_two_buckets_split_at_zero {{schema:-53 sum:0.0 count:0 custom_values:[0] buckets:[0 0]}} + + +load 1m + cbh_two_buckets_split_at_positive {{schema:-53 sum:33 count:101 custom_values:[5] buckets:[1 100]}} + +# Skip (5; +Inf] bucket (100). +eval instant at 1m cbh_two_buckets_split_at_positive / -10.0 + cbh_two_buckets_split_at_positive {{schema:-53 sum:28.0 count:100 custom_values:[5] buckets:[0 100]}} + +# Keep both buckets (1 and 100). +eval instant at 1m cbh_two_buckets_split_at_positive >/ 0.0 + cbh_two_buckets_split_at_positive {{schema:-53 sum:33.0 count:101 custom_values:[5] buckets:[1 100]}} + +# Keep (5, 100] bucket (100) and 3/5 of [-Inf, 5] bucket (0.6 * 1). +eval instant at 1m cbh_two_buckets_split_at_positive >/ 2.0 + cbh_two_buckets_split_at_positive {{schema:-53 sum:32.6 count:100.6 custom_values:[5] buckets:[0.6 100]}} + +# Skip both buckets (1 and 100). +eval instant at 1m cbh_two_buckets_split_at_positive >/ 10.0 + cbh_two_buckets_split_at_positive {{schema:-53 sum:0.0 count:0 custom_values:[5] buckets:[0 0]}} + + +load 1m + cbh_two_buckets_split_at_negative {{schema:-53 sum:33 count:101 custom_values:[-5] buckets:[1 100]}} + +# Skip (-5; +Inf] bucket (100). +eval instant at 1m cbh_two_buckets_split_at_negative / -10.0 + cbh_two_buckets_split_at_negative {{schema:-53 sum:38.0 count:100 custom_values:[-5] buckets:[0 100]}} + +# Skip both buckets (1 and 100). 
+eval instant at 1m cbh_two_buckets_split_at_negative >/ -2.0 + cbh_two_buckets_split_at_negative {{schema:-53 custom_values:[-5]}} + +# Skip both buckets (1 and 100). +eval instant at 1m cbh_two_buckets_split_at_negative >/ 0.0 + cbh_two_buckets_split_at_negative {{schema:-53 custom_values:[-5]}} + +# Skip both buckets (1 and 100). +eval instant at 1m cbh_two_buckets_split_at_negative >/ 10.0 + cbh_two_buckets_split_at_negative {{schema:-53 sum:0.0 count:0 custom_values:[-5] buckets:[0 0]}} + + clear