promql: Implement </ and >/ operators for trimming native histograms.

This implements the TRIM_UPPER (</) and TRIM_LOWER (>/) operators,
which remove the observations above or below a threshold, respectively,
from a native histogram. Buckets entirely outside the kept range are
zeroed out, and the bucket containing the threshold is split by
interpolation. The sum is recalculated from the bucket counts that
remain within the kept range.

Fixes #14651.

Signed-off-by: sujal shah <sujalshah28092004@gmail.com>
This commit is contained in:
sujal shah
2025-03-27 04:24:18 +05:30
committed by Linas Medziunas
parent e8bfcfcf1a
commit 8528d5c446
2 changed files with 119 additions and 138 deletions

View File

@@ -3140,101 +3140,83 @@ func scalarBinop(op parser.ItemType, lhs, rhs float64) float64 {
panic(fmt.Errorf("operator %q not allowed for Scalar operations", op))
}
// processCustomBucket handles custom bucket processing for histogram trimming.
// It returns the count to keep and the bucket midpoint for sum calculations.
func processCustomBucket(
bucket histogram.Bucket[float64],
rhs float64,
op parser.ItemType,
) (keepCount, bucketMidpoint float64) {
// Midpoint calculation
switch {
case math.IsInf(bucket.Lower, -1):
// First bucket: no lower bound, assume midpoint is near upper bound.
bucketMidpoint = bucket.Upper
case math.IsInf(bucket.Upper, 1):
bucketMidpoint = bucket.Lower
default:
bucketMidpoint = (bucket.Lower + bucket.Upper) / 2
func handleInfinityBuckets(b histogram.Bucket[float64], le float64) (float64, float64) {
var underCount, bucketMidpoint float64
if math.IsInf(b.Lower, -1) {
switch {
case le >= b.Upper:
// le is greater than or equal to upper bound. Full count applies
underCount = b.Count
bucketMidpoint = b.Upper
case le < 0:
// le is negative and below the upper bound: without a finite lower bound the share cannot be interpolated, so assume half of the count is at or below le.
underCount = b.Count * 0.5
bucketMidpoint = b.Upper
default:
// Interpolate linearly, treating the lower bound as 0.
fraction := le / b.Upper
underCount = b.Count * fraction
bucketMidpoint = le / 2
}
return underCount, bucketMidpoint
}
// Fractional keepCount calculation
switch op {
case parser.TRIM_UPPER:
switch {
case math.IsInf(bucket.Lower, -1):
// Special case for -Inf lower bound
if rhs >= bucket.Upper {
// Trim point is above bucket upper bound, keep all
keepCount = bucket.Count
} else {
// Trim point is within bucket or below, keep none
keepCount = 0
}
case math.IsInf(bucket.Upper, 1):
// Special case for +Inf upper bound
if rhs <= bucket.Lower {
// Trim point is below bucket lower bound, keep none
keepCount = 0
} else {
// Trim point is within the bucket, keep a portion
// Since we can't interpolate with +Inf, assume keep half for simplicity
// Another approach would be to use a different interpolation scheme
keepCount = bucket.Count * 0.5
}
default:
// Normal case - finite bounds
switch {
case bucket.Upper <= rhs:
// Bucket entirely below trim point - keep all
keepCount = bucket.Count
case bucket.Lower < rhs:
// Bucket contains trim point - interpolate
fraction := (rhs - bucket.Lower) / (bucket.Upper - bucket.Lower)
keepCount = bucket.Count * fraction
default:
// Bucket entirely above trim point - discard
keepCount = 0
}
}
case parser.TRIM_LOWER:
switch {
case math.IsInf(bucket.Upper, 1):
// Special case for +Inf upper bound
if rhs <= bucket.Lower {
keepCount = bucket.Count
} else {
keepCount = 0
}
case math.IsInf(bucket.Lower, -1):
// Special case for -Inf lower bound
if rhs >= bucket.Upper {
keepCount = 0
} else {
keepCount = bucket.Count * 0.5
}
default:
switch {
case bucket.Lower >= rhs:
keepCount = bucket.Count
case bucket.Upper > rhs:
fraction := (bucket.Upper - rhs) / (bucket.Upper - bucket.Lower)
keepCount = bucket.Count * fraction
default:
keepCount = 0
}
if math.IsInf(b.Upper, 1) {
if le <= b.Lower {
underCount = 0
bucketMidpoint = b.Lower
} else {
underCount = b.Count * 0.5
bucketMidpoint = b.Lower
}
return underCount, bucketMidpoint
}
return keepCount, bucketMidpoint
return underCount, bucketMidpoint
}
func computeBucketTrim(op parser.ItemType, bucket histogram.Bucket[float64], rhs float64, isPostive, isCustomBucket bool) (float64, float64) {
if isCustomBucket {
return processCustomBucket(bucket, rhs, op)
// computeSplit calculates the portion of the bucket's count <= le (trim point).
func computeSplit(b histogram.Bucket[float64], le float64, isPositive, isCustom bool) float64 {
if le <= b.Lower {
return 0
}
return computeExponentialTrim(bucket, rhs, isPostive, op)
if le >= b.Upper {
return b.Count
}
var fraction float64
switch {
case isCustom || (b.Lower <= 0 && b.Upper >= 0):
fraction = (le - b.Lower) / (b.Upper - b.Lower)
default:
// Exponential interpolation
logLower := math.Log2(math.Abs(b.Lower))
logUpper := math.Log2(math.Abs(b.Upper))
logV := math.Log2(math.Abs(le))
if isPositive {
fraction = (logV - logLower) / (logUpper - logLower)
} else {
fraction = 1 - ((logV - logUpper) / (logLower - logUpper))
}
}
underCount := b.Count * fraction
return underCount
}
// computeBucketTrim returns, for a single bucket b, the count that survives
// trimming at the threshold rhs and the midpoint that represents the kept
// part of the bucket.
//
// For parser.TRIM_UPPER the kept part is the share of the bucket at or below
// rhs; for any other op (TRIM_LOWER) it is the complementary share above rhs.
// isPositive and isCustomBucket are forwarded to computeSplit and
// computeMidpoint, which choose the interpolation scheme and the kind of
// midpoint.
func computeBucketTrim(op parser.ItemType, b histogram.Bucket[float64], rhs float64, isPositive, isCustomBucket bool) (float64, float64) {
	isUpperTrim := op == parser.TRIM_UPPER

	if math.IsInf(b.Lower, -1) || math.IsInf(b.Upper, 1) {
		underCount, midpoint := handleInfinityBuckets(b, rhs)
		if isUpperTrim {
			return underCount, midpoint
		}
		// handleInfinityBuckets always reports the share at or below rhs, so a
		// lower trim has to keep the complement, exactly as the finite-bounds
		// path below does.
		if math.IsInf(b.Lower, -1) && !math.IsInf(b.Upper, 1) && rhs >= 0 && rhs < b.Upper {
			// This is the case in which handleInfinityBuckets interpolates
			// linearly over [0, b.Upper] and returns the midpoint of [0, rhs].
			// The part kept by a lower trim is (rhs, b.Upper] instead.
			midpoint = (rhs + b.Upper) / 2
		}
		return b.Count - underCount, midpoint
	}

	underCount := computeSplit(b, rhs, isPositive, isCustomBucket)
	if isUpperTrim {
		// Kept range is bounded by b.Lower and rhs; computeMidpoint derives
		// the midpoint from the product of the two absolute bounds.
		return underCount, computeMidpoint(b, math.Abs(b.Lower)*math.Abs(rhs), isCustomBucket, isPositive)
	}
	// Kept range is bounded by rhs and b.Upper.
	return b.Count - underCount, computeMidpoint(b, math.Abs(rhs)*math.Abs(b.Upper), isCustomBucket, isPositive)
}
// Helper function to trim native histogram buckets.
@@ -3252,8 +3234,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
for i, iter := 0, trimmedHist.PositiveBucketIterator(); iter.Next(); i++ {
hasPositive = true
bucket := iter.At()
var keepCount, bucketMidpoint float64
keepCount, bucketMidpoint = computeBucketTrim(op, bucket, rhs, true, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, true, isCustomBucket)
// Bucket is entirely below the trim point - keep all
switch {
@@ -3280,8 +3261,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
for i, iter := 0, trimmedHist.NegativeBucketIterator(); iter.Next(); i++ {
hasNegative = true
bucket := iter.At()
var keepCount, bucketMidpoint float64
keepCount, bucketMidpoint = computeBucketTrim(op, bucket, rhs, false, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, false, isCustomBucket)
switch {
case bucket.Upper <= rhs:
@@ -3305,8 +3285,7 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
for i, iter := 0, trimmedHist.PositiveBucketIterator(); iter.Next(); i++ {
hasPositive = true
bucket := iter.At()
var keepCount, bucketMidpoint float64
keepCount, bucketMidpoint = computeBucketTrim(op, bucket, rhs, true, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, true, isCustomBucket)
switch {
case bucket.Lower >= rhs:
@@ -3330,8 +3309,8 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
for i, iter := 0, trimmedHist.NegativeBucketIterator(); iter.Next(); i++ {
hasNegative = true
bucket := iter.At()
var keepCount, bucketMidpoint float64
keepCount, bucketMidpoint = computeBucketTrim(op, bucket, rhs, false, isCustomBucket)
keepCount, bucketMidpoint := computeBucketTrim(op, bucket, rhs, false, isCustomBucket)
switch {
case bucket.Lower >= rhs:
updatedCount += bucket.Count
@@ -3403,36 +3382,19 @@ func trimHistogram(trimmedHist *histogram.FloatHistogram, rhs float64, op parser
trimmedHist.Compact(0)
}
func computeExponentialTrim(bucket histogram.Bucket[float64], rhs float64, isPositive bool, op parser.ItemType) (float64, float64) {
var fraction, bucketMidpoint, keepCount float64
logLower := math.Log2(math.Abs(bucket.Lower))
logUpper := math.Log2(math.Abs(bucket.Upper))
logRHS := math.Log2(math.Abs(rhs))
switch op {
case parser.TRIM_UPPER:
func computeMidpoint(b histogram.Bucket[float64], product float64, isCustom, isPositive bool) float64 {
midpoint := func(product float64, isPositive bool) float64 {
if isPositive {
fraction = (logRHS - logLower) / (logUpper - logLower)
bucketMidpoint = math.Sqrt(bucket.Lower * rhs)
} else {
fraction = 1 - ((logRHS - logUpper) / (logLower - logUpper))
bucketMidpoint = -math.Sqrt(math.Abs(bucket.Lower) * math.Abs(rhs))
}
case parser.TRIM_LOWER:
if isPositive {
fraction = (logUpper - logRHS) / (logUpper - logLower)
bucketMidpoint = math.Sqrt(rhs * bucket.Upper)
} else {
fraction = (logRHS - logUpper) / (logLower - logUpper)
bucketMidpoint = -math.Sqrt(math.Abs(rhs) * math.Abs(bucket.Upper))
return math.Sqrt(product)
}
return -math.Sqrt(product)
}
keepCount = bucket.Count * fraction
if isCustom {
return (b.Lower + b.Upper) / 2
}
return keepCount, bucketMidpoint
return midpoint(product, isPositive)
}
// vectorElemBinop evaluates a binary operation between two Vector elements.

View File

@@ -1878,52 +1878,71 @@ load 1m
h_test {{schema:0 sum:123.75 count:34 z_bucket:1 z_bucket_w:0.001 buckets:[2 4 8 16] n_buckets:[1 2]}}
h_test_2 {{schema:2 sum:12.8286080906 count:28 z_bucket:1 z_bucket_w:0.001 buckets:[1 2 4 7 3] n_buckets:[1 5 3 1]}}
cbh {{schema:-53 sum:172.5 count:15 custom_values:[5 10 15 20] buckets:[1 6 4 3 1]}}
cbh_has_neg {{schema:-53 sum:172.5 count:15 custom_values:[-10 5 10 15 20] buckets:[2 1 6 4 3 1]}}
zero_bucket {{schema:0 sum:-6.75 z_bucket:5 z_bucket_w:0.01 buckets:[2 3] n_buckets:[1 2 3]}}
# Native Histogram: Exponential Bucket Interpolation Tests
# Exponential buckets: trim uses exponential interpolation if cutoff is inside a bucket
eval instant at 1m h_test_2 </ 1.13
{__name__="h_test_2"} {{schema:2 count:13.410582181123704 sum:-9.282809901015558 z_bucket:1 z_bucket_w:0.001 buckets:[1 1.410582181123704] n_buckets:[1 5 3 1]}}
h_test_2 {{schema:2 count:13.410582181123704 sum:-9.282809901015558 z_bucket:1 z_bucket_w:0.001 buckets:[1 1.410582181123704] n_buckets:[1 5 3 1]}}
eval instant at 1m h_test_2 >/ 1.13
{__name__="h_test_2"} {{schema:2 count:14.589417818876296 sum:-1.5258511531197865 z_bucket_w:0.001 offset:1 buckets:[0.589417818876296 4 7 3]}}
h_test_2 {{schema:2 count:14.589417818876296 sum:-1.5258511531197865 z_bucket_w:0.001 offset:1 buckets:[0.589417818876296 4 7 3]}}
eval instant at 1m h_test_2 >/ -1.3
{__name__="h_test_2"} {{schema:2 count:25.54213947904476 sum:13.099057472672072 z_bucket:1 z_bucket_w:0.001 buckets:[1 2 4 7 3] n_buckets:[1 5 1.54213947904476]}}
h_test_2 {{schema:2 count:25.54213947904476 sum:13.099057472672072 z_bucket:1 z_bucket_w:0.001 buckets:[1 2 4 7 3] n_buckets:[1 5 1.54213947904476]}}
eval instant at 1m h_test_2 </ -1.3
{__name__="h_test_2"} {{schema:2 count:2.45786052095524 sum:-16.03281816946792 z_bucket_w:0.001 n_offset:2 n_buckets:[1.45786052095524 1]}}
h_test_2 {{schema:2 count:2.45786052095524 sum:-16.03281816946792 z_bucket_w:0.001 n_offset:2 n_buckets:[1.45786052095524 1]}}
# Native Histogram: Linear Bucket Trimming Tests
# Exponential buckets: trim on bucket boundary uses no interpolation
eval instant at 1m h_test </ 2
{__name__="h_test"} {{count:10 sum:10.612915010152392 z_bucket:1 z_bucket_w:0.001 buckets:[2 4] n_buckets:[1 2]}}
h_test{} {{count:10 sum:10.612915010152392 z_bucket:1 z_bucket_w:0.001 buckets:[2 4] n_buckets:[1 2]}}
eval instant at 1m h_test >/ 2
{__name__="h_test"} {{count:24 sum:113.14339828220179 z_bucket_w:0.001 offset:2 buckets:[8 16]}}
h_test{} {{count:24 sum:113.14339828220179 z_bucket_w:0.001 offset:2 buckets:[8 16]}}
eval instant at 1m h_test >/ -1
{__name__="h_test"} {{count:32 sum:120.92157287525382 z_bucket:1 z_bucket_w:0.001 buckets:[2 4 8 16] n_buckets:[1]}}
h_test{} {{count:32 sum:120.92157287525382 z_bucket:1 z_bucket_w:0.001 buckets:[2 4 8 16] n_buckets:[1]}}
eval instant at 1m h_test </ -1
{__name__="h_test"} {{count:2 sum:2.834740417100363 z_bucket_w:0.001 n_offset:1 n_buckets:[2]}}
h_test{} {{count:2 sum:2.834740417100363 z_bucket_w:0.001 n_offset:1 n_buckets:[2]}}
# Custom Buckets: Trim Operation Tests
# Custom buckets: trim on bucket boundary without interpolation
eval instant at 1m cbh </ 13
{__name__="cbh"} {{schema:-53 count:9.4 sum:80 custom_values:[5 10 15 20] buckets:[1 6 2.4]}}
cbh{} {{schema:-53 count:9.4 sum:80 custom_values:[5 10 15 20] buckets:[1 6 2.4]}}
eval instant at 1m cbh >/ 13
{__name__="cbh"} {{schema:-53 count:5.6 sum:92.5 custom_values:[5 10 15 20] offset:2 buckets:[1.6 3 1]}}
cbh{} {{schema:-53 count:5.6 sum:92.5 custom_values:[5 10 15 20] offset:2 buckets:[1.6 3 1]}}
eval instant at 1m cbh </ 15
{__name__="cbh"} {{schema:-53 count:11 sum:100 custom_values:[5 10 15 20] buckets:[1 6 4]}}
cbh{} {{schema:-53 count:11 sum:100 custom_values:[5 10 15 20] buckets:[1 6 4]}}
eval instant at 1m cbh >/ 15
{__name__="cbh"} {{schema:-53 count:4 sum:72.5 custom_values:[5 10 15 20] offset:3 buckets:[3 1]}}
cbh{} {{schema:-53 count:4 sum:72.5 custom_values:[5 10 15 20] offset:3 buckets:[3 1]}}
# Custom buckets: trim uses linear interpolation if cutoff is inside a bucket
eval instant at 1m cbh </ 7.5
cbh{} {{schema:-53 count:4 sum:27.5 custom_values:[5 10 15 20] buckets:[1 3]}}
# Custom buckets: trim uses half of overflow bucket in interpolation if cutoff is above last bucket
eval instant at 1m cbh </ 50
cbh{} {{schema:-53 count:14.5 sum:162.5 custom_values:[5 10 15 20] buckets:[1 6 4 3 0.5]}}
# Custom buckets: negative values
eval instant at 1m cbh_has_neg </ 2
cbh_has_neg{} {{schema:-53 count:2.8 sum:5.5 custom_values:[-10 5 10 15 20] buckets:[2 0.8]}}
eval instant at 1m cbh_has_neg </ -4
cbh_has_neg{} {{schema:-53 count:2.4 sum:6.5 custom_values:[-10 5 10 15 20] buckets:[2 0.4]}}
eval instant at 1m cbh_has_neg </ -15
cbh_has_neg{} {{schema:-53 count:1 sum:17.5 custom_values:[-10 5 10 15 20] buckets:[1]}}
# Zero Bucket Edge Case: Interpolation Around Zero
eval instant at 1m zero_bucket </ -0.005
{__name__="zero_bucket"} {{count:7.25 sum:-12.40685424949238 z_bucket:1.25 z_bucket_w:0.01 n_buckets:[1 2 3]}}
zero_bucket{} {{count:7.25 sum:-12.40685424949238 z_bucket:1.25 z_bucket_w:0.01 n_buckets:[1 2 3]}}
eval instant at 1m zero_bucket >/ 0
{__name__="zero_bucket"} {{count:7.5 sum:-18.77081528017131 z_bucket:2.5 z_bucket_w:0.01 buckets:[2 3]}}
zero_bucket{} {{count:7.5 sum:-18.77081528017131 z_bucket:2.5 z_bucket_w:0.01 buckets:[2 3]}}
clear