Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2564,11 +2564,29 @@ object DecimalAggregates extends Rule[LogicalPlan] {
/** Maximum number of decimal digits representable precisely in a Double */
private val MAX_DOUBLE_DIGITS = 15

/**
 * Largest inner (pre-Cast) decimal precision for which the widened-Cast AVG
 * peel is applied (the peel arm is guarded by `p <= AVG_PEEL_MAX_INNER_PRECISION`).
 *
 * This is deliberately tighter than the un-widened AVG fast path, whose guard
 * `prec + 4 <= MAX_DOUBLE_DIGITS` admits any `prec <= 11`. Keeping the peel to
 * a strict subset of those precisions leaves the SPARK-37024 Double-regime
 * exposure unchanged.
 */
private val AVG_PEEL_MAX_INNER_PRECISION = 7

/**
 * Extractor for a decimal-to-decimal `Cast` that keeps the scale unchanged and
 * does not reduce the precision.
 *
 * On a match it yields `(child, childPrecision, castPrecision, scale)`.
 * A child that is itself a `CheckOverflow` is never matched, so overflow
 * semantics on the unscaled value are preserved.
 */
private object WidenedDecimalChild {
  def unapply(e: Expression): Option[(Expression, Int, Int, Int)] = e match {
    case Cast(
          child @ DecimalExpression(childPrec, childScale),
          DecimalType.Fixed(castPrec, castScale),
          _,
          _) =>
      // Same scale and a target precision at least as large as the source.
      val scalePreservingWiden = childScale == castScale && castPrec >= childPrec
      val overflowChecked = child match {
        case _: CheckOverflow => true
        case _ => false
      }
      if (scalePreservingWiden && !overflowChecked) {
        Some((child, childPrec, castPrec, childScale))
      } else {
        None
      }
    case _ => None
  }
}

def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(
_.containsAnyPattern(SUM, AVERAGE), ruleId) {
case q: LogicalPlan => q.transformExpressionsDownWithPruning(
_.containsAnyPattern(SUM, AVERAGE), ruleId) {
case we @ WindowExpression(ae @ AggregateExpression(af, _, _, _, _), _) => af match {
// Window arm: `ExtractWindowExpressions` hoists composite children
// (here the widening Cast) into a child Project, so widened-Cast
// peel is unreachable from this expression-level rule.
case Sum(e @ DecimalExpression(prec, scale), _) if prec + 10 <= MAX_LONG_DIGITS =>
MakeDecimal(we.copy(windowFunction = ae.copy(aggregateFunction = Sum(UnscaledValue(e)))),
prec + 10, scale)
Expand All @@ -2583,9 +2601,27 @@ object DecimalAggregates extends Rule[LogicalPlan] {
case _ => we
}
case ae @ AggregateExpression(af, _, _, _, _) => af match {
case Sum(WidenedDecimalChild(inner, p, pPrime, s), _)
if p + 10 <= MAX_LONG_DIGITS =>
Cast(
MakeDecimal(
ae.copy(aggregateFunction = Sum(UnscaledValue(inner))),
p + 10, s),
DecimalType.bounded(pPrime + 10, s),
Option(conf.sessionLocalTimeZone))

case Sum(e @ DecimalExpression(prec, scale), _) if prec + 10 <= MAX_LONG_DIGITS =>
MakeDecimal(ae.copy(aggregateFunction = Sum(UnscaledValue(e))), prec + 10, scale)

// Ordered before the un-widened Average arm: when pPrime in [8, 11],
// the outer Cast's DecimalType would otherwise match that arm first.
case Average(WidenedDecimalChild(inner, p, pPrime, s), _)
if p <= AVG_PEEL_MAX_INNER_PRECISION =>
val newAggExpr = ae.copy(aggregateFunction = Average(UnscaledValue(inner)))
Cast(
Divide(newAggExpr, Literal.create(math.pow(10.0, s), DoubleType)),
DecimalType.bounded(pPrime + 4, s + 4), Option(conf.sessionLocalTimeZone))

case Average(e @ DecimalExpression(prec, scale), _) if prec + 4 <= MAX_DOUBLE_DIGITS =>
val newAggExpr = ae.copy(aggregateFunction = Average(UnscaledValue(e)))
Cast(
Expand Down

Large diffs are not rendered by default.

74 changes: 74 additions & 0 deletions sql/core/benchmarks/DecimalAggregatesBenchmark-jdk21-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
================================================================================================
DecimalAggregates SUM widened-cast peel (Aggregate)
================================================================================================

OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
A1 p=7 s=2 p'=8: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 2178 2236 56 4.6 217.8 1.0X
widened cast, peel off 2369 2381 9 4.2 236.9 0.9X
widened cast, peel on 2105 2118 12 4.8 210.5 1.0X

OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
A2 p=7 s=2 p'=17: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 2103 2115 17 4.8 210.3 1.0X
widened cast, peel off 2366 2377 7 4.2 236.6 0.9X
widened cast, peel on 2100 2109 11 4.8 210.0 1.0X

OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
A3 p=5 s=0 p'=6: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 2117 2138 29 4.7 211.7 1.0X
widened cast, peel off 2403 2416 13 4.2 240.3 0.9X
widened cast, peel on 2157 2164 7 4.6 215.7 1.0X

OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
A4 p=5 s=0 p'=15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 2151 2157 7 4.6 215.1 1.0X
widened cast, peel off 2420 2427 10 4.1 242.0 0.9X
widened cast, peel on 2152 2159 9 4.6 215.2 1.0X


================================================================================================
DecimalAggregates AVG widened-cast peel (Aggregate)
================================================================================================

OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
B1 p=7 s=2 p'=8: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 2130 2136 5 4.7 213.0 1.0X
widened cast, peel off 2358 2367 15 4.2 235.8 0.9X
widened cast, peel on 2140 2150 7 4.7 214.0 1.0X

OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
B2 p=7 s=2 p'=12: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 2147 2151 3 4.7 214.7 1.0X
widened cast, peel off 2359 2361 2 4.2 235.9 0.9X
widened cast, peel on 2126 2161 20 4.7 212.6 1.0X

OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
B3 p=5 s=0 p'=6: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 2173 2185 9 4.6 217.3 1.0X
widened cast, peel off 2405 2413 7 4.2 240.5 0.9X
widened cast, peel on 2167 2177 12 4.6 216.7 1.0X

OpenJDK 64-Bit Server VM 21.0.11+10-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
B4 p=5 s=0 p'=15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 2173 2179 7 4.6 217.3 1.0X
widened cast, peel off 2393 2400 11 4.2 239.3 0.9X
widened cast, peel on 2172 2178 5 4.6 217.2 1.0X


74 changes: 74 additions & 0 deletions sql/core/benchmarks/DecimalAggregatesBenchmark-jdk25-results.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
================================================================================================
DecimalAggregates SUM widened-cast peel (Aggregate)
================================================================================================

OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
A1 p=7 s=2 p'=8: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 1194 1230 57 8.4 119.4 1.0X
widened cast, peel off 1421 1433 11 7.0 142.1 0.8X
widened cast, peel on 1181 1188 5 8.5 118.1 1.0X

OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
A2 p=7 s=2 p'=17: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 1174 1189 12 8.5 117.4 1.0X
widened cast, peel off 1401 1414 8 7.1 140.1 0.8X
widened cast, peel on 1169 1178 8 8.6 116.9 1.0X

OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
A3 p=5 s=0 p'=6: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 1245 1254 10 8.0 124.5 1.0X
widened cast, peel off 1498 1503 5 6.7 149.8 0.8X
widened cast, peel on 1222 1232 10 8.2 122.2 1.0X

OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
A4 p=5 s=0 p'=15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 1234 1238 3 8.1 123.4 1.0X
widened cast, peel off 1473 1478 7 6.8 147.3 0.8X
widened cast, peel on 1242 1255 16 8.1 124.2 1.0X


================================================================================================
DecimalAggregates AVG widened-cast peel (Aggregate)
================================================================================================

OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
B1 p=7 s=2 p'=8: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 1178 1185 9 8.5 117.8 1.0X
widened cast, peel off 1434 1440 8 7.0 143.4 0.8X
widened cast, peel on 1232 1235 3 8.1 123.2 1.0X

OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
B2 p=7 s=2 p'=12: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 1222 1229 7 8.2 122.2 1.0X
widened cast, peel off 1434 1444 10 7.0 143.4 0.9X
widened cast, peel on 1216 1223 6 8.2 121.6 1.0X

OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
B3 p=5 s=0 p'=6: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 1267 1274 6 7.9 126.7 1.0X
widened cast, peel off 1505 1509 4 6.6 150.5 0.8X
widened cast, peel on 1272 1277 7 7.9 127.2 1.0X

OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure
Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz
B4 p=5 s=0 p'=15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
native (no cast, rule on) 1269 1275 5 7.9 126.9 1.0X
widened cast, peel off 1494 1501 9 6.7 149.4 0.8X
widened cast, peel on 1268 1274 6 7.9 126.8 1.0X


Loading