apache · gengliangwang · May 17, 2026
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/CastUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/CastUtils.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions;
+
+import org.apache.spark.sql.errors.QueryExecutionErrors;
+import org.apache.spark.sql.types.DataType;
+import org.apache.spark.sql.types.DataTypes;
+
+/**
+ * Static helpers used by {@code Cast.doGenCode} (and corresponding eval
+ * paths) for ANSI overflow-checked narrowing conversions. The source and
+ * target {@link DataType} objects referenced by the overflow error message
+ * are held in {@code private static final} fields so the happy path
+ * performs no per-row {@code references[]} lookups.
+ */
+public final class CastUtils {
+
+  private CastUtils() {}
+
+  private static final DataType INT = DataTypes.IntegerType;
+  private static final DataType LONG = DataTypes.LongType;
+  private static final DataType FLOAT = DataTypes.FloatType;
+  private static final DataType DOUBLE = DataTypes.DoubleType;
+
+  // ----- integral narrowing -> int (ANSI: throw on overflow) -----
+
+  public static int longToIntExact(long v) {
+    if (v == (int) v) return (int) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, LONG, INT);
+  }
+
+  // ----- fractional -> int (ANSI: throw on overflow) -----
+  // Mirrors castFractionToIntegralTypeCode: floor(v) <= MAX && ceil(v) >= MIN.
+
+  public static int floatToIntExact(float v) {
+    if (Math.floor(v) <= Integer.MAX_VALUE && Math.ceil(v) >= Integer.MIN_VALUE) return (int) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, FLOAT, INT);
+  }
+
+  public static int doubleToIntExact(double v) {
+    if (Math.floor(v) <= Integer.MAX_VALUE && Math.ceil(v) >= Integer.MIN_VALUE) return (int) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, DOUBLE, INT);
+  }
+
+  // ----- fractional -> long (ANSI: throw on overflow) -----
+
+  public static long floatToLongExact(float v) {
+    if (Math.floor(v) <= Long.MAX_VALUE && Math.ceil(v) >= Long.MIN_VALUE) return (long) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, FLOAT, LONG);
+  }
+
+  public static long doubleToLongExact(double v) {
+    if (Math.floor(v) <= Long.MAX_VALUE && Math.ceil(v) >= Long.MIN_VALUE) return (long) v;
+    throw QueryExecutionErrors.castingCauseOverflowError(v, DOUBLE, LONG);
+  }
+}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -897,6 +897,10 @@ case class Cast(
       buildCast[Long](_, t => timestampToLong(t))
     case _: TimeType =>
       buildCast[Long](_, t => timeToLong(t))
+    case FloatType if ansiEnabled =>
+      b => CastUtils.floatToLongExact(b.asInstanceOf[Float])
+    case DoubleType if ansiEnabled =>
+      b => CastUtils.doubleToLongExact(b.asInstanceOf[Double])
     case x: NumericType if ansiEnabled =>
       val exactNumeric = PhysicalNumericType.exactNumeric(x)
       b => exactNumeric.toLong(b)
@@ -939,6 +943,12 @@ case class Cast(
       })
     case _: TimeType =>
       buildCast[Long](_, t => timeToLong(t).toInt)
+    case LongType if ansiEnabled =>
+      b => CastUtils.longToIntExact(b.asInstanceOf[Long])
+    case FloatType if ansiEnabled =>
+      b => CastUtils.floatToIntExact(b.asInstanceOf[Float])
+    case DoubleType if ansiEnabled =>
+      b => CastUtils.doubleToIntExact(b.asInstanceOf[Double])
     case x: NumericType if ansiEnabled =>
       val exactNumeric = PhysicalNumericType.exactNumeric(x)
       b => exactNumeric.toInt(b)
@@ -1982,22 +1992,40 @@ case class Cast(
     }
   }
 
+  /**
+   * Maps an integral source type to the lower-case prefix used when building the name of
+   * a `CastUtils` static method (for example `long` in `longToIntExact`).
+   *
+   * @param from source data type of the cast
+   * @return `"short"`, `"int"` or `"long"`
+   * @throws IllegalArgumentException if `from` is not one of the three supported types;
+   *         raised explicitly so the failure names the offending type instead of
+   *         surfacing as a bare `MatchError`
+   */
+  private[this] def integralPrefix(from: DataType): String = from match {
+    case ShortType => "short"
+    case IntegerType => "int"
+    case LongType => "long"
+    case other =>
+      throw new IllegalArgumentException(
+        s"No CastUtils integral narrowing helper for source type $other")
+  }
+
+  /**
+   * Maps a fractional source type to the lower-case prefix used when building the name of
+   * a `CastUtils` static method (for example `double` in `doubleToLongExact`).
+   *
+   * @param from source data type of the cast
+   * @return `"float"` or `"double"`
+   * @throws IllegalArgumentException if `from` is neither `FloatType` nor `DoubleType`;
+   *         raised explicitly so the failure names the offending type instead of
+   *         surfacing as a bare `MatchError`
+   */
+  private[this] def fractionalPrefix(from: DataType): String = from match {
+    case FloatType => "float"
+    case DoubleType => "double"
+    case other =>
+      throw new IllegalArgumentException(
+        s"No CastUtils fractional narrowing helper for source type $other")
+  }
+
+  /**
+   * Builds the generated-code snippet for an ANSI-mode cast from one integral type to a
+   * narrower one, throwing an overflow error when the value does not fit.
+   *
+   * @param ctx codegen context; used on the inline path to register `from` and `to` as
+   *            reference objects
+   * @param integralType Java name of the target primitive type, spliced into the code
+   * @param from source data type
+   * @param to target data type
+   * @return a `CastFunction` that assigns the narrowed value to `evPrim`, or throws the
+   *         error built by `QueryExecutionErrors.castingCauseOverflowError`
+   */
   private[this] def castIntegralTypeToIntegralTypeExactCode(
       ctx: CodegenContext,
       integralType: String,
       from: DataType,
       to: DataType): CastFunction = {
     assert(ansiEnabled)
-    val fromDt = ctx.addReferenceObj("from", from, from.getClass.getName)
-    val toDt = ctx.addReferenceObj("to", to, to.getClass.getName)
-    (c, evPrim, _) =>
-      code"""
-        if ($c == ($integralType) $c) {
-          $evPrim = ($integralType) $c;
-        } else {
-          throw QueryExecutionErrors.castingCauseOverflowError($c, $fromDt, $toDt);
-        }
-      """
+    // For an `int` target the generated code is a single call to a static `CastUtils`
+    // method whose name is derived from the source type (e.g. `longToIntExact`).
+    if (integralType == "int") {
+      val castUtils = classOf[CastUtils].getName
+      val method = s"${integralPrefix(from)}ToIntExact"
+      (c, evPrim, _) => code"$evPrim = $castUtils.$method($c);"
+    } else {
+      // Byte/short narrowing remains inline; to be refactored in a follow-up PR.
+      // `from` and `to` are registered as reference objects and appear only in the
+      // throw statement of the generated code.
+      val fromDt = ctx.addReferenceObj("from", from, from.getClass.getName)
+      val toDt = ctx.addReferenceObj("to", to, to.getClass.getName)
+      // The value fits when casting it to the target type leaves it unchanged.
+      (c, evPrim, _) =>
+        code"""
+          if ($c == ($integralType) $c) {
+            $evPrim = ($integralType) $c;
+          } else {
+            throw QueryExecutionErrors.castingCauseOverflowError($c, $fromDt, $toDt);
+          }
+        """
+    }
   }
 
 
@@ -2017,23 +2045,30 @@ case class Cast(
       from: DataType,
       to: DataType): CastFunction = {
     assert(ansiEnabled)
-    val (min, max) = lowerAndUpperBound(integralType)
-    val mathClass = classOf[Math].getName
-    val fromDt = ctx.addReferenceObj("from", from, from.getClass.getName)
-    val toDt = ctx.addReferenceObj("to", to, to.getClass.getName)
-    // When casting floating values to integral types, Spark uses the method `Numeric.toInt`
-    // Or `Numeric.toLong` directly. For positive floating values, it is equivalent to `Math.floor`;
-    // for negative floating values, it is equivalent to `Math.ceil`.
-    // So, we can use the condition `Math.floor(x) <= upperBound && Math.ceil(x) >= lowerBound`
-    // to check if the floating value x is in the range of an integral type after rounding.
-    (c, evPrim, _) =>
-      code"""
-        if ($mathClass.floor($c) <= $max && $mathClass.ceil($c) >= $min) {
-          $evPrim = ($integralType) $c;
-        } else {
-          throw QueryExecutionErrors.castingCauseOverflowError($c, $fromDt, $toDt);
-        }
-      """
+    if (integralType == "int" || integralType == "long") {
+      val castUtils = classOf[CastUtils].getName
+      val method = s"${fractionalPrefix(from)}To${integralType.capitalize}Exact"
+      (c, evPrim, _) => code"$evPrim = $castUtils.$method($c);"
+    } else {
+      // Byte/short narrowing remains inline; to be refactored in a follow-up PR.
+      val (min, max) = lowerAndUpperBound(integralType)
+      val mathClass = classOf[Math].getName
+      val fromDt = ctx.addReferenceObj("from", from, from.getClass.getName)
+      val toDt = ctx.addReferenceObj("to", to, to.getClass.getName)
+      // When casting floating values to integral types, Spark uses the method `Numeric.toInt`
+      // Or `Numeric.toLong` directly. For positive floating values, it is equivalent to
+      // `Math.floor`; for negative floating values, it is equivalent to `Math.ceil`.
+      // So, we can use the condition `Math.floor(x) <= upperBound && Math.ceil(x) >= lowerBound`
+      // to check if the floating value x is in the range of an integral type after rounding.
+      (c, evPrim, _) =>
+        code"""
+          if ($mathClass.floor($c) <= $max && $mathClass.ceil($c) >= $min) {
+            $evPrim = ($integralType) $c;
+          } else {
+            throw QueryExecutionErrors.castingCauseOverflowError($c, $fromDt, $toDt);
+          }
+        """
+    }
   }
 
   private[this] def castToByteCode(from: DataType, ctx: CodegenContext): CastFunction = from match {