diff --git a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt index e03dae8c072ac..89cbac8fa75f4 100644 --- a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt +++ b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-jdk25-results.txt @@ -2,83 +2,86 @@ Identity Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor Identity Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BooleanUpdater 0 0 0 17171.8 0.1 1.0X -ByteUpdater (INT32 -> Byte) 0 0 0 3679.8 0.3 0.2X -ShortUpdater (INT32 -> Short) 1 1 0 1662.3 0.6 0.1X -IntegerUpdater 0 0 0 10261.9 0.1 0.6X -LongUpdater 0 0 0 5130.7 0.2 0.3X -FloatUpdater 0 0 0 10255.9 0.1 0.6X -DoubleUpdater 0 0 0 5127.2 0.2 0.3X -BinaryUpdater 15 16 0 67.7 14.8 0.0X +BooleanUpdater 0 0 0 30347.8 0.0 1.0X +ByteUpdater (INT32 -> Byte) 0 0 0 7797.8 0.1 0.3X +ShortUpdater (INT32 -> Short) 0 0 0 3113.4 0.3 0.1X +IntegerUpdater 0 0 0 17590.9 0.1 0.6X +LongUpdater 0 0 0 7354.1 0.1 0.2X +FloatUpdater 0 0 0 14533.9 0.1 0.5X +DoubleUpdater 0 0 0 8754.3 0.1 0.3X +BinaryUpdater 8 8 0 130.8 7.6 0.0X ================================================================================================ Type-converting Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor Type-converting Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -IntegerToLongUpdater 0 0 0 6438.7 0.2 1.0X -IntegerToDoubleUpdater 0 0 0 6441.2 0.2 1.0X -FloatToDoubleUpdater 0 0 0 3199.5 0.3 0.5X -DateToTimestampNTZUpdater 1 1 0 884.9 1.1 0.1X -DowncastLongUpdater (INT64 -> Decimal(9,2)) 0 0 0 6713.8 0.1 1.0X +IntegerToLongUpdater 0 0 0 9826.5 0.1 1.0X +IntegerToDoubleUpdater 0 0 0 11575.6 0.1 1.2X +FloatToDoubleUpdater 0 0 0 5684.1 0.2 0.6X +DateToTimestampNTZUpdater 1 1 0 1766.3 0.6 0.2X +LongAsNanosUpdater (TimeType) 1 1 0 886.0 1.1 0.1X +DowncastLongUpdater (INT64 -> Decimal(9,2)) 0 0 0 11617.9 0.1 1.2X ================================================================================================ Rebase Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor -Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -IntegerWithRebaseUpdater (DATE legacy) 0 0 0 3664.5 0.3 1.0X -LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 0 0 0 2668.7 0.4 0.7X -LongAsMicrosUpdater (TIMESTAMP_MILLIS) 3 3 0 371.3 2.7 0.1X +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor +Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +IntegerWithRebaseUpdater (DATE legacy) 0 0 0 6456.3 0.2 1.0X +LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 0 0 0 4951.6 0.2 0.8X +LongAsMicrosUpdater (TIMESTAMP_MILLIS) 1 1 0 881.0 1.1 0.1X +DateToTimestampNTZWithRebaseUpdater (DATE legacy) 2 2 0 685.4 1.5 0.1X +LongAsMicrosRebaseUpdater (TIMESTAMP_MILLIS legacy) 1 1 0 837.8 1.2 0.1X ================================================================================================ Unsigned Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor Unsigned Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -UnsignedIntegerUpdater (UINT32 -> Long) 0 0 0 6183.9 0.2 1.0X -UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 17 17 0 60.4 16.6 0.0X +UnsignedIntegerUpdater (UINT32 -> Long) 0 0 0 11588.4 0.1 1.0X +UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 9 9 0 114.0 8.8 0.0X ================================================================================================ Decimal Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor Decimal Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -IntegerToDecimalUpdater 0 0 0 10268.1 0.1 1.0X -LongToDecimalUpdater 0 0 0 5122.2 0.2 0.5X -FixedLenByteArrayToDecimalUpdater 21 21 0 50.9 19.7 0.0X +IntegerToDecimalUpdater 0 0 0 17579.1 0.1 1.0X +LongToDecimalUpdater 0 0 0 8770.4 0.1 0.5X +FixedLenByteArrayToDecimalUpdater 11 12 0 94.6 10.6 0.0X ================================================================================================ FixedLenByteArray Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 25.0.3+9-LTS on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor FixedLenByteArray Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -FixedLenByteArrayUpdater (len=16 -> Binary) 21 21 1 50.3 19.9 1.0X -FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 7 7 0 152.7 6.6 3.0X -FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 8 8 0 127.7 7.8 2.5X +FixedLenByteArrayUpdater (len=16 -> Binary) 11 12 1 93.3 10.7 1.0X +FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 3 3 0 333.6 3.0 3.6X +FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 4 5 0 235.7 4.2 2.5X diff --git a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt index 828e685788773..16b337df1b143 100644 --- a/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt +++ b/sql/core/benchmarks/ParquetVectorUpdaterBenchmark-results.txt @@ -2,83 +2,86 @@ Identity Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10 on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor Identity Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BooleanUpdater 0 0 0 14640.0 0.1 1.0X -ByteUpdater (INT32 -> Byte) 0 0 0 3686.8 0.3 0.3X -ShortUpdater (INT32 -> Short) 1 1 0 2054.1 0.5 0.1X -IntegerUpdater 0 0 0 7759.1 0.1 0.5X -LongUpdater 0 0 0 3876.1 0.3 0.3X -FloatUpdater 0 0 0 7762.5 0.1 0.5X -DoubleUpdater 0 0 0 5123.2 0.2 0.3X -BinaryUpdater 15 15 0 70.1 14.3 0.0X +BooleanUpdater 0 0 0 38864.9 0.0 1.0X +ByteUpdater (INT32 -> Byte) 0 0 0 7842.7 0.1 0.2X +ShortUpdater (INT32 -> Short) 0 0 0 3017.2 0.3 0.1X +IntegerUpdater 0 0 0 17085.6 0.1 0.4X +LongUpdater 0 0 0 8404.3 0.1 0.2X +FloatUpdater 0 0 0 17584.7 0.1 0.5X +DoubleUpdater 0 0 0 8787.3 0.1 0.2X +BinaryUpdater 8 8 0 135.0 7.4 0.0X ================================================================================================ Type-converting Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10 on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor Type-converting Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -IntegerToLongUpdater 1 1 0 1281.0 0.8 1.0X -IntegerToDoubleUpdater 1 1 0 1550.0 0.6 1.2X -FloatToDoubleUpdater 1 1 0 1419.0 0.7 1.1X -DateToTimestampNTZUpdater 2 2 0 605.2 1.7 0.5X -DowncastLongUpdater (INT64 -> Decimal(9,2)) 1 1 0 1285.1 0.8 1.0X +IntegerToLongUpdater 1 1 0 2050.9 0.5 1.0X +IntegerToDoubleUpdater 0 1 0 2535.4 0.4 1.2X +FloatToDoubleUpdater 0 0 0 2548.2 0.4 1.2X +DateToTimestampNTZUpdater 18 19 0 58.8 17.0 0.0X +LongAsNanosUpdater (TimeType) 0 0 0 2253.7 0.4 1.1X +DowncastLongUpdater (INT64 -> Decimal(9,2)) 0 0 0 2276.0 0.4 1.1X ================================================================================================ Rebase Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor -Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -IntegerWithRebaseUpdater (DATE legacy) 0 0 0 2662.8 0.4 1.0X -LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 1 1 0 2084.1 0.5 0.8X -LongAsMicrosUpdater (TIMESTAMP_MILLIS) 2 2 0 454.8 2.2 0.2X +OpenJDK 64-Bit Server VM 17.0.19+10 on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor +Rebase Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +IntegerWithRebaseUpdater (DATE legacy) 0 0 0 4353.8 0.2 1.0X +LongWithRebaseUpdater (TIMESTAMP_MICROS legacy) 0 0 0 3487.0 0.3 0.8X +LongAsMicrosUpdater (TIMESTAMP_MILLIS) 0 0 0 2258.5 0.4 0.5X +DateToTimestampNTZWithRebaseUpdater (DATE legacy) 18 19 1 58.3 17.1 0.0X +LongAsMicrosRebaseUpdater (TIMESTAMP_MILLIS legacy) 1 1 0 1817.7 0.6 0.4X ================================================================================================ Unsigned Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10 on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor Unsigned Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -UnsignedIntegerUpdater (UINT32 -> Long) 1 1 0 1094.1 0.9 1.0X -UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 17 17 0 61.0 16.4 0.1X +UnsignedIntegerUpdater (UINT32 -> Long) 1 1 0 1979.6 0.5 1.0X +UnsignedLongUpdater (UINT64 -> Decimal(20,0)) 9 10 0 114.5 8.7 0.1X ================================================================================================ Decimal Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10 on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor Decimal Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -IntegerToDecimalUpdater 0 0 0 10261.0 0.1 1.0X -LongToDecimalUpdater 0 0 0 5118.9 0.2 0.5X -FixedLenByteArrayToDecimalUpdater 21 21 0 51.0 19.6 0.0X +IntegerToDecimalUpdater 0 0 0 17605.7 0.1 1.0X +LongToDecimalUpdater 0 0 0 8786.5 0.1 0.5X +FixedLenByteArrayToDecimalUpdater 11 12 0 93.0 10.8 0.0X ================================================================================================ FixedLenByteArray Updaters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.19+10-LTS on Linux 6.17.0-1013-azure -AMD EPYC 7763 64-Core Processor +OpenJDK 64-Bit Server VM 17.0.19+10 on Linux 7.0.0-1004-azure +AMD EPYC 9V45 96-Core Processor FixedLenByteArray Updaters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -FixedLenByteArrayUpdater (len=16 -> Binary) 19 19 0 55.3 18.1 1.0X -FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 7 7 0 160.2 6.2 2.9X -FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 9 9 0 123.3 8.1 2.2X +FixedLenByteArrayUpdater (len=16 -> Binary) 11 11 0 97.4 10.3 1.0X +FixedLenByteArrayAsIntUpdater (len=4 -> Decimal(9,2)) 3 3 0 336.9 3.0 3.5X +FixedLenByteArrayAsLongUpdater (len=8 -> Decimal(18,4)) 4 5 0 234.7 4.3 2.4X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala index a78593096d5c8..6cbd6ede522f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterBenchmark.scala @@ -48,7 +48,7 @@ import org.apache.spark.sql.types._ * `DowncastLong`. * C. Rebase Updaters -- date/timestamp legacy-calendar rebase variants. * `IntegerWithRebase` (DATE), `LongWithRebase` (TIMESTAMP_MICROS), - * `LongAsMicros`. + * `LongAsMicros`, `DateToTimestampNTZWithRebase`, `LongAsMicrosRebase`. * D. Unsigned Updaters -- `UnsignedInteger`, `UnsignedLong`. * E. Decimal Updaters -- `IntegerToDecimal`, `LongToDecimal`, * `BinaryToDecimal`, `FixedLenByteArrayToDecimal`. @@ -264,6 +264,10 @@ object ParquetVectorUpdaterBenchmark extends BenchmarkBase { TimestampNTZType, descriptor(PrimitiveTypeName.INT32, LogicalTypeAnnotation.dateType()), longVec, intBytes) + addReadValuesCase(benchmark, "LongAsNanosUpdater (TimeType)", + TimeType(), + descriptor(PrimitiveTypeName.INT64), + longVec, longBytes) // 32-bit-decimal target with INT64 source routes via canReadAsLongDecimal + // is32BitDecimalType, both TRUE here, hence DowncastLongUpdater. addReadValuesCase(benchmark, "DowncastLongUpdater (INT64 -> Decimal(9,2))", @@ -303,6 +307,17 @@ object ParquetVectorUpdaterBenchmark extends BenchmarkBase { descriptor(PrimitiveTypeName.INT64, LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)), longVec, longBytes) + addReadValuesCase(benchmark, "DateToTimestampNTZWithRebaseUpdater (DATE legacy)", + TimestampNTZType, + descriptor(PrimitiveTypeName.INT32, LogicalTypeAnnotation.dateType()), + longVec, intBytes, + datetimeRebaseMode = "LEGACY") + addReadValuesCase(benchmark, "LongAsMicrosRebaseUpdater (TIMESTAMP_MILLIS legacy)", + TimestampType, + descriptor(PrimitiveTypeName.INT64, + LogicalTypeAnnotation.timestampType(true, LogicalTypeAnnotation.TimeUnit.MILLIS)), + longVec, longBytes, + datetimeRebaseMode = "LEGACY") benchmark.run() }