diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index e83b178e4dc7..903517c10774 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -587,16 +587,12 @@ protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs, // key as null, too. // for (int column : outerSmallTableKeyColumnMap) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; + batch.cols[column].clearValue(batchIndex); } // Small table values are set to null. for (int column : smallTableValueColumnMap) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; + batch.cols[column].clearValue(batchIndex); } } } @@ -746,15 +742,13 @@ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws Hi // for (int column : outerSmallTableKeyColumnMap) { ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[0] = true; + colVector.clearValue(0); colVector.isRepeating = true; } for (int column : smallTableValueColumnMap) { ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[0] = true; + colVector.clearValue(0); colVector.isRepeating = true; } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java new file mode 100644 index 000000000000..35553d9cb445 --- /dev/null +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestVectorMapJoinOuterGenerateResultOperator.java @@ -0,0 +1,361 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.IntervalDayTimeColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VoidColumnVector; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * HIVE-29598: verifies {@link VectorMapJoinOuterGenerateResultOperator} clears + * every small-table slot for unmatched rows, so stale values cannot carry over + * past the null marking. + */ +class TestVectorMapJoinOuterGenerateResultOperator { + + /** Concrete subclass that exposes the generateOuterNulls* methods to tests. */ + private static final class TestableOuterOp extends VectorMapJoinOuterGenerateResultOperator { + @Override + protected String getLoggingPrefix() { + throw new UnsupportedOperationException("stubbed only to instantiate abstract class under test"); + } + + @Override + public void processBatch(VectorizedRowBatch batch) { + throw new UnsupportedOperationException("stubbed only to instantiate abstract class under test"); + } + } + + /** + * Records {@code clearSlotValue} invocations to verify the operator dispatches + * through {@code clearValue}, not just produces the slot-clearing side effect. + */ + private static class TrackingLongColumnVector extends LongColumnVector { + final List clearedIndices = new ArrayList<>(); + + TrackingLongColumnVector(int size) { + super(size); + } + + @Override + protected void clearSlotValue(int elementNum) { + super.clearSlotValue(elementNum); + clearedIndices.add(elementNum); + } + } + + @Test + void generateOuterNullsCallsClearValueOnEachMappedColumnForEachUnmatchedRow() throws HiveException, IOException { + TestableOuterOp op = new TestableOuterOp(); + op.outerSmallTableKeyColumnMap = new int[] {0}; + op.smallTableValueColumnMap = new int[] {1, 2}; + + VectorizedRowBatch batch = new VectorizedRowBatch(3, 4); + TrackingLongColumnVector keyCol = new TrackingLongColumnVector(4); + TrackingLongColumnVector valCol1 = new TrackingLongColumnVector(4); + TrackingLongColumnVector valCol2 = new TrackingLongColumnVector(4); + keyCol.vector[1] = 99L; + valCol1.vector[1] = 88L; + valCol2.vector[3] = 77L; + batch.cols[0] = keyCol; + batch.cols[1] = valCol1; + batch.cols[2] = valCol2; + + int[] noMatchs = new int[] {1, 3}; + op.generateOuterNulls(batch, noMatchs, noMatchs.length); + + assertEquals(Arrays.asList(1, 3), keyCol.clearedIndices); + assertEquals(Arrays.asList(1, 3), valCol1.clearedIndices); + assertEquals(Arrays.asList(1, 3), valCol2.clearedIndices); + + assertFalse(keyCol.noNulls); + assertTrue(keyCol.isNull[1]); + assertTrue(keyCol.isNull[3]); + assertFalse(keyCol.isNull[0]); + assertFalse(keyCol.isNull[2]); + + assertEquals(0L, keyCol.vector[1]); + assertEquals(0L, valCol1.vector[1]); + assertEquals(0L, valCol2.vector[3]); + } + + @Test + void generateOuterNullsRepeatedAllCallsClearValueAtIndexZeroForEachMappedColumn() throws HiveException { + TestableOuterOp op = new TestableOuterOp(); + op.outerSmallTableKeyColumnMap = new int[] {0}; + op.smallTableValueColumnMap = new int[] {1}; + + VectorizedRowBatch batch = new VectorizedRowBatch(2, 4); + TrackingLongColumnVector keyCol = new TrackingLongColumnVector(4); + TrackingLongColumnVector valCol = new TrackingLongColumnVector(4); + keyCol.vector[0] = 42L; + valCol.vector[0] = 84L; + batch.cols[0] = keyCol; + batch.cols[1] = valCol; + + op.generateOuterNullsRepeatedAll(batch); + + assertEquals(Arrays.asList(0), keyCol.clearedIndices); + assertEquals(Arrays.asList(0), valCol.clearedIndices); + + // isRepeating is set by the operator, not by clearValue. + assertFalse(keyCol.noNulls); + assertTrue(keyCol.isNull[0]); + assertTrue(keyCol.isRepeating); + assertFalse(valCol.noNulls); + assertTrue(valCol.isNull[0]); + assertTrue(valCol.isRepeating); + + assertEquals(0L, keyCol.vector[0]); + assertEquals(0L, valCol.vector[0]); + } + + @Test + void generateOuterNullsSetsBookkeepingOnTypeWithNoClearSlotValueOverride() throws HiveException, IOException { + // VoidColumnVector inherits the base no-op clearSlotValue — verifies the + // operator still drives the null-marking through clearValue() on a type + // without a per-slot value to zero. + TestableOuterOp op = new TestableOuterOp(); + op.outerSmallTableKeyColumnMap = new int[] {}; + op.smallTableValueColumnMap = new int[] {0}; + + VectorizedRowBatch batch = new VectorizedRowBatch(1, 4); + VoidColumnVector voidCol = new VoidColumnVector(4); + batch.cols[0] = voidCol; + + int[] noMatchs = new int[] {1, 3}; + op.generateOuterNulls(batch, noMatchs, noMatchs.length); + + assertFalse(voidCol.noNulls); + assertTrue(voidCol.isNull[1]); + assertTrue(voidCol.isNull[3]); + assertFalse(voidCol.isNull[0]); + assertFalse(voidCol.isNull[2]); + } + + /** + * For each {@link ColumnVector} subclass whose {@code clearSlotValue} is + * overridden, verifies the operator's call through {@code clearValue} reaches + * the override and clears the slot to the type's cleared state. + */ + @ParameterizedTest(name = "{0}") + @MethodSource("modifiedColumnVectorTypes") + void generateOuterNullsClearsSlotForEachModifiedType( + String typeName, + ColumnVector cv, + Runnable preLoad, + Runnable assertSlotCleared) throws HiveException, IOException { + + TestableOuterOp op = new TestableOuterOp(); + op.outerSmallTableKeyColumnMap = new int[] {}; + op.smallTableValueColumnMap = new int[] {0}; + + VectorizedRowBatch batch = new VectorizedRowBatch(1, 4); + preLoad.run(); + batch.cols[0] = cv; + + int[] noMatchs = new int[] {2}; + op.generateOuterNulls(batch, noMatchs, noMatchs.length); + + assertTrue(cv.isNull[2]); + assertFalse(cv.noNulls); + assertSlotCleared.run(); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("modifiedColumnVectorTypesAtSlotZero") + void generateOuterNullsRepeatedAllClearsSlotForEachModifiedType( + String typeName, + ColumnVector cv, + Runnable preLoad, + Runnable assertSlotCleared) throws HiveException { + + TestableOuterOp op = new TestableOuterOp(); + op.outerSmallTableKeyColumnMap = new int[] {}; + op.smallTableValueColumnMap = new int[] {0}; + + VectorizedRowBatch batch = new VectorizedRowBatch(1, 4); + preLoad.run(); + batch.cols[0] = cv; + + op.generateOuterNullsRepeatedAll(batch); + + assertTrue(cv.isNull[0]); + assertFalse(cv.noNulls); + assertTrue(cv.isRepeating); + assertSlotCleared.run(); + } + + static Stream modifiedColumnVectorTypesAtSlotZero() { + final LongColumnVector longCv = new LongColumnVector(4); + final DoubleColumnVector doubleCv = new DoubleColumnVector(4); + final BytesColumnVector bytesCv = new BytesColumnVector(4); + final DecimalColumnVector decCv = new DecimalColumnVector(4, 18, 4); + final Decimal64ColumnVector dec64Cv = new Decimal64ColumnVector(4, 18, 4); + final TimestampColumnVector tsCv = new TimestampColumnVector(4); + final IntervalDayTimeColumnVector ivCv = new IntervalDayTimeColumnVector(4); + + return Stream.of( + Arguments.of( + "LongColumnVector", + longCv, + (Runnable) () -> longCv.vector[0] = 999L, + (Runnable) () -> assertEquals(0L, longCv.vector[0])), + Arguments.of( + "DoubleColumnVector", + doubleCv, + (Runnable) () -> doubleCv.vector[0] = 3.14, + (Runnable) () -> assertEquals(0.0, doubleCv.vector[0])), + Arguments.of( + "BytesColumnVector", + bytesCv, + (Runnable) () -> { + bytesCv.vector[0] = "stale".getBytes(StandardCharsets.UTF_8); + bytesCv.start[0] = 1; + bytesCv.length[0] = 3; + }, + (Runnable) () -> { + assertNull(bytesCv.vector[0]); + assertEquals(0, bytesCv.start[0]); + assertEquals(0, bytesCv.length[0]); + }), + Arguments.of( + "DecimalColumnVector", + decCv, + (Runnable) () -> decCv.vector[0].setFromLong(999L), + (Runnable) () -> assertEquals(0L, decCv.vector[0].serialize64(decCv.scale))), + Arguments.of( + "Decimal64ColumnVector", + dec64Cv, + (Runnable) () -> dec64Cv.vector[0] = 999L, + (Runnable) () -> assertEquals(0L, dec64Cv.vector[0])), + Arguments.of( + "TimestampColumnVector", + tsCv, + (Runnable) () -> { + tsCv.time[0] = 1234567890000L; + tsCv.nanos[0] = 999; + }, + (Runnable) () -> { + assertEquals(0L, tsCv.time[0]); + assertEquals(1, tsCv.nanos[0]); + }), + Arguments.of( + "IntervalDayTimeColumnVector", + ivCv, + (Runnable) () -> ivCv.set(0, new HiveIntervalDayTime(5, 0)), + (Runnable) () -> { + assertEquals(0L, ivCv.getTotalSeconds(0)); + assertEquals(1, ivCv.getNanos(0)); + }) + ); + } + + static Stream modifiedColumnVectorTypes() { + final LongColumnVector longCv = new LongColumnVector(4); + final DoubleColumnVector doubleCv = new DoubleColumnVector(4); + final BytesColumnVector bytesCv = new BytesColumnVector(4); + final DecimalColumnVector decCv = new DecimalColumnVector(4, 18, 4); + final Decimal64ColumnVector dec64Cv = new Decimal64ColumnVector(4, 18, 4); + final TimestampColumnVector tsCv = new TimestampColumnVector(4); + final IntervalDayTimeColumnVector ivCv = new IntervalDayTimeColumnVector(4); + + return Stream.of( + Arguments.of( + "LongColumnVector", + longCv, + (Runnable) () -> longCv.vector[2] = 999L, + (Runnable) () -> assertEquals(0L, longCv.vector[2])), + Arguments.of( + "DoubleColumnVector", + doubleCv, + (Runnable) () -> doubleCv.vector[2] = 3.14, + (Runnable) () -> assertEquals(0.0, doubleCv.vector[2])), + Arguments.of( + "BytesColumnVector", + bytesCv, + (Runnable) () -> { + bytesCv.vector[2] = "stale".getBytes(StandardCharsets.UTF_8); + bytesCv.start[2] = 1; + bytesCv.length[2] = 3; + }, + (Runnable) () -> { + assertNull(bytesCv.vector[2]); + assertEquals(0, bytesCv.start[2]); + assertEquals(0, bytesCv.length[2]); + }), + Arguments.of( + "DecimalColumnVector", + decCv, + (Runnable) () -> decCv.vector[2].setFromLong(999L), + (Runnable) () -> assertEquals(0L, decCv.vector[2].serialize64(decCv.scale))), + Arguments.of( + "Decimal64ColumnVector", + dec64Cv, + (Runnable) () -> dec64Cv.vector[2] = 999L, + (Runnable) () -> assertEquals(0L, dec64Cv.vector[2])), + Arguments.of( + "TimestampColumnVector", + tsCv, + (Runnable) () -> { + tsCv.time[2] = 1234567890000L; + tsCv.nanos[2] = 999; + }, + (Runnable) () -> { + // setNullValue convention: time = 0, nanos = 1 + assertEquals(0L, tsCv.time[2]); + assertEquals(1, tsCv.nanos[2]); + }), + Arguments.of( + "IntervalDayTimeColumnVector", + ivCv, + (Runnable) () -> ivCv.set(2, new HiveIntervalDayTime(5, 0)), + (Runnable) () -> { + // setNullValue convention: totalSeconds = 0, nanos = 1 + assertEquals(0L, ivCv.getTotalSeconds(2)); + assertEquals(1, ivCv.getNanos(2)); + }) + ); + } +} diff --git a/ql/src/test/queries/clientpositive/vector_outer_join7.q b/ql/src/test/queries/clientpositive/vector_outer_join7.q new file mode 100644 index 000000000000..141d8c3c68bd --- /dev/null +++ b/ql/src/test/queries/clientpositive/vector_outer_join7.q @@ -0,0 +1,43 @@ +SET hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; + +-- SORT_QUERY_RESULTS + +-- HIVE-29598: regression test for stale scratch-slot values in vectorized +-- outer-join MapJoin. MAX() acts as an aggregation barrier so Calcite cannot +-- inline the inner expression and simplify the bug surface away. + +CREATE TABLE t (k STRING, v STRING) STORED AS ORC; + +INSERT INTO t VALUES + ('A','1'),('A','2'),('A','3'), + ('B','2'),('B','3'), + ('C','3'), + ('D','1'),('D','3'); + +WITH + probe AS ( + SELECT k, v, (CAST(v AS INT) > 0) AS p_bool + FROM t WHERE CAST(v AS INT) >= 3 + ), + small_side AS ( + SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool + FROM t + ), + classified AS ( + SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value + FROM probe p + LEFT JOIN small_side s + ON p.k = s.k + AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT) + ), + diagnosed AS ( + SELECT k, v, MAX(observed_value) AS observed_value + FROM classified + GROUP BY k, v + ) +SELECT k, v, + observed_value AS observed_value_returned_by_select, + 1 AS required_value_per_sql_semantics +FROM diagnosed +WHERE observed_value = 0; diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out new file mode 100644 index 000000000000..df755cfe4732 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join7.q.out @@ -0,0 +1,84 @@ +PREHOOK: query: CREATE TABLE t (k STRING, v STRING) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: CREATE TABLE t (k STRING, v STRING) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: INSERT INTO t VALUES + ('A','1'),('A','2'),('A','3'), + ('B','2'),('B','3'), + ('C','3'), + ('D','1'),('D','3') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t +POSTHOOK: query: INSERT INTO t VALUES + ('A','1'),('A','2'),('A','3'), + ('B','2'),('B','3'), + ('C','3'), + ('D','1'),('D','3') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.k SCRIPT [] +POSTHOOK: Lineage: t.v SCRIPT [] +PREHOOK: query: WITH + probe AS ( + SELECT k, v, (CAST(v AS INT) > 0) AS p_bool + FROM t WHERE CAST(v AS INT) >= 3 + ), + small_side AS ( + SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool + FROM t + ), + classified AS ( + SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value + FROM probe p + LEFT JOIN small_side s + ON p.k = s.k + AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT) + ), + diagnosed AS ( + SELECT k, v, MAX(observed_value) AS observed_value + FROM classified + GROUP BY k, v + ) +SELECT k, v, + observed_value AS observed_value_returned_by_select, + 1 AS required_value_per_sql_semantics +FROM diagnosed +WHERE observed_value = 0 +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: WITH + probe AS ( + SELECT k, v, (CAST(v AS INT) > 0) AS p_bool + FROM t WHERE CAST(v AS INT) >= 3 + ), + small_side AS ( + SELECT k, v, (CAST(v AS INT) > 9999) AS s_bool + FROM t + ), + classified AS ( + SELECT p.k, p.v, CAST((s.s_bool OR p.p_bool) AS INT) AS observed_value + FROM probe p + LEFT JOIN small_side s + ON p.k = s.k + AND CAST(p.v AS INT) - 1 = CAST(s.v AS INT) + ), + diagnosed AS ( + SELECT k, v, MAX(observed_value) AS observed_value + FROM classified + GROUP BY k, v + ) +SELECT k, v, + observed_value AS observed_value_returned_by_select, + 1 AS required_value_per_sql_semantics +FROM diagnosed +WHERE observed_value = 0 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index ec98d2ab5b8c..cce0cb9ad949 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -504,6 +504,13 @@ public void setElement(int outputElementNum, int inputElementNum, ColumnVector i } } + @Override + protected void clearSlotValue(int elementNum) { + vector[elementNum] = null; + start[elementNum] = 0; + length[elementNum] = 0; + } + @Override public void init() { initBuffer(0); diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index 9f611dfd313b..ee5e3f3885ee 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -231,6 +231,32 @@ public abstract void setElement(int outputElementNum, int inputElementNum, public abstract void copySelected( boolean selectedInUse, int[] sel, int size, ColumnVector outputColVector); + /** + * Mark the slot null and put its underlying value into a defined cleared + * state. Sets {@code isNull[elementNum] = true} and {@code noNulls = false}, + * then dispatches to {@link #clearSlotValue(int)} for per-type clearing. + * + *

Defends against consumers that read {@code vector[i]} without first + * checking {@code isNull[i]}. Distinct from per-type {@code NULL_VALUE} + * sentinels (e.g. {@link LongColumnVector#NULL_VALUE}), which assume the + * isNull[]-first contract. Final by design — subclasses customize behavior + * by overriding {@link #clearSlotValue(int)}, never this method. + */ + public final void clearValue(int elementNum) { + noNulls = false; + isNull[elementNum] = true; + clearSlotValue(elementNum); + } + + /** + * Per-type slot-clearing hook invoked by {@link #clearValue(int)}. + * Subclasses override to zero out their value array at {@code elementNum}. + * Container and void types inherit the no-op default. + */ + protected void clearSlotValue(int elementNum) { + // Default no-op. + } + /** * Initialize the column vector. This method can be overridden by specific column vector types. * Use this method only if the individual type of the column vector is not known, otherwise its diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java index 5defd27623b2..e0cdd76de156 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -144,6 +144,11 @@ public void setElement(int outputElementNum, int inputElementNum, ColumnVector i } } + @Override + protected void clearSlotValue(int elementNum) { + vector[elementNum].setFromLong(0L); + } + @Override public void stringifyValue(StringBuilder buffer, int row) { if (isRepeating) { diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index f833bde03f6e..fcf297585c63 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -220,6 +220,11 @@ public void setElement(int outputElementNum, int inputElementNum, ColumnVector i } } + @Override + protected void clearSlotValue(int elementNum) { + vector[elementNum] = 0.0; + } + @Override public void stringifyValue(StringBuilder buffer, int row) { if (isRepeating) { diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java index 9324bc0c610d..2b61b09e38cb 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/IntervalDayTimeColumnVector.java @@ -311,6 +311,11 @@ public void setNullValue(int elementNum) { nanos[elementNum] = 1; } + @Override + protected void clearSlotValue(int elementNum) { + setNullValue(elementNum); + } + // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. @Override diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index bf423674b2aa..dc727b462a74 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -294,6 +294,11 @@ public void setElement(int outputElementNum, int inputElementNum, ColumnVector i } } + @Override + protected void clearSlotValue(int elementNum) { + vector[elementNum] = 0L; + } + @Override public void stringifyValue(StringBuilder buffer, int row) { if (isRepeating) { diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java index f97156c40381..c49149cdb281 100644 --- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java +++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/TimestampColumnVector.java @@ -378,6 +378,11 @@ public void setNullValue(int elementNum) { nanos[elementNum] = 1; } + @Override + protected void clearSlotValue(int elementNum) { + setNullValue(elementNum); + } + // Copy the current object contents into the output. Only copy selected entries, // as indicated by selectedInUse and the sel array. @Override diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java index be4ff70935a7..dc9c045727de 100644 --- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestBytesColumnVector.java @@ -25,7 +25,9 @@ import org.junit.Test; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertSame; import static org.junit.Assert.assertTrue; @@ -220,4 +222,31 @@ private static byte[] writeToBytesColumnVector(int rowIdx, BytesColumnVector col col.setValPreallocated(rowIdx, writeSize); return bytes; } + + @Test + public void testClearValue() { + BytesColumnVector cv = new BytesColumnVector(4); + byte[] data = "hello".getBytes(StandardCharsets.UTF_8); + cv.vector[0] = data; + cv.start[0] = 1; + cv.length[0] = 3; + + byte[] neighborData = "world".getBytes(StandardCharsets.UTF_8); + cv.vector[1] = neighborData; + cv.start[1] = 2; + cv.length[1] = 4; + + cv.clearValue(0); + + assertTrue(cv.isNull[0]); + assertFalse(cv.noNulls); + assertNull(cv.vector[0]); + assertEquals(0, cv.start[0]); + assertEquals(0, cv.length[0]); + + assertSame(neighborData, cv.vector[1]); + assertEquals(2, cv.start[1]); + assertEquals(4, cv.length[1]); + assertFalse(cv.isNull[1]); + } } diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java new file mode 100644 index 000000000000..2644ff8f1bcb --- /dev/null +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDecimalColumnVector.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestDecimalColumnVector { + + @Test + void clearValueZeroesSlotAndMarksNull() { + DecimalColumnVector cv = new DecimalColumnVector(4, 18, 4); + cv.vector[1].setFromLong(12345L); + cv.vector[2].setFromLong(67890L); + + cv.clearValue(1); + + assertTrue(cv.isNull[1]); + assertFalse(cv.noNulls); + assertEquals(0L, cv.vector[1].serialize64(cv.scale)); + // Neighbour slot untouched: still represents 67890. + assertEquals(67890L, cv.vector[2].serialize64((short) 0)); + assertFalse(cv.isNull[2]); + } +} diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java new file mode 100644 index 000000000000..a67ff94ef327 --- /dev/null +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestDoubleColumnVector.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestDoubleColumnVector { + + @Test + void clearValueZeroesSlotAndMarksNull() { + DoubleColumnVector cv = new DoubleColumnVector(4); + cv.vector[1] = 3.14; + cv.vector[0] = 1.5; + cv.vector[3] = -2.5; + + cv.clearValue(1); + + assertTrue(cv.isNull[1]); + assertFalse(cv.noNulls); + assertEquals(0.0, cv.vector[1]); + assertEquals(1.5, cv.vector[0]); + assertEquals(-2.5, cv.vector[3]); + assertFalse(cv.isNull[0]); + assertFalse(cv.isNull[3]); + } +} diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java new file mode 100644 index 000000000000..d715508e1484 --- /dev/null +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestIntervalDayTimeColumnVector.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestIntervalDayTimeColumnVector { + + @Test + void clearValueZeroesSlotAndMarksNull() { + IntervalDayTimeColumnVector cv = new IntervalDayTimeColumnVector(4); + cv.set(3, new HiveIntervalDayTime(5, 0)); + + cv.clearValue(3); + + assertTrue(cv.isNull[3]); + assertFalse(cv.noNulls); + // setNullValue convention: totalSeconds = 0, nanos = 1 + assertEquals(0L, cv.getTotalSeconds(3)); + assertEquals(1, cv.getNanos(3)); + } +} diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java new file mode 100644 index 000000000000..c1c8acc25e97 --- /dev/null +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestLongColumnVector.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class TestLongColumnVector { + + @Test + void clearValueZeroesSlotAndMarksNull() { + LongColumnVector cv = new LongColumnVector(4); + cv.vector[2] = 2025L; + cv.vector[1] = 7L; + cv.vector[3] = 9L; + + cv.clearValue(2); + + assertTrue(cv.isNull[2]); + assertFalse(cv.noNulls); + assertEquals(0L, cv.vector[2]); + assertEquals(7L, cv.vector[1]); + assertEquals(9L, cv.vector[3]); + assertFalse(cv.isNull[1]); + assertFalse(cv.isNull[3]); + } +} diff --git a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java index 2d85b115d244..dda52797246a 100644 --- a/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java +++ b/storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.sql.Timestamp; @@ -246,4 +248,18 @@ private Thread startVectorManipulationThread(final int vectorLength, final long return thread; } + @Test + public void testClearValue() { + TimestampColumnVector cv = new TimestampColumnVector(4); + cv.time[2] = 1234567890000L; + cv.nanos[2] = 999; + + cv.clearValue(2); + + assertTrue(cv.isNull[2]); + assertFalse(cv.noNulls); + // setNullValue convention: time = 0, nanos = 1 + assertEquals(0L, cv.time[2]); + assertEquals(1, cv.nanos[2]); + } }