-
Notifications
You must be signed in to change notification settings - Fork 29.2k
[SPARK-56903][SQL] Spread NULL outer join keys across shuffle partitions #55927
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
aa7138a
7fe801c
6c9aa3b
7c760ec
400cfbe
cbca318
9eb4ae4
4a021b6
59819f4
ad70bf1
c0dc4d6
f551dab
76bbb03
e778497
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -81,12 +81,17 @@ case object AllTuples extends Distribution { | |
| * | ||
| * @param requireAllClusterKeys When true, `Partitioning` which satisfies this distribution, | ||
| * must match all `clustering` expressions in the same ordering. | ||
| * @param allowNullKeySpreading When true, the default partitioning may spread rows whose | ||
| * clustering keys contain NULL values. This is a permission for | ||
| * consumers that do not require NULL-key co-location; ordinary | ||
| * [[HashPartitioning]] can still satisfy this distribution. | ||
| */ | ||
| case class ClusteredDistribution( | ||
| clustering: Seq[Expression], | ||
| requireAllClusterKeys: Boolean = SQLConf.get.getConf( | ||
| SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_DISTRIBUTION), | ||
| requiredNumPartitions: Option[Int] = None) extends Distribution { | ||
| requiredNumPartitions: Option[Int] = None, | ||
| allowNullKeySpreading: Boolean = false) extends Distribution { | ||
| require( | ||
| clustering != Nil, | ||
| "The clustering expressions of a ClusteredDistribution should not be Nil. " + | ||
|
|
@@ -97,7 +102,11 @@ case class ClusteredDistribution( | |
| assert(requiredNumPartitions.isEmpty || requiredNumPartitions.get == numPartitions, | ||
| s"This ClusteredDistribution requires ${requiredNumPartitions.get} partitions, but " + | ||
| s"the actual number of partitions is $numPartitions.") | ||
| HashPartitioning(clustering, numPartitions) | ||
| if (allowNullKeySpreading) { | ||
| NullAwareHashPartitioning(clustering, numPartitions) | ||
| } else { | ||
| HashPartitioning(clustering, numPartitions) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -282,7 +291,7 @@ trait HashPartitioningLike extends Expression with Partitioning with Unevaluable | |
| expressions.length == h.expressions.length && expressions.zip(h.expressions).forall { | ||
| case (l, r) => l.semanticEquals(r) | ||
| } | ||
| case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _) => | ||
| case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _, _) => | ||
| if (requireAllClusterKeys) { | ||
| // Checks `HashPartitioning` is partitioned on exactly same clustering keys of | ||
| // `ClusteredDistribution`. | ||
|
|
@@ -324,6 +333,45 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) | |
| newChildren: IndexedSeq[Expression]): HashPartitioning = copy(expressions = newChildren) | ||
| } | ||
|
|
||
| /** | ||
| * Represents a hash partitioning for equi-join inputs where rows with a NULL join key do not need | ||
| * to be co-located. Non-NULL join keys preserve the same partitioning contract as | ||
| * [[HashPartitioning]], while rows with any NULL join key may be spread across partitions. As a | ||
| * result, this partitioning intentionally does not satisfy a strict [[ClusteredDistribution]]. | ||
| */ | ||
| case class NullAwareHashPartitioning(expressions: Seq[Expression], numPartitions: Int) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Design alternative worth considering: a flag on `HashPartitioning` instead of a parallel partitioning type. The marker this carries is one bit ("NULL keys may be spread, so I don't deliver strict same-key co-location"). Encoding it as a parallel type means duplicating […]. With a flag:
The one argument for distinct types is EXPLAIN-string visibility — a one-line Separately on this class's Scaladoc: worth calling out that
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, this is an alternative design. The pros and cons are: Pros:
Cons:
I'm a bit concerned about the cons since |
||
| extends HashPartitioningLike { | ||
|
|
||
| /** | ||
| * Decides whether this null-aware layout can serve `required`: | ||
| * - `UnspecifiedDistribution` is always satisfied; | ||
| * - `AllTuples` is satisfied only with a single partition; | ||
| * - a `ClusteredDistribution` is satisfied only when it sets `allowNullKeySpreading` and its | ||
| * clustering keys match this partitioning's expressions; | ||
| * - every other distribution is rejected. | ||
| */ | ||
| override def satisfies0(required: Distribution): Boolean = required match { | ||
| case UnspecifiedDistribution => true | ||
| case AllTuples => numPartitions == 1 | ||
| // Stateful operators require strict NULL-key co-location, so a clustered distribution that | ||
| // does not opt in to NULL-key spreading falls through to the rejecting case below. | ||
| case dist: ClusteredDistribution if dist.allowNullKeySpreading => | ||
| if (dist.requireAllClusterKeys) { | ||
| // Exact-match mode: the distribution itself compares the key lists. | ||
| dist.areAllClusterKeysMatched(expressions) | ||
| } else { | ||
| // Subset mode: every partitioning expression must appear among the clustering keys. | ||
| expressions.forall(expr => dist.clustering.exists(_.semanticEquals(expr))) | ||
| } | ||
| case _ => false | ||
| } | ||
|
|
||
| // Pairs this partitioning with the distribution it is being checked against. | ||
| override def createShuffleSpec(distribution: ClusteredDistribution): ShuffleSpec = { | ||
| NullAwareHashShuffleSpec(partitioning = this, distribution = distribution) | ||
| } | ||
|
|
||
| // Rebuilds this partitioning around the replacement expressions; `numPartitions` is unchanged. | ||
| override protected def withNewChildrenInternal( | ||
| newChildren: IndexedSeq[Expression]): NullAwareHashPartitioning = { | ||
| NullAwareHashPartitioning(newChildren, numPartitions) | ||
| } | ||
| } | ||
|
|
||
| case class CoalescedBoundary(startReducerIndex: Int, endReducerIndex: Int) | ||
|
|
||
| /** | ||
|
|
@@ -345,6 +393,47 @@ case class CoalescedHashPartitioning(from: HashPartitioning, partitions: Seq[Coa | |
| copy(from = from.copy(expressions = newChildren)) | ||
| } | ||
|
|
||
| /** | ||
| * Represents a null-aware hash partitioning whose reducer ranges have been coalesced into fewer | ||
| * partitions. It preserves the same relaxed NULL-key co-location contract as | ||
| * [[NullAwareHashPartitioning]]. | ||
| */ | ||
| case class CoalescedNullAwareHashPartitioning( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missing Scaladoc here (and on
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added |
||
| from: NullAwareHashPartitioning, | ||
| partitions: Seq[CoalescedBoundary]) extends HashPartitioningLike { | ||
|
|
||
| // The hash keys are those of the partitioning this one was coalesced from. | ||
| override def expressions: Seq[Expression] = { | ||
| from.expressions | ||
| } | ||
|
|
||
| override def satisfies0(required: Distribution): Boolean = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This body is identical to `NullAwareHashPartitioning.satisfies0`. Two cleaner shapes:
Side note: both overrides skip the |
||
| (required match { | ||
| case UnspecifiedDistribution => true | ||
| case AllTuples => numPartitions == 1 | ||
| case _ => false | ||
| }) || { | ||
| required match { | ||
| case c @ ClusteredDistribution( | ||
| requiredClustering, requireAllClusterKeys, _, allowNullKeySpreading) | ||
| if allowNullKeySpreading => | ||
| if (requireAllClusterKeys) { | ||
| c.areAllClusterKeysMatched(expressions) | ||
| } else { | ||
| expressions.forall(x => requiredClustering.exists(_.semanticEquals(x))) | ||
| } | ||
| case _ => false | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Wraps the shuffle spec of the un-coalesced partitioning together with the coalesced ranges. | ||
| override def createShuffleSpec(distribution: ClusteredDistribution): ShuffleSpec = { | ||
| val uncoalescedSpec = from.createShuffleSpec(distribution) | ||
| CoalescedHashShuffleSpec(uncoalescedSpec, partitions) | ||
| } | ||
|
|
||
| // One output partition per coalesced reducer range. | ||
| override val numPartitions: Int = partitions.size | ||
|
|
||
| // Swaps the expressions of the underlying partitioning; the coalesced ranges are kept as-is. | ||
| override protected def withNewChildrenInternal( | ||
| newChildren: IndexedSeq[Expression]): CoalescedNullAwareHashPartitioning = { | ||
| val rebuiltFrom = from.copy(expressions = newChildren) | ||
| copy(from = rebuiltFrom) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Represents a partitioning where rows are split across partitions based on transforms defined by | ||
| * `expressions`. | ||
|
|
@@ -482,7 +571,7 @@ case class KeyedPartitioning( | |
|
|
||
| def groupedSatisfies(required: Distribution): Boolean = { | ||
| required match { | ||
| case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _) => | ||
| case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _, _) => | ||
| if (requireAllClusterKeys) { | ||
| // Checks whether this partitioning is partitioned on exactly same clustering keys of | ||
| // `ClusteredDistribution`. | ||
|
|
@@ -657,7 +746,7 @@ case class RangePartitioning(ordering: Seq[SortOrder], numPartitions: Int) | |
| // `RangePartitioning(a, b, c)` satisfies `OrderedDistribution(a, b)`. | ||
| val minSize = Seq(requiredOrdering.size, ordering.size).min | ||
| requiredOrdering.take(minSize) == ordering.take(minSize) | ||
| case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _) => | ||
| case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _, _) => | ||
| val expressions = ordering.map(_.child) | ||
| if (requireAllClusterKeys) { | ||
| // Checks `RangePartitioning` is partitioned on exactly same clustering keys of | ||
|
|
@@ -782,7 +871,7 @@ case class ShufflePartitionIdPassThrough( | |
| super.satisfies0(required) || { | ||
| required match { | ||
| // TODO(SPARK-53428): Support Direct Passthrough Partitioning in the Streaming Joins | ||
| case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _) => | ||
| case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _, _) => | ||
| val partitioningExpressions = expr.child :: Nil | ||
| if (requireAllClusterKeys) { | ||
| c.areAllClusterKeysMatched(partitioningExpressions) | ||
|
|
@@ -863,6 +952,25 @@ case class RangeShuffleSpec( | |
| } | ||
| } | ||
|
|
||
| /** | ||
| * Compatibility check shared by the hash-based shuffle specs. | ||
| */ | ||
| private object HashShuffleSpecCompatibility { | ||
| /** | ||
| * Returns true when all of the following hold, evaluated in this order: | ||
| * 1. both distributions have the same number of clustering keys; | ||
| * 2. both sides have the same number of partitions; | ||
| * 3. both sides have the same number of partitioning expressions; | ||
| * 4. each pair of hash-key position sets, taken in order, shares at least one position. | ||
| */ | ||
| def isCompatible( | ||
| leftDistribution: ClusteredDistribution, | ||
| leftNumPartitions: Int, | ||
| leftExpressions: Seq[Expression], | ||
| leftHashKeyPositions: Seq[mutable.BitSet], | ||
| rightDistribution: ClusteredDistribution, | ||
| rightNumPartitions: Int, | ||
| rightExpressions: Seq[Expression], | ||
| rightHashKeyPositions: Seq[mutable.BitSet]): Boolean = { | ||
| // Cheap size comparisons; declared as `def` so they short-circuit as a single chain. | ||
| def sameShape: Boolean = | ||
| leftDistribution.clustering.length == rightDistribution.clustering.length && | ||
| leftNumPartitions == rightNumPartitions && | ||
| leftExpressions.length == rightExpressions.length | ||
| // Pairwise check that corresponding hash keys map to a common clustering position. | ||
| def positionsOverlap: Boolean = | ||
| leftHashKeyPositions.zip(rightHashKeyPositions).forall { pair => | ||
| (pair._1 & pair._2).nonEmpty | ||
| } | ||
| sameShape && positionsOverlap | ||
| } | ||
| } | ||
|
|
||
| case class HashShuffleSpec( | ||
| partitioning: HashPartitioning, | ||
| distribution: ClusteredDistribution) extends ShuffleSpec { | ||
|
|
@@ -895,14 +1003,26 @@ case class HashShuffleSpec( | |
| // 3. both partitioning have the same number of expressions | ||
| // 4. each pair of partitioning expression from both sides has overlapping positions in their | ||
| // corresponding distributions. | ||
| distribution.clustering.length == otherDistribution.clustering.length && | ||
| partitioning.numPartitions == otherPartitioning.numPartitions && | ||
| partitioning.expressions.length == otherPartitioning.expressions.length && { | ||
| val otherHashKeyPositions = otherHashSpec.hashKeyPositions | ||
| hashKeyPositions.zip(otherHashKeyPositions).forall { case (left, right) => | ||
| left.intersect(right).nonEmpty | ||
| } | ||
| } | ||
| HashShuffleSpecCompatibility.isCompatible( | ||
| distribution, | ||
| partitioning.numPartitions, | ||
| partitioning.expressions, | ||
| hashKeyPositions, | ||
| otherDistribution, | ||
| otherPartitioning.numPartitions, | ||
| otherPartitioning.expressions, | ||
| otherHashSpec.hashKeyPositions) | ||
| case otherNullAwareSpec @ NullAwareHashShuffleSpec(otherPartitioning, otherDistribution) | ||
| if distribution.allowNullKeySpreading && otherDistribution.allowNullKeySpreading => | ||
| HashShuffleSpecCompatibility.isCompatible( | ||
| distribution, | ||
| partitioning.numPartitions, | ||
| partitioning.expressions, | ||
| hashKeyPositions, | ||
| otherDistribution, | ||
| otherPartitioning.numPartitions, | ||
| otherPartitioning.expressions, | ||
| otherNullAwareSpec.hashKeyPositions) | ||
| case ShuffleSpecCollection(specs) => | ||
| specs.exists(isCompatibleWith) | ||
| case _ => | ||
|
|
@@ -923,7 +1043,73 @@ case class HashShuffleSpec( | |
|
|
||
| override def createPartitioning(clustering: Seq[Expression]): Partitioning = { | ||
| val exprs = hashKeyPositions.map(v => clustering(v.head)) | ||
| HashPartitioning(exprs, partitioning.numPartitions) | ||
| if (distribution.allowNullKeySpreading) { | ||
| NullAwareHashPartitioning(exprs, partitioning.numPartitions) | ||
| } else { | ||
| HashPartitioning(exprs, partitioning.numPartitions) | ||
| } | ||
| } | ||
|
|
||
| override def numPartitions: Int = partitioning.numPartitions | ||
| } | ||
|
|
||
| /** | ||
| * Shuffle specification for [[NullAwareHashPartitioning]]. It is compatible only with shuffle | ||
| * layouts whose distributions explicitly allow NULL-key spreading. | ||
| */ | ||
| case class NullAwareHashShuffleSpec( | ||
| partitioning: NullAwareHashPartitioning, | ||
| distribution: ClusteredDistribution) extends ShuffleSpec { | ||
|
|
||
| /** | ||
| * For each partitioning expression, the set of positions in `distribution.clustering` whose | ||
| * canonicalized form equals that expression's canonicalized form. The set is empty when the | ||
| * expression matches no clustering key. | ||
| */ | ||
| lazy val hashKeyPositions: Seq[mutable.BitSet] = { | ||
| // Index the clustering keys by canonical form, recording every position where each occurs. | ||
| val positionsByKey = mutable.Map.empty[Expression, mutable.BitSet] | ||
| for ((clusterKey, position) <- distribution.clustering.zipWithIndex) { | ||
| positionsByKey.getOrElseUpdate(clusterKey.canonicalized, mutable.BitSet.empty) += position | ||
| } | ||
| partitioning.expressions.map { hashKey => | ||
| positionsByKey.getOrElse(hashKey.canonicalized, mutable.BitSet.empty) | ||
| } | ||
| } | ||
|
|
||
| override def isCompatibleWith(other: ShuffleSpec): Boolean = other match { | ||
|
peter-toth marked this conversation as resolved.
|
||
| case SinglePartitionShuffleSpec => | ||
| partitioning.numPartitions == 1 | ||
| case otherSpec @ NullAwareHashShuffleSpec(otherPartitioning, otherDistribution) => | ||
| HashShuffleSpecCompatibility.isCompatible( | ||
| distribution, | ||
| partitioning.numPartitions, | ||
| partitioning.expressions, | ||
| hashKeyPositions, | ||
| otherDistribution, | ||
| otherPartitioning.numPartitions, | ||
| otherPartitioning.expressions, | ||
| otherSpec.hashKeyPositions) | ||
| case otherHashSpec @ HashShuffleSpec(otherPartitioning, otherDistribution) | ||
| if distribution.allowNullKeySpreading && otherDistribution.allowNullKeySpreading => | ||
| HashShuffleSpecCompatibility.isCompatible( | ||
| distribution, | ||
| partitioning.numPartitions, | ||
| partitioning.expressions, | ||
| hashKeyPositions, | ||
| otherDistribution, | ||
| otherPartitioning.numPartitions, | ||
| otherPartitioning.expressions, | ||
| otherHashSpec.hashKeyPositions) | ||
| case ShuffleSpecCollection(specs) => | ||
| specs.exists(isCompatibleWith) | ||
| case _ => | ||
| false | ||
| } | ||
|
|
||
| /** | ||
| * Whether this spec may be used to create a partitioning for the other side. Always true unless | ||
| * `SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION` is set, in which case the partitioning | ||
| * expressions must match all clustering keys of `distribution`. | ||
| */ | ||
| override def canCreatePartitioning: Boolean = { | ||
| val requireAllKeys = | ||
| SQLConf.get.getConf(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION) | ||
| if (!requireAllKeys) { | ||
| true | ||
| } else { | ||
| distribution.areAllClusterKeysMatched(partitioning.expressions) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Builds a [[NullAwareHashPartitioning]] over `clustering` with the same number of partitions, | ||
| * choosing for each hash key the clustering expression at its first recorded position. | ||
| */ | ||
| override def createPartitioning(clustering: Seq[Expression]): Partitioning = { | ||
| val remappedKeys = hashKeyPositions.map { positions => | ||
| clustering(positions.head) | ||
| } | ||
| NullAwareHashPartitioning(remappedKeys, partitioning.numPartitions) | ||
| } | ||
|
|
||
| // The partition count is taken directly from the wrapped partitioning. | ||
| override def numPartitions: Int = { | ||
| partitioning.numPartitions | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Worth a Scaladoc on this field describing the contract: it's a permission, not a requirement (an ordinary
`HashPartitioning` still satisfies this distribution when the flag is `true`; the flag only weakens what the default partitioning produced by `createPartitioning` looks like). And it's the consumer-side knob — the partitioning-side marker (`NullAwareHashPartitioning` today, or a flag on `HashPartitioning` per the comment below) is what tells downstream operators they need to re-shuffle for strict `ClusteredDistribution`. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added