-
Notifications
You must be signed in to change notification settings - Fork 29.2k
[SPARK-56920][SQL] Support METRIC_VIEW creation on V2 catalogs #55487
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 16 commits
d275e7c
b575053
0509d5b
fcdc19f
aeee6a7
27f4368
a1c2927
d4474e7
0b26f95
437b71d
66b13eb
e980f24
fd3d8b2
90deee1
f2251d8
ed2db22
f22b27d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,58 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.connector.catalog; | ||
|
|
||
| import org.apache.spark.annotation.Evolving; | ||
|
|
||
| /** | ||
| * Represents a dependency of a SQL object such as a view or metric view. | ||
| * <p> | ||
| * A dependency is one of: {@link TableDependency} or {@link FunctionDependency}. The | ||
| * {@code sealed} declaration enforces this structurally. | ||
| * <p> | ||
| * Note: today the only producer in Spark itself is metric-view dependency extraction, which | ||
| * emits {@link TableDependency} only. {@link FunctionDependency} and the | ||
| * {@link #function(String[])} factory are exposed as groundwork for future producers | ||
| * (e.g. SQL UDF dependency tracking); consumers iterating a {@link DependencyList} received | ||
| * from Spark today should expect to see only {@link TableDependency} instances. | ||
| * | ||
| * @since 4.2.0 | ||
| */ | ||
| @Evolving | ||
| public sealed interface Dependency permits TableDependency, FunctionDependency { | ||
|
|
||
| /** | ||
| * Creates a {@link TableDependency} from the structural multi-part name of the dependent | ||
| * table. For catalog-managed tables the first element is typically the catalog name and | ||
| * subsequent elements are namespace components followed by the table name. | ||
| * | ||
| * @param nameParts multi-part name of the table; must be non-null and contain at least | ||
| *                  one element (the array is copied by the {@link TableDependency} constructor) | ||
| * @return a new {@link TableDependency} for {@code nameParts} | ||
| * @throws NullPointerException if {@code nameParts} is {@code null} | ||
| * @throws IllegalArgumentException if {@code nameParts} is empty | ||
| */ | ||
| static TableDependency table(String[] nameParts) { | ||
| return new TableDependency(nameParts); | ||
| } | ||
|
|
||
| /** | ||
| * Creates a {@link FunctionDependency} from the structural multi-part name of the | ||
| * dependent function. For catalog-managed functions the first element is typically the | ||
| * catalog name and subsequent elements are namespace components followed by the function | ||
| * name. | ||
| * | ||
| * @param nameParts multi-part name of the function; must be non-null and contain at least | ||
| *                  one element (the array is copied by the {@link FunctionDependency} | ||
| *                  constructor) | ||
| * @return a new {@link FunctionDependency} for {@code nameParts} | ||
| * @throws NullPointerException if {@code nameParts} is {@code null} | ||
| * @throws IllegalArgumentException if {@code nameParts} is empty | ||
| */ | ||
| static FunctionDependency function(String[] nameParts) { | ||
| return new FunctionDependency(nameParts); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,75 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.connector.catalog; | ||
|
|
||
| import java.util.Arrays; | ||
| import java.util.Objects; | ||
|
|
||
| import org.apache.spark.annotation.Evolving; | ||
|
|
||
| /** | ||
| * A list of dependencies for a SQL object such as a view or metric view. | ||
| * <p> | ||
| * <ul> | ||
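| * <li>When the {@code DependencyList} reference itself is {@code null}, the dependency information is not provided (a {@code null} array is rejected by the constructor).</li> | ||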
| * <li>When the array is empty, dependencies are provided but the object has none.</li> | ||
| * <li>When the array is non-empty, each entry describes one dependency.</li> | ||
| * </ul> | ||
| * <p> | ||
| * Records' auto-generated {@code equals}/{@code hashCode} on array fields fall through to | ||
| * {@link Object#equals} (reference equality), so this record overrides them to use | ||
| * {@link Arrays#equals(Object[], Object[])} / {@link Arrays#hashCode(Object[])} on | ||
| * {@code dependencies}; per-element equality delegates to the element's overridden | ||
| * {@code equals} ({@link TableDependency} / {@link FunctionDependency} both implement value | ||
| * semantics on their {@code nameParts} array). The defensive-copy accessor override clones | ||
| * on read so callers cannot mutate the record's internal array. | ||
| * | ||
| * @param dependencies array of dependencies; must contain no null elements (defensive | ||
| * copy made; not validated element-wise -- callers passing nulls will | ||
| * surface NPEs in downstream consumers) | ||
| * @since 4.2.0 | ||
| */ | ||
| @Evolving | ||
| public record DependencyList(Dependency[] dependencies) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Java records auto-generate `equals`/`hashCode` that compare array components by reference rather than by content. This undermines the "structural multi-part name" intent — consumers can't dedup, compare, or use these as Map keys. Please override `equals` and `hashCode` to use `Arrays.equals` / `Arrays.hashCode`. |
||
|
|
||
| /** | ||
| * Compact canonical constructor: rejects a {@code null} array and stores a copy of it, so | ||
| * later changes to the caller's array are not visible through this record. Elements are | ||
| * not checked individually. | ||
| * | ||
| * @throws NullPointerException if {@code dependencies} is {@code null} | ||
| */ | ||
| public DependencyList { | ||
| Objects.requireNonNull(dependencies, "dependencies must not be null"); | ||
|
chenwang-databricks marked this conversation as resolved.
|
||
| dependencies = dependencies.clone(); | ||
| } | ||
|
|
||
| /** Returns a defensive copy of the underlying dependencies array. */ | ||
| @Override | ||
| public Dependency[] dependencies() { return dependencies.clone(); } | ||
|
|
||
| /** | ||
| * Compares by content: two lists are equal when their {@code dependencies} arrays have | ||
| * the same length and pairwise-equal elements. | ||
| */ | ||
| @Override | ||
| public boolean equals(Object o) { | ||
| if (this == o) { | ||
| return true; | ||
| } | ||
| if (!(o instanceof DependencyList other)) { | ||
| return false; | ||
| } | ||
| return Arrays.equals(dependencies, other.dependencies); | ||
| } | ||
|
|
||
| @Override | ||
| public int hashCode() { return Arrays.hashCode(dependencies); } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return "DependencyList[dependencies=" + Arrays.toString(dependencies) + "]"; | ||
| } | ||
|
|
||
| /** | ||
| * Static factory that delegates to the canonical constructor. | ||
| * | ||
| * @param dependencies array of dependencies; must not be {@code null} | ||
| * @return a new {@code DependencyList} built from {@code dependencies} | ||
| * @throws NullPointerException if {@code dependencies} is {@code null} | ||
| */ | ||
| public static DependencyList of(Dependency[] dependencies) { | ||
| return new DependencyList(dependencies); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.connector.catalog; | ||
|
|
||
| import java.util.Arrays; | ||
| import java.util.Objects; | ||
|
|
||
| import org.apache.spark.annotation.Evolving; | ||
|
|
||
| /** | ||
| * A function dependency of a SQL object. | ||
| * <p> | ||
| * The dependent function is identified by its structural multi-part name. See | ||
| * {@link TableDependency} for the parts-form contract. | ||
| * <p> | ||
| * Records' auto-generated {@code equals}/{@code hashCode} on array fields fall through to | ||
| * {@link Object#equals} (reference equality), so this record overrides them to use | ||
| * {@link Arrays#equals(Object[], Object[])} / {@link Arrays#hashCode(Object[])} on | ||
| * {@code nameParts} and give value-based semantics. The defensive-copy accessor override | ||
| * also clones on read so callers cannot mutate the record's internal array. | ||
| * | ||
| * @param nameParts structural multi-part identifier; must be non-empty and contain no | ||
| * null elements (defensive copy made; not validated element-wise -- | ||
| * callers passing nulls will surface NPEs in downstream consumers) | ||
| * @since 4.2.0 | ||
| */ | ||
| @Evolving | ||
| public record FunctionDependency(String[] nameParts) implements Dependency { | ||
| /** | ||
| * Compact canonical constructor: rejects a {@code null} or empty {@code nameParts} and | ||
| * stores a copy, so later changes to the caller's array do not affect this record. | ||
| * | ||
| * @throws NullPointerException if {@code nameParts} is {@code null} | ||
| * @throws IllegalArgumentException if {@code nameParts} has no elements | ||
| */ | ||
| public FunctionDependency { | ||
| if (Objects.requireNonNull(nameParts, "nameParts must not be null").length == 0) { | ||
| throw new IllegalArgumentException("nameParts must not be empty"); | ||
| } | ||
| nameParts = Arrays.copyOf(nameParts, nameParts.length); | ||
| } | ||
|
|
||
| /** | ||
| * Hands out a fresh copy of the name parts so that callers cannot modify the array held | ||
| * by this record. | ||
| * | ||
| * @return a new array with the same parts, in the same order | ||
| */ | ||
| @Override | ||
| public String[] nameParts() { | ||
| return Arrays.copyOf(nameParts, nameParts.length); | ||
| } | ||
|
|
||
| /** Compares by content: equal when both are function dependencies with equal name parts. */ | ||
| @Override | ||
| public boolean equals(Object o) { | ||
| if (this == o) { | ||
| return true; | ||
| } | ||
| if (!(o instanceof FunctionDependency other)) { | ||
| return false; | ||
| } | ||
| return Arrays.equals(nameParts, other.nameParts); | ||
| } | ||
|
|
||
| /** Content-based hash over {@code nameParts}, matching {@code equals}. */ | ||
| @Override | ||
| public int hashCode() { | ||
| return Arrays.hashCode(nameParts); | ||
| } | ||
|
|
||
| /** Renders the record in the usual {@code Name[component=value]} form, listing every part. */ | ||
| @Override | ||
| public String toString() { | ||
| String rendered = Arrays.toString(nameParts); | ||
| return "FunctionDependency[nameParts=" + rendered + "]"; | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| /* | ||
| * Licensed to the Apache Software Foundation (ASF) under one or more | ||
| * contributor license agreements. See the NOTICE file distributed with | ||
| * this work for additional information regarding copyright ownership. | ||
| * The ASF licenses this file to You under the Apache License, Version 2.0 | ||
| * (the "License"); you may not use this file except in compliance with | ||
| * the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package org.apache.spark.sql.connector.catalog; | ||
|
|
||
| import java.util.Arrays; | ||
| import java.util.Objects; | ||
|
|
||
| import org.apache.spark.annotation.Evolving; | ||
|
|
||
| /** | ||
| * A table dependency of a SQL object. | ||
| * <p> | ||
| * The dependent table is identified by its structural multi-part name. {@code nameParts} | ||
| * arity matches the catalog's namespace depth plus one for the table name -- for a catalog | ||
| * with single-level namespaces the parts are {@code [catalog, schema, table]}; for a catalog | ||
| * with multi-level namespaces (e.g. Iceberg with {@code db1.db2}) the parts are | ||
| * {@code [catalog, db1, db2, ..., table]}; for v1 sources resolved through the session | ||
| * catalog, producers should normalize to {@code [spark_catalog, db, table]} so consumers see | ||
| * a stable arity per source kind. The structural form preserves arity and is unambiguous | ||
| * against quoted identifiers containing a literal {@code .}; consumers that need a flat | ||
| * string should join the parts themselves with a quoting scheme appropriate to their wire | ||
| * format. | ||
| * <p> | ||
| * Records' auto-generated {@code equals}/{@code hashCode} on array fields fall through to | ||
| * {@link Object#equals} (reference equality), so this record overrides them to use | ||
| * {@link Arrays#equals(Object[], Object[])} / {@link Arrays#hashCode(Object[])} on | ||
| * {@code nameParts} and give value-based semantics. The defensive-copy accessor override | ||
| * also clones on read so callers cannot mutate the record's internal array. | ||
| * | ||
| * @param nameParts structural multi-part identifier; must be non-empty and contain no | ||
| * null elements (defensive copy made; not validated element-wise -- | ||
| * callers passing nulls will surface NPEs in downstream consumers) | ||
| * @since 4.2.0 | ||
| */ | ||
| @Evolving | ||
| public record TableDependency(String[] nameParts) implements Dependency { | ||
| public TableDependency { | ||
| Objects.requireNonNull(nameParts, "nameParts must not be null"); | ||
| if (nameParts.length == 0) { | ||
| throw new IllegalArgumentException("nameParts must not be empty"); | ||
| } | ||
| nameParts = nameParts.clone(); | ||
| } | ||
|
|
||
| /** Returns a defensive copy of the underlying parts array. */ | ||
| @Override | ||
| public String[] nameParts() { return nameParts.clone(); } | ||
|
|
||
| @Override | ||
| public boolean equals(Object o) { | ||
| return o instanceof TableDependency that && Arrays.equals(nameParts, that.nameParts); | ||
| } | ||
|
|
||
| @Override | ||
| public int hashCode() { return Arrays.hashCode(nameParts); } | ||
|
|
||
| @Override | ||
| public String toString() { | ||
| return "TableDependency[nameParts=" + Arrays.toString(nameParts) + "]"; | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`FunctionDependency` is in the sealed `permits` list and exposed via the `Dependency.function(...)` factory below, but no producer in this PR ever emits one -- `MetricViewHelper.collectTableDependencies` only emits `TableDependency`. Two options: (a) drop `FunctionDependency` until it has a producer (the `@Evolving` annotation is meant to evolve before stabilizing, so adding it later is cheap); (b) keep it as groundwork but mention in the PR description so reviewers don't trip on the dead surface, and add a sentence to this class-level Javadoc noting that consumers may receive only `TableDependency` instances today.