
Commit d1db344

Python client for Spark Declarative Pipelines
1 parent ba9e0ae commit d1db344

21 files changed: +2021 -1 lines changed

bin/spark-pipelines

Lines changed: 33 additions & 0 deletions
@@ -0,0 +1,33 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default to standard python3 interpreter unless told otherwise
if [[ -z "$PYSPARK_PYTHON" ]]; then
  PYSPARK_PYTHON=python3
fi

if [ -z "${SPARK_HOME}" ]; then
  source "$(dirname "$0")"/find-spark-home
fi

# Add the PySpark classes to the Python path:
export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.9-src.zip:$PYTHONPATH"

${SPARK_HOME}/bin/spark-submit --conf spark.api.mode=connect "${SPARK_HOME}"/python/pyspark/sql/pipelines/cli.py "$@"
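The launcher defaults PYSPARK_PYTHON to python3, locates SPARK_HOME, puts the PySpark sources and the bundled Py4J zip on PYTHONPATH, and then delegates every argument to spark-submit with spark.api.mode=connect, using python/pyspark/sql/pipelines/cli.py as the entry point. The concrete flags are defined in cli.py, which this excerpt does not show; judging from the error conditions added below, a typical invocation would be something like: bin/spark-pipelines --spec ./pipeline.yaml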

dev/sparktestsupport/modules.py

Lines changed: 5 additions & 1 deletion
@@ -553,7 +553,11 @@ def __hash__(self):
         "pyspark.sql.tests.pandas.test_pandas_udf_typehints_with_future_annotations",
         "pyspark.sql.tests.pandas.test_pandas_udf_window",
         "pyspark.sql.tests.pandas.test_pandas_sqlmetrics",
-        "pyspark.sql.tests.pandas.test_converter",
+        "pyspark.sql.tests.pandas.test_converter",
+        "pyspark.sql.tests.pipelines.test_block_connect_access",
+        "pyspark.sql.tests.pipelines.test_cli",
+        "pyspark.sql.tests.pipelines.test_decorators",
+        "pyspark.sql.tests.pipelines.test_graph_element_registry",
         "pyspark.sql.tests.test_python_datasource",
         "pyspark.sql.tests.test_python_streaming_datasource",
         "pyspark.sql.tests.test_readwriter",

python/mypy.ini

Lines changed: 3 additions & 0 deletions
@@ -188,3 +188,6 @@ ignore_missing_imports = True
 ; Ignore errors for proto generated code
 [mypy-pyspark.sql.connect.proto.*, pyspark.sql.connect.proto, pyspark.sql.streaming.proto]
 ignore_errors = True
+
+[mypy-pyspark.sql.pipelines.proto.*]
+ignore_errors = True

python/pyspark/errors/error-conditions.json

Lines changed: 60 additions & 0 deletions
@@ -219,6 +219,11 @@
       "Unexpected filter <name>."
     ]
   },
+  "DECORATOR_ARGUMENT_NOT_CALLABLE": {
+    "message": [
+      "The first positional argument passed to @<decorator_name> must be callable. Either add @<decorator_name> with no parameters to your function, or pass options to @<decorator_name> using keyword arguments (e.g. <example_usage>)."
+    ]
+  },
   "DIFFERENT_PANDAS_DATAFRAME": {
     "message": [
       "DataFrames are not almost equal:",
@@ -447,6 +452,11 @@
       "StructField does not have typeName. Use typeName on its type explicitly instead."
     ]
   },
+  "GRAPH_ELEMENT_DEFINED_OUTSIDE_OF_DECLARATIVE_PIPELINE": {
+    "message": [
+      "APIs that define elements of a declarative pipeline can only be invoked within the context of defining a pipeline."
+    ]
+  },
   "INVALID_TYPE_DF_EQUALITY_ARG": {
     "message": [
       "Expected type <expected_type> for `<arg_name>` but got type <actual_type>."
@@ -552,6 +562,11 @@
       "Mixed type replacements are not supported."
     ]
   },
+  "MULTIPLE_PIPELINE_SPEC_FILES_FOUND": {
+    "message": [
+      "Multiple pipeline spec files found in the directory `<dir_path>`. Please remove one or choose a particular one with the --spec argument."
+    ]
+  },
   "NEGATIVE_VALUE": {
     "message": [
       "Value for `<arg_name>` must be greater than or equal to 0, got '<arg_value>'."
@@ -844,6 +859,46 @@
       "Pipe function `<func_name>` exited with error code <error_code>."
     ]
   },
+  "PIPELINE_SPEC_FIELD_NOT_DICT": {
+    "message": [
+      "Pipeline spec field `<field_name>` should be a dict, got <field_type>."
+    ]
+  },
+  "PIPELINE_SPEC_FILE_DOES_NOT_EXIST": {
+    "message": [
+      "The pipeline spec file `<spec_path>` does not exist."
+    ]
+  },
+  "ATTEMPT_ANALYSIS_IN_PIPELINE_QUERY_FUNCTION": {
+    "message": [
+      "Operations that trigger DataFrame analysis or execution are not allowed in pipeline query functions. Move code outside of the pipeline query function."
+    ]
+  },
+  "PIPELINE_SPEC_FILE_NOT_FOUND": {
+    "message": [
+      "No pipeline.yaml or pipeline.yml file provided in arguments or found in directory `<dir_path>` or readable ancestor directories."
+    ]
+  },
+  "PIPELINE_SPEC_DICT_KEY_NOT_STRING": {
+    "message": [
+      "For pipeline spec field `<field_name>`, key should be a string, got <key_type>."
+    ]
+  },
+  "PIPELINE_SPEC_DICT_VALUE_NOT_STRING": {
+    "message": [
+      "For pipeline spec field `<field_name>`, value for key `<key_name>` should be a string, got <value_type>."
+    ]
+  },
+  "PIPELINE_SPEC_UNEXPECTED_FIELD": {
+    "message": [
+      "Pipeline spec field `<field_name>` is unexpected."
+    ]
+  },
+  "PIPELINE_UNSUPPORTED_DEFINITIONS_FILE_EXTENSION": {
+    "message": [
+      "Pipeline definitions file `<file_path>` has an unsupported extension. Supported extensions are `.py` and `.sql`."
+    ]
+  },
   "PLOT_INVALID_TYPE_COLUMN": {
     "message": [
       "Column <col_name> must be one of <valid_types> for plotting, got <col_type>."
@@ -1145,6 +1200,11 @@
       "Pie plot requires either a `y` column or `subplots=True`."
     ]
   },
+  "UNSUPPORTED_PIPELINES_DATASET_TYPE": {
+    "message": [
+      "Unsupported pipelines dataset type: <dataset_type>."
+    ]
+  },
   "UNSUPPORTED_PLOT_BACKEND": {
     "message": [
       "`<backend>` is not supported, it should be one of the values from <supported_backends>"
python/pyspark/sql/pipelines/__init__.py

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from pyspark.sql.pipelines.api import (
    append_flow,
    create_streaming_table,
    materialized_view,
    table,
    temporary_view,
)

__all__ = [
    "append_flow",
    "create_streaming_table",
    "materialized_view",
    "table",
    "temporary_view",
]
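Together with bin/spark-pipelines and the CLI, this package is the user-facing API. A minimal sketch of a Python definitions file built on these entry points might look as follows; dataset names, the source path, and the exact keyword arguments (target= on append_flow, the positional name on create_streaming_table) are assumptions for illustration, and the file is meant to be evaluated by the spark-pipelines CLI, which provides the pipeline context.

from pyspark.sql import SparkSession
from pyspark.sql.pipelines import (
    append_flow,
    create_streaming_table,
    materialized_view,
    table,
)

spark = SparkSession.builder.getOrCreate()


@materialized_view
def raw_events():
    # Batch source; the path and format are hypothetical.
    return spark.read.json("/data/raw/events")


@table
def events_cleaned():
    # Reads the dataset defined above by name.
    return spark.read.table("raw_events").where("event_type IS NOT NULL")


# Declare a streaming table up front and feed it with an append flow.
create_streaming_table("events_stream")


@append_flow(target="events_stream")
def ingest_events():
    # "rate" is a built-in streaming source that emits monotonically increasing rows.
    return spark.readStream.format("rate").load()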
