diff --git a/.asf.yaml b/.asf.yaml
index 3935a525ff3c4..296aaff5c4a2d 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# https://cwiki.apache.org/confluence/display/INFRA/git+-+.asf.yaml+features
+# https://github.com/apache/infrastructure-asfyaml/blob/main/README.md
---
github:
description: "Apache Spark - A unified analytics engine for large-scale data processing"
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 6b2e72b3f23be..3e90bb329be56 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -50,6 +50,11 @@ on:
description: 'Number of job splits'
required: true
default: '1'
+ create-commit:
+ type: boolean
+ description: 'Commit the benchmark results to the current branch'
+ required: true
+ default: false
jobs:
matrix-gen:
@@ -195,10 +200,31 @@ jobs:
# To keep the directory structure and file permissions, tar them
# See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
echo "Preparing the benchmark results:"
- tar -cvf benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude=tpcds-sf-1-text --exclude-standard`
+ tar -cvf target/benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude=tpcds-sf-1-text --exclude-standard`
+      - name: Commit and push the benchmark results
+ if: ${{ inputs.create-commit && success() }}
+ run: |
+ git config --local user.name "${{ github.actor }}"
+ git config --local user.email "${{ github.event.pusher.email || format('{0}@users.noreply.github.com', github.actor) }}"
+ git add -A
+ git commit -m "Benchmark results for ${{ inputs.class }} (JDK ${{ inputs.jdk }}, Scala ${{ inputs.scala }}, split ${{ matrix.split }} of ${{ inputs.num-splits }})"
+ for i in {1..5}; do
+ echo "Attempt $i to push..."
+ git fetch origin ${{ github.ref_name }}
+ git rebase origin/${{ github.ref_name }}
+ if git push origin ${{ github.ref_name }}:${{ github.ref_name }}; then
+ echo "Push successful."
+ exit 0
+ else
+ echo "Push failed, retrying in 3 seconds..."
+ sleep 3
+ fi
+ done
+ echo "Error: Failed to push after 5 attempts."
+ exit 1
- name: Upload benchmark results
uses: actions/upload-artifact@v4
with:
name: benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}-${{ matrix.split }}
- path: benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar
+ path: target/benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar
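Note: the new create-commit path above pushes directly to the triggering branch and retries when parallel splits race on the push. A minimal standalone sketch of that rebase-and-retry pattern is below; the branch argument and the retry count are illustrative placeholders, not values read from the workflow.

    # Retry a push a few times, rebasing onto the remote branch before each attempt,
    # so concurrent pushes from parallel jobs do not fail as non-fast-forward.
    branch="${1:?usage: retry-push.sh <branch>}"
    for attempt in {1..5}; do
      echo "Attempt $attempt to push..."
      git fetch origin "$branch"
      git rebase "origin/$branch"
      if git push origin "$branch:$branch"; then
        echo "Push successful."
        exit 0
      fi
      echo "Push failed, retrying in 3 seconds..."
      sleep 3
    done
    echo "Error: failed to push after 5 attempts."
    exit 1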
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index ff005103a2461..b54a382dac053 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -112,7 +112,7 @@ jobs:
ui=false
docs=false
fi
- build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,profiler,protobuf,yarn,connect,sql,hive,pipelines"`
+ build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,utils-java,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,profiler,protobuf,yarn,connect,sql,hive,pipelines"`
precondition="
{
\"build\": \"$build\",
@@ -122,6 +122,8 @@ jobs:
\"tpcds-1g\": \"$tpcds\",
\"docker-integration-tests\": \"$docker\",
\"lint\" : \"true\",
+ \"java17\" : \"$build\",
+ \"java25\" : \"$build\",
\"docs\" : \"$docs\",
\"yarn\" : \"$yarn\",
\"k8s-integration-tests\" : \"$kubernetes\",
@@ -240,7 +242,7 @@ jobs:
# Note that the modules below are from sparktestsupport/modules.py.
modules:
- >-
- core, unsafe, kvstore, avro, utils,
+ core, unsafe, kvstore, avro, utils, utils-java,
network-common, network-shuffle, repl, launcher,
examples, sketch, variant
- >-
@@ -360,7 +362,7 @@ jobs:
- name: Install Python packages (Python 3.11)
if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') || contains(matrix.modules, 'yarn')
run: |
- python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1'
+ python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5'
python3.11 -m pip list
# Run the tests.
- name: Run tests
@@ -512,6 +514,8 @@ jobs:
pyspark-core, pyspark-errors, pyspark-streaming, pyspark-logger
- >-
pyspark-mllib, pyspark-ml, pyspark-ml-connect, pyspark-pipelines
+ - >-
+ pyspark-structured-streaming, pyspark-structured-streaming-connect
- >-
pyspark-connect
- >-
@@ -519,13 +523,9 @@ jobs:
- >-
pyspark-pandas-slow
- >-
- pyspark-pandas-connect-part0
- - >-
- pyspark-pandas-connect-part1
- - >-
- pyspark-pandas-connect-part2
+ pyspark-pandas-connect
- >-
- pyspark-pandas-connect-part3
+ pyspark-pandas-slow-connect
exclude:
      # Always run if pyspark == 'true', even if infra-image is skipped (such as in a non-master job)
# In practice, the build will run in individual PR, but not against the individual commit
@@ -533,16 +533,15 @@ jobs:
- modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-sql, pyspark-resource, pyspark-testing' }}
- modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-core, pyspark-errors, pyspark-streaming, pyspark-logger' }}
- modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-mllib, pyspark-ml, pyspark-ml-connect' }}
+ - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-structured-streaming, pyspark-structured-streaming-connect' }}
- modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-connect' }}
      # Always run if pyspark-pandas == 'true', even if infra-image is skipped (such as in a non-master job)
# In practice, the build will run in individual PR, but not against the individual commit
# in Apache Spark repository.
- modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas' }}
- modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-slow' }}
- - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part0' }}
- - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part1' }}
- - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part2' }}
- - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part3' }}
+ - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect' }}
+ - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-slow-connect' }}
env:
MODULES_TO_TEST: ${{ matrix.modules }}
HADOOP_PROFILE: ${{ inputs.hadoop }}
@@ -605,8 +604,9 @@ jobs:
run: |
for py in $(echo $PYTHON_TO_TEST | tr "," "\n")
do
- echo $py
+ $py --version
$py -m pip list
+ echo ""
done
- name: Install Conda for pip packaging test
if: contains(matrix.modules, 'pyspark-errors')
@@ -766,7 +766,7 @@ jobs:
python-version: '3.11'
- name: Install dependencies for Python CodeGen check
run: |
- python3.11 -m pip install 'black==23.12.1' 'protobuf==5.29.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
+ python3.11 -m pip install 'black==23.12.1' 'protobuf==5.29.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
python3.11 -m pip list
- name: Python CodeGen check for branch-3.5
if: inputs.branch == 'branch-3.5'
@@ -919,6 +919,42 @@ jobs:
- name: R linter
run: ./dev/lint-r
+ java17:
+ needs: [precondition]
+ if: fromJson(needs.precondition.outputs.required).java17 == 'true'
+ name: Java 17 build with Maven
+ runs-on: ubuntu-latest
+ timeout-minutes: 120
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-java@v4
+ with:
+ distribution: zulu
+ java-version: 17
+ - name: Build with Maven
+ run: |
+ export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+ export MAVEN_CLI_OPTS="--no-transfer-progress"
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl clean install
+
+ java25:
+ needs: [precondition]
+ if: fromJson(needs.precondition.outputs.required).java25 == 'true'
+ name: Java 25 build with Maven
+ runs-on: ubuntu-latest
+ timeout-minutes: 120
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-java@v4
+ with:
+ distribution: zulu
+ java-version: 25
+ - name: Build with Maven
+ run: |
+ export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
+ export MAVEN_CLI_OPTS="--no-transfer-progress"
+ ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl clean install
+
# Documentation build
docs:
needs: [precondition, infra-image]
@@ -998,10 +1034,14 @@ jobs:
# Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
python3.9 -m pip install ipython_genutils # See SPARK-38517
- python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly<6.0.0'
+ python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.22' pyarrow pandas 'plotly<6.0.0'
python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
- - name: List Python packages
+ - name: List Python packages for branch-3.5 and branch-4.0
+ if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
run: python3.9 -m pip list
+ - name: List Python packages
+ if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+ run: python3.11 -m pip list
- name: Install dependencies for documentation generation
run: |
# Keep the version of Bundler here in sync with the following locations:
@@ -1010,7 +1050,8 @@ jobs:
gem install bundler -v 2.4.22
cd docs
bundle install --retry=100
- - name: Run documentation build
+ - name: Run documentation build for branch-3.5 and branch-4.0
+ if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0'
run: |
# We need this link to make sure `python3` points to `python3.9` which contains the prerequisite packages.
ln -s "$(which python3.9)" "/usr/local/bin/python3"
@@ -1031,6 +1072,30 @@ jobs:
echo "SKIP_SQLDOC: $SKIP_SQLDOC"
cd docs
bundle exec jekyll build
+ - name: Run documentation build
+ if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0'
+ run: |
+ # We need this link to make sure `python3` points to `python3.11` which contains the prerequisite packages.
+ ln -s "$(which python3.11)" "/usr/local/bin/python3"
+ # Build docs first with SKIP_API to ensure they are buildable without requiring any
+ # language docs to be built beforehand.
+ cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd ..
+ if [ -f "./dev/is-changed.py" ]; then
+ # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
+ pyspark_modules=`cd dev && python3.11 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
+ if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi
+ if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi
+ fi
+ export PYSPARK_DRIVER_PYTHON=python3.11
+ export PYSPARK_PYTHON=python3.11
+ # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC`
+ echo "SKIP_ERRORDOC: $SKIP_ERRORDOC"
+ echo "SKIP_SCALADOC: $SKIP_SCALADOC"
+ echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC"
+ echo "SKIP_RDOC: $SKIP_RDOC"
+ echo "SKIP_SQLDOC: $SKIP_SQLDOC"
+ cd docs
+ bundle exec jekyll build
- name: Tar documentation
if: github.repository != 'apache/spark'
run: tar cjf site.tar.bz2 docs/_site
@@ -1259,9 +1324,9 @@ jobs:
sudo apt update
sudo apt-get install r-base
- name: Start Minikube
- uses: medyagh/setup-minikube@v0.0.19
+ uses: medyagh/setup-minikube@v0.0.20
with:
- kubernetes-version: "1.33.0"
+ kubernetes-version: "1.34.0"
      # GitHub Actions limit: cpu: 2, memory: 6947MB; limit to 2U6G for better resource statistics
cpus: 2
memory: 6144m
@@ -1279,8 +1344,10 @@ jobs:
kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true
if [[ "${{ inputs.branch }}" == 'branch-3.5' ]]; then
kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.7.0/installer/volcano-development.yaml || true
- else
+ elif [[ "${{ inputs.branch }}" == 'branch-4.0' ]]; then
kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.11.0/installer/volcano-development.yaml || true
+ else
+ kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.12.2/installer/volcano-development.yaml || true
fi
eval $(minikube docker-env)
build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
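Note: the new documentation-build step decides whether to skip the PySpark and SparkR docs by asking dev/is-changed.py about the PySpark module list. A minimal sketch of that check, assuming it runs from the repository root with the dev/ tooling referenced above available:

    # Derive the list of PySpark modules from dev/sparktestsupport and skip the
    # Python API docs (and R docs) when none of the corresponding modules changed.
    pyspark_modules=$(cd dev && python3 -c \
      "import sparktestsupport.modules as m; print(','.join(x.name for x in m.all_modules if x.name.startswith('pyspark')))")
    if [ "$(./dev/is-changed.py -m "$pyspark_modules")" = false ]; then
      export SKIP_PYTHONDOC=1
    fi
    if [ "$(./dev/is-changed.py -m sparkr)" = false ]; then
      export SKIP_RDOC=1
    fi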
diff --git a/.github/workflows/build_infra_images_cache.yml b/.github/workflows/build_infra_images_cache.yml
index ccd47826ff099..430903b570ea0 100644
--- a/.github/workflows/build_infra_images_cache.yml
+++ b/.github/workflows/build_infra_images_cache.yml
@@ -33,13 +33,13 @@ on:
- 'dev/spark-test-image/python-minimum/Dockerfile'
- 'dev/spark-test-image/python-ps-minimum/Dockerfile'
- 'dev/spark-test-image/pypy-310/Dockerfile'
- - 'dev/spark-test-image/python-309/Dockerfile'
- 'dev/spark-test-image/python-310/Dockerfile'
- 'dev/spark-test-image/python-311/Dockerfile'
- 'dev/spark-test-image/python-311-classic-only/Dockerfile'
- 'dev/spark-test-image/python-312/Dockerfile'
- 'dev/spark-test-image/python-313/Dockerfile'
- 'dev/spark-test-image/python-313-nogil/Dockerfile'
+ - 'dev/spark-test-image/python-314/Dockerfile'
- 'dev/spark-test-image/numpy-213/Dockerfile'
- '.github/workflows/build_infra_images_cache.yml'
# Create infra image when cutting down branches/tags
@@ -153,19 +153,6 @@ jobs:
- name: Image digest (PySpark with PyPy 3.10)
if: hashFiles('dev/spark-test-image/pypy-310/Dockerfile') != ''
run: echo ${{ steps.docker_build_pyspark_pypy_310.outputs.digest }}
- - name: Build and push (PySpark with Python 3.9)
- if: hashFiles('dev/spark-test-image/python-309/Dockerfile') != ''
- id: docker_build_pyspark_python_309
- uses: docker/build-push-action@v6
- with:
- context: ./dev/spark-test-image/python-309/
- push: true
- tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }}-static
- cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }}
- cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }},mode=max
- - name: Image digest (PySpark with Python 3.9)
- if: hashFiles('dev/spark-test-image/python-309/Dockerfile') != ''
- run: echo ${{ steps.docker_build_pyspark_python_309.outputs.digest }}
- name: Build and push (PySpark with Python 3.10)
if: hashFiles('dev/spark-test-image/python-310/Dockerfile') != ''
id: docker_build_pyspark_python_310
@@ -244,6 +231,19 @@ jobs:
- name: Image digest (PySpark with Python 3.13 no GIL)
if: hashFiles('dev/spark-test-image/python-313-nogil/Dockerfile') != ''
run: echo ${{ steps.docker_build_pyspark_python_313_nogil.outputs.digest }}
+ - name: Build and push (PySpark with Python 3.14)
+ if: hashFiles('dev/spark-test-image/python-314/Dockerfile') != ''
+ id: docker_build_pyspark_python_314
+ uses: docker/build-push-action@v6
+ with:
+ context: ./dev/spark-test-image/python-314/
+ push: true
+ tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{ github.ref_name }}-static
+ cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{ github.ref_name }}
+ cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{ github.ref_name }},mode=max
+ - name: Image digest (PySpark with Python 3.14)
+ if: hashFiles('dev/spark-test-image/python-314/Dockerfile') != ''
+ run: echo ${{ steps.docker_build_pyspark_python_314.outputs.digest }}
- name: Build and push (PySpark with Numpy 2.1.3)
if: hashFiles('dev/spark-test-image/numpy-213/Dockerfile') != ''
id: docker_build_pyspark_numpy_213
diff --git a/.github/workflows/build_maven_java21_arm.yml b/.github/workflows/build_maven_java21_arm.yml
index 505bdd63189c0..16417bb1c5f22 100644
--- a/.github/workflows/build_maven_java21_arm.yml
+++ b/.github/workflows/build_maven_java21_arm.yml
@@ -21,7 +21,7 @@ name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, ARM)"
on:
schedule:
- - cron: '0 15 * * *'
+ - cron: '0 15 */2 * *'
workflow_dispatch:
jobs:
diff --git a/.github/workflows/build_maven_java21_macos15.yml b/.github/workflows/build_maven_java21_macos26.yml
similarity index 98%
rename from .github/workflows/build_maven_java21_macos15.yml
rename to .github/workflows/build_maven_java21_macos26.yml
index 14db1b1871bc4..c858a7f70b270 100644
--- a/.github/workflows/build_maven_java21_macos15.yml
+++ b/.github/workflows/build_maven_java21_macos26.yml
@@ -17,7 +17,7 @@
# under the License.
#
-name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, MacOS-15)"
+name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, MacOS-26)"
on:
schedule:
@@ -33,7 +33,7 @@ jobs:
if: github.repository == 'apache/spark'
with:
java: 21
- os: macos-15
+ os: macos-26
arch: arm64
envs: >-
{
diff --git a/.github/workflows/build_non_ansi.yml b/.github/workflows/build_non_ansi.yml
index 547a227e61d7e..debdaf4f8709d 100644
--- a/.github/workflows/build_non_ansi.yml
+++ b/.github/workflows/build_non_ansi.yml
@@ -40,6 +40,7 @@ jobs:
"PYSPARK_IMAGE_TO_TEST": "python-311",
"PYTHON_TO_TEST": "python3.11",
"SPARK_ANSI_SQL_MODE": "false",
+ "SPARK_TEST_SPARK_BLOOM_FILTER_SUITE_ENABLED": "true"
}
jobs: >-
{
diff --git a/.github/workflows/build_python_3.9.yml b/.github/workflows/build_python_3.14.yml
similarity index 89%
rename from .github/workflows/build_python_3.9.yml
rename to .github/workflows/build_python_3.14.yml
index 0df17699140ed..45ea43f1d491c 100644
--- a/.github/workflows/build_python_3.9.yml
+++ b/.github/workflows/build_python_3.14.yml
@@ -17,7 +17,7 @@
# under the License.
#
-name: "Build / Python-only (master, Python 3.9)"
+name: "Build / Python-only (master, Python 3.14)"
on:
schedule:
@@ -37,8 +37,8 @@ jobs:
hadoop: hadoop3
envs: >-
{
- "PYSPARK_IMAGE_TO_TEST": "python-309",
- "PYTHON_TO_TEST": "python3.9"
+ "PYSPARK_IMAGE_TO_TEST": "python-314",
+ "PYTHON_TO_TEST": "python3.14"
}
jobs: >-
{
diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml
index 8d82ff192ab07..5edb54de82b6d 100644
--- a/.github/workflows/build_python_connect.yml
+++ b/.github/workflows/build_python_connect.yml
@@ -72,7 +72,7 @@ jobs:
python packaging/client/setup.py sdist
cd dist
pip install pyspark*client-*.tar.gz
- pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
+ pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting
- name: List Python packages
run: python -m pip list
- name: Run tests (local)
@@ -96,7 +96,7 @@ jobs:
          # Several catalog-related tests need to run sequentially, e.g., writing a table in a listener.
./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect,pyspark-ml-connect
          # None of the tests in the Pandas API on Spark depend on each other, so run them in parallel
- ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-pandas-connect-part0,pyspark-pandas-connect-part1,pyspark-pandas-connect-part2,pyspark-pandas-connect-part3
+ ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-pandas-connect,pyspark-pandas-slow-connect
# Stop Spark Connect server.
./sbin/stop-connect-server.sh
diff --git a/.github/workflows/build_python_connect35.yml b/.github/workflows/build_python_connect35.yml
index e68f288f0184f..0512e33d6cbea 100644
--- a/.github/workflows/build_python_connect35.yml
+++ b/.github/workflows/build_python_connect35.yml
@@ -68,10 +68,10 @@ jobs:
./build/sbt -Phive Test/package
- name: Install Python dependencies
run: |
- pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
+ pip install 'numpy==1.25.1' 'pyarrow>=18.0.0' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
# Add Python deps for Spark Connect.
- pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3'
+ pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3'
# Add torch as a testing dependency for TorchDistributor
pip install 'torch==2.0.1' 'torchvision==0.15.2' torcheval
diff --git a/.github/workflows/build_python_minimum.yml b/.github/workflows/build_python_minimum.yml
index 4e65503006489..3514a82f6217c 100644
--- a/.github/workflows/build_python_minimum.yml
+++ b/.github/workflows/build_python_minimum.yml
@@ -38,7 +38,7 @@ jobs:
envs: >-
{
"PYSPARK_IMAGE_TO_TEST": "python-minimum",
- "PYTHON_TO_TEST": "python3.9"
+ "PYTHON_TO_TEST": "python3.10"
}
jobs: >-
{
diff --git a/.github/workflows/build_python_ps_minimum.yml b/.github/workflows/build_python_ps_minimum.yml
index 3aa83ff06a996..ed80a904ebd7f 100644
--- a/.github/workflows/build_python_ps_minimum.yml
+++ b/.github/workflows/build_python_ps_minimum.yml
@@ -38,7 +38,7 @@ jobs:
envs: >-
{
"PYSPARK_IMAGE_TO_TEST": "python-ps-minimum",
- "PYTHON_TO_TEST": "python3.9"
+ "PYTHON_TO_TEST": "python3.10"
}
jobs: >-
{
diff --git a/.github/workflows/build_sparkr_window.yml b/.github/workflows/build_sparkr_window.yml
index e3ef9d7ba0752..8bbcdf79bd58b 100644
--- a/.github/workflows/build_sparkr_window.yml
+++ b/.github/workflows/build_sparkr_window.yml
@@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
#
-name: "Build / SparkR-only (master, 4.4.3, windows-2022)"
+name: "Build / SparkR-only (master, 4.4.3, windows-2025)"
on:
schedule:
@@ -26,7 +26,7 @@ on:
jobs:
build:
name: "Build module: sparkr"
- runs-on: windows-2022
+ runs-on: windows-2025
timeout-minutes: 120
if: github.repository == 'apache/spark'
steps:
diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml
index e0a5e411571a0..95c9aac33fc6c 100644
--- a/.github/workflows/maven_test.yml
+++ b/.github/workflows/maven_test.yml
@@ -67,7 +67,7 @@ jobs:
- hive2.3
modules:
- >-
- core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch,common#utils,common#variant
+ core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch,common#utils,common#utils-java,common#variant
- >-
graphx,streaming,hadoop-cloud
- >-
@@ -78,19 +78,13 @@ jobs:
connector#kafka-0-10,connector#kafka-0-10-sql,connector#kafka-0-10-token-provider,connector#spark-ganglia-lgpl,connector#protobuf,connector#avro,connector#kinesis-asl
- >-
sql#api,sql#catalyst,resource-managers#yarn,resource-managers#kubernetes#core
+ - >-
+ connect
      # Here, we split the Hive and SQL tests into the slow ones and the rest.
included-tags: [ "" ]
excluded-tags: [ "" ]
comment: [ "" ]
include:
- # Connect tests
- - modules: connect
- java: ${{ inputs.java }}
- hadoop: ${{ inputs.hadoop }}
- hive: hive2.3
- # TODO(SPARK-47110): Reenble AmmoniteTest tests in Maven builds
- excluded-tags: org.apache.spark.tags.AmmoniteTest
- comment: ""
# Hive tests
- modules: sql#hive
java: ${{ inputs.java }}
@@ -181,15 +175,24 @@ jobs:
- name: Install Python packages (Python 3.11)
if: contains(matrix.modules, 'resource-managers#yarn') || (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
run: |
- python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1'
+ python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5'
python3.11 -m pip list
- # Run the tests.
+      # Run the tests under the script command so they get a pseudo-TTY.
+      # BSD's script command doesn't support the -c option, and its usage differs from the Linux one.
+      # The flavor of script that is installed is detected by running `script -qec true`.
- name: Run tests
env: ${{ fromJSON(inputs.envs) }}
+ shell: |
+ bash -c "if script -qec true 2>/dev/null; then script -qec bash\ {0}; else script -qe /dev/null bash {0}; fi"
run: |
+ # Fix for TTY related issues when launching the Ammonite REPL in tests.
+ export TERM=vt100
+      # `set -e` so that the exit status is propagated as expected when the commands run under the script command
+ set -e
export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN"
export MAVEN_CLI_OPTS="--no-transfer-progress"
export JAVA_VERSION=${{ matrix.java }}
+ export INPUT_BRANCH=${{ inputs.branch }}
export ENABLE_KINESIS_TESTS=0
# Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10
export TEST_MODULES=`echo "$MODULES_TO_TEST" | sed -e "s%#%/%g"`
@@ -202,13 +205,24 @@ jobs:
if [[ "$INCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
+ elif [[ "$MODULES_TO_TEST" == "connect" && "$INPUT_BRANCH" == "branch-4.0" ]]; then
+            # SPARK-53914: Remove sql/connect/client/jdbc from `-pl` for branch-4.0; this branch can be deleted after the EOL of branch-4.0.
+ ./build/mvn $MAVEN_CLI_OPTS -Djava.version=${JAVA_VERSION/-ea} -pl sql/connect/client/jvm,sql/connect/common,sql/connect/server test -fae
elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
- ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl sql/connect/client/jvm,sql/connect/common,sql/connect/server test -fae
+ ./build/mvn $MAVEN_CLI_OPTS -Djava.version=${JAVA_VERSION/-ea} -pl sql/connect/client/jdbc,sql/connect/client/jvm,sql/connect/common,sql/connect/server test -fae
elif [[ "$EXCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
# To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae
+ elif [[ "$MODULES_TO_TEST" == *"sql#pipelines"* && "$INPUT_BRANCH" == "branch-4.0" ]]; then
+            # SPARK-52441: Remove sql/pipelines from TEST_MODULES for branch-4.0; this branch can be deleted after the EOL of branch-4.0.
+ TEST_MODULES=${TEST_MODULES/,sql\/pipelines/}
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
+ elif [[ "$MODULES_TO_TEST" == *"common#utils-java"* && "$INPUT_BRANCH" == "branch-4.0" ]]; then
+            # SPARK-53138: Remove common/utils-java from TEST_MODULES for branch-4.0; this branch can be deleted after the EOL of branch-4.0.
+ TEST_MODULES=${TEST_MODULES/,common\/utils-java/}
+ ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
else
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae
fi
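Note: the custom `shell:` wrapper above runs the step under the `script` command so Maven gets a pseudo-TTY (needed for the Ammonite REPL tests), after probing which flavor of `script` is installed. A standalone sketch of the same idea, where CMD stands in for the command file that GitHub Actions substitutes as {0}:

    # Run a command file under `script` to give it a pseudo-TTY, handling both
    # the util-linux and the BSD flavor of the script command.
    CMD="$1"
    if script -qec true 2>/dev/null; then
      script -qec "bash $CMD"            # util-linux flavor: -c takes the command string
    else
      script -qe /dev/null bash "$CMD"   # BSD flavor: the command follows the typescript file
    fi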
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 4bcc275064d3c..86ef00220b373 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -52,18 +52,18 @@ jobs:
with:
distribution: zulu
java-version: 17
- - name: Install Python 3.9
+ - name: Install Python 3.11
uses: actions/setup-python@v5
with:
- python-version: '3.9'
+ python-version: '3.11'
architecture: x64
cache: 'pip'
- name: Install Python dependencies
run: |
pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
- ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.2.3' 'plotly>=4.8' 'docutils<0.18.0' \
+ ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.2' 'plotly>=4.8' 'docutils<0.18.0' \
'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \
- 'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+ 'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
- name: Install Ruby for documentation generation
uses: ruby/setup-ruby@v1
diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml
index 9a87c4f7061b0..9a6afc095063c 100644
--- a/.github/workflows/python_hosted_runner_test.yml
+++ b/.github/workflows/python_hosted_runner_test.yml
@@ -74,6 +74,8 @@ jobs:
pyspark-core, pyspark-errors, pyspark-streaming
- >-
pyspark-mllib, pyspark-ml, pyspark-ml-connect
+ - >-
+ pyspark-structured-streaming, pyspark-structured-streaming-connect
- >-
pyspark-connect
- >-
@@ -81,13 +83,9 @@ jobs:
- >-
pyspark-pandas-slow
- >-
- pyspark-pandas-connect-part0
- - >-
- pyspark-pandas-connect-part1
- - >-
- pyspark-pandas-connect-part2
+ pyspark-pandas-connect
- >-
- pyspark-pandas-connect-part3
+ pyspark-pandas-slow-connect
env:
MODULES_TO_TEST: ${{ matrix.modules }}
PYTHON_TO_TEST: python${{inputs.python}}
@@ -149,8 +147,8 @@ jobs:
run: |
python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2'
python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0'
- python${{matrix.python}} -m pip install numpy 'pyarrow>=19.0.0' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
- python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \
+ python${{matrix.python}} -m pip install numpy 'pyarrow>=21.0.0' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
+ python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \
python${{matrix.python}} -m pip cache purge
- name: List Python packages
run: python${{matrix.python}} -m pip list
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 976aaf616295c..5de61c831cbef 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -74,7 +74,8 @@ on:
inputs:
branch:
description: 'Branch to release. Leave it empty to launch a dryrun. Dispatch this workflow only in the forked repository.'
- required: false
+ required: true
+ default: master
release-version:
description: 'Release version. Leave it empty to launch a dryrun.'
required: false
@@ -90,16 +91,34 @@ jobs:
release:
name: Release Apache Spark
runs-on: ubuntu-latest
- # Do not allow dispatching this workflow manually in the main repo.
- # and skip this workflow in forked repository when running as a
- # scheduled job (dryrun).
- if: ${{ (github.repository == 'apache/spark') != (inputs.branch != '' && inputs.release-version != '') }}
+ # Allow workflow to run only in the following cases:
+ # 1. In the apache/spark repository:
+ # - Only allow dry runs (i.e., both 'branch' and 'release-version' inputs are empty).
+ # 2. In forked repositories:
+ # - Allow real runs when both 'branch' and 'release-version' are provided.
+ # - Allow dry runs only if manually dispatched (not on a schedule).
+ if: |
+ (
+ github.repository == 'apache/spark' &&
+ inputs.branch == '' &&
+ inputs.release-version == ''
+ ) || (
+ github.repository != 'apache/spark' &&
+ (
+ (inputs.branch != '' && inputs.release-version != '') || github.event_name == 'workflow_dispatch'
+ )
+ )
steps:
- name: Checkout Spark repository
uses: actions/checkout@v4
with:
repository: apache/spark
ref: "${{ inputs.branch }}"
+ - name: Free up disk space
+ run: |
+ if [ -f ./dev/free_disk_space ]; then
+ ./dev/free_disk_space
+ fi
- name: Release Apache Spark
env:
GIT_BRANCH: "${{ inputs.branch }}"
@@ -132,22 +151,12 @@ jobs:
sleep 60
fi
- empty_count=0
- non_empty_count=0
- for val in "$GIT_BRANCH" "$RELEASE_VERSION" "$SPARK_RC_COUNT"; do
- if [ -z "$val" ]; then
- empty_count=$((empty_count+1))
- else
- non_empty_count=$((non_empty_count+1))
- fi
- done
-
- if [ "$empty_count" -gt 0 ] && [ "$non_empty_count" -gt 0 ]; then
- echo "Error: Either provide all inputs or leave them all empty for a dryrun."
+ if { [ -n "$RELEASE_VERSION" ] && [ -z "$SPARK_RC_COUNT" ]; } || { [ -z "$RELEASE_VERSION" ] && [ -n "$SPARK_RC_COUNT" ]; }; then
+ echo "Error: Either provide both 'Release version' and 'RC number', or leave both empty for a dryrun."
exit 1
fi
- if [ "$empty_count" -eq 3 ]; then
+ if [ -z "$RELEASE_VERSION" ] && [ -z "$SPARK_RC_COUNT" ]; then
echo "Dry run mode enabled"
export DRYRUN_MODE=1
ASF_PASSWORD="not_used"
@@ -155,7 +164,6 @@ jobs:
GPG_PASSPHRASE="not_used"
ASF_USERNAME="gurwls223"
export SKIP_TAG=1
- unset GIT_BRANCH
unset RELEASE_VERSION
else
echo "Full release mode enabled"
@@ -163,7 +171,7 @@ jobs:
fi
export ASF_PASSWORD GPG_PRIVATE_KEY GPG_PASSPHRASE ASF_USERNAME
- [ -n "$GIT_BRANCH" ] && export GIT_BRANCH
+ export GIT_BRANCH="${GIT_BRANCH:-master}"
[ -n "$RELEASE_VERSION" ] && export RELEASE_VERSION
if [ "$DRYRUN_MODE" = "1" ]; then
@@ -237,9 +245,18 @@ jobs:
cp "$file" "$file.bak"
for pattern in "${PATTERNS[@]}"; do
[ -n "$pattern" ] || continue # Skip empty patterns
- escaped_pattern=$(printf '%s\n' "$pattern" | sed 's/[\/&]/\\&/g')
- sed -i "s/${escaped_pattern}/***/g" "$file"
+
+ # Safely escape special characters for sed
+ escaped_pattern=${pattern//\\/\\\\} # Escape backslashes
+ escaped_pattern=${escaped_pattern//\//\\/} # Escape forward slashes
+ escaped_pattern=${escaped_pattern//&/\\&} # Escape &
+ escaped_pattern=${escaped_pattern//$'\n'/} # Remove newlines
+ escaped_pattern=${escaped_pattern//$'\r'/} # Remove carriage returns (optional)
+
+ # Redact the pattern
+ sed -i.bak "s/${escaped_pattern}/***/g" "$file"
done
+ rm -f "$file.bak"
done
# Zip logs/output
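Note: the reworked redaction step escapes each secret with bash parameter expansion before handing it to sed, rather than escaping via sed itself. A minimal sketch of that escaping, with SECRET and LOG_FILE as placeholder values:

    SECRET='p@ss/w0rd&123'
    LOG_FILE=release.log
    escaped=${SECRET//\\/\\\\}       # escape backslashes first
    escaped=${escaped//\//\\/}       # escape the / delimiter used by s///
    escaped=${escaped//&/\\&}        # escape &, which sed expands to the whole match
    escaped=${escaped//$'\n'/}       # drop newlines; sed patterns are single-line
    sed -i "s/${escaped}/***/g" "$LOG_FILE"   # GNU sed in-place edit, as on the Linux runners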
diff --git a/.gitignore b/.gitignore
index b6a1e63c41920..bbf02496498c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -125,3 +125,7 @@ sql/api/gen/
sql/api/src/main/gen/
sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.tokens
sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/gen/
+
+tpcds-sf-1/
+tpcds-sf-1-text/
+tpcds-kit/
diff --git a/connect-examples/server-library-example/client/src/main/resources/log4j2.xml b/.mvn/extensions.xml
similarity index 72%
rename from connect-examples/server-library-example/client/src/main/resources/log4j2.xml
rename to .mvn/extensions.xml
index 21b0d9719193e..da2aee8827a54 100644
--- a/connect-examples/server-library-example/client/src/main/resources/log4j2.xml
+++ b/.mvn/extensions.xml
@@ -15,16 +15,10 @@
~ See the License for the specific language governing permissions and
~ limitations under the License.
-->
-
-
-
-
-
-
-
-
-
-
-
-
-
+<extensions>
+  <extension>
+    <groupId>eu.maveniverse.maven.nisse</groupId>
+    <artifactId>extension</artifactId>
+    <version>0.4.6</version>
+  </extension>
+</extensions>
diff --git a/.mvn/jvm.config b/.mvn/jvm.config
index 81b88d8173419..b41dca0e56a6b 100644
--- a/.mvn/jvm.config
+++ b/.mvn/jvm.config
@@ -1 +1,4 @@
+-XX:+IgnoreUnrecognizedVMOptions
+-XX:+UnlockDiagnosticVMOptions
+-XX:GCLockerRetryAllocationCount=100
--enable-native-access=ALL-UNNAMED
diff --git a/.mvn/maven.config b/.mvn/maven.config
new file mode 100644
index 0000000000000..e61f1a94abdef
--- /dev/null
+++ b/.mvn/maven.config
@@ -0,0 +1 @@
+-Dnisse.compat.osDetector
diff --git a/LICENSE b/LICENSE
index 9b1e96a44a58c..ef8192ec38d05 100644
--- a/LICENSE
+++ b/LICENSE
@@ -217,6 +217,7 @@ core/src/main/resources/org/apache/spark/ui/static/vis*
connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java
core/src/main/resources/org/apache/spark/ui/static/d3-flamegraph.min.js
core/src/main/resources/org/apache/spark/ui/static/d3-flamegraph.css
+mllib-local/src/main/scala/scala/collection/compat/package.scala
Python Software Foundation License
----------------------------------
diff --git a/LICENSE-binary b/LICENSE-binary
index 0c3c7aecb71ac..6ce7249e02e3b 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -365,8 +365,6 @@ org.apache.xbean:xbean-asm9-shaded
org.apache.yetus:audience-annotations
org.apache.zookeeper:zookeeper
org.apache.zookeeper:zookeeper-jute
-org.codehaus.jackson:jackson-core-asl
-org.codehaus.jackson:jackson-mapper-asl
org.datanucleus:datanucleus-api-jdo
org.datanucleus:datanucleus-core
org.datanucleus:datanucleus-rdbms
@@ -388,7 +386,6 @@ org.glassfish.jersey.core:jersey-common
org.glassfish.jersey.core:jersey-server
org.glassfish.jersey.inject:jersey-hk2
org.javassist:javassist
-org.jetbrains:annotations
org.json4s:json4s-ast_2.13
org.json4s:json4s-core_2.13
org.json4s:json4s-jackson-core_2.13
@@ -401,7 +398,6 @@ org.rocksdb:rocksdbjni
org.scala-lang:scala-compiler
org.scala-lang:scala-library
org.scala-lang:scala-reflect
-org.scala-lang.modules:scala-collection-compat_2.13
org.scala-lang.modules:scala-parallel-collections_2.13
org.scala-lang.modules:scala-parser-combinators_2.13
org.scala-lang.modules:scala-xml_2.13
@@ -442,7 +438,6 @@ com.github.luben:zstd-jni
com.github.wendykierp:JTransforms
javolution:javolution
jline:jline
-org.jodd:jodd-core
pl.edu.icm:JLargeArrays
python/pyspark/errors/exceptions/tblib.py
@@ -465,6 +460,7 @@ org.codehaus.janino:janino
org.fusesource.leveldbjni:leveldbjni-all
org.jline:jline
org.jpmml:pmml-model
+org.locationtech.jts:jts-core
org.threeten:threeten-extra
python/lib/py4j-*-src.zip
@@ -483,7 +479,6 @@ dev.ludovic.netlib:blas
dev.ludovic.netlib:arpack
dev.ludovic.netlib:lapack
net.razorvine:pickle
-org.bouncycastle:bcprov-jdk18on
org.checkerframework:checker-qual
org.typelevel:algebra_2.13:jar
org.typelevel:cats-kernel_2.13
diff --git a/README.md b/README.md
index 0f0bf039550d7..65dfd67ac520e 100644
--- a/README.md
+++ b/README.md
@@ -32,11 +32,10 @@ This README file only contains basic setup instructions.
| | [](https://github.com/apache/spark/actions/workflows/build_rockdb_as_ui_backend.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_maven.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_maven_java21.yml) |
-| | [](https://github.com/apache/spark/actions/workflows/build_maven_java21_macos15.yml) |
+| | [](https://github.com/apache/spark/actions/workflows/build_maven_java21_macos15.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_maven_java21_arm.yml) |
-| | [](https://github.com/apache/spark/actions/workflows/build_coverage.yml) |
+| | [](https://github.com/apache/spark/actions/workflows/build_coverage.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_python_pypy3.10.yml) |
-| | [](https://github.com/apache/spark/actions/workflows/build_python_3.9.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_python_3.10.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_python_3.11_classic_only.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_python_3.11_arm.yml) |
@@ -45,6 +44,7 @@ This README file only contains basic setup instructions.
| | [](https://github.com/apache/spark/actions/workflows/build_python_3.12.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_python_3.13.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_python_3.13_nogil.yml) |
+| | [](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_python_minimum.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_python_ps_minimum.yml) |
| | [](https://github.com/apache/spark/actions/workflows/build_python_connect35.yml) |
diff --git a/assembly/pom.xml b/assembly/pom.xml
index a85ac5d9bc837..0e6012062313e 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -125,6 +125,18 @@
       <scope>provided</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-client-jdbc_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.spark</groupId>
+          <artifactId>spark-connect-shims_${scala.binary.version}</artifactId>
+        </exclusion>
+      </exclusions>
+      <scope>provided</scope>
+    </dependency>
-    <dependency>
-      <groupId>org.bouncycastle</groupId>
-      <artifactId>bcprov-jdk18on</artifactId>
-      <scope>${hadoop.deps.scope}</scope>
-    </dependency>
@@ -221,6 +223,20 @@
+              <execution>
+                <id>copy-connect-client-jdbc-jar</id>
+                <phase>package</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+                <configuration>
+                  <executable>cp</executable>
+                  <arguments>
+                    <argument>${basedir}/../sql/connect/client/jdbc/target/spark-connect-client-jdbc_${scala.binary.version}-${project.version}.jar</argument>
+                    <argument>${basedir}/target/scala-${scala.binary.version}/jars/connect-repl</argument>
+                  </arguments>
+                </configuration>
+              </execution>
@@ -306,13 +322,6 @@
providedprovided
- provided
-
-
-
- hive-jackson-provided
-
- provided
diff --git a/bin/load-spark-env.sh b/bin/load-spark-env.sh
index 4ab35ad28751e..8db58ad387e81 100644
--- a/bin/load-spark-env.sh
+++ b/bin/load-spark-env.sh
@@ -65,6 +65,6 @@ export SPARK_SCALA_VERSION=2.13
#fi
# Append jline option to enable the Beeline process to run in background.
-if [ -e /usr/bin/tty -a "`tty`" != "not a tty" -a ! -p /dev/stdin ]; then
+if [[ ( ! $(ps -o stat= -p $$ 2>/dev/null) =~ "+" ) && ! ( -p /dev/stdin ) ]]; then
export SPARK_BEELINE_OPTS="$SPARK_BEELINE_OPTS -Djline.terminal=jline.UnsupportedTerminal"
fi
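Note: the new Beeline check replaces the tty-based test with a process-state test; in ps output the STAT column carries a trailing '+' for processes in the foreground process group, so the jline option is only appended when the shell is backgrounded or has no stdin terminal. A minimal sketch of the underlying check:

    # Is the current shell in the foreground process group?
    # Both procps (Linux) and BSD ps mark foreground processes with '+' in STAT.
    if [[ $(ps -o stat= -p $$) == *+* ]]; then
      echo "foreground"
    else
      echo "background or no controlling terminal"
    fi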
diff --git a/bin/spark-pipelines b/bin/spark-pipelines
index 52baeeafab08a..16ec90e3a1aec 100755
--- a/bin/spark-pipelines
+++ b/bin/spark-pipelines
@@ -30,4 +30,11 @@ fi
export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.9-src.zip:$PYTHONPATH"
-$PYSPARK_PYTHON "${SPARK_HOME}"/python/pyspark/pipelines/cli.py "$@"
+SDP_CLI_PY_FILE_PATH=$("${PYSPARK_PYTHON}" - <<'EOF'
+import pyspark, os
+from pathlib import Path
+print(Path(os.path.dirname(pyspark.__file__)) / "pipelines" / "cli.py")
+EOF
+)
+
+exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkPipelines "$SDP_CLI_PY_FILE_PATH" "$@"
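Note: instead of running the in-tree cli.py, the script above resolves the pipelines CLI from whatever pyspark the configured interpreter imports, then hands that path to SparkPipelines. A minimal sketch of the resolution step, assuming an interpreter with pyspark importable:

    # Ask Python where the installed pyspark package lives and build the path to
    # its pipelines CLI from there, instead of hard-coding a source-tree path.
    CLI_PATH=$("${PYSPARK_PYTHON:-python3}" -c 'import os, pyspark; print(os.path.join(os.path.dirname(pyspark.__file__), "pipelines", "cli.py"))')
    echo "Pipelines CLI: $CLI_PATH"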
diff --git a/build/sbt b/build/sbt
index db9d3b345ff6f..fe446fd813fcf 100755
--- a/build/sbt
+++ b/build/sbt
@@ -36,7 +36,7 @@ fi
declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
declare -r sbt_opts_file=".sbtopts"
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"
-declare -r default_sbt_opts="-Xss64m"
+declare -r default_sbt_opts="-Xss64m -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:GCLockerRetryAllocationCount=100 --enable-native-access=ALL-UNNAMED"
usage() {
cat <slf4j-api
-
- commons-io
- commons-io
- test
-
-
- org.apache.commons
- commons-lang3
- test
-
-
org.apache.logging.log4jlog4j-api
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java
index 5265881e990e9..a9d6784805f6d 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java
@@ -19,7 +19,7 @@
import java.util.Arrays;
-import com.google.common.base.Preconditions;
+import org.apache.spark.network.util.JavaUtils;
/**
* A factory for array wrappers so that arrays can be used as keys in a map, sorted or not.
@@ -38,7 +38,7 @@ class ArrayWrappers {
@SuppressWarnings("unchecked")
public static Comparable
-
- org.apache.commons
- commons-lang3
- ${leveldbjni.group}leveldbjni-all
@@ -174,7 +170,7 @@
org.apache.spark
- spark-common-utils_${scala.binary.version}
+ spark-common-utils-java_${scala.binary.version}${project.version}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java
index dd7c2061ec95b..eed43a8d28d90 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java
@@ -26,11 +26,8 @@
import java.nio.channels.FileChannel;
import java.nio.file.StandardOpenOption;
-import com.google.common.io.ByteStreams;
import io.netty.channel.DefaultFileRegion;
import io.netty.handler.stream.ChunkedStream;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.LimitedInputStream;
@@ -100,7 +97,7 @@ public InputStream createInputStream() throws IOException {
boolean shouldClose = true;
try {
is = new FileInputStream(file);
- ByteStreams.skipFully(is, offset);
+ is.skipNBytes(offset);
InputStream r = new LimitedInputStream(is, length);
shouldClose = false;
return r;
@@ -152,10 +149,7 @@ public Object convertToNettyForSsl() throws IOException {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("file", file)
- .append("offset", offset)
- .append("length", length)
- .toString();
+ return "FileSegmentManagedBuffer[file=" + file + ",offset=" + offset +
+ ",length=" + length + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java
index a40cfc8bc04b1..e7b8bafa92f4b 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java
@@ -23,8 +23,6 @@
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufInputStream;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
/**
* A {@link ManagedBuffer} backed by a Netty {@link ByteBuf}.
@@ -75,8 +73,6 @@ public Object convertToNettyForSsl() throws IOException {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("buf", buf)
- .toString();
+ return "NettyManagedBuffer[buf=" + buf + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java
index 6eb8d4e2c731c..d97f853c58f1f 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java
@@ -23,8 +23,6 @@
import io.netty.buffer.ByteBufInputStream;
import io.netty.buffer.Unpooled;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
/**
* A {@link ManagedBuffer} backed by {@link ByteBuffer}.
@@ -73,9 +71,7 @@ public Object convertToNettyForSsl() throws IOException {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("buf", buf)
- .toString();
+ return "NioManagedBuffer[buf=" + buf + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java
index a9df47645d36f..f02f2c63ecd4c 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java
@@ -21,20 +21,17 @@
import java.io.IOException;
import java.net.SocketAddress;
import java.nio.ByteBuffer;
+import java.util.Objects;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Throwables;
import com.google.common.util.concurrent.SettableFuture;
import io.netty.channel.Channel;
import io.netty.util.concurrent.Future;
import io.netty.util.concurrent.GenericFutureListener;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
@@ -43,6 +40,7 @@
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.buffer.NioManagedBuffer;
import org.apache.spark.network.protocol.*;
+import org.apache.spark.network.util.JavaUtils;
import static org.apache.spark.network.util.NettyUtils.getRemoteAddress;
@@ -81,8 +79,8 @@ public class TransportClient implements Closeable {
private volatile boolean timedOut;
public TransportClient(Channel channel, TransportResponseHandler handler) {
- this.channel = Preconditions.checkNotNull(channel);
- this.handler = Preconditions.checkNotNull(handler);
+ this.channel = Objects.requireNonNull(channel);
+ this.handler = Objects.requireNonNull(handler);
this.timedOut = false;
}
@@ -113,7 +111,7 @@ public String getClientId() {
* Trying to set a different client ID after it's been set will result in an exception.
*/
public void setClientId(String id) {
- Preconditions.checkState(clientId == null, "Client ID has already been set.");
+ JavaUtils.checkState(clientId == null, "Client ID has already been set.");
this.clientId = id;
}
@@ -290,10 +288,9 @@ public void onFailure(Throwable e) {
try {
return result.get(timeoutMs, TimeUnit.MILLISECONDS);
} catch (ExecutionException e) {
- Throwables.throwIfUnchecked(e.getCause());
throw new RuntimeException(e.getCause());
} catch (Exception e) {
- Throwables.throwIfUnchecked(e);
+ if (e instanceof RuntimeException re) throw re;
throw new RuntimeException(e);
}
}
@@ -338,11 +335,8 @@ public void close() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("remoteAddress", channel.remoteAddress())
- .append("clientId", clientId)
- .append("isActive", isActive())
- .toString();
+ return "TransportClient[remoteAddress=" + channel.remoteAddress() + "clientId=" + clientId +
+ ",isActive=" + isActive() + "]";
}
private static long requestId() {
@@ -369,8 +363,8 @@ public void operationComplete(Future<? super Void> future) throws Exception {
}
} else {
logger.error("Failed to send RPC {} to {}", future.cause(),
- MDC.of(LogKeys.REQUEST_ID$.MODULE$, requestId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)));
+ MDC.of(LogKeys.REQUEST_ID, requestId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)));
channel.close();
try {
String errorMsg = String.format("Failed to send RPC %s to %s: %s", requestId,
diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
index d64b8c8f838e9..2137b5f3136ef 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
@@ -21,16 +21,15 @@
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.SocketAddress;
+import java.util.ArrayList;
import java.util.List;
+import java.util.Objects;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicReference;
import com.codahale.metrics.MetricSet;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Throwables;
-import com.google.common.collect.Lists;
import io.netty.bootstrap.Bootstrap;
import io.netty.buffer.PooledByteBufAllocator;
import io.netty.channel.Channel;
@@ -100,9 +99,9 @@ private static class ClientPool {
public TransportClientFactory(
TransportContext context,
List clientBootstraps) {
- this.context = Preconditions.checkNotNull(context);
+ this.context = Objects.requireNonNull(context);
this.conf = context.getConf();
- this.clientBootstraps = Lists.newArrayList(Preconditions.checkNotNull(clientBootstraps));
+ this.clientBootstraps = new ArrayList<>(Objects.requireNonNull(clientBootstraps));
this.connectionPool = new ConcurrentHashMap<>();
this.numConnectionsPerPeer = conf.numConnectionsPerPeer();
this.rand = new Random();
@@ -193,9 +192,9 @@ public TransportClient createClient(String remoteHost, int remotePort, boolean f
final String resolvMsg = resolvedAddress.isUnresolved() ? "failed" : "succeed";
if (hostResolveTimeMs > 2000) {
logger.warn("DNS resolution {} for {} took {} ms",
- MDC.of(LogKeys.STATUS$.MODULE$, resolvMsg),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, resolvedAddress),
- MDC.of(LogKeys.TIME$.MODULE$, hostResolveTimeMs));
+ MDC.of(LogKeys.STATUS, resolvMsg),
+ MDC.of(LogKeys.HOST_PORT, resolvedAddress),
+ MDC.of(LogKeys.TIME, hostResolveTimeMs));
} else {
logger.trace("DNS resolution {} for {} took {} ms",
resolvMsg, resolvedAddress, hostResolveTimeMs);
@@ -210,7 +209,7 @@ public TransportClient createClient(String remoteHost, int remotePort, boolean f
return cachedClient;
} else {
logger.info("Found inactive connection to {}, creating a new one.",
- MDC.of(LogKeys.HOST_PORT$.MODULE$, resolvedAddress));
+ MDC.of(LogKeys.HOST_PORT, resolvedAddress));
}
}
// If this connection should fast fail when last connection failed in last fast fail time
@@ -314,7 +313,7 @@ public void operationComplete(final Future handshakeFuture) {
logger.debug("{} successfully completed TLS handshake to ", address);
} else {
logger.info("failed to complete TLS handshake to {}", handshakeFuture.cause(),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, address));
+ MDC.of(LogKeys.HOST_PORT, address));
cf.channel().close();
}
}
@@ -340,17 +339,17 @@ public void operationComplete(final Future handshakeFuture) {
} catch (Exception e) { // catch non-RuntimeExceptions too as bootstrap may be written in Scala
long bootstrapTimeMs = (System.nanoTime() - preBootstrap) / 1000000;
logger.error("Exception while bootstrapping client after {} ms", e,
- MDC.of(LogKeys.BOOTSTRAP_TIME$.MODULE$, bootstrapTimeMs));
+ MDC.of(LogKeys.BOOTSTRAP_TIME, bootstrapTimeMs));
client.close();
- Throwables.throwIfUnchecked(e);
+ if (e instanceof RuntimeException re) throw re;
throw new RuntimeException(e);
}
long postBootstrap = System.nanoTime();
logger.info("Successfully created connection to {} after {} ms ({} ms spent in bootstraps)",
- MDC.of(LogKeys.HOST_PORT$.MODULE$, address),
- MDC.of(LogKeys.ELAPSED_TIME$.MODULE$, (postBootstrap - preConnect) / 1000000),
- MDC.of(LogKeys.BOOTSTRAP_TIME$.MODULE$, (postBootstrap - preBootstrap) / 1000000));
+ MDC.of(LogKeys.HOST_PORT, address),
+ MDC.of(LogKeys.ELAPSED_TIME, (postBootstrap - preConnect) / 1000000),
+ MDC.of(LogKeys.BOOTSTRAP_TIME, (postBootstrap - preBootstrap) / 1000000));
return client;
}
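
Note on the pattern above: Guava helpers are swapped for JDK equivalents throughout this file. A minimal, self-contained sketch of the same idiom (the class and member names below are illustrative only, not taken from the patch):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Objects;

    class GuavaFreeExample {
      private final List<String> bootstraps;

      GuavaFreeExample(List<String> bootstraps) {
        // Objects.requireNonNull replaces Preconditions.checkNotNull,
        // new ArrayList<>(...) replaces Lists.newArrayList(...).
        this.bootstraps = new ArrayList<>(Objects.requireNonNull(bootstraps));
      }

      static void rethrow(Exception e) {
        // Replaces Throwables.throwIfUnchecked(e) followed by wrapping:
        // unchecked exceptions are rethrown as-is, checked ones are wrapped.
        if (e instanceof RuntimeException re) throw re;
        throw new RuntimeException(e);
      }
    }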
diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java
index be4cf4a58abeb..d27fa08d829bb 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java
@@ -26,8 +26,6 @@
import com.google.common.annotations.VisibleForTesting;
import io.netty.channel.Channel;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
@@ -45,6 +43,7 @@
import org.apache.spark.network.server.MessageHandler;
import static org.apache.spark.network.util.NettyUtils.getRemoteAddress;
import org.apache.spark.network.util.TransportFrameDecoder;
+import org.apache.spark.util.Pair;
/**
* Handler that processes server responses, in response to requests issued from a
@@ -96,7 +95,7 @@ public void removeRpcRequest(long requestId) {
public void addStreamCallback(String streamId, StreamCallback callback) {
updateTimeOfLastRequest();
- streamCallbacks.offer(ImmutablePair.of(streamId, callback));
+ streamCallbacks.offer(Pair.of(streamId, callback));
}
@VisibleForTesting
@@ -125,7 +124,7 @@ private void failOutstandingRequests(Throwable cause) {
}
for (Pair<String, StreamCallback> entry : streamCallbacks) {
try {
- entry.getValue().onFailure(entry.getKey(), cause);
+ entry.getRight().onFailure(entry.getLeft(), cause);
} catch (Exception e) {
logger.warn("StreamCallback.onFailure throws exception", e);
}
@@ -146,8 +145,8 @@ public void channelInactive() {
if (hasOutstandingRequests()) {
String remoteAddress = getRemoteAddress(channel);
logger.error("Still have {} requests outstanding when connection from {} is closed",
- MDC.of(LogKeys.COUNT$.MODULE$, numOutstandingRequests()),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress));
+ MDC.of(LogKeys.COUNT, numOutstandingRequests()),
+ MDC.of(LogKeys.HOST_PORT, remoteAddress));
failOutstandingRequests(new IOException("Connection from " + remoteAddress + " closed"));
}
}
@@ -157,8 +156,8 @@ public void exceptionCaught(Throwable cause) {
if (hasOutstandingRequests()) {
String remoteAddress = getRemoteAddress(channel);
logger.error("Still have {} requests outstanding when connection from {} is closed",
- MDC.of(LogKeys.COUNT$.MODULE$, numOutstandingRequests()),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress));
+ MDC.of(LogKeys.COUNT, numOutstandingRequests()),
+ MDC.of(LogKeys.HOST_PORT, remoteAddress));
failOutstandingRequests(cause);
}
}
@@ -169,8 +168,8 @@ public void handle(ResponseMessage message) throws Exception {
ChunkReceivedCallback listener = outstandingFetches.get(resp.streamChunkId);
if (listener == null) {
logger.warn("Ignoring response for block {} from {} since it is not outstanding",
- MDC.of(LogKeys.STREAM_CHUNK_ID$.MODULE$, resp.streamChunkId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)));
+ MDC.of(LogKeys.STREAM_CHUNK_ID, resp.streamChunkId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)));
resp.body().release();
} else {
outstandingFetches.remove(resp.streamChunkId);
@@ -181,9 +180,9 @@ public void handle(ResponseMessage message) throws Exception {
ChunkReceivedCallback listener = outstandingFetches.get(resp.streamChunkId);
if (listener == null) {
logger.warn("Ignoring response for block {} from {} ({}) since it is not outstanding",
- MDC.of(LogKeys.STREAM_CHUNK_ID$.MODULE$, resp.streamChunkId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)),
- MDC.of(LogKeys.ERROR$.MODULE$, resp.errorString));
+ MDC.of(LogKeys.STREAM_CHUNK_ID, resp.streamChunkId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)),
+ MDC.of(LogKeys.ERROR, resp.errorString));
} else {
outstandingFetches.remove(resp.streamChunkId);
listener.onFailure(resp.streamChunkId.chunkIndex(), new ChunkFetchFailureException(
@@ -193,9 +192,9 @@ public void handle(ResponseMessage message) throws Exception {
RpcResponseCallback listener = (RpcResponseCallback) outstandingRpcs.get(resp.requestId);
if (listener == null) {
logger.warn("Ignoring response for RPC {} from {} ({} bytes) since it is not outstanding",
- MDC.of(LogKeys.REQUEST_ID$.MODULE$, resp.requestId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)),
- MDC.of(LogKeys.RESPONSE_BODY_SIZE$.MODULE$, resp.body().size()));
+ MDC.of(LogKeys.REQUEST_ID, resp.requestId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)),
+ MDC.of(LogKeys.RESPONSE_BODY_SIZE, resp.body().size()));
resp.body().release();
} else {
outstandingRpcs.remove(resp.requestId);
@@ -209,9 +208,9 @@ public void handle(ResponseMessage message) throws Exception {
BaseResponseCallback listener = outstandingRpcs.get(resp.requestId);
if (listener == null) {
logger.warn("Ignoring response for RPC {} from {} ({}) since it is not outstanding",
- MDC.of(LogKeys.REQUEST_ID$.MODULE$, resp.requestId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)),
- MDC.of(LogKeys.ERROR$.MODULE$, resp.errorString));
+ MDC.of(LogKeys.REQUEST_ID, resp.requestId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)),
+ MDC.of(LogKeys.ERROR, resp.errorString));
} else {
outstandingRpcs.remove(resp.requestId);
listener.onFailure(new RuntimeException(resp.errorString));
@@ -223,9 +222,9 @@ public void handle(ResponseMessage message) throws Exception {
if (listener == null) {
logger.warn("Ignoring response for MergedBlockMetaRequest {} from {} ({} bytes) since "
+ "it is not outstanding",
- MDC.of(LogKeys.REQUEST_ID$.MODULE$, resp.requestId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)),
- MDC.of(LogKeys.RESPONSE_BODY_SIZE$.MODULE$, resp.body().size()));
+ MDC.of(LogKeys.REQUEST_ID, resp.requestId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)),
+ MDC.of(LogKeys.RESPONSE_BODY_SIZE, resp.body().size()));
} else {
outstandingRpcs.remove(resp.requestId);
listener.onSuccess(resp.getNumChunks(), resp.body());
@@ -236,7 +235,7 @@ public void handle(ResponseMessage message) throws Exception {
} else if (message instanceof StreamResponse resp) {
Pair<String, StreamCallback> entry = streamCallbacks.poll();
if (entry != null) {
- StreamCallback callback = entry.getValue();
+ StreamCallback callback = entry.getRight();
if (resp.byteCount > 0) {
StreamInterceptor<ResponseMessage> interceptor = new StreamInterceptor<>(
this, resp.streamId, resp.byteCount, callback);
@@ -262,7 +261,7 @@ public void handle(ResponseMessage message) throws Exception {
} else if (message instanceof StreamFailure resp) {
Pair<String, StreamCallback> entry = streamCallbacks.poll();
if (entry != null) {
- StreamCallback callback = entry.getValue();
+ StreamCallback callback = entry.getRight();
try {
callback.onFailure(resp.streamId, new RuntimeException(resp.error));
} catch (IOException ioe) {
@@ -270,7 +269,7 @@ public void handle(ResponseMessage message) throws Exception {
}
} else {
logger.warn("Stream failure with unknown callback: {}",
- MDC.of(LogKeys.ERROR$.MODULE$, resp.error));
+ MDC.of(LogKeys.ERROR, resp.error));
}
} else {
throw new IllegalStateException("Unknown response type: " + message.type());
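
The commons-lang3 ImmutablePair/Pair usages above now compile against org.apache.spark.util.Pair. The implementation itself is not shown in this patch; the call sites only require a static of(...) factory plus getLeft()/getRight() accessors, i.e. something shaped roughly like the hypothetical sketch below (note that getKey()/getValue() from commons-lang3 map onto getLeft()/getRight()):

    // Hypothetical shape only; the real org.apache.spark.util.Pair may differ.
    public record Pair<L, R>(L getLeft, R getRight) {
      public static <L, R> Pair<L, R> of(L left, R right) {
        return new Pair<>(left, right);
      }
    }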
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java
index 8449a774a404a..f02fbc3aa26c7 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java
@@ -21,10 +21,10 @@
import java.io.Closeable;
import java.security.GeneralSecurityException;
import java.util.Arrays;
+import java.util.Objects;
import java.util.Properties;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import com.google.common.primitives.Bytes;
import com.google.crypto.tink.subtle.AesGcmJce;
import com.google.crypto.tink.subtle.Hkdf;
@@ -33,6 +33,8 @@
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import static java.nio.charset.StandardCharsets.UTF_8;
+
+import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.TransportConf;
/**
@@ -61,10 +63,8 @@ class AuthEngine implements Closeable {
private TransportCipher sessionCipher;
AuthEngine(String appId, String preSharedSecret, TransportConf conf) {
- Preconditions.checkNotNull(appId);
- Preconditions.checkNotNull(preSharedSecret);
- this.appId = appId;
- this.preSharedSecret = preSharedSecret.getBytes(UTF_8);
+ this.appId = Objects.requireNonNull(appId);
+ this.preSharedSecret = Objects.requireNonNull(preSharedSecret).getBytes(UTF_8);
this.conf = conf;
this.cryptoConf = conf.cryptoConf();
// This is for backward compatibility with version 1.0 of this protocol,
@@ -126,7 +126,7 @@ private AuthMessage encryptEphemeralPublicKey(
private byte[] decryptEphemeralPublicKey(
AuthMessage encryptedPublicKey,
byte[] transcript) throws GeneralSecurityException {
- Preconditions.checkArgument(appId.equals(encryptedPublicKey.appId()));
+ JavaUtils.checkArgument(appId.equals(encryptedPublicKey.appId()), "appID is different.");
// Mix in the app ID, salt, and transcript into HKDF and use it as AES-GCM AAD
byte[] aadState = Bytes.concat(appId.getBytes(UTF_8), encryptedPublicKey.salt(), transcript);
// Use HKDF to derive an AES_GCM key from the pre-shared key, non-secret salt, and AAD state
@@ -162,7 +162,7 @@ AuthMessage challenge() throws GeneralSecurityException {
* @return An encrypted server ephemeral public key to be sent to the client.
*/
AuthMessage response(AuthMessage encryptedClientPublicKey) throws GeneralSecurityException {
- Preconditions.checkArgument(appId.equals(encryptedClientPublicKey.appId()));
+ JavaUtils.checkArgument(appId.equals(encryptedClientPublicKey.appId()), "appId is different.");
// Compute a shared secret given the client public key and the server private key
byte[] clientPublicKey =
decryptEphemeralPublicKey(encryptedClientPublicKey, EMPTY_TRANSCRIPT);
@@ -190,8 +190,7 @@ AuthMessage response(AuthMessage encryptedClientPublicKey) throws GeneralSecurit
*/
void deriveSessionCipher(AuthMessage encryptedClientPublicKey,
AuthMessage encryptedServerPublicKey) throws GeneralSecurityException {
- Preconditions.checkArgument(appId.equals(encryptedClientPublicKey.appId()));
- Preconditions.checkArgument(appId.equals(encryptedServerPublicKey.appId()));
+ JavaUtils.checkArgument(appId.equals(encryptedClientPublicKey.appId()), "appId is different.");
// Compute a shared secret given the server public key and the client private key,
// mixing in the protocol transcript.
byte[] serverPublicKey = decryptEphemeralPublicKey(
@@ -252,7 +251,7 @@ private byte[] getTranscript(AuthMessage... encryptedPublicKeys) {
}
TransportCipher sessionCipher() {
- Preconditions.checkState(sessionCipher != null);
+ JavaUtils.checkState(sessionCipher != null, "sessionCipher is null.");
return sessionCipher;
}
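
The Preconditions.checkArgument/checkState calls above are rewritten against JavaUtils with an explicit message. Judging from the call sites (a boolean condition, a message, and optional arguments substituted into %s placeholders), the helpers behave roughly like the sketch below; this is an assumption for readability, not the actual org.apache.spark.network.util.JavaUtils source:

    // Illustrative semantics assumed by the call sites in this patch.
    final class ChecksSketch {
      static void checkArgument(boolean expression, String message, Object... args) {
        if (!expression) throw new IllegalArgumentException(String.format(message, args));
      }

      static void checkState(boolean expression, String message, Object... args) {
        if (!expression) throw new IllegalStateException(String.format(message, args));
      }
    }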
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java
index 087e3d21e22bb..8ce4680f32437 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java
@@ -20,8 +20,6 @@
import java.nio.ByteBuffer;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.base.Throwables;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.Channel;
@@ -36,6 +34,7 @@
import org.apache.spark.network.sasl.SaslRpcHandler;
import org.apache.spark.network.server.AbstractAuthRpcHandler;
import org.apache.spark.network.server.RpcHandler;
+import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.TransportConf;
/**
@@ -93,7 +92,7 @@ protected boolean doAuthChallenge(
} catch (RuntimeException e) {
if (conf.saslFallback()) {
LOG.warn("Failed to parse new auth challenge, reverting to SASL for client {}.",
- MDC.of(LogKeys.HOST_PORT$.MODULE$, channel.remoteAddress()));
+ MDC.of(LogKeys.HOST_PORT, channel.remoteAddress()));
saslHandler = new SaslRpcHandler(conf, channel, null, secretKeyHolder);
message.position(position);
message.limit(limit);
@@ -111,7 +110,7 @@ protected boolean doAuthChallenge(
AuthEngine engine = null;
try {
String secret = secretKeyHolder.getSecretKey(challenge.appId());
- Preconditions.checkState(secret != null,
+ JavaUtils.checkState(secret != null,
"Trying to authenticate non-registered app %s.", challenge.appId());
LOG.debug("Authenticating challenge for app {}.", challenge.appId());
engine = new AuthEngine(challenge.appId(), secret, conf);
@@ -132,7 +131,7 @@ protected boolean doAuthChallenge(
try {
engine.close();
} catch (Exception e) {
- Throwables.throwIfUnchecked(e);
+ if (e instanceof RuntimeException re) throw re;
throw new RuntimeException(e);
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java
index 85b893751b39c..de7d1ae5753d9 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java
@@ -27,7 +27,6 @@
import javax.crypto.spec.IvParameterSpec;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.*;
@@ -37,6 +36,7 @@
import org.apache.spark.network.util.AbstractFileRegion;
import org.apache.spark.network.util.ByteArrayReadableChannel;
import org.apache.spark.network.util.ByteArrayWritableChannel;
+import org.apache.spark.network.util.JavaUtils;
/**
* Cipher for encryption and decryption.
@@ -239,7 +239,7 @@ static class EncryptedMessage extends AbstractFileRegion {
Object msg,
ByteArrayWritableChannel byteEncChannel,
ByteArrayWritableChannel byteRawChannel) {
- Preconditions.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion,
+ JavaUtils.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion,
"Unrecognized message type: %s", msg.getClass().getName());
this.handler = handler;
this.isByteBuf = msg instanceof ByteBuf;
@@ -304,7 +304,7 @@ public boolean release(int decrement) {
@Override
public long transferTo(WritableByteChannel target, long position) throws IOException {
- Preconditions.checkArgument(position == transferred(), "Invalid position.");
+ JavaUtils.checkArgument(position == transferred(), "Invalid position.");
if (transferred == count) {
return 0;
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java
index c3540838bef09..e1cf22a612ea4 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java
@@ -18,15 +18,16 @@
package org.apache.spark.network.crypto;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import com.google.common.primitives.Longs;
import com.google.crypto.tink.subtle.*;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.*;
import io.netty.util.ReferenceCounted;
+
import org.apache.spark.network.util.AbstractFileRegion;
import org.apache.spark.network.util.ByteBufferWriteableChannel;
+import org.apache.spark.network.util.JavaUtils;
import javax.crypto.spec.SecretKeySpec;
import java.io.IOException;
@@ -118,7 +119,7 @@ static class GcmEncryptedMessage extends AbstractFileRegion {
Object plaintextMessage,
ByteBuffer plaintextBuffer,
ByteBuffer ciphertextBuffer) throws GeneralSecurityException {
- Preconditions.checkArgument(
+ JavaUtils.checkArgument(
plaintextMessage instanceof ByteBuf || plaintextMessage instanceof FileRegion,
"Unrecognized message type: %s", plaintextMessage.getClass().getName());
this.plaintextMessage = plaintextMessage;
@@ -221,10 +222,12 @@ public long transferTo(WritableByteChannel target, long position) throws IOExcep
int readLimit =
(int) Math.min(readableBytes, plaintextBuffer.remaining());
if (plaintextMessage instanceof ByteBuf byteBuf) {
- Preconditions.checkState(0 == plaintextBuffer.position());
+ JavaUtils.checkState(0 == plaintextBuffer.position(),
+ "plaintextBuffer.position is not 0");
plaintextBuffer.limit(readLimit);
byteBuf.readBytes(plaintextBuffer);
- Preconditions.checkState(readLimit == plaintextBuffer.position());
+ JavaUtils.checkState(readLimit == plaintextBuffer.position(),
+ "plaintextBuffer.position should be equal to readLimit.");
} else if (plaintextMessage instanceof FileRegion fileRegion) {
ByteBufferWriteableChannel plaintextChannel =
new ByteBufferWriteableChannel(plaintextBuffer);
@@ -347,7 +350,7 @@ private boolean initalizeDecrypter(ByteBuf ciphertextNettyBuf)
@Override
public void channelRead(ChannelHandlerContext ctx, Object ciphertextMessage)
throws GeneralSecurityException {
- Preconditions.checkArgument(ciphertextMessage instanceof ByteBuf,
+ JavaUtils.checkArgument(ciphertextMessage instanceof ByteBuf,
"Unrecognized message type: %s",
ciphertextMessage.getClass().getName());
ByteBuf ciphertextNettyBuf = (ByteBuf) ciphertextMessage;
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/AbstractMessage.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/AbstractMessage.java
index 2924218c2f08b..1170fd3f1ab33 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/AbstractMessage.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/AbstractMessage.java
@@ -17,7 +17,7 @@
package org.apache.spark.network.protocol;
-import com.google.common.base.Objects;
+import java.util.Objects;
import org.apache.spark.network.buffer.ManagedBuffer;
@@ -48,7 +48,7 @@ public boolean isBodyInFrame() {
}
protected boolean equals(AbstractMessage other) {
- return isBodyInFrame == other.isBodyInFrame && Objects.equal(body, other.body);
+ return isBodyInFrame == other.isBodyInFrame && Objects.equals(body, other.body);
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java
index cbad4c61b9b4a..736d8e6f5eea2 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
/**
* Response to {@link ChunkFetchRequest} when there is an error fetching the chunk.
@@ -70,9 +68,6 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("streamChunkId", streamChunkId)
- .append("errorString", errorString)
- .toString();
+ return "ChunkFetchFailure[streamChunkId=" + streamChunkId + ",errorString=" + errorString + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java
index 2865388b3297c..cc042fdf76b77 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java
@@ -18,8 +18,6 @@
package org.apache.spark.network.protocol;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
/**
* Request to fetch a sequence of a single chunk of a stream. This will correspond to a single
@@ -64,8 +62,6 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("streamChunkId", streamChunkId)
- .toString();
+ return "ChunkFetchRequest[streamChunkId=" + streamChunkId + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java
index aa89b2062f626..948190e7a2d53 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.buffer.NettyManagedBuffer;
@@ -83,9 +81,6 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("streamChunkId", streamChunkId)
- .append("buffer", body())
- .toString();
+ return "ChunkFetchSuccess[streamChunkId=" + streamChunkId + ",body=" + body() + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java
index 321ac13881c2a..84917eca17190 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java
@@ -21,7 +21,6 @@
import java.io.InputStream;
import javax.annotation.Nullable;
-import com.google.common.base.Preconditions;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.netty.channel.ChannelHandlerContext;
@@ -29,6 +28,7 @@
import io.netty.handler.stream.ChunkedInput;
import org.apache.spark.network.buffer.ManagedBuffer;
+import org.apache.spark.network.util.JavaUtils;
/**
* A wrapper message that holds two separate pieces (a header and a body).
@@ -60,7 +60,7 @@ public class EncryptedMessageWithHeader implements ChunkedInput<ByteBuf> {
public EncryptedMessageWithHeader(
@Nullable ManagedBuffer managedBuffer, ByteBuf header, Object body, long bodyLength) {
- Preconditions.checkArgument(body instanceof InputStream || body instanceof ChunkedStream,
+ JavaUtils.checkArgument(body instanceof InputStream || body instanceof ChunkedStream,
"Body must be an InputStream or a ChunkedStream.");
this.managedBuffer = managedBuffer;
this.header = header;
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaRequest.java
index 3723730ebc06c..0b1476664f651 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaRequest.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaRequest.java
@@ -17,10 +17,9 @@
package org.apache.spark.network.protocol;
-import com.google.common.base.Objects;
+import java.util.Objects;
+
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
/**
* Request to find the meta information for the specified merged block. The meta information
@@ -79,7 +78,7 @@ public static MergedBlockMetaRequest decode(ByteBuf buf) {
@Override
public int hashCode() {
- return Objects.hashCode(requestId, appId, shuffleId, shuffleMergeId, reduceId);
+ return Objects.hash(requestId, appId, shuffleId, shuffleMergeId, reduceId);
}
@Override
@@ -87,19 +86,14 @@ public boolean equals(Object other) {
if (other instanceof MergedBlockMetaRequest o) {
return requestId == o.requestId && shuffleId == o.shuffleId &&
shuffleMergeId == o.shuffleMergeId && reduceId == o.reduceId &&
- Objects.equal(appId, o.appId);
+ Objects.equals(appId, o.appId);
}
return false;
}
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("requestId", requestId)
- .append("appId", appId)
- .append("shuffleId", shuffleId)
- .append("shuffleMergeId", shuffleMergeId)
- .append("reduceId", reduceId)
- .toString();
+ return "MergedBlockMetaRequest[requestId=" + requestId + ",appId=" + appId + ",shuffleId=" +
+ shuffleId + ",shuffleMergeId=" + shuffleMergeId + ",reduceId=" + reduceId + "]";
}
}
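
The protocol message classes repeat the same migration: java.util.Objects replaces Guava's Objects, and plain string concatenation replaces ToStringBuilder with SHORT_PREFIX_STYLE. Applied to a hypothetical message type (not one of the actual protocol classes), the resulting pattern is:

    import java.util.Objects;

    final class ExampleMessage {
      private final long requestId;
      private final String appId;

      ExampleMessage(long requestId, String appId) {
        this.requestId = requestId;
        this.appId = appId;
      }

      @Override
      public int hashCode() {
        // java.util.Objects.hash replaces com.google.common.base.Objects.hashCode
        return Objects.hash(requestId, appId);
      }

      @Override
      public boolean equals(Object other) {
        return other instanceof ExampleMessage o
          && requestId == o.requestId && Objects.equals(appId, o.appId);
      }

      @Override
      public String toString() {
        // SHORT_PREFIX_STYLE-like output, e.g. "ExampleMessage[requestId=1,appId=app]",
        // rebuilt with plain concatenation.
        return "ExampleMessage[requestId=" + requestId + ",appId=" + appId + "]";
      }
    }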
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaSuccess.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaSuccess.java
index d2edaf4532e11..255174e34600c 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaSuccess.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaSuccess.java
@@ -17,10 +17,9 @@
package org.apache.spark.network.protocol;
-import com.google.common.base.Objects;
+import java.util.Objects;
+
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.buffer.NettyManagedBuffer;
@@ -51,13 +50,12 @@ public Type type() {
@Override
public int hashCode() {
- return Objects.hashCode(requestId, numChunks);
+ return Objects.hash(requestId, numChunks);
}
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("requestId", requestId).append("numChunks", numChunks).toString();
+ return "MergedBlockMetaSuccess[requestId=" + requestId + ",numChunks=" + numChunks + "]";
}
@Override
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java
index ab20fb908eb42..3f23c17939e6c 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java
@@ -66,8 +66,8 @@ public void encode(ChannelHandlerContext ctx, Message in, List<Object> out) thro
// Re-encode this message as a failure response.
String error = e.getMessage() != null ? e.getMessage() : "null";
logger.error("Error processing {} for client {}", e,
- MDC.of(LogKeys.MESSAGE$.MODULE$, in),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, ctx.channel().remoteAddress()));
+ MDC.of(LogKeys.MESSAGE, in),
+ MDC.of(LogKeys.HOST_PORT, ctx.channel().remoteAddress()));
encode(ctx, resp.createFailureResponse(error), out);
} else {
throw e;
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java
index e8eb83e7577bf..993ce2381caa5 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java
@@ -22,13 +22,13 @@
import java.nio.channels.WritableByteChannel;
import javax.annotation.Nullable;
-import com.google.common.base.Preconditions;
import io.netty.buffer.ByteBuf;
import io.netty.channel.FileRegion;
import io.netty.util.ReferenceCountUtil;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.util.AbstractFileRegion;
+import org.apache.spark.network.util.JavaUtils;
/**
* A wrapper message that holds two separate pieces (a header and a body).
@@ -72,7 +72,7 @@ public class MessageWithHeader extends AbstractFileRegion {
ByteBuf header,
Object body,
long bodyLength) {
- Preconditions.checkArgument(body instanceof ByteBuf || body instanceof FileRegion,
+ JavaUtils.checkArgument(body instanceof ByteBuf || body instanceof FileRegion,
"Body must be a ByteBuf or a FileRegion.");
this.managedBuffer = managedBuffer;
this.header = header;
@@ -105,7 +105,7 @@ public long transferred() {
*/
@Override
public long transferTo(final WritableByteChannel target, final long position) throws IOException {
- Preconditions.checkArgument(position == totalBytesTransferred, "Invalid position.");
+ JavaUtils.checkArgument(position == totalBytesTransferred, "Invalid position.");
// Bytes written for header in this call.
long writtenHeader = 0;
if (header.readableBytes() > 0) {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java
index 91c818f3612a9..de1f91bc8d318 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.buffer.NettyManagedBuffer;
@@ -74,8 +72,6 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("body", body())
- .toString();
+ return "OneWayMessage[body=" + body() + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java
index 02a45d68c650e..f48264a494f0e 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
/** Response to {@link RpcRequest} for a failed RPC. */
public final class RpcFailure extends AbstractMessage implements ResponseMessage {
@@ -68,9 +66,6 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("requestId", requestId)
- .append("errorString", errorString)
- .toString();
+ return "RpcFailure[requestId=" + requestId + ",errorString=" + errorString + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java
index a7dbe1283b314..2619b176e331e 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.buffer.NettyManagedBuffer;
@@ -80,9 +78,6 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("requestId", requestId)
- .append("body", body())
- .toString();
+ return "RpcRequest[requestId=" + requestId + ",body=" + body() + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java
index 85709e36f83ee..a9805bcf686b4 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.buffer.NettyManagedBuffer;
@@ -80,9 +78,6 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("requestId", requestId)
- .append("body", body())
- .toString();
+ return "RpcResponse[requestId=" + requestId + ",body=" + body() + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java
index abe6ccca7bfd6..083e45b3d6bdb 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java
@@ -71,8 +71,8 @@ public void encode(ChannelHandlerContext ctx, Message in, List<Object> out) thro
// Re-encode this message as a failure response.
String error = e.getMessage() != null ? e.getMessage() : "null";
logger.error("Error processing {} for client {}", e,
- MDC.of(LogKeys.MESSAGE$.MODULE$, in),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, ctx.channel().remoteAddress()));
+ MDC.of(LogKeys.MESSAGE, in),
+ MDC.of(LogKeys.HOST_PORT, ctx.channel().remoteAddress()));
encode(ctx, resp.createFailureResponse(error), out);
} else {
throw e;
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java
index c3b715009dffe..61aae3e36eceb 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
/**
* Encapsulates a request for a particular chunk of a stream.
@@ -61,9 +59,6 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("streamId", streamId)
- .append("chunkIndex", chunkIndex)
- .toString();
+ return "StreamChunkId[streamId=" + streamId + ",chunkIndex=" + chunkIndex + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java
index 9a7bf2f65af3a..50cc25a4919a5 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
/**
* Message indicating an error when transferring a stream.
@@ -70,10 +68,7 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("streamId", streamId)
- .append("error", error)
- .toString();
+ return "StreamFailure[streamId=" + streamId + ",error=" + error + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java
index 5906b4d380d6e..45ca2578b01a4 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
/**
* Request to stream data from the remote end.
@@ -69,9 +67,7 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("streamId", streamId)
- .toString();
+ return "StreamRequest[streamId=" + streamId + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java
index 0c0aa5c9a635b..d7c304e5c5b34 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.buffer.ManagedBuffer;
@@ -83,11 +81,8 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("streamId", streamId)
- .append("byteCount", byteCount)
- .append("body", body())
- .toString();
+ return "StreamResponse[streamId=" + streamId + ",byteCount=" + byteCount +
+ ",body=" + body() + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java
index 4722f39dfa9db..09baaf60c3f92 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java
@@ -21,8 +21,6 @@
import java.nio.ByteBuffer;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.buffer.NettyManagedBuffer;
@@ -99,9 +97,6 @@ public boolean equals(Object other) {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("requestId", requestId)
- .append("body", body())
- .toString();
+ return "UploadStream[requestId=" + requestId + ",body=" + body() + "]";
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java
index e1275689ae6a0..1cdb951d2d04e 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java
@@ -23,7 +23,6 @@
import java.util.List;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.Channel;
@@ -35,6 +34,7 @@
import org.apache.spark.network.util.AbstractFileRegion;
import org.apache.spark.network.util.ByteArrayWritableChannel;
+import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.NettyUtils;
/**
@@ -152,7 +152,7 @@ static class EncryptedMessage extends AbstractFileRegion {
private long transferred;
EncryptedMessage(SaslEncryptionBackend backend, Object msg, int maxOutboundBlockSize) {
- Preconditions.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion,
+ JavaUtils.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion,
"Unrecognized message type: %s", msg.getClass().getName());
this.backend = backend;
this.isByteBuf = msg instanceof ByteBuf;
@@ -241,7 +241,7 @@ public boolean release(int decrement) {
public long transferTo(final WritableByteChannel target, final long position)
throws IOException {
- Preconditions.checkArgument(position == transferred(), "Invalid position.");
+ JavaUtils.checkArgument(position == transferred(), "Invalid position.");
long reportedWritten = 0L;
long actuallyWritten = 0L;
diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java
index f32fd5145c7c5..24e01c924ef3a 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java
@@ -29,8 +29,8 @@
import javax.security.sasl.SaslServer;
import java.nio.charset.StandardCharsets;
import java.util.Map;
+import java.util.Objects;
-import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
@@ -182,13 +182,13 @@ public void handle(Callback[] callbacks) throws UnsupportedCallbackException {
/* Encode a byte[] identifier as a Base64-encoded string. */
public static String encodeIdentifier(String identifier) {
- Preconditions.checkNotNull(identifier, "User cannot be null if SASL is enabled");
+ Objects.requireNonNull(identifier, "User cannot be null if SASL is enabled");
return getBase64EncodedString(identifier);
}
/** Encode a password as a base64-encoded char[] array. */
public static char[] encodePassword(String password) {
- Preconditions.checkNotNull(password, "Password cannot be null if SASL is enabled");
+ Objects.requireNonNull(password, "Password cannot be null if SASL is enabled");
return getBase64EncodedString(password).toCharArray();
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/BlockPushNonFatalFailure.java b/common/network-common/src/main/java/org/apache/spark/network/server/BlockPushNonFatalFailure.java
index f60a74670d149..a0e9305265385 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/BlockPushNonFatalFailure.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/BlockPushNonFatalFailure.java
@@ -18,8 +18,9 @@
package org.apache.spark.network.server;
import java.nio.ByteBuffer;
+import java.util.Objects;
-import com.google.common.base.Preconditions;
+import org.apache.spark.network.util.JavaUtils;
/**
* A special RuntimeException thrown when shuffle service experiences a non-fatal failure
@@ -101,14 +102,12 @@ public synchronized Throwable fillInStackTrace() {
public ByteBuffer getResponse() {
// Ensure we do not invoke this method if response is not set
- Preconditions.checkNotNull(response);
- return response;
+ return Objects.requireNonNull(response);
}
public ReturnCode getReturnCode() {
// Ensure we do not invoke this method if returnCode is not set
- Preconditions.checkNotNull(returnCode);
- return returnCode;
+ return Objects.requireNonNull(returnCode);
}
public enum ReturnCode {
@@ -171,7 +170,7 @@ public static boolean shouldNotRetryErrorCode(ReturnCode returnCode) {
}
public static String getErrorMsg(String blockId, ReturnCode errorCode) {
- Preconditions.checkArgument(errorCode != ReturnCode.SUCCESS);
+ JavaUtils.checkArgument(errorCode != ReturnCode.SUCCESS, "errorCode should not be SUCCESS.");
return "Block " + blockId + errorCode.errorMsgSuffix;
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java
index cc0bed7ed5b6d..c7d4d671dec7d 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java
@@ -19,7 +19,6 @@
import java.net.SocketAddress;
-import com.google.common.base.Throwables;
import io.netty.channel.Channel;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelFutureListener;
@@ -36,6 +35,7 @@
import org.apache.spark.network.protocol.ChunkFetchRequest;
import org.apache.spark.network.protocol.ChunkFetchSuccess;
import org.apache.spark.network.protocol.Encodable;
+import org.apache.spark.network.util.JavaUtils;
import static org.apache.spark.network.util.NettyUtils.*;
@@ -74,7 +74,7 @@ public ChunkFetchRequestHandler(
@Override
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
logger.warn("Exception in connection from {}", cause,
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(ctx.channel())));
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(ctx.channel())));
ctx.close();
}
@@ -96,8 +96,8 @@ public void processFetchRequest(
long chunksBeingTransferred = streamManager.chunksBeingTransferred();
if (chunksBeingTransferred >= maxChunksBeingTransferred) {
logger.warn("The number of chunks being transferred {} is above {}, close the connection.",
- MDC.of(LogKeys.NUM_CHUNKS$.MODULE$, chunksBeingTransferred),
- MDC.of(LogKeys.MAX_NUM_CHUNKS$.MODULE$, maxChunksBeingTransferred));
+ MDC.of(LogKeys.NUM_CHUNKS, chunksBeingTransferred),
+ MDC.of(LogKeys.MAX_NUM_CHUNKS, maxChunksBeingTransferred));
channel.close();
return;
}
@@ -111,10 +111,10 @@ public void processFetchRequest(
}
} catch (Exception e) {
logger.error("Error opening block {} for request from {}", e,
- MDC.of(LogKeys.STREAM_CHUNK_ID$.MODULE$, msg.streamChunkId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)));
+ MDC.of(LogKeys.STREAM_CHUNK_ID, msg.streamChunkId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)));
respond(channel, new ChunkFetchFailure(msg.streamChunkId,
- Throwables.getStackTraceAsString(e)));
+ JavaUtils.stackTraceToString(e)));
return;
}
@@ -153,8 +153,8 @@ private ChannelFuture respond(
} else {
logger.error("Error sending result {} to {}; closing connection",
future.cause(),
- MDC.of(LogKeys.RESULT$.MODULE$, result),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress));
+ MDC.of(LogKeys.RESULT, result),
+ MDC.of(LogKeys.HOST_PORT, remoteAddress));
channel.close();
}
});
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java
index f322293782dee..cb53d565e7e87 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java
@@ -19,20 +19,20 @@
import java.util.Iterator;
import java.util.Map;
+import java.util.Objects;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import io.netty.channel.Channel;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.client.TransportClient;
+import org.apache.spark.network.util.JavaUtils;
+import org.apache.spark.util.Pair;
/**
* StreamManager which allows registration of an Iterator<ManagedBuffer>, which are
@@ -72,7 +72,7 @@ private static class StreamState {
Channel channel,
boolean isBufferMaterializedOnNext) {
this.appId = appId;
- this.buffers = Preconditions.checkNotNull(buffers);
+ this.buffers = Objects.requireNonNull(buffers);
this.associatedChannel = channel;
this.isBufferMaterializedOnNext = isBufferMaterializedOnNext;
}
@@ -127,7 +127,7 @@ public static Pair<Long, Integer> parseStreamChunkId(String streamChunkId) {
"Stream id and chunk index should be specified.";
long streamId = Long.valueOf(array[0]);
int chunkIndex = Integer.valueOf(array[1]);
- return ImmutablePair.of(streamId, chunkIndex);
+ return Pair.of(streamId, chunkIndex);
}
@Override
@@ -167,7 +167,7 @@ public void connectionTerminated(Channel channel) {
public void checkAuthorization(TransportClient client, long streamId) {
if (client.getClientId() != null) {
StreamState state = streams.get(streamId);
- Preconditions.checkArgument(state != null, "Unknown stream ID.");
+ JavaUtils.checkArgument(state != null, "Unknown stream ID.");
if (!client.getClientId().equals(state.appId)) {
throw new SecurityException(String.format(
"Client %s not authorized to read stream %d (app %s).",
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java
index 283f0f0a431fd..d0df24873cbce 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java
@@ -88,7 +88,7 @@ public TransportClient getClient() {
@Override
public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception {
logger.warn("Exception in connection from {}", cause,
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(ctx.channel())));
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(ctx.channel())));
requestHandler.exceptionCaught(cause);
responseHandler.exceptionCaught(cause);
ctx.close();
@@ -168,9 +168,9 @@ public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exc
logger.error("Connection to {} has been quiet for {} ms while there are outstanding " +
"requests. Assuming connection is dead; please adjust" +
" spark.{}.io.connectionTimeout if this is wrong.",
- MDC.of(LogKeys.HOST_PORT$.MODULE$, address),
- MDC.of(LogKeys.TIMEOUT$.MODULE$, requestTimeoutNs / 1000 / 1000),
- MDC.of(LogKeys.MODULE_NAME$.MODULE$, transportContext.getConf().getModuleName()));
+ MDC.of(LogKeys.HOST_PORT, address),
+ MDC.of(LogKeys.TIMEOUT, requestTimeoutNs / 1000 / 1000),
+ MDC.of(LogKeys.MODULE_NAME, transportContext.getConf().getModuleName()));
client.timeOut();
ctx.close();
} else if (closeIdleConnections) {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java
index 2727051894b7a..464d4d9eb378f 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java
@@ -21,7 +21,6 @@
import java.net.SocketAddress;
import java.nio.ByteBuffer;
-import com.google.common.base.Throwables;
import io.netty.channel.Channel;
import io.netty.channel.ChannelFuture;
@@ -33,6 +32,7 @@
import org.apache.spark.network.buffer.NioManagedBuffer;
import org.apache.spark.network.client.*;
import org.apache.spark.network.protocol.*;
+import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.TransportFrameDecoder;
import static org.apache.spark.network.util.NettyUtils.getRemoteAddress;
@@ -132,8 +132,8 @@ private void processStreamRequest(final StreamRequest req) {
long chunksBeingTransferred = streamManager.chunksBeingTransferred();
if (chunksBeingTransferred >= maxChunksBeingTransferred) {
logger.warn("The number of chunks being transferred {} is above {}, close the connection.",
- MDC.of(LogKeys.NUM_CHUNKS$.MODULE$, chunksBeingTransferred),
- MDC.of(LogKeys.MAX_NUM_CHUNKS$.MODULE$, maxChunksBeingTransferred));
+ MDC.of(LogKeys.NUM_CHUNKS, chunksBeingTransferred),
+ MDC.of(LogKeys.MAX_NUM_CHUNKS, maxChunksBeingTransferred));
channel.close();
return;
}
@@ -143,9 +143,9 @@ private void processStreamRequest(final StreamRequest req) {
buf = streamManager.openStream(req.streamId);
} catch (Exception e) {
logger.error("Error opening stream {} for request from {}", e,
- MDC.of(LogKeys.STREAM_ID$.MODULE$, req.streamId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)));
- respond(new StreamFailure(req.streamId, Throwables.getStackTraceAsString(e)));
+ MDC.of(LogKeys.STREAM_ID, req.streamId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)));
+ respond(new StreamFailure(req.streamId, JavaUtils.stackTraceToString(e)));
return;
}
@@ -172,14 +172,14 @@ public void onSuccess(ByteBuffer response) {
@Override
public void onFailure(Throwable e) {
- respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e)));
+ respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e)));
}
});
} catch (Exception e) {
logger.error("Error while invoking RpcHandler#receive() on RPC id {} from {}", e,
- MDC.of(LogKeys.REQUEST_ID$.MODULE$, req.requestId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)));
- respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e)));
+ MDC.of(LogKeys.REQUEST_ID, req.requestId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)));
+ respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e)));
} finally {
req.body().release();
}
@@ -199,7 +199,7 @@ public void onSuccess(ByteBuffer response) {
@Override
public void onFailure(Throwable e) {
- respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e)));
+ respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e)));
}
};
TransportFrameDecoder frameDecoder = (TransportFrameDecoder)
@@ -264,9 +264,9 @@ public String getID() {
new NioManagedBuffer(blockPushNonFatalFailure.getResponse())));
} else {
logger.error("Error while invoking RpcHandler#receive() on RPC id {} from {}", e,
- MDC.of(LogKeys.REQUEST_ID$.MODULE$, req.requestId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)));
- respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e)));
+ MDC.of(LogKeys.REQUEST_ID, req.requestId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)));
+ respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e)));
}
// We choose to totally fail the channel, rather than trying to recover as we do in other
// cases. We don't know how many bytes of the stream the client has already sent for the
@@ -282,7 +282,7 @@ private void processOneWayMessage(OneWayMessage req) {
rpcHandler.receive(reverseClient, req.body().nioByteBuffer());
} catch (Exception e) {
logger.error("Error while invoking RpcHandler#receive() for one-way message from {}.", e,
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)));
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)));
} finally {
req.body().release();
}
@@ -302,16 +302,16 @@ public void onSuccess(int numChunks, ManagedBuffer buffer) {
@Override
public void onFailure(Throwable e) {
logger.trace("Failed to send meta for {}", req);
- respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e)));
+ respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e)));
}
});
} catch (Exception e) {
logger.error("Error while invoking receiveMergeBlockMetaReq() for appId {} shuffleId {} "
- + "reduceId {} from {}", e, MDC.of(LogKeys.APP_ID$.MODULE$, req.appId),
- MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, req.shuffleId),
- MDC.of(LogKeys.REDUCE_ID$.MODULE$, req.reduceId),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)));
- respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e)));
+ + "reduceId {} from {}", e, MDC.of(LogKeys.APP_ID, req.appId),
+ MDC.of(LogKeys.SHUFFLE_ID, req.shuffleId),
+ MDC.of(LogKeys.REDUCE_ID, req.reduceId),
+ MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)));
+ respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e)));
}
}
@@ -326,8 +326,8 @@ private ChannelFuture respond(Encodable result) {
logger.trace("Sent result {} to client {}", result, remoteAddress);
} else {
logger.error("Error sending result {} to {}; closing connection", future.cause(),
- MDC.of(LogKeys.RESULT$.MODULE$, result),
- MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress));
+ MDC.of(LogKeys.RESULT, result),
+ MDC.of(LogKeys.HOST_PORT, remoteAddress));
channel.close();
}
});
diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java
index d1a19652f5649..be5d9e03c45c1 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java
@@ -19,13 +19,13 @@
import java.io.Closeable;
import java.net.InetSocketAddress;
+import java.util.ArrayList;
import java.util.List;
+import java.util.Objects;
import java.util.concurrent.TimeUnit;
import com.codahale.metrics.Counter;
import com.codahale.metrics.MetricSet;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Lists;
import io.netty.bootstrap.ServerBootstrap;
import io.netty.buffer.PooledByteBufAllocator;
import io.netty.channel.ChannelFuture;
@@ -33,7 +33,6 @@
import io.netty.channel.ChannelOption;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.socket.SocketChannel;
-import org.apache.commons.lang3.SystemUtils;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
@@ -77,7 +76,7 @@ public TransportServer(
this.pooledAllocator = NettyUtils.createPooledByteBufAllocator(
conf.preferDirectBufs(), true /* allowCache */, conf.serverThreads());
}
- this.bootstraps = Lists.newArrayList(Preconditions.checkNotNull(bootstraps));
+ this.bootstraps = new ArrayList<>(Objects.requireNonNull(bootstraps));
boolean shouldClose = true;
try {
@@ -105,11 +104,13 @@ private void init(String hostToBind, int portToBind) {
EventLoopGroup workerGroup = NettyUtils.createEventLoop(ioMode, conf.serverThreads(),
conf.getModuleName() + "-server");
+ String name = System.getProperty("os.name");
+ boolean isNotWindows = !name.regionMatches(true, 0, "Windows", 0, 7);
bootstrap = new ServerBootstrap()
.group(bossGroup, workerGroup)
.channel(NettyUtils.getServerChannelClass(ioMode))
.option(ChannelOption.ALLOCATOR, pooledAllocator)
- .option(ChannelOption.SO_REUSEADDR, !SystemUtils.IS_OS_WINDOWS)
+ .option(ChannelOption.SO_REUSEADDR, isNotWindows)
.childOption(ChannelOption.ALLOCATOR, pooledAllocator);
this.metrics = new NettyMemoryMetrics(
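Editor's note on the hunk above: the patch replaces commons-lang3's SystemUtils.IS_OS_WINDOWS with a direct check of the os.name system property. A minimal, self-contained sketch of that check is below; the class and method names are illustrative only and are not part of this patch.

```java
// Illustrative sketch only: reproduces the os.name check used above in isolation.
// Class and method names are hypothetical, not part of the patch.
public final class OsCheckSketch {
  private OsCheckSketch() {}

  /** True unless the JVM reports a Windows operating system. */
  public static boolean isNotWindows() {
    String name = System.getProperty("os.name", "");
    // Case-insensitive comparison of the first 7 characters against "Windows",
    // avoiding a lowercased copy of the whole property value.
    return !name.regionMatches(true, 0, "Windows", 0, 7);
  }

  public static void main(String[] args) {
    System.out.println("SO_REUSEADDR would be set: " + isNotWindows());
  }
}
```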
diff --git a/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java b/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java
index a2e42e3eb39f6..f4b245ca7b128 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java
@@ -20,6 +20,7 @@
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.file.Files;
import java.security.GeneralSecurityException;
import java.security.KeyStore;
import java.security.KeyStoreException;
@@ -41,8 +42,6 @@
import javax.net.ssl.TrustManagerFactory;
import javax.net.ssl.X509TrustManager;
-import com.google.common.io.Files;
-
import io.netty.buffer.ByteBufAllocator;
import io.netty.handler.ssl.OpenSsl;
import io.netty.handler.ssl.SslContext;
@@ -378,7 +377,7 @@ private static TrustManager[] trustStoreManagers(
private static TrustManager[] defaultTrustManagers(File trustStore, String trustStorePassword)
throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException {
- try (InputStream input = Files.asByteSource(trustStore).openStream()) {
+ try (InputStream input = Files.newInputStream(trustStore.toPath())) {
KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
char[] passwordCharacters = trustStorePassword != null?
trustStorePassword.toCharArray() : null;
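The SSLFactory hunk above swaps Guava's Files.asByteSource(...).openStream() for java.nio.file.Files.newInputStream. A small sketch of loading a trust store that way, under the assumption of a keystore file on disk; the class name, path, and password handling are placeholders, not the PR's code:

```java
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.KeyStore;

// Illustrative sketch: load a trust store through java.nio.file.Files, mirroring
// the pattern used in the hunk above. Names are placeholders.
final class TrustStoreSketch {
  private TrustStoreSketch() {}

  static KeyStore load(Path trustStore, char[] password) throws Exception {
    KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType());
    try (InputStream in = Files.newInputStream(trustStore)) {
      ks.load(in, password); // password may be null for an unprotected store
    }
    return ks;
  }
}
```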
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/IOMode.java b/common/network-common/src/main/java/org/apache/spark/network/util/IOMode.java
index 6b208d95bbfbc..6ab401b9a0d5a 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/IOMode.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/IOMode.java
@@ -19,9 +19,18 @@
/**
* Selector for which form of low-level IO we should use.
- * NIO is always available, while EPOLL is only available on Linux.
- * AUTO is used to select EPOLL if it's available, or NIO otherwise.
*/
public enum IOMode {
- NIO, EPOLL
+ /**
+ * Java NIO (Selector), cross-platform portable
+ */
+ NIO,
+ /**
+ * Native EPOLL via JNI, Linux only
+ */
+ EPOLL,
+ /**
+ * Native KQUEUE via JNI, macOS/BSD only
+ */
+ KQUEUE
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java b/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java
index 391931961a474..ec3e032102e4f 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java
@@ -50,7 +50,7 @@ public static DB initLevelDB(File dbFile, StoreVersion version, ObjectMapper map
tmpDb = JniDBFactory.factory.open(dbFile, options);
} catch (NativeDB.DBException e) {
if (e.isNotFound() || e.getMessage().contains(" does not exist ")) {
- logger.info("Creating state database at {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile));
+ logger.info("Creating state database at {}", MDC.of(LogKeys.PATH, dbFile));
options.createIfMissing(true);
try {
tmpDb = JniDBFactory.factory.open(dbFile, options);
@@ -61,16 +61,16 @@ public static DB initLevelDB(File dbFile, StoreVersion version, ObjectMapper map
// the leveldb file seems to be corrupt somehow. Lets just blow it away and create a new
// one, so we can keep processing new apps
logger.error("error opening leveldb file {}. Creating new file, will not be able to " +
- "recover state for existing applications", e, MDC.of(LogKeys.PATH$.MODULE$, dbFile));
+ "recover state for existing applications", e, MDC.of(LogKeys.PATH, dbFile));
if (dbFile.isDirectory()) {
for (File f : dbFile.listFiles()) {
if (!f.delete()) {
- logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, f.getPath()));
+ logger.warn("error deleting {}", MDC.of(LogKeys.PATH, f.getPath()));
}
}
}
if (!dbFile.delete()) {
- logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile.getPath()));
+ logger.warn("error deleting {}", MDC.of(LogKeys.PATH, dbFile.getPath()));
}
options.createIfMissing(true);
try {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/LimitedInputStream.java b/common/network-common/src/main/java/org/apache/spark/network/util/LimitedInputStream.java
index e6cf02a590e29..79cf0eb7c6153 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/LimitedInputStream.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/LimitedInputStream.java
@@ -21,8 +21,7 @@
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
-
-import com.google.common.base.Preconditions;
+import java.util.Objects;
/**
* Wraps a {@link InputStream}, limiting the number of bytes which can be read.
@@ -50,10 +49,9 @@ public LimitedInputStream(InputStream in, long limit) {
* @param closeWrappedStream whether to close {@code in} when {@link #close} is called
*/
public LimitedInputStream(InputStream in, long limit, boolean closeWrappedStream) {
- super(in);
+ super(Objects.requireNonNull(in));
this.closeWrappedStream = closeWrappedStream;
- Preconditions.checkNotNull(in);
- Preconditions.checkArgument(limit >= 0, "limit must be non-negative");
+ JavaUtils.checkArgument(limit >= 0, "limit must be non-negative");
left = limit;
}
@Override public int available() throws IOException {
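The LimitedInputStream hunk above folds the null check into super(Objects.requireNonNull(in)) and replaces Preconditions.checkArgument with a JavaUtils helper. A minimal sketch of that validation style follows; the checkArgument helper here is a hypothetical stand-in with the same (boolean, String) shape seen at this call site, not the JavaUtils implementation.

```java
import java.util.Objects;

// Illustrative sketch of argument validation without Guava's Preconditions.
final class ValidationSketch {
  private ValidationSketch() {}

  // Hypothetical stand-in for a checkArgument(boolean, String) helper.
  static void checkArgument(boolean condition, String message) {
    if (!condition) {
      throw new IllegalArgumentException(message);
    }
  }

  static final class Bounded {
    private final Object wrapped;
    private final long limit;

    Bounded(Object wrapped, long limit) {
      this.wrapped = Objects.requireNonNull(wrapped, "wrapped");
      checkArgument(limit >= 0, "limit must be non-negative");
      this.limit = limit;
    }
  }

  public static void main(String[] args) {
    new Bounded(new Object(), 10);     // ok
    // new Bounded(new Object(), -1);  // would throw IllegalArgumentException
  }
}
```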
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java b/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java
index 2dd1c8f2e4a7d..da4b3109bbe1e 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java
@@ -26,6 +26,9 @@
import io.netty.channel.epoll.EpollEventLoopGroup;
import io.netty.channel.epoll.EpollServerSocketChannel;
import io.netty.channel.epoll.EpollSocketChannel;
+import io.netty.channel.kqueue.KQueueEventLoopGroup;
+import io.netty.channel.kqueue.KQueueServerSocketChannel;
+import io.netty.channel.kqueue.KQueueSocketChannel;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.nio.NioServerSocketChannel;
import io.netty.channel.socket.nio.NioSocketChannel;
@@ -68,6 +71,7 @@ public static EventLoopGroup createEventLoop(IOMode mode, int numThreads, String
return switch (mode) {
case NIO -> new NioEventLoopGroup(numThreads, threadFactory);
case EPOLL -> new EpollEventLoopGroup(numThreads, threadFactory);
+ case KQUEUE -> new KQueueEventLoopGroup(numThreads, threadFactory);
};
}
@@ -76,6 +80,7 @@ public static Class<? extends Channel> getClientChannelClass(IOMode mode) {
return switch (mode) {
case NIO -> NioSocketChannel.class;
case EPOLL -> EpollSocketChannel.class;
+ case KQUEUE -> KQueueSocketChannel.class;
};
}
@@ -84,6 +89,7 @@ public static Class<? extends ServerChannel> getServerChannelClass(IOMode mode)
return switch (mode) {
case NIO -> NioServerSocketChannel.class;
case EPOLL -> EpollServerSocketChannel.class;
+ case KQUEUE -> KQueueServerSocketChannel.class;
};
}
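With KQUEUE added alongside NIO and EPOLL above, a caller has to pick a mode that is actually usable on the current platform. A small sketch of such a selection using Netty's availability probes; the local enum and the fallback policy are assumptions for illustration, not part of the patch:

```java
import io.netty.channel.epoll.Epoll;
import io.netty.channel.kqueue.KQueue;

// Illustrative sketch: choose a native transport when one is available,
// otherwise fall back to NIO. The enum and policy here are hypothetical.
final class IoModePicker {
  enum Mode { NIO, EPOLL, KQUEUE }

  private IoModePicker() {}

  static Mode pick() {
    if (Epoll.isAvailable()) {   // Linux with netty-transport-native-epoll on the classpath
      return Mode.EPOLL;
    }
    if (KQueue.isAvailable()) {  // macOS/BSD with netty-transport-native-kqueue
      return Mode.KQUEUE;
    }
    return Mode.NIO;             // portable default
  }

  public static void main(String[] args) {
    System.out.println("Selected IO mode: " + pick());
  }
}
```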
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java b/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java
index 1753c124c9935..cea9207d3470a 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java
@@ -67,7 +67,7 @@ public static RocksDB initRockDB(File dbFile, StoreVersion version, ObjectMapper
tmpDb = RocksDB.open(dbOptions, dbFile.toString());
} catch (RocksDBException e) {
if (e.getStatus().getCode() == Status.Code.NotFound) {
- logger.info("Creating state database at {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile));
+ logger.info("Creating state database at {}", MDC.of(LogKeys.PATH, dbFile));
dbOptions.setCreateIfMissing(true);
try {
tmpDb = RocksDB.open(dbOptions, dbFile.toString());
@@ -78,16 +78,16 @@ public static RocksDB initRockDB(File dbFile, StoreVersion version, ObjectMapper
// the RocksDB file seems to be corrupt somehow. Let's just blow it away and create
// a new one, so we can keep processing new apps
logger.error("error opening rocksdb file {}. Creating new file, will not be able to " +
- "recover state for existing applications", e, MDC.of(LogKeys.PATH$.MODULE$, dbFile));
+ "recover state for existing applications", e, MDC.of(LogKeys.PATH, dbFile));
if (dbFile.isDirectory()) {
for (File f : Objects.requireNonNull(dbFile.listFiles())) {
if (!f.delete()) {
- logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, f.getPath()));
+ logger.warn("error deleting {}", MDC.of(LogKeys.PATH, f.getPath()));
}
}
}
if (!dbFile.delete()) {
- logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile.getPath()));
+ logger.warn("error deleting {}", MDC.of(LogKeys.PATH, dbFile.getPath()));
}
dbOptions.setCreateIfMissing(true);
try {
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java
index 822b8aa310a22..003e72edf29ee 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java
@@ -21,8 +21,6 @@
import java.util.Locale;
import java.util.Properties;
import java.util.concurrent.TimeUnit;
-import com.google.common.base.Preconditions;
-import com.google.common.primitives.Ints;
import io.netty.util.NettyRuntime;
/**
@@ -171,7 +169,7 @@ public int ioRetryWaitTimeMs() {
* memory mapping has high overhead for blocks close to or below the page size of the OS.
*/
public int memoryMapBytes() {
- return Ints.checkedCast(JavaUtils.byteStringAsBytes(
+ return JavaUtils.checkedCast(JavaUtils.byteStringAsBytes(
conf.get("spark.storage.memoryMapThreshold", "2m")));
}
@@ -248,7 +246,7 @@ public boolean saslEncryption() {
* Maximum number of bytes to be encrypted at a time when SASL encryption is used.
*/
public int maxSaslEncryptedBlockSize() {
- return Ints.checkedCast(JavaUtils.byteStringAsBytes(
+ return JavaUtils.checkedCast(JavaUtils.byteStringAsBytes(
conf.get("spark.network.sasl.maxEncryptedBlockSize", "64k")));
}
@@ -263,7 +261,7 @@ public boolean saslServerAlwaysEncrypt() {
* When Secure (SSL/TLS) Shuffle is enabled, the Chunk size to use for shuffling files.
*/
public int sslShuffleChunkSize() {
- return Ints.checkedCast(JavaUtils.byteStringAsBytes(
+ return JavaUtils.checkedCast(JavaUtils.byteStringAsBytes(
conf.get("spark.network.ssl.maxEncryptedBlockSize", "64k")));
}
@@ -504,7 +502,7 @@ public int finalizeShuffleMergeHandlerThreads() {
if (!this.getModuleName().equalsIgnoreCase("shuffle")) {
return 0;
}
- Preconditions.checkArgument(separateFinalizeShuffleMerge(),
+ JavaUtils.checkArgument(separateFinalizeShuffleMerge(),
"Please set spark.shuffle.server.finalizeShuffleMergeThreadsPercent to a positive value");
int finalizeShuffleMergeThreadsPercent =
Integer.parseInt(conf.get("spark.shuffle.server.finalizeShuffleMergeThreadsPercent"));
@@ -567,7 +565,7 @@ public String mergedShuffleFileManagerImpl() {
* service unnecessarily.
*/
public int minChunkSizeInMergedShuffleFile() {
- return Ints.checkedCast(JavaUtils.byteStringAsBytes(
+ return JavaUtils.checkedCast(JavaUtils.byteStringAsBytes(
conf.get("spark.shuffle.push.server.minChunkSizeInMergedShuffleFile", "2m")));
}
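Several TransportConf getters above move from Guava's Ints.checkedCast to a JavaUtils.checkedCast. The sketch below shows one way such a checked long-to-int narrowing can be written (Math.toIntExact is the JDK equivalent, differing only in the exception type); it mirrors the call sites but is not the PR's implementation:

```java
// Illustrative sketch of a checked long-to-int cast; not the PR's implementation.
final class CheckedCastSketch {
  private CheckedCastSketch() {}

  static int checkedCast(long value) {
    int narrowed = (int) value;
    if (narrowed != value) {
      throw new IllegalArgumentException("Value out of int range: " + value);
    }
    return narrowed;
  }

  public static void main(String[] args) {
    System.out.println(checkedCast(2L * 1024 * 1024)); // e.g. a 2m byte-size setting
    System.out.println(Math.toIntExact(64L * 1024));   // JDK alternative, throws ArithmeticException
  }
}
```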
diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java
index cef0e415aa40a..6b490068507aa 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java
@@ -20,7 +20,6 @@
import java.util.LinkedList;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.CompositeByteBuf;
import io.netty.buffer.Unpooled;
@@ -145,9 +144,9 @@ private ByteBuf decodeNext() {
}
if (frameBuf == null) {
- Preconditions.checkArgument(frameSize < MAX_FRAME_SIZE,
+ JavaUtils.checkArgument(frameSize < MAX_FRAME_SIZE,
"Too large frame: %s", frameSize);
- Preconditions.checkArgument(frameSize > 0,
+ JavaUtils.checkArgument(frameSize > 0,
"Frame length should be positive: %s", frameSize);
frameRemainingBytes = (int) frameSize;
@@ -252,7 +251,7 @@ public void handlerRemoved(ChannelHandlerContext ctx) throws Exception {
}
public void setInterceptor(Interceptor interceptor) {
- Preconditions.checkState(this.interceptor == null, "Already have an interceptor.");
+ JavaUtils.checkState(this.interceptor == null, "Already have an interceptor.");
this.interceptor = interceptor;
}
diff --git a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java
index 576a106934fda..75ccd8d5789d4 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java
@@ -30,7 +30,6 @@
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
-import com.google.common.collect.Sets;
import com.google.common.io.Closeables;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
@@ -188,7 +187,7 @@ public void onFailure(int chunkIndex, Throwable e) {
@Test
public void fetchBufferChunk() throws Exception {
FetchResult res = fetchChunks(Arrays.asList(BUFFER_CHUNK_INDEX));
- assertEquals(Sets.newHashSet(BUFFER_CHUNK_INDEX), res.successChunks);
+ assertEquals(Set.of(BUFFER_CHUNK_INDEX), res.successChunks);
assertTrue(res.failedChunks.isEmpty());
assertBufferListsEqual(Arrays.asList(bufferChunk), res.buffers);
res.releaseBuffers();
@@ -197,7 +196,7 @@ public void fetchBufferChunk() throws Exception {
@Test
public void fetchFileChunk() throws Exception {
FetchResult res = fetchChunks(Arrays.asList(FILE_CHUNK_INDEX));
- assertEquals(Sets.newHashSet(FILE_CHUNK_INDEX), res.successChunks);
+ assertEquals(Set.of(FILE_CHUNK_INDEX), res.successChunks);
assertTrue(res.failedChunks.isEmpty());
assertBufferListsEqual(Arrays.asList(fileChunk), res.buffers);
res.releaseBuffers();
@@ -207,14 +206,14 @@ public void fetchFileChunk() throws Exception {
public void fetchNonExistentChunk() throws Exception {
FetchResult res = fetchChunks(Arrays.asList(12345));
assertTrue(res.successChunks.isEmpty());
- assertEquals(Sets.newHashSet(12345), res.failedChunks);
+ assertEquals(Set.of(12345), res.failedChunks);
assertTrue(res.buffers.isEmpty());
}
@Test
public void fetchBothChunks() throws Exception {
FetchResult res = fetchChunks(Arrays.asList(BUFFER_CHUNK_INDEX, FILE_CHUNK_INDEX));
- assertEquals(Sets.newHashSet(BUFFER_CHUNK_INDEX, FILE_CHUNK_INDEX), res.successChunks);
+ assertEquals(Set.of(BUFFER_CHUNK_INDEX, FILE_CHUNK_INDEX), res.successChunks);
assertTrue(res.failedChunks.isEmpty());
assertBufferListsEqual(Arrays.asList(bufferChunk, fileChunk), res.buffers);
res.releaseBuffers();
@@ -223,8 +222,8 @@ public void fetchBothChunks() throws Exception {
@Test
public void fetchChunkAndNonExistent() throws Exception {
FetchResult res = fetchChunks(Arrays.asList(BUFFER_CHUNK_INDEX, 12345));
- assertEquals(Sets.newHashSet(BUFFER_CHUNK_INDEX), res.successChunks);
- assertEquals(Sets.newHashSet(12345), res.failedChunks);
+ assertEquals(Set.of(BUFFER_CHUNK_INDEX), res.successChunks);
+ assertEquals(Set.of(12345), res.failedChunks);
assertBufferListsEqual(Arrays.asList(bufferChunk), res.buffers);
res.releaseBuffers();
}
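A note on the Sets.newHashSet to Set.of migration used throughout these test suites: Set.of returns an immutable set, rejects null elements, and throws on duplicate arguments, so it is only a drop-in replacement where the expected set is fixed, as it is in these assertions. A tiny sketch of the behavioural difference:

```java
import java.util.HashSet;
import java.util.Set;

// Illustrative sketch of the difference between a mutable HashSet and Set.of.
final class SetMigrationSketch {
  private SetMigrationSketch() {}

  public static void main(String[] args) {
    Set<Integer> mutable = new HashSet<>(Set.of(1, 2)); // still mutable, like Sets.newHashSet
    mutable.add(3);

    Set<Integer> fixed = Set.of(1, 2);                  // immutable
    try {
      fixed.add(3);
    } catch (UnsupportedOperationException e) {
      System.out.println("Set.of is immutable");
    }
    // Set.of(1, 1) would throw IllegalArgumentException (duplicate element),
    // and Set.of((Integer) null) would throw NullPointerException.
  }
}
```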
diff --git a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java
index 74dffd87dcf30..e9a4c355ebe48 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java
@@ -27,8 +27,6 @@
import static org.mockito.Mockito.*;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.client.TransportClient;
import org.apache.spark.network.protocol.*;
@@ -36,6 +34,7 @@
import org.apache.spark.network.server.NoOpRpcHandler;
import org.apache.spark.network.server.OneForOneStreamManager;
import org.apache.spark.network.server.RpcHandler;
+import org.apache.spark.util.Pair;
public class ChunkFetchRequestHandlerSuite {
@@ -54,7 +53,7 @@ public void handleChunkFetchRequest() throws Exception {
.thenAnswer(invocationOnMock0 -> {
Object response = invocationOnMock0.getArguments()[0];
ExtendedChannelPromise channelFuture = new ExtendedChannelPromise(channel);
- responseAndPromisePairs.add(ImmutablePair.of(response, channelFuture));
+ responseAndPromisePairs.add(Pair.of(response, channelFuture));
return channelFuture;
});
diff --git a/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java b/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java
index 8c1299ebcd836..500d91868bbbd 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java
@@ -19,7 +19,6 @@
import java.util.List;
-import com.google.common.primitives.Ints;
import io.netty.buffer.Unpooled;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.FileRegion;
@@ -44,6 +43,7 @@
import org.apache.spark.network.protocol.StreamRequest;
import org.apache.spark.network.protocol.StreamResponse;
import org.apache.spark.network.util.ByteArrayWritableChannel;
+import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.NettyUtils;
public class ProtocolSuite {
@@ -115,7 +115,8 @@ private static class FileRegionEncoder extends MessageToMessageEncoder<FileRegion> {
public void encode(ChannelHandlerContext ctx, FileRegion in, List<Object> out)
throws Exception {
- ByteArrayWritableChannel channel = new ByteArrayWritableChannel(Ints.checkedCast(in.count()));
+ ByteArrayWritableChannel channel =
+ new ByteArrayWritableChannel(JavaUtils.checkedCast(in.count()));
while (in.transferred() < in.count()) {
in.transferTo(channel, in.transferred());
}
diff --git a/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java
index 40495d6912c91..e229e32e91717 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java
@@ -24,10 +24,6 @@
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
-import com.google.common.collect.Sets;
-import com.google.common.io.Files;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
@@ -41,6 +37,7 @@
import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.MapConfigProvider;
import org.apache.spark.network.util.TransportConf;
+import org.apache.spark.util.Pair;
public class RpcIntegrationSuite {
static TransportConf conf;
@@ -248,14 +245,14 @@ public void onFailure(Throwable e) {
@Test
public void singleRPC() throws Exception {
RpcResult res = sendRPC("hello/Aaron");
- assertEquals(Sets.newHashSet("Hello, Aaron!"), res.successMessages);
+ assertEquals(Set.of("Hello, Aaron!"), res.successMessages);
assertTrue(res.errorMessages.isEmpty());
}
@Test
public void doubleRPC() throws Exception {
RpcResult res = sendRPC("hello/Aaron", "hello/Reynold");
- assertEquals(Sets.newHashSet("Hello, Aaron!", "Hello, Reynold!"), res.successMessages);
+ assertEquals(Set.of("Hello, Aaron!", "Hello, Reynold!"), res.successMessages);
assertTrue(res.errorMessages.isEmpty());
}
@@ -263,28 +260,28 @@ public void doubleRPC() throws Exception {
public void returnErrorRPC() throws Exception {
RpcResult res = sendRPC("return error/OK");
assertTrue(res.successMessages.isEmpty());
- assertErrorsContain(res.errorMessages, Sets.newHashSet("Returned: OK"));
+ assertErrorsContain(res.errorMessages, Set.of("Returned: OK"));
}
@Test
public void throwErrorRPC() throws Exception {
RpcResult res = sendRPC("throw error/uh-oh");
assertTrue(res.successMessages.isEmpty());
- assertErrorsContain(res.errorMessages, Sets.newHashSet("Thrown: uh-oh"));
+ assertErrorsContain(res.errorMessages, Set.of("Thrown: uh-oh"));
}
@Test
public void doubleTrouble() throws Exception {
RpcResult res = sendRPC("return error/OK", "throw error/uh-oh");
assertTrue(res.successMessages.isEmpty());
- assertErrorsContain(res.errorMessages, Sets.newHashSet("Returned: OK", "Thrown: uh-oh"));
+ assertErrorsContain(res.errorMessages, Set.of("Returned: OK", "Thrown: uh-oh"));
}
@Test
public void sendSuccessAndFailure() throws Exception {
RpcResult res = sendRPC("hello/Bob", "throw error/the", "hello/Builder", "return error/!");
- assertEquals(Sets.newHashSet("Hello, Bob!", "Hello, Builder!"), res.successMessages);
- assertErrorsContain(res.errorMessages, Sets.newHashSet("Thrown: the", "Returned: !"));
+ assertEquals(Set.of("Hello, Bob!", "Hello, Builder!"), res.successMessages);
+ assertErrorsContain(res.errorMessages, Set.of("Thrown: the", "Returned: !"));
}
@Test
@@ -311,7 +308,7 @@ public void sendRpcWithStreamOneAtATime() throws Exception {
for (String stream : StreamTestHelper.STREAMS) {
RpcResult res = sendRpcWithStream(stream);
assertTrue(res.errorMessages.isEmpty(), "there were error messages!" + res.errorMessages);
- assertEquals(Sets.newHashSet(stream), res.successMessages);
+ assertEquals(Set.of(stream), res.successMessages);
}
}
@@ -322,7 +319,7 @@ public void sendRpcWithStreamConcurrently() throws Exception {
streams[i] = StreamTestHelper.STREAMS[i % StreamTestHelper.STREAMS.length];
}
RpcResult res = sendRpcWithStream(streams);
- assertEquals(Sets.newHashSet(StreamTestHelper.STREAMS), res.successMessages);
+ assertEquals(Set.of(StreamTestHelper.STREAMS), res.successMessages);
assertTrue(res.errorMessages.isEmpty());
}
@@ -342,8 +339,8 @@ public void sendRpcWithStreamFailures() throws Exception {
RpcResult exceptionInOnComplete =
sendRpcWithStream("fail/exception-oncomplete/smallBuffer", "smallBuffer");
assertErrorsContain(exceptionInOnComplete.errorMessages,
- Sets.newHashSet("Failure post-processing"));
- assertEquals(Sets.newHashSet("smallBuffer"), exceptionInOnComplete.successMessages);
+ Set.of("Failure post-processing"));
+ assertEquals(Set.of("smallBuffer"), exceptionInOnComplete.successMessages);
}
private void assertErrorsContain(Set<String> errors, Set<String> contains) {
@@ -365,14 +362,14 @@ private void assertErrorAndClosed(RpcResult result, String expectedError) {
// We expect 1 additional error due to closed connection and here are possible keywords in the
// error message.
- Set<String> possibleClosedErrors = Sets.newHashSet(
+ Set<String> possibleClosedErrors = Set.of(
"closed",
"Connection reset",
"java.nio.channels.ClosedChannelException",
"io.netty.channel.StacklessClosedChannelException",
"java.io.IOException: Broken pipe"
);
- Set<String> containsAndClosed = Sets.newHashSet(expectedError);
+ Set<String> containsAndClosed = new HashSet<>(Set.of(expectedError));
containsAndClosed.addAll(possibleClosedErrors);
Pair<Set<String>, Set<String>> r = checkErrorsContain(errors, containsAndClosed);
@@ -392,8 +389,8 @@ private void assertErrorAndClosed(RpcResult result, String expectedError) {
private Pair<Set<String>, Set<String>> checkErrorsContain(
Set<String> errors,
Set<String> contains) {
- Set<String> remainingErrors = Sets.newHashSet(errors);
- Set<String> notFound = Sets.newHashSet();
+ Set<String> remainingErrors = new HashSet<>(errors);
+ Set<String> notFound = new HashSet<>();
for (String contain : contains) {
Iterator<String> it = remainingErrors.iterator();
boolean foundMatch = false;
@@ -408,7 +405,7 @@ private Pair<Set<String>, Set<String>> checkErrorsContain(
notFound.add(contain);
}
}
- return new ImmutablePair<>(remainingErrors, notFound);
+ return new Pair<>(remainingErrors, notFound);
}
private static class VerifyingStreamCallback implements StreamCallbackWithID {
@@ -431,7 +428,8 @@ private static class VerifyingStreamCallback implements StreamCallbackWithID {
void verify() throws IOException {
if (streamId.equals("file")) {
- assertTrue(Files.equal(testData.testFile, outFile), "File stream did not match.");
+ assertTrue(JavaUtils.contentEquals(testData.testFile, outFile),
+ "File stream did not match.");
} else {
byte[] result = ((ByteArrayOutputStream)out).toByteArray();
ByteBuffer srcBuffer = testData.srcBuffer(streamId);
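The RpcIntegrationSuite changes above (and the handler suites earlier in this diff) replace commons-lang3's ImmutablePair with org.apache.spark.util.Pair, constructed both via Pair.of(...) and new Pair<>(...). A minimal pair type with exactly that surface could look like the record below; this is only an illustration of what the call sites require, not the actual Spark class:

```java
// Illustrative sketch: a tiny immutable pair exposing the constructor and factory
// used by the tests above. The real org.apache.spark.util.Pair may differ.
record PairSketch<L, R>(L left, R right) {
  static <L, R> PairSketch<L, R> of(L left, R right) {
    return new PairSketch<>(left, right);
  }

  public static void main(String[] args) {
    PairSketch<String, Integer> p = PairSketch.of("answer", 42);
    System.out.println(p.left() + " = " + p.right());
  }
}
```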
diff --git a/common/network-common/src/test/java/org/apache/spark/network/StreamSuite.java b/common/network-common/src/test/java/org/apache/spark/network/StreamSuite.java
index 4f4637e302b94..496af96cb1cac 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/StreamSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/StreamSuite.java
@@ -29,7 +29,6 @@
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
-import com.google.common.io.Files;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
@@ -43,6 +42,7 @@
import org.apache.spark.network.server.RpcHandler;
import org.apache.spark.network.server.StreamManager;
import org.apache.spark.network.server.TransportServer;
+import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.MapConfigProvider;
import org.apache.spark.network.util.TransportConf;
@@ -212,7 +212,8 @@ public void run() {
callback.waitForCompletion(timeoutMs);
if (srcBuffer == null) {
- assertTrue(Files.equal(testData.testFile, outFile), "File stream did not match.");
+ assertTrue(JavaUtils.contentEquals(testData.testFile, outFile),
+ "File stream did not match.");
} else {
ByteBuffer base;
synchronized (srcBuffer) {
diff --git a/common/network-common/src/test/java/org/apache/spark/network/TestManagedBuffer.java b/common/network-common/src/test/java/org/apache/spark/network/TestManagedBuffer.java
index d1e93e3cb5845..828d995ba444b 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/TestManagedBuffer.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/TestManagedBuffer.java
@@ -21,11 +21,11 @@
import java.io.InputStream;
import java.nio.ByteBuffer;
-import com.google.common.base.Preconditions;
import io.netty.buffer.Unpooled;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.buffer.NettyManagedBuffer;
+import org.apache.spark.network.util.JavaUtils;
/**
* A ManagedBuffer implementation that contains 0, 1, 2, 3, ..., (len-1).
@@ -38,7 +38,7 @@ public class TestManagedBuffer extends ManagedBuffer {
private NettyManagedBuffer underlying;
public TestManagedBuffer(int len) {
- Preconditions.checkArgument(len <= Byte.MAX_VALUE);
+ JavaUtils.checkArgument(len <= Byte.MAX_VALUE, "length exceeds limit " + Byte.MAX_VALUE);
this.len = len;
byte[] byteArray = new byte[len];
for (int i = 0; i < len; i ++) {
diff --git a/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java
index d643fb4f662e3..9433e274b507f 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java
@@ -28,8 +28,6 @@
import static org.junit.jupiter.api.Assertions.*;
import static org.mockito.Mockito.*;
-import org.apache.commons.lang3.tuple.ImmutablePair;
-import org.apache.commons.lang3.tuple.Pair;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.client.RpcResponseCallback;
import org.apache.spark.network.client.TransportClient;
@@ -39,6 +37,7 @@
import org.apache.spark.network.server.RpcHandler;
import org.apache.spark.network.server.StreamManager;
import org.apache.spark.network.server.TransportRequestHandler;
+import org.apache.spark.util.Pair;
public class TransportRequestHandlerSuite {
@@ -53,7 +52,7 @@ public void handleStreamRequest() throws Exception {
.thenAnswer(invocationOnMock0 -> {
Object response = invocationOnMock0.getArguments()[0];
ExtendedChannelPromise channelFuture = new ExtendedChannelPromise(channel);
- responseAndPromisePairs.add(ImmutablePair.of(response, channelFuture));
+ responseAndPromisePairs.add(Pair.of(response, channelFuture));
return channelFuture;
});
@@ -145,7 +144,7 @@ public MergedBlockMetaReqHandler getMergedBlockMetaReqHandler() {
when(channel.writeAndFlush(any())).thenAnswer(invocationOnMock0 -> {
Object response = invocationOnMock0.getArguments()[0];
ExtendedChannelPromise channelFuture = new ExtendedChannelPromise(channel);
- responseAndPromisePairs.add(ImmutablePair.of(response, channelFuture));
+ responseAndPromisePairs.add(Pair.of(response, channelFuture));
return channelFuture;
});
diff --git a/common/network-common/src/test/java/org/apache/spark/network/client/TransportClientFactorySuite.java b/common/network-common/src/test/java/org/apache/spark/network/client/TransportClientFactorySuite.java
index b57f0be920c7b..58faea2cf2dda 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/client/TransportClientFactorySuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/client/TransportClientFactorySuite.java
@@ -225,11 +225,12 @@ public void closeFactoryBeforeCreateClient() {
}
@Test
- public void fastFailConnectionInTimeWindow() {
+ public void fastFailConnectionInTimeWindow() throws IOException, InterruptedException {
TransportClientFactory factory = context.createClientFactory();
TransportServer server = context.createServer();
int unreachablePort = server.getPort();
server.close();
+ Thread.sleep(1000);
Assertions.assertThrows(IOException.class,
() -> factory.createClient(TestUtils.getLocalHost(), unreachablePort, true));
Assertions.assertThrows(IOException.class,
@@ -258,6 +259,7 @@ public void unlimitedConnectionAndCreationTimeouts() throws IOException, Interru
TransportServer server = ctx.createServer();
int unreachablePort = server.getPort();
JavaUtils.closeQuietly(server);
+ Thread.sleep(1000);
IOException exception = Assertions.assertThrows(IOException.class,
() -> factory.createClient(TestUtils.getLocalHost(), unreachablePort, true));
assertNotEquals(exception.getCause(), null);
diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java
index 628de9e780337..904deabba5867 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java
@@ -21,7 +21,6 @@
import java.security.GeneralSecurityException;
import java.util.Map;
-import com.google.common.collect.ImmutableMap;
import com.google.crypto.tink.subtle.Hex;
import org.apache.spark.network.util.*;
@@ -49,7 +48,7 @@ abstract class AuthEngineSuite {
static TransportConf getConf(int authEngineVerison, boolean useCtr) {
String authEngineVersion = (authEngineVerison == 1) ? "1" : "2";
String mode = useCtr ? "AES/CTR/NoPadding" : "AES/GCM/NoPadding";
- Map<String, String> confMap = ImmutableMap.of(
+ Map<String, String> confMap = Map.of(
"spark.network.crypto.enabled", "true",
"spark.network.crypto.authEngineVersion", authEngineVersion,
"spark.network.crypto.cipher", mode
diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java
index cb5929f7c65b4..8d1773316878b 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java
@@ -22,7 +22,6 @@
import java.util.List;
import java.util.Map;
-import com.google.common.collect.ImmutableMap;
import io.netty.channel.Channel;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;
@@ -214,7 +213,7 @@ private static class AuthTestCtx {
}
AuthTestCtx(RpcHandler rpcHandler, String mode) throws Exception {
- Map<String, String> testConf = ImmutableMap.of(
+ Map<String, String> testConf = Map.of(
"spark.network.crypto.enabled", "true",
"spark.network.crypto.cipher", mode);
this.conf = new TransportConf("rpc", new MapConfigProvider(testConf));
diff --git a/common/network-common/src/test/java/org/apache/spark/network/protocol/MergedBlockMetaSuccessSuite.java b/common/network-common/src/test/java/org/apache/spark/network/protocol/MergedBlockMetaSuccessSuite.java
index a3750ce11172b..41b84f3895876 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/protocol/MergedBlockMetaSuccessSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/protocol/MergedBlockMetaSuccessSuite.java
@@ -21,9 +21,9 @@
import java.io.File;
import java.io.FileOutputStream;
import java.nio.file.Files;
+import java.util.ArrayList;
import java.util.List;
-import com.google.common.collect.Lists;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.netty.buffer.Unpooled;
@@ -65,7 +65,7 @@ public void testMergedBlocksMetaEncodeDecode() throws Exception {
MergedBlockMetaSuccess expectedMeta = new MergedBlockMetaSuccess(requestId, 2,
new FileSegmentManagedBuffer(conf, chunkMetaFile, 0, chunkMetaFile.length()));
- List<Object> out = Lists.newArrayList();
+ List<Object> out = new ArrayList<>();
ChannelHandlerContext context = mock(ChannelHandlerContext.class);
when(context.alloc()).thenReturn(ByteBufAllocator.DEFAULT);
diff --git a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java
index bf0424a1506a3..4feaf5cef3f2b 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java
@@ -23,6 +23,7 @@
import java.io.File;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
+import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -36,8 +37,6 @@
import javax.security.sasl.SaslException;
import com.google.common.collect.ImmutableMap;
-import com.google.common.io.ByteStreams;
-import com.google.common.io.Files;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.Channel;
@@ -221,7 +220,7 @@ public void testEncryptedMessageChunking() throws Exception {
byte[] data = new byte[8 * 1024];
new Random().nextBytes(data);
- Files.write(data, file);
+ Files.write(file.toPath(), data);
SaslEncryptionBackend backend = mock(SaslEncryptionBackend.class);
// It doesn't really matter what we return here, as long as it's not null.
@@ -245,7 +244,7 @@ public void testEncryptedMessageChunking() throws Exception {
@Test
public void testFileRegionEncryption() throws Exception {
- Map<String, String> testConf = ImmutableMap.of(
+ Map<String, String> testConf = Map.of(
"spark.network.sasl.maxEncryptedBlockSize", "1k");
AtomicReference<ManagedBuffer> response = new AtomicReference<>();
@@ -262,7 +261,7 @@ public void testFileRegionEncryption() throws Exception {
byte[] data = new byte[8 * 1024];
new Random().nextBytes(data);
- Files.write(data, file);
+ Files.write(file.toPath(), data);
ctx = new SaslTestCtx(rpcHandler, true, false, testConf);
@@ -282,7 +281,7 @@ public void testFileRegionEncryption() throws Exception {
verify(callback, times(1)).onSuccess(anyInt(), any(ManagedBuffer.class));
verify(callback, never()).onFailure(anyInt(), any(Throwable.class));
- byte[] received = ByteStreams.toByteArray(response.get().createInputStream());
+ byte[] received = response.get().createInputStream().readAllBytes();
assertArrayEquals(data, received);
} finally {
file.delete();
@@ -299,7 +298,7 @@ public void testFileRegionEncryption() throws Exception {
public void testServerAlwaysEncrypt() {
Exception re = assertThrows(Exception.class,
() -> new SaslTestCtx(mock(RpcHandler.class), false, false,
- ImmutableMap.of("spark.network.sasl.serverAlwaysEncrypt", "true")));
+ Map.of("spark.network.sasl.serverAlwaysEncrypt", "true")));
assertTrue(re.getCause() instanceof SaslException);
}
diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/CryptoUtilsSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/CryptoUtilsSuite.java
index 47c9ef2490d2e..9673bbfbc2de7 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/util/CryptoUtilsSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/util/CryptoUtilsSuite.java
@@ -20,7 +20,6 @@
import java.util.Map;
import java.util.Properties;
-import com.google.common.collect.ImmutableMap;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
@@ -38,7 +37,7 @@ public void testConfConversion() {
String confVal2 = "val2";
String cryptoKey2 = CryptoUtils.COMMONS_CRYPTO_CONFIG_PREFIX + "A.b.c";
- Map<String, String> conf = ImmutableMap.of(
+ Map<String, String> conf = Map.of(
confKey1, confVal1,
confKey2, confVal2);
diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java
index 81bfc55264c4c..f7299e157674e 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java
@@ -18,7 +18,6 @@
package org.apache.spark.network.util;
import com.fasterxml.jackson.databind.ObjectMapper;
-import org.apache.commons.lang3.SystemUtils;
import org.apache.spark.network.shuffledb.DBBackend;
import org.apache.spark.network.shuffledb.StoreVersion;
import org.junit.jupiter.api.Assertions;
@@ -38,7 +37,7 @@ public void testRockDBCheckVersionFailed() throws IOException, InterruptedExcept
@Test
public void testLevelDBCheckVersionFailed() throws IOException, InterruptedException {
- assumeFalse(SystemUtils.IS_OS_MAC_OSX && SystemUtils.OS_ARCH.equals("aarch64"));
+ assumeFalse(JavaUtils.isMacOnAppleSilicon);
testCheckVersionFailed(DBBackend.LEVELDB, "leveldb");
}
diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java
index 1336a587fd2eb..2edeb3f05c9b0 100644
--- a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java
+++ b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java
@@ -18,6 +18,7 @@
import java.io.File;
import java.io.IOException;
+import java.nio.file.Files;
import org.junit.jupiter.api.Test;
@@ -55,4 +56,33 @@ public void testCreateDirectory() throws IOException {
() -> JavaUtils.createDirectory(testDirPath, "scenario4"));
assertTrue(testDir.setWritable(true));
}
+
+ @Test
+ public void testListFiles() throws IOException {
+ File tmp = Files.createTempDirectory("testListFiles").toFile();
+ File file = new File(tmp, "file");
+
+ // Return empty set on non-existent input
+ assertFalse(file.exists());
+ assertEquals(0, JavaUtils.listFiles(file).size());
+ assertEquals(0, JavaUtils.listPaths(file).size());
+
+ // Return empty set on non-directory input
+ file.createNewFile();
+ assertTrue(file.exists());
+ assertEquals(0, JavaUtils.listFiles(file).size());
+ assertEquals(0, JavaUtils.listPaths(file).size());
+
+ // Return the files contained in a directory input
+ File dir = new File(tmp, "dir");
+ dir.mkdir();
+ new File(dir, "1").createNewFile();
+ assertEquals(1, JavaUtils.listFiles(dir).size());
+ assertEquals(1, JavaUtils.listPaths(dir).size());
+
+ File symlink = new File(tmp, "symlink");
+ Files.createSymbolicLink(symlink.toPath(), dir.toPath());
+ assertEquals(1, JavaUtils.listFiles(symlink).size());
+ assertEquals(1, JavaUtils.listPaths(symlink).size());
+ }
}
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index adfc55d28c357..60ad971573997 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -42,6 +42,11 @@
<version>${project.version}</version>
</dependency>

+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ </dependency>
+
<dependency>
<groupId>io.dropwizard.metrics</groupId>
<artifactId>metrics-core</artifactId>
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java b/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java
index d67f2a3099d35..625cb2e1257da 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java
@@ -55,7 +55,7 @@ public void registerApp(String appId, String shuffleSecret) {
// to the applicationId since the secrets change between application attempts on yarn.
shuffleSecretMap.put(appId, shuffleSecret);
logger.info("Registered shuffle secret for application {}",
- MDC.of(LogKeys.APP_ID$.MODULE$, appId));
+ MDC.of(LogKeys.APP_ID, appId));
}
/**
@@ -72,7 +72,7 @@ public void registerApp(String appId, ByteBuffer shuffleSecret) {
public void unregisterApp(String appId) {
shuffleSecretMap.remove(appId);
logger.info("Unregistered shuffle secret for application {}",
- MDC.of(LogKeys.APP_ID$.MODULE$, appId));
+ MDC.of(LogKeys.APP_ID, appId));
}
/**
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/AppsWithRecoveryDisabled.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/AppsWithRecoveryDisabled.java
index 6a029a1083a47..7a0b316a3a8ea 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/AppsWithRecoveryDisabled.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/AppsWithRecoveryDisabled.java
@@ -18,11 +18,10 @@
package org.apache.spark.network.shuffle;
import java.util.Collections;
+import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
-import com.google.common.base.Preconditions;
-
/**
* Stores the applications which have recovery disabled.
*/
@@ -41,8 +40,7 @@ private AppsWithRecoveryDisabled() {
* @param appId application id
*/
public static void disableRecoveryOfApp(String appId) {
- Preconditions.checkNotNull(appId);
- INSTANCE.appsWithRecoveryDisabled.add(appId);
+ INSTANCE.appsWithRecoveryDisabled.add(Objects.requireNonNull(appId));
}
/**
@@ -51,8 +49,7 @@ public static void disableRecoveryOfApp(String appId) {
* @return true if the application is enabled for recovery; false otherwise.
*/
public static boolean isRecoveryEnabledForApp(String appId) {
- Preconditions.checkNotNull(appId);
- return !INSTANCE.appsWithRecoveryDisabled.contains(appId);
+ return !INSTANCE.appsWithRecoveryDisabled.contains(Objects.requireNonNull(appId));
}
/**
@@ -60,7 +57,6 @@ public static boolean isRecoveryEnabledForApp(String appId) {
* @param appId application id
*/
public static void removeApp(String appId) {
- Preconditions.checkNotNull(appId);
- INSTANCE.appsWithRecoveryDisabled.remove(appId);
+ INSTANCE.appsWithRecoveryDisabled.remove(Objects.requireNonNull(appId));
}
}
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java
index dcb0a52b0d66c..ceb5d64699744 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java
@@ -173,7 +173,7 @@ public void onSuccess(ByteBuffer response) {
((LocalDirsForExecutors) msgObj).getLocalDirsByExec());
} catch (Throwable t) {
logger.warn("Error while trying to get the host local dirs for {}", t.getCause(),
- MDC.of(LogKeys.EXECUTOR_IDS$.MODULE$, Arrays.toString(getLocalDirsMessage.execIds)));
+ MDC.of(LogKeys.EXECUTOR_IDS, Arrays.toString(getLocalDirsMessage.execIds)));
hostLocalDirsCompletable.completeExceptionally(t);
}
}
@@ -181,7 +181,7 @@ public void onSuccess(ByteBuffer response) {
@Override
public void onFailure(Throwable t) {
logger.warn("Error while trying to get the host local dirs for {}", t.getCause(),
- MDC.of(LogKeys.EXECUTOR_IDS$.MODULE$, Arrays.toString(getLocalDirsMessage.execIds)));
+ MDC.of(LogKeys.EXECUTOR_IDS, Arrays.toString(getLocalDirsMessage.execIds)));
hostLocalDirsCompletable.completeExceptionally(t);
}
});
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java
index 31ed10ad76f8f..298611cc8567f 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java
@@ -20,10 +20,9 @@
import java.io.FileNotFoundException;
import java.net.ConnectException;
-import com.google.common.base.Throwables;
-
import org.apache.spark.annotation.Evolving;
import org.apache.spark.network.server.BlockPushNonFatalFailure;
+import org.apache.spark.network.util.JavaUtils;
/**
* Plugs into {@link RetryingBlockTransferor} to further control when an exception should be retried
@@ -105,12 +104,12 @@ class BlockFetchErrorHandler implements ErrorHandler {
@Override
public boolean shouldRetryError(Throwable t) {
- return !Throwables.getStackTraceAsString(t).contains(STALE_SHUFFLE_BLOCK_FETCH);
+ return !JavaUtils.stackTraceToString(t).contains(STALE_SHUFFLE_BLOCK_FETCH);
}
@Override
public boolean shouldLogError(Throwable t) {
- return !Throwables.getStackTraceAsString(t).contains(STALE_SHUFFLE_BLOCK_FETCH);
+ return !JavaUtils.stackTraceToString(t).contains(STALE_SHUFFLE_BLOCK_FETCH);
}
}
}
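ErrorHandler above (and the request handlers earlier in this diff) switch from Throwables.getStackTraceAsString to JavaUtils.stackTraceToString. One common way to render a stack trace as a String is sketched below; it is equivalent in spirit to these call sites but is not necessarily how JavaUtils implements it:

```java
import java.io.PrintWriter;
import java.io.StringWriter;

// Illustrative sketch: render a Throwable's stack trace as a String.
// Not necessarily the JavaUtils implementation.
final class StackTraceSketch {
  private StackTraceSketch() {}

  static String stackTraceToString(Throwable t) {
    StringWriter sw = new StringWriter();
    try (PrintWriter pw = new PrintWriter(sw)) {
      t.printStackTrace(pw);
    }
    return sw.toString();
  }

  public static void main(String[] args) {
    String trace = stackTraceToString(new RuntimeException("boom"));
    // Matching on the rendered text is how BlockFetchErrorHandler decides whether to retry or log.
    System.out.println(trace.contains("boom"));
  }
}
```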
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java
index 5d33bfb345a9e..45d0ff69de900 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java
@@ -21,8 +21,10 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
+import java.util.Objects;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
@@ -35,8 +37,6 @@
import com.codahale.metrics.Timer;
import com.codahale.metrics.Counter;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Sets;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
@@ -199,7 +199,7 @@ protected void handleMessage(
} else if (msgObj instanceof GetLocalDirsForExecutors msg) {
checkAuth(client, msg.appId);
- Set<String> execIdsForBlockResolver = Sets.newHashSet(msg.execIds);
+ Set<String> execIdsForBlockResolver = new HashSet<>(Set.of(msg.execIds));
boolean fetchMergedBlockDirs = execIdsForBlockResolver.remove(SHUFFLE_MERGER_IDENTIFIER);
Map<String, String[]> localDirs = blockManager.getLocalDirs(msg.appId,
execIdsForBlockResolver);
@@ -224,9 +224,9 @@ protected void handleMessage(
} else if (msgObj instanceof RemoveShuffleMerge msg) {
checkAuth(client, msg.appId);
logger.info("Removing shuffle merge data for application {} shuffle {} shuffleMerge {}",
- MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId),
- MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId),
- MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId));
+ MDC.of(LogKeys.APP_ID, msg.appId),
+ MDC.of(LogKeys.SHUFFLE_ID, msg.shuffleId),
+ MDC.of(LogKeys.SHUFFLE_MERGE_ID, msg.shuffleMergeId));
mergeManager.removeShuffleMerge(msg);
} else if (msgObj instanceof DiagnoseCorruption msg) {
checkAuth(client, msg.appId);
@@ -585,7 +585,7 @@ public boolean hasNext() {
@Override
public ManagedBuffer next() {
- ManagedBuffer block = Preconditions.checkNotNull(mergeManager.getMergedBlockData(
+ ManagedBuffer block = Objects.requireNonNull(mergeManager.getMergedBlockData(
appId, shuffleId, shuffleMergeId, reduceIds[reduceIdx], chunkIds[reduceIdx][chunkIdx]));
if (chunkIdx < chunkIds[reduceIdx].length - 1) {
chunkIdx += 1;
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java
index 97723f77723d4..4fdd39c3471fc 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
@@ -27,7 +28,6 @@
import java.util.concurrent.Future;
import com.codahale.metrics.MetricSet;
-import com.google.common.collect.Lists;
import org.apache.spark.internal.LogKeys;
import org.apache.spark.internal.MDC;
@@ -82,7 +82,7 @@ public void init(String appId) {
this.appId = appId;
TransportContext context = new TransportContext(
transportConf, new NoOpRpcHandler(), true, true);
- List<TransportClientBootstrap> bootstraps = Lists.newArrayList();
+ List<TransportClientBootstrap> bootstraps = new ArrayList<>();
if (authEnabled) {
bootstraps.add(new AuthClientBootstrap(transportConf, appId, secretKeyHolder));
}
@@ -106,7 +106,7 @@ private void setComparableAppAttemptId(String appAttemptId) {
} catch (NumberFormatException e) {
logger.warn("Push based shuffle requires comparable application attemptId, " +
"but the appAttemptId {} cannot be parsed to Integer", e,
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appAttemptId));
+ MDC.of(LogKeys.APP_ATTEMPT_ID, appAttemptId));
}
}
@@ -221,8 +221,8 @@ public void onFailure(Throwable e) {
});
} catch (Exception e) {
logger.error("Exception while sending finalizeShuffleMerge request to {}:{}", e,
- MDC.of(LogKeys.HOST$.MODULE$, host),
- MDC.of(LogKeys.PORT$.MODULE$, port));
+ MDC.of(LogKeys.HOST, host),
+ MDC.of(LogKeys.PORT, port));
listener.onShuffleMergeFailure(e);
}
}
@@ -322,8 +322,8 @@ public void onSuccess(ByteBuffer response) {
} catch (Throwable t) {
logger.warn("Error trying to remove blocks {} via external shuffle service from " +
"executor: {}", t,
- MDC.of(LogKeys.BLOCK_IDS$.MODULE$, Arrays.toString(blockIds)),
- MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, execId));
+ MDC.of(LogKeys.BLOCK_IDS, Arrays.toString(blockIds)),
+ MDC.of(LogKeys.EXECUTOR_ID, execId));
numRemovedBlocksFuture.complete(0);
}
}
@@ -331,8 +331,8 @@ public void onSuccess(ByteBuffer response) {
@Override
public void onFailure(Throwable e) {
logger.warn("Error trying to remove blocks {} via external shuffle service from " +
- "executor: {}", e, MDC.of(LogKeys.BLOCK_IDS$.MODULE$, Arrays.toString(blockIds)),
- MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, execId));
+ "executor: {}", e, MDC.of(LogKeys.BLOCK_IDS, Arrays.toString(blockIds)),
+ MDC.of(LogKeys.EXECUTOR_ID, execId));
numRemovedBlocksFuture.complete(0);
}
});
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java
index e43eedd8b25eb..b3002833fce1a 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java
@@ -20,15 +20,13 @@
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
+import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
-import org.apache.commons.lang3.tuple.Pair;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
@@ -37,7 +35,6 @@
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.cache.Weigher;
-import com.google.common.collect.Maps;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
@@ -56,6 +53,7 @@
import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.NettyUtils;
import org.apache.spark.network.util.TransportConf;
+import org.apache.spark.util.Pair;
/**
* Manages converting shuffle BlockIds into physical segments of local files, from a process outside
@@ -134,11 +132,11 @@ public ShuffleIndexInformation load(String filePath) throws IOException {
db = DBProvider.initDB(dbBackend, this.registeredExecutorFile, CURRENT_VERSION, mapper);
if (db != null) {
logger.info("Use {} as the implementation of {}",
- MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME$.MODULE$, dbBackend),
- MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY$.MODULE$, Constants.SHUFFLE_SERVICE_DB_BACKEND));
+ MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME, dbBackend),
+ MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY, Constants.SHUFFLE_SERVICE_DB_BACKEND));
executors = reloadRegisteredExecutors(db);
} else {
- executors = Maps.newConcurrentMap();
+ executors = new ConcurrentHashMap<>();
}
this.directoryCleaner = directoryCleaner;
}
@@ -154,8 +152,8 @@ public void registerExecutor(
ExecutorShuffleInfo executorInfo) {
AppExecId fullId = new AppExecId(appId, execId);
logger.info("Registered executor {} with {}",
- MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, fullId),
- MDC.of(LogKeys.EXECUTOR_SHUFFLE_INFO$.MODULE$, executorInfo));
+ MDC.of(LogKeys.APP_EXECUTOR_ID, fullId),
+ MDC.of(LogKeys.EXECUTOR_SHUFFLE_INFO, executorInfo));
try {
if (db != null && AppsWithRecoveryDisabled.isRecoveryEnabledForApp(appId)) {
byte[] key = dbAppExecKey(fullId);
@@ -221,8 +219,8 @@ public ManagedBuffer getRddBlockData(
*/
public void applicationRemoved(String appId, boolean cleanupLocalDirs) {
logger.info("Application {} removed, cleanupLocalDirs = {}",
- MDC.of(LogKeys.APP_ID$.MODULE$, appId),
- MDC.of(LogKeys.CLEANUP_LOCAL_DIRS$.MODULE$, cleanupLocalDirs));
+ MDC.of(LogKeys.APP_ID, appId),
+ MDC.of(LogKeys.CLEANUP_LOCAL_DIRS, cleanupLocalDirs));
Iterator<Map.Entry<AppExecId, ExecutorShuffleInfo>> it = executors.entrySet().iterator();
while (it.hasNext()) {
Map.Entry<AppExecId, ExecutorShuffleInfo> entry = it.next();
@@ -237,14 +235,14 @@ public void applicationRemoved(String appId, boolean cleanupLocalDirs) {
db.delete(dbAppExecKey(fullId));
} catch (IOException e) {
logger.error("Error deleting {} from executor state db", e,
- MDC.of(LogKeys.APP_ID$.MODULE$, appId));
+ MDC.of(LogKeys.APP_ID, appId));
}
}
if (cleanupLocalDirs) {
logger.info("Cleaning up executor {}'s {} local dirs",
- MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, fullId),
- MDC.of(LogKeys.NUM_LOCAL_DIRS$.MODULE$, executor.localDirs.length));
+ MDC.of(LogKeys.APP_EXECUTOR_ID, fullId),
+ MDC.of(LogKeys.NUM_LOCAL_DIRS, executor.localDirs.length));
// Execute the actual deletion in a different thread, as it may take some time.
directoryCleaner.execute(() -> deleteExecutorDirs(executor.localDirs));
@@ -259,18 +257,18 @@ public void applicationRemoved(String appId, boolean cleanupLocalDirs) {
*/
public void executorRemoved(String executorId, String appId) {
logger.info("Clean up non-shuffle and non-RDD files associated with the finished executor {}",
- MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, executorId));
+ MDC.of(LogKeys.EXECUTOR_ID, executorId));
AppExecId fullId = new AppExecId(appId, executorId);
final ExecutorShuffleInfo executor = executors.get(fullId);
if (executor == null) {
// Executor not registered, skip clean up of the local directories.
logger.info("Executor is not registered (appId={}, execId={})",
- MDC.of(LogKeys.APP_ID$.MODULE$, appId),
- MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, executorId));
+ MDC.of(LogKeys.APP_ID, appId),
+ MDC.of(LogKeys.EXECUTOR_ID, executorId));
} else {
logger.info("Cleaning up non-shuffle and non-RDD files in executor {}'s {} local dirs",
- MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, fullId),
- MDC.of(LogKeys.NUM_LOCAL_DIRS$.MODULE$, executor.localDirs.length));
+ MDC.of(LogKeys.APP_EXECUTOR_ID, fullId),
+ MDC.of(LogKeys.NUM_LOCAL_DIRS, executor.localDirs.length));
// Execute the actual deletion in a different thread, as it may take some time.
directoryCleaner.execute(() -> deleteNonShuffleServiceServedFiles(executor.localDirs));
@@ -288,7 +286,7 @@ private void deleteExecutorDirs(String[] dirs) {
logger.debug("Successfully cleaned up directory: {}", localDir);
} catch (Exception e) {
logger.error("Failed to delete directory: {}", e,
- MDC.of(LogKeys.PATH$.MODULE$, localDir));
+ MDC.of(LogKeys.PATH, localDir));
}
}
}
@@ -311,7 +309,7 @@ private void deleteNonShuffleServiceServedFiles(String[] dirs) {
localDir);
} catch (Exception e) {
logger.error("Failed to delete files not served by shuffle service in directory: {}", e,
- MDC.of(LogKeys.PATH$.MODULE$, localDir));
+ MDC.of(LogKeys.PATH, localDir));
}
}
}
@@ -384,7 +382,7 @@ public int removeBlocks(String appId, String execId, String[] blockIds) {
numRemovedBlocks++;
} else {
logger.warn("Failed to delete block: {}",
- MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath()));
+ MDC.of(LogKeys.PATH, file.getAbsolutePath()));
}
}
return numRemovedBlocks;
@@ -400,7 +398,7 @@ public Map<String, String[]> getLocalDirs(String appId, Set<String> execIds) {
}
return Pair.of(exec, info.localDirs);
})
- .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
+ .collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
}
/**
@@ -451,10 +449,7 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("execId", execId)
- .toString();
+ return "ExternalShuffleBlockResolver[appId=" + appId + ",execId=" + execId + "]";
}
}
@@ -477,7 +472,7 @@ private static AppExecId parseDbAppExecKey(String s) throws IOException {
@VisibleForTesting
static ConcurrentMap<AppExecId, ExecutorShuffleInfo> reloadRegisteredExecutors(DB db)
throws IOException {
- ConcurrentMap<AppExecId, ExecutorShuffleInfo> registeredExecutors = Maps.newConcurrentMap();
+ ConcurrentMap<AppExecId, ExecutorShuffleInfo> registeredExecutors = new ConcurrentHashMap<>();
if (db != null) {
try (DBIterator itr = db.iterator()) {
itr.seek(APP_KEY_PREFIX.getBytes(StandardCharsets.UTF_8));
@@ -489,7 +484,7 @@ static ConcurrentMap reloadRegisteredExecutors(D
}
AppExecId id = parseDbAppExecKey(key);
logger.info("Reloading registered executors: {}",
- MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, id));
+ MDC.of(LogKeys.APP_EXECUTOR_ID, id));
ExecutorShuffleInfo shuffleInfo =
mapper.readValue(e.getValue(), ExecutorShuffleInfo.class);
registeredExecutors.put(id, shuffleInfo);
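Two other swaps in this file follow the same spirit: Guava's `Maps.newConcurrentMap()` becomes a plain `new ConcurrentHashMap<>()`, and the Commons Lang `Pair` (read via `getKey`/`getValue`) is replaced by `org.apache.spark.util.Pair` read via `getLeft`/`getRight`. A hedged sketch of the resulting collection pattern, assuming Spark's `Pair` is a generic `Pair<L, R>` with only the calls visible in the hunk (`Pair.of`, `Pair::getLeft`, `Pair::getRight`):

    import java.util.List;
    import java.util.Map;
    import java.util.concurrent.ConcurrentHashMap;
    import java.util.concurrent.ConcurrentMap;
    import java.util.stream.Collectors;

    import org.apache.spark.util.Pair;  // Spark's own Pair, as imported in the hunk

    class PairMigrationSketch {
      // JDK replacement for Guava's Maps.newConcurrentMap()
      private final ConcurrentMap<String, String[]> localDirsByExec = new ConcurrentHashMap<>();

      // Illustrative example, not the resolver's actual method.
      static Map<String, Integer> lengths(List<String> words) {
        return words.stream()
          .map(w -> Pair.of(w, w.length()))   // Pair.of as used in getLocalDirs
          // getLeft/getRight replace Commons Lang's getKey/getValue
          .collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
      }
    }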
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java
index 5541b7460ac96..ca8d9bbe65500 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java
@@ -20,8 +20,8 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.Objects;
-import com.google.common.base.Preconditions;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import org.roaringbitmap.RoaringBitmap;
@@ -43,7 +43,7 @@ public class MergedBlockMeta {
public MergedBlockMeta(int numChunks, ManagedBuffer chunksBitmapBuffer) {
this.numChunks = numChunks;
- this.chunksBitmapBuffer = Preconditions.checkNotNull(chunksBitmapBuffer);
+ this.chunksBitmapBuffer = Objects.requireNonNull(chunksBitmapBuffer);
}
public int getNumChunks() {
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java
index d90ca1a88a267..05158a6600d0d 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java
@@ -21,8 +21,6 @@
import java.util.Arrays;
import java.util.Map;
-import com.google.common.base.Preconditions;
-
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
import org.apache.spark.network.buffer.ManagedBuffer;
@@ -34,6 +32,7 @@
import org.apache.spark.network.shuffle.protocol.BlockPushReturnCode;
import org.apache.spark.network.shuffle.protocol.BlockTransferMessage;
import org.apache.spark.network.shuffle.protocol.PushBlockStream;
+import org.apache.spark.network.util.JavaUtils;
/**
* Similar to {@link OneForOneBlockFetcher}, but for pushing blocks to remote shuffle service to
@@ -90,7 +89,7 @@ public void onSuccess(ByteBuffer response) {
ReturnCode returnCode = BlockPushNonFatalFailure.getReturnCode(pushResponse.returnCode);
if (returnCode != ReturnCode.SUCCESS) {
String blockId = pushResponse.failureBlockId;
- Preconditions.checkArgument(!blockId.isEmpty());
+ JavaUtils.checkArgument(!blockId.isEmpty(), "BlockID should not be empty");
checkAndFailRemainingBlocks(index, new BlockPushNonFatalFailure(returnCode,
BlockPushNonFatalFailure.getErrorMsg(blockId, returnCode)));
} else {
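Here and in the resolver below, Guava `Preconditions` calls are replaced with `JavaUtils.checkArgument`/`checkState` from `org.apache.spark.network.util`. The hunks add an explicit message to every call, which suggests the JavaUtils variants always take one. A small sketch under that assumption, using only the two methods and messages that actually appear in the diff:

    import org.apache.spark.network.util.JavaUtils;

    class PreconditionSketch {
      void validate(String blockId, int retryCount, int saslRetryCount) {
        // Argument validation: fails with the given message if the condition is false.
        JavaUtils.checkArgument(!blockId.isEmpty(), "BlockID should not be empty");

        // State validation, mirroring the RetryingBlockTransferor hunk further down.
        JavaUtils.checkState(retryCount >= saslRetryCount,
          "retryCount must be greater than or equal to saslRetryCount");
      }
    }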
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java
index 6e9bd548f5327..a48208bad5b8c 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java
@@ -55,7 +55,6 @@
import com.codahale.metrics.Metric;
import com.codahale.metrics.MetricSet;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
@@ -187,8 +186,8 @@ public ShuffleIndexInformation load(String filePath) throws IOException {
db = DBProvider.initDB(dbBackend, this.recoveryFile, CURRENT_VERSION, mapper);
if (db != null) {
logger.info("Use {} as the implementation of {}",
- MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME$.MODULE$, dbBackend),
- MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY$.MODULE$, Constants.SHUFFLE_SERVICE_DB_BACKEND));
+ MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME, dbBackend),
+ MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY, Constants.SHUFFLE_SERVICE_DB_BACKEND));
reloadAndCleanUpAppShuffleInfo(db);
}
this.pushMergeMetrics = new PushMergeMetrics();
@@ -211,7 +210,7 @@ public boolean shouldLogError(Throwable t) {
protected AppShuffleInfo validateAndGetAppShuffleInfo(String appId) {
// TODO: [SPARK-33236] Change the message when this service is able to handle NM restart
AppShuffleInfo appShuffleInfo = appsShuffleInfo.get(appId);
- Preconditions.checkArgument(appShuffleInfo != null,
+ JavaUtils.checkArgument(appShuffleInfo != null,
"application " + appId + " is not registered or NM was restarted.");
return appShuffleInfo;
}
@@ -234,10 +233,10 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo(
if (mergePartitionsInfo == null) {
logger.info("{} attempt {} shuffle {} shuffleMerge {}: creating a new shuffle " +
"merge metadata",
- MDC.of(LogKeys.APP_ID$.MODULE$, appShuffleInfo.appId),
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appShuffleInfo.attemptId),
- MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, shuffleId),
- MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId));
+ MDC.of(LogKeys.APP_ID, appShuffleInfo.appId),
+ MDC.of(LogKeys.APP_ATTEMPT_ID, appShuffleInfo.attemptId),
+ MDC.of(LogKeys.SHUFFLE_ID, shuffleId),
+ MDC.of(LogKeys.SHUFFLE_MERGE_ID, shuffleMergeId));
return new AppShuffleMergePartitionsInfo(shuffleMergeId, false);
} else {
int latestShuffleMergeId = mergePartitionsInfo.shuffleMergeId;
@@ -256,10 +255,10 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo(
shuffleId, latestShuffleMergeId);
logger.info("{}: creating a new shuffle merge metadata since received " +
"shuffleMergeId {} is higher than latest shuffleMergeId {}",
- MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$,
+ MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID,
currentAppAttemptShuffleMergeId),
- MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId),
- MDC.of(LogKeys.LATEST_SHUFFLE_MERGE_ID$.MODULE$, latestShuffleMergeId));
+ MDC.of(LogKeys.SHUFFLE_MERGE_ID, shuffleMergeId),
+ MDC.of(LogKeys.LATEST_SHUFFLE_MERGE_ID, latestShuffleMergeId));
submitCleanupTask(() ->
closeAndDeleteOutdatedPartitions(currentAppAttemptShuffleMergeId,
mergePartitionsInfo.shuffleMergePartitions));
@@ -293,13 +292,13 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo(
} catch (IOException e) {
logger.error("{} attempt {} shuffle {} shuffleMerge {}: cannot create merged shuffle " +
"partition with data file {}, index file {}, and meta file {}",
- MDC.of(LogKeys.APP_ID$.MODULE$, appShuffleInfo.appId),
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appShuffleInfo.attemptId),
- MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, shuffleId),
- MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId),
- MDC.of(LogKeys.DATA_FILE$.MODULE$, dataFile.getAbsolutePath()),
- MDC.of(LogKeys.INDEX_FILE$.MODULE$, indexFile.getAbsolutePath()),
- MDC.of(LogKeys.META_FILE$.MODULE$, metaFile.getAbsolutePath()));
+ MDC.of(LogKeys.APP_ID, appShuffleInfo.appId),
+ MDC.of(LogKeys.APP_ATTEMPT_ID, appShuffleInfo.attemptId),
+ MDC.of(LogKeys.SHUFFLE_ID, shuffleId),
+ MDC.of(LogKeys.SHUFFLE_MERGE_ID, shuffleMergeId),
+ MDC.of(LogKeys.DATA_FILE, dataFile.getAbsolutePath()),
+ MDC.of(LogKeys.INDEX_FILE, indexFile.getAbsolutePath()),
+ MDC.of(LogKeys.META_FILE, metaFile.getAbsolutePath()));
throw new RuntimeException(
String.format("Cannot initialize merged shuffle partition for appId %s shuffleId %s "
+ "shuffleMergeId %s reduceId %s", appShuffleInfo.appId, shuffleId, shuffleMergeId,
@@ -411,8 +410,8 @@ private void removeOldApplicationAttemptsFromDb(AppShuffleInfo info) {
@Override
public void applicationRemoved(String appId, boolean cleanupLocalDirs) {
logger.info("Application {} removed, cleanupLocalDirs = {}",
- MDC.of(LogKeys.APP_ID$.MODULE$, appId),
- MDC.of(LogKeys.CLEANUP_LOCAL_DIRS$.MODULE$, cleanupLocalDirs));
+ MDC.of(LogKeys.APP_ID, appId),
+ MDC.of(LogKeys.CLEANUP_LOCAL_DIRS, cleanupLocalDirs));
// Cleanup the DB within critical section to gain the consistency between
// DB and in-memory hashmap.
AtomicReference<AppShuffleInfo> ref = new AtomicReference<>(null);
@@ -523,7 +522,7 @@ void removeAppAttemptPathInfoFromDB(String appId, int attemptId) {
db.delete(key);
} catch (Exception e) {
logger.error("Failed to remove the application attempt {} local path in DB", e,
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appAttemptId));
+ MDC.of(LogKeys.APP_ATTEMPT_ID, appAttemptId));
}
}
}
@@ -593,10 +592,10 @@ void deleteMergedFiles(
}
}
logger.info("Delete {} data files, {} index files, {} meta files for {}",
- MDC.of(LogKeys.NUM_DATA_FILES$.MODULE$, dataFilesDeleteCnt),
- MDC.of(LogKeys.NUM_INDEX_FILES$.MODULE$, indexFilesDeleteCnt),
- MDC.of(LogKeys.NUM_META_FILES$.MODULE$, metaFilesDeleteCnt),
- MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId));
+ MDC.of(LogKeys.NUM_DATA_FILES, dataFilesDeleteCnt),
+ MDC.of(LogKeys.NUM_INDEX_FILES, indexFilesDeleteCnt),
+ MDC.of(LogKeys.NUM_META_FILES, metaFilesDeleteCnt),
+ MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId));
}
/**
@@ -609,7 +608,7 @@ void removeAppShufflePartitionInfoFromDB(AppAttemptShuffleMergeId appAttemptShuf
db.delete(getDbAppAttemptShufflePartitionKey(appAttemptShuffleMergeId));
} catch (Exception e) {
logger.error("Error deleting {} from application shuffle merged partition info in DB", e,
- MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId));
+ MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId));
}
}
}
@@ -629,7 +628,7 @@ void deleteExecutorDirs(AppShuffleInfo appShuffleInfo) {
}
} catch (Exception e) {
logger.error("Failed to delete directory: {}", e,
- MDC.of(LogKeys.PATH$.MODULE$, localDir));
+ MDC.of(LogKeys.PATH, localDir));
}
}
}
@@ -759,10 +758,10 @@ public ByteBuffer getCompletionResponse() {
@Override
public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) {
logger.info("{} attempt {} shuffle {} shuffleMerge {}: finalize shuffle merge",
- MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId),
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, msg.appAttemptId),
- MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId),
- MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId));
+ MDC.of(LogKeys.APP_ID, msg.appId),
+ MDC.of(LogKeys.APP_ATTEMPT_ID, msg.appAttemptId),
+ MDC.of(LogKeys.SHUFFLE_ID, msg.shuffleId),
+ MDC.of(LogKeys.SHUFFLE_MERGE_ID, msg.shuffleMergeId));
AppShuffleInfo appShuffleInfo = validateAndGetAppShuffleInfo(msg.appId);
if (appShuffleInfo.attemptId != msg.appAttemptId) {
// If finalizeShuffleMerge from a former application attempt, it is considered late,
@@ -846,12 +845,12 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) {
} catch (IOException ioe) {
logger.warn("{} attempt {} shuffle {} shuffleMerge {}: exception while " +
"finalizing shuffle partition {}. Exception message: {}",
- MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId),
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, msg.appAttemptId),
- MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId),
- MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId),
- MDC.of(LogKeys.REDUCE_ID$.MODULE$, partition.reduceId),
- MDC.of(LogKeys.EXCEPTION$.MODULE$, ioe.getMessage()));
+ MDC.of(LogKeys.APP_ID, msg.appId),
+ MDC.of(LogKeys.APP_ATTEMPT_ID, msg.appAttemptId),
+ MDC.of(LogKeys.SHUFFLE_ID, msg.shuffleId),
+ MDC.of(LogKeys.SHUFFLE_MERGE_ID, msg.shuffleMergeId),
+ MDC.of(LogKeys.REDUCE_ID, partition.reduceId),
+ MDC.of(LogKeys.EXCEPTION, ioe.getMessage()));
} finally {
partition.cleanable.clean();
}
@@ -863,10 +862,10 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) {
appShuffleInfo.shuffles.get(msg.shuffleId).setReduceIds(Ints.toArray(reduceIds));
}
logger.info("{} attempt {} shuffle {} shuffleMerge {}: finalization of shuffle merge completed",
- MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId),
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, msg.appAttemptId),
- MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId),
- MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId));
+ MDC.of(LogKeys.APP_ID, msg.appId),
+ MDC.of(LogKeys.APP_ATTEMPT_ID, msg.appAttemptId),
+ MDC.of(LogKeys.SHUFFLE_ID, msg.shuffleId),
+ MDC.of(LogKeys.SHUFFLE_MERGE_ID, msg.shuffleMergeId));
return mergeStatuses;
}
@@ -934,8 +933,8 @@ public void registerExecutor(String appId, ExecutorShuffleInfo executorInfo) {
if (originalAppShuffleInfo.get() != null) {
AppShuffleInfo appShuffleInfo = originalAppShuffleInfo.get();
logger.warn("Cleanup shuffle info and merged shuffle files for {}_{} as new " +
- "application attempt registered", MDC.of(LogKeys.APP_ID$.MODULE$, appId),
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appShuffleInfo.attemptId));
+ "application attempt registered", MDC.of(LogKeys.APP_ID, appId),
+ MDC.of(LogKeys.APP_ATTEMPT_ID, appShuffleInfo.attemptId));
// Clean up all the merge shuffle related information in the DB for the former attempt
submitCleanupTask(
() -> closeAndDeletePartitionsIfNeeded(appShuffleInfo, true)
@@ -992,12 +991,12 @@ private void shutdownMergedShuffleCleanerNow() {
List<Runnable> unfinishedTasks = mergedShuffleCleaner.shutdownNow();
logger.warn("There are still {} tasks not completed in mergedShuffleCleaner " +
"after {} ms.",
- MDC.of(LogKeys.COUNT$.MODULE$, unfinishedTasks.size()),
- MDC.of(LogKeys.TIMEOUT$.MODULE$, cleanerShutdownTimeout * 1000L));
+ MDC.of(LogKeys.COUNT, unfinishedTasks.size()),
+ MDC.of(LogKeys.TIMEOUT, cleanerShutdownTimeout * 1000L));
// Wait a while for tasks to respond to being cancelled
if (!mergedShuffleCleaner.awaitTermination(cleanerShutdownTimeout, TimeUnit.SECONDS)) {
logger.warn("mergedShuffleCleaner did not terminate in {} ms.",
- MDC.of(LogKeys.TIMEOUT$.MODULE$, cleanerShutdownTimeout * 1000L));
+ MDC.of(LogKeys.TIMEOUT, cleanerShutdownTimeout * 1000L));
}
} catch (InterruptedException ignored) {
Thread.currentThread().interrupt();
@@ -1017,7 +1016,7 @@ private void writeAppPathsInfoToDb(String appId, int attemptId, AppPathsInfo app
db.put(key, value);
} catch (Exception e) {
logger.error("Error saving registered app paths info for {}", e,
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appAttemptId));
+ MDC.of(LogKeys.APP_ATTEMPT_ID, appAttemptId));
}
}
}
@@ -1035,7 +1034,7 @@ private void writeAppAttemptShuffleMergeInfoToDB(
db.put(dbKey, new byte[0]);
} catch (Exception e) {
logger.error("Error saving active app shuffle partition {}", e,
- MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId));
+ MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId));
}
}
}
@@ -1137,7 +1136,7 @@ List<byte[]> reloadActiveAppAttemptsPathInfo(DB db) throws IOException {
dbKeysToBeRemoved.add(getDbAppAttemptPathsKey(existingAppAttemptId));
} catch (IOException e) {
logger.error("Failed to get the DB key for {}", e,
- MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, existingAppAttemptId));
+ MDC.of(LogKeys.APP_ATTEMPT_ID, existingAppAttemptId));
}
}
return new AppShuffleInfo(
@@ -1187,7 +1186,7 @@ List reloadFinalizedAppAttemptsShuffleMergeInfo(DB db) throws IOExceptio
getDbAppAttemptShufflePartitionKey(appAttemptShuffleMergeId));
} catch (Exception e) {
logger.error("Error getting the DB key for {}", e, MDC.of(
- LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId));
+ LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId));
}
}
return new AppShuffleMergePartitionsInfo(partitionId.shuffleMergeId, true);
@@ -1216,7 +1215,7 @@ void removeOutdatedKeyValuesInDB(List<byte[]> dbKeysToBeRemoved) {
db.delete(key);
} catch (Exception e) {
logger.error("Error deleting dangling key {} in DB", e,
- MDC.of(LogKeys.KEY$.MODULE$, key));
+ MDC.of(LogKeys.KEY, key));
}
}
);
@@ -1267,12 +1266,12 @@ private PushBlockStreamCallback(
String streamId,
AppShufflePartitionInfo partitionInfo,
int mapIndex) {
- Preconditions.checkArgument(mergeManager != null);
+ JavaUtils.checkArgument(mergeManager != null, "mergeManager is null");
this.mergeManager = mergeManager;
- Preconditions.checkArgument(appShuffleInfo != null);
+ JavaUtils.checkArgument(appShuffleInfo != null, "appShuffleInfo is null");
this.appShuffleInfo = appShuffleInfo;
this.streamId = streamId;
- Preconditions.checkArgument(partitionInfo != null);
+ JavaUtils.checkArgument(partitionInfo != null, "partitionInfo is null");
this.partitionInfo = partitionInfo;
this.mapIndex = mapIndex;
abortIfNecessary();
@@ -1599,7 +1598,7 @@ public void onComplete(String streamId) throws IOException {
public void onFailure(String streamId, Throwable throwable) throws IOException {
if (ERROR_HANDLER.shouldLogError(throwable)) {
logger.error("Encountered issue when merging {}", throwable,
- MDC.of(LogKeys.STREAM_ID$.MODULE$, streamId));
+ MDC.of(LogKeys.STREAM_ID, streamId));
} else {
logger.debug("Encountered issue when merging {}", streamId, throwable);
}
@@ -1719,7 +1718,7 @@ public AppAttemptShuffleMergeId(
@JsonProperty("attemptId") int attemptId,
@JsonProperty("shuffleId") int shuffleId,
@JsonProperty("shuffleMergeId") int shuffleMergeId) {
- Preconditions.checkArgument(appId != null, "app id is null");
+ JavaUtils.checkArgument(appId != null, "app id is null");
this.appId = appId;
this.attemptId = attemptId;
this.shuffleId = shuffleId;
@@ -1860,8 +1859,8 @@ void updateChunkInfo(long chunkOffset, int mapIndex) throws IOException {
indexMetaUpdateFailed = false;
} catch (IOException ioe) {
logger.warn("{} reduceId {} update to index/meta failed",
- MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId),
- MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId));
+ MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId),
+ MDC.of(LogKeys.REDUCE_ID, reduceId));
indexMetaUpdateFailed = true;
// Any exception here is propagated to the caller and the caller can decide whether to
// abort or not.
@@ -1913,8 +1912,8 @@ private void finalizePartition() throws IOException {
private void deleteAllFiles() {
if (!dataFile.delete()) {
logger.info("Error deleting data file for {} reduceId {}",
- MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId),
- MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId));
+ MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId),
+ MDC.of(LogKeys.REDUCE_ID, reduceId));
}
metaFile.delete();
indexFile.delete();
@@ -1983,22 +1982,22 @@ private void closeAllFiles(
}
} catch (IOException ioe) {
logger.warn("Error closing data channel for {} reduceId {}",
- MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId),
- MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId));
+ MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId),
+ MDC.of(LogKeys.REDUCE_ID, reduceId));
}
try {
metaFile.close();
} catch (IOException ioe) {
logger.warn("Error closing meta file for {} reduceId {}",
- MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId),
- MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId));
+ MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId),
+ MDC.of(LogKeys.REDUCE_ID, reduceId));
}
try {
indexFile.close();
} catch (IOException ioe) {
logger.warn("Error closing index file for {} reduceId {}",
- MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId),
- MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId));
+ MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId),
+ MDC.of(LogKeys.REDUCE_ID, reduceId));
}
}
}
@@ -2043,9 +2042,9 @@ private AppPathsInfo(
this.subDirsPerLocalDir = subDirsPerLocalDir;
if (logger.isInfoEnabled()) {
logger.info("Updated active local dirs {} and sub dirs {} for application {}",
- MDC.of(LogKeys.PATHS$.MODULE$, Arrays.toString(activeLocalDirs)),
- MDC.of(LogKeys.NUM_SUB_DIRS$.MODULE$, subDirsPerLocalDir),
- MDC.of(LogKeys.APP_ID$.MODULE$, appId));
+ MDC.of(LogKeys.PATHS, Arrays.toString(activeLocalDirs)),
+ MDC.of(LogKeys.NUM_SUB_DIRS, subDirsPerLocalDir),
+ MDC.of(LogKeys.APP_ID, appId));
}
}
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java
index 31c454f63a92e..1dae2d54120cb 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java
@@ -25,8 +25,6 @@
import java.util.concurrent.TimeUnit;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.Sets;
import com.google.common.util.concurrent.Uninterruptibles;
import org.apache.spark.internal.SparkLogger;
@@ -35,6 +33,7 @@
import org.apache.spark.internal.MDC;
import org.apache.spark.network.buffer.ManagedBuffer;
import org.apache.spark.network.sasl.SaslTimeoutException;
+import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.NettyUtils;
import org.apache.spark.network.util.TransportConf;
@@ -131,7 +130,7 @@ public RetryingBlockTransferor(
this.listener = listener;
this.maxRetries = conf.maxIORetries();
this.retryWaitTime = conf.ioRetryWaitTimeMs();
- this.outstandingBlocksIds = Sets.newLinkedHashSet();
+ this.outstandingBlocksIds = new LinkedHashSet<>();
Collections.addAll(outstandingBlocksIds, blockIds);
this.currentListener = new RetryingBlockTransferListener();
this.errorHandler = errorHandler;
@@ -182,13 +181,13 @@ private void transferAllOutstanding() {
} catch (Exception e) {
if (numRetries > 0) {
logger.error("Exception while beginning {} of {} outstanding blocks (after {} retries)", e,
- MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()),
- MDC.of(LogKeys.NUM_BLOCKS$.MODULE$, blockIdsToTransfer.length),
- MDC.of(LogKeys.NUM_RETRY$.MODULE$, numRetries));
+ MDC.of(LogKeys.TRANSFER_TYPE, listener.getTransferType()),
+ MDC.of(LogKeys.NUM_BLOCKS, blockIdsToTransfer.length),
+ MDC.of(LogKeys.NUM_RETRY, numRetries));
} else {
logger.error("Exception while beginning {} of {} outstanding blocks", e,
- MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()),
- MDC.of(LogKeys.NUM_BLOCKS$.MODULE$, blockIdsToTransfer.length));
+ MDC.of(LogKeys.TRANSFER_TYPE, listener.getTransferType()),
+ MDC.of(LogKeys.NUM_BLOCKS, blockIdsToTransfer.length));
}
if (shouldRetry(e) && initiateRetry(e)) {
// successfully initiated a retry
@@ -216,11 +215,11 @@ synchronized boolean initiateRetry(Throwable e) {
currentListener = new RetryingBlockTransferListener();
logger.info("Retrying {} ({}/{}) for {} outstanding blocks after {} ms",
- MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()),
- MDC.of(LogKeys.NUM_RETRY$.MODULE$, retryCount),
- MDC.of(LogKeys.MAX_ATTEMPTS$.MODULE$, maxRetries),
- MDC.of(LogKeys.NUM_BLOCKS$.MODULE$, outstandingBlocksIds.size()),
- MDC.of(LogKeys.RETRY_WAIT_TIME$.MODULE$, retryWaitTime));
+ MDC.of(LogKeys.TRANSFER_TYPE, listener.getTransferType()),
+ MDC.of(LogKeys.NUM_RETRY, retryCount),
+ MDC.of(LogKeys.MAX_ATTEMPTS, maxRetries),
+ MDC.of(LogKeys.NUM_BLOCKS, outstandingBlocksIds.size()),
+ MDC.of(LogKeys.RETRY_WAIT_TIME, retryWaitTime));
try {
executorService.execute(() -> {
@@ -247,7 +246,7 @@ private synchronized boolean shouldRetry(Throwable e) {
// If this is a non SASL request failure, reduce earlier SASL failures from retryCount
// since some subsequent SASL attempt was successful
if (!isSaslTimeout && saslRetryCount > 0) {
- Preconditions.checkState(retryCount >= saslRetryCount,
+ JavaUtils.checkState(retryCount >= saslRetryCount,
"retryCount must be greater than or equal to saslRetryCount");
retryCount -= saslRetryCount;
saslRetryCount = 0;
@@ -282,7 +281,7 @@ private void handleBlockTransferSuccess(String blockId, ManagedBuffer data) {
// If there were SASL failures earlier, remove them from retryCount, as there was
// a SASL success (and some other request post bootstrap was also successful).
if (saslRetryCount > 0) {
- Preconditions.checkState(retryCount >= saslRetryCount,
+ JavaUtils.checkState(retryCount >= saslRetryCount,
"retryCount must be greater than or equal to saslRetryCount");
retryCount -= saslRetryCount;
saslRetryCount = 0;
@@ -311,9 +310,9 @@ private void handleBlockTransferFailure(String blockId, Throwable exception) {
} else {
if (errorHandler.shouldLogError(exception)) {
logger.error("Failed to {} block {}, and will not retry ({} retries)", exception,
- MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()),
- MDC.of(LogKeys.BLOCK_ID$.MODULE$, blockId),
- MDC.of(LogKeys.NUM_RETRY$.MODULE$,retryCount));
+ MDC.of(LogKeys.TRANSFER_TYPE, listener.getTransferType()),
+ MDC.of(LogKeys.BLOCK_ID, blockId),
+ MDC.of(LogKeys.NUM_RETRY, retryCount));
} else {
logger.debug(
String.format("Failed to %s block %s, and will not retry (%s retries)",
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java
index 62fcda701d948..2dbf38be954db 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java
@@ -21,8 +21,6 @@
import java.util.concurrent.TimeUnit;
import java.util.zip.*;
-import com.google.common.io.ByteStreams;
-
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
import org.apache.spark.internal.LogKeys;
@@ -88,7 +86,7 @@ public static String getChecksumFileName(String blockName, String algorithm) {
private static long readChecksumByReduceId(File checksumFile, int reduceId) throws IOException {
try (DataInputStream in = new DataInputStream(new FileInputStream(checksumFile))) {
- ByteStreams.skipFully(in, reduceId * 8L);
+ in.skipNBytes(reduceId * 8L);
return in.readLong();
}
}
@@ -156,7 +154,7 @@ public static Cause diagnoseCorruption(
} catch (FileNotFoundException e) {
// Even if checksum is enabled, a checksum file may not exist if error throws during writing.
logger.warn("Checksum file {} doesn't exit",
- MDC.of(LogKeys.PATH$.MODULE$, checksumFile.getName()));
+ MDC.of(LogKeys.PATH, checksumFile.getName()));
cause = Cause.UNKNOWN_ISSUE;
} catch (Exception e) {
logger.warn("Unable to diagnose shuffle block corruption", e);
@@ -169,9 +167,9 @@ public static Cause diagnoseCorruption(
checksumByReader, checksumByWriter, checksumByReCalculation);
} else {
logger.info("Shuffle corruption diagnosis took {} ms, checksum file {}, cause {}",
- MDC.of(LogKeys.TIME$.MODULE$, duration),
- MDC.of(LogKeys.PATH$.MODULE$, checksumFile.getAbsolutePath()),
- MDC.of(LogKeys.REASON$.MODULE$, cause));
+ MDC.of(LogKeys.TIME, duration),
+ MDC.of(LogKeys.PATH, checksumFile.getAbsolutePath()),
+ MDC.of(LogKeys.REASON, cause));
}
return cause;
}
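The checksum reader above now uses `InputStream.skipNBytes` (JDK 12+) instead of Guava's `ByteStreams.skipFully`; both skip exactly N bytes or throw, unlike plain `skip`. A self-contained sketch of the fixed-offset read (the path parameter and class name are illustrative):

    import java.io.DataInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;

    class ChecksumReadSketch {
      // Each checksum is stored as one 8-byte long per reduce partition.
      static long readChecksum(String checksumFilePath, int reduceId) throws IOException {
        try (DataInputStream in = new DataInputStream(new FileInputStream(checksumFilePath))) {
          in.skipNBytes(reduceId * 8L);  // throws EOFException if the file is too short
          return in.readLong();
        }
      }
    }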
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/AbstractFetchShuffleBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/AbstractFetchShuffleBlocks.java
index 0fca27cf26dfa..2bc57cc52f2cd 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/AbstractFetchShuffleBlocks.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/AbstractFetchShuffleBlocks.java
@@ -17,7 +17,8 @@
package org.apache.spark.network.shuffle.protocol;
-import com.google.common.base.Objects;
+import java.util.Objects;
+
import io.netty.buffer.ByteBuf;
import org.apache.commons.lang3.builder.ToStringBuilder;
@@ -43,12 +44,14 @@ protected AbstractFetchShuffleBlocks(
this.shuffleId = shuffleId;
}
+ // checkstyle.off: RegexpSinglelineJava
public ToStringBuilder toStringHelper() {
return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
.append("appId", appId)
.append("execId", execId)
.append("shuffleId", shuffleId);
}
+ // checkstyle.on: RegexpSinglelineJava
/**
* Returns number of blocks in the request.
@@ -61,7 +64,7 @@ public boolean equals(Object o) {
if (o == null || getClass() != o.getClass()) return false;
AbstractFetchShuffleBlocks that = (AbstractFetchShuffleBlocks) o;
return shuffleId == that.shuffleId
- && Objects.equal(appId, that.appId) && Objects.equal(execId, that.execId);
+ && Objects.equals(appId, that.appId) && Objects.equals(execId, that.execId);
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java
index 05347c671e002..f4149b6875b26 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java
@@ -19,10 +19,7 @@
import java.util.Objects;
-import com.google.common.base.Preconditions;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
import org.apache.spark.network.server.BlockPushNonFatalFailure;
@@ -43,7 +40,7 @@ public class BlockPushReturnCode extends BlockTransferMessage {
public final String failureBlockId;
public BlockPushReturnCode(byte returnCode, String failureBlockId) {
- Preconditions.checkNotNull(BlockPushNonFatalFailure.getReturnCode(returnCode));
+ Objects.requireNonNull(BlockPushNonFatalFailure.getReturnCode(returnCode));
this.returnCode = returnCode;
this.failureBlockId = failureBlockId;
}
@@ -60,10 +57,8 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("returnCode", returnCode)
- .append("failureBlockId", failureBlockId)
- .toString();
+ return "BlockPushReturnCode[returnCode=" + returnCode +
+ ",failureBlockId=" + failureBlockId + "]";
}
@Override
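The protocol message classes in the remaining hunks all drop `ToStringBuilder` and Guava's `com.google.common.base.Objects` in favor of plain string concatenation and `java.util.Objects`. A minimal sketch of the resulting pattern, with hypothetical field names (only the JDK calls and the instanceof-pattern style already used in these files):

    import java.util.Objects;

    final class MessageSketch {
      private final String appId;
      private final int shuffleId;

      MessageSketch(String appId, int shuffleId) {
        this.appId = Objects.requireNonNull(appId);  // replaces Preconditions.checkNotNull
        this.shuffleId = shuffleId;
      }

      @Override
      public int hashCode() {
        return Objects.hash(appId, shuffleId);       // replaces Guava's Objects.hashCode(...)
      }

      @Override
      public boolean equals(Object other) {
        if (other instanceof MessageSketch o) {      // same pattern-matching style as the hunks
          return Objects.equals(appId, o.appId) && shuffleId == o.shuffleId;
        }
        return false;
      }

      @Override
      public String toString() {
        // Hand-rolled equivalent of ToStringStyle.SHORT_PREFIX_STYLE
        return "MessageSketch[appId=" + appId + ",shuffleId=" + shuffleId + "]";
      }
    }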
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java
index 2a050ce40b84b..9942d68297595 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
// Needed by ScalaDoc. See SPARK-7726
import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type;
@@ -44,9 +42,7 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("numRemovedBlocks", numRemovedBlocks)
- .toString();
+ return "BlocksRemoved[numRemovedBlocks=" + numRemovedBlocks + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/CorruptionCause.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/CorruptionCause.java
index 5690eee53bd13..d9b9d4d8f36c2 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/CorruptionCause.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/CorruptionCause.java
@@ -18,8 +18,6 @@
package org.apache.spark.network.shuffle.protocol;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.shuffle.checksum.Cause;
@@ -38,9 +36,7 @@ protected Type type() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("cause", cause)
- .toString();
+ return "CorruptionCause[cause=" + cause + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/DiagnoseCorruption.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/DiagnoseCorruption.java
index 620b5ad71cd75..e509f45a9f0e3 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/DiagnoseCorruption.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/DiagnoseCorruption.java
@@ -18,8 +18,6 @@
package org.apache.spark.network.shuffle.protocol;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
/** Request to get the cause of a corrupted block. Returns {@link CorruptionCause} */
@@ -56,15 +54,9 @@ protected Type type() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("execId", execId)
- .append("shuffleId", shuffleId)
- .append("mapId", mapId)
- .append("reduceId", reduceId)
- .append("checksum", checksum)
- .append("algorithm", algorithm)
- .toString();
+ return "DiagnoseCorruption[appId=" + appId + ",execId=" + execId + ",shuffleId=" + shuffleId +
+ ",mapId=" + mapId + ",reduceId=" + reduceId + ",checksum=" + checksum +
+ ",algorithm=" + algorithm + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java
index 8a3ccdef2920b..c53ab911c30da 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java
@@ -23,8 +23,6 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encodable;
import org.apache.spark.network.protocol.Encoders;
@@ -60,11 +58,8 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("localDirs", Arrays.toString(localDirs))
- .append("subDirsPerLocalDir", subDirsPerLocalDir)
- .append("shuffleManager", shuffleManager)
- .toString();
+ return "ExecutorShuffleInfo[localDirs=" + Arrays.toString(localDirs) +
+ ",subDirsPerLocalDir=" + subDirsPerLocalDir + ",shuffleManager=" + shuffleManager + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlockChunks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlockChunks.java
index cf4cbcf1ed08e..a6e1ce374b07f 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlockChunks.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlockChunks.java
@@ -60,11 +60,10 @@ public FetchShuffleBlockChunks(
@Override
public String toString() {
- return toStringHelper()
- .append("shuffleMergeId", shuffleMergeId)
- .append("reduceIds", Arrays.toString(reduceIds))
- .append("chunkIds", Arrays.deepToString(chunkIds))
- .toString();
+ return "FetchShuffleBlockChunks[appId=" + appId + ",execId=" + execId +
+ ",shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId +
+ ",reduceIds=" + Arrays.toString(reduceIds) +
+ ",chunkIds=" + Arrays.deepToString(chunkIds) + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java
index 68550a2fba86e..686207767ca1e 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java
@@ -62,11 +62,9 @@ public FetchShuffleBlocks(
@Override
public String toString() {
- return toStringHelper()
- .append("mapIds", Arrays.toString(mapIds))
- .append("reduceIds", Arrays.deepToString(reduceIds))
- .append("batchFetchEnabled", batchFetchEnabled)
- .toString();
+ return "FetchShuffleBlocks[appId=" + appId + ",execId=" + execId + ",shuffleId=" + shuffleId +
+ ",mapIds=" + Arrays.toString(mapIds) + ",reduceIds=" + Arrays.deepToString(reduceIds) +
+ ",batchFetchEnabled=" + batchFetchEnabled + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java
index cd5e005348f42..61152f48a85ba 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java
@@ -17,10 +17,9 @@
package org.apache.spark.network.shuffle.protocol;
-import com.google.common.base.Objects;
+import java.util.Objects;
+
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
@@ -54,23 +53,19 @@ protected BlockTransferMessage.Type type() {
@Override
public int hashCode() {
- return Objects.hashCode(appId, appAttemptId, shuffleId, shuffleMergeId);
+ return Objects.hash(appId, appAttemptId, shuffleId, shuffleMergeId);
}
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("attemptId", appAttemptId)
- .append("shuffleId", shuffleId)
- .append("shuffleMergeId", shuffleMergeId)
- .toString();
+ return "FinalizeShuffleMerge[appId=" + appId + ",attemptId=" + appAttemptId +
+ ",shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId + "]";
}
@Override
public boolean equals(Object other) {
if (other instanceof FinalizeShuffleMerge o) {
- return Objects.equal(appId, o.appId)
+ return Objects.equals(appId, o.appId)
&& appAttemptId == o.appAttemptId
&& shuffleId == o.shuffleId
&& shuffleMergeId == o.shuffleMergeId;
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java
index f118f0604d9e9..8bd106c94c283 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java
@@ -21,8 +21,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
@@ -49,10 +47,7 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("execIds", Arrays.toString(execIds))
- .toString();
+ return "GetLocalDirsForExecutors[appId=" + appId + ",execIds=" + Arrays.toString(execIds) + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java
index b65f351d3cf3e..060b565d420fd 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java
@@ -20,8 +20,6 @@
import java.util.*;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
@@ -64,11 +62,9 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("execIds", Arrays.toString(execIds))
- .append("numLocalDirsByExec", Arrays.toString(numLocalDirsByExec))
- .append("allLocalDirs", Arrays.toString(allLocalDirs))
- .toString();
+ return "LocalDirsForExecutors[execIds=" + Arrays.toString(execIds) +
+ ",numLocalDirsByExec=" + Arrays.toString(numLocalDirsByExec) +
+ ",allLocalDirs=" + Arrays.toString(allLocalDirs) + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java
index 892c3a5e77958..d21449016972f 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java
@@ -19,10 +19,9 @@
import java.util.Arrays;
-import com.google.common.base.Objects;
+import java.util.Objects;
+
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.roaringbitmap.RoaringBitmap;
import org.apache.spark.network.protocol.Encoders;
@@ -86,18 +85,15 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("shuffleId", shuffleId)
- .append("shuffleMergeId", shuffleMergeId)
- .append("reduceId size", reduceIds.length)
- .toString();
+ return "MergeStatuses[shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId +
+ ",reduceId size=" + reduceIds.length + "]";
}
@Override
public boolean equals(Object other) {
if (other instanceof MergeStatuses o) {
- return Objects.equal(shuffleId, o.shuffleId)
- && Objects.equal(shuffleMergeId, o.shuffleMergeId)
+ return Objects.equals(shuffleId, o.shuffleId)
+ && Objects.equals(shuffleMergeId, o.shuffleMergeId)
&& Arrays.equals(bitmaps, o.bitmaps)
&& Arrays.equals(reduceIds, o.reduceIds)
&& Arrays.equals(sizes, o.sizes);
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java
index 49288eef5c5de..87b40eb1fc6ac 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java
@@ -21,8 +21,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
@@ -51,11 +49,8 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("execId", execId)
- .append("blockIds", Arrays.toString(blockIds))
- .toString();
+ return "OpenBlocks[appId=" + appId + ",execId=" + execId + ",blockIds=" +
+ Arrays.toString(blockIds) + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java
index ceab54a1c0615..20e6e79c31980 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java
@@ -17,11 +17,9 @@
package org.apache.spark.network.shuffle.protocol;
-import com.google.common.base.Objects;
-import io.netty.buffer.ByteBuf;
+import java.util.Objects;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
+import io.netty.buffer.ByteBuf;
import org.apache.spark.network.protocol.Encoders;
@@ -68,27 +66,21 @@ protected Type type() {
@Override
public int hashCode() {
- return Objects.hashCode(appId, appAttemptId, shuffleId, shuffleMergeId, mapIndex , reduceId,
+ return Objects.hash(appId, appAttemptId, shuffleId, shuffleMergeId, mapIndex, reduceId,
index);
}
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("attemptId", appAttemptId)
- .append("shuffleId", shuffleId)
- .append("shuffleMergeId", shuffleMergeId)
- .append("mapIndex", mapIndex)
- .append("reduceId", reduceId)
- .append("index", index)
- .toString();
+ return "PushBlockStream[appId=" + appId + ",attemptId=" + appAttemptId +
+ ",shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId + ",mapIndex=" + mapIndex +
+ ",reduceId=" + reduceId + ",index=" + index + "]";
}
@Override
public boolean equals(Object other) {
if (other instanceof PushBlockStream o) {
- return Objects.equal(appId, o.appId)
+ return Objects.equals(appId, o.appId)
&& appAttemptId == o.appAttemptId
&& shuffleId == o.shuffleId
&& shuffleMergeId == o.shuffleMergeId
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java
index 9805af67b9f26..a5931126e4ff8 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
@@ -56,11 +54,8 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("execId", execId)
- .append("executorInfo", executorInfo)
- .toString();
+ return "RegisterExecutor[appId=" + appId + ", execId=" + execId +
+ ",executorInfo=" + executorInfo + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java
index 7032942331c3e..2743824b3d21c 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java
@@ -21,8 +21,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
@@ -51,11 +49,8 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("execId", execId)
- .append("blockIds", Arrays.toString(blockIds))
- .toString();
+ return "RemoveBlocks[appId=" + appId + ",execId=" + execId +
+ ",blockIds=" + Arrays.toString(blockIds) + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveShuffleMerge.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveShuffleMerge.java
index 8ce2e05e6097d..ac6d981b2e081 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveShuffleMerge.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveShuffleMerge.java
@@ -17,10 +17,9 @@
package org.apache.spark.network.shuffle.protocol;
-import com.google.common.base.Objects;
+import java.util.Objects;
+
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
@@ -54,23 +53,19 @@ protected Type type() {
@Override
public int hashCode() {
- return Objects.hashCode(appId, appAttemptId, shuffleId, shuffleMergeId);
+ return Objects.hash(appId, appAttemptId, shuffleId, shuffleMergeId);
}
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("attemptId", appAttemptId)
- .append("shuffleId", shuffleId)
- .append("shuffleMergeId", shuffleMergeId)
- .toString();
+ return "RemoveShuffleMerge[appId=" + appId + ",attemptId=" + appAttemptId +
+ ",shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId + "]";
}
@Override
public boolean equals(Object other) {
if (other != null && other instanceof RemoveShuffleMerge o) {
- return Objects.equal(appId, o.appId)
+ return Objects.equals(appId, o.appId)
&& appAttemptId == o.appAttemptId
&& shuffleId == o.shuffleId
&& shuffleMergeId == o.shuffleMergeId;
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java
index aebd6f0d5a620..629e3c472e0a6 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java
@@ -20,8 +20,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
// Needed by ScalaDoc. See SPARK-7726
import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type;
@@ -49,10 +47,7 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("streamId", streamId)
- .append("numChunks", numChunks)
- .toString();
+ return "StreamHandle[streamId=" + streamId + ",numChunks=" + numChunks + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java
index fad187971e09a..9222134e6bb7f 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java
@@ -21,8 +21,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
@@ -68,13 +66,8 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("appId", appId)
- .append("execId", execId)
- .append("blockId", blockId)
- .append("metadata size", metadata.length)
- .append("block size", blockData.length)
- .toString();
+ return "UploadBlock[appId=" + appId + ",execId=" + execId + ",blockId=" + blockId +
+ ",metadata size=" + metadata.length + ",block size=" + blockData.length + "]";
}
@Override
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java
index 95d0b3835562d..45c4c5f98de74 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java
@@ -21,8 +21,6 @@
import java.util.Objects;
import io.netty.buffer.ByteBuf;
-import org.apache.commons.lang3.builder.ToStringBuilder;
-import org.apache.commons.lang3.builder.ToStringStyle;
import org.apache.spark.network.protocol.Encoders;
@@ -55,10 +53,7 @@ public int hashCode() {
@Override
public String toString() {
- return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE)
- .append("blockId", blockId)
- .append("metadata size", metadata.length)
- .toString();
+ return "UploadBlockStream[blockId=" + blockId + ",metadata size=" + metadata.length + "]";
}
@Override
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java
index ccb464c2ce5bd..0c091d88f98b5 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java
@@ -20,21 +20,17 @@
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
import java.util.*;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
-import java.util.stream.Stream;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableSet;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.MapConfigProvider;
import org.apache.spark.network.util.TransportConf;
@@ -46,15 +42,15 @@ public class CleanupNonShuffleServiceServedFilesSuite {
private static final String SORT_MANAGER = "org.apache.spark.shuffle.sort.SortShuffleManager";
private static Set<String> expectedShuffleFilesToKeep =
- ImmutableSet.of("shuffle_782_450_0.index", "shuffle_782_450_0.data");
+ Set.of("shuffle_782_450_0.index", "shuffle_782_450_0.data");
private static Set<String> expectedShuffleAndRddFilesToKeep =
- ImmutableSet.of("shuffle_782_450_0.index", "shuffle_782_450_0.data", "rdd_12_34");
+ Set.of("shuffle_782_450_0.index", "shuffle_782_450_0.data", "rdd_12_34");
private TransportConf getConf(boolean isFetchRddEnabled) {
return new TransportConf(
"shuffle",
- new MapConfigProvider(ImmutableMap.of(
+ new MapConfigProvider(Map.of(
Constants.SHUFFLE_SERVICE_FETCH_RDD_ENABLED,
Boolean.toString(isFetchRddEnabled))));
}
@@ -200,28 +196,13 @@ private static void assertStillThere(TestShuffleDataContext dataContext) {
}
}
- private static Set<String> collectFilenames(File[] files) throws IOException {
- Set<String> result = new HashSet<>();
- for (File file : files) {
- if (file.exists()) {
- try (Stream<Path> walk = Files.walk(file.toPath())) {
- result.addAll(walk
- .filter(Files::isRegularFile)
- .map(x -> x.toFile().getName())
- .collect(Collectors.toSet()));
- }
- }
- }
- return result;
- }
-
private static void assertContainedFilenames(
TestShuffleDataContext dataContext,
Set<String> expectedFilenames) throws IOException {
Set<String> collectedFilenames = new HashSet<>();
for (String localDir : dataContext.localDirs) {
- File[] dirs = new File[] { new File(localDir) };
- collectedFilenames.addAll(collectFilenames(dirs));
+ JavaUtils.listFiles(new File(localDir)).stream().map(File::getName)
+ .collect(Collectors.toCollection(() -> collectedFilenames));
}
assertEquals(expectedFilenames, collectedFilenames);
}
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java
index f7edc8837fde7..2a3135e3c8aeb 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java
@@ -27,7 +27,6 @@
import com.codahale.metrics.Meter;
import com.codahale.metrics.Metric;
import com.codahale.metrics.Timer;
-import com.google.common.io.ByteStreams;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.ArgumentCaptor;
@@ -136,7 +135,7 @@ private void checkDiagnosisResult(
CheckedInputStream checkedIn = new CheckedInputStream(
blockMarkers[0].createInputStream(), checksum);
byte[] buffer = new byte[10];
- ByteStreams.readFully(checkedIn, buffer, 0, (int) blockMarkers[0].size());
+ JavaUtils.readFully(checkedIn, buffer, 0, (int) blockMarkers[0].size());
long checksumByWriter = checkedIn.getChecksum().getValue();
// when checksumByWriter == checksumRecalculated and checksumByReader != checksumByWriter
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java
index 311827dbed4c5..488d02d63d552 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java
@@ -19,12 +19,11 @@
import java.io.IOException;
import java.io.InputStream;
-import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.io.CharStreams;
import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo;
+import org.apache.spark.network.util.JavaUtils;
import org.apache.spark.network.util.MapConfigProvider;
import org.apache.spark.network.util.TransportConf;
import org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.AppExecId;
@@ -83,23 +82,17 @@ public void testSortShuffleBlocks() throws IOException {
try (InputStream block0Stream = resolver.getBlockData(
"app0", "exec0", 0, 0, 0).createInputStream()) {
- String block0 =
- CharStreams.toString(new InputStreamReader(block0Stream, StandardCharsets.UTF_8));
- assertEquals(sortBlock0, block0);
+ assertEquals(sortBlock0, JavaUtils.toString(block0Stream));
}
try (InputStream block1Stream = resolver.getBlockData(
"app0", "exec0", 0, 0, 1).createInputStream()) {
- String block1 =
- CharStreams.toString(new InputStreamReader(block1Stream, StandardCharsets.UTF_8));
- assertEquals(sortBlock1, block1);
+ assertEquals(sortBlock1, JavaUtils.toString(block1Stream));
}
try (InputStream blocksStream = resolver.getContinuousBlocksData(
"app0", "exec0", 0, 0, 0, 2).createInputStream()) {
- String blocks =
- CharStreams.toString(new InputStreamReader(blocksStream, StandardCharsets.UTF_8));
- assertEquals(sortBlock0 + sortBlock1, blocks);
+ assertEquals(sortBlock0 + sortBlock1, JavaUtils.toString(blocksStream));
}
}
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java
index ec71f83ba743c..59381cabe063a 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java
@@ -32,7 +32,6 @@
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
-import com.google.common.collect.Sets;
import org.apache.spark.network.buffer.FileSegmentManagedBuffer;
import org.apache.spark.network.server.OneForOneStreamManager;
import org.junit.jupiter.api.AfterAll;
@@ -222,7 +221,7 @@ public void testFetchOneSort() throws Exception {
try (ExternalBlockStoreClient client = createExternalBlockStoreClient()) {
registerExecutor(client, "exec-0", dataContext0.createExecutorInfo(SORT_MANAGER));
FetchResult exec0Fetch = fetchBlocks("exec-0", new String[] { "shuffle_0_0_0" });
- assertEquals(Sets.newHashSet("shuffle_0_0_0"), exec0Fetch.successBlocks);
+ assertEquals(Set.of("shuffle_0_0_0"), exec0Fetch.successBlocks);
assertTrue(exec0Fetch.failedBlocks.isEmpty());
assertBufferListsEqual(exec0Fetch.buffers, Arrays.asList(exec0Blocks[0]));
exec0Fetch.releaseBuffers();
@@ -235,7 +234,7 @@ public void testFetchThreeSort() throws Exception {
registerExecutor(client,"exec-0", dataContext0.createExecutorInfo(SORT_MANAGER));
FetchResult exec0Fetch = fetchBlocks("exec-0",
new String[]{"shuffle_0_0_0", "shuffle_0_0_1", "shuffle_0_0_2"});
- assertEquals(Sets.newHashSet("shuffle_0_0_0", "shuffle_0_0_1", "shuffle_0_0_2"),
+ assertEquals(Set.of("shuffle_0_0_0", "shuffle_0_0_1", "shuffle_0_0_2"),
exec0Fetch.successBlocks);
assertTrue(exec0Fetch.failedBlocks.isEmpty());
assertBufferListsEqual(exec0Fetch.buffers, Arrays.asList(exec0Blocks));
@@ -256,7 +255,7 @@ public void testFetchWrongBlockId() throws Exception {
registerExecutor(client, "exec-1", dataContext0.createExecutorInfo(SORT_MANAGER));
FetchResult execFetch = fetchBlocks("exec-1", new String[]{"broadcast_1"});
assertTrue(execFetch.successBlocks.isEmpty());
- assertEquals(Sets.newHashSet("broadcast_1"), execFetch.failedBlocks);
+ assertEquals(Set.of("broadcast_1"), execFetch.failedBlocks);
}
}
@@ -267,7 +266,7 @@ public void testFetchValidRddBlock() throws Exception {
String validBlockId = "rdd_" + RDD_ID + "_" + SPLIT_INDEX_VALID_BLOCK;
FetchResult execFetch = fetchBlocks("exec-1", new String[]{validBlockId});
assertTrue(execFetch.failedBlocks.isEmpty());
- assertEquals(Sets.newHashSet(validBlockId), execFetch.successBlocks);
+ assertEquals(Set.of(validBlockId), execFetch.successBlocks);
assertBuffersEqual(new NioManagedBuffer(ByteBuffer.wrap(exec0RddBlockValid)),
execFetch.buffers.get(0));
}
@@ -280,7 +279,7 @@ public void testFetchDeletedRddBlock() throws Exception {
String missingBlockId = "rdd_" + RDD_ID + "_" + SPLIT_INDEX_MISSING_FILE;
FetchResult execFetch = fetchBlocks("exec-1", new String[]{missingBlockId});
assertTrue(execFetch.successBlocks.isEmpty());
- assertEquals(Sets.newHashSet(missingBlockId), execFetch.failedBlocks);
+ assertEquals(Set.of(missingBlockId), execFetch.failedBlocks);
}
}
@@ -310,7 +309,7 @@ public void testFetchCorruptRddBlock() throws Exception {
String corruptBlockId = "rdd_" + RDD_ID + "_" + SPLIT_INDEX_CORRUPT_LENGTH;
FetchResult execFetch = fetchBlocks("exec-1", new String[]{corruptBlockId});
assertTrue(execFetch.successBlocks.isEmpty());
- assertEquals(Sets.newHashSet(corruptBlockId), execFetch.failedBlocks);
+ assertEquals(Set.of(corruptBlockId), execFetch.failedBlocks);
}
}
@@ -321,7 +320,7 @@ public void testFetchNonexistent() throws Exception {
FetchResult execFetch = fetchBlocks("exec-0",
new String[]{"shuffle_2_0_0"});
assertTrue(execFetch.successBlocks.isEmpty());
- assertEquals(Sets.newHashSet("shuffle_2_0_0"), execFetch.failedBlocks);
+ assertEquals(Set.of("shuffle_2_0_0"), execFetch.failedBlocks);
}
}
@@ -331,8 +330,8 @@ public void testFetchWrongExecutor() throws Exception {
registerExecutor(client,"exec-0", dataContext0.createExecutorInfo(SORT_MANAGER));
FetchResult execFetch0 = fetchBlocks("exec-0", new String[]{"shuffle_0_0_0" /* right */});
FetchResult execFetch1 = fetchBlocks("exec-0", new String[]{"shuffle_1_0_0" /* wrong */});
- assertEquals(Sets.newHashSet("shuffle_0_0_0"), execFetch0.successBlocks);
- assertEquals(Sets.newHashSet("shuffle_1_0_0"), execFetch1.failedBlocks);
+ assertEquals(Set.of("shuffle_0_0_0"), execFetch0.successBlocks);
+ assertEquals(Set.of("shuffle_1_0_0"), execFetch1.failedBlocks);
}
}
@@ -343,7 +342,7 @@ public void testFetchUnregisteredExecutor() throws Exception {
FetchResult execFetch = fetchBlocks("exec-2",
new String[]{"shuffle_0_0_0", "shuffle_1_0_0"});
assertTrue(execFetch.successBlocks.isEmpty());
- assertEquals(Sets.newHashSet("shuffle_0_0_0", "shuffle_1_0_0"), execFetch.failedBlocks);
+ assertEquals(Set.of("shuffle_0_0_0", "shuffle_1_0_0"), execFetch.failedBlocks);
}
}
@@ -355,7 +354,7 @@ public void testFetchNoServer() throws Exception {
FetchResult execFetch = fetchBlocks("exec-0",
new String[]{"shuffle_1_0_0", "shuffle_1_0_1"}, clientConf, 1 /* port */);
assertTrue(execFetch.successBlocks.isEmpty());
- assertEquals(Sets.newHashSet("shuffle_1_0_0", "shuffle_1_0_1"), execFetch.failedBlocks);
+ assertEquals(Set.of("shuffle_1_0_0", "shuffle_1_0_1"), execFetch.failedBlocks);
}
}
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java
index 76f82800c502a..170b72b409e12 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java
@@ -19,8 +19,8 @@
import java.io.IOException;
import java.util.Arrays;
+import java.util.Map;
-import com.google.common.collect.ImmutableMap;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@@ -46,7 +46,7 @@ public class ExternalShuffleSecuritySuite {
protected TransportConf createTransportConf(boolean encrypt) {
if (encrypt) {
return new TransportConf("shuffle", new MapConfigProvider(
- ImmutableMap.of("spark.authenticate.enableSaslEncryption", "true")));
+ Map.of("spark.authenticate.enableSaslEncryption", "true")));
} else {
return new TransportConf("shuffle", MapConfigProvider.EMPTY);
}
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java
index 7151d044105c7..f127568c8a333 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java
@@ -23,7 +23,6 @@
import java.util.LinkedHashMap;
import java.util.concurrent.atomic.AtomicInteger;
-import com.google.common.collect.Maps;
import io.netty.buffer.Unpooled;
import org.junit.jupiter.api.Test;
@@ -57,7 +56,7 @@ public class OneForOneBlockFetcherSuite {
@Test
public void testFetchOne() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0])));
String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]);
@@ -72,7 +71,7 @@ public void testFetchOne() {
@Test
public void testUseOldProtocol() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0])));
String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]);
@@ -91,7 +90,7 @@ public void testUseOldProtocol() {
@Test
public void testFetchThreeShuffleBlocks() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12])));
blocks.put("shuffle_0_0_1", new NioManagedBuffer(ByteBuffer.wrap(new byte[23])));
blocks.put("shuffle_0_0_2", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[23])));
@@ -112,7 +111,7 @@ public void testFetchThreeShuffleBlocks() {
@Test
public void testBatchFetchThreeShuffleBlocks() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shuffle_0_0_0_3", new NioManagedBuffer(ByteBuffer.wrap(new byte[58])));
String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]);
@@ -129,7 +128,7 @@ public void testBatchFetchThreeShuffleBlocks() {
@Test
public void testFetchThree() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12])));
blocks.put("b1", new NioManagedBuffer(ByteBuffer.wrap(new byte[23])));
blocks.put("b2", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[23])));
@@ -148,7 +147,7 @@ public void testFetchThree() {
@Test
public void testFailure() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12])));
blocks.put("b1", null);
blocks.put("b2", null);
@@ -168,7 +167,7 @@ public void testFailure() {
@Test
public void testFailureAndSuccess() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12])));
blocks.put("b1", null);
blocks.put("b2", new NioManagedBuffer(ByteBuffer.wrap(new byte[21])));
@@ -190,14 +189,14 @@ public void testFailureAndSuccess() {
@Test
public void testEmptyBlockFetch() {
IllegalArgumentException e = assertThrows(IllegalArgumentException.class,
- () -> fetchBlocks(Maps.newLinkedHashMap(), new String[] {},
+ () -> fetchBlocks(new LinkedHashMap<>(), new String[] {},
new OpenBlocks("app-id", "exec-id", new String[] {}), conf));
assertEquals("Zero-sized blockIds array", e.getMessage());
}
@Test
public void testFetchShuffleBlocksOrder() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[1])));
blocks.put("shuffle_0_2_1", new NioManagedBuffer(ByteBuffer.wrap(new byte[2])));
blocks.put("shuffle_0_10_2", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[3])));
@@ -217,7 +216,7 @@ public void testFetchShuffleBlocksOrder() {
@Test
public void testBatchFetchShuffleBlocksOrder() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shuffle_0_0_1_2", new NioManagedBuffer(ByteBuffer.wrap(new byte[1])));
blocks.put("shuffle_0_2_2_3", new NioManagedBuffer(ByteBuffer.wrap(new byte[2])));
blocks.put("shuffle_0_10_3_4", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[3])));
@@ -237,7 +236,7 @@ public void testBatchFetchShuffleBlocksOrder() {
@Test
public void testShuffleBlockChunksFetch() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shuffleChunk_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12])));
blocks.put("shuffleChunk_0_0_0_1", new NioManagedBuffer(ByteBuffer.wrap(new byte[23])));
blocks.put("shuffleChunk_0_0_0_2",
@@ -255,7 +254,7 @@ public void testShuffleBlockChunksFetch() {
@Test
public void testShuffleBlockChunkFetchFailure() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shuffleChunk_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12])));
blocks.put("shuffleChunk_0_0_0_1", null);
blocks.put("shuffleChunk_0_0_0_2",
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java
index 32c6a8cd37eae..345ac7546af48 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java
@@ -23,7 +23,6 @@
import java.util.LinkedHashMap;
import java.util.Map;
-import com.google.common.collect.Maps;
import io.netty.buffer.Unpooled;
import org.junit.jupiter.api.Test;
@@ -47,7 +46,7 @@ public class OneForOneBlockPusherSuite {
@Test
public void testPushOne() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shufflePush_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[1])));
String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]);
@@ -61,7 +60,7 @@ public void testPushOne() {
@Test
public void testPushThree() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shufflePush_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12])));
blocks.put("shufflePush_0_0_1_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[23])));
blocks.put("shufflePush_0_0_2_0",
@@ -82,7 +81,7 @@ public void testPushThree() {
@Test
public void testServerFailures() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shufflePush_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12])));
blocks.put("shufflePush_0_0_1_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0])));
blocks.put("shufflePush_0_0_2_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0])));
@@ -102,7 +101,7 @@ public void testServerFailures() {
@Test
public void testHandlingRetriableFailures() {
- LinkedHashMap<String, ManagedBuffer> blocks = Maps.newLinkedHashMap();
+ LinkedHashMap<String, ManagedBuffer> blocks = new LinkedHashMap<>();
blocks.put("shufflePush_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12])));
blocks.put("shufflePush_0_0_1_0", null);
blocks.put("shufflePush_0_0_2_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0])));
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java
index edd5e1961a501..b7e24fe3da8fe 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java
@@ -39,9 +39,7 @@
import java.util.concurrent.TimeUnit;
import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.collect.ImmutableMap;
-import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
@@ -68,6 +66,7 @@
import org.apache.spark.network.shuffle.protocol.RemoveShuffleMerge;
import org.apache.spark.network.util.MapConfigProvider;
import org.apache.spark.network.util.TransportConf;
+import org.apache.spark.network.util.JavaUtils;
/**
* Tests for {@link RemoteBlockPushResolver}.
@@ -97,7 +96,7 @@ public class RemoteBlockPushResolverSuite {
public void before() throws IOException {
localDirs = createLocalDirs(2);
MapConfigProvider provider = new MapConfigProvider(
- ImmutableMap.of("spark.shuffle.push.server.minChunkSizeInMergedShuffleFile", "4"));
+ Map.of("spark.shuffle.push.server.minChunkSizeInMergedShuffleFile", "4"));
conf = new TransportConf("shuffle", provider);
pushResolver = new RemoteBlockPushResolver(conf, null);
registerExecutor(TEST_APP, prepareLocalDirs(localDirs, MERGE_DIRECTORY), MERGE_DIRECTORY_META);
@@ -107,7 +106,7 @@ public void before() throws IOException {
public void after() {
try {
for (Path local : localDirs) {
- FileUtils.deleteDirectory(local.toFile());
+ JavaUtils.deleteRecursively(local.toFile());
}
removeApplication(TEST_APP);
} catch (Exception e) {
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java
index 84c8b1b3353f2..cbbade779ab68 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java
@@ -29,7 +29,6 @@
import java.util.concurrent.TimeoutException;
import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Sets;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.mockito.stubbing.Answer;
@@ -353,15 +352,15 @@ public void testIOExceptionFailsConnectionEvenWithSaslException()
new TimeoutException());
IOException ioException = new IOException();
List<? extends Map<String, Object>> interactions = Arrays.asList(
- ImmutableMap.of("b0", saslExceptionInitial),
- ImmutableMap.of("b0", ioException),
- ImmutableMap.of("b0", saslExceptionInitial),
- ImmutableMap.of("b0", ioException),
- ImmutableMap.of("b0", saslExceptionFinal),
+ Map.of("b0", saslExceptionInitial),
+ Map.of("b0", ioException),
+ Map.of("b0", saslExceptionInitial),
+ Map.of("b0", ioException),
+ Map.of("b0", saslExceptionFinal),
// will not get invoked because the connection fails
- ImmutableMap.of("b0", ioException),
+ Map.of("b0", ioException),
// will not get invoked
- ImmutableMap.of("b0", block0)
+ Map.of("b0", block0)
);
configMap.put("spark.shuffle.sasl.enableRetries", "true");
performInteractions(interactions, listener);
@@ -425,7 +424,7 @@ private static void configureInteractions(List<? extends Map<String, Object>> in
Stubber stub = null;
// Contains all blockIds that are referenced across all interactions.
- LinkedHashSet<String> blockIds = Sets.newLinkedHashSet();
+ LinkedHashSet<String> blockIds = new LinkedHashSet<>();
for (Map<String, Object> interaction : interactions) {
blockIds.addAll(interaction.keySet());
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleTransportContextSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleTransportContextSuite.java
index aef3bc51bcd4b..bd9884e81ba92 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleTransportContextSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleTransportContextSuite.java
@@ -18,12 +18,11 @@
package org.apache.spark.network.shuffle;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import com.google.common.collect.Lists;
-
import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.netty.buffer.Unpooled;
@@ -74,7 +73,7 @@ ShuffleTransportContext createShuffleTransportContext(boolean separateFinalizeTh
}
private ByteBuf getDecodableMessageBuf(Message req) throws Exception {
- List<Object> out = Lists.newArrayList();
+ List<Object> out = new ArrayList<>();
ChannelHandlerContext context = mock(ChannelHandlerContext.class);
when(context.alloc()).thenReturn(ByteBufAllocator.DEFAULT);
MessageEncoder.INSTANCE.encode(context, req, out);
@@ -118,7 +117,7 @@ public void testDecodeOfFinalizeShuffleMessage() throws Exception {
try (ShuffleTransportContext shuffleTransportContext = createShuffleTransportContext(true)) {
ShuffleTransportContext.ShuffleMessageDecoder decoder =
(ShuffleTransportContext.ShuffleMessageDecoder) shuffleTransportContext.getDecoder();
- List<Object> out = Lists.newArrayList();
+ List<Object> out = new ArrayList<>();
decoder.decode(mock(ChannelHandlerContext.class), messageBuf, out);
Assertions.assertEquals(1, out.size());
@@ -137,7 +136,7 @@ public void testDecodeOfAnyOtherRpcMessage() throws Exception {
try (ShuffleTransportContext shuffleTransportContext = createShuffleTransportContext(true)) {
ShuffleTransportContext.ShuffleMessageDecoder decoder =
(ShuffleTransportContext.ShuffleMessageDecoder) shuffleTransportContext.getDecoder();
- List<Object> out = Lists.newArrayList();
+ List<Object> out = new ArrayList<>();
decoder.decode(mock(ChannelHandlerContext.class), messageBuf, out);
Assertions.assertEquals(1, out.size());
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslExternalShuffleSecuritySuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslExternalShuffleSecuritySuite.java
index 061d63dbcd72d..a04ec60ca1c1c 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslExternalShuffleSecuritySuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslExternalShuffleSecuritySuite.java
@@ -17,7 +17,7 @@
package org.apache.spark.network.shuffle;
-import com.google.common.collect.ImmutableMap;
+import java.util.Map;
import org.apache.spark.network.ssl.SslSampleConfigs;
import org.apache.spark.network.util.TransportConf;
@@ -30,9 +30,7 @@ protected TransportConf createTransportConf(boolean encrypt) {
return new TransportConf(
"shuffle",
SslSampleConfigs.createDefaultConfigProviderForRpcNamespaceWithAdditionalEntries(
- ImmutableMap.of(
- "spark.authenticate.enableSaslEncryption",
- "true")
+ Map.of("spark.authenticate.enableSaslEncryption", "true")
)
);
} else {
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslShuffleTransportContextSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslShuffleTransportContextSuite.java
index 51463bbad5576..1a85838792d29 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslShuffleTransportContextSuite.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslShuffleTransportContextSuite.java
@@ -17,7 +17,7 @@
package org.apache.spark.network.shuffle;
-import com.google.common.collect.ImmutableMap;
+import java.util.Map;
import org.apache.spark.network.ssl.SslSampleConfigs;
import org.apache.spark.network.util.TransportConf;
@@ -29,7 +29,7 @@ protected TransportConf createTransportConf(boolean separateFinalizeThread) {
return new TransportConf(
"shuffle",
SslSampleConfigs.createDefaultConfigProviderForRpcNamespaceWithAdditionalEntries(
- ImmutableMap.of(
+ Map.of(
"spark.shuffle.server.finalizeShuffleMergeThreadsPercent",
separateFinalizeThread ? "1" : "0")
)
diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
index 49b17824c3c72..4b8dc33c6bf52 100644
--- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
+++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java
@@ -22,6 +22,7 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.nio.file.Files;
import com.google.common.io.Closeables;
@@ -54,7 +55,7 @@ public void create() throws IOException {
localDirs[i] = JavaUtils.createDirectory(root, "spark").getAbsolutePath();
for (int p = 0; p < subDirsPerLocalDir; p ++) {
- new File(localDirs[i], String.format("%02x", p)).mkdirs();
+ Files.createDirectories(new File(localDirs[i], String.format("%02x", p)).toPath());
}
}
}
diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml
index 78289684960ed..c4451923b17a5 100644
--- a/common/network-yarn/pom.xml
+++ b/common/network-yarn/pom.xml
@@ -48,7 +48,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-tags_${scala.binary.version}</artifactId>
- <scope>test</scope>
+ <scope>provided</scope>
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java
index e455e531de0dd..2b9457c58560f 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java
@@ -1529,9 +1529,10 @@ public static UTF8String trimRight(
}
public static UTF8String[] splitSQL(final UTF8String input, final UTF8String delim,
- final int limit, final int collationId) {
+ final int limit, final int collationId, boolean legacySplitTruncate) {
if (CollationFactory.fetchCollation(collationId).isUtf8BinaryType) {
- return input.split(delim, limit);
+ return legacySplitTruncate ?
+ input.splitLegacyTruncate(delim, limit) : input.split(delim, limit);
} else if (CollationFactory.fetchCollation(collationId).isUtf8LcaseType) {
return lowercaseSplitSQL(input, delim, limit);
} else {
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
index 4bcd75a731059..59c23064858d0 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
@@ -22,7 +22,6 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.function.BiFunction;
-import java.util.function.ToLongFunction;
import java.util.stream.Stream;
import com.ibm.icu.text.CollationKey;
@@ -125,10 +124,19 @@ public static class Collation {
public final String version;
/**
- * Collation sensitive hash function. Output for two UTF8Strings will be the same if they are
- * equal according to the collation.
+ * Returns the sort key of the input UTF8String. Two UTF8String values are equal iff their
+ * sort keys are equal (compared as byte arrays).
+ * The sort key is defined as follows for collations without the RTRIM modifier:
+ * - UTF8_BINARY: It is the bytes of the string.
+ * - UTF8_LCASE: It is the byte array we get by replacing all invalid UTF8 sequences with the
+ * Unicode replacement character and then converting all characters of the replaced string
+ * to their lowercase equivalents (the Greek capital and Greek small sigma both map to
+ * the Greek final sigma).
+ * - ICU collations: It is the byte array returned by the ICU library for the collated string.
+ * For strings with the RTRIM modifier, we right-trim the string and return the collation key
+ * of the resulting right-trimmed string.
*/
- public final ToLongFunction<UTF8String> hashFunction;
+ public final Function<UTF8String, byte[]> sortKeyFunction;
/**
* Potentially faster way than using comparator to compare two UTF8Strings for equality.
@@ -182,7 +190,7 @@ public Collation(
Collator collator,
Comparator<UTF8String> comparator,
String version,
- ToLongFunction<UTF8String> hashFunction,
+ Function<UTF8String, byte[]> sortKeyFunction,
BiFunction<UTF8String, UTF8String, Boolean> equalsFunction,
boolean isUtf8BinaryType,
boolean isUtf8LcaseType,
@@ -192,7 +200,7 @@ public Collation(
this.collator = collator;
this.comparator = comparator;
this.version = version;
- this.hashFunction = hashFunction;
+ this.sortKeyFunction = sortKeyFunction;
this.isUtf8BinaryType = isUtf8BinaryType;
this.isUtf8LcaseType = isUtf8LcaseType;
this.equalsFunction = equalsFunction;
@@ -581,18 +589,18 @@ private static boolean isValidCollationId(int collationId) {
protected Collation buildCollation() {
if (caseSensitivity == CaseSensitivity.UNSPECIFIED) {
Comparator<UTF8String> comparator;
- ToLongFunction<UTF8String> hashFunction;
+ Function<UTF8String, byte[]> sortKeyFunction;
BiFunction<UTF8String, UTF8String, Boolean> equalsFunction;
boolean supportsSpaceTrimming = spaceTrimming != SpaceTrimming.NONE;
if (spaceTrimming == SpaceTrimming.NONE) {
comparator = UTF8String::binaryCompare;
- hashFunction = s -> (long) s.hashCode();
+ sortKeyFunction = s -> s.getBytes();
equalsFunction = UTF8String::equals;
} else {
comparator = (s1, s2) -> applyTrimmingPolicy(s1, spaceTrimming).binaryCompare(
applyTrimmingPolicy(s2, spaceTrimming));
- hashFunction = s -> (long) applyTrimmingPolicy(s, spaceTrimming).hashCode();
+ sortKeyFunction = s -> applyTrimmingPolicy(s, spaceTrimming).getBytes();
equalsFunction = (s1, s2) -> applyTrimmingPolicy(s1, spaceTrimming).equals(
applyTrimmingPolicy(s2, spaceTrimming));
}
@@ -603,25 +611,25 @@ protected Collation buildCollation() {
null,
comparator,
CollationSpecICU.ICU_VERSION,
- hashFunction,
+ sortKeyFunction,
equalsFunction,
/* isUtf8BinaryType = */ true,
/* isUtf8LcaseType = */ false,
spaceTrimming != SpaceTrimming.NONE);
} else {
Comparator<UTF8String> comparator;
- ToLongFunction<UTF8String> hashFunction;
+ Function<UTF8String, byte[]> sortKeyFunction;
if (spaceTrimming == SpaceTrimming.NONE) {
comparator = CollationAwareUTF8String::compareLowerCase;
- hashFunction = s ->
- (long) CollationAwareUTF8String.lowerCaseCodePoints(s).hashCode();
+ sortKeyFunction = s ->
+ CollationAwareUTF8String.lowerCaseCodePoints(s).getBytes();
} else {
comparator = (s1, s2) -> CollationAwareUTF8String.compareLowerCase(
applyTrimmingPolicy(s1, spaceTrimming),
applyTrimmingPolicy(s2, spaceTrimming));
- hashFunction = s -> (long) CollationAwareUTF8String.lowerCaseCodePoints(
- applyTrimmingPolicy(s, spaceTrimming)).hashCode();
+ sortKeyFunction = s -> CollationAwareUTF8String.lowerCaseCodePoints(
+ applyTrimmingPolicy(s, spaceTrimming)).getBytes();
}
return new Collation(
@@ -630,7 +638,7 @@ protected Collation buildCollation() {
null,
comparator,
CollationSpecICU.ICU_VERSION,
- hashFunction,
+ sortKeyFunction,
(s1, s2) -> comparator.compare(s1, s2) == 0,
/* isUtf8BinaryType = */ false,
/* isUtf8LcaseType = */ true,
@@ -1013,19 +1021,18 @@ protected Collation buildCollation() {
collator.freeze();
Comparator<UTF8String> comparator;
- ToLongFunction<UTF8String> hashFunction;
+ Function<UTF8String, byte[]> sortKeyFunction;
if (spaceTrimming == SpaceTrimming.NONE) {
- hashFunction = s -> (long) collator.getCollationKey(
- s.toValidString()).hashCode();
comparator = (s1, s2) ->
collator.compare(s1.toValidString(), s2.toValidString());
+ sortKeyFunction = s -> collator.getCollationKey(s.toValidString()).toByteArray();
} else {
comparator = (s1, s2) -> collator.compare(
applyTrimmingPolicy(s1, spaceTrimming).toValidString(),
applyTrimmingPolicy(s2, spaceTrimming).toValidString());
- hashFunction = s -> (long) collator.getCollationKey(
- applyTrimmingPolicy(s, spaceTrimming).toValidString()).hashCode();
+ sortKeyFunction = s -> collator.getCollationKey(
+ applyTrimmingPolicy(s, spaceTrimming).toValidString()).toByteArray();
}
return new Collation(
@@ -1034,7 +1041,7 @@ protected Collation buildCollation() {
collator,
comparator,
ICU_VERSION,
- hashFunction,
+ sortKeyFunction,
(s1, s2) -> comparator.compare(s1, s2) == 0,
/* isUtf8BinaryType = */ false,
/* isUtf8LcaseType = */ false,
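Aside (illustrative, not part of this patch): the sortKeyFunction introduced above replaces the long-valued hashFunction, and collation-aware equality can be checked by comparing the returned byte arrays, as the updated CollationFactorySuite does further below. A minimal sketch, assuming the fetchCollation(String) lookup used in that suite; the wrapper class and method here are hypothetical:

    // Hypothetical example; relies only on the public sortKeyFunction field added above.
    import java.util.Arrays;
    import org.apache.spark.sql.catalyst.util.CollationFactory;
    import org.apache.spark.unsafe.types.UTF8String;

    class SortKeyEqualityExample {
      // Two strings are equal under a collation iff their sort keys are byte-equal.
      static boolean equalUnderCollation(String collationName, String a, String b) {
        CollationFactory.Collation collation = CollationFactory.fetchCollation(collationName);
        byte[] key1 = collation.sortKeyFunction.apply(UTF8String.fromString(a));
        byte[] key2 = collation.sortKeyFunction.apply(UTF8String.fromString(b));
        return Arrays.equals(key1, key2);
      }
    }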
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java
index 135250e482b16..f950fd864c576 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java
@@ -706,8 +706,10 @@ public static int collationAwareRegexFlags(final int collationId) {
public static UTF8String lowercaseRegex(final UTF8String regex) {
return UTF8String.concat(lowercaseRegexPrefix, regex);
}
- public static UTF8String collationAwareRegex(final UTF8String regex, final int collationId) {
- return supportsLowercaseRegex(collationId) ? lowercaseRegex(regex) : regex;
+ public static UTF8String collationAwareRegex(
+ final UTF8String regex, final int collationId, boolean notIgnoreEmpty) {
+ return supportsLowercaseRegex(collationId) && (notIgnoreEmpty || regex.numBytes() != 0)
+ ? lowercaseRegex(regex) : regex;
}
/**
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java
index 0ae238564d591..d52207ad860cd 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java
@@ -45,4 +45,5 @@ public class DateTimeConstants {
public static final long NANOS_PER_MICROS = 1000L;
public static final long NANOS_PER_MILLIS = MICROS_PER_MILLIS * NANOS_PER_MICROS;
public static final long NANOS_PER_SECOND = MILLIS_PER_SECOND * NANOS_PER_MILLIS;
+ public static final long NANOS_PER_DAY = MICROS_PER_DAY * NANOS_PER_MICROS;
}
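Aside (not part of this patch): with MICROS_PER_DAY = 86,400,000,000 and NANOS_PER_MICROS = 1,000 as defined elsewhere in this class, the new constant evaluates to NANOS_PER_DAY = 86,400,000,000,000 nanoseconds per day.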
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
index f12408fb49313..310dbce9eaab6 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java
@@ -20,9 +20,8 @@
import java.nio.ByteOrder;
import java.util.Arrays;
-import com.google.common.primitives.Ints;
-
import org.apache.spark.unsafe.Platform;
+import org.apache.spark.network.util.JavaUtils;
public final class ByteArray {
@@ -169,7 +168,7 @@ public static byte[] concatWS(byte[] delimiter, byte[]... inputs) {
}
if (totalLength > 0) totalLength -= delimiter.length;
// Allocate a new byte array, and copy the inputs one by one into it
- final byte[] result = new byte[Ints.checkedCast(totalLength)];
+ final byte[] result = new byte[JavaUtils.checkedCast(totalLength)];
int offset = 0;
for (int i = 0; i < inputs.length; i++) {
byte[] input = inputs[i];
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java
new file mode 100644
index 0000000000000..48dc6f896e91a
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import java.io.Serializable;
+
+// This class represents the physical type for the GEOGRAPHY data type.
+public final class GeographyVal implements Comparable<GeographyVal>, Serializable {
+
+ // The GEOGRAPHY type is implemented as a byte array. We provide `getBytes` and `fromBytes`
+ // methods for readers and writers to access this underlying array of bytes.
+ private final byte[] value;
+
+ // We make the constructor private. We should use `fromBytes` to create new instances.
+ private GeographyVal(byte[] value) {
+ this.value = value;
+ }
+
+ public byte[] getBytes() {
+ return value;
+ }
+
+ public static GeographyVal fromBytes(byte[] bytes) {
+ if (bytes == null) {
+ return null;
+ } else {
+ return new GeographyVal(bytes);
+ }
+ }
+
+ // Comparison is not yet supported for GEOGRAPHY.
+ public int compareTo(GeographyVal g) {
+ throw new UnsupportedOperationException();
+ }
+}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java
new file mode 100644
index 0000000000000..2bb7f194c940d
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import java.io.Serializable;
+
+// This class represents the physical type for the GEOMETRY data type.
+public final class GeometryVal implements Comparable<GeometryVal>, Serializable {
+
+ // The GEOMETRY type is implemented as a byte array. We provide `getBytes` and `fromBytes`
+ // methods for readers and writers to access this underlying array of bytes.
+ private final byte[] value;
+
+ // We make the constructor private. We should use `fromBytes` to create new instances.
+ private GeometryVal(byte[] value) {
+ this.value = value;
+ }
+
+ public byte[] getBytes() {
+ return value;
+ }
+
+ public static GeometryVal fromBytes(byte[] bytes) {
+ if (bytes == null) {
+ return null;
+ } else {
+ return new GeometryVal(bytes);
+ }
+ }
+
+ // Comparison is not yet supported for GEOMETRY.
+ public int compareTo(GeometryVal g) {
+ throw new UnsupportedOperationException();
+ }
+}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index caf8461b0b5d6..87d004040c3a0 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -642,9 +642,13 @@ public UTF8String substring(final int start, final int until) {
}
int j = i;
- while (i < numBytes && c < until) {
- i += numBytesForFirstByte(getByte(i));
- c += 1;
+ if (until == Integer.MAX_VALUE) {
+ i = numBytes;
+ } else {
+ while (i < numBytes && c < until) {
+ i += numBytesForFirstByte(getByte(i));
+ c += 1;
+ }
}
if (i > j) {
@@ -663,9 +667,8 @@ public UTF8String substringSQL(int pos, int length) {
// refers to element i-1 in the sequence. If a start index i is less than 0, it refers
// to the -ith element before the end of the sequence. If a start index i is 0, it
// refers to the first element.
- int len = numChars();
// `len + pos` does not overflow as `len >= 0`.
- int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0);
+ int start = (pos > 0) ? pos -1 : ((pos < 0) ? numChars() + pos : 0);
int end;
if ((long) start + length > Integer.MAX_VALUE) {
@@ -1168,10 +1171,21 @@ public UTF8String reverse() {
}
public UTF8String repeat(int times) {
- if (times <= 0) {
+ if (times <= 0 || numBytes == 0) {
return EMPTY_UTF8;
}
+ if (times == 1) {
+ return this;
+ }
+
+ if (numBytes == 1) {
+ byte[] newBytes = new byte[times];
+ byte b = getByte(0);
+ Arrays.fill(newBytes, b);
+ return fromBytes(newBytes);
+ }
+
byte[] newBytes = new byte[Math.multiplyExact(numBytes, times)];
copyMemory(this.base, this.offset, newBytes, BYTE_ARRAY_OFFSET, numBytes);
@@ -1483,6 +1497,25 @@ public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) {
}
public UTF8String[] split(UTF8String pattern, int limit) {
+ // For the empty `pattern` a `split` function ignores trailing empty strings unless original
+ // string is empty.
+ if (numBytes() != 0 && pattern.numBytes() == 0) {
+ int newLimit = limit > numChars() || limit <= 0 ? numChars() : limit;
+ byte[] input = getBytes();
+ int byteIndex = 0;
+ UTF8String[] result = new UTF8String[newLimit];
+ for (int charIndex = 0; charIndex < newLimit - 1; charIndex++) {
+ int currCharNumBytes = numBytesForFirstByte(input[byteIndex]);
+ result[charIndex] = UTF8String.fromBytes(input, byteIndex, currCharNumBytes);
+ byteIndex += currCharNumBytes;
+ }
+ result[newLimit - 1] = UTF8String.fromBytes(input, byteIndex, numBytes() - byteIndex);
+ return result;
+ }
+ return split(pattern.toString(), limit);
+ }
+
+ public UTF8String[] splitLegacyTruncate(UTF8String pattern, int limit) {
// For the empty `pattern` a `split` function ignores trailing empty strings unless original
// string is empty.
if (numBytes() != 0 && pattern.numBytes() == 0) {
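Aside (illustrative, not part of this patch): a minimal sketch of how split and the new splitLegacyTruncate now differ for an empty delimiter with a positive limit; the expected results are taken from the UTF8StringSuite change further below:

    // Hypothetical snippet; split keeps the remainder in the last element,
    // while the legacy variant truncated to the first `limit` characters.
    UTF8String s = UTF8String.fromString("ab");
    UTF8String[] current = s.split(UTF8String.fromString(""), 1);              // ["ab"]
    UTF8String[] legacy = s.splitLegacyTruncate(UTF8String.fromString(""), 1); // ["a"] (previous behavior)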
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java
new file mode 100644
index 0000000000000..639a8b2f77821
--- /dev/null
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+public class GeographyValSuite {
+
+ @Test
+ public void roundTripBytes() {
+ // A simple byte array to test the round trip (`fromBytes` -> `getBytes`).
+ byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 };
+ GeographyVal geographyVal = GeographyVal.fromBytes(bytes);
+ assertNotNull(geographyVal);
+ assertArrayEquals(bytes, geographyVal.getBytes());
+ }
+
+ @Test
+ public void roundNullHandling() {
+ // A simple null byte array to test null handling for GEOGRAPHY.
+ byte[] bytes = null;
+ GeographyVal geographyVal = GeographyVal.fromBytes(bytes);
+ assertNull(geographyVal);
+ }
+
+ @Test
+ public void testCompareTo() {
+ // Comparison is not yet supported for GEOGRAPHY.
+ byte[] bytes1 = new byte[] { 1, 2, 3 };
+ byte[] bytes2 = new byte[] { 4, 5, 6 };
+ GeographyVal geographyVal1 = GeographyVal.fromBytes(bytes1);
+ GeographyVal geographyVal2 = GeographyVal.fromBytes(bytes2);
+ try {
+ geographyVal1.compareTo(geographyVal2);
+ } catch (UnsupportedOperationException e) {
+ assert(e.toString().equals("java.lang.UnsupportedOperationException"));
+ }
+ }
+}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java
new file mode 100644
index 0000000000000..e38c6903e6ddc
--- /dev/null
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.unsafe.types;
+
+import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+public class GeometryValSuite {
+
+ @Test
+ public void roundTripBytes() {
+ // A simple byte array to test the round trip (`fromBytes` -> `getBytes`).
+ byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 };
+ GeometryVal geometryVal = GeometryVal.fromBytes(bytes);
+ assertNotNull(geometryVal);
+ assertArrayEquals(bytes, geometryVal.getBytes());
+ }
+
+ @Test
+ public void roundNullHandling() {
+ // A simple null byte array to test null handling for GEOMETRY.
+ byte[] bytes = null;
+ GeometryVal geometryVal = GeometryVal.fromBytes(bytes);
+ assertNull(geometryVal);
+ }
+
+ @Test
+ public void testCompareTo() {
+ // Comparison is not yet supported for GEOMETRY.
+ byte[] bytes1 = new byte[] { 1, 2, 3 };
+ byte[] bytes2 = new byte[] { 4, 5, 6 };
+ GeometryVal geometryVal1 = GeometryVal.fromBytes(bytes1);
+ GeometryVal geometryVal2 = GeometryVal.fromBytes(bytes2);
+ try {
+ geometryVal1.compareTo(geometryVal2);
+ } catch (UnsupportedOperationException e) {
+ assert(e.toString().equals("java.lang.UnsupportedOperationException"));
+ }
+ }
+}
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index c4a66fdffdd4d..26b96155377e8 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -24,7 +24,6 @@
import java.nio.charset.StandardCharsets;
import java.util.*;
-import com.google.common.collect.ImmutableMap;
import org.apache.spark.unsafe.Platform;
import org.apache.spark.unsafe.UTF8StringBuilder;
@@ -432,7 +431,7 @@ public void split() {
new UTF8String[]{fromString("a"), fromString("b")},
fromString("ab").split(fromString(""), 100));
assertArrayEquals(
- new UTF8String[]{fromString("a")},
+ new UTF8String[]{fromString("ab")},
fromString("ab").split(fromString(""), 1));
assertArrayEquals(
new UTF8String[]{fromString("")},
@@ -495,7 +494,7 @@ public void levenshteinDistance() {
public void translate() {
assertEquals(
fromString("1a2s3ae"),
- fromString("translate").translate(ImmutableMap.of(
+ fromString("translate").translate(Map.of(
"r", "1",
"n", "2",
"l", "3",
@@ -506,7 +505,7 @@ public void translate() {
fromString("translate").translate(new HashMap<>()));
assertEquals(
fromString("asae"),
- fromString("translate").translate(ImmutableMap.of(
+ fromString("translate").translate(Map.of(
"r", "\0",
"n", "\0",
"l", "\0",
@@ -514,7 +513,7 @@ public void translate() {
)));
assertEquals(
fromString("aa世b"),
- fromString("花花世界").translate(ImmutableMap.of(
+ fromString("花花世界").translate(Map.of(
"花", "a",
"界", "b"
)));
diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
index 8e9d33efe7a6d..ddf588b6c64c7 100644
--- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
+++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
@@ -17,7 +17,8 @@
package org.apache.spark.unsafe.types
-import scala.collection.parallel.immutable.ParSeq
+import java.util.stream.IntStream
+
import scala.jdk.CollectionConverters.MapHasAsScala
import com.ibm.icu.util.ULocale
@@ -139,7 +140,7 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
case class CollationTestCase[R](collationName: String, s1: String, s2: String, expectedResult: R)
- test("collation aware equality and hash") {
+ test("collation aware equality and sort key") {
val checks = Seq(
CollationTestCase("UTF8_BINARY", "aaa", "aaa", true),
CollationTestCase("UTF8_BINARY", "aaa", "AAA", false),
@@ -194,9 +195,9 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
assert(collation.equalsFunction(toUTF8(testCase.s1), toUTF8(testCase.s2)) ==
testCase.expectedResult)
- val hash1 = collation.hashFunction.applyAsLong(toUTF8(testCase.s1))
- val hash2 = collation.hashFunction.applyAsLong(toUTF8(testCase.s2))
- assert((hash1 == hash2) == testCase.expectedResult)
+ val sortKey1 = collation.sortKeyFunction.apply(toUTF8(testCase.s1)).asInstanceOf[Array[Byte]]
+ val sortKey2 = collation.sortKeyFunction.apply(toUTF8(testCase.s2)).asInstanceOf[Array[Byte]]
+ assert(sortKey1.sameElements(sortKey2) == testCase.expectedResult)
})
}
@@ -293,7 +294,7 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
(0 to 10).foreach(_ => {
val collator = fetchCollation("UNICODE").getCollator
- ParSeq(0 to 100).foreach { _ =>
+ IntStream.rangeClosed(0, 100).parallel().forEach { _ =>
collator.getCollationKey("aaa")
}
})
diff --git a/common/utils-java/pom.xml b/common/utils-java/pom.xml
new file mode 100644
index 0000000000000..ba3603f810856
--- /dev/null
+++ b/common/utils-java/pom.xml
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.13</artifactId>
+    <version>4.1.0-SNAPSHOT</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>spark-common-utils-java_2.13</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Common Java Utils</name>
+  <url>https://spark.apache.org/</url>
+  <properties>
+    <sbt.project.name>common-utils-java</sbt.project.name>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-tags_${scala.binary.version}</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>jul-to-slf4j</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>jcl-over-slf4j</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-slf4j2-impl</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-core</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-1.2-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-layout-template-json</artifactId>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  </build>
+</project>
diff --git a/common/utils/src/main/java/org/apache/spark/QueryContext.java b/common/utils-java/src/main/java/org/apache/spark/QueryContext.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/QueryContext.java
rename to common/utils-java/src/main/java/org/apache/spark/QueryContext.java
diff --git a/common/utils/src/main/java/org/apache/spark/QueryContextType.java b/common/utils-java/src/main/java/org/apache/spark/QueryContextType.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/QueryContextType.java
rename to common/utils-java/src/main/java/org/apache/spark/QueryContextType.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/CoGroupFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/CoGroupFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/CoGroupFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/CoGroupFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/FilterFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/FilterFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/FilterFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/FilterFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/ForeachFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/ForeachFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/ForeachFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/ForeachFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/ForeachPartitionFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/ForeachPartitionFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/ForeachPartitionFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/ForeachPartitionFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/Function.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/Function0.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function0.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function0.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function0.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/Function2.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function2.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function2.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function2.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/Function3.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function3.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function3.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function3.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/Function4.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function4.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function4.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function4.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/MapFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/MapFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/MapFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/MapFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/MapGroupsFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/MapGroupsFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/MapGroupsFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/MapGroupsFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/MapPartitionsFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/MapPartitionsFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/MapPartitionsFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/MapPartitionsFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/PairFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/PairFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/PairFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/PairFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/ReduceFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/ReduceFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/ReduceFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/ReduceFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/VoidFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/VoidFunction.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/VoidFunction.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/VoidFunction.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/package-info.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/package-info.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/api/java/function/package-info.java
rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/package-info.java
diff --git a/common/utils-java/src/main/java/org/apache/spark/internal/LogKey.java b/common/utils-java/src/main/java/org/apache/spark/internal/LogKey.java
new file mode 100644
index 0000000000000..0bd0fecb43976
--- /dev/null
+++ b/common/utils-java/src/main/java/org/apache/spark/internal/LogKey.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.internal;
+
+/**
+ * All structured logging `keys` used in `MDC` must implement `LogKey`.
+ * <p>
+ * `LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs.
+ * Follow these guidelines when adding a new LogKey:
+ * <ul>
+ *   <li>
+ *     Define all structured logging keys in `LogKeys.java`, and sort them alphabetically for
+ *     ease of search.
+ *   </li>
+ *   <li>
+ *     Use `UPPER_SNAKE_CASE` for key names.
+ *   </li>
+ *   <li>
+ *     Key names should be both simple and broad, yet include specific identifiers like `STAGE_ID`,
+ *     `TASK_ID`, and `JOB_ID` when needed for clarity. For instance, use `MAX_ATTEMPTS` as a
+ *     general key instead of creating separate keys for each scenario such as
+ *     `EXECUTOR_STATE_SYNC_MAX_ATTEMPTS` and `MAX_TASK_FAILURES`.
+ *     This balances simplicity with the detail needed for effective logging.
+ *   </li>
+ *   <li>
+ *     Use abbreviations in names if they are widely understood,
+ *     such as `APP_ID` for APPLICATION_ID, and `K8S` for KUBERNETES.
+ *   </li>
+ *   <li>
+ *     For time-related keys, use milliseconds as the unit of time.
+ *   </li>
+ * </ul>
*
* Constant String Messages:
@@ -65,8 +66,10 @@
* you can define `custom LogKey` and use it in `java` code as follows:
*
*
- * // To add a `custom LogKey`, implement `LogKey`
- * public static class CUSTOM_LOG_KEY implements LogKey { }
+ * // To add custom log keys, define an enum that implements `LogKey`
+ * public enum CustomLogKeys implements LogKey {
+ * CUSTOM_LOG_KEY
+ * }
* import org.apache.spark.internal.MDC;
* logger.error("Unable to delete key {} for cache", MDC.of(CUSTOM_LOG_KEY, "key"));
*/
@@ -222,8 +225,8 @@ private void withLogContext(
for (int index = 0; index < mdcs.length; index++) {
MDC mdc = mdcs[index];
String value = (mdc.value() != null) ? mdc.value().toString() : null;
- if (Logging$.MODULE$.isStructuredLoggingEnabled()) {
- context.put(mdc.key().name(), value);
+ if (SparkLoggerFactory.isStructuredLoggingEnabled()) {
+ context.put(mdc.key().name().toLowerCase(Locale.ROOT), value);
}
args[index] = value;
}
diff --git a/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java b/common/utils-java/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java
similarity index 77%
rename from common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java
rename to common/utils-java/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java
index a59c007362419..f5be570fa5b39 100644
--- a/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java
+++ b/common/utils-java/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java
@@ -23,6 +23,20 @@
public class SparkLoggerFactory {
+ private static volatile boolean structuredLoggingEnabled = false;
+
+ public static void enableStructuredLogging() {
+ structuredLoggingEnabled = true;
+ }
+
+ public static void disableStructuredLogging() {
+ structuredLoggingEnabled = false;
+ }
+
+ public static boolean isStructuredLoggingEnabled() {
+ return structuredLoggingEnabled;
+ }
+
public static SparkLogger getLogger(String name) {
return new SparkLogger(LoggerFactory.getLogger(name));
}
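
A minimal sketch of how caller code might drive the relocated toggle together with a custom key. The `StructuredLoggingExample` class and the message text are illustrative; `SparkLoggerFactory`, `SparkLogger`, `MDC`, and the test-only `CustomLogKeys` enum are the ones introduced elsewhere in this patch:

import org.apache.spark.internal.MDC;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
import org.apache.spark.util.CustomLogKeys;

public class StructuredLoggingExample {
  private static final SparkLogger LOG =
    SparkLoggerFactory.getLogger(StructuredLoggingExample.class.getName());

  public static void main(String[] args) {
    // The switch now lives on SparkLoggerFactory instead of the Scala Logging object.
    SparkLoggerFactory.enableStructuredLogging();
    // Any enum implementing LogKey can be used as an MDC key.
    LOG.error("Unable to delete key {} for cache", MDC.of(CustomLogKeys.CUSTOM_LOG_KEY, "key"));
  }
}
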
diff --git a/common/utils/src/main/java/org/apache/spark/memory/MemoryMode.java b/common/utils-java/src/main/java/org/apache/spark/memory/MemoryMode.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/memory/MemoryMode.java
rename to common/utils-java/src/main/java/org/apache/spark/memory/MemoryMode.java
diff --git a/common/utils/src/main/java/org/apache/spark/network/util/ByteUnit.java b/common/utils-java/src/main/java/org/apache/spark/network/util/ByteUnit.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/network/util/ByteUnit.java
rename to common/utils-java/src/main/java/org/apache/spark/network/util/ByteUnit.java
diff --git a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
similarity index 55%
rename from common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
rename to common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
index 94f9f02ed2c9b..cf500926fa3aa 100644
--- a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java
+++ b/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java
@@ -18,18 +18,26 @@
package org.apache.spark.network.util;
import java.io.*;
+import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.channels.ReadableByteChannel;
import java.nio.charset.StandardCharsets;
+import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.LinkOption;
+import java.nio.file.Path;
+import java.nio.file.FileVisitOption;
+import java.nio.file.FileVisitResult;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.*;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
-import org.apache.commons.lang3.SystemUtils;
+import java.util.stream.Stream;
+import java.util.stream.Collectors;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
@@ -60,6 +68,109 @@ public static void closeQuietly(Closeable closeable) {
}
}
+ /** Delete a file or directory and its contents recursively without throwing exceptions. */
+ public static void deleteQuietly(File file) {
+ if (file != null && file.exists()) {
+ Path path = file.toPath();
+ try (Stream<Path> walk = Files.walk(path)) {
+ walk.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete);
+ } catch (Exception ignored) { /* No-op */ }
+ }
+ }
+
+ /** Registers the file or directory for deletion when the JVM exits. */
+ public static void forceDeleteOnExit(File file) throws IOException {
+ if (file != null && file.exists()) {
+ if (!file.isDirectory()) {
+ file.deleteOnExit();
+ } else {
+ Path path = file.toPath();
+ Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult preVisitDirectory(Path p, BasicFileAttributes a)
+ throws IOException {
+ p.toFile().deleteOnExit();
+ return a.isSymbolicLink() ? FileVisitResult.SKIP_SUBTREE : FileVisitResult.CONTINUE;
+ }
+
+ @Override
+ public FileVisitResult visitFile(Path p, BasicFileAttributes a) throws IOException {
+ p.toFile().deleteOnExit();
+ return FileVisitResult.CONTINUE;
+ }
+ });
+ }
+ }
+ }
+
+ /** Move a file from src to dst. */
+ public static void moveFile(File src, File dst) throws IOException {
+ if (src == null || dst == null || !src.exists() || src.isDirectory() || dst.exists()) {
+ throw new IllegalArgumentException("Invalid input " + src + " or " + dst);
+ }
+ if (!src.renameTo(dst)) { // Try to use File.renameTo first
+ Files.move(src.toPath(), dst.toPath());
+ }
+ }
+
+ /** Move a directory from src to dst. */
+ public static void moveDirectory(File src, File dst) throws IOException {
+ if (src == null || dst == null || !src.exists() || !src.isDirectory() || dst.exists()) {
+ throw new IllegalArgumentException("Invalid input " + src + " or " + dst);
+ }
+ if (!src.renameTo(dst)) {
+ Path from = src.toPath().toAbsolutePath().normalize();
+ Path to = dst.toPath().toAbsolutePath().normalize();
+ if (to.startsWith(from)) {
+ throw new IllegalArgumentException("Cannot move directory to itself or its subdirectory");
+ }
+ moveDirectory(from, to);
+ }
+ }
+
+ private static void moveDirectory(Path src, Path dst) throws IOException {
+ Files.createDirectories(dst);
+ try (DirectoryStream<Path> stream = Files.newDirectoryStream(src)) {
+ for (Path from : stream) {
+ Path to = dst.resolve(from.getFileName());
+ if (Files.isDirectory(from)) {
+ moveDirectory(from, to);
+ } else {
+ Files.move(from, to, StandardCopyOption.REPLACE_EXISTING);
+ }
+ }
+ }
+ Files.delete(src);
+ }
+
+ /** Copy the src directory to dst recursively. File attribute times are not copied. */
+ public static void copyDirectory(File src, File dst) throws IOException {
+ if (src == null || dst == null || !src.exists() || !src.isDirectory() ||
+ (dst.exists() && !dst.isDirectory())) {
+ throw new IllegalArgumentException("Invalid input file " + src + " or directory " + dst);
+ }
+ Path from = src.toPath().toAbsolutePath().normalize();
+ Path to = dst.toPath().toAbsolutePath().normalize();
+ if (to.startsWith(from)) {
+ throw new IllegalArgumentException("Cannot copy directory to itself or its subdirectory");
+ }
+ Files.createDirectories(to);
+ Files.walkFileTree(from, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
+ throws IOException {
+ Files.createDirectories(to.resolve(from.relativize(dir)));
+ return FileVisitResult.CONTINUE;
+ }
+
+ @Override
+ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+ Files.copy(file, to.resolve(from.relativize(file)), StandardCopyOption.REPLACE_EXISTING);
+ return FileVisitResult.CONTINUE;
+ }
+ });
+ }
+
/** Returns a hash consistent with Spark's Utils.nonNegativeHash(). */
public static int nonNegativeHash(Object obj) {
if (obj == null) { return 0; }
@@ -83,6 +194,49 @@ public static String bytesToString(ByteBuffer b) {
return StandardCharsets.UTF_8.decode(b.slice()).toString();
}
+ public static long sizeOf(File file) throws IOException {
+ if (!file.exists()) {
+ throw new IllegalArgumentException(file.getAbsolutePath() + " not found");
+ }
+ return sizeOf(file.toPath());
+ }
+
+ public static long sizeOf(Path dirPath) throws IOException {
+ AtomicLong size = new AtomicLong(0);
+ Files.walkFileTree(dirPath, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+ size.addAndGet(attrs.size());
+ return FileVisitResult.CONTINUE;
+ }
+ });
+ return size.get();
+ }
+
+ public static void cleanDirectory(File dir) throws IOException {
+ if (dir == null || !dir.exists() || !dir.isDirectory()) {
+ throw new IllegalArgumentException("Invalid input directory " + dir);
+ }
+ cleanDirectory(dir.toPath());
+ }
+
+ private static void cleanDirectory(Path rootDir) throws IOException {
+ Files.walkFileTree(rootDir, new SimpleFileVisitor<Path>() {
+ @Override
+ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
+ Files.delete(file);
+ return FileVisitResult.CONTINUE;
+ }
+
+ @Override
+ public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException {
+ if (e != null) throw e;
+ if (!dir.equals(rootDir)) Files.delete(dir);
+ return FileVisitResult.CONTINUE;
+ }
+ });
+ }
+
/**
* Delete a file or directory and its contents recursively.
* Don't follow directories if they are symlinks.
@@ -110,14 +264,13 @@ public static void deleteRecursively(File file, FilenameFilter filter)
// On Unix systems, use operating system command to run faster
// If that does not work out, fallback to the Java IO way
// We exclude Apple Silicon test environment due to the limited resource issues.
- if (SystemUtils.IS_OS_UNIX && filter == null && !(SystemUtils.IS_OS_MAC_OSX &&
- (System.getenv("SPARK_TESTING") != null || System.getProperty("spark.testing") != null))) {
+ if (isUnix && filter == null && !(isMac && isTesting())) {
try {
deleteRecursivelyUsingUnixNative(file);
return;
} catch (IOException e) {
logger.warn("Attempt to delete using native Unix OS command failed for path = {}. " +
- "Falling back to Java IO way", e, MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath()));
+ "Falling back to Java IO way", e, MDC.of(LogKeys.PATH, file.getAbsolutePath()));
}
}
@@ -212,6 +365,25 @@ private static File[] listFilesSafely(File file, FilenameFilter filter) throws I
}
}
+ public static Set<Path> listPaths(File dir) throws IOException {
+ if (dir == null) throw new IllegalArgumentException("Input directory is null");
+ if (!dir.exists() || !dir.isDirectory()) return Collections.emptySet();
+ try (var stream = Files.walk(dir.toPath(), FileVisitOption.FOLLOW_LINKS)) {
+ return stream.filter(Files::isRegularFile).collect(Collectors.toCollection(HashSet::new));
+ }
+ }
+
+ public static Set<File> listFiles(File dir) throws IOException {
+ if (dir == null) throw new IllegalArgumentException("Input directory is null");
+ if (!dir.exists() || !dir.isDirectory()) return Collections.emptySet();
+ try (var stream = Files.walk(dir.toPath(), FileVisitOption.FOLLOW_LINKS)) {
+ return stream
+ .filter(Files::isRegularFile)
+ .map(Path::toFile)
+ .collect(Collectors.toCollection(HashSet::new));
+ }
+ }
+
private static final Map<String, TimeUnit> timeSuffixes;
private static final Map<String, ByteUnit> byteSuffixes;
@@ -415,7 +587,7 @@ public static File createDirectory(String root, String namePrefix) throws IOExce
dir = new File(root, namePrefix + "-" + UUID.randomUUID());
Files.createDirectories(dir.toPath());
} catch (IOException | SecurityException e) {
- logger.error("Failed to create directory {}", e, MDC.of(LogKeys.PATH$.MODULE$, dir));
+ logger.error("Failed to create directory {}", e, MDC.of(LogKeys.PATH, dir));
dir = null;
}
}
@@ -435,4 +607,154 @@ public static void readFully(ReadableByteChannel channel, ByteBuffer dst) throws
}
}
+ /**
+ * Read len bytes exactly, otherwise throw exceptions.
+ */
+ public static void readFully(InputStream in, byte[] arr, int off, int len) throws IOException {
+ if (in == null || len < 0 || (off < 0 || off > arr.length - len)) {
+ throw new IllegalArgumentException("Invalid input argument");
+ }
+ if (len != in.readNBytes(arr, off, len)) {
+ throw new EOFException("Failed to read " + len + " bytes.");
+ }
+ }
+
+ /**
+ * Copy the content of a URL into a file.
+ */
+ public static void copyURLToFile(URL url, File file) throws IOException {
+ if (url == null || file == null || (file.exists() && file.isDirectory())) {
+ throw new IllegalArgumentException("Invalid input " + url + " or " + file);
+ }
+ Files.createDirectories(file.getParentFile().toPath());
+ try (InputStream in = url.openStream()) {
+ Files.copy(in, file.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ }
+ }
+
+ public static String join(List<?> arr, String sep) {
+ if (arr == null) return "";
+ StringJoiner joiner = new StringJoiner(sep == null ? "" : sep);
+ for (Object a : arr) {
+ joiner.add(a == null ? "" : a.toString());
+ }
+ return joiner.toString();
+ }
+
+ public static String stackTraceToString(Throwable t) {
+ if (t == null) {
+ return "";
+ }
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ try (PrintWriter writer = new PrintWriter(out)) {
+ t.printStackTrace(writer);
+ writer.flush();
+ }
+ return out.toString(StandardCharsets.UTF_8);
+ }
+
+ public static int checkedCast(long value) {
+ if (value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) {
+ throw new IllegalArgumentException("Cannot cast to integer.");
+ }
+ return (int) value;
+ }
+
+ /** Return true if the contents of the files are equal or both files don't exist. */
+ public static boolean contentEquals(File file1, File file2) throws IOException {
+ if (file1 == null && file2 != null || file1 != null && file2 == null) {
+ return false;
+ } else if (file1 == null && file2 == null || !file1.exists() && !file2.exists()) {
+ return true;
+ } else if (!file1.exists() || !file2.exists()) {
+ return false;
+ } else if (file1.isDirectory() || file2.isDirectory()) {
+ throw new IllegalArgumentException("Input is not a file: %s or %s".formatted(file1, file2));
+ } else if (file1.length() != file2.length()) {
+ return false;
+ } else {
+ Path path1 = file1.toPath();
+ Path path2 = file2.toPath();
+ return Files.isSameFile(path1, path2) || Files.mismatch(path1, path2) == -1L;
+ }
+ }
+
+ public static String toString(InputStream in) throws IOException {
+ return new String(in.readAllBytes(), StandardCharsets.UTF_8);
+ }
+
+ /**
+ * Indicates whether Spark is currently running unit tests.
+ */
+ public static boolean isTesting() {
+ return System.getenv("SPARK_TESTING") != null || System.getProperty("spark.testing") != null;
+ }
+
+ /**
+ * The `os.name` system property.
+ */
+ public static String osName = System.getProperty("os.name");
+
+ /**
+ * The `os.version` system property.
+ */
+ public static String osVersion = System.getProperty("os.version");
+
+ /**
+ * The Java runtime version, i.e. `Runtime.version().toString()`.
+ */
+ public static String javaVersion = Runtime.version().toString();
+
+ /**
+ * The `os.arch` system property.
+ */
+ public static String osArch = System.getProperty("os.arch");
+
+ /**
+ * Whether the underlying operating system is Windows.
+ */
+ public static boolean isWindows = osName.regionMatches(true, 0, "Windows", 0, 7);
+
+ /**
+ * Whether the underlying operating system is Mac OS X.
+ */
+ public static boolean isMac = osName.regionMatches(true, 0, "Mac OS X", 0, 8);
+
+ /**
+ * Whether the underlying operating system is Mac OS X and processor is Apple Silicon.
+ */
+ public static boolean isMacOnAppleSilicon = isMac && osArch.equals("aarch64");
+
+ /**
+ * Whether the underlying operating system is Linux.
+ */
+ public static boolean isLinux = osName.regionMatches(true, 0, "Linux", 0, 5);
+
+ /**
+ * Whether the underlying operating system is UNIX.
+ */
+ public static boolean isUnix = Stream.of("AIX", "HP-UX", "Irix", "Linux", "Mac OS X", "Solaris",
+ "SunOS", "FreeBSD", "OpenBSD", "NetBSD")
+ .anyMatch(prefix -> osName.regionMatches(true, 0, prefix, 0, prefix.length()));
+
+ /**
+ * Throws IllegalArgumentException with the given message if the check is false.
+ * Keep this clone of CommandBuilderUtils.checkArgument synced with the original.
+ */
+ public static void checkArgument(boolean check, String msg, Object... args) {
+ if (!check) {
+ throw new IllegalArgumentException(String.format(msg, args));
+ }
+ }
+
+ /**
+ * Throws IllegalStateException with the given message if the check is false.
+ * Keep this clone of CommandBuilderUtils.checkState synced with the original.
+ */
+ public static void checkState(boolean check, String msg, Object... args) {
+ if (!check) {
+ throw new IllegalStateException(String.format(msg, args));
+ }
+ }
}
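
A minimal, self-contained sketch exercising a few of the commons-io replacements added above. The `/tmp` paths and the `JavaUtilsExample` class are illustrative; the helper methods are the ones defined in this file:

import java.io.File;
import java.io.IOException;
import org.apache.spark.network.util.JavaUtils;

public class JavaUtilsExample {
  public static void main(String[] args) throws IOException {
    File src = new File("/tmp/javautils-demo");       // assumed to be an existing directory
    File dst = new File("/tmp/javautils-demo-copy");  // created by copyDirectory if missing

    JavaUtils.copyDirectory(src, dst);                // recursive copy; file times are not kept
    System.out.println("copied bytes: " + JavaUtils.sizeOf(dst));
    System.out.println("same content: " +
      JavaUtils.contentEquals(new File(src, "a.txt"), new File(dst, "a.txt")));

    JavaUtils.cleanDirectory(dst);                    // empties the directory but keeps it
    JavaUtils.deleteQuietly(dst);                     // then removes it without throwing
  }
}
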
diff --git a/common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java b/common/utils-java/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java
similarity index 100%
rename from common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java
rename to common/utils-java/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java
diff --git a/common/utils-java/src/main/java/org/apache/spark/util/Pair.java b/common/utils-java/src/main/java/org/apache/spark/util/Pair.java
new file mode 100644
index 0000000000000..bdcc01b49dcf4
--- /dev/null
+++ b/common/utils-java/src/main/java/org/apache/spark/util/Pair.java
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.util;
+
+/**
+ * An immutable pair of values. Note that the fields are intentionally designed to be `getLeft` and
+ * `getRight` instead of `left` and `right` in order to mitigate the migration burden
+ * from `org.apache.commons.lang3.tuple.Pair`.
+ */
+public record Pair<L, R>(L getLeft, R getRight) {
+  public static <L, R> Pair<L, R> of(L left, R right) {
+ return new Pair<>(left, right);
+ }
+}
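
A short usage sketch of the new record (the `PairExample` class and values are made up), mainly to show that call sites keep the commons-lang3 accessor names:

import org.apache.spark.util.Pair;

public class PairExample {
  public static void main(String[] args) {
    Pair<String, Integer> p = Pair.of("executor-1", 3);
    // Accessors intentionally mirror org.apache.commons.lang3.tuple.Pair.
    System.out.println(p.getLeft() + " was lost " + p.getRight() + " times");
  }
}
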
diff --git a/common/utils/src/main/resources/org/apache/spark/SparkLayout.json b/common/utils-java/src/main/resources/org/apache/spark/SparkLayout.json
similarity index 100%
rename from common/utils/src/main/resources/org/apache/spark/SparkLayout.json
rename to common/utils-java/src/main/resources/org/apache/spark/SparkLayout.json
diff --git a/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties b/common/utils-java/src/main/resources/org/apache/spark/log4j2-defaults.properties
similarity index 100%
rename from common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties
rename to common/utils-java/src/main/resources/org/apache/spark/log4j2-defaults.properties
diff --git a/common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties b/common/utils-java/src/main/resources/org/apache/spark/log4j2-json-layout.properties
similarity index 100%
rename from common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties
rename to common/utils-java/src/main/resources/org/apache/spark/log4j2-json-layout.properties
diff --git a/connect-examples/server-library-example/common/src/main/protobuf/base.proto b/common/utils-java/src/test/java/org/apache/spark/util/CustomLogKeys.java
similarity index 75%
rename from connect-examples/server-library-example/common/src/main/protobuf/base.proto
rename to common/utils-java/src/test/java/org/apache/spark/util/CustomLogKeys.java
index 9d902a587ed37..cadacba7c5175 100644
--- a/connect-examples/server-library-example/common/src/main/protobuf/base.proto
+++ b/common/utils-java/src/test/java/org/apache/spark/util/CustomLogKeys.java
@@ -15,14 +15,10 @@
* limitations under the License.
*/
-syntax = 'proto3';
+package org.apache.spark.util;
-option java_multiple_files = true;
-option java_package = "org.apache.connect.examples.serverlibrary.proto";
+import org.apache.spark.internal.LogKey;
-message CustomTable {
- // Path to the custom table.
- string path = 1;
- // Name of the custom table.
- string name = 2;
+public enum CustomLogKeys implements LogKey {
+ CUSTOM_LOG_KEY
}
diff --git a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java b/common/utils-java/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java
similarity index 90%
rename from common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java
rename to common/utils-java/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java
index 6bfe595def1d4..7f8f3f93a8d46 100644
--- a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java
+++ b/common/utils-java/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java
@@ -90,12 +90,7 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) {
}
@Override
- String expectedPatternForScalaCustomLogKey(Level level) {
- return toRegexPattern(level, ".*: Scala custom log message.\n");
- }
-
- @Override
- String expectedPatternForJavaCustomLogKey(Level level) {
- return toRegexPattern(level, ".*: Java custom log message.\n");
+ String expectedPatternForCustomLogKey(Level level) {
+ return toRegexPattern(level, ".*: Custom log message.\n");
}
}
diff --git a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java b/common/utils-java/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java
similarity index 83%
rename from common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java
rename to common/utils-java/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java
index 186088ede1d0b..d86fe12c89243 100644
--- a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java
+++ b/common/utils-java/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java
@@ -22,11 +22,9 @@
import java.nio.file.Files;
import java.util.List;
-import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.Level;
import org.junit.jupiter.api.Test;
-import org.apache.spark.internal.LogKey;
import org.apache.spark.internal.LogKeys;
import org.apache.spark.internal.MDC;
import org.apache.spark.internal.SparkLogger;
@@ -73,23 +71,20 @@ private void checkLogOutput(Level level, Runnable func, ExpectedResult result) {
private final String basicMsgWithEscapeChar =
"This is a log message\nThis is a new line \t other msg";
- private final MDC executorIDMDC = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, "1");
+ private final MDC executorIDMDC = MDC.of(LogKeys.EXECUTOR_ID, "1");
private final String msgWithMDC = "Lost executor {}.";
private final MDC[] mdcs = new MDC[] {
- MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, "1"),
- MDC.of(LogKeys.REASON$.MODULE$, "the shuffle data is too large")};
+ MDC.of(LogKeys.EXECUTOR_ID, "1"),
+ MDC.of(LogKeys.REASON, "the shuffle data is too large")};
private final String msgWithMDCs = "Lost executor {}, reason: {}";
private final MDC[] emptyMDCs = new MDC[0];
- private final MDC executorIDMDCValueIsNull = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, null);
+ private final MDC executorIDMDCValueIsNull = MDC.of(LogKeys.EXECUTOR_ID, null);
- private final MDC scalaCustomLogMDC =
- MDC.of(CustomLogKeys.CUSTOM_LOG_KEY$.MODULE$, "Scala custom log message.");
-
- private final MDC javaCustomLogMDC =
- MDC.of(JavaCustomLogKeys.CUSTOM_LOG_KEY, "Java custom log message.");
+ private final MDC customLogMDC =
+ MDC.of(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message.");
// test for basic message (without any mdc)
abstract String expectedPatternForBasicMsg(Level level);
@@ -118,10 +113,7 @@ String expectedPatternForMsgWithEmptyMDCsAndException(Level level) {
abstract String expectedPatternForMsgWithMDCValueIsNull(Level level);
// test for scala custom LogKey
- abstract String expectedPatternForScalaCustomLogKey(Level level);
-
- // test for java custom LogKey
- abstract String expectedPatternForJavaCustomLogKey(Level level);
+ abstract String expectedPatternForCustomLogKey(Level level);
@Test
public void testBasicMsg() {
@@ -241,34 +233,14 @@ public void testLoggerWithMDCValueIsNull() {
}
@Test
- public void testLoggerWithScalaCustomLogKey() {
- Runnable errorFn = () -> logger().error("{}", scalaCustomLogMDC);
- Runnable warnFn = () -> logger().warn("{}", scalaCustomLogMDC);
- Runnable infoFn = () -> logger().info("{}", scalaCustomLogMDC);
- List.of(
- Pair.of(Level.ERROR, errorFn),
- Pair.of(Level.WARN, warnFn),
- Pair.of(Level.INFO, infoFn)).forEach(pair ->
- checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForScalaCustomLogKey));
- }
-
- @Test
- public void testLoggerWithJavaCustomLogKey() {
- Runnable errorFn = () -> logger().error("{}", javaCustomLogMDC);
- Runnable warnFn = () -> logger().warn("{}", javaCustomLogMDC);
- Runnable infoFn = () -> logger().info("{}", javaCustomLogMDC);
+ public void testLoggerWithCustomLogKey() {
+ Runnable errorFn = () -> logger().error("{}", customLogMDC);
+ Runnable warnFn = () -> logger().warn("{}", customLogMDC);
+ Runnable infoFn = () -> logger().info("{}", customLogMDC);
List.of(
Pair.of(Level.ERROR, errorFn),
Pair.of(Level.WARN, warnFn),
Pair.of(Level.INFO, infoFn)).forEach(pair ->
- checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForJavaCustomLogKey));
+ checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForCustomLogKey));
}
}
-
-class JavaCustomLogKeys {
- // Custom `LogKey` must be `implements LogKey`
- public static class CUSTOM_LOG_KEY implements LogKey { }
-
- // Singleton
- public static final CUSTOM_LOG_KEY CUSTOM_LOG_KEY = new CUSTOM_LOG_KEY();
-}
diff --git a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java b/common/utils-java/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java
similarity index 88%
rename from common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java
rename to common/utils-java/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java
index 1fab167adfeb0..88ac8ea34710a 100644
--- a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java
+++ b/common/utils-java/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java
@@ -24,7 +24,6 @@
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.BeforeAll;
-import org.apache.spark.internal.Logging$;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;
@@ -33,13 +32,13 @@ public class StructuredSparkLoggerSuite extends SparkLoggerSuiteBase {
// Enable Structured Logging before running the tests
@BeforeAll
public static void setup() {
- Logging$.MODULE$.enableStructuredLogging();
+ SparkLoggerFactory.enableStructuredLogging();
}
// Disable Structured Logging after running the tests
@AfterAll
public static void teardown() {
- Logging$.MODULE$.disableStructuredLogging();
+ SparkLoggerFactory.disableStructuredLogging();
}
private static final SparkLogger LOGGER =
@@ -176,28 +175,14 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) {
}
@Override
- String expectedPatternForScalaCustomLogKey(Level level) {
+ String expectedPatternForCustomLogKey(Level level) {
return compactAndToRegexPattern(level, """
{
"ts": "",
"level": "",
- "msg": "Scala custom log message.",
+ "msg": "Custom log message.",
"context": {
- "custom_log_key": "Scala custom log message."
- },
- "logger": ""
- }""");
- }
-
- @Override
- String expectedPatternForJavaCustomLogKey(Level level) {
- return compactAndToRegexPattern(level, """
- {
- "ts": "",
- "level": "",
- "msg": "Java custom log message.",
- "context": {
- "custom_log_key": "Java custom log message."
+ "custom_log_key": "Custom log message."
},
"logger": ""
}""");
diff --git a/common/utils-java/src/test/resources/log4j2.properties b/common/utils-java/src/test/resources/log4j2.properties
new file mode 100644
index 0000000000000..cb38f5b55a0ba
--- /dev/null
+++ b/common/utils-java/src/test/resources/log4j2.properties
@@ -0,0 +1,60 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+rootLogger.level = info
+rootLogger.appenderRef.file.ref = ${sys:test.appender:-File}
+
+appender.file.type = File
+appender.file.name = File
+appender.file.fileName = target/unit-tests.log
+appender.file.layout.type = JsonTemplateLayout
+appender.file.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json
+
+# Structured Logging Appender
+appender.structured.type = File
+appender.structured.name = structured
+appender.structured.fileName = target/structured.log
+appender.structured.layout.type = JsonTemplateLayout
+appender.structured.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json
+
+# Pattern Logging Appender
+appender.pattern.type = File
+appender.pattern.name = pattern
+appender.pattern.fileName = target/pattern.log
+appender.pattern.layout.type = PatternLayout
+appender.pattern.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex
+
+# Custom loggers
+logger.structured_logging.name = org.apache.spark.util.StructuredLoggingSuite
+logger.structured_logging.level = trace
+logger.structured_logging.appenderRefs = structured
+logger.structured_logging.appenderRef.structured.ref = structured
+
+logger.pattern_logging.name = org.apache.spark.util.PatternLoggingSuite
+logger.pattern_logging.level = trace
+logger.pattern_logging.appenderRefs = pattern
+logger.pattern_logging.appenderRef.pattern.ref = pattern
+
+logger.structured_logger.name = org.apache.spark.util.StructuredSparkLoggerSuite
+logger.structured_logger.level = trace
+logger.structured_logger.appenderRefs = structured
+logger.structured_logger.appenderRef.structured.ref = structured
+
+logger.pattern_logger.name = org.apache.spark.util.PatternSparkLoggerSuite
+logger.pattern_logger.level = trace
+logger.pattern_logger.appenderRefs = pattern
+logger.pattern_logger.appenderRef.pattern.ref = pattern
diff --git a/common/utils/pom.xml b/common/utils/pom.xml
index 44771938439ae..df3bc5adb10bd 100644
--- a/common/utils/pom.xml
+++ b/common/utils/pom.xml
@@ -39,6 +39,18 @@
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-tags_${scala.binary.version}</artifactId>
    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-common-utils-java_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-common-utils-java_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
    <dependency>
      <groupId>org.apache.xbean</groupId>
      <artifactId>xbean-asm9-shaded</artifactId>
@@ -51,14 +63,6 @@
      <groupId>com.fasterxml.jackson.module</groupId>
      <artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
    </dependency>
-    <dependency>
-      <groupId>org.apache.commons</groupId>
-      <artifactId>commons-text</artifactId>
-    </dependency>
-    <dependency>
-      <groupId>commons-io</groupId>
-      <artifactId>commons-io</artifactId>
-    </dependency>
    <dependency>
      <groupId>org.apache.ivy</groupId>
      <artifactId>ivy</artifactId>
diff --git a/common/utils/src/main/java/org/apache/spark/SparkThrowable.java b/common/utils/src/main/java/org/apache/spark/SparkThrowable.java
index 39808f58b08ae..26d66ae3433ad 100644
--- a/common/utils/src/main/java/org/apache/spark/SparkThrowable.java
+++ b/common/utils/src/main/java/org/apache/spark/SparkThrowable.java
@@ -60,9 +60,37 @@ default boolean isInternalError() {
return SparkThrowableHelper.isInternalError(this.getCondition());
}
+ // If null, the error message is not for a breaking change
+ default BreakingChangeInfo getBreakingChangeInfo() {
+ return SparkThrowableHelper.getBreakingChangeInfo(
+ this.getCondition()).getOrElse(() -> null);
+ }
+
default Map<String, String> getMessageParameters() {
return new HashMap<>();
}
+ /**
+ * Returns the default message template for this error.
+ *
+ * The template is a machine-readable string with placeholders
+ * to be filled by {@code getMessageParameters()}.
+ *
+ * This is the default template known to Spark, but clients are
+ * free to generate their own messages (e.g., translations,
+ * alternate formats) using the provided error metadata.
+ *
+ * @return the default message template for this error, or null if unavailable
+ */
+ default String getDefaultMessageTemplate() {
+ try {
+ String cond = this.getCondition();
+ if (cond == null) return null;
+ return SparkThrowableHelper.getMessageTemplate(cond);
+ } catch (Throwable t) {
+ return null; // Unknown error condition
+ }
+ }
+
default QueryContext[] getQueryContext() { return new QueryContext[0]; }
}
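
A hedged sketch of how a client could combine the new `getDefaultMessageTemplate()` with the existing accessors. The `ErrorReporter` class and the `<param>` substitution loop are assumptions based on the placeholder style of error-conditions.json, not an API provided by this patch:

import java.util.Map;
import org.apache.spark.SparkThrowable;

final class ErrorReporter {
  // Renders a default, human-readable message for any exception implementing SparkThrowable.
  static String describe(Throwable t) {
    if (t instanceof SparkThrowable st && st.getDefaultMessageTemplate() != null) {
      String msg = st.getDefaultMessageTemplate();
      // Fill <param> placeholders the way the JSON templates spell them (assumed convention).
      for (Map.Entry<String, String> e : st.getMessageParameters().entrySet()) {
        msg = msg.replace("<" + e.getKey() + ">", String.valueOf(e.getValue()));
      }
      return st.getCondition() + ": " + msg;
    }
    return String.valueOf(t.getMessage());
  }
}
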
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index fe14f4e827938..c3f2c49a446bd 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -90,6 +90,42 @@
],
"sqlState" : "42000"
},
+ "APPROX_TOP_K_MAX_ITEMS_TRACKED_EXCEEDS_LIMIT" : {
+ "message" : [
+ "The max items tracked `maxItemsTracked`() of `approx_top_k` should be less than or equal to ."
+ ],
+ "sqlState" : "22023"
+ },
+ "APPROX_TOP_K_MAX_ITEMS_TRACKED_LESS_THAN_K" : {
+ "message" : [
+ "The max items tracked `maxItemsTracked`() of `approx_top_k` should be greater than or equal to `k`()."
+ ],
+ "sqlState" : "22023"
+ },
+ "APPROX_TOP_K_NON_POSITIVE_ARG" : {
+ "message" : [
+ "The value of in `approx_top_k` must be a positive integer, but got ."
+ ],
+ "sqlState" : "22023"
+ },
+ "APPROX_TOP_K_NULL_ARG" : {
+ "message" : [
+ "The value of in `approx_top_k` cannot be NULL."
+ ],
+ "sqlState" : "22004"
+ },
+ "APPROX_TOP_K_SKETCH_SIZE_NOT_MATCH" : {
+ "message" : [
+ "Combining approx_top_k sketches of different sizes is not allowed. Found sketches of size and ."
+ ],
+ "sqlState" : "42846"
+ },
+ "APPROX_TOP_K_SKETCH_TYPE_NOT_MATCH" : {
+ "message" : [
+ "Combining approx_top_k sketches of different types is not allowed. Found sketches of type and ."
+ ],
+ "sqlState" : "42846"
+ },
"ARITHMETIC_OVERFLOW" : {
"message" : [
". If necessary set to \"false\" to bypass this error."
@@ -249,6 +285,24 @@
],
"sqlState" : "0A000"
},
+ "CANNOT_LOAD_CHECKPOINT_FILE_MANAGER" : {
+ "message" : [
+ "Error loading streaming checkpoint file manager for path=."
+ ],
+ "subClass" : {
+ "ERROR_LOADING_CLASS" : {
+ "message" : [
+ "Error instantiating streaming checkpoint file manager for path= with className=. msg=."
+ ]
+ },
+ "UNCATEGORIZED" : {
+ "message" : [
+ ""
+ ]
+ }
+ },
+ "sqlState" : "58030"
+ },
"CANNOT_LOAD_FUNCTION_CLASS" : {
"message" : [
"Cannot load class when registering the function , please make sure it is on the classpath."
@@ -326,6 +380,11 @@
"The change log writer version cannot be ."
]
},
+ "INVALID_CHECKPOINT_LINEAGE" : {
+ "message" : [
+ "Invalid checkpoint lineage: . "
+ ]
+ },
"KEY_ROW_FORMAT_VALIDATION_FAILURE" : {
"message" : [
""
@@ -393,6 +452,12 @@
],
"sqlState" : "46110"
},
+ "CANNOT_MODIFY_STATIC_CONFIG" : {
+ "message" : [
+ "Cannot modify the value of the static Spark config: ."
+ ],
+ "sqlState" : "46110"
+ },
"CANNOT_PARSE_DECIMAL" : {
"message" : [
"Cannot parse decimal. Please ensure that the input is a valid number with optional decimal point or comma separators."
@@ -518,6 +583,12 @@
],
"sqlState" : "22KD3"
},
+ "CANNOT_USE_MULTI_ALIASES_IN_WATERMARK_CLAUSE" : {
+ "message" : [
+ "Multiple aliases are not supported in watermark clause."
+ ],
+ "sqlState" : "42000"
+ },
"CANNOT_WRITE_STATE_STORE" : {
"message" : [
"Error writing state store files for provider ."
@@ -853,6 +924,11 @@
"Please fit or load a model smaller than bytes."
]
},
+ "MODEL_SUMMARY_LOST" : {
+ "message" : [
+ "The model summary is lost because the cached model is offloaded."
+ ]
+ },
"UNSUPPORTED_EXCEPTION" : {
"message" : [
""
@@ -921,12 +997,24 @@
},
"sqlState" : "21S01"
},
+ "CYCLIC_FUNCTION_REFERENCE" : {
+ "message" : [
+ "Cyclic function reference detected: ."
+ ],
+ "sqlState" : "42887"
+ },
"DATAFLOW_GRAPH_NOT_FOUND" : {
"message" : [
"Dataflow graph with id could not be found"
],
"sqlState" : "KD011"
},
+ "DATATYPE_CANNOT_ORDER" : {
+ "message" : [
+ "Type does not support ordered operations."
+ ],
+ "sqlState" : "0A000"
+ },
"DATATYPE_MISMATCH" : {
"message" : [
"Cannot resolve due to data type mismatch:"
@@ -1435,6 +1523,12 @@
],
"sqlState" : "42711"
},
+ "DUPLICATE_VARIABLE_NAME_INSIDE_DECLARE" : {
+ "message" : [
+ "Found duplicate variable in the declare variable list. Please, remove one of them."
+ ],
+ "sqlState" : "42734"
+ },
"EMITTING_ROWS_OLDER_THAN_WATERMARK_NOT_ALLOWED" : {
"message" : [
"Previous node emitted a row with eventTime= which is older than current_watermark_value=",
@@ -1781,12 +1875,6 @@
],
"sqlState" : "39000"
},
- "FOUND_MULTIPLE_DATA_SOURCES" : {
- "message" : [
- "Detected multiple data sources with the name ''. Please check the data source isn't simultaneously registered and located in the classpath."
- ],
- "sqlState" : "42710"
- },
"GENERATED_COLUMN_WITH_DEFAULT_VALUE" : {
"message" : [
"A column cannot have both a default value and a generation expression but column has default value: () and generation expression: ()."
@@ -2443,6 +2531,29 @@
],
"sqlState" : "22P03"
},
+ "INVALID_CLONE_SESSION_REQUEST" : {
+ "message" : [
+ "Invalid session clone request."
+ ],
+ "subClass" : {
+ "TARGET_SESSION_ID_ALREADY_CLOSED" : {
+ "message" : [
+ "Cannot clone session to target session ID because a session with this ID was previously closed."
+ ]
+ },
+ "TARGET_SESSION_ID_ALREADY_EXISTS" : {
+ "message" : [
+ "Cannot clone session to target session ID because a session with this ID already exists."
+ ]
+ },
+ "TARGET_SESSION_ID_FORMAT" : {
+ "message" : [
+ "Target session ID for clone operation must be an UUID string of the format '00112233-4455-6677-8899-aabbccddeeff'."
+ ]
+ }
+ },
+ "sqlState" : "42K04"
+ },
"INVALID_COLUMN_NAME_AS_PATH" : {
"message" : [
"The datasource cannot save the column because its name contains some characters that are not allowed in file paths. Please, use an alias to rename it."
@@ -2678,6 +2789,12 @@
],
"sqlState" : "42001"
},
+ "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE" : {
+ "message" : [
+ "Expression type must be string type but got ."
+ ],
+ "sqlState" : "42K09"
+ },
"INVALID_EXTERNAL_TYPE" : {
"message" : [
"The external type is not valid for the type at the expression ."
@@ -2708,6 +2825,34 @@
],
"sqlState" : "42000"
},
+ "INVALID_FLOW_QUERY_TYPE" : {
+ "message" : [
+ "Flow returns an invalid relation type."
+ ],
+ "subClass" : {
+ "BATCH_RELATION_FOR_STREAMING_TABLE" : {
+ "message" : [
+ "Streaming tables may only be defined by streaming relations, but the flow attempts to write a batch relation to the streaming table . Consider using the STREAM operator in Spark-SQL to convert the batch relation into a streaming relation, or populating the streaming table with an append once-flow instead."
+ ]
+ },
+ "STREAMING_RELATION_FOR_MATERIALIZED_VIEW" : {
+ "message" : [
+ "Materialized views may only be defined by a batch relation, but the flow attempts to write a streaming relation to the materialized view ."
+ ]
+ },
+ "STREAMING_RELATION_FOR_ONCE_FLOW" : {
+ "message" : [
+ " is an append once-flow that is defined by a streaming relation. Append once-flows may only be defined by or return a batch relation."
+ ]
+ },
+ "STREAMING_RELATION_FOR_PERSISTED_VIEW" : {
+ "message" : [
+ "Persisted views may only be defined by a batch relation, but the flow attempts to write a streaming relation to the persisted view ."
+ ]
+ }
+ },
+ "sqlState" : "42000"
+ },
"INVALID_FORMAT" : {
"message" : [
"The format is invalid: ."
@@ -3349,6 +3494,11 @@
"expects a string literal, but got ."
]
},
+ "TIME_UNIT" : {
+ "message" : [
+ "expects one of the units 'HOUR', 'MINUTE', 'SECOND', 'MILLISECOND', 'MICROSECOND', but got ''."
+ ]
+ },
"ZERO_INDEX" : {
"message" : [
"expects %1$, %2$ and so on, but got %0$."
@@ -3407,7 +3557,7 @@
},
"INVALID_RECURSIVE_CTE" : {
"message" : [
- "Invalid recursive definition found. Recursive queries must contain an UNION or an UNION ALL statement with 2 children. The first child needs to be the anchor term without any recursive references."
+ "Invalid recursive definition found. Recursive queries must contain an UNION or an UNION ALL statement with 2 children. The first child needs to be the anchor term without any recursive references. Any top level inner CTE must not contain self references."
],
"sqlState" : "42836"
},
@@ -3416,14 +3566,9 @@
"Invalid recursive reference found inside WITH RECURSIVE clause."
],
"subClass" : {
- "NUMBER" : {
- "message" : [
- "Multiple self-references to one recursive CTE are not allowed."
- ]
- },
"PLACE" : {
"message" : [
- "Recursive references cannot be used on the right side of left outer/semi/anti joins, on the left side of right outer joins, in full outer joins, in aggregates, and in subquery expressions."
+ "Recursive references cannot be used on the right side of left outer/semi/anti joins, on the left side of right outer joins, in full outer joins, in aggregates, window functions or sorts"
]
}
},
@@ -3551,6 +3696,12 @@
],
"sqlState" : "42K08"
},
+ "INVALID_SQL_FUNCTION_DATA_ACCESS" : {
+ "message" : [
+ "Cannot create a SQL function with CONTAINS SQL that accesses a table/view or a SQL function that reads SQL data. Please use READS SQL DATA instead."
+ ],
+ "sqlState" : "42K0E"
+ },
"INVALID_SQL_FUNCTION_PLAN_STRUCTURE" : {
"message" : [
"Invalid SQL function plan structure",
@@ -3821,12 +3972,6 @@
},
"sqlState" : "42K0M"
},
- "INVALID_VARIABLE_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE" : {
- "message" : [
- "Variable type must be string type but got ."
- ],
- "sqlState" : "42K09"
- },
"INVALID_VARIANT_CAST" : {
"message" : [
"The variant value `` cannot be cast into ``. Please use `try_variant_get` instead."
@@ -3941,7 +4086,7 @@
},
"JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR" : {
"message" : [
- "JDBC external engine syntax error. The error was caused by the query ."
+ "JDBC external engine syntax error. The error was caused by the query . ."
],
"subClass" : {
"DURING_OUTPUT_SCHEMA_RESOLUTION" : {
@@ -3975,18 +4120,25 @@
],
"sqlState" : "42K0L"
},
- "LABEL_ALREADY_EXISTS" : {
+ "LABEL_OR_FOR_VARIABLE_ALREADY_EXISTS" : {
"message" : [
- "The label